{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999292135626814, "eval_steps": 500, "global_step": 14126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014157287463721952, "grad_norm": 121.29455930579698, "learning_rate": 1.179245283018868e-08, "loss": 2.1241, "step": 1 }, { "epoch": 0.00028314574927443904, "grad_norm": 119.0568240116878, "learning_rate": 2.358490566037736e-08, "loss": 1.9513, "step": 2 }, { "epoch": 0.00042471862391165854, "grad_norm": 109.14217630429357, "learning_rate": 3.537735849056604e-08, "loss": 2.1135, "step": 3 }, { "epoch": 0.0005662914985488781, "grad_norm": 149.004587806886, "learning_rate": 4.716981132075472e-08, "loss": 2.0895, "step": 4 }, { "epoch": 0.0007078643731860976, "grad_norm": 101.64176714385184, "learning_rate": 5.89622641509434e-08, "loss": 1.8525, "step": 5 }, { "epoch": 0.0008494372478233171, "grad_norm": 135.70088602787382, "learning_rate": 7.075471698113208e-08, "loss": 1.8955, "step": 6 }, { "epoch": 0.0009910101224605366, "grad_norm": 113.37866931768956, "learning_rate": 8.254716981132076e-08, "loss": 1.9702, "step": 7 }, { "epoch": 0.0011325829970977562, "grad_norm": 123.19107126355587, "learning_rate": 9.433962264150944e-08, "loss": 1.9369, "step": 8 }, { "epoch": 0.0012741558717349756, "grad_norm": 87.14484563698389, "learning_rate": 1.0613207547169811e-07, "loss": 1.9844, "step": 9 }, { "epoch": 0.0014157287463721952, "grad_norm": 100.80036155999144, "learning_rate": 1.179245283018868e-07, "loss": 1.8479, "step": 10 }, { "epoch": 0.0015573016210094145, "grad_norm": 71.909370178936, "learning_rate": 1.297169811320755e-07, "loss": 1.9629, "step": 11 }, { "epoch": 0.0016988744956466342, "grad_norm": 68.60155849632719, "learning_rate": 1.4150943396226417e-07, "loss": 1.8874, "step": 12 }, { "epoch": 0.0018404473702838535, "grad_norm": 65.15083910386343, "learning_rate": 1.5330188679245283e-07, "loss": 1.9847, "step": 13 }, { "epoch": 0.001982020244921073, "grad_norm": 65.12410403054501, "learning_rate": 1.6509433962264153e-07, "loss": 1.9638, "step": 14 }, { "epoch": 0.0021235931195582925, "grad_norm": 47.316547981553185, "learning_rate": 1.768867924528302e-07, "loss": 1.6393, "step": 15 }, { "epoch": 0.0022651659941955123, "grad_norm": 40.437747535950244, "learning_rate": 1.886792452830189e-07, "loss": 1.676, "step": 16 }, { "epoch": 0.0024067388688327317, "grad_norm": 35.73549373795348, "learning_rate": 2.0047169811320755e-07, "loss": 1.8091, "step": 17 }, { "epoch": 0.002548311743469951, "grad_norm": 35.40390031093414, "learning_rate": 2.1226415094339622e-07, "loss": 1.776, "step": 18 }, { "epoch": 0.0026898846181071705, "grad_norm": 35.708471973792506, "learning_rate": 2.2405660377358492e-07, "loss": 1.6792, "step": 19 }, { "epoch": 0.0028314574927443903, "grad_norm": 38.410961684642345, "learning_rate": 2.358490566037736e-07, "loss": 1.7793, "step": 20 }, { "epoch": 0.0029730303673816097, "grad_norm": 43.7853749283615, "learning_rate": 2.476415094339623e-07, "loss": 1.7117, "step": 21 }, { "epoch": 0.003114603242018829, "grad_norm": 59.49889007302468, "learning_rate": 2.59433962264151e-07, "loss": 1.8114, "step": 22 }, { "epoch": 0.0032561761166560485, "grad_norm": 61.98233863556921, "learning_rate": 2.7122641509433966e-07, "loss": 1.567, "step": 23 }, { "epoch": 0.0033977489912932683, "grad_norm": 60.86880543110219, "learning_rate": 2.8301886792452833e-07, "loss": 1.7123, "step": 24 }, { "epoch": 0.0035393218659304877, "grad_norm": 49.47726257301176, "learning_rate": 2.94811320754717e-07, "loss": 1.6848, "step": 25 }, { "epoch": 0.003680894740567707, "grad_norm": 41.5570191404984, "learning_rate": 3.0660377358490567e-07, "loss": 1.6172, "step": 26 }, { "epoch": 0.003822467615204927, "grad_norm": 39.94731179599652, "learning_rate": 3.183962264150944e-07, "loss": 1.5371, "step": 27 }, { "epoch": 0.003964040489842146, "grad_norm": 33.341760205976115, "learning_rate": 3.3018867924528305e-07, "loss": 1.6682, "step": 28 }, { "epoch": 0.004105613364479366, "grad_norm": 34.564514874023075, "learning_rate": 3.419811320754717e-07, "loss": 1.4421, "step": 29 }, { "epoch": 0.004247186239116585, "grad_norm": 36.3629923399335, "learning_rate": 3.537735849056604e-07, "loss": 1.4295, "step": 30 }, { "epoch": 0.004388759113753805, "grad_norm": 41.21595570750256, "learning_rate": 3.6556603773584905e-07, "loss": 1.4103, "step": 31 }, { "epoch": 0.004530331988391025, "grad_norm": 37.32966196082379, "learning_rate": 3.773584905660378e-07, "loss": 1.6439, "step": 32 }, { "epoch": 0.004671904863028244, "grad_norm": 32.3715026616052, "learning_rate": 3.8915094339622644e-07, "loss": 1.4486, "step": 33 }, { "epoch": 0.0048134777376654635, "grad_norm": 27.53994236183091, "learning_rate": 4.009433962264151e-07, "loss": 1.3836, "step": 34 }, { "epoch": 0.004955050612302682, "grad_norm": 22.26037218740682, "learning_rate": 4.127358490566038e-07, "loss": 1.6212, "step": 35 }, { "epoch": 0.005096623486939902, "grad_norm": 21.816463951915637, "learning_rate": 4.2452830188679244e-07, "loss": 1.5516, "step": 36 }, { "epoch": 0.005238196361577122, "grad_norm": 21.77305556154965, "learning_rate": 4.3632075471698116e-07, "loss": 1.513, "step": 37 }, { "epoch": 0.005379769236214341, "grad_norm": 22.454007082969206, "learning_rate": 4.4811320754716983e-07, "loss": 1.5693, "step": 38 }, { "epoch": 0.005521342110851561, "grad_norm": 24.280641683177876, "learning_rate": 4.599056603773585e-07, "loss": 1.6282, "step": 39 }, { "epoch": 0.005662914985488781, "grad_norm": 22.294075162551948, "learning_rate": 4.716981132075472e-07, "loss": 1.4611, "step": 40 }, { "epoch": 0.005804487860126, "grad_norm": 14.24960433678363, "learning_rate": 4.834905660377359e-07, "loss": 1.2234, "step": 41 }, { "epoch": 0.005946060734763219, "grad_norm": 16.99897673276064, "learning_rate": 4.952830188679246e-07, "loss": 1.3884, "step": 42 }, { "epoch": 0.006087633609400439, "grad_norm": 18.36343455783165, "learning_rate": 5.070754716981133e-07, "loss": 1.4757, "step": 43 }, { "epoch": 0.006229206484037658, "grad_norm": 18.134672964390358, "learning_rate": 5.18867924528302e-07, "loss": 1.5975, "step": 44 }, { "epoch": 0.006370779358674878, "grad_norm": 19.825536055370925, "learning_rate": 5.306603773584906e-07, "loss": 1.5375, "step": 45 }, { "epoch": 0.006512352233312097, "grad_norm": 16.95551538447372, "learning_rate": 5.424528301886793e-07, "loss": 1.3552, "step": 46 }, { "epoch": 0.006653925107949317, "grad_norm": 18.30464748715217, "learning_rate": 5.542452830188679e-07, "loss": 1.4309, "step": 47 }, { "epoch": 0.006795497982586537, "grad_norm": 16.399239573643346, "learning_rate": 5.660377358490567e-07, "loss": 1.3062, "step": 48 }, { "epoch": 0.0069370708572237556, "grad_norm": 16.27369628934892, "learning_rate": 5.778301886792454e-07, "loss": 1.5305, "step": 49 }, { "epoch": 0.007078643731860975, "grad_norm": 17.799185504923305, "learning_rate": 5.89622641509434e-07, "loss": 1.5249, "step": 50 }, { "epoch": 0.007220216606498195, "grad_norm": 15.969736565200883, "learning_rate": 6.014150943396227e-07, "loss": 1.5306, "step": 51 }, { "epoch": 0.007361789481135414, "grad_norm": 15.57897175560422, "learning_rate": 6.132075471698113e-07, "loss": 1.4391, "step": 52 }, { "epoch": 0.007503362355772634, "grad_norm": 13.591220804585008, "learning_rate": 6.25e-07, "loss": 1.3186, "step": 53 }, { "epoch": 0.007644935230409854, "grad_norm": 15.868291924420625, "learning_rate": 6.367924528301888e-07, "loss": 1.2862, "step": 54 }, { "epoch": 0.007786508105047073, "grad_norm": 16.218645569569862, "learning_rate": 6.485849056603774e-07, "loss": 1.3619, "step": 55 }, { "epoch": 0.007928080979684293, "grad_norm": 19.71968939670054, "learning_rate": 6.603773584905661e-07, "loss": 1.5541, "step": 56 }, { "epoch": 0.008069653854321512, "grad_norm": 18.530815235203494, "learning_rate": 6.721698113207547e-07, "loss": 1.5588, "step": 57 }, { "epoch": 0.008211226728958732, "grad_norm": 15.304690989503175, "learning_rate": 6.839622641509434e-07, "loss": 1.2499, "step": 58 }, { "epoch": 0.00835279960359595, "grad_norm": 15.021865675216837, "learning_rate": 6.957547169811322e-07, "loss": 1.5457, "step": 59 }, { "epoch": 0.00849437247823317, "grad_norm": 15.16657086576171, "learning_rate": 7.075471698113208e-07, "loss": 1.4426, "step": 60 }, { "epoch": 0.00863594535287039, "grad_norm": 18.84819583100686, "learning_rate": 7.193396226415095e-07, "loss": 1.554, "step": 61 }, { "epoch": 0.00877751822750761, "grad_norm": 22.17246271904282, "learning_rate": 7.311320754716981e-07, "loss": 1.4508, "step": 62 }, { "epoch": 0.00891909110214483, "grad_norm": 14.210870391368484, "learning_rate": 7.429245283018868e-07, "loss": 1.3182, "step": 63 }, { "epoch": 0.00906066397678205, "grad_norm": 14.479562496931289, "learning_rate": 7.547169811320755e-07, "loss": 1.5386, "step": 64 }, { "epoch": 0.009202236851419267, "grad_norm": 14.237824173904917, "learning_rate": 7.665094339622642e-07, "loss": 1.3154, "step": 65 }, { "epoch": 0.009343809726056487, "grad_norm": 15.567396749384091, "learning_rate": 7.783018867924529e-07, "loss": 1.4482, "step": 66 }, { "epoch": 0.009485382600693707, "grad_norm": 12.367544066638064, "learning_rate": 7.900943396226415e-07, "loss": 1.3596, "step": 67 }, { "epoch": 0.009626955475330927, "grad_norm": 16.57972276702026, "learning_rate": 8.018867924528302e-07, "loss": 1.5529, "step": 68 }, { "epoch": 0.009768528349968147, "grad_norm": 15.40148480676229, "learning_rate": 8.136792452830189e-07, "loss": 1.4806, "step": 69 }, { "epoch": 0.009910101224605365, "grad_norm": 14.42100368525156, "learning_rate": 8.254716981132076e-07, "loss": 1.3727, "step": 70 }, { "epoch": 0.010051674099242585, "grad_norm": 17.339697232006568, "learning_rate": 8.372641509433963e-07, "loss": 1.6527, "step": 71 }, { "epoch": 0.010193246973879804, "grad_norm": 17.66194511994423, "learning_rate": 8.490566037735849e-07, "loss": 1.5529, "step": 72 }, { "epoch": 0.010334819848517024, "grad_norm": 16.891061330800365, "learning_rate": 8.608490566037736e-07, "loss": 1.5494, "step": 73 }, { "epoch": 0.010476392723154244, "grad_norm": 16.937066667369585, "learning_rate": 8.726415094339623e-07, "loss": 1.6682, "step": 74 }, { "epoch": 0.010617965597791464, "grad_norm": 16.226649012768895, "learning_rate": 8.844339622641509e-07, "loss": 1.5249, "step": 75 }, { "epoch": 0.010759538472428682, "grad_norm": 16.40357307029954, "learning_rate": 8.962264150943397e-07, "loss": 1.3741, "step": 76 }, { "epoch": 0.010901111347065902, "grad_norm": 15.525129559109915, "learning_rate": 9.080188679245283e-07, "loss": 1.5133, "step": 77 }, { "epoch": 0.011042684221703122, "grad_norm": 13.56741567382699, "learning_rate": 9.19811320754717e-07, "loss": 1.4195, "step": 78 }, { "epoch": 0.011184257096340341, "grad_norm": 14.74642331999261, "learning_rate": 9.316037735849057e-07, "loss": 1.3685, "step": 79 }, { "epoch": 0.011325829970977561, "grad_norm": 16.251830951537237, "learning_rate": 9.433962264150944e-07, "loss": 1.486, "step": 80 }, { "epoch": 0.01146740284561478, "grad_norm": 17.668033954804507, "learning_rate": 9.551886792452833e-07, "loss": 1.3934, "step": 81 }, { "epoch": 0.011608975720252, "grad_norm": 12.921665397521629, "learning_rate": 9.669811320754719e-07, "loss": 1.4425, "step": 82 }, { "epoch": 0.011750548594889219, "grad_norm": 13.93411172642491, "learning_rate": 9.787735849056605e-07, "loss": 1.4677, "step": 83 }, { "epoch": 0.011892121469526439, "grad_norm": 14.75777217603014, "learning_rate": 9.90566037735849e-07, "loss": 1.5183, "step": 84 }, { "epoch": 0.012033694344163659, "grad_norm": 15.629449561757305, "learning_rate": 1.002358490566038e-06, "loss": 1.3184, "step": 85 }, { "epoch": 0.012175267218800878, "grad_norm": 14.497413667677343, "learning_rate": 1.0141509433962265e-06, "loss": 1.4178, "step": 86 }, { "epoch": 0.012316840093438097, "grad_norm": 14.196434132042796, "learning_rate": 1.0259433962264152e-06, "loss": 1.3565, "step": 87 }, { "epoch": 0.012458412968075316, "grad_norm": 16.083600030509533, "learning_rate": 1.037735849056604e-06, "loss": 1.5473, "step": 88 }, { "epoch": 0.012599985842712536, "grad_norm": 15.203808478020504, "learning_rate": 1.0495283018867926e-06, "loss": 1.4148, "step": 89 }, { "epoch": 0.012741558717349756, "grad_norm": 12.79371359949939, "learning_rate": 1.0613207547169812e-06, "loss": 1.1923, "step": 90 }, { "epoch": 0.012883131591986976, "grad_norm": 15.054172604064586, "learning_rate": 1.07311320754717e-06, "loss": 1.425, "step": 91 }, { "epoch": 0.013024704466624194, "grad_norm": 14.330066279000487, "learning_rate": 1.0849056603773587e-06, "loss": 1.4584, "step": 92 }, { "epoch": 0.013166277341261414, "grad_norm": 12.479710095290676, "learning_rate": 1.0966981132075473e-06, "loss": 1.36, "step": 93 }, { "epoch": 0.013307850215898634, "grad_norm": 11.726724500255083, "learning_rate": 1.1084905660377359e-06, "loss": 1.2778, "step": 94 }, { "epoch": 0.013449423090535853, "grad_norm": 13.547945008924554, "learning_rate": 1.1202830188679247e-06, "loss": 1.285, "step": 95 }, { "epoch": 0.013590995965173073, "grad_norm": 15.417117605401765, "learning_rate": 1.1320754716981133e-06, "loss": 1.3899, "step": 96 }, { "epoch": 0.013732568839810293, "grad_norm": 11.532991934190433, "learning_rate": 1.143867924528302e-06, "loss": 1.3581, "step": 97 }, { "epoch": 0.013874141714447511, "grad_norm": 12.664093224631143, "learning_rate": 1.1556603773584908e-06, "loss": 1.4268, "step": 98 }, { "epoch": 0.014015714589084731, "grad_norm": 13.257509226974738, "learning_rate": 1.1674528301886794e-06, "loss": 1.3512, "step": 99 }, { "epoch": 0.01415728746372195, "grad_norm": 13.838309499879424, "learning_rate": 1.179245283018868e-06, "loss": 1.5228, "step": 100 }, { "epoch": 0.01429886033835917, "grad_norm": 14.436307783566926, "learning_rate": 1.1910377358490568e-06, "loss": 1.2803, "step": 101 }, { "epoch": 0.01444043321299639, "grad_norm": 12.812840005968805, "learning_rate": 1.2028301886792454e-06, "loss": 1.4513, "step": 102 }, { "epoch": 0.01458200608763361, "grad_norm": 13.756676734777054, "learning_rate": 1.214622641509434e-06, "loss": 1.2679, "step": 103 }, { "epoch": 0.014723578962270828, "grad_norm": 15.361732278734571, "learning_rate": 1.2264150943396227e-06, "loss": 1.4398, "step": 104 }, { "epoch": 0.014865151836908048, "grad_norm": 13.663749817748833, "learning_rate": 1.2382075471698115e-06, "loss": 1.4919, "step": 105 }, { "epoch": 0.015006724711545268, "grad_norm": 14.859561890911984, "learning_rate": 1.25e-06, "loss": 1.3755, "step": 106 }, { "epoch": 0.015148297586182488, "grad_norm": 15.988494551014254, "learning_rate": 1.261792452830189e-06, "loss": 1.3923, "step": 107 }, { "epoch": 0.015289870460819708, "grad_norm": 13.087171700193727, "learning_rate": 1.2735849056603775e-06, "loss": 1.3192, "step": 108 }, { "epoch": 0.015431443335456926, "grad_norm": 15.614573669769914, "learning_rate": 1.2853773584905664e-06, "loss": 1.5417, "step": 109 }, { "epoch": 0.015573016210094145, "grad_norm": 15.076533068488736, "learning_rate": 1.2971698113207548e-06, "loss": 1.4763, "step": 110 }, { "epoch": 0.015714589084731365, "grad_norm": 16.353077898006884, "learning_rate": 1.3089622641509436e-06, "loss": 1.3288, "step": 111 }, { "epoch": 0.015856161959368585, "grad_norm": 11.209985396199578, "learning_rate": 1.3207547169811322e-06, "loss": 1.3905, "step": 112 }, { "epoch": 0.015997734834005805, "grad_norm": 13.851585082645157, "learning_rate": 1.332547169811321e-06, "loss": 1.4071, "step": 113 }, { "epoch": 0.016139307708643025, "grad_norm": 14.70222209302581, "learning_rate": 1.3443396226415094e-06, "loss": 1.4229, "step": 114 }, { "epoch": 0.016280880583280245, "grad_norm": 15.428510140998657, "learning_rate": 1.3561320754716983e-06, "loss": 1.3655, "step": 115 }, { "epoch": 0.016422453457917464, "grad_norm": 17.12382202552102, "learning_rate": 1.3679245283018869e-06, "loss": 1.3941, "step": 116 }, { "epoch": 0.016564026332554684, "grad_norm": 15.14471764458419, "learning_rate": 1.3797169811320757e-06, "loss": 1.4809, "step": 117 }, { "epoch": 0.0167055992071919, "grad_norm": 13.088974111294315, "learning_rate": 1.3915094339622643e-06, "loss": 1.3392, "step": 118 }, { "epoch": 0.01684717208182912, "grad_norm": 15.46623588707506, "learning_rate": 1.4033018867924531e-06, "loss": 1.5449, "step": 119 }, { "epoch": 0.01698874495646634, "grad_norm": 15.425498343506048, "learning_rate": 1.4150943396226415e-06, "loss": 1.3002, "step": 120 }, { "epoch": 0.01713031783110356, "grad_norm": 13.751255602294957, "learning_rate": 1.4268867924528304e-06, "loss": 1.4212, "step": 121 }, { "epoch": 0.01727189070574078, "grad_norm": 18.32240959479334, "learning_rate": 1.438679245283019e-06, "loss": 1.5834, "step": 122 }, { "epoch": 0.017413463580378, "grad_norm": 12.542056325813675, "learning_rate": 1.4504716981132078e-06, "loss": 1.4252, "step": 123 }, { "epoch": 0.01755503645501522, "grad_norm": 12.514336261286369, "learning_rate": 1.4622641509433962e-06, "loss": 1.3438, "step": 124 }, { "epoch": 0.01769660932965244, "grad_norm": 12.382560501181882, "learning_rate": 1.474056603773585e-06, "loss": 1.2151, "step": 125 }, { "epoch": 0.01783818220428966, "grad_norm": 13.23031939593143, "learning_rate": 1.4858490566037737e-06, "loss": 1.3262, "step": 126 }, { "epoch": 0.01797975507892688, "grad_norm": 11.950770805749832, "learning_rate": 1.4976415094339625e-06, "loss": 1.363, "step": 127 }, { "epoch": 0.0181213279535641, "grad_norm": 16.434237660459605, "learning_rate": 1.509433962264151e-06, "loss": 1.429, "step": 128 }, { "epoch": 0.018262900828201315, "grad_norm": 13.441668480441182, "learning_rate": 1.52122641509434e-06, "loss": 1.4674, "step": 129 }, { "epoch": 0.018404473702838535, "grad_norm": 12.86568313818993, "learning_rate": 1.5330188679245283e-06, "loss": 1.4072, "step": 130 }, { "epoch": 0.018546046577475755, "grad_norm": 15.928791374530595, "learning_rate": 1.5448113207547172e-06, "loss": 1.4589, "step": 131 }, { "epoch": 0.018687619452112975, "grad_norm": 15.912107337248242, "learning_rate": 1.5566037735849058e-06, "loss": 1.5128, "step": 132 }, { "epoch": 0.018829192326750194, "grad_norm": 14.888340713759353, "learning_rate": 1.5683962264150946e-06, "loss": 1.396, "step": 133 }, { "epoch": 0.018970765201387414, "grad_norm": 12.026406162708362, "learning_rate": 1.580188679245283e-06, "loss": 1.4695, "step": 134 }, { "epoch": 0.019112338076024634, "grad_norm": 13.183272565532027, "learning_rate": 1.5919811320754718e-06, "loss": 1.3016, "step": 135 }, { "epoch": 0.019253910950661854, "grad_norm": 14.884251386316166, "learning_rate": 1.6037735849056604e-06, "loss": 1.1814, "step": 136 }, { "epoch": 0.019395483825299074, "grad_norm": 18.106386084501114, "learning_rate": 1.6155660377358493e-06, "loss": 1.7024, "step": 137 }, { "epoch": 0.019537056699936294, "grad_norm": 13.064821465926101, "learning_rate": 1.6273584905660379e-06, "loss": 1.2782, "step": 138 }, { "epoch": 0.019678629574573513, "grad_norm": 15.246682728663163, "learning_rate": 1.6391509433962267e-06, "loss": 1.4774, "step": 139 }, { "epoch": 0.01982020244921073, "grad_norm": 13.790624358252591, "learning_rate": 1.650943396226415e-06, "loss": 1.3797, "step": 140 }, { "epoch": 0.01996177532384795, "grad_norm": 16.909322011790938, "learning_rate": 1.662735849056604e-06, "loss": 1.4021, "step": 141 }, { "epoch": 0.02010334819848517, "grad_norm": 16.2697840053896, "learning_rate": 1.6745283018867925e-06, "loss": 1.3668, "step": 142 }, { "epoch": 0.02024492107312239, "grad_norm": 13.646123068648015, "learning_rate": 1.6863207547169814e-06, "loss": 1.4009, "step": 143 }, { "epoch": 0.02038649394775961, "grad_norm": 13.559847945989636, "learning_rate": 1.6981132075471698e-06, "loss": 1.2664, "step": 144 }, { "epoch": 0.02052806682239683, "grad_norm": 13.702088824671211, "learning_rate": 1.7099056603773586e-06, "loss": 1.3278, "step": 145 }, { "epoch": 0.02066963969703405, "grad_norm": 16.107451123298592, "learning_rate": 1.7216981132075472e-06, "loss": 1.5612, "step": 146 }, { "epoch": 0.02081121257167127, "grad_norm": 18.369908879635027, "learning_rate": 1.733490566037736e-06, "loss": 1.4239, "step": 147 }, { "epoch": 0.020952785446308488, "grad_norm": 13.095989026754228, "learning_rate": 1.7452830188679247e-06, "loss": 1.3918, "step": 148 }, { "epoch": 0.021094358320945708, "grad_norm": 15.889432339092348, "learning_rate": 1.7570754716981135e-06, "loss": 1.3373, "step": 149 }, { "epoch": 0.021235931195582928, "grad_norm": 14.45320154118246, "learning_rate": 1.7688679245283019e-06, "loss": 1.2945, "step": 150 }, { "epoch": 0.021377504070220144, "grad_norm": 16.7893469685649, "learning_rate": 1.7806603773584907e-06, "loss": 1.4648, "step": 151 }, { "epoch": 0.021519076944857364, "grad_norm": 14.000667011175306, "learning_rate": 1.7924528301886793e-06, "loss": 1.3804, "step": 152 }, { "epoch": 0.021660649819494584, "grad_norm": 18.536878789163527, "learning_rate": 1.8042452830188682e-06, "loss": 1.6283, "step": 153 }, { "epoch": 0.021802222694131804, "grad_norm": 14.151127509496868, "learning_rate": 1.8160377358490566e-06, "loss": 1.4481, "step": 154 }, { "epoch": 0.021943795568769024, "grad_norm": 16.459274432481827, "learning_rate": 1.8278301886792454e-06, "loss": 1.4495, "step": 155 }, { "epoch": 0.022085368443406243, "grad_norm": 12.67356671236991, "learning_rate": 1.839622641509434e-06, "loss": 1.2051, "step": 156 }, { "epoch": 0.022226941318043463, "grad_norm": 15.932252027352755, "learning_rate": 1.8514150943396228e-06, "loss": 1.341, "step": 157 }, { "epoch": 0.022368514192680683, "grad_norm": 17.166469733333898, "learning_rate": 1.8632075471698114e-06, "loss": 1.3481, "step": 158 }, { "epoch": 0.022510087067317903, "grad_norm": 12.999988262464653, "learning_rate": 1.8750000000000003e-06, "loss": 1.3389, "step": 159 }, { "epoch": 0.022651659941955123, "grad_norm": 12.903660549100254, "learning_rate": 1.8867924528301889e-06, "loss": 1.4309, "step": 160 }, { "epoch": 0.022793232816592342, "grad_norm": 15.283369732518395, "learning_rate": 1.8985849056603775e-06, "loss": 1.359, "step": 161 }, { "epoch": 0.02293480569122956, "grad_norm": 14.434631326880806, "learning_rate": 1.9103773584905665e-06, "loss": 1.3803, "step": 162 }, { "epoch": 0.02307637856586678, "grad_norm": 12.973079096729505, "learning_rate": 1.9221698113207547e-06, "loss": 1.341, "step": 163 }, { "epoch": 0.023217951440504, "grad_norm": 14.156342158743893, "learning_rate": 1.9339622641509438e-06, "loss": 1.4757, "step": 164 }, { "epoch": 0.023359524315141218, "grad_norm": 15.270836775060719, "learning_rate": 1.9457547169811324e-06, "loss": 1.2662, "step": 165 }, { "epoch": 0.023501097189778438, "grad_norm": 14.88472141629211, "learning_rate": 1.957547169811321e-06, "loss": 1.4459, "step": 166 }, { "epoch": 0.023642670064415658, "grad_norm": 10.798641783468687, "learning_rate": 1.9693396226415096e-06, "loss": 1.3516, "step": 167 }, { "epoch": 0.023784242939052878, "grad_norm": 16.05639532431071, "learning_rate": 1.981132075471698e-06, "loss": 1.376, "step": 168 }, { "epoch": 0.023925815813690098, "grad_norm": 14.256998585501515, "learning_rate": 1.992924528301887e-06, "loss": 1.3827, "step": 169 }, { "epoch": 0.024067388688327317, "grad_norm": 11.411312286310594, "learning_rate": 2.004716981132076e-06, "loss": 1.3477, "step": 170 }, { "epoch": 0.024208961562964537, "grad_norm": 10.768624074845032, "learning_rate": 2.0165094339622645e-06, "loss": 1.4118, "step": 171 }, { "epoch": 0.024350534437601757, "grad_norm": 16.673546146914315, "learning_rate": 2.028301886792453e-06, "loss": 1.4381, "step": 172 }, { "epoch": 0.024492107312238973, "grad_norm": 13.066369464634318, "learning_rate": 2.0400943396226417e-06, "loss": 1.2711, "step": 173 }, { "epoch": 0.024633680186876193, "grad_norm": 14.641156523836113, "learning_rate": 2.0518867924528303e-06, "loss": 1.4645, "step": 174 }, { "epoch": 0.024775253061513413, "grad_norm": 12.596181478598856, "learning_rate": 2.063679245283019e-06, "loss": 1.218, "step": 175 }, { "epoch": 0.024916825936150633, "grad_norm": 48.47642085495019, "learning_rate": 2.075471698113208e-06, "loss": 1.474, "step": 176 }, { "epoch": 0.025058398810787853, "grad_norm": 12.921768723111931, "learning_rate": 2.087264150943396e-06, "loss": 1.3453, "step": 177 }, { "epoch": 0.025199971685425072, "grad_norm": 656.8065497161246, "learning_rate": 2.099056603773585e-06, "loss": 1.6087, "step": 178 }, { "epoch": 0.025341544560062292, "grad_norm": 81.37365590287806, "learning_rate": 2.110849056603774e-06, "loss": 1.8483, "step": 179 }, { "epoch": 0.025483117434699512, "grad_norm": 44.47773785883296, "learning_rate": 2.1226415094339624e-06, "loss": 1.531, "step": 180 }, { "epoch": 0.025624690309336732, "grad_norm": 51.18866375355973, "learning_rate": 2.134433962264151e-06, "loss": 1.3976, "step": 181 }, { "epoch": 0.02576626318397395, "grad_norm": 50.31035515614554, "learning_rate": 2.14622641509434e-06, "loss": 1.5783, "step": 182 }, { "epoch": 0.02590783605861117, "grad_norm": 34.76350590451771, "learning_rate": 2.1580188679245283e-06, "loss": 1.5042, "step": 183 }, { "epoch": 0.026049408933248388, "grad_norm": 30.91565477166766, "learning_rate": 2.1698113207547173e-06, "loss": 1.5888, "step": 184 }, { "epoch": 0.026190981807885608, "grad_norm": 35.967820090064535, "learning_rate": 2.181603773584906e-06, "loss": 1.377, "step": 185 }, { "epoch": 0.026332554682522828, "grad_norm": 26.842783651843703, "learning_rate": 2.1933962264150945e-06, "loss": 1.6296, "step": 186 }, { "epoch": 0.026474127557160047, "grad_norm": 19.853782989007644, "learning_rate": 2.205188679245283e-06, "loss": 1.5105, "step": 187 }, { "epoch": 0.026615700431797267, "grad_norm": 24.43692804237958, "learning_rate": 2.2169811320754718e-06, "loss": 1.4891, "step": 188 }, { "epoch": 0.026757273306434487, "grad_norm": 20.36722421002637, "learning_rate": 2.2287735849056604e-06, "loss": 1.554, "step": 189 }, { "epoch": 0.026898846181071707, "grad_norm": 24.52982305762276, "learning_rate": 2.2405660377358494e-06, "loss": 1.4262, "step": 190 }, { "epoch": 0.027040419055708927, "grad_norm": 19.72628248563639, "learning_rate": 2.252358490566038e-06, "loss": 1.5792, "step": 191 }, { "epoch": 0.027181991930346146, "grad_norm": 23.5257427361829, "learning_rate": 2.2641509433962266e-06, "loss": 1.5574, "step": 192 }, { "epoch": 0.027323564804983366, "grad_norm": 21.03270681247126, "learning_rate": 2.2759433962264153e-06, "loss": 1.6306, "step": 193 }, { "epoch": 0.027465137679620586, "grad_norm": 17.538686660783817, "learning_rate": 2.287735849056604e-06, "loss": 1.442, "step": 194 }, { "epoch": 0.027606710554257806, "grad_norm": 15.643172537752642, "learning_rate": 2.2995283018867925e-06, "loss": 1.5995, "step": 195 }, { "epoch": 0.027748283428895022, "grad_norm": 15.870021249604255, "learning_rate": 2.3113207547169815e-06, "loss": 1.3497, "step": 196 }, { "epoch": 0.027889856303532242, "grad_norm": 16.165620016189063, "learning_rate": 2.3231132075471697e-06, "loss": 1.2779, "step": 197 }, { "epoch": 0.028031429178169462, "grad_norm": 14.241154116891463, "learning_rate": 2.3349056603773588e-06, "loss": 1.4612, "step": 198 }, { "epoch": 0.02817300205280668, "grad_norm": 13.48230799174161, "learning_rate": 2.3466981132075474e-06, "loss": 1.4279, "step": 199 }, { "epoch": 0.0283145749274439, "grad_norm": 18.39935474093623, "learning_rate": 2.358490566037736e-06, "loss": 1.4753, "step": 200 }, { "epoch": 0.02845614780208112, "grad_norm": 16.43557832457627, "learning_rate": 2.3702830188679246e-06, "loss": 1.5159, "step": 201 }, { "epoch": 0.02859772067671834, "grad_norm": 12.088619435180876, "learning_rate": 2.3820754716981136e-06, "loss": 1.459, "step": 202 }, { "epoch": 0.02873929355135556, "grad_norm": 14.132605201630598, "learning_rate": 2.393867924528302e-06, "loss": 1.6052, "step": 203 }, { "epoch": 0.02888086642599278, "grad_norm": 15.22492345029632, "learning_rate": 2.405660377358491e-06, "loss": 1.3795, "step": 204 }, { "epoch": 0.02902243930063, "grad_norm": 16.753665352225912, "learning_rate": 2.4174528301886795e-06, "loss": 1.5709, "step": 205 }, { "epoch": 0.02916401217526722, "grad_norm": 17.074835820895814, "learning_rate": 2.429245283018868e-06, "loss": 1.621, "step": 206 }, { "epoch": 0.029305585049904437, "grad_norm": 13.666067963178683, "learning_rate": 2.4410377358490567e-06, "loss": 1.3863, "step": 207 }, { "epoch": 0.029447157924541657, "grad_norm": 13.415963733021277, "learning_rate": 2.4528301886792453e-06, "loss": 1.4193, "step": 208 }, { "epoch": 0.029588730799178876, "grad_norm": 10.970783968847135, "learning_rate": 2.464622641509434e-06, "loss": 1.3117, "step": 209 }, { "epoch": 0.029730303673816096, "grad_norm": 16.2458725602579, "learning_rate": 2.476415094339623e-06, "loss": 1.5011, "step": 210 }, { "epoch": 0.029871876548453316, "grad_norm": 14.902333510809065, "learning_rate": 2.4882075471698116e-06, "loss": 1.3908, "step": 211 }, { "epoch": 0.030013449423090536, "grad_norm": 13.034732490489892, "learning_rate": 2.5e-06, "loss": 1.3411, "step": 212 }, { "epoch": 0.030155022297727756, "grad_norm": 15.551096710094718, "learning_rate": 2.511792452830189e-06, "loss": 1.4325, "step": 213 }, { "epoch": 0.030296595172364976, "grad_norm": 13.376671802460512, "learning_rate": 2.523584905660378e-06, "loss": 1.3764, "step": 214 }, { "epoch": 0.030438168047002195, "grad_norm": 13.845114474940926, "learning_rate": 2.535377358490566e-06, "loss": 1.4753, "step": 215 }, { "epoch": 0.030579740921639415, "grad_norm": 15.889047730073527, "learning_rate": 2.547169811320755e-06, "loss": 1.5592, "step": 216 }, { "epoch": 0.030721313796276635, "grad_norm": 24.883472322491865, "learning_rate": 2.5589622641509437e-06, "loss": 1.4782, "step": 217 }, { "epoch": 0.03086288667091385, "grad_norm": 14.244440968698708, "learning_rate": 2.5707547169811327e-06, "loss": 1.4606, "step": 218 }, { "epoch": 0.03100445954555107, "grad_norm": 16.3549822107893, "learning_rate": 2.582547169811321e-06, "loss": 1.476, "step": 219 }, { "epoch": 0.03114603242018829, "grad_norm": 13.793412911462259, "learning_rate": 2.5943396226415095e-06, "loss": 1.4309, "step": 220 }, { "epoch": 0.031287605294825514, "grad_norm": 16.38341882372283, "learning_rate": 2.6061320754716986e-06, "loss": 1.5463, "step": 221 }, { "epoch": 0.03142917816946273, "grad_norm": 20.68726028349596, "learning_rate": 2.617924528301887e-06, "loss": 1.3248, "step": 222 }, { "epoch": 0.03157075104409995, "grad_norm": 17.332494764339593, "learning_rate": 2.6297169811320754e-06, "loss": 1.4447, "step": 223 }, { "epoch": 0.03171232391873717, "grad_norm": 15.04400994108359, "learning_rate": 2.6415094339622644e-06, "loss": 1.378, "step": 224 }, { "epoch": 0.03185389679337439, "grad_norm": 19.328153420553075, "learning_rate": 2.653301886792453e-06, "loss": 1.3475, "step": 225 }, { "epoch": 0.03199546966801161, "grad_norm": 13.357533110417448, "learning_rate": 2.665094339622642e-06, "loss": 1.3967, "step": 226 }, { "epoch": 0.032137042542648826, "grad_norm": 15.328500290066804, "learning_rate": 2.6768867924528303e-06, "loss": 1.4294, "step": 227 }, { "epoch": 0.03227861541728605, "grad_norm": 17.906320728208918, "learning_rate": 2.688679245283019e-06, "loss": 1.3476, "step": 228 }, { "epoch": 0.032420188291923266, "grad_norm": 15.040548830772744, "learning_rate": 2.700471698113208e-06, "loss": 1.4049, "step": 229 }, { "epoch": 0.03256176116656049, "grad_norm": 13.99416556718817, "learning_rate": 2.7122641509433965e-06, "loss": 1.4607, "step": 230 }, { "epoch": 0.032703334041197706, "grad_norm": 14.83817278036487, "learning_rate": 2.724056603773585e-06, "loss": 1.373, "step": 231 }, { "epoch": 0.03284490691583493, "grad_norm": 13.830395183530266, "learning_rate": 2.7358490566037738e-06, "loss": 1.3777, "step": 232 }, { "epoch": 0.032986479790472145, "grad_norm": 13.325371335922853, "learning_rate": 2.7476415094339624e-06, "loss": 1.2737, "step": 233 }, { "epoch": 0.03312805266510937, "grad_norm": 13.312170463387593, "learning_rate": 2.7594339622641514e-06, "loss": 1.3327, "step": 234 }, { "epoch": 0.033269625539746585, "grad_norm": 12.94071647724097, "learning_rate": 2.7712264150943396e-06, "loss": 1.2098, "step": 235 }, { "epoch": 0.0334111984143838, "grad_norm": 13.08302789481412, "learning_rate": 2.7830188679245286e-06, "loss": 1.3416, "step": 236 }, { "epoch": 0.033552771289021024, "grad_norm": 12.120285533399484, "learning_rate": 2.7948113207547173e-06, "loss": 1.2832, "step": 237 }, { "epoch": 0.03369434416365824, "grad_norm": 11.403081082703522, "learning_rate": 2.8066037735849063e-06, "loss": 1.3297, "step": 238 }, { "epoch": 0.033835917038295464, "grad_norm": 14.370637181861328, "learning_rate": 2.8183962264150945e-06, "loss": 1.3262, "step": 239 }, { "epoch": 0.03397748991293268, "grad_norm": 11.199915980977583, "learning_rate": 2.830188679245283e-06, "loss": 1.619, "step": 240 }, { "epoch": 0.034119062787569904, "grad_norm": 12.091003579261356, "learning_rate": 2.841981132075472e-06, "loss": 1.2579, "step": 241 }, { "epoch": 0.03426063566220712, "grad_norm": 14.295124551311192, "learning_rate": 2.8537735849056608e-06, "loss": 1.5216, "step": 242 }, { "epoch": 0.03440220853684434, "grad_norm": 13.00094366316586, "learning_rate": 2.865566037735849e-06, "loss": 1.519, "step": 243 }, { "epoch": 0.03454378141148156, "grad_norm": 11.33689450431436, "learning_rate": 2.877358490566038e-06, "loss": 1.4327, "step": 244 }, { "epoch": 0.03468535428611878, "grad_norm": 11.98875981653134, "learning_rate": 2.8891509433962266e-06, "loss": 1.2686, "step": 245 }, { "epoch": 0.034826927160756, "grad_norm": 12.274950425927772, "learning_rate": 2.9009433962264156e-06, "loss": 1.3846, "step": 246 }, { "epoch": 0.034968500035393216, "grad_norm": 12.938800248859351, "learning_rate": 2.912735849056604e-06, "loss": 1.2984, "step": 247 }, { "epoch": 0.03511007291003044, "grad_norm": 13.326715034777477, "learning_rate": 2.9245283018867924e-06, "loss": 1.2661, "step": 248 }, { "epoch": 0.035251645784667655, "grad_norm": 11.939831251424787, "learning_rate": 2.9363207547169815e-06, "loss": 1.4225, "step": 249 }, { "epoch": 0.03539321865930488, "grad_norm": 13.089879886551135, "learning_rate": 2.94811320754717e-06, "loss": 1.4894, "step": 250 }, { "epoch": 0.035534791533942095, "grad_norm": 14.474098485831131, "learning_rate": 2.9599056603773587e-06, "loss": 1.3853, "step": 251 }, { "epoch": 0.03567636440857932, "grad_norm": 11.92179884580035, "learning_rate": 2.9716981132075473e-06, "loss": 1.1871, "step": 252 }, { "epoch": 0.035817937283216535, "grad_norm": 14.081686130479678, "learning_rate": 2.983490566037736e-06, "loss": 1.5305, "step": 253 }, { "epoch": 0.03595951015785376, "grad_norm": 11.97425432850133, "learning_rate": 2.995283018867925e-06, "loss": 1.5336, "step": 254 }, { "epoch": 0.036101083032490974, "grad_norm": 12.494022618688069, "learning_rate": 3.007075471698113e-06, "loss": 1.3953, "step": 255 }, { "epoch": 0.0362426559071282, "grad_norm": 14.930315021479787, "learning_rate": 3.018867924528302e-06, "loss": 1.2638, "step": 256 }, { "epoch": 0.036384228781765414, "grad_norm": 12.862478148327831, "learning_rate": 3.030660377358491e-06, "loss": 1.3439, "step": 257 }, { "epoch": 0.03652580165640263, "grad_norm": 11.539152420096492, "learning_rate": 3.04245283018868e-06, "loss": 1.4055, "step": 258 }, { "epoch": 0.036667374531039854, "grad_norm": 11.803910619290674, "learning_rate": 3.054245283018868e-06, "loss": 1.5259, "step": 259 }, { "epoch": 0.03680894740567707, "grad_norm": 14.421626889624994, "learning_rate": 3.0660377358490567e-06, "loss": 1.2817, "step": 260 }, { "epoch": 0.03695052028031429, "grad_norm": 11.740396024142923, "learning_rate": 3.0778301886792457e-06, "loss": 1.2797, "step": 261 }, { "epoch": 0.03709209315495151, "grad_norm": 11.063184124326504, "learning_rate": 3.0896226415094343e-06, "loss": 1.2087, "step": 262 }, { "epoch": 0.03723366602958873, "grad_norm": 13.605330241722475, "learning_rate": 3.1014150943396225e-06, "loss": 1.3838, "step": 263 }, { "epoch": 0.03737523890422595, "grad_norm": 11.135770919964795, "learning_rate": 3.1132075471698115e-06, "loss": 1.1975, "step": 264 }, { "epoch": 0.03751681177886317, "grad_norm": 11.536879739176054, "learning_rate": 3.125e-06, "loss": 1.3687, "step": 265 }, { "epoch": 0.03765838465350039, "grad_norm": 12.519514796206037, "learning_rate": 3.136792452830189e-06, "loss": 1.3645, "step": 266 }, { "epoch": 0.03779995752813761, "grad_norm": 11.276205102501102, "learning_rate": 3.148584905660378e-06, "loss": 1.3653, "step": 267 }, { "epoch": 0.03794153040277483, "grad_norm": 11.077943873034453, "learning_rate": 3.160377358490566e-06, "loss": 1.3345, "step": 268 }, { "epoch": 0.038083103277412045, "grad_norm": 13.986098335678662, "learning_rate": 3.172169811320755e-06, "loss": 1.2719, "step": 269 }, { "epoch": 0.03822467615204927, "grad_norm": 14.053571384254742, "learning_rate": 3.1839622641509436e-06, "loss": 1.363, "step": 270 }, { "epoch": 0.038366249026686484, "grad_norm": 13.42423491331303, "learning_rate": 3.1957547169811327e-06, "loss": 1.3443, "step": 271 }, { "epoch": 0.03850782190132371, "grad_norm": 12.474812269613896, "learning_rate": 3.207547169811321e-06, "loss": 1.3672, "step": 272 }, { "epoch": 0.038649394775960924, "grad_norm": 13.285256748554016, "learning_rate": 3.2193396226415095e-06, "loss": 1.4421, "step": 273 }, { "epoch": 0.03879096765059815, "grad_norm": 17.26019339761426, "learning_rate": 3.2311320754716985e-06, "loss": 1.4036, "step": 274 }, { "epoch": 0.038932540525235364, "grad_norm": 13.945575102569498, "learning_rate": 3.242924528301887e-06, "loss": 1.4378, "step": 275 }, { "epoch": 0.03907411339987259, "grad_norm": 13.246069559105011, "learning_rate": 3.2547169811320758e-06, "loss": 1.3973, "step": 276 }, { "epoch": 0.0392156862745098, "grad_norm": 16.237221726168723, "learning_rate": 3.2665094339622644e-06, "loss": 1.5402, "step": 277 }, { "epoch": 0.03935725914914703, "grad_norm": 14.043449009520696, "learning_rate": 3.2783018867924534e-06, "loss": 1.239, "step": 278 }, { "epoch": 0.03949883202378424, "grad_norm": 13.636943656839119, "learning_rate": 3.290094339622642e-06, "loss": 1.5294, "step": 279 }, { "epoch": 0.03964040489842146, "grad_norm": 16.26166198119445, "learning_rate": 3.30188679245283e-06, "loss": 1.4481, "step": 280 }, { "epoch": 0.03978197777305868, "grad_norm": 15.846052555504064, "learning_rate": 3.3136792452830192e-06, "loss": 1.2376, "step": 281 }, { "epoch": 0.0399235506476959, "grad_norm": 16.491902531881934, "learning_rate": 3.325471698113208e-06, "loss": 1.4671, "step": 282 }, { "epoch": 0.04006512352233312, "grad_norm": 14.563171633184103, "learning_rate": 3.337264150943397e-06, "loss": 1.3251, "step": 283 }, { "epoch": 0.04020669639697034, "grad_norm": 17.283303661314836, "learning_rate": 3.349056603773585e-06, "loss": 1.3061, "step": 284 }, { "epoch": 0.04034826927160756, "grad_norm": 16.170815371384048, "learning_rate": 3.3608490566037737e-06, "loss": 1.2924, "step": 285 }, { "epoch": 0.04048984214624478, "grad_norm": 12.378758948121703, "learning_rate": 3.3726415094339627e-06, "loss": 1.4264, "step": 286 }, { "epoch": 0.040631415020882, "grad_norm": 14.428329131211093, "learning_rate": 3.3844339622641514e-06, "loss": 1.3145, "step": 287 }, { "epoch": 0.04077298789551922, "grad_norm": 15.011881382478398, "learning_rate": 3.3962264150943395e-06, "loss": 1.321, "step": 288 }, { "epoch": 0.04091456077015644, "grad_norm": 10.76157448009014, "learning_rate": 3.4080188679245286e-06, "loss": 1.4422, "step": 289 }, { "epoch": 0.04105613364479366, "grad_norm": 12.857352893869718, "learning_rate": 3.419811320754717e-06, "loss": 1.412, "step": 290 }, { "epoch": 0.041197706519430874, "grad_norm": 14.025678790252675, "learning_rate": 3.4316037735849062e-06, "loss": 1.3854, "step": 291 }, { "epoch": 0.0413392793940681, "grad_norm": 16.75807109357589, "learning_rate": 3.4433962264150944e-06, "loss": 1.5068, "step": 292 }, { "epoch": 0.041480852268705314, "grad_norm": 15.087898589196927, "learning_rate": 3.455188679245283e-06, "loss": 1.3329, "step": 293 }, { "epoch": 0.04162242514334254, "grad_norm": 9.963438334961975, "learning_rate": 3.466981132075472e-06, "loss": 1.3944, "step": 294 }, { "epoch": 0.04176399801797975, "grad_norm": 15.348769227429116, "learning_rate": 3.4787735849056607e-06, "loss": 1.2675, "step": 295 }, { "epoch": 0.041905570892616976, "grad_norm": 15.033953068739002, "learning_rate": 3.4905660377358493e-06, "loss": 1.3775, "step": 296 }, { "epoch": 0.04204714376725419, "grad_norm": 11.989043001825015, "learning_rate": 3.502358490566038e-06, "loss": 1.3579, "step": 297 }, { "epoch": 0.042188716641891416, "grad_norm": 15.109326020312286, "learning_rate": 3.514150943396227e-06, "loss": 1.4424, "step": 298 }, { "epoch": 0.04233028951652863, "grad_norm": 14.483194017051172, "learning_rate": 3.5259433962264156e-06, "loss": 1.3946, "step": 299 }, { "epoch": 0.042471862391165856, "grad_norm": 11.405688797195747, "learning_rate": 3.5377358490566038e-06, "loss": 1.465, "step": 300 }, { "epoch": 0.04261343526580307, "grad_norm": 14.991190548698352, "learning_rate": 3.549528301886793e-06, "loss": 1.3021, "step": 301 }, { "epoch": 0.04275500814044029, "grad_norm": 15.877665618815938, "learning_rate": 3.5613207547169814e-06, "loss": 1.266, "step": 302 }, { "epoch": 0.04289658101507751, "grad_norm": 13.927222598094533, "learning_rate": 3.5731132075471705e-06, "loss": 1.4001, "step": 303 }, { "epoch": 0.04303815388971473, "grad_norm": 13.072661175707253, "learning_rate": 3.5849056603773586e-06, "loss": 1.3691, "step": 304 }, { "epoch": 0.04317972676435195, "grad_norm": 13.881264861681599, "learning_rate": 3.5966981132075473e-06, "loss": 1.2131, "step": 305 }, { "epoch": 0.04332129963898917, "grad_norm": 12.850894027067605, "learning_rate": 3.6084905660377363e-06, "loss": 1.3927, "step": 306 }, { "epoch": 0.04346287251362639, "grad_norm": 15.330053614493401, "learning_rate": 3.620283018867925e-06, "loss": 1.201, "step": 307 }, { "epoch": 0.04360444538826361, "grad_norm": 15.395519922737467, "learning_rate": 3.632075471698113e-06, "loss": 1.3529, "step": 308 }, { "epoch": 0.04374601826290083, "grad_norm": 12.763616135350746, "learning_rate": 3.643867924528302e-06, "loss": 1.538, "step": 309 }, { "epoch": 0.04388759113753805, "grad_norm": 17.169238345405077, "learning_rate": 3.6556603773584908e-06, "loss": 1.4586, "step": 310 }, { "epoch": 0.04402916401217527, "grad_norm": 11.195678449401036, "learning_rate": 3.66745283018868e-06, "loss": 1.3422, "step": 311 }, { "epoch": 0.04417073688681249, "grad_norm": 13.802004041144091, "learning_rate": 3.679245283018868e-06, "loss": 1.505, "step": 312 }, { "epoch": 0.0443123097614497, "grad_norm": 13.014260906347182, "learning_rate": 3.6910377358490566e-06, "loss": 1.3153, "step": 313 }, { "epoch": 0.044453882636086926, "grad_norm": 13.686884618015709, "learning_rate": 3.7028301886792456e-06, "loss": 1.3495, "step": 314 }, { "epoch": 0.04459545551072414, "grad_norm": 12.511558986647795, "learning_rate": 3.7146226415094343e-06, "loss": 1.4985, "step": 315 }, { "epoch": 0.044737028385361366, "grad_norm": 11.8707704741685, "learning_rate": 3.726415094339623e-06, "loss": 1.2791, "step": 316 }, { "epoch": 0.04487860125999858, "grad_norm": 11.869620620590947, "learning_rate": 3.7382075471698115e-06, "loss": 1.3623, "step": 317 }, { "epoch": 0.045020174134635806, "grad_norm": 12.307936698981386, "learning_rate": 3.7500000000000005e-06, "loss": 1.2448, "step": 318 }, { "epoch": 0.04516174700927302, "grad_norm": 11.062891031886666, "learning_rate": 3.761792452830189e-06, "loss": 1.3428, "step": 319 }, { "epoch": 0.045303319883910245, "grad_norm": 10.920482715934467, "learning_rate": 3.7735849056603777e-06, "loss": 1.2818, "step": 320 }, { "epoch": 0.04544489275854746, "grad_norm": 13.986021965691485, "learning_rate": 3.7853773584905664e-06, "loss": 1.4893, "step": 321 }, { "epoch": 0.045586465633184685, "grad_norm": 12.46147138354732, "learning_rate": 3.797169811320755e-06, "loss": 1.1815, "step": 322 }, { "epoch": 0.0457280385078219, "grad_norm": 13.137897657479238, "learning_rate": 3.808962264150944e-06, "loss": 1.4134, "step": 323 }, { "epoch": 0.04586961138245912, "grad_norm": 12.96703737279796, "learning_rate": 3.820754716981133e-06, "loss": 1.3488, "step": 324 }, { "epoch": 0.04601118425709634, "grad_norm": 12.046975062408691, "learning_rate": 3.832547169811321e-06, "loss": 1.2617, "step": 325 }, { "epoch": 0.04615275713173356, "grad_norm": 12.331468870808488, "learning_rate": 3.8443396226415094e-06, "loss": 1.2339, "step": 326 }, { "epoch": 0.04629433000637078, "grad_norm": 12.743901608151335, "learning_rate": 3.856132075471699e-06, "loss": 1.1912, "step": 327 }, { "epoch": 0.046435902881008, "grad_norm": 14.606010955434527, "learning_rate": 3.8679245283018875e-06, "loss": 1.5428, "step": 328 }, { "epoch": 0.04657747575564522, "grad_norm": 11.13449274600947, "learning_rate": 3.879716981132075e-06, "loss": 1.2862, "step": 329 }, { "epoch": 0.046719048630282436, "grad_norm": 16.590772228697833, "learning_rate": 3.891509433962265e-06, "loss": 1.4969, "step": 330 }, { "epoch": 0.04686062150491966, "grad_norm": 12.485899028926466, "learning_rate": 3.903301886792453e-06, "loss": 1.4518, "step": 331 }, { "epoch": 0.047002194379556876, "grad_norm": 10.22019495649327, "learning_rate": 3.915094339622642e-06, "loss": 1.3273, "step": 332 }, { "epoch": 0.0471437672541941, "grad_norm": 13.69524286473312, "learning_rate": 3.926886792452831e-06, "loss": 1.4633, "step": 333 }, { "epoch": 0.047285340128831316, "grad_norm": 11.077958680102865, "learning_rate": 3.938679245283019e-06, "loss": 1.2038, "step": 334 }, { "epoch": 0.04742691300346853, "grad_norm": 15.559659767647908, "learning_rate": 3.950471698113208e-06, "loss": 1.4272, "step": 335 }, { "epoch": 0.047568485878105755, "grad_norm": 11.20396192502379, "learning_rate": 3.962264150943396e-06, "loss": 1.4383, "step": 336 }, { "epoch": 0.04771005875274297, "grad_norm": 12.233991478064368, "learning_rate": 3.974056603773585e-06, "loss": 1.3003, "step": 337 }, { "epoch": 0.047851631627380195, "grad_norm": 13.486972881265833, "learning_rate": 3.985849056603774e-06, "loss": 1.2461, "step": 338 }, { "epoch": 0.04799320450201741, "grad_norm": 15.592697077428186, "learning_rate": 3.997641509433962e-06, "loss": 1.387, "step": 339 }, { "epoch": 0.048134777376654635, "grad_norm": 10.386897974666585, "learning_rate": 4.009433962264152e-06, "loss": 1.4218, "step": 340 }, { "epoch": 0.04827635025129185, "grad_norm": 12.408557744470208, "learning_rate": 4.0212264150943395e-06, "loss": 1.4185, "step": 341 }, { "epoch": 0.048417923125929074, "grad_norm": 14.726424692657604, "learning_rate": 4.033018867924529e-06, "loss": 1.3281, "step": 342 }, { "epoch": 0.04855949600056629, "grad_norm": 10.854841468809587, "learning_rate": 4.0448113207547176e-06, "loss": 1.3354, "step": 343 }, { "epoch": 0.048701068875203514, "grad_norm": 13.429052350090991, "learning_rate": 4.056603773584906e-06, "loss": 1.4187, "step": 344 }, { "epoch": 0.04884264174984073, "grad_norm": 12.680547303142186, "learning_rate": 4.068396226415095e-06, "loss": 1.3159, "step": 345 }, { "epoch": 0.04898421462447795, "grad_norm": 12.83528723436573, "learning_rate": 4.080188679245283e-06, "loss": 1.4475, "step": 346 }, { "epoch": 0.04912578749911517, "grad_norm": 15.881338220896986, "learning_rate": 4.091981132075472e-06, "loss": 1.37, "step": 347 }, { "epoch": 0.049267360373752386, "grad_norm": 12.300567886764988, "learning_rate": 4.103773584905661e-06, "loss": 1.3186, "step": 348 }, { "epoch": 0.04940893324838961, "grad_norm": 13.10569578789928, "learning_rate": 4.115566037735849e-06, "loss": 1.2651, "step": 349 }, { "epoch": 0.049550506123026826, "grad_norm": 16.155756988972033, "learning_rate": 4.127358490566038e-06, "loss": 1.3081, "step": 350 }, { "epoch": 0.04969207899766405, "grad_norm": 11.937044943879012, "learning_rate": 4.1391509433962265e-06, "loss": 1.2452, "step": 351 }, { "epoch": 0.049833651872301266, "grad_norm": 15.181139457868696, "learning_rate": 4.150943396226416e-06, "loss": 1.6196, "step": 352 }, { "epoch": 0.04997522474693849, "grad_norm": 12.337563347774616, "learning_rate": 4.162735849056604e-06, "loss": 1.3569, "step": 353 }, { "epoch": 0.050116797621575705, "grad_norm": 11.854489894721851, "learning_rate": 4.174528301886792e-06, "loss": 1.3269, "step": 354 }, { "epoch": 0.05025837049621293, "grad_norm": 10.78985270176028, "learning_rate": 4.186320754716982e-06, "loss": 1.2117, "step": 355 }, { "epoch": 0.050399943370850145, "grad_norm": 11.588248621060718, "learning_rate": 4.19811320754717e-06, "loss": 1.5377, "step": 356 }, { "epoch": 0.05054151624548736, "grad_norm": 13.922846596462694, "learning_rate": 4.209905660377359e-06, "loss": 1.4247, "step": 357 }, { "epoch": 0.050683089120124585, "grad_norm": 15.398907194130286, "learning_rate": 4.221698113207548e-06, "loss": 1.3843, "step": 358 }, { "epoch": 0.0508246619947618, "grad_norm": 10.892665502215886, "learning_rate": 4.233490566037736e-06, "loss": 1.3387, "step": 359 }, { "epoch": 0.050966234869399024, "grad_norm": 13.163482924926964, "learning_rate": 4.245283018867925e-06, "loss": 1.4666, "step": 360 }, { "epoch": 0.05110780774403624, "grad_norm": 13.612877168500695, "learning_rate": 4.2570754716981135e-06, "loss": 1.3287, "step": 361 }, { "epoch": 0.051249380618673464, "grad_norm": 14.277865286302047, "learning_rate": 4.268867924528302e-06, "loss": 1.4391, "step": 362 }, { "epoch": 0.05139095349331068, "grad_norm": 10.61564313984136, "learning_rate": 4.280660377358491e-06, "loss": 1.3418, "step": 363 }, { "epoch": 0.0515325263679479, "grad_norm": 15.893354243407693, "learning_rate": 4.29245283018868e-06, "loss": 1.342, "step": 364 }, { "epoch": 0.05167409924258512, "grad_norm": 9.934015394855917, "learning_rate": 4.304245283018868e-06, "loss": 1.2354, "step": 365 }, { "epoch": 0.05181567211722234, "grad_norm": 14.0352116962445, "learning_rate": 4.3160377358490565e-06, "loss": 1.5302, "step": 366 }, { "epoch": 0.05195724499185956, "grad_norm": 13.241681060559127, "learning_rate": 4.327830188679246e-06, "loss": 1.5103, "step": 367 }, { "epoch": 0.052098817866496776, "grad_norm": 13.45389964922752, "learning_rate": 4.339622641509435e-06, "loss": 1.2574, "step": 368 }, { "epoch": 0.052240390741134, "grad_norm": 11.515073103082582, "learning_rate": 4.351415094339622e-06, "loss": 1.3481, "step": 369 }, { "epoch": 0.052381963615771215, "grad_norm": 11.709530227320448, "learning_rate": 4.363207547169812e-06, "loss": 1.4447, "step": 370 }, { "epoch": 0.05252353649040844, "grad_norm": 12.553601505807071, "learning_rate": 4.3750000000000005e-06, "loss": 1.2257, "step": 371 }, { "epoch": 0.052665109365045655, "grad_norm": 13.943497671650967, "learning_rate": 4.386792452830189e-06, "loss": 1.3147, "step": 372 }, { "epoch": 0.05280668223968288, "grad_norm": 20.992122080750317, "learning_rate": 4.398584905660378e-06, "loss": 1.4319, "step": 373 }, { "epoch": 0.052948255114320095, "grad_norm": 10.77638873645614, "learning_rate": 4.410377358490566e-06, "loss": 1.3874, "step": 374 }, { "epoch": 0.05308982798895732, "grad_norm": 12.952017043047714, "learning_rate": 4.422169811320755e-06, "loss": 1.0522, "step": 375 }, { "epoch": 0.053231400863594534, "grad_norm": 15.886989601514593, "learning_rate": 4.4339622641509435e-06, "loss": 1.2156, "step": 376 }, { "epoch": 0.05337297373823176, "grad_norm": 10.187253984160451, "learning_rate": 4.445754716981133e-06, "loss": 1.2963, "step": 377 }, { "epoch": 0.053514546612868974, "grad_norm": 15.625767071015016, "learning_rate": 4.457547169811321e-06, "loss": 1.3131, "step": 378 }, { "epoch": 0.0536561194875062, "grad_norm": 12.548768999131138, "learning_rate": 4.469339622641509e-06, "loss": 1.4243, "step": 379 }, { "epoch": 0.053797692362143414, "grad_norm": 10.499811806581716, "learning_rate": 4.481132075471699e-06, "loss": 1.2221, "step": 380 }, { "epoch": 0.05393926523678063, "grad_norm": 11.322330144988237, "learning_rate": 4.4929245283018875e-06, "loss": 1.188, "step": 381 }, { "epoch": 0.05408083811141785, "grad_norm": 14.286758929613484, "learning_rate": 4.504716981132076e-06, "loss": 1.391, "step": 382 }, { "epoch": 0.05422241098605507, "grad_norm": 10.084625371538467, "learning_rate": 4.516509433962265e-06, "loss": 1.3035, "step": 383 }, { "epoch": 0.05436398386069229, "grad_norm": 15.323337503338198, "learning_rate": 4.528301886792453e-06, "loss": 1.373, "step": 384 }, { "epoch": 0.05450555673532951, "grad_norm": 13.563097400077329, "learning_rate": 4.540094339622642e-06, "loss": 1.5422, "step": 385 }, { "epoch": 0.05464712960996673, "grad_norm": 12.770889821428055, "learning_rate": 4.5518867924528305e-06, "loss": 1.4298, "step": 386 }, { "epoch": 0.05478870248460395, "grad_norm": 11.02346414607428, "learning_rate": 4.563679245283019e-06, "loss": 1.4119, "step": 387 }, { "epoch": 0.05493027535924117, "grad_norm": 11.648491862507285, "learning_rate": 4.575471698113208e-06, "loss": 1.2964, "step": 388 }, { "epoch": 0.05507184823387839, "grad_norm": 12.31247707306717, "learning_rate": 4.587264150943397e-06, "loss": 1.3641, "step": 389 }, { "epoch": 0.05521342110851561, "grad_norm": 9.872778534001728, "learning_rate": 4.599056603773585e-06, "loss": 1.2028, "step": 390 }, { "epoch": 0.05535499398315283, "grad_norm": 10.433223007967594, "learning_rate": 4.610849056603774e-06, "loss": 1.2448, "step": 391 }, { "epoch": 0.055496566857790045, "grad_norm": 10.951118405155269, "learning_rate": 4.622641509433963e-06, "loss": 1.4041, "step": 392 }, { "epoch": 0.05563813973242727, "grad_norm": 12.131939033070864, "learning_rate": 4.634433962264152e-06, "loss": 1.2779, "step": 393 }, { "epoch": 0.055779712607064484, "grad_norm": 12.96912473573806, "learning_rate": 4.6462264150943394e-06, "loss": 1.2562, "step": 394 }, { "epoch": 0.05592128548170171, "grad_norm": 14.258114294751165, "learning_rate": 4.658018867924529e-06, "loss": 1.4827, "step": 395 }, { "epoch": 0.056062858356338924, "grad_norm": 11.8370274611581, "learning_rate": 4.6698113207547175e-06, "loss": 1.2211, "step": 396 }, { "epoch": 0.05620443123097615, "grad_norm": 12.58237142023079, "learning_rate": 4.681603773584906e-06, "loss": 1.2796, "step": 397 }, { "epoch": 0.05634600410561336, "grad_norm": 12.766545350085169, "learning_rate": 4.693396226415095e-06, "loss": 1.4063, "step": 398 }, { "epoch": 0.05648757698025059, "grad_norm": 15.055676699468087, "learning_rate": 4.705188679245283e-06, "loss": 1.3137, "step": 399 }, { "epoch": 0.0566291498548878, "grad_norm": 11.828790643423254, "learning_rate": 4.716981132075472e-06, "loss": 1.1981, "step": 400 }, { "epoch": 0.056770722729525026, "grad_norm": 12.031942451664081, "learning_rate": 4.728773584905661e-06, "loss": 1.4947, "step": 401 }, { "epoch": 0.05691229560416224, "grad_norm": 11.582765048423981, "learning_rate": 4.740566037735849e-06, "loss": 1.255, "step": 402 }, { "epoch": 0.05705386847879946, "grad_norm": 12.56696600789366, "learning_rate": 4.752358490566038e-06, "loss": 1.4081, "step": 403 }, { "epoch": 0.05719544135343668, "grad_norm": 13.17012680235974, "learning_rate": 4.764150943396227e-06, "loss": 1.3052, "step": 404 }, { "epoch": 0.0573370142280739, "grad_norm": 12.089309231923805, "learning_rate": 4.775943396226416e-06, "loss": 1.4884, "step": 405 }, { "epoch": 0.05747858710271112, "grad_norm": 12.068369964036867, "learning_rate": 4.787735849056604e-06, "loss": 1.3884, "step": 406 }, { "epoch": 0.05762015997734834, "grad_norm": 14.323231767367263, "learning_rate": 4.799528301886793e-06, "loss": 1.3232, "step": 407 }, { "epoch": 0.05776173285198556, "grad_norm": 11.326135331733166, "learning_rate": 4.811320754716982e-06, "loss": 1.2089, "step": 408 }, { "epoch": 0.05790330572662278, "grad_norm": 14.827417001011167, "learning_rate": 4.82311320754717e-06, "loss": 1.2901, "step": 409 }, { "epoch": 0.05804487860126, "grad_norm": 10.518661264735455, "learning_rate": 4.834905660377359e-06, "loss": 1.3523, "step": 410 }, { "epoch": 0.05818645147589722, "grad_norm": 13.72157706847481, "learning_rate": 4.8466981132075476e-06, "loss": 1.4333, "step": 411 }, { "epoch": 0.05832802435053444, "grad_norm": 13.40178700388351, "learning_rate": 4.858490566037736e-06, "loss": 1.4465, "step": 412 }, { "epoch": 0.05846959722517166, "grad_norm": 13.296925491972289, "learning_rate": 4.870283018867925e-06, "loss": 1.3178, "step": 413 }, { "epoch": 0.058611170099808874, "grad_norm": 14.390002043553592, "learning_rate": 4.882075471698113e-06, "loss": 1.3941, "step": 414 }, { "epoch": 0.0587527429744461, "grad_norm": 12.630127120658145, "learning_rate": 4.893867924528302e-06, "loss": 1.378, "step": 415 }, { "epoch": 0.05889431584908331, "grad_norm": 12.623947855098349, "learning_rate": 4.905660377358491e-06, "loss": 1.2889, "step": 416 }, { "epoch": 0.05903588872372054, "grad_norm": 10.289471061316986, "learning_rate": 4.91745283018868e-06, "loss": 1.3342, "step": 417 }, { "epoch": 0.05917746159835775, "grad_norm": 15.88286774077334, "learning_rate": 4.929245283018868e-06, "loss": 1.1747, "step": 418 }, { "epoch": 0.059319034472994976, "grad_norm": 14.574491483477326, "learning_rate": 4.9410377358490565e-06, "loss": 1.4469, "step": 419 }, { "epoch": 0.05946060734763219, "grad_norm": 12.34680539717157, "learning_rate": 4.952830188679246e-06, "loss": 1.3322, "step": 420 }, { "epoch": 0.059602180222269416, "grad_norm": 14.597756074886133, "learning_rate": 4.9646226415094346e-06, "loss": 1.4214, "step": 421 }, { "epoch": 0.05974375309690663, "grad_norm": 12.29061881733731, "learning_rate": 4.976415094339623e-06, "loss": 1.3526, "step": 422 }, { "epoch": 0.059885325971543855, "grad_norm": 14.497702054745229, "learning_rate": 4.988207547169812e-06, "loss": 1.3256, "step": 423 }, { "epoch": 0.06002689884618107, "grad_norm": 11.683256143776603, "learning_rate": 5e-06, "loss": 1.1031, "step": 424 }, { "epoch": 0.06016847172081829, "grad_norm": 11.020298003167305, "learning_rate": 4.999999934288433e-06, "loss": 1.2661, "step": 425 }, { "epoch": 0.06031004459545551, "grad_norm": 8.876806370830382, "learning_rate": 4.999999737153732e-06, "loss": 1.3019, "step": 426 }, { "epoch": 0.06045161747009273, "grad_norm": 13.256694901701437, "learning_rate": 4.999999408595909e-06, "loss": 1.2848, "step": 427 }, { "epoch": 0.06059319034472995, "grad_norm": 11.812442517014567, "learning_rate": 4.999998948614983e-06, "loss": 1.2761, "step": 428 }, { "epoch": 0.06073476321936717, "grad_norm": 13.074561869937375, "learning_rate": 4.999998357210974e-06, "loss": 1.6092, "step": 429 }, { "epoch": 0.06087633609400439, "grad_norm": 10.316764181125073, "learning_rate": 4.999997634383916e-06, "loss": 1.1995, "step": 430 }, { "epoch": 0.06101790896864161, "grad_norm": 9.8709557859126, "learning_rate": 4.9999967801338475e-06, "loss": 1.2532, "step": 431 }, { "epoch": 0.06115948184327883, "grad_norm": 11.748892873996615, "learning_rate": 4.9999957944608115e-06, "loss": 1.3003, "step": 432 }, { "epoch": 0.06130105471791605, "grad_norm": 15.487926580113188, "learning_rate": 4.999994677364861e-06, "loss": 1.4142, "step": 433 }, { "epoch": 0.06144262759255327, "grad_norm": 12.992639658773516, "learning_rate": 4.999993428846054e-06, "loss": 1.2875, "step": 434 }, { "epoch": 0.061584200467190486, "grad_norm": 12.05484066048197, "learning_rate": 4.999992048904457e-06, "loss": 1.2039, "step": 435 }, { "epoch": 0.0617257733418277, "grad_norm": 10.520718796109723, "learning_rate": 4.999990537540142e-06, "loss": 1.2555, "step": 436 }, { "epoch": 0.061867346216464926, "grad_norm": 11.412212830322186, "learning_rate": 4.999988894753189e-06, "loss": 1.3087, "step": 437 }, { "epoch": 0.06200891909110214, "grad_norm": 12.69500961177142, "learning_rate": 4.999987120543682e-06, "loss": 1.3933, "step": 438 }, { "epoch": 0.062150491965739366, "grad_norm": 11.391043614778633, "learning_rate": 4.999985214911718e-06, "loss": 1.4295, "step": 439 }, { "epoch": 0.06229206484037658, "grad_norm": 10.495851332914196, "learning_rate": 4.9999831778573945e-06, "loss": 1.16, "step": 440 }, { "epoch": 0.062433637715013805, "grad_norm": 13.01366352079313, "learning_rate": 4.99998100938082e-06, "loss": 1.4517, "step": 441 }, { "epoch": 0.06257521058965103, "grad_norm": 12.606027378580901, "learning_rate": 4.999978709482108e-06, "loss": 1.4081, "step": 442 }, { "epoch": 0.06271678346428824, "grad_norm": 10.18896136889602, "learning_rate": 4.999976278161378e-06, "loss": 1.4185, "step": 443 }, { "epoch": 0.06285835633892546, "grad_norm": 12.06093701924633, "learning_rate": 4.9999737154187596e-06, "loss": 1.3328, "step": 444 }, { "epoch": 0.06299992921356268, "grad_norm": 12.282064396304106, "learning_rate": 4.999971021254387e-06, "loss": 1.2864, "step": 445 }, { "epoch": 0.0631415020881999, "grad_norm": 12.926349054467378, "learning_rate": 4.9999681956684025e-06, "loss": 1.3148, "step": 446 }, { "epoch": 0.06328307496283712, "grad_norm": 10.870524921674846, "learning_rate": 4.999965238660954e-06, "loss": 1.382, "step": 447 }, { "epoch": 0.06342464783747434, "grad_norm": 11.464150319745068, "learning_rate": 4.999962150232197e-06, "loss": 1.4535, "step": 448 }, { "epoch": 0.06356622071211156, "grad_norm": 13.892821817286617, "learning_rate": 4.999958930382293e-06, "loss": 1.4222, "step": 449 }, { "epoch": 0.06370779358674877, "grad_norm": 11.724898777506501, "learning_rate": 4.999955579111413e-06, "loss": 1.4271, "step": 450 }, { "epoch": 0.063849366461386, "grad_norm": 14.243670212177802, "learning_rate": 4.999952096419731e-06, "loss": 1.3339, "step": 451 }, { "epoch": 0.06399093933602322, "grad_norm": 14.258348662864044, "learning_rate": 4.999948482307433e-06, "loss": 1.3313, "step": 452 }, { "epoch": 0.06413251221066044, "grad_norm": 14.704845558867875, "learning_rate": 4.999944736774706e-06, "loss": 1.363, "step": 453 }, { "epoch": 0.06427408508529765, "grad_norm": 13.64748773616555, "learning_rate": 4.999940859821749e-06, "loss": 1.1092, "step": 454 }, { "epoch": 0.06441565795993488, "grad_norm": 13.167513341279493, "learning_rate": 4.999936851448764e-06, "loss": 1.2688, "step": 455 }, { "epoch": 0.0645572308345721, "grad_norm": 17.134755718002666, "learning_rate": 4.9999327116559634e-06, "loss": 1.3592, "step": 456 }, { "epoch": 0.06469880370920932, "grad_norm": 13.792619163298502, "learning_rate": 4.999928440443565e-06, "loss": 1.2916, "step": 457 }, { "epoch": 0.06484037658384653, "grad_norm": 10.757457187078922, "learning_rate": 4.999924037811792e-06, "loss": 1.414, "step": 458 }, { "epoch": 0.06498194945848375, "grad_norm": 13.370948249098795, "learning_rate": 4.9999195037608765e-06, "loss": 1.1395, "step": 459 }, { "epoch": 0.06512352233312098, "grad_norm": 15.55794106009201, "learning_rate": 4.999914838291056e-06, "loss": 1.3602, "step": 460 }, { "epoch": 0.0652650952077582, "grad_norm": 14.155093922682426, "learning_rate": 4.999910041402577e-06, "loss": 1.3612, "step": 461 }, { "epoch": 0.06540666808239541, "grad_norm": 13.46514065345709, "learning_rate": 4.999905113095691e-06, "loss": 1.4451, "step": 462 }, { "epoch": 0.06554824095703263, "grad_norm": 15.90038618662418, "learning_rate": 4.999900053370657e-06, "loss": 1.3172, "step": 463 }, { "epoch": 0.06568981383166986, "grad_norm": 16.597435148709724, "learning_rate": 4.999894862227741e-06, "loss": 1.3949, "step": 464 }, { "epoch": 0.06583138670630707, "grad_norm": 12.76833625679562, "learning_rate": 4.999889539667217e-06, "loss": 1.3699, "step": 465 }, { "epoch": 0.06597295958094429, "grad_norm": 17.967562800454637, "learning_rate": 4.999884085689363e-06, "loss": 1.5186, "step": 466 }, { "epoch": 0.0661145324555815, "grad_norm": 14.858541663836885, "learning_rate": 4.9998785002944665e-06, "loss": 1.464, "step": 467 }, { "epoch": 0.06625610533021874, "grad_norm": 15.605281503100661, "learning_rate": 4.999872783482822e-06, "loss": 1.6588, "step": 468 }, { "epoch": 0.06639767820485595, "grad_norm": 19.68071327095427, "learning_rate": 4.999866935254729e-06, "loss": 1.3792, "step": 469 }, { "epoch": 0.06653925107949317, "grad_norm": 14.3375430172156, "learning_rate": 4.999860955610495e-06, "loss": 1.4806, "step": 470 }, { "epoch": 0.06668082395413039, "grad_norm": 12.299551259360669, "learning_rate": 4.9998548445504345e-06, "loss": 1.4422, "step": 471 }, { "epoch": 0.0668223968287676, "grad_norm": 16.59114011078748, "learning_rate": 4.999848602074869e-06, "loss": 1.4648, "step": 472 }, { "epoch": 0.06696396970340483, "grad_norm": 11.972554929220543, "learning_rate": 4.999842228184127e-06, "loss": 1.4041, "step": 473 }, { "epoch": 0.06710554257804205, "grad_norm": 12.222928398616075, "learning_rate": 4.999835722878542e-06, "loss": 1.3165, "step": 474 }, { "epoch": 0.06724711545267927, "grad_norm": 11.346664516689337, "learning_rate": 4.999829086158458e-06, "loss": 1.5785, "step": 475 }, { "epoch": 0.06738868832731648, "grad_norm": 14.892288035084531, "learning_rate": 4.999822318024222e-06, "loss": 1.4094, "step": 476 }, { "epoch": 0.06753026120195371, "grad_norm": 11.534868535820939, "learning_rate": 4.999815418476191e-06, "loss": 1.3199, "step": 477 }, { "epoch": 0.06767183407659093, "grad_norm": 12.680342736651298, "learning_rate": 4.9998083875147275e-06, "loss": 1.5629, "step": 478 }, { "epoch": 0.06781340695122814, "grad_norm": 12.239221307256757, "learning_rate": 4.9998012251402005e-06, "loss": 1.3286, "step": 479 }, { "epoch": 0.06795497982586536, "grad_norm": 11.666721089554054, "learning_rate": 4.9997939313529875e-06, "loss": 1.3202, "step": 480 }, { "epoch": 0.06809655270050258, "grad_norm": 10.904015079815318, "learning_rate": 4.999786506153471e-06, "loss": 1.3799, "step": 481 }, { "epoch": 0.06823812557513981, "grad_norm": 13.99782109334621, "learning_rate": 4.999778949542042e-06, "loss": 1.1482, "step": 482 }, { "epoch": 0.06837969844977702, "grad_norm": 11.17853651646325, "learning_rate": 4.999771261519099e-06, "loss": 1.4144, "step": 483 }, { "epoch": 0.06852127132441424, "grad_norm": 12.416435768660248, "learning_rate": 4.999763442085043e-06, "loss": 1.2661, "step": 484 }, { "epoch": 0.06866284419905146, "grad_norm": 11.424040332113924, "learning_rate": 4.999755491240287e-06, "loss": 1.2752, "step": 485 }, { "epoch": 0.06880441707368869, "grad_norm": 10.757989797467827, "learning_rate": 4.999747408985249e-06, "loss": 1.3763, "step": 486 }, { "epoch": 0.0689459899483259, "grad_norm": 10.293018782492252, "learning_rate": 4.9997391953203535e-06, "loss": 1.3052, "step": 487 }, { "epoch": 0.06908756282296312, "grad_norm": 10.105414014033247, "learning_rate": 4.999730850246032e-06, "loss": 1.3876, "step": 488 }, { "epoch": 0.06922913569760034, "grad_norm": 14.862855714177837, "learning_rate": 4.999722373762725e-06, "loss": 1.3205, "step": 489 }, { "epoch": 0.06937070857223757, "grad_norm": 10.416026062658844, "learning_rate": 4.999713765870875e-06, "loss": 1.335, "step": 490 }, { "epoch": 0.06951228144687478, "grad_norm": 10.312339827709712, "learning_rate": 4.999705026570937e-06, "loss": 1.2676, "step": 491 }, { "epoch": 0.069653854321512, "grad_norm": 12.91107693388795, "learning_rate": 4.999696155863369e-06, "loss": 1.3693, "step": 492 }, { "epoch": 0.06979542719614922, "grad_norm": 13.575118562802338, "learning_rate": 4.999687153748638e-06, "loss": 1.3042, "step": 493 }, { "epoch": 0.06993700007078643, "grad_norm": 12.127830568242294, "learning_rate": 4.9996780202272175e-06, "loss": 1.2554, "step": 494 }, { "epoch": 0.07007857294542366, "grad_norm": 12.894252567232353, "learning_rate": 4.999668755299588e-06, "loss": 1.3077, "step": 495 }, { "epoch": 0.07022014582006088, "grad_norm": 13.360133955963505, "learning_rate": 4.999659358966235e-06, "loss": 1.203, "step": 496 }, { "epoch": 0.0703617186946981, "grad_norm": 15.130566289960294, "learning_rate": 4.999649831227654e-06, "loss": 1.415, "step": 497 }, { "epoch": 0.07050329156933531, "grad_norm": 10.699239785827258, "learning_rate": 4.999640172084345e-06, "loss": 1.2839, "step": 498 }, { "epoch": 0.07064486444397254, "grad_norm": 9.37531249479184, "learning_rate": 4.999630381536815e-06, "loss": 1.1048, "step": 499 }, { "epoch": 0.07078643731860976, "grad_norm": 11.656265054237933, "learning_rate": 4.99962045958558e-06, "loss": 1.3127, "step": 500 }, { "epoch": 0.07092801019324697, "grad_norm": 10.96934677196029, "learning_rate": 4.999610406231162e-06, "loss": 1.3822, "step": 501 }, { "epoch": 0.07106958306788419, "grad_norm": 11.318106426476767, "learning_rate": 4.999600221474089e-06, "loss": 1.4181, "step": 502 }, { "epoch": 0.0712111559425214, "grad_norm": 12.707805151464123, "learning_rate": 4.999589905314895e-06, "loss": 1.2218, "step": 503 }, { "epoch": 0.07135272881715864, "grad_norm": 14.408993010958252, "learning_rate": 4.9995794577541235e-06, "loss": 1.4879, "step": 504 }, { "epoch": 0.07149430169179585, "grad_norm": 12.760325719807701, "learning_rate": 4.999568878792324e-06, "loss": 1.2773, "step": 505 }, { "epoch": 0.07163587456643307, "grad_norm": 11.698394430475345, "learning_rate": 4.999558168430053e-06, "loss": 1.2514, "step": 506 }, { "epoch": 0.07177744744107029, "grad_norm": 14.780289923233703, "learning_rate": 4.999547326667872e-06, "loss": 1.3436, "step": 507 }, { "epoch": 0.07191902031570752, "grad_norm": 10.828447345377873, "learning_rate": 4.999536353506352e-06, "loss": 1.2795, "step": 508 }, { "epoch": 0.07206059319034473, "grad_norm": 11.300483855294406, "learning_rate": 4.99952524894607e-06, "loss": 1.3318, "step": 509 }, { "epoch": 0.07220216606498195, "grad_norm": 14.000091552435023, "learning_rate": 4.999514012987609e-06, "loss": 1.4039, "step": 510 }, { "epoch": 0.07234373893961916, "grad_norm": 10.874070401854688, "learning_rate": 4.99950264563156e-06, "loss": 1.2127, "step": 511 }, { "epoch": 0.0724853118142564, "grad_norm": 13.301793624284166, "learning_rate": 4.99949114687852e-06, "loss": 1.3644, "step": 512 }, { "epoch": 0.07262688468889361, "grad_norm": 13.329451822512127, "learning_rate": 4.9994795167290954e-06, "loss": 1.3856, "step": 513 }, { "epoch": 0.07276845756353083, "grad_norm": 14.570031310121804, "learning_rate": 4.999467755183895e-06, "loss": 1.317, "step": 514 }, { "epoch": 0.07291003043816804, "grad_norm": 10.8871294100875, "learning_rate": 4.999455862243539e-06, "loss": 1.2137, "step": 515 }, { "epoch": 0.07305160331280526, "grad_norm": 15.26225563747229, "learning_rate": 4.999443837908653e-06, "loss": 1.4751, "step": 516 }, { "epoch": 0.07319317618744249, "grad_norm": 12.034062678529693, "learning_rate": 4.999431682179867e-06, "loss": 1.1623, "step": 517 }, { "epoch": 0.07333474906207971, "grad_norm": 10.650407638270384, "learning_rate": 4.999419395057821e-06, "loss": 1.4239, "step": 518 }, { "epoch": 0.07347632193671692, "grad_norm": 12.43419507460456, "learning_rate": 4.999406976543162e-06, "loss": 1.4516, "step": 519 }, { "epoch": 0.07361789481135414, "grad_norm": 10.547338314476288, "learning_rate": 4.999394426636541e-06, "loss": 1.1837, "step": 520 }, { "epoch": 0.07375946768599137, "grad_norm": 9.487410786929246, "learning_rate": 4.9993817453386185e-06, "loss": 1.2666, "step": 521 }, { "epoch": 0.07390104056062859, "grad_norm": 11.404823862042708, "learning_rate": 4.999368932650062e-06, "loss": 1.1123, "step": 522 }, { "epoch": 0.0740426134352658, "grad_norm": 11.524024084646632, "learning_rate": 4.999355988571544e-06, "loss": 1.3334, "step": 523 }, { "epoch": 0.07418418630990302, "grad_norm": 10.873560316880749, "learning_rate": 4.999342913103745e-06, "loss": 1.4456, "step": 524 }, { "epoch": 0.07432575918454024, "grad_norm": 10.583913191481884, "learning_rate": 4.999329706247353e-06, "loss": 1.3349, "step": 525 }, { "epoch": 0.07446733205917747, "grad_norm": 10.986333506943126, "learning_rate": 4.999316368003062e-06, "loss": 1.4422, "step": 526 }, { "epoch": 0.07460890493381468, "grad_norm": 11.199316508009854, "learning_rate": 4.999302898371572e-06, "loss": 1.3577, "step": 527 }, { "epoch": 0.0747504778084519, "grad_norm": 10.13097666725286, "learning_rate": 4.999289297353593e-06, "loss": 1.1547, "step": 528 }, { "epoch": 0.07489205068308911, "grad_norm": 13.26677174997805, "learning_rate": 4.9992755649498395e-06, "loss": 1.3009, "step": 529 }, { "epoch": 0.07503362355772634, "grad_norm": 13.795211536186207, "learning_rate": 4.999261701161033e-06, "loss": 1.378, "step": 530 }, { "epoch": 0.07517519643236356, "grad_norm": 10.96051951214331, "learning_rate": 4.999247705987902e-06, "loss": 1.3641, "step": 531 }, { "epoch": 0.07531676930700078, "grad_norm": 12.058477333824182, "learning_rate": 4.999233579431183e-06, "loss": 1.3134, "step": 532 }, { "epoch": 0.075458342181638, "grad_norm": 13.961508561522876, "learning_rate": 4.999219321491618e-06, "loss": 1.386, "step": 533 }, { "epoch": 0.07559991505627522, "grad_norm": 9.573515089146168, "learning_rate": 4.999204932169958e-06, "loss": 1.2154, "step": 534 }, { "epoch": 0.07574148793091244, "grad_norm": 13.93334406315392, "learning_rate": 4.999190411466956e-06, "loss": 1.274, "step": 535 }, { "epoch": 0.07588306080554966, "grad_norm": 12.267960441473338, "learning_rate": 4.999175759383379e-06, "loss": 1.4162, "step": 536 }, { "epoch": 0.07602463368018687, "grad_norm": 9.74014057598459, "learning_rate": 4.9991609759199954e-06, "loss": 1.3555, "step": 537 }, { "epoch": 0.07616620655482409, "grad_norm": 11.102117085681437, "learning_rate": 4.9991460610775825e-06, "loss": 1.35, "step": 538 }, { "epoch": 0.07630777942946132, "grad_norm": 15.566372431738673, "learning_rate": 4.999131014856925e-06, "loss": 1.4925, "step": 539 }, { "epoch": 0.07644935230409854, "grad_norm": 12.009602201736131, "learning_rate": 4.999115837258813e-06, "loss": 1.2716, "step": 540 }, { "epoch": 0.07659092517873575, "grad_norm": 15.650642045473626, "learning_rate": 4.999100528284045e-06, "loss": 1.2196, "step": 541 }, { "epoch": 0.07673249805337297, "grad_norm": 17.357260890337347, "learning_rate": 4.999085087933426e-06, "loss": 1.3604, "step": 542 }, { "epoch": 0.0768740709280102, "grad_norm": 10.972162916664345, "learning_rate": 4.999069516207767e-06, "loss": 1.2931, "step": 543 }, { "epoch": 0.07701564380264742, "grad_norm": 13.371674026080864, "learning_rate": 4.9990538131078885e-06, "loss": 1.3963, "step": 544 }, { "epoch": 0.07715721667728463, "grad_norm": 15.364016695942604, "learning_rate": 4.9990379786346126e-06, "loss": 1.3673, "step": 545 }, { "epoch": 0.07729878955192185, "grad_norm": 11.307779412605315, "learning_rate": 4.999022012788774e-06, "loss": 1.2572, "step": 546 }, { "epoch": 0.07744036242655906, "grad_norm": 10.46241719020581, "learning_rate": 4.999005915571211e-06, "loss": 1.256, "step": 547 }, { "epoch": 0.0775819353011963, "grad_norm": 14.402831095972124, "learning_rate": 4.998989686982771e-06, "loss": 1.1402, "step": 548 }, { "epoch": 0.07772350817583351, "grad_norm": 13.667085098435972, "learning_rate": 4.998973327024306e-06, "loss": 1.4835, "step": 549 }, { "epoch": 0.07786508105047073, "grad_norm": 11.790078495401731, "learning_rate": 4.998956835696676e-06, "loss": 1.4663, "step": 550 }, { "epoch": 0.07800665392510794, "grad_norm": 17.198981427927734, "learning_rate": 4.99894021300075e-06, "loss": 1.4011, "step": 551 }, { "epoch": 0.07814822679974517, "grad_norm": 11.57424577599781, "learning_rate": 4.998923458937399e-06, "loss": 1.3159, "step": 552 }, { "epoch": 0.07828979967438239, "grad_norm": 13.965907411415083, "learning_rate": 4.998906573507506e-06, "loss": 1.4253, "step": 553 }, { "epoch": 0.0784313725490196, "grad_norm": 8.86520834190108, "learning_rate": 4.998889556711958e-06, "loss": 1.2393, "step": 554 }, { "epoch": 0.07857294542365682, "grad_norm": 13.871249405103844, "learning_rate": 4.998872408551648e-06, "loss": 1.3475, "step": 555 }, { "epoch": 0.07871451829829405, "grad_norm": 12.368686491199384, "learning_rate": 4.998855129027479e-06, "loss": 1.3085, "step": 556 }, { "epoch": 0.07885609117293127, "grad_norm": 9.848433285736048, "learning_rate": 4.998837718140359e-06, "loss": 1.1592, "step": 557 }, { "epoch": 0.07899766404756849, "grad_norm": 11.734781352193533, "learning_rate": 4.998820175891204e-06, "loss": 1.1986, "step": 558 }, { "epoch": 0.0791392369222057, "grad_norm": 10.976289337125944, "learning_rate": 4.998802502280936e-06, "loss": 1.336, "step": 559 }, { "epoch": 0.07928080979684292, "grad_norm": 12.580823904933775, "learning_rate": 4.998784697310483e-06, "loss": 1.2131, "step": 560 }, { "epoch": 0.07942238267148015, "grad_norm": 9.179087829482167, "learning_rate": 4.998766760980781e-06, "loss": 1.1454, "step": 561 }, { "epoch": 0.07956395554611737, "grad_norm": 13.01424683673777, "learning_rate": 4.998748693292774e-06, "loss": 1.2983, "step": 562 }, { "epoch": 0.07970552842075458, "grad_norm": 10.980993322804125, "learning_rate": 4.9987304942474115e-06, "loss": 1.2591, "step": 563 }, { "epoch": 0.0798471012953918, "grad_norm": 10.585622905265446, "learning_rate": 4.99871216384565e-06, "loss": 1.3555, "step": 564 }, { "epoch": 0.07998867417002903, "grad_norm": 10.039192834149091, "learning_rate": 4.998693702088453e-06, "loss": 1.1637, "step": 565 }, { "epoch": 0.08013024704466624, "grad_norm": 11.088964499311565, "learning_rate": 4.998675108976792e-06, "loss": 1.2953, "step": 566 }, { "epoch": 0.08027181991930346, "grad_norm": 10.041261614680355, "learning_rate": 4.998656384511643e-06, "loss": 1.2113, "step": 567 }, { "epoch": 0.08041339279394068, "grad_norm": 10.763280073333208, "learning_rate": 4.998637528693991e-06, "loss": 1.1626, "step": 568 }, { "epoch": 0.0805549656685779, "grad_norm": 9.625755775801249, "learning_rate": 4.998618541524827e-06, "loss": 1.3207, "step": 569 }, { "epoch": 0.08069653854321512, "grad_norm": 11.407802361187455, "learning_rate": 4.998599423005149e-06, "loss": 1.4362, "step": 570 }, { "epoch": 0.08083811141785234, "grad_norm": 11.552456839703108, "learning_rate": 4.998580173135963e-06, "loss": 1.4688, "step": 571 }, { "epoch": 0.08097968429248956, "grad_norm": 12.845620420565828, "learning_rate": 4.99856079191828e-06, "loss": 1.4241, "step": 572 }, { "epoch": 0.08112125716712677, "grad_norm": 12.087570464787824, "learning_rate": 4.998541279353119e-06, "loss": 1.2565, "step": 573 }, { "epoch": 0.081262830041764, "grad_norm": 11.163074276153585, "learning_rate": 4.998521635441506e-06, "loss": 1.4476, "step": 574 }, { "epoch": 0.08140440291640122, "grad_norm": 13.303378279866605, "learning_rate": 4.998501860184474e-06, "loss": 1.3609, "step": 575 }, { "epoch": 0.08154597579103844, "grad_norm": 12.495307346699262, "learning_rate": 4.998481953583062e-06, "loss": 1.2572, "step": 576 }, { "epoch": 0.08168754866567565, "grad_norm": 11.908351602578126, "learning_rate": 4.998461915638316e-06, "loss": 1.4414, "step": 577 }, { "epoch": 0.08182912154031288, "grad_norm": 10.232461578647442, "learning_rate": 4.9984417463512916e-06, "loss": 1.3962, "step": 578 }, { "epoch": 0.0819706944149501, "grad_norm": 11.957993417266243, "learning_rate": 4.998421445723046e-06, "loss": 1.3661, "step": 579 }, { "epoch": 0.08211226728958732, "grad_norm": 11.872206188622073, "learning_rate": 4.9984010137546475e-06, "loss": 1.3048, "step": 580 }, { "epoch": 0.08225384016422453, "grad_norm": 10.40895095918896, "learning_rate": 4.998380450447172e-06, "loss": 1.1703, "step": 581 }, { "epoch": 0.08239541303886175, "grad_norm": 11.841888845150441, "learning_rate": 4.998359755801699e-06, "loss": 1.3072, "step": 582 }, { "epoch": 0.08253698591349898, "grad_norm": 12.867294227699446, "learning_rate": 4.9983389298193165e-06, "loss": 1.2732, "step": 583 }, { "epoch": 0.0826785587881362, "grad_norm": 10.896968356958673, "learning_rate": 4.998317972501119e-06, "loss": 1.3347, "step": 584 }, { "epoch": 0.08282013166277341, "grad_norm": 9.797803624873385, "learning_rate": 4.9982968838482085e-06, "loss": 1.3041, "step": 585 }, { "epoch": 0.08296170453741063, "grad_norm": 10.230166343015236, "learning_rate": 4.998275663861692e-06, "loss": 1.2963, "step": 586 }, { "epoch": 0.08310327741204786, "grad_norm": 12.938214710783855, "learning_rate": 4.998254312542689e-06, "loss": 1.283, "step": 587 }, { "epoch": 0.08324485028668507, "grad_norm": 10.514247129499825, "learning_rate": 4.998232829892319e-06, "loss": 1.1872, "step": 588 }, { "epoch": 0.08338642316132229, "grad_norm": 10.851152251626933, "learning_rate": 4.998211215911711e-06, "loss": 1.395, "step": 589 }, { "epoch": 0.0835279960359595, "grad_norm": 9.954278181453919, "learning_rate": 4.998189470602003e-06, "loss": 1.3091, "step": 590 }, { "epoch": 0.08366956891059672, "grad_norm": 12.665492622442311, "learning_rate": 4.998167593964337e-06, "loss": 1.4515, "step": 591 }, { "epoch": 0.08381114178523395, "grad_norm": 11.95695914814099, "learning_rate": 4.998145585999864e-06, "loss": 1.5373, "step": 592 }, { "epoch": 0.08395271465987117, "grad_norm": 9.606077146246976, "learning_rate": 4.998123446709739e-06, "loss": 1.3087, "step": 593 }, { "epoch": 0.08409428753450839, "grad_norm": 16.36124687088126, "learning_rate": 4.998101176095128e-06, "loss": 1.4167, "step": 594 }, { "epoch": 0.0842358604091456, "grad_norm": 11.064195406034283, "learning_rate": 4.9980787741572e-06, "loss": 1.3792, "step": 595 }, { "epoch": 0.08437743328378283, "grad_norm": 11.72608754659416, "learning_rate": 4.998056240897134e-06, "loss": 1.2751, "step": 596 }, { "epoch": 0.08451900615842005, "grad_norm": 11.13533654227244, "learning_rate": 4.9980335763161145e-06, "loss": 1.2633, "step": 597 }, { "epoch": 0.08466057903305726, "grad_norm": 9.471289366587055, "learning_rate": 4.998010780415332e-06, "loss": 1.3874, "step": 598 }, { "epoch": 0.08480215190769448, "grad_norm": 11.23512675140415, "learning_rate": 4.997987853195985e-06, "loss": 1.1664, "step": 599 }, { "epoch": 0.08494372478233171, "grad_norm": 12.260968338777007, "learning_rate": 4.99796479465928e-06, "loss": 1.3992, "step": 600 }, { "epoch": 0.08508529765696893, "grad_norm": 11.218075702772937, "learning_rate": 4.997941604806428e-06, "loss": 1.3271, "step": 601 }, { "epoch": 0.08522687053160614, "grad_norm": 11.225805077855044, "learning_rate": 4.997918283638647e-06, "loss": 1.3784, "step": 602 }, { "epoch": 0.08536844340624336, "grad_norm": 10.241207538573981, "learning_rate": 4.9978948311571666e-06, "loss": 1.245, "step": 603 }, { "epoch": 0.08551001628088058, "grad_norm": 11.540936131315751, "learning_rate": 4.997871247363217e-06, "loss": 1.3433, "step": 604 }, { "epoch": 0.08565158915551781, "grad_norm": 11.325896871391963, "learning_rate": 4.997847532258037e-06, "loss": 1.1703, "step": 605 }, { "epoch": 0.08579316203015502, "grad_norm": 9.875033221611444, "learning_rate": 4.997823685842875e-06, "loss": 1.2388, "step": 606 }, { "epoch": 0.08593473490479224, "grad_norm": 12.577781682038196, "learning_rate": 4.997799708118985e-06, "loss": 1.3149, "step": 607 }, { "epoch": 0.08607630777942946, "grad_norm": 13.512307667311818, "learning_rate": 4.997775599087627e-06, "loss": 1.287, "step": 608 }, { "epoch": 0.08621788065406669, "grad_norm": 9.137699852465609, "learning_rate": 4.997751358750068e-06, "loss": 1.2284, "step": 609 }, { "epoch": 0.0863594535287039, "grad_norm": 11.482775850609972, "learning_rate": 4.997726987107582e-06, "loss": 1.4326, "step": 610 }, { "epoch": 0.08650102640334112, "grad_norm": 14.250894083450847, "learning_rate": 4.997702484161451e-06, "loss": 1.4719, "step": 611 }, { "epoch": 0.08664259927797834, "grad_norm": 14.845015879410871, "learning_rate": 4.997677849912963e-06, "loss": 1.4166, "step": 612 }, { "epoch": 0.08678417215261557, "grad_norm": 12.076554565190174, "learning_rate": 4.997653084363412e-06, "loss": 1.2483, "step": 613 }, { "epoch": 0.08692574502725278, "grad_norm": 13.08025821255936, "learning_rate": 4.997628187514101e-06, "loss": 1.2175, "step": 614 }, { "epoch": 0.08706731790189, "grad_norm": 13.616168318321112, "learning_rate": 4.997603159366339e-06, "loss": 1.1612, "step": 615 }, { "epoch": 0.08720889077652721, "grad_norm": 14.213845415272315, "learning_rate": 4.99757799992144e-06, "loss": 1.3592, "step": 616 }, { "epoch": 0.08735046365116443, "grad_norm": 12.259830811968245, "learning_rate": 4.997552709180729e-06, "loss": 1.3082, "step": 617 }, { "epoch": 0.08749203652580166, "grad_norm": 11.81456855544743, "learning_rate": 4.997527287145534e-06, "loss": 1.4459, "step": 618 }, { "epoch": 0.08763360940043888, "grad_norm": 11.788340895948911, "learning_rate": 4.997501733817191e-06, "loss": 1.3229, "step": 619 }, { "epoch": 0.0877751822750761, "grad_norm": 10.943540812289864, "learning_rate": 4.997476049197046e-06, "loss": 1.2924, "step": 620 }, { "epoch": 0.08791675514971331, "grad_norm": 12.666931015736285, "learning_rate": 4.9974502332864464e-06, "loss": 1.5053, "step": 621 }, { "epoch": 0.08805832802435054, "grad_norm": 13.020911783617835, "learning_rate": 4.99742428608675e-06, "loss": 1.2484, "step": 622 }, { "epoch": 0.08819990089898776, "grad_norm": 11.978602087012673, "learning_rate": 4.9973982075993204e-06, "loss": 1.3418, "step": 623 }, { "epoch": 0.08834147377362497, "grad_norm": 11.185203955968845, "learning_rate": 4.99737199782553e-06, "loss": 1.233, "step": 624 }, { "epoch": 0.08848304664826219, "grad_norm": 13.142902480811351, "learning_rate": 4.997345656766755e-06, "loss": 1.3168, "step": 625 }, { "epoch": 0.0886246195228994, "grad_norm": 12.865164842731867, "learning_rate": 4.997319184424382e-06, "loss": 1.4294, "step": 626 }, { "epoch": 0.08876619239753664, "grad_norm": 12.546507111472986, "learning_rate": 4.997292580799801e-06, "loss": 1.282, "step": 627 }, { "epoch": 0.08890776527217385, "grad_norm": 11.044353882665426, "learning_rate": 4.997265845894411e-06, "loss": 1.2473, "step": 628 }, { "epoch": 0.08904933814681107, "grad_norm": 13.940241659469683, "learning_rate": 4.997238979709617e-06, "loss": 1.2876, "step": 629 }, { "epoch": 0.08919091102144829, "grad_norm": 10.224255493219298, "learning_rate": 4.997211982246833e-06, "loss": 1.178, "step": 630 }, { "epoch": 0.08933248389608552, "grad_norm": 13.304089965662932, "learning_rate": 4.997184853507476e-06, "loss": 1.3456, "step": 631 }, { "epoch": 0.08947405677072273, "grad_norm": 12.412265347416945, "learning_rate": 4.997157593492974e-06, "loss": 1.2608, "step": 632 }, { "epoch": 0.08961562964535995, "grad_norm": 14.08661669015632, "learning_rate": 4.997130202204759e-06, "loss": 1.2374, "step": 633 }, { "epoch": 0.08975720251999716, "grad_norm": 11.15081569271024, "learning_rate": 4.997102679644271e-06, "loss": 1.2611, "step": 634 }, { "epoch": 0.0898987753946344, "grad_norm": 11.200203103539993, "learning_rate": 4.997075025812957e-06, "loss": 1.193, "step": 635 }, { "epoch": 0.09004034826927161, "grad_norm": 15.392216665000031, "learning_rate": 4.997047240712272e-06, "loss": 1.382, "step": 636 }, { "epoch": 0.09018192114390883, "grad_norm": 10.937044145076294, "learning_rate": 4.997019324343674e-06, "loss": 1.1991, "step": 637 }, { "epoch": 0.09032349401854604, "grad_norm": 10.058749240303035, "learning_rate": 4.996991276708633e-06, "loss": 1.3606, "step": 638 }, { "epoch": 0.09046506689318326, "grad_norm": 10.008912120624244, "learning_rate": 4.996963097808622e-06, "loss": 1.1332, "step": 639 }, { "epoch": 0.09060663976782049, "grad_norm": 12.15954262826982, "learning_rate": 4.996934787645123e-06, "loss": 1.2292, "step": 640 }, { "epoch": 0.0907482126424577, "grad_norm": 11.347594395270317, "learning_rate": 4.996906346219623e-06, "loss": 1.1721, "step": 641 }, { "epoch": 0.09088978551709492, "grad_norm": 10.13366592095667, "learning_rate": 4.996877773533619e-06, "loss": 1.3545, "step": 642 }, { "epoch": 0.09103135839173214, "grad_norm": 9.248041306426519, "learning_rate": 4.996849069588612e-06, "loss": 1.177, "step": 643 }, { "epoch": 0.09117293126636937, "grad_norm": 8.534905066384013, "learning_rate": 4.996820234386112e-06, "loss": 1.2485, "step": 644 }, { "epoch": 0.09131450414100659, "grad_norm": 15.404690491397163, "learning_rate": 4.996791267927632e-06, "loss": 1.5162, "step": 645 }, { "epoch": 0.0914560770156438, "grad_norm": 10.581502761120168, "learning_rate": 4.996762170214698e-06, "loss": 1.209, "step": 646 }, { "epoch": 0.09159764989028102, "grad_norm": 10.338312569617411, "learning_rate": 4.996732941248839e-06, "loss": 1.3463, "step": 647 }, { "epoch": 0.09173922276491824, "grad_norm": 11.185939914464923, "learning_rate": 4.99670358103159e-06, "loss": 1.2642, "step": 648 }, { "epoch": 0.09188079563955547, "grad_norm": 8.566191163082953, "learning_rate": 4.996674089564495e-06, "loss": 1.2167, "step": 649 }, { "epoch": 0.09202236851419268, "grad_norm": 8.426012144784892, "learning_rate": 4.9966444668491055e-06, "loss": 1.2238, "step": 650 }, { "epoch": 0.0921639413888299, "grad_norm": 9.181059987157962, "learning_rate": 4.996614712886978e-06, "loss": 1.2288, "step": 651 }, { "epoch": 0.09230551426346711, "grad_norm": 11.370595896865067, "learning_rate": 4.996584827679676e-06, "loss": 1.1733, "step": 652 }, { "epoch": 0.09244708713810434, "grad_norm": 9.701456365682132, "learning_rate": 4.996554811228772e-06, "loss": 1.1174, "step": 653 }, { "epoch": 0.09258866001274156, "grad_norm": 10.802043114013744, "learning_rate": 4.996524663535842e-06, "loss": 1.2716, "step": 654 }, { "epoch": 0.09273023288737878, "grad_norm": 8.456578365471888, "learning_rate": 4.996494384602473e-06, "loss": 1.2204, "step": 655 }, { "epoch": 0.092871805762016, "grad_norm": 10.472774631581007, "learning_rate": 4.996463974430255e-06, "loss": 1.262, "step": 656 }, { "epoch": 0.09301337863665322, "grad_norm": 9.306793717589201, "learning_rate": 4.996433433020788e-06, "loss": 1.2645, "step": 657 }, { "epoch": 0.09315495151129044, "grad_norm": 11.121467629603933, "learning_rate": 4.996402760375676e-06, "loss": 1.2378, "step": 658 }, { "epoch": 0.09329652438592766, "grad_norm": 10.826905989734412, "learning_rate": 4.996371956496532e-06, "loss": 1.2663, "step": 659 }, { "epoch": 0.09343809726056487, "grad_norm": 10.85358398591707, "learning_rate": 4.996341021384976e-06, "loss": 1.3574, "step": 660 }, { "epoch": 0.09357967013520209, "grad_norm": 9.66504335475617, "learning_rate": 4.996309955042634e-06, "loss": 1.1665, "step": 661 }, { "epoch": 0.09372124300983932, "grad_norm": 11.634859231671877, "learning_rate": 4.996278757471139e-06, "loss": 1.3596, "step": 662 }, { "epoch": 0.09386281588447654, "grad_norm": 12.765047655625521, "learning_rate": 4.996247428672132e-06, "loss": 1.3199, "step": 663 }, { "epoch": 0.09400438875911375, "grad_norm": 10.897493098642501, "learning_rate": 4.996215968647258e-06, "loss": 1.2128, "step": 664 }, { "epoch": 0.09414596163375097, "grad_norm": 12.759962191630347, "learning_rate": 4.996184377398171e-06, "loss": 1.3942, "step": 665 }, { "epoch": 0.0942875345083882, "grad_norm": 13.792781788594114, "learning_rate": 4.996152654926534e-06, "loss": 1.3835, "step": 666 }, { "epoch": 0.09442910738302542, "grad_norm": 11.549301615156285, "learning_rate": 4.996120801234012e-06, "loss": 1.4365, "step": 667 }, { "epoch": 0.09457068025766263, "grad_norm": 14.33371810988048, "learning_rate": 4.996088816322281e-06, "loss": 1.321, "step": 668 }, { "epoch": 0.09471225313229985, "grad_norm": 11.225655558247025, "learning_rate": 4.996056700193023e-06, "loss": 1.3038, "step": 669 }, { "epoch": 0.09485382600693706, "grad_norm": 11.011212962843192, "learning_rate": 4.996024452847924e-06, "loss": 1.3881, "step": 670 }, { "epoch": 0.0949953988815743, "grad_norm": 10.142533932300699, "learning_rate": 4.9959920742886815e-06, "loss": 1.436, "step": 671 }, { "epoch": 0.09513697175621151, "grad_norm": 13.164013816675558, "learning_rate": 4.995959564516997e-06, "loss": 1.4428, "step": 672 }, { "epoch": 0.09527854463084873, "grad_norm": 14.982757321927544, "learning_rate": 4.995926923534578e-06, "loss": 1.2932, "step": 673 }, { "epoch": 0.09542011750548594, "grad_norm": 10.50378567706923, "learning_rate": 4.995894151343143e-06, "loss": 1.3027, "step": 674 }, { "epoch": 0.09556169038012317, "grad_norm": 10.62650064081025, "learning_rate": 4.9958612479444125e-06, "loss": 1.1967, "step": 675 }, { "epoch": 0.09570326325476039, "grad_norm": 13.976349062966438, "learning_rate": 4.995828213340118e-06, "loss": 1.4671, "step": 676 }, { "epoch": 0.0958448361293976, "grad_norm": 13.19980426990006, "learning_rate": 4.995795047531994e-06, "loss": 1.3193, "step": 677 }, { "epoch": 0.09598640900403482, "grad_norm": 11.698466169450311, "learning_rate": 4.995761750521787e-06, "loss": 1.3019, "step": 678 }, { "epoch": 0.09612798187867205, "grad_norm": 11.344228182715801, "learning_rate": 4.995728322311244e-06, "loss": 1.1708, "step": 679 }, { "epoch": 0.09626955475330927, "grad_norm": 12.283426572393186, "learning_rate": 4.995694762902125e-06, "loss": 1.3846, "step": 680 }, { "epoch": 0.09641112762794649, "grad_norm": 14.095200153186845, "learning_rate": 4.9956610722961936e-06, "loss": 1.3638, "step": 681 }, { "epoch": 0.0965527005025837, "grad_norm": 12.031367947879643, "learning_rate": 4.99562725049522e-06, "loss": 1.3452, "step": 682 }, { "epoch": 0.09669427337722092, "grad_norm": 10.744873598827729, "learning_rate": 4.9955932975009825e-06, "loss": 1.397, "step": 683 }, { "epoch": 0.09683584625185815, "grad_norm": 13.333260981045555, "learning_rate": 4.995559213315267e-06, "loss": 1.2592, "step": 684 }, { "epoch": 0.09697741912649536, "grad_norm": 14.02281128911729, "learning_rate": 4.9955249979398625e-06, "loss": 1.2449, "step": 685 }, { "epoch": 0.09711899200113258, "grad_norm": 12.269606489541673, "learning_rate": 4.995490651376571e-06, "loss": 1.3023, "step": 686 }, { "epoch": 0.0972605648757698, "grad_norm": 9.657505780879184, "learning_rate": 4.9954561736271966e-06, "loss": 1.1404, "step": 687 }, { "epoch": 0.09740213775040703, "grad_norm": 10.964893152052943, "learning_rate": 4.995421564693551e-06, "loss": 1.3765, "step": 688 }, { "epoch": 0.09754371062504424, "grad_norm": 13.799689380632925, "learning_rate": 4.995386824577455e-06, "loss": 1.3444, "step": 689 }, { "epoch": 0.09768528349968146, "grad_norm": 9.965420253560715, "learning_rate": 4.995351953280735e-06, "loss": 1.3421, "step": 690 }, { "epoch": 0.09782685637431868, "grad_norm": 9.19125600838747, "learning_rate": 4.995316950805223e-06, "loss": 1.237, "step": 691 }, { "epoch": 0.0979684292489559, "grad_norm": 9.522164038242117, "learning_rate": 4.995281817152759e-06, "loss": 1.2544, "step": 692 }, { "epoch": 0.09811000212359312, "grad_norm": 10.549326248711706, "learning_rate": 4.995246552325191e-06, "loss": 1.4141, "step": 693 }, { "epoch": 0.09825157499823034, "grad_norm": 11.138286757161787, "learning_rate": 4.9952111563243715e-06, "loss": 1.329, "step": 694 }, { "epoch": 0.09839314787286756, "grad_norm": 10.446212695526345, "learning_rate": 4.995175629152162e-06, "loss": 1.2535, "step": 695 }, { "epoch": 0.09853472074750477, "grad_norm": 12.989525903708918, "learning_rate": 4.995139970810431e-06, "loss": 1.3377, "step": 696 }, { "epoch": 0.098676293622142, "grad_norm": 12.303101956377645, "learning_rate": 4.995104181301052e-06, "loss": 1.2611, "step": 697 }, { "epoch": 0.09881786649677922, "grad_norm": 12.067995707065121, "learning_rate": 4.995068260625906e-06, "loss": 1.2915, "step": 698 }, { "epoch": 0.09895943937141644, "grad_norm": 13.392321627128476, "learning_rate": 4.995032208786883e-06, "loss": 1.2165, "step": 699 }, { "epoch": 0.09910101224605365, "grad_norm": 10.61585443364995, "learning_rate": 4.994996025785876e-06, "loss": 1.2256, "step": 700 }, { "epoch": 0.09924258512069088, "grad_norm": 11.35792080938693, "learning_rate": 4.99495971162479e-06, "loss": 1.4416, "step": 701 }, { "epoch": 0.0993841579953281, "grad_norm": 10.102448769005022, "learning_rate": 4.9949232663055304e-06, "loss": 1.2256, "step": 702 }, { "epoch": 0.09952573086996531, "grad_norm": 15.605910702440248, "learning_rate": 4.994886689830015e-06, "loss": 1.5258, "step": 703 }, { "epoch": 0.09966730374460253, "grad_norm": 14.37785244712196, "learning_rate": 4.994849982200168e-06, "loss": 1.3249, "step": 704 }, { "epoch": 0.09980887661923975, "grad_norm": 10.976160399107403, "learning_rate": 4.994813143417917e-06, "loss": 1.3144, "step": 705 }, { "epoch": 0.09995044949387698, "grad_norm": 12.084455836982341, "learning_rate": 4.994776173485199e-06, "loss": 1.2222, "step": 706 }, { "epoch": 0.1000920223685142, "grad_norm": 12.09929393883624, "learning_rate": 4.994739072403958e-06, "loss": 1.3185, "step": 707 }, { "epoch": 0.10023359524315141, "grad_norm": 12.091502688080558, "learning_rate": 4.994701840176144e-06, "loss": 1.3982, "step": 708 }, { "epoch": 0.10037516811778863, "grad_norm": 11.217591802375702, "learning_rate": 4.994664476803714e-06, "loss": 1.2386, "step": 709 }, { "epoch": 0.10051674099242586, "grad_norm": 11.62375363467285, "learning_rate": 4.9946269822886335e-06, "loss": 1.4211, "step": 710 }, { "epoch": 0.10065831386706307, "grad_norm": 12.20091471760577, "learning_rate": 4.994589356632872e-06, "loss": 1.3166, "step": 711 }, { "epoch": 0.10079988674170029, "grad_norm": 8.772474363457526, "learning_rate": 4.994551599838408e-06, "loss": 1.2528, "step": 712 }, { "epoch": 0.1009414596163375, "grad_norm": 12.128002306291265, "learning_rate": 4.994513711907227e-06, "loss": 1.2395, "step": 713 }, { "epoch": 0.10108303249097472, "grad_norm": 8.399531632944306, "learning_rate": 4.994475692841319e-06, "loss": 1.1757, "step": 714 }, { "epoch": 0.10122460536561195, "grad_norm": 10.6982888538034, "learning_rate": 4.9944375426426846e-06, "loss": 1.3685, "step": 715 }, { "epoch": 0.10136617824024917, "grad_norm": 10.986734888303229, "learning_rate": 4.994399261313329e-06, "loss": 1.0813, "step": 716 }, { "epoch": 0.10150775111488639, "grad_norm": 10.483686580882313, "learning_rate": 4.994360848855264e-06, "loss": 1.1864, "step": 717 }, { "epoch": 0.1016493239895236, "grad_norm": 12.767611438928926, "learning_rate": 4.994322305270508e-06, "loss": 1.3392, "step": 718 }, { "epoch": 0.10179089686416083, "grad_norm": 10.711997349095604, "learning_rate": 4.994283630561089e-06, "loss": 1.4533, "step": 719 }, { "epoch": 0.10193246973879805, "grad_norm": 10.573499020227965, "learning_rate": 4.994244824729039e-06, "loss": 1.2649, "step": 720 }, { "epoch": 0.10207404261343526, "grad_norm": 13.227513648058364, "learning_rate": 4.994205887776399e-06, "loss": 1.35, "step": 721 }, { "epoch": 0.10221561548807248, "grad_norm": 9.103303800032359, "learning_rate": 4.9941668197052155e-06, "loss": 1.1791, "step": 722 }, { "epoch": 0.10235718836270971, "grad_norm": 10.042042283331357, "learning_rate": 4.9941276205175405e-06, "loss": 1.3153, "step": 723 }, { "epoch": 0.10249876123734693, "grad_norm": 12.115293412569761, "learning_rate": 4.994088290215438e-06, "loss": 1.3786, "step": 724 }, { "epoch": 0.10264033411198414, "grad_norm": 8.728091468842234, "learning_rate": 4.994048828800972e-06, "loss": 1.1818, "step": 725 }, { "epoch": 0.10278190698662136, "grad_norm": 11.649890791090531, "learning_rate": 4.994009236276219e-06, "loss": 1.3218, "step": 726 }, { "epoch": 0.10292347986125858, "grad_norm": 10.274669083141628, "learning_rate": 4.993969512643261e-06, "loss": 1.317, "step": 727 }, { "epoch": 0.1030650527358958, "grad_norm": 10.7200403799179, "learning_rate": 4.993929657904185e-06, "loss": 1.4202, "step": 728 }, { "epoch": 0.10320662561053302, "grad_norm": 10.310826391184202, "learning_rate": 4.993889672061087e-06, "loss": 1.2162, "step": 729 }, { "epoch": 0.10334819848517024, "grad_norm": 9.701846814086682, "learning_rate": 4.993849555116067e-06, "loss": 1.2401, "step": 730 }, { "epoch": 0.10348977135980746, "grad_norm": 8.817206674731056, "learning_rate": 4.993809307071236e-06, "loss": 1.1165, "step": 731 }, { "epoch": 0.10363134423444469, "grad_norm": 9.540474052247172, "learning_rate": 4.99376892792871e-06, "loss": 1.1432, "step": 732 }, { "epoch": 0.1037729171090819, "grad_norm": 11.430023918493948, "learning_rate": 4.99372841769061e-06, "loss": 1.1904, "step": 733 }, { "epoch": 0.10391448998371912, "grad_norm": 10.765853192882723, "learning_rate": 4.9936877763590664e-06, "loss": 1.322, "step": 734 }, { "epoch": 0.10405606285835634, "grad_norm": 9.07973635320169, "learning_rate": 4.9936470039362165e-06, "loss": 1.2673, "step": 735 }, { "epoch": 0.10419763573299355, "grad_norm": 10.17090101603724, "learning_rate": 4.993606100424202e-06, "loss": 1.2873, "step": 736 }, { "epoch": 0.10433920860763078, "grad_norm": 9.742501138320476, "learning_rate": 4.993565065825175e-06, "loss": 1.2184, "step": 737 }, { "epoch": 0.104480781482268, "grad_norm": 9.121276932047056, "learning_rate": 4.9935239001412915e-06, "loss": 1.1234, "step": 738 }, { "epoch": 0.10462235435690521, "grad_norm": 9.46174911684397, "learning_rate": 4.993482603374715e-06, "loss": 1.1913, "step": 739 }, { "epoch": 0.10476392723154243, "grad_norm": 9.964416135841711, "learning_rate": 4.993441175527619e-06, "loss": 1.33, "step": 740 }, { "epoch": 0.10490550010617966, "grad_norm": 8.177337163075354, "learning_rate": 4.993399616602178e-06, "loss": 1.2912, "step": 741 }, { "epoch": 0.10504707298081688, "grad_norm": 11.01301775439383, "learning_rate": 4.99335792660058e-06, "loss": 1.3704, "step": 742 }, { "epoch": 0.1051886458554541, "grad_norm": 8.90779394854782, "learning_rate": 4.993316105525013e-06, "loss": 1.2062, "step": 743 }, { "epoch": 0.10533021873009131, "grad_norm": 9.669827788435935, "learning_rate": 4.993274153377678e-06, "loss": 1.3612, "step": 744 }, { "epoch": 0.10547179160472854, "grad_norm": 10.485346059841273, "learning_rate": 4.993232070160781e-06, "loss": 1.3723, "step": 745 }, { "epoch": 0.10561336447936576, "grad_norm": 9.332760884122921, "learning_rate": 4.993189855876531e-06, "loss": 1.2639, "step": 746 }, { "epoch": 0.10575493735400297, "grad_norm": 11.263828171329772, "learning_rate": 4.993147510527151e-06, "loss": 1.3777, "step": 747 }, { "epoch": 0.10589651022864019, "grad_norm": 11.995987539215587, "learning_rate": 4.993105034114864e-06, "loss": 1.2939, "step": 748 }, { "epoch": 0.1060380831032774, "grad_norm": 10.124974003511186, "learning_rate": 4.993062426641906e-06, "loss": 1.2735, "step": 749 }, { "epoch": 0.10617965597791464, "grad_norm": 8.854139715415476, "learning_rate": 4.993019688110514e-06, "loss": 1.151, "step": 750 }, { "epoch": 0.10632122885255185, "grad_norm": 8.821861110869557, "learning_rate": 4.992976818522936e-06, "loss": 1.2217, "step": 751 }, { "epoch": 0.10646280172718907, "grad_norm": 10.481799018142748, "learning_rate": 4.992933817881426e-06, "loss": 1.2479, "step": 752 }, { "epoch": 0.10660437460182628, "grad_norm": 9.886317853512681, "learning_rate": 4.992890686188243e-06, "loss": 1.2019, "step": 753 }, { "epoch": 0.10674594747646352, "grad_norm": 8.856856161303046, "learning_rate": 4.992847423445657e-06, "loss": 1.1952, "step": 754 }, { "epoch": 0.10688752035110073, "grad_norm": 8.70804718374875, "learning_rate": 4.992804029655939e-06, "loss": 1.163, "step": 755 }, { "epoch": 0.10702909322573795, "grad_norm": 9.4696938827972, "learning_rate": 4.992760504821373e-06, "loss": 1.2739, "step": 756 }, { "epoch": 0.10717066610037516, "grad_norm": 10.631617022710051, "learning_rate": 4.992716848944245e-06, "loss": 1.3024, "step": 757 }, { "epoch": 0.1073122389750124, "grad_norm": 9.428287278884941, "learning_rate": 4.992673062026851e-06, "loss": 1.201, "step": 758 }, { "epoch": 0.10745381184964961, "grad_norm": 9.551481393687832, "learning_rate": 4.992629144071494e-06, "loss": 1.2685, "step": 759 }, { "epoch": 0.10759538472428683, "grad_norm": 9.424200729356683, "learning_rate": 4.99258509508048e-06, "loss": 1.1927, "step": 760 }, { "epoch": 0.10773695759892404, "grad_norm": 8.924791811269982, "learning_rate": 4.9925409150561264e-06, "loss": 1.158, "step": 761 }, { "epoch": 0.10787853047356126, "grad_norm": 9.241182403214031, "learning_rate": 4.992496604000756e-06, "loss": 1.4169, "step": 762 }, { "epoch": 0.10802010334819849, "grad_norm": 11.88329017777563, "learning_rate": 4.992452161916698e-06, "loss": 1.2994, "step": 763 }, { "epoch": 0.1081616762228357, "grad_norm": 10.729388682219534, "learning_rate": 4.992407588806287e-06, "loss": 1.3418, "step": 764 }, { "epoch": 0.10830324909747292, "grad_norm": 9.663716710163687, "learning_rate": 4.9923628846718685e-06, "loss": 1.2734, "step": 765 }, { "epoch": 0.10844482197211014, "grad_norm": 10.079665050088353, "learning_rate": 4.992318049515791e-06, "loss": 1.2282, "step": 766 }, { "epoch": 0.10858639484674737, "grad_norm": 11.795569179590606, "learning_rate": 4.992273083340412e-06, "loss": 1.2936, "step": 767 }, { "epoch": 0.10872796772138459, "grad_norm": 11.325367390650209, "learning_rate": 4.992227986148096e-06, "loss": 1.1805, "step": 768 }, { "epoch": 0.1088695405960218, "grad_norm": 8.197128109327233, "learning_rate": 4.992182757941212e-06, "loss": 1.1417, "step": 769 }, { "epoch": 0.10901111347065902, "grad_norm": 10.05958543351754, "learning_rate": 4.992137398722139e-06, "loss": 1.156, "step": 770 }, { "epoch": 0.10915268634529623, "grad_norm": 15.271701567749878, "learning_rate": 4.992091908493262e-06, "loss": 1.326, "step": 771 }, { "epoch": 0.10929425921993347, "grad_norm": 9.815211996469792, "learning_rate": 4.992046287256971e-06, "loss": 1.1999, "step": 772 }, { "epoch": 0.10943583209457068, "grad_norm": 11.77426498937573, "learning_rate": 4.992000535015664e-06, "loss": 1.1902, "step": 773 }, { "epoch": 0.1095774049692079, "grad_norm": 10.95308494832638, "learning_rate": 4.991954651771748e-06, "loss": 1.2945, "step": 774 }, { "epoch": 0.10971897784384511, "grad_norm": 10.987460879103244, "learning_rate": 4.991908637527634e-06, "loss": 1.2826, "step": 775 }, { "epoch": 0.10986055071848234, "grad_norm": 12.402242248600402, "learning_rate": 4.991862492285741e-06, "loss": 1.3355, "step": 776 }, { "epoch": 0.11000212359311956, "grad_norm": 8.982247540233093, "learning_rate": 4.991816216048494e-06, "loss": 1.2067, "step": 777 }, { "epoch": 0.11014369646775678, "grad_norm": 10.202052669051918, "learning_rate": 4.991769808818328e-06, "loss": 1.2633, "step": 778 }, { "epoch": 0.110285269342394, "grad_norm": 7.49850347211289, "learning_rate": 4.991723270597679e-06, "loss": 1.1044, "step": 779 }, { "epoch": 0.11042684221703122, "grad_norm": 10.025164032543035, "learning_rate": 4.9916766013889975e-06, "loss": 1.2246, "step": 780 }, { "epoch": 0.11056841509166844, "grad_norm": 10.663352053944726, "learning_rate": 4.991629801194734e-06, "loss": 1.2774, "step": 781 }, { "epoch": 0.11070998796630566, "grad_norm": 9.402398116014407, "learning_rate": 4.9915828700173495e-06, "loss": 1.297, "step": 782 }, { "epoch": 0.11085156084094287, "grad_norm": 11.75858204499949, "learning_rate": 4.991535807859312e-06, "loss": 1.1894, "step": 783 }, { "epoch": 0.11099313371558009, "grad_norm": 9.108675215000439, "learning_rate": 4.991488614723094e-06, "loss": 1.3467, "step": 784 }, { "epoch": 0.11113470659021732, "grad_norm": 11.505287613371786, "learning_rate": 4.991441290611177e-06, "loss": 1.3952, "step": 785 }, { "epoch": 0.11127627946485454, "grad_norm": 10.20677337145535, "learning_rate": 4.991393835526051e-06, "loss": 1.3733, "step": 786 }, { "epoch": 0.11141785233949175, "grad_norm": 9.979065535529985, "learning_rate": 4.991346249470207e-06, "loss": 1.2371, "step": 787 }, { "epoch": 0.11155942521412897, "grad_norm": 11.962855388978719, "learning_rate": 4.991298532446149e-06, "loss": 1.15, "step": 788 }, { "epoch": 0.1117009980887662, "grad_norm": 7.3343367612579025, "learning_rate": 4.991250684456385e-06, "loss": 1.0393, "step": 789 }, { "epoch": 0.11184257096340341, "grad_norm": 11.786144747448697, "learning_rate": 4.9912027055034295e-06, "loss": 1.3606, "step": 790 }, { "epoch": 0.11198414383804063, "grad_norm": 10.322654579651534, "learning_rate": 4.9911545955898055e-06, "loss": 1.3026, "step": 791 }, { "epoch": 0.11212571671267785, "grad_norm": 10.461956678204183, "learning_rate": 4.991106354718042e-06, "loss": 1.2346, "step": 792 }, { "epoch": 0.11226728958731506, "grad_norm": 11.740505196978257, "learning_rate": 4.991057982890674e-06, "loss": 1.2565, "step": 793 }, { "epoch": 0.1124088624619523, "grad_norm": 9.71955814252067, "learning_rate": 4.991009480110246e-06, "loss": 1.4036, "step": 794 }, { "epoch": 0.11255043533658951, "grad_norm": 10.991710313580676, "learning_rate": 4.990960846379307e-06, "loss": 1.2631, "step": 795 }, { "epoch": 0.11269200821122673, "grad_norm": 10.331444557421117, "learning_rate": 4.990912081700413e-06, "loss": 1.208, "step": 796 }, { "epoch": 0.11283358108586394, "grad_norm": 8.372150448791936, "learning_rate": 4.990863186076129e-06, "loss": 1.1339, "step": 797 }, { "epoch": 0.11297515396050117, "grad_norm": 9.88396693788477, "learning_rate": 4.990814159509025e-06, "loss": 1.3699, "step": 798 }, { "epoch": 0.11311672683513839, "grad_norm": 10.423706076845454, "learning_rate": 4.990765002001677e-06, "loss": 1.211, "step": 799 }, { "epoch": 0.1132582997097756, "grad_norm": 11.179111511192986, "learning_rate": 4.99071571355667e-06, "loss": 1.4106, "step": 800 }, { "epoch": 0.11339987258441282, "grad_norm": 10.396944301363938, "learning_rate": 4.990666294176596e-06, "loss": 1.3545, "step": 801 }, { "epoch": 0.11354144545905005, "grad_norm": 9.422977414646263, "learning_rate": 4.990616743864051e-06, "loss": 1.2286, "step": 802 }, { "epoch": 0.11368301833368727, "grad_norm": 14.332273458875951, "learning_rate": 4.99056706262164e-06, "loss": 1.231, "step": 803 }, { "epoch": 0.11382459120832449, "grad_norm": 14.844820068337915, "learning_rate": 4.990517250451978e-06, "loss": 1.2813, "step": 804 }, { "epoch": 0.1139661640829617, "grad_norm": 9.647108571815014, "learning_rate": 4.99046730735768e-06, "loss": 1.2269, "step": 805 }, { "epoch": 0.11410773695759892, "grad_norm": 12.03221827982681, "learning_rate": 4.990417233341373e-06, "loss": 1.3411, "step": 806 }, { "epoch": 0.11424930983223615, "grad_norm": 15.006152607809478, "learning_rate": 4.990367028405688e-06, "loss": 1.2279, "step": 807 }, { "epoch": 0.11439088270687336, "grad_norm": 12.356750595456733, "learning_rate": 4.990316692553265e-06, "loss": 1.3016, "step": 808 }, { "epoch": 0.11453245558151058, "grad_norm": 13.135744184040533, "learning_rate": 4.990266225786751e-06, "loss": 1.1734, "step": 809 }, { "epoch": 0.1146740284561478, "grad_norm": 12.437384676758063, "learning_rate": 4.9902156281087985e-06, "loss": 1.3198, "step": 810 }, { "epoch": 0.11481560133078503, "grad_norm": 12.122360728780178, "learning_rate": 4.990164899522068e-06, "loss": 1.1873, "step": 811 }, { "epoch": 0.11495717420542224, "grad_norm": 9.03604874942323, "learning_rate": 4.990114040029224e-06, "loss": 1.2407, "step": 812 }, { "epoch": 0.11509874708005946, "grad_norm": 12.62337249236727, "learning_rate": 4.990063049632943e-06, "loss": 1.4276, "step": 813 }, { "epoch": 0.11524031995469668, "grad_norm": 11.715937162418976, "learning_rate": 4.9900119283359025e-06, "loss": 1.3116, "step": 814 }, { "epoch": 0.11538189282933389, "grad_norm": 12.160599820250695, "learning_rate": 4.989960676140793e-06, "loss": 1.256, "step": 815 }, { "epoch": 0.11552346570397112, "grad_norm": 12.433247054740718, "learning_rate": 4.989909293050307e-06, "loss": 1.2722, "step": 816 }, { "epoch": 0.11566503857860834, "grad_norm": 12.388275116074977, "learning_rate": 4.989857779067146e-06, "loss": 1.1793, "step": 817 }, { "epoch": 0.11580661145324556, "grad_norm": 10.282034336771058, "learning_rate": 4.989806134194018e-06, "loss": 1.2869, "step": 818 }, { "epoch": 0.11594818432788277, "grad_norm": 10.611625241733526, "learning_rate": 4.9897543584336376e-06, "loss": 1.2072, "step": 819 }, { "epoch": 0.11608975720252, "grad_norm": 12.86072645351379, "learning_rate": 4.989702451788727e-06, "loss": 1.3683, "step": 820 }, { "epoch": 0.11623133007715722, "grad_norm": 9.496103792801147, "learning_rate": 4.989650414262015e-06, "loss": 1.2605, "step": 821 }, { "epoch": 0.11637290295179444, "grad_norm": 9.500758492155747, "learning_rate": 4.989598245856238e-06, "loss": 1.1751, "step": 822 }, { "epoch": 0.11651447582643165, "grad_norm": 9.652652008432627, "learning_rate": 4.989545946574136e-06, "loss": 1.2603, "step": 823 }, { "epoch": 0.11665604870106888, "grad_norm": 9.574068140645268, "learning_rate": 4.989493516418461e-06, "loss": 1.3169, "step": 824 }, { "epoch": 0.1167976215757061, "grad_norm": 10.32779996651937, "learning_rate": 4.9894409553919675e-06, "loss": 1.4384, "step": 825 }, { "epoch": 0.11693919445034331, "grad_norm": 11.41761059351032, "learning_rate": 4.98938826349742e-06, "loss": 1.2575, "step": 826 }, { "epoch": 0.11708076732498053, "grad_norm": 9.426246249020231, "learning_rate": 4.989335440737587e-06, "loss": 1.2241, "step": 827 }, { "epoch": 0.11722234019961775, "grad_norm": 8.811693519862585, "learning_rate": 4.989282487115246e-06, "loss": 1.3909, "step": 828 }, { "epoch": 0.11736391307425498, "grad_norm": 11.893688393782579, "learning_rate": 4.98922940263318e-06, "loss": 1.4105, "step": 829 }, { "epoch": 0.1175054859488922, "grad_norm": 13.198747078358481, "learning_rate": 4.989176187294182e-06, "loss": 1.234, "step": 830 }, { "epoch": 0.11764705882352941, "grad_norm": 10.236313032151152, "learning_rate": 4.989122841101047e-06, "loss": 1.3469, "step": 831 }, { "epoch": 0.11778863169816663, "grad_norm": 9.846149648610467, "learning_rate": 4.98906936405658e-06, "loss": 1.4028, "step": 832 }, { "epoch": 0.11793020457280386, "grad_norm": 9.666874543508952, "learning_rate": 4.989015756163593e-06, "loss": 1.3119, "step": 833 }, { "epoch": 0.11807177744744107, "grad_norm": 8.694529372163739, "learning_rate": 4.988962017424903e-06, "loss": 1.219, "step": 834 }, { "epoch": 0.11821335032207829, "grad_norm": 13.844428947797919, "learning_rate": 4.988908147843336e-06, "loss": 1.4285, "step": 835 }, { "epoch": 0.1183549231967155, "grad_norm": 11.570931654390927, "learning_rate": 4.988854147421724e-06, "loss": 1.3598, "step": 836 }, { "epoch": 0.11849649607135272, "grad_norm": 11.310955806316343, "learning_rate": 4.988800016162904e-06, "loss": 1.3003, "step": 837 }, { "epoch": 0.11863806894598995, "grad_norm": 10.131961642668987, "learning_rate": 4.9887457540697235e-06, "loss": 1.4045, "step": 838 }, { "epoch": 0.11877964182062717, "grad_norm": 8.555945790736606, "learning_rate": 4.988691361145035e-06, "loss": 1.2107, "step": 839 }, { "epoch": 0.11892121469526439, "grad_norm": 9.39337107896046, "learning_rate": 4.988636837391696e-06, "loss": 1.2059, "step": 840 }, { "epoch": 0.1190627875699016, "grad_norm": 10.21443332409215, "learning_rate": 4.988582182812575e-06, "loss": 1.373, "step": 841 }, { "epoch": 0.11920436044453883, "grad_norm": 10.278460184989484, "learning_rate": 4.988527397410544e-06, "loss": 1.2551, "step": 842 }, { "epoch": 0.11934593331917605, "grad_norm": 11.009568994108497, "learning_rate": 4.988472481188484e-06, "loss": 1.3743, "step": 843 }, { "epoch": 0.11948750619381326, "grad_norm": 10.007876155510868, "learning_rate": 4.988417434149279e-06, "loss": 1.2295, "step": 844 }, { "epoch": 0.11962907906845048, "grad_norm": 9.884526160715799, "learning_rate": 4.988362256295827e-06, "loss": 1.3755, "step": 845 }, { "epoch": 0.11977065194308771, "grad_norm": 12.193057627062828, "learning_rate": 4.988306947631025e-06, "loss": 1.2931, "step": 846 }, { "epoch": 0.11991222481772493, "grad_norm": 9.959435778343531, "learning_rate": 4.988251508157784e-06, "loss": 1.3093, "step": 847 }, { "epoch": 0.12005379769236214, "grad_norm": 11.064625681300075, "learning_rate": 4.988195937879015e-06, "loss": 1.2372, "step": 848 }, { "epoch": 0.12019537056699936, "grad_norm": 12.893663822686744, "learning_rate": 4.988140236797642e-06, "loss": 1.3031, "step": 849 }, { "epoch": 0.12033694344163658, "grad_norm": 9.630521701454574, "learning_rate": 4.988084404916591e-06, "loss": 1.2984, "step": 850 }, { "epoch": 0.1204785163162738, "grad_norm": 14.039574411576838, "learning_rate": 4.988028442238798e-06, "loss": 1.2259, "step": 851 }, { "epoch": 0.12062008919091102, "grad_norm": 9.323164395640813, "learning_rate": 4.987972348767206e-06, "loss": 1.1984, "step": 852 }, { "epoch": 0.12076166206554824, "grad_norm": 10.366804815845803, "learning_rate": 4.987916124504761e-06, "loss": 1.2188, "step": 853 }, { "epoch": 0.12090323494018546, "grad_norm": 9.22366460630108, "learning_rate": 4.9878597694544215e-06, "loss": 1.3409, "step": 854 }, { "epoch": 0.12104480781482269, "grad_norm": 9.631894107299887, "learning_rate": 4.987803283619149e-06, "loss": 1.3144, "step": 855 }, { "epoch": 0.1211863806894599, "grad_norm": 11.162002918072831, "learning_rate": 4.987746667001913e-06, "loss": 1.2647, "step": 856 }, { "epoch": 0.12132795356409712, "grad_norm": 10.622072063255464, "learning_rate": 4.98768991960569e-06, "loss": 1.2267, "step": 857 }, { "epoch": 0.12146952643873433, "grad_norm": 12.450814486292465, "learning_rate": 4.987633041433462e-06, "loss": 1.3755, "step": 858 }, { "epoch": 0.12161109931337155, "grad_norm": 9.640923345515281, "learning_rate": 4.98757603248822e-06, "loss": 1.3951, "step": 859 }, { "epoch": 0.12175267218800878, "grad_norm": 9.304429211622901, "learning_rate": 4.987518892772961e-06, "loss": 1.3175, "step": 860 }, { "epoch": 0.121894245062646, "grad_norm": 9.10320239048913, "learning_rate": 4.987461622290688e-06, "loss": 1.0922, "step": 861 }, { "epoch": 0.12203581793728321, "grad_norm": 9.35309003880139, "learning_rate": 4.987404221044413e-06, "loss": 1.117, "step": 862 }, { "epoch": 0.12217739081192043, "grad_norm": 10.539008930089945, "learning_rate": 4.9873466890371525e-06, "loss": 1.3962, "step": 863 }, { "epoch": 0.12231896368655766, "grad_norm": 9.393142845039897, "learning_rate": 4.987289026271931e-06, "loss": 1.179, "step": 864 }, { "epoch": 0.12246053656119488, "grad_norm": 9.687496456022537, "learning_rate": 4.98723123275178e-06, "loss": 1.1578, "step": 865 }, { "epoch": 0.1226021094358321, "grad_norm": 9.863796397438321, "learning_rate": 4.987173308479738e-06, "loss": 1.2434, "step": 866 }, { "epoch": 0.12274368231046931, "grad_norm": 9.699942787237644, "learning_rate": 4.98711525345885e-06, "loss": 1.2898, "step": 867 }, { "epoch": 0.12288525518510654, "grad_norm": 9.641146505090418, "learning_rate": 4.987057067692167e-06, "loss": 1.2354, "step": 868 }, { "epoch": 0.12302682805974376, "grad_norm": 9.813425591422137, "learning_rate": 4.986998751182748e-06, "loss": 1.2553, "step": 869 }, { "epoch": 0.12316840093438097, "grad_norm": 9.61160574971168, "learning_rate": 4.98694030393366e-06, "loss": 1.2162, "step": 870 }, { "epoch": 0.12330997380901819, "grad_norm": 8.88884847419876, "learning_rate": 4.986881725947974e-06, "loss": 1.3355, "step": 871 }, { "epoch": 0.1234515466836554, "grad_norm": 9.340991783662021, "learning_rate": 4.98682301722877e-06, "loss": 1.2687, "step": 872 }, { "epoch": 0.12359311955829264, "grad_norm": 9.77086109626475, "learning_rate": 4.986764177779134e-06, "loss": 1.324, "step": 873 }, { "epoch": 0.12373469243292985, "grad_norm": 10.245255651602806, "learning_rate": 4.986705207602161e-06, "loss": 1.3855, "step": 874 }, { "epoch": 0.12387626530756707, "grad_norm": 9.856854728768349, "learning_rate": 4.986646106700948e-06, "loss": 1.3347, "step": 875 }, { "epoch": 0.12401783818220428, "grad_norm": 10.060275193198773, "learning_rate": 4.986586875078603e-06, "loss": 1.3933, "step": 876 }, { "epoch": 0.12415941105684151, "grad_norm": 13.237740657301039, "learning_rate": 4.98652751273824e-06, "loss": 1.2129, "step": 877 }, { "epoch": 0.12430098393147873, "grad_norm": 11.090935492448608, "learning_rate": 4.986468019682981e-06, "loss": 1.1293, "step": 878 }, { "epoch": 0.12444255680611595, "grad_norm": 10.686070396778337, "learning_rate": 4.98640839591595e-06, "loss": 1.2449, "step": 879 }, { "epoch": 0.12458412968075316, "grad_norm": 9.737898138523015, "learning_rate": 4.986348641440286e-06, "loss": 1.2085, "step": 880 }, { "epoch": 0.12472570255539038, "grad_norm": 13.091615489242114, "learning_rate": 4.986288756259126e-06, "loss": 1.3095, "step": 881 }, { "epoch": 0.12486727543002761, "grad_norm": 13.152900033714047, "learning_rate": 4.986228740375621e-06, "loss": 1.2481, "step": 882 }, { "epoch": 0.12500884830466483, "grad_norm": 9.150468791355037, "learning_rate": 4.986168593792924e-06, "loss": 1.2765, "step": 883 }, { "epoch": 0.12515042117930206, "grad_norm": 13.786857450285478, "learning_rate": 4.986108316514199e-06, "loss": 1.265, "step": 884 }, { "epoch": 0.12529199405393926, "grad_norm": 11.51609538345612, "learning_rate": 4.986047908542613e-06, "loss": 1.3738, "step": 885 }, { "epoch": 0.1254335669285765, "grad_norm": 13.158139199646188, "learning_rate": 4.9859873698813425e-06, "loss": 1.2417, "step": 886 }, { "epoch": 0.1255751398032137, "grad_norm": 10.411525912476609, "learning_rate": 4.985926700533569e-06, "loss": 1.2357, "step": 887 }, { "epoch": 0.12571671267785092, "grad_norm": 13.946331698575067, "learning_rate": 4.985865900502482e-06, "loss": 1.1827, "step": 888 }, { "epoch": 0.12585828555248815, "grad_norm": 11.037963207716464, "learning_rate": 4.985804969791278e-06, "loss": 1.2614, "step": 889 }, { "epoch": 0.12599985842712536, "grad_norm": 10.35800065119878, "learning_rate": 4.9857439084031614e-06, "loss": 1.1909, "step": 890 }, { "epoch": 0.12614143130176259, "grad_norm": 10.303456172879336, "learning_rate": 4.985682716341341e-06, "loss": 1.1585, "step": 891 }, { "epoch": 0.1262830041763998, "grad_norm": 13.56182662768892, "learning_rate": 4.985621393609032e-06, "loss": 1.4337, "step": 892 }, { "epoch": 0.12642457705103702, "grad_norm": 9.283352084001088, "learning_rate": 4.985559940209462e-06, "loss": 1.212, "step": 893 }, { "epoch": 0.12656614992567425, "grad_norm": 8.076624599992204, "learning_rate": 4.985498356145858e-06, "loss": 1.1064, "step": 894 }, { "epoch": 0.12670772280031145, "grad_norm": 9.677651007145963, "learning_rate": 4.985436641421458e-06, "loss": 1.1424, "step": 895 }, { "epoch": 0.12684929567494868, "grad_norm": 11.611176834853321, "learning_rate": 4.985374796039508e-06, "loss": 1.2865, "step": 896 }, { "epoch": 0.1269908685495859, "grad_norm": 10.29363249460957, "learning_rate": 4.985312820003258e-06, "loss": 1.3008, "step": 897 }, { "epoch": 0.1271324414242231, "grad_norm": 9.748497480381271, "learning_rate": 4.985250713315966e-06, "loss": 1.2101, "step": 898 }, { "epoch": 0.12727401429886034, "grad_norm": 10.286953813554673, "learning_rate": 4.985188475980898e-06, "loss": 1.2747, "step": 899 }, { "epoch": 0.12741558717349755, "grad_norm": 9.22796232633668, "learning_rate": 4.985126108001323e-06, "loss": 1.2587, "step": 900 }, { "epoch": 0.12755716004813478, "grad_norm": 8.677541610015748, "learning_rate": 4.985063609380522e-06, "loss": 1.2273, "step": 901 }, { "epoch": 0.127698732922772, "grad_norm": 9.27547884216577, "learning_rate": 4.985000980121782e-06, "loss": 1.1358, "step": 902 }, { "epoch": 0.1278403057974092, "grad_norm": 10.484293862041666, "learning_rate": 4.984938220228391e-06, "loss": 1.2561, "step": 903 }, { "epoch": 0.12798187867204644, "grad_norm": 9.860092693148502, "learning_rate": 4.9848753297036515e-06, "loss": 1.0612, "step": 904 }, { "epoch": 0.12812345154668364, "grad_norm": 9.24912072203944, "learning_rate": 4.984812308550869e-06, "loss": 1.1157, "step": 905 }, { "epoch": 0.12826502442132087, "grad_norm": 9.869026405372589, "learning_rate": 4.984749156773355e-06, "loss": 1.2703, "step": 906 }, { "epoch": 0.1284065972959581, "grad_norm": 9.62620635956622, "learning_rate": 4.984685874374432e-06, "loss": 1.196, "step": 907 }, { "epoch": 0.1285481701705953, "grad_norm": 11.46243852538552, "learning_rate": 4.984622461357425e-06, "loss": 1.3458, "step": 908 }, { "epoch": 0.12868974304523254, "grad_norm": 12.733053876063753, "learning_rate": 4.984558917725667e-06, "loss": 1.1781, "step": 909 }, { "epoch": 0.12883131591986977, "grad_norm": 9.367206640279653, "learning_rate": 4.9844952434825e-06, "loss": 1.2037, "step": 910 }, { "epoch": 0.12897288879450697, "grad_norm": 11.148543915377715, "learning_rate": 4.98443143863127e-06, "loss": 1.29, "step": 911 }, { "epoch": 0.1291144616691442, "grad_norm": 12.04810292359827, "learning_rate": 4.984367503175332e-06, "loss": 1.3516, "step": 912 }, { "epoch": 0.1292560345437814, "grad_norm": 10.698945993579702, "learning_rate": 4.984303437118047e-06, "loss": 1.2887, "step": 913 }, { "epoch": 0.12939760741841863, "grad_norm": 9.330585302247632, "learning_rate": 4.984239240462783e-06, "loss": 1.3955, "step": 914 }, { "epoch": 0.12953918029305586, "grad_norm": 8.97436985364541, "learning_rate": 4.984174913212913e-06, "loss": 1.2522, "step": 915 }, { "epoch": 0.12968075316769306, "grad_norm": 10.496789350554858, "learning_rate": 4.984110455371822e-06, "loss": 1.3541, "step": 916 }, { "epoch": 0.1298223260423303, "grad_norm": 10.226981391225118, "learning_rate": 4.984045866942895e-06, "loss": 1.2695, "step": 917 }, { "epoch": 0.1299638989169675, "grad_norm": 9.84378875240388, "learning_rate": 4.98398114792953e-06, "loss": 1.0601, "step": 918 }, { "epoch": 0.13010547179160473, "grad_norm": 10.456550436041724, "learning_rate": 4.983916298335127e-06, "loss": 1.2075, "step": 919 }, { "epoch": 0.13024704466624196, "grad_norm": 10.216145874822097, "learning_rate": 4.9838513181630975e-06, "loss": 1.1839, "step": 920 }, { "epoch": 0.13038861754087916, "grad_norm": 11.32493826405117, "learning_rate": 4.983786207416856e-06, "loss": 1.2724, "step": 921 }, { "epoch": 0.1305301904155164, "grad_norm": 9.945110357946676, "learning_rate": 4.983720966099826e-06, "loss": 1.2601, "step": 922 }, { "epoch": 0.13067176329015362, "grad_norm": 9.710858541890385, "learning_rate": 4.983655594215436e-06, "loss": 1.2131, "step": 923 }, { "epoch": 0.13081333616479082, "grad_norm": 8.008635152103741, "learning_rate": 4.983590091767123e-06, "loss": 1.1776, "step": 924 }, { "epoch": 0.13095490903942805, "grad_norm": 10.023380032595394, "learning_rate": 4.983524458758331e-06, "loss": 1.3792, "step": 925 }, { "epoch": 0.13109648191406525, "grad_norm": 9.962072545625075, "learning_rate": 4.98345869519251e-06, "loss": 1.1989, "step": 926 }, { "epoch": 0.13123805478870249, "grad_norm": 9.69521452848868, "learning_rate": 4.9833928010731185e-06, "loss": 1.2764, "step": 927 }, { "epoch": 0.13137962766333972, "grad_norm": 10.208736821718796, "learning_rate": 4.983326776403618e-06, "loss": 1.2693, "step": 928 }, { "epoch": 0.13152120053797692, "grad_norm": 10.354373881200141, "learning_rate": 4.983260621187479e-06, "loss": 1.0645, "step": 929 }, { "epoch": 0.13166277341261415, "grad_norm": 9.89855465597598, "learning_rate": 4.983194335428183e-06, "loss": 1.2537, "step": 930 }, { "epoch": 0.13180434628725135, "grad_norm": 10.520948312464741, "learning_rate": 4.9831279191292114e-06, "loss": 1.2519, "step": 931 }, { "epoch": 0.13194591916188858, "grad_norm": 9.743025804073786, "learning_rate": 4.983061372294057e-06, "loss": 1.2859, "step": 932 }, { "epoch": 0.1320874920365258, "grad_norm": 11.42390142121511, "learning_rate": 4.982994694926217e-06, "loss": 1.4336, "step": 933 }, { "epoch": 0.132229064911163, "grad_norm": 11.158629946271722, "learning_rate": 4.9829278870291975e-06, "loss": 1.2017, "step": 934 }, { "epoch": 0.13237063778580024, "grad_norm": 8.080495224685915, "learning_rate": 4.982860948606511e-06, "loss": 1.2593, "step": 935 }, { "epoch": 0.13251221066043747, "grad_norm": 10.912403196274706, "learning_rate": 4.9827938796616745e-06, "loss": 1.3289, "step": 936 }, { "epoch": 0.13265378353507468, "grad_norm": 9.764069407352734, "learning_rate": 4.982726680198217e-06, "loss": 1.3231, "step": 937 }, { "epoch": 0.1327953564097119, "grad_norm": 10.101199209196261, "learning_rate": 4.982659350219668e-06, "loss": 1.3159, "step": 938 }, { "epoch": 0.1329369292843491, "grad_norm": 10.672006542015072, "learning_rate": 4.982591889729567e-06, "loss": 1.169, "step": 939 }, { "epoch": 0.13307850215898634, "grad_norm": 9.120843227956769, "learning_rate": 4.982524298731463e-06, "loss": 1.1656, "step": 940 }, { "epoch": 0.13322007503362357, "grad_norm": 9.460401625384316, "learning_rate": 4.982456577228907e-06, "loss": 1.3018, "step": 941 }, { "epoch": 0.13336164790826077, "grad_norm": 12.096671764309285, "learning_rate": 4.98238872522546e-06, "loss": 1.2405, "step": 942 }, { "epoch": 0.133503220782898, "grad_norm": 9.684574989993955, "learning_rate": 4.982320742724688e-06, "loss": 1.4, "step": 943 }, { "epoch": 0.1336447936575352, "grad_norm": 10.044339870182329, "learning_rate": 4.982252629730167e-06, "loss": 1.2976, "step": 944 }, { "epoch": 0.13378636653217243, "grad_norm": 12.59186337256561, "learning_rate": 4.982184386245475e-06, "loss": 1.2846, "step": 945 }, { "epoch": 0.13392793940680967, "grad_norm": 10.831577295640086, "learning_rate": 4.9821160122742e-06, "loss": 1.2872, "step": 946 }, { "epoch": 0.13406951228144687, "grad_norm": 12.1910714418541, "learning_rate": 4.982047507819938e-06, "loss": 1.2177, "step": 947 }, { "epoch": 0.1342110851560841, "grad_norm": 10.423059399059765, "learning_rate": 4.981978872886288e-06, "loss": 1.3024, "step": 948 }, { "epoch": 0.1343526580307213, "grad_norm": 8.532410259786422, "learning_rate": 4.981910107476861e-06, "loss": 1.1813, "step": 949 }, { "epoch": 0.13449423090535853, "grad_norm": 9.839911593528523, "learning_rate": 4.9818412115952685e-06, "loss": 1.1695, "step": 950 }, { "epoch": 0.13463580377999576, "grad_norm": 9.798021654013606, "learning_rate": 4.981772185245135e-06, "loss": 1.0792, "step": 951 }, { "epoch": 0.13477737665463296, "grad_norm": 10.240092257441725, "learning_rate": 4.981703028430088e-06, "loss": 1.3533, "step": 952 }, { "epoch": 0.1349189495292702, "grad_norm": 10.382752980218791, "learning_rate": 4.981633741153764e-06, "loss": 1.2852, "step": 953 }, { "epoch": 0.13506052240390742, "grad_norm": 10.58399068237725, "learning_rate": 4.981564323419804e-06, "loss": 1.2882, "step": 954 }, { "epoch": 0.13520209527854463, "grad_norm": 9.692757441201387, "learning_rate": 4.981494775231857e-06, "loss": 1.1782, "step": 955 }, { "epoch": 0.13534366815318186, "grad_norm": 9.609347211595999, "learning_rate": 4.981425096593582e-06, "loss": 1.2843, "step": 956 }, { "epoch": 0.13548524102781906, "grad_norm": 8.149000197793152, "learning_rate": 4.981355287508638e-06, "loss": 1.2553, "step": 957 }, { "epoch": 0.1356268139024563, "grad_norm": 8.313568734897384, "learning_rate": 4.981285347980698e-06, "loss": 1.2092, "step": 958 }, { "epoch": 0.13576838677709352, "grad_norm": 8.339597204347564, "learning_rate": 4.981215278013436e-06, "loss": 1.3148, "step": 959 }, { "epoch": 0.13590995965173072, "grad_norm": 9.609702103172541, "learning_rate": 4.981145077610538e-06, "loss": 1.0769, "step": 960 }, { "epoch": 0.13605153252636795, "grad_norm": 14.479204722109762, "learning_rate": 4.981074746775693e-06, "loss": 1.3441, "step": 961 }, { "epoch": 0.13619310540100515, "grad_norm": 9.853655188246018, "learning_rate": 4.9810042855125985e-06, "loss": 1.1877, "step": 962 }, { "epoch": 0.13633467827564238, "grad_norm": 9.011027044595833, "learning_rate": 4.980933693824959e-06, "loss": 1.3406, "step": 963 }, { "epoch": 0.13647625115027961, "grad_norm": 11.753258435953184, "learning_rate": 4.9808629717164845e-06, "loss": 1.2891, "step": 964 }, { "epoch": 0.13661782402491682, "grad_norm": 12.88706745417123, "learning_rate": 4.980792119190894e-06, "loss": 1.2105, "step": 965 }, { "epoch": 0.13675939689955405, "grad_norm": 9.072478595118685, "learning_rate": 4.98072113625191e-06, "loss": 1.1945, "step": 966 }, { "epoch": 0.13690096977419128, "grad_norm": 10.921994799392152, "learning_rate": 4.980650022903267e-06, "loss": 1.2265, "step": 967 }, { "epoch": 0.13704254264882848, "grad_norm": 10.008569478836389, "learning_rate": 4.980578779148702e-06, "loss": 1.2405, "step": 968 }, { "epoch": 0.1371841155234657, "grad_norm": 9.818273315902431, "learning_rate": 4.98050740499196e-06, "loss": 1.2713, "step": 969 }, { "epoch": 0.1373256883981029, "grad_norm": 10.32670733580719, "learning_rate": 4.980435900436793e-06, "loss": 1.1327, "step": 970 }, { "epoch": 0.13746726127274014, "grad_norm": 8.954501966538954, "learning_rate": 4.98036426548696e-06, "loss": 1.2368, "step": 971 }, { "epoch": 0.13760883414737737, "grad_norm": 9.146671844718217, "learning_rate": 4.980292500146227e-06, "loss": 1.4908, "step": 972 }, { "epoch": 0.13775040702201458, "grad_norm": 10.228739577400784, "learning_rate": 4.980220604418367e-06, "loss": 1.5053, "step": 973 }, { "epoch": 0.1378919798966518, "grad_norm": 11.387256097955719, "learning_rate": 4.980148578307159e-06, "loss": 1.2802, "step": 974 }, { "epoch": 0.138033552771289, "grad_norm": 9.255153148503059, "learning_rate": 4.98007642181639e-06, "loss": 1.2442, "step": 975 }, { "epoch": 0.13817512564592624, "grad_norm": 14.06581720857758, "learning_rate": 4.980004134949853e-06, "loss": 1.3981, "step": 976 }, { "epoch": 0.13831669852056347, "grad_norm": 10.375969898894919, "learning_rate": 4.979931717711347e-06, "loss": 1.1488, "step": 977 }, { "epoch": 0.13845827139520067, "grad_norm": 10.610052028172499, "learning_rate": 4.979859170104679e-06, "loss": 1.312, "step": 978 }, { "epoch": 0.1385998442698379, "grad_norm": 9.372217604686195, "learning_rate": 4.979786492133665e-06, "loss": 1.2072, "step": 979 }, { "epoch": 0.13874141714447513, "grad_norm": 10.23066301660246, "learning_rate": 4.979713683802123e-06, "loss": 1.2884, "step": 980 }, { "epoch": 0.13888299001911233, "grad_norm": 12.007806781675606, "learning_rate": 4.979640745113883e-06, "loss": 1.3682, "step": 981 }, { "epoch": 0.13902456289374956, "grad_norm": 9.898417845407677, "learning_rate": 4.979567676072776e-06, "loss": 1.3005, "step": 982 }, { "epoch": 0.13916613576838677, "grad_norm": 8.598694422781122, "learning_rate": 4.979494476682647e-06, "loss": 1.2515, "step": 983 }, { "epoch": 0.139307708643024, "grad_norm": 8.886125066020893, "learning_rate": 4.979421146947341e-06, "loss": 1.245, "step": 984 }, { "epoch": 0.13944928151766123, "grad_norm": 11.682830367279369, "learning_rate": 4.979347686870714e-06, "loss": 1.4003, "step": 985 }, { "epoch": 0.13959085439229843, "grad_norm": 8.49134071430801, "learning_rate": 4.979274096456629e-06, "loss": 1.0968, "step": 986 }, { "epoch": 0.13973242726693566, "grad_norm": 9.751484684510666, "learning_rate": 4.979200375708951e-06, "loss": 1.3729, "step": 987 }, { "epoch": 0.13987400014157286, "grad_norm": 10.784714471099573, "learning_rate": 4.97912652463156e-06, "loss": 1.4794, "step": 988 }, { "epoch": 0.1400155730162101, "grad_norm": 13.478171326070976, "learning_rate": 4.979052543228335e-06, "loss": 1.2457, "step": 989 }, { "epoch": 0.14015714589084732, "grad_norm": 9.97351496070401, "learning_rate": 4.978978431503167e-06, "loss": 1.2258, "step": 990 }, { "epoch": 0.14029871876548453, "grad_norm": 9.772612734359308, "learning_rate": 4.978904189459951e-06, "loss": 1.352, "step": 991 }, { "epoch": 0.14044029164012176, "grad_norm": 10.70545851615033, "learning_rate": 4.97882981710259e-06, "loss": 1.2332, "step": 992 }, { "epoch": 0.14058186451475896, "grad_norm": 10.339534580521352, "learning_rate": 4.978755314434994e-06, "loss": 1.4282, "step": 993 }, { "epoch": 0.1407234373893962, "grad_norm": 9.385279601885413, "learning_rate": 4.978680681461079e-06, "loss": 1.1481, "step": 994 }, { "epoch": 0.14086501026403342, "grad_norm": 7.947048661482985, "learning_rate": 4.978605918184769e-06, "loss": 1.2718, "step": 995 }, { "epoch": 0.14100658313867062, "grad_norm": 11.18787711589431, "learning_rate": 4.978531024609994e-06, "loss": 1.3802, "step": 996 }, { "epoch": 0.14114815601330785, "grad_norm": 9.505756842407271, "learning_rate": 4.978456000740691e-06, "loss": 1.231, "step": 997 }, { "epoch": 0.14128972888794508, "grad_norm": 9.401974945379038, "learning_rate": 4.9783808465808035e-06, "loss": 1.2095, "step": 998 }, { "epoch": 0.14143130176258228, "grad_norm": 12.053041851379001, "learning_rate": 4.978305562134284e-06, "loss": 1.3796, "step": 999 }, { "epoch": 0.14157287463721951, "grad_norm": 10.032367964863623, "learning_rate": 4.978230147405089e-06, "loss": 1.1996, "step": 1000 }, { "epoch": 0.14171444751185672, "grad_norm": 9.690192654390273, "learning_rate": 4.978154602397182e-06, "loss": 1.3767, "step": 1001 }, { "epoch": 0.14185602038649395, "grad_norm": 10.938877912646843, "learning_rate": 4.978078927114536e-06, "loss": 1.2107, "step": 1002 }, { "epoch": 0.14199759326113118, "grad_norm": 10.8489520432626, "learning_rate": 4.978003121561128e-06, "loss": 1.1212, "step": 1003 }, { "epoch": 0.14213916613576838, "grad_norm": 9.421167653925862, "learning_rate": 4.977927185740944e-06, "loss": 1.3405, "step": 1004 }, { "epoch": 0.1422807390104056, "grad_norm": 12.389221650707182, "learning_rate": 4.977851119657976e-06, "loss": 1.1891, "step": 1005 }, { "epoch": 0.1424223118850428, "grad_norm": 8.584698269162324, "learning_rate": 4.977774923316221e-06, "loss": 1.258, "step": 1006 }, { "epoch": 0.14256388475968004, "grad_norm": 9.900938710107672, "learning_rate": 4.977698596719686e-06, "loss": 1.2271, "step": 1007 }, { "epoch": 0.14270545763431727, "grad_norm": 9.194125115828466, "learning_rate": 4.977622139872384e-06, "loss": 1.2378, "step": 1008 }, { "epoch": 0.14284703050895448, "grad_norm": 9.243828854279325, "learning_rate": 4.977545552778333e-06, "loss": 1.2695, "step": 1009 }, { "epoch": 0.1429886033835917, "grad_norm": 10.364152131943186, "learning_rate": 4.97746883544156e-06, "loss": 1.2519, "step": 1010 }, { "epoch": 0.14313017625822894, "grad_norm": 9.65213231063944, "learning_rate": 4.977391987866097e-06, "loss": 1.2549, "step": 1011 }, { "epoch": 0.14327174913286614, "grad_norm": 10.29928204154489, "learning_rate": 4.9773150100559844e-06, "loss": 1.1967, "step": 1012 }, { "epoch": 0.14341332200750337, "grad_norm": 9.579577785418051, "learning_rate": 4.9772379020152695e-06, "loss": 1.2822, "step": 1013 }, { "epoch": 0.14355489488214057, "grad_norm": 10.319877436301894, "learning_rate": 4.977160663748005e-06, "loss": 1.2098, "step": 1014 }, { "epoch": 0.1436964677567778, "grad_norm": 11.054258218255052, "learning_rate": 4.977083295258251e-06, "loss": 1.204, "step": 1015 }, { "epoch": 0.14383804063141503, "grad_norm": 10.260982677255122, "learning_rate": 4.977005796550076e-06, "loss": 1.3268, "step": 1016 }, { "epoch": 0.14397961350605223, "grad_norm": 9.912076470080851, "learning_rate": 4.976928167627553e-06, "loss": 1.3478, "step": 1017 }, { "epoch": 0.14412118638068946, "grad_norm": 11.222569232607302, "learning_rate": 4.976850408494762e-06, "loss": 1.273, "step": 1018 }, { "epoch": 0.14426275925532667, "grad_norm": 10.955048352774217, "learning_rate": 4.976772519155793e-06, "loss": 1.3197, "step": 1019 }, { "epoch": 0.1444043321299639, "grad_norm": 10.502113129651017, "learning_rate": 4.976694499614739e-06, "loss": 1.2443, "step": 1020 }, { "epoch": 0.14454590500460113, "grad_norm": 8.996170924820946, "learning_rate": 4.976616349875702e-06, "loss": 1.0897, "step": 1021 }, { "epoch": 0.14468747787923833, "grad_norm": 11.193761000635822, "learning_rate": 4.9765380699427905e-06, "loss": 1.2348, "step": 1022 }, { "epoch": 0.14482905075387556, "grad_norm": 11.199313783059468, "learning_rate": 4.9764596598201185e-06, "loss": 1.3324, "step": 1023 }, { "epoch": 0.1449706236285128, "grad_norm": 10.74581082358831, "learning_rate": 4.97638111951181e-06, "loss": 1.2942, "step": 1024 }, { "epoch": 0.14511219650315, "grad_norm": 13.190785075691512, "learning_rate": 4.976302449021991e-06, "loss": 1.5125, "step": 1025 }, { "epoch": 0.14525376937778722, "grad_norm": 9.399213116764374, "learning_rate": 4.9762236483547985e-06, "loss": 1.235, "step": 1026 }, { "epoch": 0.14539534225242443, "grad_norm": 10.026511716186453, "learning_rate": 4.976144717514376e-06, "loss": 1.4014, "step": 1027 }, { "epoch": 0.14553691512706166, "grad_norm": 11.26317892621465, "learning_rate": 4.976065656504873e-06, "loss": 1.2709, "step": 1028 }, { "epoch": 0.14567848800169889, "grad_norm": 10.621402625152006, "learning_rate": 4.975986465330443e-06, "loss": 1.1528, "step": 1029 }, { "epoch": 0.1458200608763361, "grad_norm": 9.548556665130445, "learning_rate": 4.975907143995251e-06, "loss": 1.2254, "step": 1030 }, { "epoch": 0.14596163375097332, "grad_norm": 11.137596628302946, "learning_rate": 4.975827692503467e-06, "loss": 1.4465, "step": 1031 }, { "epoch": 0.14610320662561052, "grad_norm": 8.027069072247043, "learning_rate": 4.975748110859267e-06, "loss": 1.111, "step": 1032 }, { "epoch": 0.14624477950024775, "grad_norm": 9.18949027788717, "learning_rate": 4.975668399066835e-06, "loss": 1.1043, "step": 1033 }, { "epoch": 0.14638635237488498, "grad_norm": 8.641405066929899, "learning_rate": 4.975588557130361e-06, "loss": 1.1521, "step": 1034 }, { "epoch": 0.14652792524952218, "grad_norm": 9.636625724748612, "learning_rate": 4.9755085850540426e-06, "loss": 1.1353, "step": 1035 }, { "epoch": 0.14666949812415941, "grad_norm": 9.914121370614218, "learning_rate": 4.975428482842083e-06, "loss": 1.3619, "step": 1036 }, { "epoch": 0.14681107099879662, "grad_norm": 9.02955141560697, "learning_rate": 4.975348250498695e-06, "loss": 1.2077, "step": 1037 }, { "epoch": 0.14695264387343385, "grad_norm": 9.71825228578855, "learning_rate": 4.975267888028094e-06, "loss": 1.3664, "step": 1038 }, { "epoch": 0.14709421674807108, "grad_norm": 11.8486079818854, "learning_rate": 4.975187395434506e-06, "loss": 1.356, "step": 1039 }, { "epoch": 0.14723578962270828, "grad_norm": 8.563661816707704, "learning_rate": 4.975106772722164e-06, "loss": 1.1979, "step": 1040 }, { "epoch": 0.1473773624973455, "grad_norm": 9.909588849789632, "learning_rate": 4.975026019895302e-06, "loss": 1.1584, "step": 1041 }, { "epoch": 0.14751893537198274, "grad_norm": 10.161747310302372, "learning_rate": 4.9749451369581694e-06, "loss": 1.1869, "step": 1042 }, { "epoch": 0.14766050824661994, "grad_norm": 8.826317742280487, "learning_rate": 4.974864123915015e-06, "loss": 1.136, "step": 1043 }, { "epoch": 0.14780208112125717, "grad_norm": 9.672518657903948, "learning_rate": 4.9747829807701e-06, "loss": 1.2573, "step": 1044 }, { "epoch": 0.14794365399589438, "grad_norm": 9.386607804041592, "learning_rate": 4.974701707527688e-06, "loss": 1.1112, "step": 1045 }, { "epoch": 0.1480852268705316, "grad_norm": 10.271826973928455, "learning_rate": 4.9746203041920534e-06, "loss": 1.3302, "step": 1046 }, { "epoch": 0.14822679974516884, "grad_norm": 9.601073809013908, "learning_rate": 4.974538770767474e-06, "loss": 1.3073, "step": 1047 }, { "epoch": 0.14836837261980604, "grad_norm": 11.205952520299514, "learning_rate": 4.9744571072582365e-06, "loss": 1.1705, "step": 1048 }, { "epoch": 0.14850994549444327, "grad_norm": 9.909120739405944, "learning_rate": 4.974375313668633e-06, "loss": 1.2375, "step": 1049 }, { "epoch": 0.14865151836908047, "grad_norm": 12.621850555690925, "learning_rate": 4.974293390002966e-06, "loss": 1.353, "step": 1050 }, { "epoch": 0.1487930912437177, "grad_norm": 10.61130206173175, "learning_rate": 4.97421133626554e-06, "loss": 1.3061, "step": 1051 }, { "epoch": 0.14893466411835493, "grad_norm": 8.966663503350855, "learning_rate": 4.9741291524606684e-06, "loss": 1.272, "step": 1052 }, { "epoch": 0.14907623699299213, "grad_norm": 9.644694206804767, "learning_rate": 4.974046838592672e-06, "loss": 1.0321, "step": 1053 }, { "epoch": 0.14921780986762936, "grad_norm": 11.516814171766088, "learning_rate": 4.973964394665878e-06, "loss": 1.248, "step": 1054 }, { "epoch": 0.1493593827422666, "grad_norm": 10.466768265087321, "learning_rate": 4.973881820684621e-06, "loss": 1.2177, "step": 1055 }, { "epoch": 0.1495009556169038, "grad_norm": 10.05453156748259, "learning_rate": 4.973799116653241e-06, "loss": 1.3453, "step": 1056 }, { "epoch": 0.14964252849154103, "grad_norm": 12.920453769521856, "learning_rate": 4.973716282576086e-06, "loss": 1.3403, "step": 1057 }, { "epoch": 0.14978410136617823, "grad_norm": 8.703592229659849, "learning_rate": 4.9736333184575105e-06, "loss": 1.2636, "step": 1058 }, { "epoch": 0.14992567424081546, "grad_norm": 9.912430913811896, "learning_rate": 4.973550224301875e-06, "loss": 1.3757, "step": 1059 }, { "epoch": 0.1500672471154527, "grad_norm": 10.587869187801116, "learning_rate": 4.9734670001135495e-06, "loss": 1.4526, "step": 1060 }, { "epoch": 0.1502088199900899, "grad_norm": 10.058553170427853, "learning_rate": 4.973383645896908e-06, "loss": 1.1804, "step": 1061 }, { "epoch": 0.15035039286472712, "grad_norm": 8.936841393593056, "learning_rate": 4.973300161656332e-06, "loss": 1.2143, "step": 1062 }, { "epoch": 0.15049196573936433, "grad_norm": 9.52317472976285, "learning_rate": 4.973216547396212e-06, "loss": 1.3541, "step": 1063 }, { "epoch": 0.15063353861400156, "grad_norm": 10.545610094866452, "learning_rate": 4.9731328031209414e-06, "loss": 1.2403, "step": 1064 }, { "epoch": 0.15077511148863879, "grad_norm": 11.315132635656553, "learning_rate": 4.973048928834923e-06, "loss": 1.3415, "step": 1065 }, { "epoch": 0.150916684363276, "grad_norm": 8.92502304629815, "learning_rate": 4.972964924542567e-06, "loss": 1.3098, "step": 1066 }, { "epoch": 0.15105825723791322, "grad_norm": 9.566516307043525, "learning_rate": 4.9728807902482885e-06, "loss": 1.0825, "step": 1067 }, { "epoch": 0.15119983011255045, "grad_norm": 10.752481573355592, "learning_rate": 4.97279652595651e-06, "loss": 1.3519, "step": 1068 }, { "epoch": 0.15134140298718765, "grad_norm": 10.18430020684411, "learning_rate": 4.972712131671663e-06, "loss": 1.3068, "step": 1069 }, { "epoch": 0.15148297586182488, "grad_norm": 13.18840884290509, "learning_rate": 4.972627607398183e-06, "loss": 1.3834, "step": 1070 }, { "epoch": 0.15162454873646208, "grad_norm": 10.589501893573837, "learning_rate": 4.972542953140513e-06, "loss": 1.2907, "step": 1071 }, { "epoch": 0.1517661216110993, "grad_norm": 9.480327113177665, "learning_rate": 4.972458168903104e-06, "loss": 1.2222, "step": 1072 }, { "epoch": 0.15190769448573654, "grad_norm": 10.610387470098583, "learning_rate": 4.972373254690411e-06, "loss": 1.1963, "step": 1073 }, { "epoch": 0.15204926736037375, "grad_norm": 10.53272307193437, "learning_rate": 4.972288210506902e-06, "loss": 1.1032, "step": 1074 }, { "epoch": 0.15219084023501098, "grad_norm": 9.625512443871697, "learning_rate": 4.972203036357043e-06, "loss": 1.1651, "step": 1075 }, { "epoch": 0.15233241310964818, "grad_norm": 10.73619790403336, "learning_rate": 4.972117732245314e-06, "loss": 1.2916, "step": 1076 }, { "epoch": 0.1524739859842854, "grad_norm": 15.489621533276422, "learning_rate": 4.972032298176201e-06, "loss": 1.2633, "step": 1077 }, { "epoch": 0.15261555885892264, "grad_norm": 10.176473255020928, "learning_rate": 4.9719467341541914e-06, "loss": 1.2702, "step": 1078 }, { "epoch": 0.15275713173355984, "grad_norm": 11.038539304436341, "learning_rate": 4.971861040183785e-06, "loss": 1.2949, "step": 1079 }, { "epoch": 0.15289870460819707, "grad_norm": 11.3461716434185, "learning_rate": 4.971775216269488e-06, "loss": 1.2135, "step": 1080 }, { "epoch": 0.1530402774828343, "grad_norm": 9.093939605519058, "learning_rate": 4.971689262415811e-06, "loss": 1.2693, "step": 1081 }, { "epoch": 0.1531818503574715, "grad_norm": 10.2059469950094, "learning_rate": 4.971603178627271e-06, "loss": 1.2226, "step": 1082 }, { "epoch": 0.15332342323210874, "grad_norm": 9.543550752309859, "learning_rate": 4.971516964908396e-06, "loss": 1.1771, "step": 1083 }, { "epoch": 0.15346499610674594, "grad_norm": 9.696403497363928, "learning_rate": 4.9714306212637165e-06, "loss": 1.295, "step": 1084 }, { "epoch": 0.15360656898138317, "grad_norm": 8.94437597444705, "learning_rate": 4.971344147697772e-06, "loss": 1.2893, "step": 1085 }, { "epoch": 0.1537481418560204, "grad_norm": 9.642406626677761, "learning_rate": 4.9712575442151086e-06, "loss": 1.3165, "step": 1086 }, { "epoch": 0.1538897147306576, "grad_norm": 10.264490073536365, "learning_rate": 4.971170810820279e-06, "loss": 1.1975, "step": 1087 }, { "epoch": 0.15403128760529483, "grad_norm": 8.351316757083785, "learning_rate": 4.971083947517842e-06, "loss": 1.1798, "step": 1088 }, { "epoch": 0.15417286047993203, "grad_norm": 10.392966283450809, "learning_rate": 4.970996954312365e-06, "loss": 1.2803, "step": 1089 }, { "epoch": 0.15431443335456926, "grad_norm": 8.731950727127161, "learning_rate": 4.97090983120842e-06, "loss": 1.2011, "step": 1090 }, { "epoch": 0.1544560062292065, "grad_norm": 10.716300484277955, "learning_rate": 4.970822578210587e-06, "loss": 1.3721, "step": 1091 }, { "epoch": 0.1545975791038437, "grad_norm": 9.829328163138713, "learning_rate": 4.970735195323454e-06, "loss": 1.1171, "step": 1092 }, { "epoch": 0.15473915197848093, "grad_norm": 8.04673515124229, "learning_rate": 4.970647682551614e-06, "loss": 1.2012, "step": 1093 }, { "epoch": 0.15488072485311813, "grad_norm": 10.67030387695965, "learning_rate": 4.970560039899668e-06, "loss": 1.3021, "step": 1094 }, { "epoch": 0.15502229772775536, "grad_norm": 8.409904961701038, "learning_rate": 4.970472267372223e-06, "loss": 1.2785, "step": 1095 }, { "epoch": 0.1551638706023926, "grad_norm": 10.609831989766578, "learning_rate": 4.9703843649738926e-06, "loss": 1.2139, "step": 1096 }, { "epoch": 0.1553054434770298, "grad_norm": 9.583174950216907, "learning_rate": 4.970296332709298e-06, "loss": 1.0362, "step": 1097 }, { "epoch": 0.15544701635166702, "grad_norm": 9.882524158960676, "learning_rate": 4.970208170583066e-06, "loss": 1.227, "step": 1098 }, { "epoch": 0.15558858922630425, "grad_norm": 10.518599249699612, "learning_rate": 4.9701198785998335e-06, "loss": 1.2179, "step": 1099 }, { "epoch": 0.15573016210094145, "grad_norm": 8.181579779031924, "learning_rate": 4.970031456764242e-06, "loss": 1.1641, "step": 1100 }, { "epoch": 0.15587173497557869, "grad_norm": 10.773685709726855, "learning_rate": 4.969942905080936e-06, "loss": 1.2994, "step": 1101 }, { "epoch": 0.1560133078502159, "grad_norm": 10.994005824198021, "learning_rate": 4.969854223554575e-06, "loss": 1.337, "step": 1102 }, { "epoch": 0.15615488072485312, "grad_norm": 13.681765953432041, "learning_rate": 4.969765412189819e-06, "loss": 1.4118, "step": 1103 }, { "epoch": 0.15629645359949035, "grad_norm": 10.164411534408684, "learning_rate": 4.969676470991336e-06, "loss": 1.2416, "step": 1104 }, { "epoch": 0.15643802647412755, "grad_norm": 14.010316589829996, "learning_rate": 4.969587399963802e-06, "loss": 1.1703, "step": 1105 }, { "epoch": 0.15657959934876478, "grad_norm": 10.218206455345404, "learning_rate": 4.969498199111901e-06, "loss": 1.0592, "step": 1106 }, { "epoch": 0.15672117222340198, "grad_norm": 9.843678695178253, "learning_rate": 4.9694088684403205e-06, "loss": 1.3575, "step": 1107 }, { "epoch": 0.1568627450980392, "grad_norm": 10.502248614089671, "learning_rate": 4.969319407953756e-06, "loss": 1.2304, "step": 1108 }, { "epoch": 0.15700431797267644, "grad_norm": 9.717684279515757, "learning_rate": 4.969229817656913e-06, "loss": 1.2115, "step": 1109 }, { "epoch": 0.15714589084731365, "grad_norm": 9.652451641571227, "learning_rate": 4.969140097554499e-06, "loss": 1.2541, "step": 1110 }, { "epoch": 0.15728746372195088, "grad_norm": 11.014271927648755, "learning_rate": 4.969050247651231e-06, "loss": 1.1153, "step": 1111 }, { "epoch": 0.1574290365965881, "grad_norm": 8.742487707088513, "learning_rate": 4.968960267951833e-06, "loss": 1.2253, "step": 1112 }, { "epoch": 0.1575706094712253, "grad_norm": 8.573053024400332, "learning_rate": 4.9688701584610345e-06, "loss": 1.2367, "step": 1113 }, { "epoch": 0.15771218234586254, "grad_norm": 8.965939815893622, "learning_rate": 4.968779919183573e-06, "loss": 1.3743, "step": 1114 }, { "epoch": 0.15785375522049974, "grad_norm": 8.838425182383446, "learning_rate": 4.96868955012419e-06, "loss": 1.3144, "step": 1115 }, { "epoch": 0.15799532809513697, "grad_norm": 8.956986957478799, "learning_rate": 4.96859905128764e-06, "loss": 1.1694, "step": 1116 }, { "epoch": 0.1581369009697742, "grad_norm": 9.55198699891501, "learning_rate": 4.968508422678679e-06, "loss": 1.3561, "step": 1117 }, { "epoch": 0.1582784738444114, "grad_norm": 9.611068749992887, "learning_rate": 4.968417664302069e-06, "loss": 1.1314, "step": 1118 }, { "epoch": 0.15842004671904863, "grad_norm": 9.18039516886626, "learning_rate": 4.968326776162584e-06, "loss": 1.3249, "step": 1119 }, { "epoch": 0.15856161959368584, "grad_norm": 9.195885808115412, "learning_rate": 4.968235758265001e-06, "loss": 1.1937, "step": 1120 }, { "epoch": 0.15870319246832307, "grad_norm": 9.820770859927602, "learning_rate": 4.968144610614104e-06, "loss": 1.258, "step": 1121 }, { "epoch": 0.1588447653429603, "grad_norm": 8.590171040973724, "learning_rate": 4.9680533332146855e-06, "loss": 1.2483, "step": 1122 }, { "epoch": 0.1589863382175975, "grad_norm": 8.876187527147556, "learning_rate": 4.967961926071543e-06, "loss": 1.2471, "step": 1123 }, { "epoch": 0.15912791109223473, "grad_norm": 10.966589038268797, "learning_rate": 4.967870389189483e-06, "loss": 1.1096, "step": 1124 }, { "epoch": 0.15926948396687196, "grad_norm": 13.240156439925887, "learning_rate": 4.967778722573317e-06, "loss": 1.2622, "step": 1125 }, { "epoch": 0.15941105684150916, "grad_norm": 8.498432688107664, "learning_rate": 4.967686926227862e-06, "loss": 1.4674, "step": 1126 }, { "epoch": 0.1595526297161464, "grad_norm": 9.938345627223494, "learning_rate": 4.967595000157946e-06, "loss": 1.1524, "step": 1127 }, { "epoch": 0.1596942025907836, "grad_norm": 12.988120299754636, "learning_rate": 4.967502944368402e-06, "loss": 1.2269, "step": 1128 }, { "epoch": 0.15983577546542083, "grad_norm": 11.505704708740206, "learning_rate": 4.967410758864066e-06, "loss": 1.252, "step": 1129 }, { "epoch": 0.15997734834005806, "grad_norm": 9.823049523489026, "learning_rate": 4.967318443649788e-06, "loss": 1.3575, "step": 1130 }, { "epoch": 0.16011892121469526, "grad_norm": 12.322417781444871, "learning_rate": 4.967225998730419e-06, "loss": 1.2108, "step": 1131 }, { "epoch": 0.1602604940893325, "grad_norm": 10.55247428046813, "learning_rate": 4.967133424110817e-06, "loss": 1.3949, "step": 1132 }, { "epoch": 0.1604020669639697, "grad_norm": 9.28807520773024, "learning_rate": 4.967040719795853e-06, "loss": 1.4435, "step": 1133 }, { "epoch": 0.16054363983860692, "grad_norm": 9.122017150795873, "learning_rate": 4.966947885790396e-06, "loss": 1.1241, "step": 1134 }, { "epoch": 0.16068521271324415, "grad_norm": 10.308918648963019, "learning_rate": 4.966854922099329e-06, "loss": 1.3325, "step": 1135 }, { "epoch": 0.16082678558788135, "grad_norm": 12.19566721036839, "learning_rate": 4.966761828727537e-06, "loss": 1.1959, "step": 1136 }, { "epoch": 0.16096835846251858, "grad_norm": 11.116865634615088, "learning_rate": 4.9666686056799165e-06, "loss": 1.2172, "step": 1137 }, { "epoch": 0.1611099313371558, "grad_norm": 11.373175506152624, "learning_rate": 4.966575252961365e-06, "loss": 1.4312, "step": 1138 }, { "epoch": 0.16125150421179302, "grad_norm": 12.629391954393343, "learning_rate": 4.966481770576793e-06, "loss": 1.1507, "step": 1139 }, { "epoch": 0.16139307708643025, "grad_norm": 11.039445032185828, "learning_rate": 4.9663881585311126e-06, "loss": 1.2183, "step": 1140 }, { "epoch": 0.16153464996106745, "grad_norm": 9.540169766433053, "learning_rate": 4.9662944168292455e-06, "loss": 1.1531, "step": 1141 }, { "epoch": 0.16167622283570468, "grad_norm": 10.374553624857267, "learning_rate": 4.966200545476121e-06, "loss": 1.3067, "step": 1142 }, { "epoch": 0.1618177957103419, "grad_norm": 9.99383698330755, "learning_rate": 4.966106544476672e-06, "loss": 1.1849, "step": 1143 }, { "epoch": 0.1619593685849791, "grad_norm": 11.58470998217451, "learning_rate": 4.9660124138358415e-06, "loss": 1.1871, "step": 1144 }, { "epoch": 0.16210094145961634, "grad_norm": 9.68813120108484, "learning_rate": 4.965918153558576e-06, "loss": 1.1673, "step": 1145 }, { "epoch": 0.16224251433425355, "grad_norm": 9.152410437763871, "learning_rate": 4.965823763649832e-06, "loss": 1.2374, "step": 1146 }, { "epoch": 0.16238408720889078, "grad_norm": 9.66166974407904, "learning_rate": 4.965729244114572e-06, "loss": 1.0706, "step": 1147 }, { "epoch": 0.162525660083528, "grad_norm": 9.492530697242223, "learning_rate": 4.965634594957763e-06, "loss": 1.3069, "step": 1148 }, { "epoch": 0.1626672329581652, "grad_norm": 9.641180532515033, "learning_rate": 4.9655398161843836e-06, "loss": 1.1498, "step": 1149 }, { "epoch": 0.16280880583280244, "grad_norm": 8.679582459563552, "learning_rate": 4.965444907799413e-06, "loss": 1.324, "step": 1150 }, { "epoch": 0.16295037870743964, "grad_norm": 9.656374439808005, "learning_rate": 4.9653498698078425e-06, "loss": 1.2375, "step": 1151 }, { "epoch": 0.16309195158207687, "grad_norm": 9.928473445648002, "learning_rate": 4.965254702214668e-06, "loss": 1.2195, "step": 1152 }, { "epoch": 0.1632335244567141, "grad_norm": 10.029533263097836, "learning_rate": 4.96515940502489e-06, "loss": 1.1382, "step": 1153 }, { "epoch": 0.1633750973313513, "grad_norm": 9.610992940520473, "learning_rate": 4.9650639782435225e-06, "loss": 1.2923, "step": 1154 }, { "epoch": 0.16351667020598853, "grad_norm": 8.252802517305769, "learning_rate": 4.964968421875579e-06, "loss": 1.0721, "step": 1155 }, { "epoch": 0.16365824308062576, "grad_norm": 9.566170580115697, "learning_rate": 4.964872735926083e-06, "loss": 1.1146, "step": 1156 }, { "epoch": 0.16379981595526297, "grad_norm": 9.842713316567181, "learning_rate": 4.964776920400066e-06, "loss": 1.2198, "step": 1157 }, { "epoch": 0.1639413888299002, "grad_norm": 9.252614244705553, "learning_rate": 4.964680975302563e-06, "loss": 1.23, "step": 1158 }, { "epoch": 0.1640829617045374, "grad_norm": 9.259022616717951, "learning_rate": 4.96458490063862e-06, "loss": 1.2409, "step": 1159 }, { "epoch": 0.16422453457917463, "grad_norm": 11.968250871813478, "learning_rate": 4.964488696413285e-06, "loss": 1.2763, "step": 1160 }, { "epoch": 0.16436610745381186, "grad_norm": 9.544514016058354, "learning_rate": 4.964392362631618e-06, "loss": 1.3623, "step": 1161 }, { "epoch": 0.16450768032844906, "grad_norm": 10.087048269223486, "learning_rate": 4.964295899298682e-06, "loss": 1.257, "step": 1162 }, { "epoch": 0.1646492532030863, "grad_norm": 8.723449096999948, "learning_rate": 4.964199306419548e-06, "loss": 1.1218, "step": 1163 }, { "epoch": 0.1647908260777235, "grad_norm": 9.177687614599435, "learning_rate": 4.964102583999293e-06, "loss": 1.2198, "step": 1164 }, { "epoch": 0.16493239895236073, "grad_norm": 8.630207203734713, "learning_rate": 4.964005732043003e-06, "loss": 1.2422, "step": 1165 }, { "epoch": 0.16507397182699796, "grad_norm": 9.998788378747134, "learning_rate": 4.9639087505557694e-06, "loss": 1.323, "step": 1166 }, { "epoch": 0.16521554470163516, "grad_norm": 11.891657981165437, "learning_rate": 4.96381163954269e-06, "loss": 1.4085, "step": 1167 }, { "epoch": 0.1653571175762724, "grad_norm": 10.89844450044578, "learning_rate": 4.963714399008869e-06, "loss": 1.3082, "step": 1168 }, { "epoch": 0.16549869045090962, "grad_norm": 10.51827103538079, "learning_rate": 4.9636170289594195e-06, "loss": 1.3568, "step": 1169 }, { "epoch": 0.16564026332554682, "grad_norm": 9.26785369881383, "learning_rate": 4.96351952939946e-06, "loss": 1.2927, "step": 1170 }, { "epoch": 0.16578183620018405, "grad_norm": 11.18056951065193, "learning_rate": 4.9634219003341156e-06, "loss": 1.3469, "step": 1171 }, { "epoch": 0.16592340907482125, "grad_norm": 12.294641856670289, "learning_rate": 4.963324141768519e-06, "loss": 1.3931, "step": 1172 }, { "epoch": 0.16606498194945848, "grad_norm": 10.233979521819816, "learning_rate": 4.963226253707808e-06, "loss": 1.3536, "step": 1173 }, { "epoch": 0.16620655482409571, "grad_norm": 9.145937792298062, "learning_rate": 4.96312823615713e-06, "loss": 1.2203, "step": 1174 }, { "epoch": 0.16634812769873292, "grad_norm": 8.500683925265403, "learning_rate": 4.963030089121636e-06, "loss": 1.1209, "step": 1175 }, { "epoch": 0.16648970057337015, "grad_norm": 10.56601200743265, "learning_rate": 4.9629318126064884e-06, "loss": 1.2593, "step": 1176 }, { "epoch": 0.16663127344800735, "grad_norm": 10.417313659920207, "learning_rate": 4.962833406616851e-06, "loss": 1.3101, "step": 1177 }, { "epoch": 0.16677284632264458, "grad_norm": 10.259573954460818, "learning_rate": 4.9627348711578996e-06, "loss": 1.2187, "step": 1178 }, { "epoch": 0.1669144191972818, "grad_norm": 9.322921758611272, "learning_rate": 4.96263620623481e-06, "loss": 1.0489, "step": 1179 }, { "epoch": 0.167055992071919, "grad_norm": 9.500126888030069, "learning_rate": 4.962537411852772e-06, "loss": 1.1771, "step": 1180 }, { "epoch": 0.16719756494655624, "grad_norm": 9.480339184580238, "learning_rate": 4.962438488016979e-06, "loss": 1.2073, "step": 1181 }, { "epoch": 0.16733913782119345, "grad_norm": 10.313921096449645, "learning_rate": 4.9623394347326306e-06, "loss": 1.2858, "step": 1182 }, { "epoch": 0.16748071069583068, "grad_norm": 10.626265147563753, "learning_rate": 4.9622402520049336e-06, "loss": 1.2574, "step": 1183 }, { "epoch": 0.1676222835704679, "grad_norm": 11.09456251150884, "learning_rate": 4.962140939839103e-06, "loss": 1.3639, "step": 1184 }, { "epoch": 0.1677638564451051, "grad_norm": 9.51776790449258, "learning_rate": 4.962041498240359e-06, "loss": 1.266, "step": 1185 }, { "epoch": 0.16790542931974234, "grad_norm": 7.776907448287431, "learning_rate": 4.961941927213928e-06, "loss": 1.1566, "step": 1186 }, { "epoch": 0.16804700219437957, "grad_norm": 9.27915566187804, "learning_rate": 4.961842226765047e-06, "loss": 1.0774, "step": 1187 }, { "epoch": 0.16818857506901677, "grad_norm": 13.047710021931291, "learning_rate": 4.9617423968989556e-06, "loss": 1.3055, "step": 1188 }, { "epoch": 0.168330147943654, "grad_norm": 8.963693928168823, "learning_rate": 4.961642437620901e-06, "loss": 1.1772, "step": 1189 }, { "epoch": 0.1684717208182912, "grad_norm": 9.645927763793196, "learning_rate": 4.96154234893614e-06, "loss": 1.0929, "step": 1190 }, { "epoch": 0.16861329369292843, "grad_norm": 11.405067361635243, "learning_rate": 4.961442130849933e-06, "loss": 1.3532, "step": 1191 }, { "epoch": 0.16875486656756566, "grad_norm": 12.575681536731171, "learning_rate": 4.961341783367548e-06, "loss": 1.2858, "step": 1192 }, { "epoch": 0.16889643944220287, "grad_norm": 9.55003152412684, "learning_rate": 4.96124130649426e-06, "loss": 1.3926, "step": 1193 }, { "epoch": 0.1690380123168401, "grad_norm": 9.435974401991512, "learning_rate": 4.961140700235353e-06, "loss": 1.0928, "step": 1194 }, { "epoch": 0.1691795851914773, "grad_norm": 10.503840152962361, "learning_rate": 4.961039964596114e-06, "loss": 1.2489, "step": 1195 }, { "epoch": 0.16932115806611453, "grad_norm": 11.508755212150579, "learning_rate": 4.9609390995818395e-06, "loss": 1.2808, "step": 1196 }, { "epoch": 0.16946273094075176, "grad_norm": 8.15256180080934, "learning_rate": 4.960838105197831e-06, "loss": 1.2168, "step": 1197 }, { "epoch": 0.16960430381538896, "grad_norm": 12.002825086561653, "learning_rate": 4.960736981449399e-06, "loss": 1.2959, "step": 1198 }, { "epoch": 0.1697458766900262, "grad_norm": 11.07447709685428, "learning_rate": 4.960635728341858e-06, "loss": 1.3097, "step": 1199 }, { "epoch": 0.16988744956466342, "grad_norm": 8.384928725047166, "learning_rate": 4.960534345880531e-06, "loss": 1.1667, "step": 1200 }, { "epoch": 0.17002902243930063, "grad_norm": 16.535290518147278, "learning_rate": 4.960432834070749e-06, "loss": 1.2371, "step": 1201 }, { "epoch": 0.17017059531393786, "grad_norm": 12.717052899482733, "learning_rate": 4.960331192917847e-06, "loss": 1.2602, "step": 1202 }, { "epoch": 0.17031216818857506, "grad_norm": 10.432143247287856, "learning_rate": 4.960229422427169e-06, "loss": 1.1703, "step": 1203 }, { "epoch": 0.1704537410632123, "grad_norm": 11.589407958608355, "learning_rate": 4.960127522604065e-06, "loss": 1.2982, "step": 1204 }, { "epoch": 0.17059531393784952, "grad_norm": 10.342450662467112, "learning_rate": 4.96002549345389e-06, "loss": 1.2938, "step": 1205 }, { "epoch": 0.17073688681248672, "grad_norm": 9.547643752293187, "learning_rate": 4.95992333498201e-06, "loss": 1.2299, "step": 1206 }, { "epoch": 0.17087845968712395, "grad_norm": 8.21156745621021, "learning_rate": 4.9598210471937945e-06, "loss": 1.1575, "step": 1207 }, { "epoch": 0.17102003256176115, "grad_norm": 10.352709989465394, "learning_rate": 4.959718630094621e-06, "loss": 1.253, "step": 1208 }, { "epoch": 0.17116160543639838, "grad_norm": 10.02356423158874, "learning_rate": 4.9596160836898735e-06, "loss": 1.2303, "step": 1209 }, { "epoch": 0.17130317831103561, "grad_norm": 8.648986192508525, "learning_rate": 4.959513407984941e-06, "loss": 1.354, "step": 1210 }, { "epoch": 0.17144475118567282, "grad_norm": 7.812433349325067, "learning_rate": 4.9594106029852234e-06, "loss": 1.1874, "step": 1211 }, { "epoch": 0.17158632406031005, "grad_norm": 9.243932847799627, "learning_rate": 4.959307668696124e-06, "loss": 1.0706, "step": 1212 }, { "epoch": 0.17172789693494728, "grad_norm": 11.547507303602705, "learning_rate": 4.959204605123055e-06, "loss": 1.2628, "step": 1213 }, { "epoch": 0.17186946980958448, "grad_norm": 8.539322942477229, "learning_rate": 4.959101412271433e-06, "loss": 1.1105, "step": 1214 }, { "epoch": 0.1720110426842217, "grad_norm": 10.665335174744788, "learning_rate": 4.958998090146683e-06, "loss": 1.3188, "step": 1215 }, { "epoch": 0.1721526155588589, "grad_norm": 9.060480142632567, "learning_rate": 4.9588946387542366e-06, "loss": 1.2182, "step": 1216 }, { "epoch": 0.17229418843349614, "grad_norm": 10.610819610089107, "learning_rate": 4.958791058099533e-06, "loss": 1.4549, "step": 1217 }, { "epoch": 0.17243576130813337, "grad_norm": 9.71104552622437, "learning_rate": 4.9586873481880175e-06, "loss": 1.2569, "step": 1218 }, { "epoch": 0.17257733418277058, "grad_norm": 10.979326067255371, "learning_rate": 4.95858350902514e-06, "loss": 1.2841, "step": 1219 }, { "epoch": 0.1727189070574078, "grad_norm": 13.244914680456727, "learning_rate": 4.958479540616362e-06, "loss": 1.349, "step": 1220 }, { "epoch": 0.172860479932045, "grad_norm": 11.6532060263843, "learning_rate": 4.958375442967147e-06, "loss": 1.0523, "step": 1221 }, { "epoch": 0.17300205280668224, "grad_norm": 10.280332762086, "learning_rate": 4.958271216082968e-06, "loss": 1.4402, "step": 1222 }, { "epoch": 0.17314362568131947, "grad_norm": 8.008206926261842, "learning_rate": 4.958166859969304e-06, "loss": 1.1426, "step": 1223 }, { "epoch": 0.17328519855595667, "grad_norm": 10.225137469946157, "learning_rate": 4.958062374631641e-06, "loss": 1.2245, "step": 1224 }, { "epoch": 0.1734267714305939, "grad_norm": 10.339313950348311, "learning_rate": 4.957957760075472e-06, "loss": 1.2015, "step": 1225 }, { "epoch": 0.17356834430523113, "grad_norm": 8.938738116956731, "learning_rate": 4.957853016306297e-06, "loss": 0.9768, "step": 1226 }, { "epoch": 0.17370991717986833, "grad_norm": 10.431063009132304, "learning_rate": 4.95774814332962e-06, "loss": 1.4039, "step": 1227 }, { "epoch": 0.17385149005450556, "grad_norm": 8.40849960460164, "learning_rate": 4.957643141150958e-06, "loss": 1.189, "step": 1228 }, { "epoch": 0.17399306292914277, "grad_norm": 12.064951909361069, "learning_rate": 4.957538009775826e-06, "loss": 1.2769, "step": 1229 }, { "epoch": 0.17413463580378, "grad_norm": 10.214858314401663, "learning_rate": 4.957432749209755e-06, "loss": 1.1523, "step": 1230 }, { "epoch": 0.17427620867841723, "grad_norm": 11.297897440332234, "learning_rate": 4.957327359458276e-06, "loss": 1.2327, "step": 1231 }, { "epoch": 0.17441778155305443, "grad_norm": 11.532496439240454, "learning_rate": 4.95722184052693e-06, "loss": 1.2233, "step": 1232 }, { "epoch": 0.17455935442769166, "grad_norm": 9.220196936897594, "learning_rate": 4.957116192421264e-06, "loss": 1.2615, "step": 1233 }, { "epoch": 0.17470092730232886, "grad_norm": 9.994330515650095, "learning_rate": 4.957010415146833e-06, "loss": 1.4262, "step": 1234 }, { "epoch": 0.1748425001769661, "grad_norm": 8.464694582932347, "learning_rate": 4.956904508709195e-06, "loss": 1.2538, "step": 1235 }, { "epoch": 0.17498407305160332, "grad_norm": 9.648598018654624, "learning_rate": 4.956798473113919e-06, "loss": 1.1428, "step": 1236 }, { "epoch": 0.17512564592624053, "grad_norm": 11.47104748573215, "learning_rate": 4.95669230836658e-06, "loss": 1.4586, "step": 1237 }, { "epoch": 0.17526721880087776, "grad_norm": 11.307415741752664, "learning_rate": 4.9565860144727575e-06, "loss": 1.2903, "step": 1238 }, { "epoch": 0.17540879167551496, "grad_norm": 9.346317206510115, "learning_rate": 4.956479591438039e-06, "loss": 1.2064, "step": 1239 }, { "epoch": 0.1755503645501522, "grad_norm": 10.537476642985494, "learning_rate": 4.956373039268022e-06, "loss": 1.1044, "step": 1240 }, { "epoch": 0.17569193742478942, "grad_norm": 12.20480604718543, "learning_rate": 4.9562663579683045e-06, "loss": 1.263, "step": 1241 }, { "epoch": 0.17583351029942662, "grad_norm": 9.684470213587892, "learning_rate": 4.9561595475444965e-06, "loss": 1.1691, "step": 1242 }, { "epoch": 0.17597508317406385, "grad_norm": 11.719621061376756, "learning_rate": 4.956052608002212e-06, "loss": 1.261, "step": 1243 }, { "epoch": 0.17611665604870108, "grad_norm": 11.781207906713595, "learning_rate": 4.955945539347075e-06, "loss": 1.2531, "step": 1244 }, { "epoch": 0.17625822892333828, "grad_norm": 11.626263293463868, "learning_rate": 4.95583834158471e-06, "loss": 1.3224, "step": 1245 }, { "epoch": 0.17639980179797551, "grad_norm": 10.361873549793689, "learning_rate": 4.955731014720756e-06, "loss": 1.2869, "step": 1246 }, { "epoch": 0.17654137467261272, "grad_norm": 9.955206684781524, "learning_rate": 4.955623558760852e-06, "loss": 1.1906, "step": 1247 }, { "epoch": 0.17668294754724995, "grad_norm": 10.359689100748158, "learning_rate": 4.955515973710651e-06, "loss": 1.3282, "step": 1248 }, { "epoch": 0.17682452042188718, "grad_norm": 12.779047172260512, "learning_rate": 4.955408259575804e-06, "loss": 1.3077, "step": 1249 }, { "epoch": 0.17696609329652438, "grad_norm": 8.563944673832546, "learning_rate": 4.955300416361977e-06, "loss": 1.237, "step": 1250 }, { "epoch": 0.1771076661711616, "grad_norm": 10.595857033732424, "learning_rate": 4.955192444074837e-06, "loss": 1.2239, "step": 1251 }, { "epoch": 0.1772492390457988, "grad_norm": 9.347764393571056, "learning_rate": 4.9550843427200605e-06, "loss": 1.4095, "step": 1252 }, { "epoch": 0.17739081192043604, "grad_norm": 10.352962759066846, "learning_rate": 4.9549761123033316e-06, "loss": 1.2734, "step": 1253 }, { "epoch": 0.17753238479507327, "grad_norm": 9.062054221929563, "learning_rate": 4.9548677528303385e-06, "loss": 1.0969, "step": 1254 }, { "epoch": 0.17767395766971047, "grad_norm": 10.273241442996156, "learning_rate": 4.954759264306778e-06, "loss": 1.2438, "step": 1255 }, { "epoch": 0.1778155305443477, "grad_norm": 10.090595331086377, "learning_rate": 4.954650646738354e-06, "loss": 1.3164, "step": 1256 }, { "epoch": 0.17795710341898494, "grad_norm": 9.463063361104032, "learning_rate": 4.954541900130775e-06, "loss": 1.2843, "step": 1257 }, { "epoch": 0.17809867629362214, "grad_norm": 11.03124342964128, "learning_rate": 4.9544330244897586e-06, "loss": 1.2595, "step": 1258 }, { "epoch": 0.17824024916825937, "grad_norm": 11.903167405657596, "learning_rate": 4.954324019821028e-06, "loss": 1.3789, "step": 1259 }, { "epoch": 0.17838182204289657, "grad_norm": 10.67349020936974, "learning_rate": 4.954214886130315e-06, "loss": 1.0934, "step": 1260 }, { "epoch": 0.1785233949175338, "grad_norm": 9.93715758303541, "learning_rate": 4.954105623423354e-06, "loss": 1.2821, "step": 1261 }, { "epoch": 0.17866496779217103, "grad_norm": 11.711229357604019, "learning_rate": 4.953996231705891e-06, "loss": 1.3415, "step": 1262 }, { "epoch": 0.17880654066680823, "grad_norm": 10.936854752172454, "learning_rate": 4.953886710983676e-06, "loss": 1.3301, "step": 1263 }, { "epoch": 0.17894811354144546, "grad_norm": 10.002583360770364, "learning_rate": 4.9537770612624655e-06, "loss": 1.1456, "step": 1264 }, { "epoch": 0.17908968641608267, "grad_norm": 11.809268892997117, "learning_rate": 4.9536672825480255e-06, "loss": 1.4497, "step": 1265 }, { "epoch": 0.1792312592907199, "grad_norm": 9.173319350313278, "learning_rate": 4.953557374846125e-06, "loss": 1.2662, "step": 1266 }, { "epoch": 0.17937283216535713, "grad_norm": 9.689921414870566, "learning_rate": 4.953447338162543e-06, "loss": 1.251, "step": 1267 }, { "epoch": 0.17951440503999433, "grad_norm": 9.831179582327874, "learning_rate": 4.953337172503064e-06, "loss": 1.3571, "step": 1268 }, { "epoch": 0.17965597791463156, "grad_norm": 12.418653154307522, "learning_rate": 4.953226877873479e-06, "loss": 1.2829, "step": 1269 }, { "epoch": 0.1797975507892688, "grad_norm": 8.875445287574836, "learning_rate": 4.953116454279587e-06, "loss": 1.2809, "step": 1270 }, { "epoch": 0.179939123663906, "grad_norm": 7.0694311108949535, "learning_rate": 4.953005901727191e-06, "loss": 1.1309, "step": 1271 }, { "epoch": 0.18008069653854322, "grad_norm": 11.321720980787191, "learning_rate": 4.952895220222104e-06, "loss": 1.205, "step": 1272 }, { "epoch": 0.18022226941318042, "grad_norm": 13.247546274680632, "learning_rate": 4.952784409770145e-06, "loss": 1.4329, "step": 1273 }, { "epoch": 0.18036384228781766, "grad_norm": 11.067685048995115, "learning_rate": 4.952673470377137e-06, "loss": 1.2545, "step": 1274 }, { "epoch": 0.18050541516245489, "grad_norm": 10.5967757593591, "learning_rate": 4.952562402048915e-06, "loss": 1.2368, "step": 1275 }, { "epoch": 0.1806469880370921, "grad_norm": 10.568849720363191, "learning_rate": 4.952451204791315e-06, "loss": 1.176, "step": 1276 }, { "epoch": 0.18078856091172932, "grad_norm": 10.973278186917549, "learning_rate": 4.952339878610185e-06, "loss": 1.2681, "step": 1277 }, { "epoch": 0.18093013378636652, "grad_norm": 9.436039085239317, "learning_rate": 4.952228423511375e-06, "loss": 1.249, "step": 1278 }, { "epoch": 0.18107170666100375, "grad_norm": 8.383918682111709, "learning_rate": 4.952116839500747e-06, "loss": 1.301, "step": 1279 }, { "epoch": 0.18121327953564098, "grad_norm": 8.857790312417087, "learning_rate": 4.9520051265841626e-06, "loss": 1.189, "step": 1280 }, { "epoch": 0.18135485241027818, "grad_norm": 10.928717874922402, "learning_rate": 4.951893284767498e-06, "loss": 1.1566, "step": 1281 }, { "epoch": 0.1814964252849154, "grad_norm": 11.30723862508911, "learning_rate": 4.951781314056633e-06, "loss": 1.1944, "step": 1282 }, { "epoch": 0.18163799815955262, "grad_norm": 9.179823386627216, "learning_rate": 4.951669214457451e-06, "loss": 1.2296, "step": 1283 }, { "epoch": 0.18177957103418985, "grad_norm": 9.072642576601037, "learning_rate": 4.951556985975847e-06, "loss": 1.2367, "step": 1284 }, { "epoch": 0.18192114390882708, "grad_norm": 9.932865621662653, "learning_rate": 4.95144462861772e-06, "loss": 1.127, "step": 1285 }, { "epoch": 0.18206271678346428, "grad_norm": 10.179427429764974, "learning_rate": 4.951332142388976e-06, "loss": 1.1442, "step": 1286 }, { "epoch": 0.1822042896581015, "grad_norm": 11.12010719631862, "learning_rate": 4.95121952729553e-06, "loss": 1.2672, "step": 1287 }, { "epoch": 0.18234586253273874, "grad_norm": 11.016436348112384, "learning_rate": 4.951106783343301e-06, "loss": 1.3007, "step": 1288 }, { "epoch": 0.18248743540737594, "grad_norm": 10.805266855061102, "learning_rate": 4.950993910538216e-06, "loss": 1.1927, "step": 1289 }, { "epoch": 0.18262900828201317, "grad_norm": 10.544870324529022, "learning_rate": 4.950880908886208e-06, "loss": 1.2731, "step": 1290 }, { "epoch": 0.18277058115665037, "grad_norm": 8.664086715898305, "learning_rate": 4.95076777839322e-06, "loss": 1.2564, "step": 1291 }, { "epoch": 0.1829121540312876, "grad_norm": 8.235606266164567, "learning_rate": 4.950654519065196e-06, "loss": 1.1456, "step": 1292 }, { "epoch": 0.18305372690592484, "grad_norm": 8.794275797596935, "learning_rate": 4.950541130908091e-06, "loss": 1.1879, "step": 1293 }, { "epoch": 0.18319529978056204, "grad_norm": 9.404948502541055, "learning_rate": 4.9504276139278655e-06, "loss": 1.1203, "step": 1294 }, { "epoch": 0.18333687265519927, "grad_norm": 9.217090521983046, "learning_rate": 4.950313968130488e-06, "loss": 1.2186, "step": 1295 }, { "epoch": 0.18347844552983647, "grad_norm": 10.17960880509682, "learning_rate": 4.950200193521932e-06, "loss": 1.4367, "step": 1296 }, { "epoch": 0.1836200184044737, "grad_norm": 9.0465988090667, "learning_rate": 4.950086290108179e-06, "loss": 1.118, "step": 1297 }, { "epoch": 0.18376159127911093, "grad_norm": 8.711971368719857, "learning_rate": 4.949972257895217e-06, "loss": 1.1571, "step": 1298 }, { "epoch": 0.18390316415374813, "grad_norm": 9.400545437186162, "learning_rate": 4.94985809688904e-06, "loss": 1.3195, "step": 1299 }, { "epoch": 0.18404473702838536, "grad_norm": 12.908258535826372, "learning_rate": 4.949743807095649e-06, "loss": 1.2479, "step": 1300 }, { "epoch": 0.1841863099030226, "grad_norm": 11.132740885186326, "learning_rate": 4.9496293885210535e-06, "loss": 1.3276, "step": 1301 }, { "epoch": 0.1843278827776598, "grad_norm": 10.730028985687383, "learning_rate": 4.949514841171266e-06, "loss": 1.1879, "step": 1302 }, { "epoch": 0.18446945565229703, "grad_norm": 9.907429429370124, "learning_rate": 4.949400165052312e-06, "loss": 1.2592, "step": 1303 }, { "epoch": 0.18461102852693423, "grad_norm": 11.10661562788442, "learning_rate": 4.949285360170216e-06, "loss": 1.2555, "step": 1304 }, { "epoch": 0.18475260140157146, "grad_norm": 11.019536442638797, "learning_rate": 4.949170426531016e-06, "loss": 1.2377, "step": 1305 }, { "epoch": 0.1848941742762087, "grad_norm": 8.114482939412925, "learning_rate": 4.9490553641407515e-06, "loss": 1.2575, "step": 1306 }, { "epoch": 0.1850357471508459, "grad_norm": 9.117647317962353, "learning_rate": 4.948940173005474e-06, "loss": 1.407, "step": 1307 }, { "epoch": 0.18517732002548312, "grad_norm": 9.641952447988807, "learning_rate": 4.948824853131237e-06, "loss": 1.1332, "step": 1308 }, { "epoch": 0.18531889290012032, "grad_norm": 8.59602242682626, "learning_rate": 4.948709404524103e-06, "loss": 1.2491, "step": 1309 }, { "epoch": 0.18546046577475755, "grad_norm": 8.779793289288577, "learning_rate": 4.948593827190142e-06, "loss": 1.2979, "step": 1310 }, { "epoch": 0.18560203864939478, "grad_norm": 11.999148656527483, "learning_rate": 4.9484781211354286e-06, "loss": 1.2966, "step": 1311 }, { "epoch": 0.185743611524032, "grad_norm": 9.391632787671004, "learning_rate": 4.948362286366047e-06, "loss": 1.2049, "step": 1312 }, { "epoch": 0.18588518439866922, "grad_norm": 8.689088086215925, "learning_rate": 4.948246322888085e-06, "loss": 1.2219, "step": 1313 }, { "epoch": 0.18602675727330645, "grad_norm": 10.678185402663336, "learning_rate": 4.948130230707639e-06, "loss": 1.2852, "step": 1314 }, { "epoch": 0.18616833014794365, "grad_norm": 9.389327837171663, "learning_rate": 4.9480140098308125e-06, "loss": 1.1238, "step": 1315 }, { "epoch": 0.18630990302258088, "grad_norm": 8.838729457774708, "learning_rate": 4.947897660263715e-06, "loss": 1.1455, "step": 1316 }, { "epoch": 0.18645147589721808, "grad_norm": 12.754555281150413, "learning_rate": 4.947781182012462e-06, "loss": 1.3247, "step": 1317 }, { "epoch": 0.1865930487718553, "grad_norm": 10.402431761268687, "learning_rate": 4.947664575083179e-06, "loss": 1.2422, "step": 1318 }, { "epoch": 0.18673462164649254, "grad_norm": 9.55780153222458, "learning_rate": 4.947547839481993e-06, "loss": 1.1459, "step": 1319 }, { "epoch": 0.18687619452112975, "grad_norm": 10.521642269849679, "learning_rate": 4.947430975215043e-06, "loss": 1.2386, "step": 1320 }, { "epoch": 0.18701776739576698, "grad_norm": 8.377962414061416, "learning_rate": 4.94731398228847e-06, "loss": 1.2185, "step": 1321 }, { "epoch": 0.18715934027040418, "grad_norm": 9.2320390512576, "learning_rate": 4.947196860708426e-06, "loss": 1.1322, "step": 1322 }, { "epoch": 0.1873009131450414, "grad_norm": 11.127170993840792, "learning_rate": 4.947079610481069e-06, "loss": 1.3047, "step": 1323 }, { "epoch": 0.18744248601967864, "grad_norm": 9.285835340779752, "learning_rate": 4.946962231612561e-06, "loss": 1.2528, "step": 1324 }, { "epoch": 0.18758405889431584, "grad_norm": 10.89410616215326, "learning_rate": 4.946844724109073e-06, "loss": 1.3073, "step": 1325 }, { "epoch": 0.18772563176895307, "grad_norm": 9.282938691202743, "learning_rate": 4.946727087976782e-06, "loss": 1.2172, "step": 1326 }, { "epoch": 0.18786720464359027, "grad_norm": 10.494334917880806, "learning_rate": 4.946609323221873e-06, "loss": 1.3532, "step": 1327 }, { "epoch": 0.1880087775182275, "grad_norm": 9.462634840776792, "learning_rate": 4.946491429850535e-06, "loss": 1.3434, "step": 1328 }, { "epoch": 0.18815035039286473, "grad_norm": 9.540053007670354, "learning_rate": 4.946373407868967e-06, "loss": 1.2307, "step": 1329 }, { "epoch": 0.18829192326750194, "grad_norm": 9.878443636607976, "learning_rate": 4.946255257283374e-06, "loss": 1.2112, "step": 1330 }, { "epoch": 0.18843349614213917, "grad_norm": 9.01909138435874, "learning_rate": 4.946136978099966e-06, "loss": 1.1166, "step": 1331 }, { "epoch": 0.1885750690167764, "grad_norm": 10.806554670214371, "learning_rate": 4.94601857032496e-06, "loss": 1.1559, "step": 1332 }, { "epoch": 0.1887166418914136, "grad_norm": 8.552892487905362, "learning_rate": 4.9459000339645824e-06, "loss": 1.1635, "step": 1333 }, { "epoch": 0.18885821476605083, "grad_norm": 12.022278133065674, "learning_rate": 4.9457813690250635e-06, "loss": 1.3513, "step": 1334 }, { "epoch": 0.18899978764068803, "grad_norm": 8.898332469611713, "learning_rate": 4.9456625755126415e-06, "loss": 1.0952, "step": 1335 }, { "epoch": 0.18914136051532526, "grad_norm": 9.148417485033614, "learning_rate": 4.945543653433562e-06, "loss": 1.2738, "step": 1336 }, { "epoch": 0.1892829333899625, "grad_norm": 7.966013479655721, "learning_rate": 4.945424602794076e-06, "loss": 1.1645, "step": 1337 }, { "epoch": 0.1894245062645997, "grad_norm": 10.872115881812453, "learning_rate": 4.945305423600441e-06, "loss": 1.1637, "step": 1338 }, { "epoch": 0.18956607913923693, "grad_norm": 9.273261437355623, "learning_rate": 4.945186115858925e-06, "loss": 1.2068, "step": 1339 }, { "epoch": 0.18970765201387413, "grad_norm": 9.275471439358933, "learning_rate": 4.945066679575796e-06, "loss": 1.2867, "step": 1340 }, { "epoch": 0.18984922488851136, "grad_norm": 9.61262251400104, "learning_rate": 4.944947114757336e-06, "loss": 1.0933, "step": 1341 }, { "epoch": 0.1899907977631486, "grad_norm": 8.383242067278848, "learning_rate": 4.944827421409829e-06, "loss": 1.2212, "step": 1342 }, { "epoch": 0.1901323706377858, "grad_norm": 8.835206127169322, "learning_rate": 4.944707599539567e-06, "loss": 1.2886, "step": 1343 }, { "epoch": 0.19027394351242302, "grad_norm": 12.065851724268422, "learning_rate": 4.94458764915285e-06, "loss": 1.2574, "step": 1344 }, { "epoch": 0.19041551638706025, "grad_norm": 10.129186071023959, "learning_rate": 4.944467570255983e-06, "loss": 1.2368, "step": 1345 }, { "epoch": 0.19055708926169745, "grad_norm": 9.245812370296012, "learning_rate": 4.944347362855278e-06, "loss": 1.3441, "step": 1346 }, { "epoch": 0.19069866213633468, "grad_norm": 12.253983453295211, "learning_rate": 4.9442270269570545e-06, "loss": 1.2458, "step": 1347 }, { "epoch": 0.1908402350109719, "grad_norm": 8.440834573278702, "learning_rate": 4.94410656256764e-06, "loss": 1.1756, "step": 1348 }, { "epoch": 0.19098180788560912, "grad_norm": 9.329075341560685, "learning_rate": 4.943985969693365e-06, "loss": 1.2713, "step": 1349 }, { "epoch": 0.19112338076024635, "grad_norm": 10.659114150694656, "learning_rate": 4.94386524834057e-06, "loss": 1.3754, "step": 1350 }, { "epoch": 0.19126495363488355, "grad_norm": 9.639561722792148, "learning_rate": 4.943744398515601e-06, "loss": 1.2075, "step": 1351 }, { "epoch": 0.19140652650952078, "grad_norm": 9.35914104595951, "learning_rate": 4.943623420224811e-06, "loss": 1.3141, "step": 1352 }, { "epoch": 0.19154809938415798, "grad_norm": 10.11259963242309, "learning_rate": 4.94350231347456e-06, "loss": 1.3359, "step": 1353 }, { "epoch": 0.1916896722587952, "grad_norm": 9.585181370443486, "learning_rate": 4.943381078271214e-06, "loss": 1.2364, "step": 1354 }, { "epoch": 0.19183124513343244, "grad_norm": 9.099929859603701, "learning_rate": 4.943259714621148e-06, "loss": 1.1694, "step": 1355 }, { "epoch": 0.19197281800806965, "grad_norm": 11.516630669706391, "learning_rate": 4.943138222530739e-06, "loss": 1.343, "step": 1356 }, { "epoch": 0.19211439088270688, "grad_norm": 10.843694417409782, "learning_rate": 4.943016602006376e-06, "loss": 1.2328, "step": 1357 }, { "epoch": 0.1922559637573441, "grad_norm": 10.647551535387395, "learning_rate": 4.942894853054452e-06, "loss": 1.1943, "step": 1358 }, { "epoch": 0.1923975366319813, "grad_norm": 11.015423105973428, "learning_rate": 4.942772975681366e-06, "loss": 1.3973, "step": 1359 }, { "epoch": 0.19253910950661854, "grad_norm": 9.560378693298574, "learning_rate": 4.942650969893527e-06, "loss": 1.2599, "step": 1360 }, { "epoch": 0.19268068238125574, "grad_norm": 10.808379871167238, "learning_rate": 4.942528835697348e-06, "loss": 1.1915, "step": 1361 }, { "epoch": 0.19282225525589297, "grad_norm": 10.364835976951182, "learning_rate": 4.942406573099249e-06, "loss": 1.1268, "step": 1362 }, { "epoch": 0.1929638281305302, "grad_norm": 8.721393580939479, "learning_rate": 4.942284182105658e-06, "loss": 1.1265, "step": 1363 }, { "epoch": 0.1931054010051674, "grad_norm": 9.888000043529523, "learning_rate": 4.942161662723007e-06, "loss": 1.211, "step": 1364 }, { "epoch": 0.19324697387980463, "grad_norm": 8.99226301146618, "learning_rate": 4.94203901495774e-06, "loss": 1.1224, "step": 1365 }, { "epoch": 0.19338854675444184, "grad_norm": 9.941572398537923, "learning_rate": 4.9419162388163025e-06, "loss": 1.2178, "step": 1366 }, { "epoch": 0.19353011962907907, "grad_norm": 8.620712320232194, "learning_rate": 4.941793334305149e-06, "loss": 1.133, "step": 1367 }, { "epoch": 0.1936716925037163, "grad_norm": 10.191323902503648, "learning_rate": 4.94167030143074e-06, "loss": 1.3465, "step": 1368 }, { "epoch": 0.1938132653783535, "grad_norm": 11.757984451608099, "learning_rate": 4.941547140199545e-06, "loss": 1.2614, "step": 1369 }, { "epoch": 0.19395483825299073, "grad_norm": 9.544668289103788, "learning_rate": 4.9414238506180365e-06, "loss": 1.2279, "step": 1370 }, { "epoch": 0.19409641112762796, "grad_norm": 9.872899085503663, "learning_rate": 4.941300432692697e-06, "loss": 1.3637, "step": 1371 }, { "epoch": 0.19423798400226516, "grad_norm": 9.134460979191443, "learning_rate": 4.941176886430014e-06, "loss": 1.2858, "step": 1372 }, { "epoch": 0.1943795568769024, "grad_norm": 10.915707223289305, "learning_rate": 4.941053211836482e-06, "loss": 1.2401, "step": 1373 }, { "epoch": 0.1945211297515396, "grad_norm": 10.290961319900843, "learning_rate": 4.940929408918603e-06, "loss": 1.1914, "step": 1374 }, { "epoch": 0.19466270262617683, "grad_norm": 9.16930346856471, "learning_rate": 4.940805477682885e-06, "loss": 1.2697, "step": 1375 }, { "epoch": 0.19480427550081406, "grad_norm": 10.601086801919694, "learning_rate": 4.940681418135843e-06, "loss": 1.2096, "step": 1376 }, { "epoch": 0.19494584837545126, "grad_norm": 11.640240267341792, "learning_rate": 4.940557230283999e-06, "loss": 1.4136, "step": 1377 }, { "epoch": 0.1950874212500885, "grad_norm": 11.465503201860571, "learning_rate": 4.94043291413388e-06, "loss": 1.3753, "step": 1378 }, { "epoch": 0.1952289941247257, "grad_norm": 9.342307907632655, "learning_rate": 4.9403084696920234e-06, "loss": 1.3091, "step": 1379 }, { "epoch": 0.19537056699936292, "grad_norm": 9.13922097924226, "learning_rate": 4.940183896964969e-06, "loss": 0.8699, "step": 1380 }, { "epoch": 0.19551213987400015, "grad_norm": 10.136526822605001, "learning_rate": 4.940059195959268e-06, "loss": 1.2169, "step": 1381 }, { "epoch": 0.19565371274863735, "grad_norm": 8.300763786746336, "learning_rate": 4.939934366681474e-06, "loss": 1.1697, "step": 1382 }, { "epoch": 0.19579528562327458, "grad_norm": 8.10737694029646, "learning_rate": 4.93980940913815e-06, "loss": 1.1508, "step": 1383 }, { "epoch": 0.1959368584979118, "grad_norm": 9.017646866954252, "learning_rate": 4.939684323335864e-06, "loss": 1.2277, "step": 1384 }, { "epoch": 0.19607843137254902, "grad_norm": 8.952273662894335, "learning_rate": 4.939559109281192e-06, "loss": 1.2025, "step": 1385 }, { "epoch": 0.19622000424718625, "grad_norm": 10.395016400506393, "learning_rate": 4.939433766980717e-06, "loss": 1.1779, "step": 1386 }, { "epoch": 0.19636157712182345, "grad_norm": 8.55015487084472, "learning_rate": 4.939308296441028e-06, "loss": 1.1938, "step": 1387 }, { "epoch": 0.19650314999646068, "grad_norm": 9.055201886947573, "learning_rate": 4.939182697668721e-06, "loss": 1.0945, "step": 1388 }, { "epoch": 0.1966447228710979, "grad_norm": 10.393583636578237, "learning_rate": 4.939056970670397e-06, "loss": 1.4453, "step": 1389 }, { "epoch": 0.1967862957457351, "grad_norm": 9.48585420790932, "learning_rate": 4.938931115452668e-06, "loss": 1.2552, "step": 1390 }, { "epoch": 0.19692786862037234, "grad_norm": 8.834998879467689, "learning_rate": 4.938805132022148e-06, "loss": 1.2852, "step": 1391 }, { "epoch": 0.19706944149500955, "grad_norm": 7.8127873482312085, "learning_rate": 4.9386790203854605e-06, "loss": 1.0754, "step": 1392 }, { "epoch": 0.19721101436964678, "grad_norm": 8.34479096166271, "learning_rate": 4.938552780549236e-06, "loss": 1.2333, "step": 1393 }, { "epoch": 0.197352587244284, "grad_norm": 10.320060409250807, "learning_rate": 4.93842641252011e-06, "loss": 1.1539, "step": 1394 }, { "epoch": 0.1974941601189212, "grad_norm": 10.750086939260544, "learning_rate": 4.938299916304725e-06, "loss": 1.1511, "step": 1395 }, { "epoch": 0.19763573299355844, "grad_norm": 8.081262801795255, "learning_rate": 4.938173291909732e-06, "loss": 1.1714, "step": 1396 }, { "epoch": 0.19777730586819564, "grad_norm": 10.200124208778622, "learning_rate": 4.9380465393417875e-06, "loss": 1.2437, "step": 1397 }, { "epoch": 0.19791887874283287, "grad_norm": 9.171669536934187, "learning_rate": 4.937919658607554e-06, "loss": 1.1567, "step": 1398 }, { "epoch": 0.1980604516174701, "grad_norm": 8.97742640601906, "learning_rate": 4.937792649713701e-06, "loss": 1.2237, "step": 1399 }, { "epoch": 0.1982020244921073, "grad_norm": 9.582241051387433, "learning_rate": 4.937665512666907e-06, "loss": 1.3403, "step": 1400 }, { "epoch": 0.19834359736674453, "grad_norm": 9.988670798035313, "learning_rate": 4.937538247473854e-06, "loss": 1.3053, "step": 1401 }, { "epoch": 0.19848517024138176, "grad_norm": 8.998615158348889, "learning_rate": 4.9374108541412336e-06, "loss": 1.2043, "step": 1402 }, { "epoch": 0.19862674311601897, "grad_norm": 8.034109830987326, "learning_rate": 4.937283332675741e-06, "loss": 1.2322, "step": 1403 }, { "epoch": 0.1987683159906562, "grad_norm": 11.071586713232788, "learning_rate": 4.937155683084082e-06, "loss": 1.484, "step": 1404 }, { "epoch": 0.1989098888652934, "grad_norm": 10.40899420402161, "learning_rate": 4.937027905372965e-06, "loss": 1.241, "step": 1405 }, { "epoch": 0.19905146173993063, "grad_norm": 9.94601095163773, "learning_rate": 4.936899999549108e-06, "loss": 1.3802, "step": 1406 }, { "epoch": 0.19919303461456786, "grad_norm": 9.659500315756382, "learning_rate": 4.936771965619236e-06, "loss": 1.243, "step": 1407 }, { "epoch": 0.19933460748920506, "grad_norm": 9.250991768121278, "learning_rate": 4.936643803590079e-06, "loss": 1.2781, "step": 1408 }, { "epoch": 0.1994761803638423, "grad_norm": 8.637503144502412, "learning_rate": 4.936515513468373e-06, "loss": 1.1754, "step": 1409 }, { "epoch": 0.1996177532384795, "grad_norm": 7.624053896382253, "learning_rate": 4.9363870952608634e-06, "loss": 1.1172, "step": 1410 }, { "epoch": 0.19975932611311673, "grad_norm": 9.408321317310842, "learning_rate": 4.936258548974301e-06, "loss": 1.2491, "step": 1411 }, { "epoch": 0.19990089898775396, "grad_norm": 10.699290414258899, "learning_rate": 4.936129874615443e-06, "loss": 1.3083, "step": 1412 }, { "epoch": 0.20004247186239116, "grad_norm": 9.99171524184517, "learning_rate": 4.9360010721910545e-06, "loss": 1.2584, "step": 1413 }, { "epoch": 0.2001840447370284, "grad_norm": 10.396842301102021, "learning_rate": 4.935872141707906e-06, "loss": 1.1796, "step": 1414 }, { "epoch": 0.20032561761166562, "grad_norm": 9.576400327748662, "learning_rate": 4.935743083172775e-06, "loss": 1.1873, "step": 1415 }, { "epoch": 0.20046719048630282, "grad_norm": 8.293858326346069, "learning_rate": 4.935613896592446e-06, "loss": 1.2353, "step": 1416 }, { "epoch": 0.20060876336094005, "grad_norm": 10.402342649854429, "learning_rate": 4.93548458197371e-06, "loss": 1.1971, "step": 1417 }, { "epoch": 0.20075033623557725, "grad_norm": 8.593292779030214, "learning_rate": 4.935355139323367e-06, "loss": 1.2552, "step": 1418 }, { "epoch": 0.20089190911021448, "grad_norm": 11.031107526080648, "learning_rate": 4.93522556864822e-06, "loss": 1.1517, "step": 1419 }, { "epoch": 0.20103348198485171, "grad_norm": 9.137817577682748, "learning_rate": 4.935095869955079e-06, "loss": 1.3494, "step": 1420 }, { "epoch": 0.20117505485948892, "grad_norm": 9.275234546420553, "learning_rate": 4.934966043250765e-06, "loss": 1.1727, "step": 1421 }, { "epoch": 0.20131662773412615, "grad_norm": 10.840189676349684, "learning_rate": 4.934836088542102e-06, "loss": 1.1497, "step": 1422 }, { "epoch": 0.20145820060876335, "grad_norm": 8.965340740581285, "learning_rate": 4.934706005835921e-06, "loss": 1.2939, "step": 1423 }, { "epoch": 0.20159977348340058, "grad_norm": 9.961504656035002, "learning_rate": 4.9345757951390605e-06, "loss": 1.1842, "step": 1424 }, { "epoch": 0.2017413463580378, "grad_norm": 10.241525263439996, "learning_rate": 4.934445456458366e-06, "loss": 1.1608, "step": 1425 }, { "epoch": 0.201882919232675, "grad_norm": 10.925726783024766, "learning_rate": 4.934314989800689e-06, "loss": 1.3064, "step": 1426 }, { "epoch": 0.20202449210731224, "grad_norm": 8.95230562145537, "learning_rate": 4.934184395172888e-06, "loss": 1.2427, "step": 1427 }, { "epoch": 0.20216606498194944, "grad_norm": 9.000464109534114, "learning_rate": 4.934053672581828e-06, "loss": 1.2127, "step": 1428 }, { "epoch": 0.20230763785658668, "grad_norm": 10.72226526211109, "learning_rate": 4.933922822034381e-06, "loss": 1.2424, "step": 1429 }, { "epoch": 0.2024492107312239, "grad_norm": 10.63266576347592, "learning_rate": 4.933791843537427e-06, "loss": 1.2057, "step": 1430 }, { "epoch": 0.2025907836058611, "grad_norm": 7.722782703188279, "learning_rate": 4.933660737097851e-06, "loss": 1.0472, "step": 1431 }, { "epoch": 0.20273235648049834, "grad_norm": 7.851720643230131, "learning_rate": 4.933529502722544e-06, "loss": 1.2785, "step": 1432 }, { "epoch": 0.20287392935513557, "grad_norm": 8.709078767499788, "learning_rate": 4.933398140418405e-06, "loss": 1.25, "step": 1433 }, { "epoch": 0.20301550222977277, "grad_norm": 12.403716090412683, "learning_rate": 4.933266650192341e-06, "loss": 1.2973, "step": 1434 }, { "epoch": 0.20315707510441, "grad_norm": 8.347358751864222, "learning_rate": 4.933135032051263e-06, "loss": 1.2022, "step": 1435 }, { "epoch": 0.2032986479790472, "grad_norm": 8.6272434136552, "learning_rate": 4.933003286002091e-06, "loss": 1.125, "step": 1436 }, { "epoch": 0.20344022085368443, "grad_norm": 11.029275949876112, "learning_rate": 4.932871412051749e-06, "loss": 1.2141, "step": 1437 }, { "epoch": 0.20358179372832166, "grad_norm": 10.143784793363965, "learning_rate": 4.932739410207172e-06, "loss": 1.2792, "step": 1438 }, { "epoch": 0.20372336660295887, "grad_norm": 9.506678191503928, "learning_rate": 4.932607280475299e-06, "loss": 1.1956, "step": 1439 }, { "epoch": 0.2038649394775961, "grad_norm": 9.181891773248285, "learning_rate": 4.932475022863074e-06, "loss": 1.3007, "step": 1440 }, { "epoch": 0.2040065123522333, "grad_norm": 7.472723634040148, "learning_rate": 4.932342637377451e-06, "loss": 1.1949, "step": 1441 }, { "epoch": 0.20414808522687053, "grad_norm": 8.45257803509989, "learning_rate": 4.93221012402539e-06, "loss": 1.1352, "step": 1442 }, { "epoch": 0.20428965810150776, "grad_norm": 10.705377628484529, "learning_rate": 4.9320774828138555e-06, "loss": 1.2769, "step": 1443 }, { "epoch": 0.20443123097614496, "grad_norm": 7.822088598184107, "learning_rate": 4.931944713749821e-06, "loss": 1.0643, "step": 1444 }, { "epoch": 0.2045728038507822, "grad_norm": 11.391632998309069, "learning_rate": 4.9318118168402665e-06, "loss": 1.2034, "step": 1445 }, { "epoch": 0.20471437672541942, "grad_norm": 8.225006622961097, "learning_rate": 4.931678792092177e-06, "loss": 1.1512, "step": 1446 }, { "epoch": 0.20485594960005662, "grad_norm": 8.623537685989415, "learning_rate": 4.9315456395125475e-06, "loss": 1.1194, "step": 1447 }, { "epoch": 0.20499752247469386, "grad_norm": 7.919517753253149, "learning_rate": 4.931412359108377e-06, "loss": 1.1258, "step": 1448 }, { "epoch": 0.20513909534933106, "grad_norm": 10.42313479197125, "learning_rate": 4.931278950886671e-06, "loss": 1.251, "step": 1449 }, { "epoch": 0.2052806682239683, "grad_norm": 10.655222197157634, "learning_rate": 4.931145414854444e-06, "loss": 1.2331, "step": 1450 }, { "epoch": 0.20542224109860552, "grad_norm": 7.9085839486751475, "learning_rate": 4.931011751018715e-06, "loss": 1.2685, "step": 1451 }, { "epoch": 0.20556381397324272, "grad_norm": 11.388910193281225, "learning_rate": 4.930877959386511e-06, "loss": 1.2382, "step": 1452 }, { "epoch": 0.20570538684787995, "grad_norm": 8.062938116427283, "learning_rate": 4.930744039964866e-06, "loss": 1.1617, "step": 1453 }, { "epoch": 0.20584695972251715, "grad_norm": 7.272539290252854, "learning_rate": 4.930609992760818e-06, "loss": 1.1659, "step": 1454 }, { "epoch": 0.20598853259715438, "grad_norm": 9.182088283133517, "learning_rate": 4.930475817781415e-06, "loss": 1.1242, "step": 1455 }, { "epoch": 0.2061301054717916, "grad_norm": 8.592888806281003, "learning_rate": 4.930341515033712e-06, "loss": 1.1883, "step": 1456 }, { "epoch": 0.20627167834642882, "grad_norm": 9.870812795887927, "learning_rate": 4.930207084524766e-06, "loss": 1.1922, "step": 1457 }, { "epoch": 0.20641325122106605, "grad_norm": 10.004734063148126, "learning_rate": 4.930072526261647e-06, "loss": 1.2928, "step": 1458 }, { "epoch": 0.20655482409570328, "grad_norm": 9.465938339512936, "learning_rate": 4.9299378402514265e-06, "loss": 1.2743, "step": 1459 }, { "epoch": 0.20669639697034048, "grad_norm": 10.592758987570235, "learning_rate": 4.9298030265011856e-06, "loss": 1.4018, "step": 1460 }, { "epoch": 0.2068379698449777, "grad_norm": 8.563924183709032, "learning_rate": 4.929668085018011e-06, "loss": 1.2208, "step": 1461 }, { "epoch": 0.2069795427196149, "grad_norm": 9.42711790861811, "learning_rate": 4.929533015808997e-06, "loss": 1.1407, "step": 1462 }, { "epoch": 0.20712111559425214, "grad_norm": 9.773643192654632, "learning_rate": 4.929397818881244e-06, "loss": 1.0706, "step": 1463 }, { "epoch": 0.20726268846888937, "grad_norm": 8.715614985764237, "learning_rate": 4.929262494241859e-06, "loss": 1.1743, "step": 1464 }, { "epoch": 0.20740426134352657, "grad_norm": 9.928492656524659, "learning_rate": 4.929127041897957e-06, "loss": 1.2827, "step": 1465 }, { "epoch": 0.2075458342181638, "grad_norm": 8.761661279347852, "learning_rate": 4.928991461856656e-06, "loss": 1.0669, "step": 1466 }, { "epoch": 0.207687407092801, "grad_norm": 8.768752575396094, "learning_rate": 4.928855754125086e-06, "loss": 1.187, "step": 1467 }, { "epoch": 0.20782897996743824, "grad_norm": 7.31189275323193, "learning_rate": 4.92871991871038e-06, "loss": 1.0787, "step": 1468 }, { "epoch": 0.20797055284207547, "grad_norm": 7.447230025075546, "learning_rate": 4.928583955619678e-06, "loss": 1.2349, "step": 1469 }, { "epoch": 0.20811212571671267, "grad_norm": 9.70762502899722, "learning_rate": 4.928447864860129e-06, "loss": 1.261, "step": 1470 }, { "epoch": 0.2082536985913499, "grad_norm": 8.992565475195807, "learning_rate": 4.928311646438887e-06, "loss": 1.291, "step": 1471 }, { "epoch": 0.2083952714659871, "grad_norm": 10.481823037848777, "learning_rate": 4.9281753003631114e-06, "loss": 1.2986, "step": 1472 }, { "epoch": 0.20853684434062433, "grad_norm": 10.491282886356773, "learning_rate": 4.928038826639971e-06, "loss": 1.2113, "step": 1473 }, { "epoch": 0.20867841721526156, "grad_norm": 10.439233693028251, "learning_rate": 4.92790222527664e-06, "loss": 1.0657, "step": 1474 }, { "epoch": 0.20881999008989877, "grad_norm": 8.092729540973172, "learning_rate": 4.927765496280299e-06, "loss": 1.194, "step": 1475 }, { "epoch": 0.208961562964536, "grad_norm": 9.948288537442949, "learning_rate": 4.927628639658137e-06, "loss": 1.2767, "step": 1476 }, { "epoch": 0.20910313583917323, "grad_norm": 9.573825885707539, "learning_rate": 4.927491655417347e-06, "loss": 1.3029, "step": 1477 }, { "epoch": 0.20924470871381043, "grad_norm": 10.770708586353473, "learning_rate": 4.927354543565131e-06, "loss": 1.3955, "step": 1478 }, { "epoch": 0.20938628158844766, "grad_norm": 9.036150912608182, "learning_rate": 4.927217304108696e-06, "loss": 1.2902, "step": 1479 }, { "epoch": 0.20952785446308486, "grad_norm": 9.746042866851141, "learning_rate": 4.927079937055257e-06, "loss": 1.231, "step": 1480 }, { "epoch": 0.2096694273377221, "grad_norm": 9.439513137029905, "learning_rate": 4.926942442412036e-06, "loss": 1.2947, "step": 1481 }, { "epoch": 0.20981100021235932, "grad_norm": 8.613232089570593, "learning_rate": 4.92680482018626e-06, "loss": 1.1935, "step": 1482 }, { "epoch": 0.20995257308699652, "grad_norm": 10.467493875782816, "learning_rate": 4.9266670703851645e-06, "loss": 1.2064, "step": 1483 }, { "epoch": 0.21009414596163375, "grad_norm": 10.467705973477091, "learning_rate": 4.92652919301599e-06, "loss": 1.2204, "step": 1484 }, { "epoch": 0.21023571883627096, "grad_norm": 8.421102683018965, "learning_rate": 4.9263911880859855e-06, "loss": 1.292, "step": 1485 }, { "epoch": 0.2103772917109082, "grad_norm": 8.214664502621211, "learning_rate": 4.926253055602405e-06, "loss": 1.3166, "step": 1486 }, { "epoch": 0.21051886458554542, "grad_norm": 11.386246372062766, "learning_rate": 4.926114795572511e-06, "loss": 1.3502, "step": 1487 }, { "epoch": 0.21066043746018262, "grad_norm": 8.500001346363634, "learning_rate": 4.925976408003571e-06, "loss": 1.25, "step": 1488 }, { "epoch": 0.21080201033481985, "grad_norm": 9.11239505911475, "learning_rate": 4.92583789290286e-06, "loss": 1.2549, "step": 1489 }, { "epoch": 0.21094358320945708, "grad_norm": 9.689171825248001, "learning_rate": 4.9256992502776605e-06, "loss": 1.2877, "step": 1490 }, { "epoch": 0.21108515608409428, "grad_norm": 9.100747264320825, "learning_rate": 4.925560480135258e-06, "loss": 1.3564, "step": 1491 }, { "epoch": 0.2112267289587315, "grad_norm": 10.540409621326468, "learning_rate": 4.925421582482952e-06, "loss": 1.2826, "step": 1492 }, { "epoch": 0.21136830183336872, "grad_norm": 8.63525444464322, "learning_rate": 4.925282557328041e-06, "loss": 1.272, "step": 1493 }, { "epoch": 0.21150987470800595, "grad_norm": 8.94033149736154, "learning_rate": 4.925143404677835e-06, "loss": 1.2348, "step": 1494 }, { "epoch": 0.21165144758264318, "grad_norm": 10.280401409342295, "learning_rate": 4.925004124539648e-06, "loss": 1.3767, "step": 1495 }, { "epoch": 0.21179302045728038, "grad_norm": 9.91642282223562, "learning_rate": 4.924864716920801e-06, "loss": 1.2204, "step": 1496 }, { "epoch": 0.2119345933319176, "grad_norm": 8.445826512167582, "learning_rate": 4.9247251818286255e-06, "loss": 1.3123, "step": 1497 }, { "epoch": 0.2120761662065548, "grad_norm": 8.514515767779494, "learning_rate": 4.924585519270454e-06, "loss": 1.2733, "step": 1498 }, { "epoch": 0.21221773908119204, "grad_norm": 9.167164413347654, "learning_rate": 4.9244457292536305e-06, "loss": 1.2322, "step": 1499 }, { "epoch": 0.21235931195582927, "grad_norm": 9.139791545202007, "learning_rate": 4.924305811785502e-06, "loss": 1.1993, "step": 1500 }, { "epoch": 0.21250088483046647, "grad_norm": 10.339005871912134, "learning_rate": 4.9241657668734256e-06, "loss": 1.1385, "step": 1501 }, { "epoch": 0.2126424577051037, "grad_norm": 10.84811621279063, "learning_rate": 4.9240255945247616e-06, "loss": 1.3216, "step": 1502 }, { "epoch": 0.21278403057974093, "grad_norm": 11.400617321767397, "learning_rate": 4.9238852947468796e-06, "loss": 1.2241, "step": 1503 }, { "epoch": 0.21292560345437814, "grad_norm": 11.600424107658265, "learning_rate": 4.9237448675471555e-06, "loss": 1.2842, "step": 1504 }, { "epoch": 0.21306717632901537, "grad_norm": 9.003334275487713, "learning_rate": 4.9236043129329705e-06, "loss": 1.3238, "step": 1505 }, { "epoch": 0.21320874920365257, "grad_norm": 9.385269846951534, "learning_rate": 4.923463630911714e-06, "loss": 1.1504, "step": 1506 }, { "epoch": 0.2133503220782898, "grad_norm": 11.75502783158292, "learning_rate": 4.9233228214907815e-06, "loss": 1.2929, "step": 1507 }, { "epoch": 0.21349189495292703, "grad_norm": 9.595850413267632, "learning_rate": 4.923181884677574e-06, "loss": 1.1469, "step": 1508 }, { "epoch": 0.21363346782756423, "grad_norm": 10.595379999393272, "learning_rate": 4.923040820479504e-06, "loss": 1.4215, "step": 1509 }, { "epoch": 0.21377504070220146, "grad_norm": 11.201309291059879, "learning_rate": 4.922899628903983e-06, "loss": 1.3471, "step": 1510 }, { "epoch": 0.21391661357683867, "grad_norm": 9.276778765691144, "learning_rate": 4.9227583099584355e-06, "loss": 1.3545, "step": 1511 }, { "epoch": 0.2140581864514759, "grad_norm": 10.328906515912182, "learning_rate": 4.92261686365029e-06, "loss": 1.1576, "step": 1512 }, { "epoch": 0.21419975932611313, "grad_norm": 8.60607957858226, "learning_rate": 4.9224752899869835e-06, "loss": 1.1439, "step": 1513 }, { "epoch": 0.21434133220075033, "grad_norm": 9.289384045501844, "learning_rate": 4.922333588975956e-06, "loss": 1.2678, "step": 1514 }, { "epoch": 0.21448290507538756, "grad_norm": 8.179158628138827, "learning_rate": 4.922191760624659e-06, "loss": 1.0544, "step": 1515 }, { "epoch": 0.2146244779500248, "grad_norm": 10.709355366145921, "learning_rate": 4.922049804940546e-06, "loss": 1.2237, "step": 1516 }, { "epoch": 0.214766050824662, "grad_norm": 9.807224288981798, "learning_rate": 4.9219077219310804e-06, "loss": 1.329, "step": 1517 }, { "epoch": 0.21490762369929922, "grad_norm": 9.93177678587833, "learning_rate": 4.921765511603733e-06, "loss": 1.1076, "step": 1518 }, { "epoch": 0.21504919657393642, "grad_norm": 10.100275817730834, "learning_rate": 4.921623173965978e-06, "loss": 1.1365, "step": 1519 }, { "epoch": 0.21519076944857365, "grad_norm": 9.276492559636125, "learning_rate": 4.921480709025298e-06, "loss": 1.1469, "step": 1520 }, { "epoch": 0.21533234232321088, "grad_norm": 10.630386961589169, "learning_rate": 4.921338116789183e-06, "loss": 1.2948, "step": 1521 }, { "epoch": 0.2154739151978481, "grad_norm": 10.51227324578016, "learning_rate": 4.921195397265129e-06, "loss": 1.3813, "step": 1522 }, { "epoch": 0.21561548807248532, "grad_norm": 9.431450325852511, "learning_rate": 4.921052550460638e-06, "loss": 1.3463, "step": 1523 }, { "epoch": 0.21575706094712252, "grad_norm": 9.767451391711818, "learning_rate": 4.920909576383219e-06, "loss": 1.1654, "step": 1524 }, { "epoch": 0.21589863382175975, "grad_norm": 6.791915452141112, "learning_rate": 4.920766475040389e-06, "loss": 1.0723, "step": 1525 }, { "epoch": 0.21604020669639698, "grad_norm": 10.140465122367377, "learning_rate": 4.920623246439671e-06, "loss": 1.5105, "step": 1526 }, { "epoch": 0.21618177957103418, "grad_norm": 9.349758500024963, "learning_rate": 4.920479890588593e-06, "loss": 1.2926, "step": 1527 }, { "epoch": 0.2163233524456714, "grad_norm": 10.188339479647263, "learning_rate": 4.920336407494692e-06, "loss": 1.2569, "step": 1528 }, { "epoch": 0.21646492532030862, "grad_norm": 12.806459966365303, "learning_rate": 4.920192797165511e-06, "loss": 1.2767, "step": 1529 }, { "epoch": 0.21660649819494585, "grad_norm": 9.01844212038912, "learning_rate": 4.9200490596086e-06, "loss": 1.2032, "step": 1530 }, { "epoch": 0.21674807106958308, "grad_norm": 10.960959425650737, "learning_rate": 4.919905194831514e-06, "loss": 1.1385, "step": 1531 }, { "epoch": 0.21688964394422028, "grad_norm": 10.160063136347643, "learning_rate": 4.919761202841815e-06, "loss": 1.048, "step": 1532 }, { "epoch": 0.2170312168188575, "grad_norm": 8.591604401899012, "learning_rate": 4.919617083647074e-06, "loss": 1.3581, "step": 1533 }, { "epoch": 0.21717278969349474, "grad_norm": 8.779367482683254, "learning_rate": 4.9194728372548685e-06, "loss": 1.1689, "step": 1534 }, { "epoch": 0.21731436256813194, "grad_norm": 8.25552737865999, "learning_rate": 4.919328463672779e-06, "loss": 1.1069, "step": 1535 }, { "epoch": 0.21745593544276917, "grad_norm": 7.8371926668740945, "learning_rate": 4.919183962908397e-06, "loss": 1.1057, "step": 1536 }, { "epoch": 0.21759750831740637, "grad_norm": 9.899953098378681, "learning_rate": 4.919039334969317e-06, "loss": 1.31, "step": 1537 }, { "epoch": 0.2177390811920436, "grad_norm": 7.509496398782683, "learning_rate": 4.918894579863143e-06, "loss": 1.1715, "step": 1538 }, { "epoch": 0.21788065406668083, "grad_norm": 9.559984462198695, "learning_rate": 4.9187496975974845e-06, "loss": 1.1899, "step": 1539 }, { "epoch": 0.21802222694131804, "grad_norm": 9.111879295695065, "learning_rate": 4.918604688179959e-06, "loss": 1.163, "step": 1540 }, { "epoch": 0.21816379981595527, "grad_norm": 9.155063799683186, "learning_rate": 4.918459551618187e-06, "loss": 1.1664, "step": 1541 }, { "epoch": 0.21830537269059247, "grad_norm": 9.12080474983803, "learning_rate": 4.9183142879198e-06, "loss": 1.1996, "step": 1542 }, { "epoch": 0.2184469455652297, "grad_norm": 9.579482214200697, "learning_rate": 4.918168897092435e-06, "loss": 1.0759, "step": 1543 }, { "epoch": 0.21858851843986693, "grad_norm": 8.727324832413926, "learning_rate": 4.9180233791437326e-06, "loss": 1.2164, "step": 1544 }, { "epoch": 0.21873009131450413, "grad_norm": 10.064780788317972, "learning_rate": 4.917877734081345e-06, "loss": 1.0004, "step": 1545 }, { "epoch": 0.21887166418914136, "grad_norm": 8.46206863182905, "learning_rate": 4.917731961912927e-06, "loss": 1.1718, "step": 1546 }, { "epoch": 0.2190132370637786, "grad_norm": 8.833738449691534, "learning_rate": 4.917586062646144e-06, "loss": 1.2841, "step": 1547 }, { "epoch": 0.2191548099384158, "grad_norm": 7.092406830523627, "learning_rate": 4.917440036288663e-06, "loss": 1.1065, "step": 1548 }, { "epoch": 0.21929638281305303, "grad_norm": 8.224112846045596, "learning_rate": 4.917293882848162e-06, "loss": 1.2793, "step": 1549 }, { "epoch": 0.21943795568769023, "grad_norm": 9.960592058961106, "learning_rate": 4.9171476023323245e-06, "loss": 1.1933, "step": 1550 }, { "epoch": 0.21957952856232746, "grad_norm": 8.456810223728345, "learning_rate": 4.917001194748839e-06, "loss": 1.1871, "step": 1551 }, { "epoch": 0.2197211014369647, "grad_norm": 8.19365897284487, "learning_rate": 4.916854660105404e-06, "loss": 1.0013, "step": 1552 }, { "epoch": 0.2198626743116019, "grad_norm": 8.956099783255919, "learning_rate": 4.916707998409721e-06, "loss": 1.1158, "step": 1553 }, { "epoch": 0.22000424718623912, "grad_norm": 8.747376402848197, "learning_rate": 4.916561209669501e-06, "loss": 1.2374, "step": 1554 }, { "epoch": 0.22014582006087632, "grad_norm": 8.080864860402619, "learning_rate": 4.9164142938924595e-06, "loss": 1.1072, "step": 1555 }, { "epoch": 0.22028739293551355, "grad_norm": 8.46625732673881, "learning_rate": 4.916267251086321e-06, "loss": 1.079, "step": 1556 }, { "epoch": 0.22042896581015078, "grad_norm": 9.350867784998327, "learning_rate": 4.916120081258814e-06, "loss": 1.2092, "step": 1557 }, { "epoch": 0.220570538684788, "grad_norm": 11.230479619559958, "learning_rate": 4.915972784417676e-06, "loss": 1.3025, "step": 1558 }, { "epoch": 0.22071211155942522, "grad_norm": 9.255089674092211, "learning_rate": 4.91582536057065e-06, "loss": 1.1248, "step": 1559 }, { "epoch": 0.22085368443406245, "grad_norm": 8.997341187123006, "learning_rate": 4.915677809725487e-06, "loss": 1.3082, "step": 1560 }, { "epoch": 0.22099525730869965, "grad_norm": 8.561200788180761, "learning_rate": 4.915530131889942e-06, "loss": 1.1094, "step": 1561 }, { "epoch": 0.22113683018333688, "grad_norm": 9.699599849666601, "learning_rate": 4.915382327071778e-06, "loss": 1.4162, "step": 1562 }, { "epoch": 0.22127840305797408, "grad_norm": 8.685798313695258, "learning_rate": 4.915234395278768e-06, "loss": 1.289, "step": 1563 }, { "epoch": 0.2214199759326113, "grad_norm": 8.036358229252052, "learning_rate": 4.915086336518686e-06, "loss": 1.2457, "step": 1564 }, { "epoch": 0.22156154880724854, "grad_norm": 9.398848247986127, "learning_rate": 4.914938150799315e-06, "loss": 1.2137, "step": 1565 }, { "epoch": 0.22170312168188575, "grad_norm": 8.989445325396934, "learning_rate": 4.914789838128447e-06, "loss": 1.2789, "step": 1566 }, { "epoch": 0.22184469455652298, "grad_norm": 9.029195690475122, "learning_rate": 4.914641398513879e-06, "loss": 1.2125, "step": 1567 }, { "epoch": 0.22198626743116018, "grad_norm": 7.660668666727906, "learning_rate": 4.914492831963411e-06, "loss": 1.0856, "step": 1568 }, { "epoch": 0.2221278403057974, "grad_norm": 9.039573134074919, "learning_rate": 4.914344138484856e-06, "loss": 1.0491, "step": 1569 }, { "epoch": 0.22226941318043464, "grad_norm": 10.165481831867265, "learning_rate": 4.91419531808603e-06, "loss": 1.2054, "step": 1570 }, { "epoch": 0.22241098605507184, "grad_norm": 7.951778035495034, "learning_rate": 4.914046370774757e-06, "loss": 1.2309, "step": 1571 }, { "epoch": 0.22255255892970907, "grad_norm": 10.170326408016876, "learning_rate": 4.913897296558865e-06, "loss": 1.3048, "step": 1572 }, { "epoch": 0.22269413180434627, "grad_norm": 14.034497401388135, "learning_rate": 4.913748095446192e-06, "loss": 1.4338, "step": 1573 }, { "epoch": 0.2228357046789835, "grad_norm": 8.730028302648225, "learning_rate": 4.9135987674445815e-06, "loss": 1.3078, "step": 1574 }, { "epoch": 0.22297727755362073, "grad_norm": 9.234447704874531, "learning_rate": 4.913449312561884e-06, "loss": 1.3408, "step": 1575 }, { "epoch": 0.22311885042825794, "grad_norm": 10.59721853362542, "learning_rate": 4.913299730805956e-06, "loss": 1.1759, "step": 1576 }, { "epoch": 0.22326042330289517, "grad_norm": 7.848510027365589, "learning_rate": 4.913150022184659e-06, "loss": 1.0721, "step": 1577 }, { "epoch": 0.2234019961775324, "grad_norm": 10.196066408308486, "learning_rate": 4.913000186705866e-06, "loss": 1.3209, "step": 1578 }, { "epoch": 0.2235435690521696, "grad_norm": 8.058448424071347, "learning_rate": 4.912850224377452e-06, "loss": 1.1341, "step": 1579 }, { "epoch": 0.22368514192680683, "grad_norm": 9.631298828001228, "learning_rate": 4.912700135207301e-06, "loss": 1.2182, "step": 1580 }, { "epoch": 0.22382671480144403, "grad_norm": 10.500248860634395, "learning_rate": 4.9125499192033035e-06, "loss": 1.2878, "step": 1581 }, { "epoch": 0.22396828767608126, "grad_norm": 8.632492128529345, "learning_rate": 4.912399576373354e-06, "loss": 1.2156, "step": 1582 }, { "epoch": 0.2241098605507185, "grad_norm": 8.518928544992468, "learning_rate": 4.9122491067253586e-06, "loss": 1.2762, "step": 1583 }, { "epoch": 0.2242514334253557, "grad_norm": 8.595620357545736, "learning_rate": 4.912098510267226e-06, "loss": 1.193, "step": 1584 }, { "epoch": 0.22439300629999293, "grad_norm": 7.850963664585799, "learning_rate": 4.911947787006873e-06, "loss": 1.0856, "step": 1585 }, { "epoch": 0.22453457917463013, "grad_norm": 9.380574514530347, "learning_rate": 4.911796936952224e-06, "loss": 1.1641, "step": 1586 }, { "epoch": 0.22467615204926736, "grad_norm": 9.481423938541285, "learning_rate": 4.911645960111208e-06, "loss": 1.2723, "step": 1587 }, { "epoch": 0.2248177249239046, "grad_norm": 10.512144422470932, "learning_rate": 4.911494856491762e-06, "loss": 1.363, "step": 1588 }, { "epoch": 0.2249592977985418, "grad_norm": 8.657937666878567, "learning_rate": 4.91134362610183e-06, "loss": 1.2396, "step": 1589 }, { "epoch": 0.22510087067317902, "grad_norm": 9.97175194196046, "learning_rate": 4.9111922689493605e-06, "loss": 1.2242, "step": 1590 }, { "epoch": 0.22524244354781625, "grad_norm": 7.920394127026849, "learning_rate": 4.911040785042313e-06, "loss": 1.2122, "step": 1591 }, { "epoch": 0.22538401642245345, "grad_norm": 9.189704889949159, "learning_rate": 4.910889174388647e-06, "loss": 1.347, "step": 1592 }, { "epoch": 0.22552558929709068, "grad_norm": 10.542609814656974, "learning_rate": 4.910737436996335e-06, "loss": 1.2625, "step": 1593 }, { "epoch": 0.2256671621717279, "grad_norm": 9.201832497577673, "learning_rate": 4.910585572873355e-06, "loss": 1.0942, "step": 1594 }, { "epoch": 0.22580873504636512, "grad_norm": 9.165166789949742, "learning_rate": 4.910433582027688e-06, "loss": 1.2191, "step": 1595 }, { "epoch": 0.22595030792100235, "grad_norm": 8.204009903907842, "learning_rate": 4.910281464467325e-06, "loss": 1.1324, "step": 1596 }, { "epoch": 0.22609188079563955, "grad_norm": 8.14829986251206, "learning_rate": 4.910129220200263e-06, "loss": 1.0764, "step": 1597 }, { "epoch": 0.22623345367027678, "grad_norm": 10.006720382813953, "learning_rate": 4.909976849234504e-06, "loss": 1.3305, "step": 1598 }, { "epoch": 0.22637502654491398, "grad_norm": 9.026190478988813, "learning_rate": 4.90982435157806e-06, "loss": 1.0967, "step": 1599 }, { "epoch": 0.2265165994195512, "grad_norm": 10.642716660125592, "learning_rate": 4.909671727238946e-06, "loss": 1.2511, "step": 1600 }, { "epoch": 0.22665817229418844, "grad_norm": 9.340558071413572, "learning_rate": 4.909518976225186e-06, "loss": 1.1982, "step": 1601 }, { "epoch": 0.22679974516882564, "grad_norm": 8.030297130061273, "learning_rate": 4.90936609854481e-06, "loss": 1.1125, "step": 1602 }, { "epoch": 0.22694131804346288, "grad_norm": 10.11435808951751, "learning_rate": 4.909213094205855e-06, "loss": 1.4206, "step": 1603 }, { "epoch": 0.2270828909181001, "grad_norm": 8.438851255369649, "learning_rate": 4.909059963216363e-06, "loss": 1.2588, "step": 1604 }, { "epoch": 0.2272244637927373, "grad_norm": 9.881348705824207, "learning_rate": 4.908906705584387e-06, "loss": 1.411, "step": 1605 }, { "epoch": 0.22736603666737454, "grad_norm": 9.554637993023826, "learning_rate": 4.90875332131798e-06, "loss": 1.3451, "step": 1606 }, { "epoch": 0.22750760954201174, "grad_norm": 10.829486183383482, "learning_rate": 4.908599810425208e-06, "loss": 1.2309, "step": 1607 }, { "epoch": 0.22764918241664897, "grad_norm": 8.625371510341235, "learning_rate": 4.90844617291414e-06, "loss": 1.2348, "step": 1608 }, { "epoch": 0.2277907552912862, "grad_norm": 6.905837931357764, "learning_rate": 4.908292408792852e-06, "loss": 0.9421, "step": 1609 }, { "epoch": 0.2279323281659234, "grad_norm": 11.999696091936167, "learning_rate": 4.908138518069428e-06, "loss": 1.2362, "step": 1610 }, { "epoch": 0.22807390104056063, "grad_norm": 11.478347296760063, "learning_rate": 4.907984500751956e-06, "loss": 1.1825, "step": 1611 }, { "epoch": 0.22821547391519784, "grad_norm": 8.471650812720965, "learning_rate": 4.907830356848537e-06, "loss": 1.1973, "step": 1612 }, { "epoch": 0.22835704678983507, "grad_norm": 7.689452132810452, "learning_rate": 4.907676086367269e-06, "loss": 1.1612, "step": 1613 }, { "epoch": 0.2284986196644723, "grad_norm": 9.18915734945097, "learning_rate": 4.907521689316265e-06, "loss": 1.2385, "step": 1614 }, { "epoch": 0.2286401925391095, "grad_norm": 9.810798254481472, "learning_rate": 4.907367165703643e-06, "loss": 1.3196, "step": 1615 }, { "epoch": 0.22878176541374673, "grad_norm": 10.591330282860815, "learning_rate": 4.907212515537522e-06, "loss": 1.2345, "step": 1616 }, { "epoch": 0.22892333828838393, "grad_norm": 8.400393576711293, "learning_rate": 4.907057738826034e-06, "loss": 1.2009, "step": 1617 }, { "epoch": 0.22906491116302116, "grad_norm": 10.504892208630292, "learning_rate": 4.906902835577316e-06, "loss": 1.2034, "step": 1618 }, { "epoch": 0.2292064840376584, "grad_norm": 8.181096725252042, "learning_rate": 4.906747805799511e-06, "loss": 1.1529, "step": 1619 }, { "epoch": 0.2293480569122956, "grad_norm": 11.697179366281135, "learning_rate": 4.906592649500767e-06, "loss": 1.3433, "step": 1620 }, { "epoch": 0.22948962978693282, "grad_norm": 11.603694301193343, "learning_rate": 4.906437366689244e-06, "loss": 1.4852, "step": 1621 }, { "epoch": 0.22963120266157006, "grad_norm": 7.873814084812233, "learning_rate": 4.9062819573731015e-06, "loss": 1.1123, "step": 1622 }, { "epoch": 0.22977277553620726, "grad_norm": 7.69382594897716, "learning_rate": 4.906126421560511e-06, "loss": 1.182, "step": 1623 }, { "epoch": 0.2299143484108445, "grad_norm": 9.411875718527929, "learning_rate": 4.905970759259648e-06, "loss": 1.2332, "step": 1624 }, { "epoch": 0.2300559212854817, "grad_norm": 8.995938126232089, "learning_rate": 4.905814970478697e-06, "loss": 1.1243, "step": 1625 }, { "epoch": 0.23019749416011892, "grad_norm": 10.479542011998902, "learning_rate": 4.905659055225847e-06, "loss": 1.2788, "step": 1626 }, { "epoch": 0.23033906703475615, "grad_norm": 9.708253742814064, "learning_rate": 4.905503013509293e-06, "loss": 1.1516, "step": 1627 }, { "epoch": 0.23048063990939335, "grad_norm": 8.53664710949623, "learning_rate": 4.90534684533724e-06, "loss": 1.1778, "step": 1628 }, { "epoch": 0.23062221278403058, "grad_norm": 10.331249601222202, "learning_rate": 4.905190550717897e-06, "loss": 1.1749, "step": 1629 }, { "epoch": 0.23076378565866779, "grad_norm": 13.527190905684304, "learning_rate": 4.90503412965948e-06, "loss": 1.2501, "step": 1630 }, { "epoch": 0.23090535853330502, "grad_norm": 10.077239241598955, "learning_rate": 4.904877582170212e-06, "loss": 1.0769, "step": 1631 }, { "epoch": 0.23104693140794225, "grad_norm": 8.806425557711576, "learning_rate": 4.904720908258323e-06, "loss": 1.2647, "step": 1632 }, { "epoch": 0.23118850428257945, "grad_norm": 8.407015967658724, "learning_rate": 4.904564107932048e-06, "loss": 1.2178, "step": 1633 }, { "epoch": 0.23133007715721668, "grad_norm": 8.498078521760771, "learning_rate": 4.904407181199631e-06, "loss": 1.1159, "step": 1634 }, { "epoch": 0.2314716500318539, "grad_norm": 9.092587531074633, "learning_rate": 4.904250128069322e-06, "loss": 1.1308, "step": 1635 }, { "epoch": 0.2316132229064911, "grad_norm": 11.602529157960696, "learning_rate": 4.904092948549376e-06, "loss": 1.0638, "step": 1636 }, { "epoch": 0.23175479578112834, "grad_norm": 12.14360962147054, "learning_rate": 4.9039356426480565e-06, "loss": 1.1775, "step": 1637 }, { "epoch": 0.23189636865576554, "grad_norm": 10.374973894569003, "learning_rate": 4.903778210373632e-06, "loss": 1.1918, "step": 1638 }, { "epoch": 0.23203794153040277, "grad_norm": 8.70959736047773, "learning_rate": 4.90362065173438e-06, "loss": 1.3358, "step": 1639 }, { "epoch": 0.23217951440504, "grad_norm": 15.814329018941523, "learning_rate": 4.9034629667385825e-06, "loss": 1.1389, "step": 1640 }, { "epoch": 0.2323210872796772, "grad_norm": 9.95110191492827, "learning_rate": 4.903305155394529e-06, "loss": 1.1392, "step": 1641 }, { "epoch": 0.23246266015431444, "grad_norm": 10.314120448561836, "learning_rate": 4.903147217710515e-06, "loss": 1.2272, "step": 1642 }, { "epoch": 0.23260423302895164, "grad_norm": 9.539474390933362, "learning_rate": 4.902989153694843e-06, "loss": 1.2036, "step": 1643 }, { "epoch": 0.23274580590358887, "grad_norm": 10.146509752434579, "learning_rate": 4.902830963355825e-06, "loss": 1.3415, "step": 1644 }, { "epoch": 0.2328873787782261, "grad_norm": 9.02932792722597, "learning_rate": 4.902672646701774e-06, "loss": 0.9473, "step": 1645 }, { "epoch": 0.2330289516528633, "grad_norm": 11.332627068746985, "learning_rate": 4.902514203741013e-06, "loss": 1.1011, "step": 1646 }, { "epoch": 0.23317052452750053, "grad_norm": 10.723735928680192, "learning_rate": 4.902355634481872e-06, "loss": 1.2487, "step": 1647 }, { "epoch": 0.23331209740213776, "grad_norm": 9.938679559146403, "learning_rate": 4.9021969389326866e-06, "loss": 1.321, "step": 1648 }, { "epoch": 0.23345367027677497, "grad_norm": 8.79266158948179, "learning_rate": 4.902038117101798e-06, "loss": 1.1318, "step": 1649 }, { "epoch": 0.2335952431514122, "grad_norm": 10.254661430524536, "learning_rate": 4.901879168997559e-06, "loss": 0.9865, "step": 1650 }, { "epoch": 0.2337368160260494, "grad_norm": 10.251679073601798, "learning_rate": 4.901720094628322e-06, "loss": 1.1355, "step": 1651 }, { "epoch": 0.23387838890068663, "grad_norm": 9.763988925450915, "learning_rate": 4.901560894002449e-06, "loss": 1.2347, "step": 1652 }, { "epoch": 0.23401996177532386, "grad_norm": 8.368327201208398, "learning_rate": 4.9014015671283124e-06, "loss": 1.2211, "step": 1653 }, { "epoch": 0.23416153464996106, "grad_norm": 8.377161003409853, "learning_rate": 4.901242114014285e-06, "loss": 1.1491, "step": 1654 }, { "epoch": 0.2343031075245983, "grad_norm": 9.885682713767682, "learning_rate": 4.901082534668751e-06, "loss": 1.0743, "step": 1655 }, { "epoch": 0.2344446803992355, "grad_norm": 11.028647831808161, "learning_rate": 4.900922829100097e-06, "loss": 1.2353, "step": 1656 }, { "epoch": 0.23458625327387272, "grad_norm": 8.558580378526752, "learning_rate": 4.900762997316722e-06, "loss": 1.2449, "step": 1657 }, { "epoch": 0.23472782614850995, "grad_norm": 9.887753520276199, "learning_rate": 4.900603039327024e-06, "loss": 1.1997, "step": 1658 }, { "epoch": 0.23486939902314716, "grad_norm": 10.14731202260873, "learning_rate": 4.9004429551394155e-06, "loss": 1.4505, "step": 1659 }, { "epoch": 0.2350109718977844, "grad_norm": 9.908581769142407, "learning_rate": 4.900282744762311e-06, "loss": 1.3118, "step": 1660 }, { "epoch": 0.23515254477242162, "grad_norm": 10.515329342635066, "learning_rate": 4.900122408204132e-06, "loss": 1.1904, "step": 1661 }, { "epoch": 0.23529411764705882, "grad_norm": 11.58586967381529, "learning_rate": 4.899961945473307e-06, "loss": 1.2931, "step": 1662 }, { "epoch": 0.23543569052169605, "grad_norm": 10.666914897255577, "learning_rate": 4.899801356578273e-06, "loss": 1.051, "step": 1663 }, { "epoch": 0.23557726339633325, "grad_norm": 9.073320755717436, "learning_rate": 4.89964064152747e-06, "loss": 1.2321, "step": 1664 }, { "epoch": 0.23571883627097048, "grad_norm": 9.210233420397287, "learning_rate": 4.899479800329348e-06, "loss": 1.2238, "step": 1665 }, { "epoch": 0.2358604091456077, "grad_norm": 10.180855110992182, "learning_rate": 4.899318832992363e-06, "loss": 1.0725, "step": 1666 }, { "epoch": 0.23600198202024492, "grad_norm": 12.515774958137206, "learning_rate": 4.8991577395249755e-06, "loss": 1.1143, "step": 1667 }, { "epoch": 0.23614355489488215, "grad_norm": 8.75708854194391, "learning_rate": 4.898996519935654e-06, "loss": 1.1338, "step": 1668 }, { "epoch": 0.23628512776951935, "grad_norm": 8.976160052037015, "learning_rate": 4.898835174232875e-06, "loss": 1.2271, "step": 1669 }, { "epoch": 0.23642670064415658, "grad_norm": 10.927424209461059, "learning_rate": 4.898673702425118e-06, "loss": 1.2109, "step": 1670 }, { "epoch": 0.2365682735187938, "grad_norm": 9.238695402318251, "learning_rate": 4.898512104520875e-06, "loss": 1.216, "step": 1671 }, { "epoch": 0.236709846393431, "grad_norm": 8.986293103672793, "learning_rate": 4.898350380528638e-06, "loss": 1.1438, "step": 1672 }, { "epoch": 0.23685141926806824, "grad_norm": 8.437742328696032, "learning_rate": 4.8981885304569095e-06, "loss": 1.1872, "step": 1673 }, { "epoch": 0.23699299214270544, "grad_norm": 9.690236744857513, "learning_rate": 4.898026554314199e-06, "loss": 1.1462, "step": 1674 }, { "epoch": 0.23713456501734267, "grad_norm": 8.981055772734559, "learning_rate": 4.89786445210902e-06, "loss": 1.2017, "step": 1675 }, { "epoch": 0.2372761378919799, "grad_norm": 9.949521644633537, "learning_rate": 4.897702223849895e-06, "loss": 1.1851, "step": 1676 }, { "epoch": 0.2374177107666171, "grad_norm": 8.867353557090487, "learning_rate": 4.897539869545351e-06, "loss": 1.2955, "step": 1677 }, { "epoch": 0.23755928364125434, "grad_norm": 8.318797085440327, "learning_rate": 4.897377389203925e-06, "loss": 1.1191, "step": 1678 }, { "epoch": 0.23770085651589157, "grad_norm": 10.052610477731253, "learning_rate": 4.897214782834156e-06, "loss": 1.2563, "step": 1679 }, { "epoch": 0.23784242939052877, "grad_norm": 7.454834556048621, "learning_rate": 4.897052050444595e-06, "loss": 1.2355, "step": 1680 }, { "epoch": 0.237984002265166, "grad_norm": 6.743999781326888, "learning_rate": 4.8968891920437936e-06, "loss": 1.0705, "step": 1681 }, { "epoch": 0.2381255751398032, "grad_norm": 8.112158854266568, "learning_rate": 4.896726207640315e-06, "loss": 1.2263, "step": 1682 }, { "epoch": 0.23826714801444043, "grad_norm": 23.434744629702603, "learning_rate": 4.896563097242727e-06, "loss": 1.3718, "step": 1683 }, { "epoch": 0.23840872088907766, "grad_norm": 9.296608287928436, "learning_rate": 4.896399860859603e-06, "loss": 1.2924, "step": 1684 }, { "epoch": 0.23855029376371487, "grad_norm": 9.247248059811497, "learning_rate": 4.896236498499526e-06, "loss": 1.3394, "step": 1685 }, { "epoch": 0.2386918666383521, "grad_norm": 9.267106605167708, "learning_rate": 4.896073010171083e-06, "loss": 1.1026, "step": 1686 }, { "epoch": 0.2388334395129893, "grad_norm": 10.644795724925427, "learning_rate": 4.895909395882868e-06, "loss": 1.1889, "step": 1687 }, { "epoch": 0.23897501238762653, "grad_norm": 9.553551170419318, "learning_rate": 4.895745655643482e-06, "loss": 1.2634, "step": 1688 }, { "epoch": 0.23911658526226376, "grad_norm": 11.89771226963304, "learning_rate": 4.895581789461534e-06, "loss": 1.0475, "step": 1689 }, { "epoch": 0.23925815813690096, "grad_norm": 7.695858760751115, "learning_rate": 4.895417797345638e-06, "loss": 1.0027, "step": 1690 }, { "epoch": 0.2393997310115382, "grad_norm": 8.139679310173156, "learning_rate": 4.895253679304414e-06, "loss": 1.2493, "step": 1691 }, { "epoch": 0.23954130388617542, "grad_norm": 9.460949596142143, "learning_rate": 4.8950894353464905e-06, "loss": 1.239, "step": 1692 }, { "epoch": 0.23968287676081262, "grad_norm": 10.552572968932669, "learning_rate": 4.8949250654805e-06, "loss": 1.2568, "step": 1693 }, { "epoch": 0.23982444963544985, "grad_norm": 8.614587220295249, "learning_rate": 4.894760569715086e-06, "loss": 1.236, "step": 1694 }, { "epoch": 0.23996602251008706, "grad_norm": 10.005989188536697, "learning_rate": 4.894595948058893e-06, "loss": 1.2692, "step": 1695 }, { "epoch": 0.2401075953847243, "grad_norm": 9.875580975789475, "learning_rate": 4.894431200520578e-06, "loss": 1.3126, "step": 1696 }, { "epoch": 0.24024916825936152, "grad_norm": 11.426551790257186, "learning_rate": 4.894266327108799e-06, "loss": 1.1782, "step": 1697 }, { "epoch": 0.24039074113399872, "grad_norm": 9.23624118595423, "learning_rate": 4.894101327832225e-06, "loss": 1.0975, "step": 1698 }, { "epoch": 0.24053231400863595, "grad_norm": 8.427675305775034, "learning_rate": 4.8939362026995295e-06, "loss": 1.0424, "step": 1699 }, { "epoch": 0.24067388688327315, "grad_norm": 9.933490832144795, "learning_rate": 4.893770951719392e-06, "loss": 1.3858, "step": 1700 }, { "epoch": 0.24081545975791038, "grad_norm": 11.838332575327595, "learning_rate": 4.893605574900501e-06, "loss": 1.3169, "step": 1701 }, { "epoch": 0.2409570326325476, "grad_norm": 14.849976079131618, "learning_rate": 4.893440072251549e-06, "loss": 1.3055, "step": 1702 }, { "epoch": 0.24109860550718482, "grad_norm": 10.817494908986879, "learning_rate": 4.893274443781239e-06, "loss": 1.3813, "step": 1703 }, { "epoch": 0.24124017838182205, "grad_norm": 9.571793970156738, "learning_rate": 4.893108689498274e-06, "loss": 1.2097, "step": 1704 }, { "epoch": 0.24138175125645928, "grad_norm": 11.241429433918688, "learning_rate": 4.89294280941137e-06, "loss": 1.1726, "step": 1705 }, { "epoch": 0.24152332413109648, "grad_norm": 13.64269486461327, "learning_rate": 4.892776803529246e-06, "loss": 1.2227, "step": 1706 }, { "epoch": 0.2416648970057337, "grad_norm": 8.781190919507658, "learning_rate": 4.892610671860631e-06, "loss": 1.0939, "step": 1707 }, { "epoch": 0.2418064698803709, "grad_norm": 8.046100823650885, "learning_rate": 4.892444414414257e-06, "loss": 1.1127, "step": 1708 }, { "epoch": 0.24194804275500814, "grad_norm": 8.530838617599752, "learning_rate": 4.892278031198864e-06, "loss": 1.0907, "step": 1709 }, { "epoch": 0.24208961562964537, "grad_norm": 8.494713878606774, "learning_rate": 4.892111522223198e-06, "loss": 1.0277, "step": 1710 }, { "epoch": 0.24223118850428257, "grad_norm": 9.697040806495146, "learning_rate": 4.891944887496013e-06, "loss": 1.2192, "step": 1711 }, { "epoch": 0.2423727613789198, "grad_norm": 13.745219006339134, "learning_rate": 4.8917781270260686e-06, "loss": 1.3102, "step": 1712 }, { "epoch": 0.242514334253557, "grad_norm": 9.955103607175058, "learning_rate": 4.891611240822132e-06, "loss": 1.2568, "step": 1713 }, { "epoch": 0.24265590712819424, "grad_norm": 9.189595716547805, "learning_rate": 4.891444228892975e-06, "loss": 1.2663, "step": 1714 }, { "epoch": 0.24279748000283147, "grad_norm": 11.32039876606192, "learning_rate": 4.891277091247379e-06, "loss": 1.3237, "step": 1715 }, { "epoch": 0.24293905287746867, "grad_norm": 10.137768265153294, "learning_rate": 4.891109827894129e-06, "loss": 1.298, "step": 1716 }, { "epoch": 0.2430806257521059, "grad_norm": 9.967991528013005, "learning_rate": 4.890942438842018e-06, "loss": 1.3068, "step": 1717 }, { "epoch": 0.2432221986267431, "grad_norm": 7.671148013225942, "learning_rate": 4.890774924099845e-06, "loss": 1.2048, "step": 1718 }, { "epoch": 0.24336377150138033, "grad_norm": 9.254098757363417, "learning_rate": 4.890607283676418e-06, "loss": 1.1082, "step": 1719 }, { "epoch": 0.24350534437601756, "grad_norm": 8.800295772783809, "learning_rate": 4.890439517580548e-06, "loss": 1.2907, "step": 1720 }, { "epoch": 0.24364691725065477, "grad_norm": 8.951442274104274, "learning_rate": 4.890271625821056e-06, "loss": 1.2444, "step": 1721 }, { "epoch": 0.243788490125292, "grad_norm": 10.91481289642497, "learning_rate": 4.890103608406765e-06, "loss": 1.2563, "step": 1722 }, { "epoch": 0.24393006299992923, "grad_norm": 10.254453482421408, "learning_rate": 4.889935465346511e-06, "loss": 1.24, "step": 1723 }, { "epoch": 0.24407163587456643, "grad_norm": 10.847742406594296, "learning_rate": 4.8897671966491315e-06, "loss": 1.2455, "step": 1724 }, { "epoch": 0.24421320874920366, "grad_norm": 7.441503649296962, "learning_rate": 4.889598802323471e-06, "loss": 1.2348, "step": 1725 }, { "epoch": 0.24435478162384086, "grad_norm": 10.541674788601327, "learning_rate": 4.8894302823783845e-06, "loss": 1.174, "step": 1726 }, { "epoch": 0.2444963544984781, "grad_norm": 9.59709899303072, "learning_rate": 4.88926163682273e-06, "loss": 1.1528, "step": 1727 }, { "epoch": 0.24463792737311532, "grad_norm": 10.230782706856884, "learning_rate": 4.889092865665372e-06, "loss": 1.2214, "step": 1728 }, { "epoch": 0.24477950024775252, "grad_norm": 9.558874303070134, "learning_rate": 4.888923968915183e-06, "loss": 1.2472, "step": 1729 }, { "epoch": 0.24492107312238975, "grad_norm": 9.630870663588496, "learning_rate": 4.888754946581044e-06, "loss": 1.043, "step": 1730 }, { "epoch": 0.24506264599702696, "grad_norm": 10.815348872793441, "learning_rate": 4.8885857986718365e-06, "loss": 1.417, "step": 1731 }, { "epoch": 0.2452042188716642, "grad_norm": 8.563698789114643, "learning_rate": 4.888416525196455e-06, "loss": 1.3778, "step": 1732 }, { "epoch": 0.24534579174630142, "grad_norm": 13.78542799126468, "learning_rate": 4.8882471261637985e-06, "loss": 1.1437, "step": 1733 }, { "epoch": 0.24548736462093862, "grad_norm": 9.482934179325198, "learning_rate": 4.888077601582772e-06, "loss": 1.1318, "step": 1734 }, { "epoch": 0.24562893749557585, "grad_norm": 8.65663338924216, "learning_rate": 4.887907951462284e-06, "loss": 1.2896, "step": 1735 }, { "epoch": 0.24577051037021308, "grad_norm": 11.594037569083529, "learning_rate": 4.8877381758112576e-06, "loss": 1.3348, "step": 1736 }, { "epoch": 0.24591208324485028, "grad_norm": 10.385114404583565, "learning_rate": 4.887568274638616e-06, "loss": 1.2433, "step": 1737 }, { "epoch": 0.2460536561194875, "grad_norm": 10.126679834487454, "learning_rate": 4.887398247953289e-06, "loss": 1.2399, "step": 1738 }, { "epoch": 0.24619522899412472, "grad_norm": 10.032142480620221, "learning_rate": 4.887228095764216e-06, "loss": 1.3307, "step": 1739 }, { "epoch": 0.24633680186876195, "grad_norm": 11.622697499632723, "learning_rate": 4.887057818080343e-06, "loss": 1.2797, "step": 1740 }, { "epoch": 0.24647837474339918, "grad_norm": 8.829567256359187, "learning_rate": 4.886887414910621e-06, "loss": 1.3563, "step": 1741 }, { "epoch": 0.24661994761803638, "grad_norm": 9.88092867421093, "learning_rate": 4.8867168862640056e-06, "loss": 1.3475, "step": 1742 }, { "epoch": 0.2467615204926736, "grad_norm": 8.471993926538698, "learning_rate": 4.886546232149464e-06, "loss": 1.1857, "step": 1743 }, { "epoch": 0.2469030933673108, "grad_norm": 9.090736335500113, "learning_rate": 4.886375452575967e-06, "loss": 1.0805, "step": 1744 }, { "epoch": 0.24704466624194804, "grad_norm": 10.085366750771069, "learning_rate": 4.886204547552491e-06, "loss": 1.2696, "step": 1745 }, { "epoch": 0.24718623911658527, "grad_norm": 10.84223755944363, "learning_rate": 4.886033517088021e-06, "loss": 1.2417, "step": 1746 }, { "epoch": 0.24732781199122247, "grad_norm": 10.474372105682553, "learning_rate": 4.885862361191549e-06, "loss": 1.0827, "step": 1747 }, { "epoch": 0.2474693848658597, "grad_norm": 8.90701023002629, "learning_rate": 4.885691079872071e-06, "loss": 1.2494, "step": 1748 }, { "epoch": 0.24761095774049693, "grad_norm": 8.780591403794544, "learning_rate": 4.885519673138592e-06, "loss": 1.3383, "step": 1749 }, { "epoch": 0.24775253061513414, "grad_norm": 8.608151994575767, "learning_rate": 4.8853481410001225e-06, "loss": 1.1831, "step": 1750 }, { "epoch": 0.24789410348977137, "grad_norm": 9.203120854996945, "learning_rate": 4.88517648346568e-06, "loss": 1.293, "step": 1751 }, { "epoch": 0.24803567636440857, "grad_norm": 11.498285580427305, "learning_rate": 4.885004700544288e-06, "loss": 1.2573, "step": 1752 }, { "epoch": 0.2481772492390458, "grad_norm": 9.993948058837127, "learning_rate": 4.884832792244977e-06, "loss": 1.2009, "step": 1753 }, { "epoch": 0.24831882211368303, "grad_norm": 8.411118242850641, "learning_rate": 4.884660758576785e-06, "loss": 1.2503, "step": 1754 }, { "epoch": 0.24846039498832023, "grad_norm": 7.33039921525624, "learning_rate": 4.884488599548755e-06, "loss": 1.1655, "step": 1755 }, { "epoch": 0.24860196786295746, "grad_norm": 9.733501070571695, "learning_rate": 4.884316315169936e-06, "loss": 1.2042, "step": 1756 }, { "epoch": 0.24874354073759466, "grad_norm": 12.10613048772159, "learning_rate": 4.8841439054493864e-06, "loss": 1.1043, "step": 1757 }, { "epoch": 0.2488851136122319, "grad_norm": 7.7068538036307075, "learning_rate": 4.88397137039617e-06, "loss": 1.1107, "step": 1758 }, { "epoch": 0.24902668648686913, "grad_norm": 10.201274148982337, "learning_rate": 4.883798710019356e-06, "loss": 1.1562, "step": 1759 }, { "epoch": 0.24916825936150633, "grad_norm": 11.802822771109076, "learning_rate": 4.883625924328022e-06, "loss": 1.2295, "step": 1760 }, { "epoch": 0.24930983223614356, "grad_norm": 10.208384071275908, "learning_rate": 4.88345301333125e-06, "loss": 1.0976, "step": 1761 }, { "epoch": 0.24945140511078076, "grad_norm": 10.830782037257245, "learning_rate": 4.88327997703813e-06, "loss": 1.2701, "step": 1762 }, { "epoch": 0.249592977985418, "grad_norm": 7.491903385189095, "learning_rate": 4.883106815457758e-06, "loss": 1.2593, "step": 1763 }, { "epoch": 0.24973455086005522, "grad_norm": 8.106558661661614, "learning_rate": 4.882933528599239e-06, "loss": 1.3122, "step": 1764 }, { "epoch": 0.24987612373469242, "grad_norm": 9.866943549661517, "learning_rate": 4.882760116471681e-06, "loss": 1.2182, "step": 1765 }, { "epoch": 0.25001769660932965, "grad_norm": 8.94536239410212, "learning_rate": 4.8825865790841995e-06, "loss": 1.0812, "step": 1766 }, { "epoch": 0.2501592694839669, "grad_norm": 10.463678298648027, "learning_rate": 4.882412916445919e-06, "loss": 1.1548, "step": 1767 }, { "epoch": 0.2503008423586041, "grad_norm": 8.859423225177387, "learning_rate": 4.882239128565968e-06, "loss": 1.0671, "step": 1768 }, { "epoch": 0.2504424152332413, "grad_norm": 9.786862064389322, "learning_rate": 4.882065215453481e-06, "loss": 1.2775, "step": 1769 }, { "epoch": 0.2505839881078785, "grad_norm": 11.736595786856702, "learning_rate": 4.881891177117602e-06, "loss": 1.2745, "step": 1770 }, { "epoch": 0.25072556098251575, "grad_norm": 12.63818637632839, "learning_rate": 4.881717013567481e-06, "loss": 1.2149, "step": 1771 }, { "epoch": 0.250867133857153, "grad_norm": 10.459856953271883, "learning_rate": 4.881542724812272e-06, "loss": 1.2204, "step": 1772 }, { "epoch": 0.2510087067317902, "grad_norm": 15.638767372202803, "learning_rate": 4.881368310861137e-06, "loss": 1.3673, "step": 1773 }, { "epoch": 0.2511502796064274, "grad_norm": 11.305103898041265, "learning_rate": 4.881193771723246e-06, "loss": 1.2403, "step": 1774 }, { "epoch": 0.2512918524810646, "grad_norm": 8.319516540565925, "learning_rate": 4.881019107407774e-06, "loss": 1.2069, "step": 1775 }, { "epoch": 0.25143342535570185, "grad_norm": 9.010079990925224, "learning_rate": 4.8808443179239025e-06, "loss": 1.2722, "step": 1776 }, { "epoch": 0.2515749982303391, "grad_norm": 8.048758691841382, "learning_rate": 4.880669403280821e-06, "loss": 1.2874, "step": 1777 }, { "epoch": 0.2517165711049763, "grad_norm": 9.757112696333138, "learning_rate": 4.880494363487723e-06, "loss": 1.0987, "step": 1778 }, { "epoch": 0.2518581439796135, "grad_norm": 10.030421332716607, "learning_rate": 4.880319198553813e-06, "loss": 1.3214, "step": 1779 }, { "epoch": 0.2519997168542507, "grad_norm": 9.568631867739112, "learning_rate": 4.880143908488296e-06, "loss": 1.1204, "step": 1780 }, { "epoch": 0.25214128972888794, "grad_norm": 9.811645470603812, "learning_rate": 4.87996849330039e-06, "loss": 1.1877, "step": 1781 }, { "epoch": 0.25228286260352517, "grad_norm": 10.161788979369256, "learning_rate": 4.8797929529993135e-06, "loss": 1.3785, "step": 1782 }, { "epoch": 0.2524244354781624, "grad_norm": 9.50006665658153, "learning_rate": 4.8796172875942965e-06, "loss": 1.1034, "step": 1783 }, { "epoch": 0.2525660083527996, "grad_norm": 10.594190380204225, "learning_rate": 4.879441497094572e-06, "loss": 1.2215, "step": 1784 }, { "epoch": 0.2527075812274368, "grad_norm": 12.65921068765249, "learning_rate": 4.879265581509384e-06, "loss": 1.2777, "step": 1785 }, { "epoch": 0.25284915410207404, "grad_norm": 9.277327302617, "learning_rate": 4.8790895408479776e-06, "loss": 1.2089, "step": 1786 }, { "epoch": 0.25299072697671127, "grad_norm": 9.705470982501957, "learning_rate": 4.878913375119608e-06, "loss": 1.2562, "step": 1787 }, { "epoch": 0.2531322998513485, "grad_norm": 8.863486975631401, "learning_rate": 4.878737084333536e-06, "loss": 1.2106, "step": 1788 }, { "epoch": 0.2532738727259857, "grad_norm": 9.009522380807622, "learning_rate": 4.878560668499029e-06, "loss": 1.195, "step": 1789 }, { "epoch": 0.2534154456006229, "grad_norm": 10.677609870147087, "learning_rate": 4.8783841276253605e-06, "loss": 1.2073, "step": 1790 }, { "epoch": 0.25355701847526013, "grad_norm": 11.582270365364739, "learning_rate": 4.8782074617218135e-06, "loss": 1.1021, "step": 1791 }, { "epoch": 0.25369859134989736, "grad_norm": 9.673634309018968, "learning_rate": 4.878030670797672e-06, "loss": 1.2397, "step": 1792 }, { "epoch": 0.2538401642245346, "grad_norm": 8.123696560112352, "learning_rate": 4.877853754862232e-06, "loss": 1.2178, "step": 1793 }, { "epoch": 0.2539817370991718, "grad_norm": 10.430149436750618, "learning_rate": 4.8776767139247936e-06, "loss": 1.3798, "step": 1794 }, { "epoch": 0.254123309973809, "grad_norm": 10.192268987175211, "learning_rate": 4.877499547994662e-06, "loss": 1.3648, "step": 1795 }, { "epoch": 0.2542648828484462, "grad_norm": 9.6926496047192, "learning_rate": 4.877322257081153e-06, "loss": 1.2305, "step": 1796 }, { "epoch": 0.25440645572308346, "grad_norm": 8.615625123974201, "learning_rate": 4.877144841193585e-06, "loss": 1.1569, "step": 1797 }, { "epoch": 0.2545480285977207, "grad_norm": 10.155984446648509, "learning_rate": 4.876967300341285e-06, "loss": 1.3582, "step": 1798 }, { "epoch": 0.2546896014723579, "grad_norm": 10.08366338856987, "learning_rate": 4.876789634533587e-06, "loss": 1.2787, "step": 1799 }, { "epoch": 0.2548311743469951, "grad_norm": 8.614231629824312, "learning_rate": 4.876611843779829e-06, "loss": 1.3915, "step": 1800 }, { "epoch": 0.2549727472216323, "grad_norm": 8.682392183090792, "learning_rate": 4.876433928089359e-06, "loss": 1.2334, "step": 1801 }, { "epoch": 0.25511432009626955, "grad_norm": 9.092003094130131, "learning_rate": 4.87625588747153e-06, "loss": 1.2244, "step": 1802 }, { "epoch": 0.2552558929709068, "grad_norm": 8.46884279094691, "learning_rate": 4.8760777219357e-06, "loss": 1.086, "step": 1803 }, { "epoch": 0.255397465845544, "grad_norm": 9.639625039940325, "learning_rate": 4.875899431491236e-06, "loss": 1.4323, "step": 1804 }, { "epoch": 0.2555390387201812, "grad_norm": 9.457650808391932, "learning_rate": 4.875721016147511e-06, "loss": 1.2802, "step": 1805 }, { "epoch": 0.2556806115948184, "grad_norm": 9.310606488756603, "learning_rate": 4.875542475913902e-06, "loss": 1.1629, "step": 1806 }, { "epoch": 0.25582218446945565, "grad_norm": 10.277965821158311, "learning_rate": 4.875363810799798e-06, "loss": 1.1947, "step": 1807 }, { "epoch": 0.2559637573440929, "grad_norm": 10.743253498244332, "learning_rate": 4.87518502081459e-06, "loss": 1.2023, "step": 1808 }, { "epoch": 0.2561053302187301, "grad_norm": 7.889038651646217, "learning_rate": 4.875006105967675e-06, "loss": 1.1738, "step": 1809 }, { "epoch": 0.2562469030933673, "grad_norm": 8.843003743636036, "learning_rate": 4.87482706626846e-06, "loss": 1.2809, "step": 1810 }, { "epoch": 0.2563884759680045, "grad_norm": 9.775186334965731, "learning_rate": 4.874647901726358e-06, "loss": 1.1192, "step": 1811 }, { "epoch": 0.25653004884264174, "grad_norm": 12.168404511659489, "learning_rate": 4.874468612350786e-06, "loss": 1.24, "step": 1812 }, { "epoch": 0.256671621717279, "grad_norm": 11.379313898036166, "learning_rate": 4.874289198151168e-06, "loss": 1.1437, "step": 1813 }, { "epoch": 0.2568131945919162, "grad_norm": 10.184844437367657, "learning_rate": 4.87410965913694e-06, "loss": 1.2661, "step": 1814 }, { "epoch": 0.25695476746655344, "grad_norm": 13.413508515424269, "learning_rate": 4.873929995317535e-06, "loss": 1.2176, "step": 1815 }, { "epoch": 0.2570963403411906, "grad_norm": 9.888710258480216, "learning_rate": 4.873750206702401e-06, "loss": 1.2416, "step": 1816 }, { "epoch": 0.25723791321582784, "grad_norm": 9.454611517935474, "learning_rate": 4.873570293300989e-06, "loss": 1.32, "step": 1817 }, { "epoch": 0.25737948609046507, "grad_norm": 9.223863534783067, "learning_rate": 4.873390255122756e-06, "loss": 1.1076, "step": 1818 }, { "epoch": 0.2575210589651023, "grad_norm": 10.901340899102161, "learning_rate": 4.873210092177167e-06, "loss": 1.2304, "step": 1819 }, { "epoch": 0.25766263183973953, "grad_norm": 10.797861410258804, "learning_rate": 4.873029804473694e-06, "loss": 1.2229, "step": 1820 }, { "epoch": 0.2578042047143767, "grad_norm": 9.004815402852694, "learning_rate": 4.8728493920218126e-06, "loss": 1.2165, "step": 1821 }, { "epoch": 0.25794577758901394, "grad_norm": 9.246407300872745, "learning_rate": 4.872668854831008e-06, "loss": 1.1099, "step": 1822 }, { "epoch": 0.25808735046365117, "grad_norm": 8.302761252823467, "learning_rate": 4.87248819291077e-06, "loss": 1.1432, "step": 1823 }, { "epoch": 0.2582289233382884, "grad_norm": 11.913779453604999, "learning_rate": 4.872307406270598e-06, "loss": 1.1371, "step": 1824 }, { "epoch": 0.2583704962129256, "grad_norm": 11.59291159999223, "learning_rate": 4.872126494919994e-06, "loss": 1.3077, "step": 1825 }, { "epoch": 0.2585120690875628, "grad_norm": 9.818873965466052, "learning_rate": 4.871945458868469e-06, "loss": 1.2192, "step": 1826 }, { "epoch": 0.25865364196220003, "grad_norm": 8.350623796198972, "learning_rate": 4.87176429812554e-06, "loss": 1.2056, "step": 1827 }, { "epoch": 0.25879521483683726, "grad_norm": 10.319581715586537, "learning_rate": 4.87158301270073e-06, "loss": 1.17, "step": 1828 }, { "epoch": 0.2589367877114745, "grad_norm": 9.26096127556738, "learning_rate": 4.87140160260357e-06, "loss": 1.3377, "step": 1829 }, { "epoch": 0.2590783605861117, "grad_norm": 9.052947383214054, "learning_rate": 4.871220067843595e-06, "loss": 1.2077, "step": 1830 }, { "epoch": 0.2592199334607489, "grad_norm": 11.87657396024641, "learning_rate": 4.8710384084303495e-06, "loss": 1.2877, "step": 1831 }, { "epoch": 0.2593615063353861, "grad_norm": 10.111394712874716, "learning_rate": 4.870856624373383e-06, "loss": 1.2358, "step": 1832 }, { "epoch": 0.25950307921002336, "grad_norm": 9.797462165917159, "learning_rate": 4.870674715682252e-06, "loss": 1.3889, "step": 1833 }, { "epoch": 0.2596446520846606, "grad_norm": 11.368148584800155, "learning_rate": 4.870492682366518e-06, "loss": 1.2436, "step": 1834 }, { "epoch": 0.2597862249592978, "grad_norm": 9.910197822143786, "learning_rate": 4.8703105244357504e-06, "loss": 1.113, "step": 1835 }, { "epoch": 0.259927797833935, "grad_norm": 8.885752866465861, "learning_rate": 4.870128241899527e-06, "loss": 1.2666, "step": 1836 }, { "epoch": 0.2600693707085722, "grad_norm": 12.343876087172488, "learning_rate": 4.86994583476743e-06, "loss": 1.4567, "step": 1837 }, { "epoch": 0.26021094358320945, "grad_norm": 11.555181383123115, "learning_rate": 4.8697633030490465e-06, "loss": 1.0876, "step": 1838 }, { "epoch": 0.2603525164578467, "grad_norm": 11.952417769110305, "learning_rate": 4.869580646753973e-06, "loss": 1.2656, "step": 1839 }, { "epoch": 0.2604940893324839, "grad_norm": 9.936797495068912, "learning_rate": 4.869397865891812e-06, "loss": 1.4074, "step": 1840 }, { "epoch": 0.2606356622071211, "grad_norm": 10.079669213089907, "learning_rate": 4.869214960472172e-06, "loss": 1.2334, "step": 1841 }, { "epoch": 0.2607772350817583, "grad_norm": 8.910153253095725, "learning_rate": 4.869031930504668e-06, "loss": 1.3296, "step": 1842 }, { "epoch": 0.26091880795639555, "grad_norm": 9.279096051618188, "learning_rate": 4.8688487759989215e-06, "loss": 1.2071, "step": 1843 }, { "epoch": 0.2610603808310328, "grad_norm": 10.866355529574632, "learning_rate": 4.868665496964562e-06, "loss": 1.2579, "step": 1844 }, { "epoch": 0.26120195370567, "grad_norm": 9.745258841367628, "learning_rate": 4.868482093411223e-06, "loss": 1.1464, "step": 1845 }, { "epoch": 0.26134352658030724, "grad_norm": 9.014016151672749, "learning_rate": 4.868298565348546e-06, "loss": 1.2231, "step": 1846 }, { "epoch": 0.2614850994549444, "grad_norm": 9.393963161747939, "learning_rate": 4.8681149127861795e-06, "loss": 1.3006, "step": 1847 }, { "epoch": 0.26162667232958164, "grad_norm": 7.336320730971062, "learning_rate": 4.8679311357337774e-06, "loss": 1.1133, "step": 1848 }, { "epoch": 0.2617682452042189, "grad_norm": 12.86226105315135, "learning_rate": 4.867747234201003e-06, "loss": 1.2157, "step": 1849 }, { "epoch": 0.2619098180788561, "grad_norm": 9.034694344818583, "learning_rate": 4.86756320819752e-06, "loss": 1.2538, "step": 1850 }, { "epoch": 0.26205139095349334, "grad_norm": 8.971182994935132, "learning_rate": 4.867379057733005e-06, "loss": 1.1907, "step": 1851 }, { "epoch": 0.2621929638281305, "grad_norm": 10.140750267066235, "learning_rate": 4.867194782817138e-06, "loss": 1.2287, "step": 1852 }, { "epoch": 0.26233453670276774, "grad_norm": 8.411089216859184, "learning_rate": 4.867010383459606e-06, "loss": 1.1976, "step": 1853 }, { "epoch": 0.26247610957740497, "grad_norm": 8.858300199353993, "learning_rate": 4.8668258596701035e-06, "loss": 1.2041, "step": 1854 }, { "epoch": 0.2626176824520422, "grad_norm": 8.71581675638933, "learning_rate": 4.86664121145833e-06, "loss": 1.2247, "step": 1855 }, { "epoch": 0.26275925532667943, "grad_norm": 11.489086777905126, "learning_rate": 4.866456438833993e-06, "loss": 1.1742, "step": 1856 }, { "epoch": 0.2629008282013166, "grad_norm": 9.23254727576408, "learning_rate": 4.866271541806806e-06, "loss": 1.2725, "step": 1857 }, { "epoch": 0.26304240107595384, "grad_norm": 9.761057134816912, "learning_rate": 4.8660865203864885e-06, "loss": 1.2693, "step": 1858 }, { "epoch": 0.26318397395059107, "grad_norm": 9.816372829976935, "learning_rate": 4.865901374582766e-06, "loss": 1.0824, "step": 1859 }, { "epoch": 0.2633255468252283, "grad_norm": 9.315533240018976, "learning_rate": 4.865716104405373e-06, "loss": 1.2235, "step": 1860 }, { "epoch": 0.2634671196998655, "grad_norm": 19.862766171419043, "learning_rate": 4.865530709864048e-06, "loss": 1.2208, "step": 1861 }, { "epoch": 0.2636086925745027, "grad_norm": 8.3127707530506, "learning_rate": 4.865345190968537e-06, "loss": 1.1253, "step": 1862 }, { "epoch": 0.26375026544913993, "grad_norm": 9.138434149170768, "learning_rate": 4.865159547728593e-06, "loss": 1.0343, "step": 1863 }, { "epoch": 0.26389183832377716, "grad_norm": 8.643106225037684, "learning_rate": 4.8649737801539755e-06, "loss": 1.1469, "step": 1864 }, { "epoch": 0.2640334111984144, "grad_norm": 9.661063269312148, "learning_rate": 4.86478788825445e-06, "loss": 1.2546, "step": 1865 }, { "epoch": 0.2641749840730516, "grad_norm": 10.676936053989298, "learning_rate": 4.864601872039788e-06, "loss": 1.3395, "step": 1866 }, { "epoch": 0.2643165569476888, "grad_norm": 10.712629077538795, "learning_rate": 4.864415731519769e-06, "loss": 1.3361, "step": 1867 }, { "epoch": 0.264458129822326, "grad_norm": 10.457814075806695, "learning_rate": 4.864229466704178e-06, "loss": 1.2663, "step": 1868 }, { "epoch": 0.26459970269696326, "grad_norm": 9.28196883067031, "learning_rate": 4.864043077602807e-06, "loss": 1.3316, "step": 1869 }, { "epoch": 0.2647412755716005, "grad_norm": 9.126283738225844, "learning_rate": 4.863856564225453e-06, "loss": 1.1979, "step": 1870 }, { "epoch": 0.2648828484462377, "grad_norm": 11.24123736429708, "learning_rate": 4.863669926581924e-06, "loss": 1.2552, "step": 1871 }, { "epoch": 0.26502442132087495, "grad_norm": 10.464773031909244, "learning_rate": 4.863483164682027e-06, "loss": 1.2079, "step": 1872 }, { "epoch": 0.2651659941955121, "grad_norm": 11.205631161999289, "learning_rate": 4.863296278535584e-06, "loss": 1.2424, "step": 1873 }, { "epoch": 0.26530756707014935, "grad_norm": 8.482095598361651, "learning_rate": 4.863109268152417e-06, "loss": 1.0478, "step": 1874 }, { "epoch": 0.2654491399447866, "grad_norm": 10.983533846038364, "learning_rate": 4.862922133542358e-06, "loss": 1.3693, "step": 1875 }, { "epoch": 0.2655907128194238, "grad_norm": 11.055224078739984, "learning_rate": 4.862734874715245e-06, "loss": 1.2836, "step": 1876 }, { "epoch": 0.26573228569406104, "grad_norm": 7.723172916770914, "learning_rate": 4.8625474916809205e-06, "loss": 1.2519, "step": 1877 }, { "epoch": 0.2658738585686982, "grad_norm": 8.990195974350424, "learning_rate": 4.862359984449236e-06, "loss": 1.3099, "step": 1878 }, { "epoch": 0.26601543144333545, "grad_norm": 8.41856237031077, "learning_rate": 4.862172353030049e-06, "loss": 1.1701, "step": 1879 }, { "epoch": 0.2661570043179727, "grad_norm": 9.906464487154532, "learning_rate": 4.861984597433223e-06, "loss": 1.2291, "step": 1880 }, { "epoch": 0.2662985771926099, "grad_norm": 11.584148204508756, "learning_rate": 4.861796717668626e-06, "loss": 1.2647, "step": 1881 }, { "epoch": 0.26644015006724714, "grad_norm": 10.958555347432178, "learning_rate": 4.8616087137461385e-06, "loss": 1.3679, "step": 1882 }, { "epoch": 0.2665817229418843, "grad_norm": 7.333378126990178, "learning_rate": 4.861420585675641e-06, "loss": 1.2374, "step": 1883 }, { "epoch": 0.26672329581652154, "grad_norm": 9.315602854510745, "learning_rate": 4.861232333467024e-06, "loss": 1.1631, "step": 1884 }, { "epoch": 0.2668648686911588, "grad_norm": 11.555625067607767, "learning_rate": 4.8610439571301845e-06, "loss": 1.1704, "step": 1885 }, { "epoch": 0.267006441565796, "grad_norm": 10.53153543057187, "learning_rate": 4.860855456675024e-06, "loss": 1.27, "step": 1886 }, { "epoch": 0.26714801444043323, "grad_norm": 9.3599521159716, "learning_rate": 4.860666832111453e-06, "loss": 1.2962, "step": 1887 }, { "epoch": 0.2672895873150704, "grad_norm": 7.805939872695829, "learning_rate": 4.860478083449387e-06, "loss": 1.2905, "step": 1888 }, { "epoch": 0.26743116018970764, "grad_norm": 8.043125738359834, "learning_rate": 4.8602892106987474e-06, "loss": 1.1392, "step": 1889 }, { "epoch": 0.26757273306434487, "grad_norm": 11.505482569209804, "learning_rate": 4.860100213869464e-06, "loss": 1.1548, "step": 1890 }, { "epoch": 0.2677143059389821, "grad_norm": 7.097062870765734, "learning_rate": 4.859911092971473e-06, "loss": 1.0911, "step": 1891 }, { "epoch": 0.26785587881361933, "grad_norm": 9.567462506127335, "learning_rate": 4.8597218480147145e-06, "loss": 1.1557, "step": 1892 }, { "epoch": 0.2679974516882565, "grad_norm": 9.904639465148154, "learning_rate": 4.859532479009138e-06, "loss": 1.0443, "step": 1893 }, { "epoch": 0.26813902456289374, "grad_norm": 8.778521456648821, "learning_rate": 4.859342985964699e-06, "loss": 1.1627, "step": 1894 }, { "epoch": 0.26828059743753097, "grad_norm": 9.206371207646816, "learning_rate": 4.8591533688913584e-06, "loss": 1.0772, "step": 1895 }, { "epoch": 0.2684221703121682, "grad_norm": 10.995584815598109, "learning_rate": 4.858963627799084e-06, "loss": 1.433, "step": 1896 }, { "epoch": 0.2685637431868054, "grad_norm": 9.433939078824325, "learning_rate": 4.85877376269785e-06, "loss": 1.147, "step": 1897 }, { "epoch": 0.2687053160614426, "grad_norm": 8.471879106532572, "learning_rate": 4.858583773597639e-06, "loss": 1.2028, "step": 1898 }, { "epoch": 0.26884688893607983, "grad_norm": 11.435504009580207, "learning_rate": 4.858393660508437e-06, "loss": 1.1306, "step": 1899 }, { "epoch": 0.26898846181071706, "grad_norm": 10.654226519302533, "learning_rate": 4.85820342344024e-06, "loss": 1.1349, "step": 1900 }, { "epoch": 0.2691300346853543, "grad_norm": 8.98453974033746, "learning_rate": 4.8580130624030454e-06, "loss": 1.131, "step": 1901 }, { "epoch": 0.2692716075599915, "grad_norm": 8.663211818585062, "learning_rate": 4.857822577406864e-06, "loss": 1.3537, "step": 1902 }, { "epoch": 0.26941318043462875, "grad_norm": 10.863317770075255, "learning_rate": 4.8576319684617064e-06, "loss": 1.2978, "step": 1903 }, { "epoch": 0.2695547533092659, "grad_norm": 11.400592561011358, "learning_rate": 4.857441235577596e-06, "loss": 1.0447, "step": 1904 }, { "epoch": 0.26969632618390316, "grad_norm": 10.845806355588767, "learning_rate": 4.857250378764556e-06, "loss": 1.1425, "step": 1905 }, { "epoch": 0.2698378990585404, "grad_norm": 8.527956595877578, "learning_rate": 4.857059398032622e-06, "loss": 1.2136, "step": 1906 }, { "epoch": 0.2699794719331776, "grad_norm": 8.85984389260158, "learning_rate": 4.8568682933918325e-06, "loss": 1.291, "step": 1907 }, { "epoch": 0.27012104480781485, "grad_norm": 10.839116670435487, "learning_rate": 4.856677064852234e-06, "loss": 1.0655, "step": 1908 }, { "epoch": 0.270262617682452, "grad_norm": 11.588278906197093, "learning_rate": 4.85648571242388e-06, "loss": 1.0913, "step": 1909 }, { "epoch": 0.27040419055708925, "grad_norm": 11.030256793991981, "learning_rate": 4.856294236116829e-06, "loss": 1.3241, "step": 1910 }, { "epoch": 0.2705457634317265, "grad_norm": 8.682572318889944, "learning_rate": 4.856102635941147e-06, "loss": 1.1264, "step": 1911 }, { "epoch": 0.2706873363063637, "grad_norm": 9.797770920083885, "learning_rate": 4.855910911906906e-06, "loss": 1.2853, "step": 1912 }, { "epoch": 0.27082890918100094, "grad_norm": 11.412399348371872, "learning_rate": 4.855719064024185e-06, "loss": 1.2521, "step": 1913 }, { "epoch": 0.2709704820556381, "grad_norm": 11.80768983784045, "learning_rate": 4.855527092303069e-06, "loss": 1.2022, "step": 1914 }, { "epoch": 0.27111205493027535, "grad_norm": 10.212730571333813, "learning_rate": 4.855334996753651e-06, "loss": 1.1315, "step": 1915 }, { "epoch": 0.2712536278049126, "grad_norm": 8.698457024211294, "learning_rate": 4.8551427773860284e-06, "loss": 1.158, "step": 1916 }, { "epoch": 0.2713952006795498, "grad_norm": 9.510166149593344, "learning_rate": 4.854950434210305e-06, "loss": 1.0671, "step": 1917 }, { "epoch": 0.27153677355418704, "grad_norm": 8.69804039294174, "learning_rate": 4.854757967236594e-06, "loss": 1.0715, "step": 1918 }, { "epoch": 0.2716783464288242, "grad_norm": 9.26468256102404, "learning_rate": 4.8545653764750125e-06, "loss": 1.1997, "step": 1919 }, { "epoch": 0.27181991930346144, "grad_norm": 9.662118653030213, "learning_rate": 4.8543726619356846e-06, "loss": 1.2487, "step": 1920 }, { "epoch": 0.2719614921780987, "grad_norm": 9.618332968948543, "learning_rate": 4.854179823628741e-06, "loss": 1.2369, "step": 1921 }, { "epoch": 0.2721030650527359, "grad_norm": 9.923051803365997, "learning_rate": 4.85398686156432e-06, "loss": 1.1437, "step": 1922 }, { "epoch": 0.27224463792737313, "grad_norm": 10.853671501321282, "learning_rate": 4.853793775752564e-06, "loss": 1.1423, "step": 1923 }, { "epoch": 0.2723862108020103, "grad_norm": 8.674019653157316, "learning_rate": 4.853600566203625e-06, "loss": 1.1853, "step": 1924 }, { "epoch": 0.27252778367664754, "grad_norm": 8.452837983228067, "learning_rate": 4.8534072329276594e-06, "loss": 1.0377, "step": 1925 }, { "epoch": 0.27266935655128477, "grad_norm": 8.987105987782972, "learning_rate": 4.85321377593483e-06, "loss": 1.2436, "step": 1926 }, { "epoch": 0.272810929425922, "grad_norm": 11.75612334731252, "learning_rate": 4.853020195235307e-06, "loss": 1.2822, "step": 1927 }, { "epoch": 0.27295250230055923, "grad_norm": 9.224762174854005, "learning_rate": 4.852826490839266e-06, "loss": 1.0697, "step": 1928 }, { "epoch": 0.2730940751751964, "grad_norm": 8.776748752502305, "learning_rate": 4.852632662756892e-06, "loss": 1.2302, "step": 1929 }, { "epoch": 0.27323564804983363, "grad_norm": 8.897325829786539, "learning_rate": 4.852438710998373e-06, "loss": 1.0787, "step": 1930 }, { "epoch": 0.27337722092447087, "grad_norm": 8.354972464369412, "learning_rate": 4.852244635573905e-06, "loss": 1.427, "step": 1931 }, { "epoch": 0.2735187937991081, "grad_norm": 9.499213738278964, "learning_rate": 4.85205043649369e-06, "loss": 1.0853, "step": 1932 }, { "epoch": 0.2736603666737453, "grad_norm": 11.46445844220506, "learning_rate": 4.851856113767937e-06, "loss": 1.1689, "step": 1933 }, { "epoch": 0.27380193954838256, "grad_norm": 9.101502564850943, "learning_rate": 4.851661667406862e-06, "loss": 1.2538, "step": 1934 }, { "epoch": 0.27394351242301973, "grad_norm": 8.865683380139119, "learning_rate": 4.851467097420687e-06, "loss": 1.3585, "step": 1935 }, { "epoch": 0.27408508529765696, "grad_norm": 7.401521485085985, "learning_rate": 4.8512724038196395e-06, "loss": 1.1831, "step": 1936 }, { "epoch": 0.2742266581722942, "grad_norm": 13.07347062394961, "learning_rate": 4.8510775866139556e-06, "loss": 1.2133, "step": 1937 }, { "epoch": 0.2743682310469314, "grad_norm": 10.11244761072647, "learning_rate": 4.850882645813875e-06, "loss": 1.2784, "step": 1938 }, { "epoch": 0.27450980392156865, "grad_norm": 8.69373432654388, "learning_rate": 4.850687581429647e-06, "loss": 1.2419, "step": 1939 }, { "epoch": 0.2746513767962058, "grad_norm": 9.359019175597368, "learning_rate": 4.8504923934715265e-06, "loss": 1.2591, "step": 1940 }, { "epoch": 0.27479294967084306, "grad_norm": 9.893902035362569, "learning_rate": 4.850297081949773e-06, "loss": 1.315, "step": 1941 }, { "epoch": 0.2749345225454803, "grad_norm": 10.656607255512524, "learning_rate": 4.850101646874654e-06, "loss": 1.2112, "step": 1942 }, { "epoch": 0.2750760954201175, "grad_norm": 10.111636160980614, "learning_rate": 4.8499060882564435e-06, "loss": 1.2273, "step": 1943 }, { "epoch": 0.27521766829475475, "grad_norm": 10.772446020366903, "learning_rate": 4.849710406105422e-06, "loss": 1.3165, "step": 1944 }, { "epoch": 0.2753592411693919, "grad_norm": 9.41039947537472, "learning_rate": 4.849514600431877e-06, "loss": 1.2174, "step": 1945 }, { "epoch": 0.27550081404402915, "grad_norm": 8.09067362052067, "learning_rate": 4.849318671246101e-06, "loss": 1.1446, "step": 1946 }, { "epoch": 0.2756423869186664, "grad_norm": 10.750769299326619, "learning_rate": 4.849122618558395e-06, "loss": 1.3447, "step": 1947 }, { "epoch": 0.2757839597933036, "grad_norm": 11.52257875507875, "learning_rate": 4.848926442379064e-06, "loss": 1.2858, "step": 1948 }, { "epoch": 0.27592553266794084, "grad_norm": 9.535471497246725, "learning_rate": 4.8487301427184204e-06, "loss": 1.1338, "step": 1949 }, { "epoch": 0.276067105542578, "grad_norm": 9.81444010415803, "learning_rate": 4.848533719586787e-06, "loss": 1.1204, "step": 1950 }, { "epoch": 0.27620867841721525, "grad_norm": 10.065136297585088, "learning_rate": 4.848337172994485e-06, "loss": 1.2154, "step": 1951 }, { "epoch": 0.2763502512918525, "grad_norm": 8.499480736523106, "learning_rate": 4.848140502951849e-06, "loss": 1.0766, "step": 1952 }, { "epoch": 0.2764918241664897, "grad_norm": 9.169243144143675, "learning_rate": 4.847943709469218e-06, "loss": 1.3202, "step": 1953 }, { "epoch": 0.27663339704112694, "grad_norm": 8.458163355229669, "learning_rate": 4.8477467925569365e-06, "loss": 0.9487, "step": 1954 }, { "epoch": 0.2767749699157641, "grad_norm": 9.911790134945313, "learning_rate": 4.847549752225356e-06, "loss": 1.1532, "step": 1955 }, { "epoch": 0.27691654279040134, "grad_norm": 8.681857025916576, "learning_rate": 4.847352588484837e-06, "loss": 1.1556, "step": 1956 }, { "epoch": 0.2770581156650386, "grad_norm": 9.348678462834458, "learning_rate": 4.847155301345743e-06, "loss": 1.3105, "step": 1957 }, { "epoch": 0.2771996885396758, "grad_norm": 9.538636993990545, "learning_rate": 4.846957890818444e-06, "loss": 1.2647, "step": 1958 }, { "epoch": 0.27734126141431303, "grad_norm": 9.063222290720647, "learning_rate": 4.846760356913318e-06, "loss": 1.2374, "step": 1959 }, { "epoch": 0.27748283428895026, "grad_norm": 7.586750336617517, "learning_rate": 4.846562699640751e-06, "loss": 0.994, "step": 1960 }, { "epoch": 0.27762440716358744, "grad_norm": 9.905844502618573, "learning_rate": 4.846364919011132e-06, "loss": 1.3595, "step": 1961 }, { "epoch": 0.27776598003822467, "grad_norm": 8.00468164786654, "learning_rate": 4.8461670150348585e-06, "loss": 1.2131, "step": 1962 }, { "epoch": 0.2779075529128619, "grad_norm": 11.25030517164215, "learning_rate": 4.8459689877223346e-06, "loss": 1.2019, "step": 1963 }, { "epoch": 0.27804912578749913, "grad_norm": 8.983588620476404, "learning_rate": 4.845770837083971e-06, "loss": 1.1014, "step": 1964 }, { "epoch": 0.27819069866213636, "grad_norm": 7.786284874522432, "learning_rate": 4.845572563130182e-06, "loss": 1.1601, "step": 1965 }, { "epoch": 0.27833227153677353, "grad_norm": 8.429067509348139, "learning_rate": 4.845374165871394e-06, "loss": 1.1343, "step": 1966 }, { "epoch": 0.27847384441141076, "grad_norm": 7.626059536721254, "learning_rate": 4.845175645318034e-06, "loss": 1.1543, "step": 1967 }, { "epoch": 0.278615417286048, "grad_norm": 7.681927157445254, "learning_rate": 4.844977001480539e-06, "loss": 1.2269, "step": 1968 }, { "epoch": 0.2787569901606852, "grad_norm": 8.61065188041324, "learning_rate": 4.8447782343693515e-06, "loss": 1.0422, "step": 1969 }, { "epoch": 0.27889856303532246, "grad_norm": 9.410385287400523, "learning_rate": 4.844579343994921e-06, "loss": 1.1878, "step": 1970 }, { "epoch": 0.27904013590995963, "grad_norm": 10.626074523446183, "learning_rate": 4.844380330367701e-06, "loss": 1.2545, "step": 1971 }, { "epoch": 0.27918170878459686, "grad_norm": 8.999828760849251, "learning_rate": 4.844181193498157e-06, "loss": 1.2293, "step": 1972 }, { "epoch": 0.2793232816592341, "grad_norm": 9.801970645744612, "learning_rate": 4.843981933396755e-06, "loss": 1.3322, "step": 1973 }, { "epoch": 0.2794648545338713, "grad_norm": 9.511373036581817, "learning_rate": 4.84378255007397e-06, "loss": 1.1562, "step": 1974 }, { "epoch": 0.27960642740850855, "grad_norm": 10.745077736184165, "learning_rate": 4.843583043540284e-06, "loss": 1.2143, "step": 1975 }, { "epoch": 0.2797480002831457, "grad_norm": 8.352052593927931, "learning_rate": 4.8433834138061856e-06, "loss": 1.158, "step": 1976 }, { "epoch": 0.27988957315778296, "grad_norm": 14.643593990912994, "learning_rate": 4.843183660882168e-06, "loss": 1.3458, "step": 1977 }, { "epoch": 0.2800311460324202, "grad_norm": 7.494482426805945, "learning_rate": 4.842983784778732e-06, "loss": 1.155, "step": 1978 }, { "epoch": 0.2801727189070574, "grad_norm": 9.569176431294503, "learning_rate": 4.842783785506386e-06, "loss": 1.1635, "step": 1979 }, { "epoch": 0.28031429178169465, "grad_norm": 10.155981066150902, "learning_rate": 4.842583663075643e-06, "loss": 1.293, "step": 1980 }, { "epoch": 0.2804558646563318, "grad_norm": 9.148221085852951, "learning_rate": 4.842383417497024e-06, "loss": 1.1197, "step": 1981 }, { "epoch": 0.28059743753096905, "grad_norm": 8.667214840890038, "learning_rate": 4.842183048781055e-06, "loss": 1.0892, "step": 1982 }, { "epoch": 0.2807390104056063, "grad_norm": 9.489512225881594, "learning_rate": 4.84198255693827e-06, "loss": 1.2879, "step": 1983 }, { "epoch": 0.2808805832802435, "grad_norm": 8.348949398258892, "learning_rate": 4.841781941979207e-06, "loss": 1.1039, "step": 1984 }, { "epoch": 0.28102215615488074, "grad_norm": 8.97264349558301, "learning_rate": 4.8415812039144145e-06, "loss": 1.0673, "step": 1985 }, { "epoch": 0.2811637290295179, "grad_norm": 9.734313474800551, "learning_rate": 4.841380342754444e-06, "loss": 1.2104, "step": 1986 }, { "epoch": 0.28130530190415515, "grad_norm": 10.152134081857648, "learning_rate": 4.841179358509854e-06, "loss": 1.3926, "step": 1987 }, { "epoch": 0.2814468747787924, "grad_norm": 10.812239627238935, "learning_rate": 4.840978251191212e-06, "loss": 1.2239, "step": 1988 }, { "epoch": 0.2815884476534296, "grad_norm": 15.182592281900924, "learning_rate": 4.840777020809087e-06, "loss": 1.2037, "step": 1989 }, { "epoch": 0.28173002052806684, "grad_norm": 9.909137677996222, "learning_rate": 4.8405756673740606e-06, "loss": 1.137, "step": 1990 }, { "epoch": 0.28187159340270407, "grad_norm": 9.607208851381472, "learning_rate": 4.840374190896716e-06, "loss": 1.2068, "step": 1991 }, { "epoch": 0.28201316627734124, "grad_norm": 10.175529702385084, "learning_rate": 4.840172591387646e-06, "loss": 1.1721, "step": 1992 }, { "epoch": 0.2821547391519785, "grad_norm": 10.847967113838873, "learning_rate": 4.839970868857447e-06, "loss": 1.3036, "step": 1993 }, { "epoch": 0.2822963120266157, "grad_norm": 10.744044228741808, "learning_rate": 4.839769023316725e-06, "loss": 1.2659, "step": 1994 }, { "epoch": 0.28243788490125293, "grad_norm": 10.419938532378907, "learning_rate": 4.83956705477609e-06, "loss": 1.1822, "step": 1995 }, { "epoch": 0.28257945777589016, "grad_norm": 9.13873427961435, "learning_rate": 4.839364963246159e-06, "loss": 1.2339, "step": 1996 }, { "epoch": 0.28272103065052734, "grad_norm": 11.241489836760962, "learning_rate": 4.839162748737556e-06, "loss": 1.3823, "step": 1997 }, { "epoch": 0.28286260352516457, "grad_norm": 13.789795040740271, "learning_rate": 4.838960411260911e-06, "loss": 1.4485, "step": 1998 }, { "epoch": 0.2830041763998018, "grad_norm": 9.000848094399606, "learning_rate": 4.838757950826862e-06, "loss": 1.1921, "step": 1999 }, { "epoch": 0.28314574927443903, "grad_norm": 10.274008197670508, "learning_rate": 4.838555367446052e-06, "loss": 1.2745, "step": 2000 }, { "epoch": 0.28328732214907626, "grad_norm": 11.21120494563398, "learning_rate": 4.838352661129129e-06, "loss": 1.3764, "step": 2001 }, { "epoch": 0.28342889502371343, "grad_norm": 9.39170955545493, "learning_rate": 4.838149831886751e-06, "loss": 1.0973, "step": 2002 }, { "epoch": 0.28357046789835066, "grad_norm": 8.469304027945475, "learning_rate": 4.8379468797295785e-06, "loss": 1.1925, "step": 2003 }, { "epoch": 0.2837120407729879, "grad_norm": 8.002241774220893, "learning_rate": 4.837743804668282e-06, "loss": 0.9994, "step": 2004 }, { "epoch": 0.2838536136476251, "grad_norm": 8.645599974887876, "learning_rate": 4.837540606713538e-06, "loss": 1.2742, "step": 2005 }, { "epoch": 0.28399518652226236, "grad_norm": 8.477485458825031, "learning_rate": 4.837337285876026e-06, "loss": 1.3553, "step": 2006 }, { "epoch": 0.28413675939689953, "grad_norm": 10.129592419151177, "learning_rate": 4.837133842166436e-06, "loss": 1.2131, "step": 2007 }, { "epoch": 0.28427833227153676, "grad_norm": 9.99265630008561, "learning_rate": 4.8369302755954625e-06, "loss": 1.1884, "step": 2008 }, { "epoch": 0.284419905146174, "grad_norm": 7.663220778351222, "learning_rate": 4.836726586173807e-06, "loss": 1.0998, "step": 2009 }, { "epoch": 0.2845614780208112, "grad_norm": 11.624331875541682, "learning_rate": 4.836522773912178e-06, "loss": 1.4119, "step": 2010 }, { "epoch": 0.28470305089544845, "grad_norm": 11.043283787954064, "learning_rate": 4.836318838821288e-06, "loss": 1.2948, "step": 2011 }, { "epoch": 0.2848446237700856, "grad_norm": 12.475722273288804, "learning_rate": 4.836114780911859e-06, "loss": 1.2924, "step": 2012 }, { "epoch": 0.28498619664472286, "grad_norm": 9.248532565453711, "learning_rate": 4.835910600194618e-06, "loss": 1.0613, "step": 2013 }, { "epoch": 0.2851277695193601, "grad_norm": 8.312875380680657, "learning_rate": 4.835706296680298e-06, "loss": 1.1558, "step": 2014 }, { "epoch": 0.2852693423939973, "grad_norm": 9.587496530465067, "learning_rate": 4.83550187037964e-06, "loss": 1.2526, "step": 2015 }, { "epoch": 0.28541091526863455, "grad_norm": 10.405595271961484, "learning_rate": 4.8352973213033894e-06, "loss": 1.3411, "step": 2016 }, { "epoch": 0.2855524881432718, "grad_norm": 8.583170867666254, "learning_rate": 4.835092649462301e-06, "loss": 1.223, "step": 2017 }, { "epoch": 0.28569406101790895, "grad_norm": 7.875844758758421, "learning_rate": 4.834887854867132e-06, "loss": 1.257, "step": 2018 }, { "epoch": 0.2858356338925462, "grad_norm": 10.232277039261746, "learning_rate": 4.83468293752865e-06, "loss": 1.2418, "step": 2019 }, { "epoch": 0.2859772067671834, "grad_norm": 8.719827127405447, "learning_rate": 4.834477897457627e-06, "loss": 1.2041, "step": 2020 }, { "epoch": 0.28611877964182064, "grad_norm": 10.39081627232316, "learning_rate": 4.834272734664841e-06, "loss": 1.304, "step": 2021 }, { "epoch": 0.28626035251645787, "grad_norm": 12.281376107614312, "learning_rate": 4.8340674491610786e-06, "loss": 1.1655, "step": 2022 }, { "epoch": 0.28640192539109505, "grad_norm": 12.491474753939997, "learning_rate": 4.83386204095713e-06, "loss": 1.2948, "step": 2023 }, { "epoch": 0.2865434982657323, "grad_norm": 9.54722422259189, "learning_rate": 4.833656510063794e-06, "loss": 1.1582, "step": 2024 }, { "epoch": 0.2866850711403695, "grad_norm": 8.834687135226904, "learning_rate": 4.833450856491875e-06, "loss": 1.1621, "step": 2025 }, { "epoch": 0.28682664401500674, "grad_norm": 10.407525611569195, "learning_rate": 4.833245080252186e-06, "loss": 1.3637, "step": 2026 }, { "epoch": 0.28696821688964397, "grad_norm": 9.991420498685008, "learning_rate": 4.833039181355542e-06, "loss": 1.3324, "step": 2027 }, { "epoch": 0.28710978976428114, "grad_norm": 11.66135750721728, "learning_rate": 4.832833159812768e-06, "loss": 1.2246, "step": 2028 }, { "epoch": 0.2872513626389184, "grad_norm": 8.460564179783425, "learning_rate": 4.832627015634694e-06, "loss": 1.2071, "step": 2029 }, { "epoch": 0.2873929355135556, "grad_norm": 9.136605603323567, "learning_rate": 4.832420748832157e-06, "loss": 1.2716, "step": 2030 }, { "epoch": 0.28753450838819283, "grad_norm": 8.847097026339037, "learning_rate": 4.832214359416001e-06, "loss": 1.1687, "step": 2031 }, { "epoch": 0.28767608126283006, "grad_norm": 10.435012977932006, "learning_rate": 4.8320078473970745e-06, "loss": 1.2175, "step": 2032 }, { "epoch": 0.28781765413746724, "grad_norm": 7.962670254646271, "learning_rate": 4.831801212786234e-06, "loss": 1.0618, "step": 2033 }, { "epoch": 0.28795922701210447, "grad_norm": 8.332751393661892, "learning_rate": 4.831594455594343e-06, "loss": 1.1248, "step": 2034 }, { "epoch": 0.2881007998867417, "grad_norm": 8.868028938600695, "learning_rate": 4.8313875758322695e-06, "loss": 1.1296, "step": 2035 }, { "epoch": 0.28824237276137893, "grad_norm": 10.626296379437774, "learning_rate": 4.83118057351089e-06, "loss": 1.0997, "step": 2036 }, { "epoch": 0.28838394563601616, "grad_norm": 8.291442816953898, "learning_rate": 4.830973448641086e-06, "loss": 1.0708, "step": 2037 }, { "epoch": 0.28852551851065333, "grad_norm": 10.413542703761598, "learning_rate": 4.830766201233746e-06, "loss": 1.2076, "step": 2038 }, { "epoch": 0.28866709138529056, "grad_norm": 8.43855697227992, "learning_rate": 4.8305588312997635e-06, "loss": 1.2046, "step": 2039 }, { "epoch": 0.2888086642599278, "grad_norm": 8.35684239891905, "learning_rate": 4.8303513388500414e-06, "loss": 1.1023, "step": 2040 }, { "epoch": 0.288950237134565, "grad_norm": 13.320610333913711, "learning_rate": 4.8301437238954875e-06, "loss": 1.2569, "step": 2041 }, { "epoch": 0.28909181000920225, "grad_norm": 7.851262723599336, "learning_rate": 4.829935986447015e-06, "loss": 1.2731, "step": 2042 }, { "epoch": 0.28923338288383943, "grad_norm": 8.943606124776332, "learning_rate": 4.829728126515545e-06, "loss": 1.2281, "step": 2043 }, { "epoch": 0.28937495575847666, "grad_norm": 11.96203584318622, "learning_rate": 4.829520144112005e-06, "loss": 1.2534, "step": 2044 }, { "epoch": 0.2895165286331139, "grad_norm": 12.888224800218863, "learning_rate": 4.829312039247328e-06, "loss": 1.2428, "step": 2045 }, { "epoch": 0.2896581015077511, "grad_norm": 10.354717237442209, "learning_rate": 4.829103811932453e-06, "loss": 1.4019, "step": 2046 }, { "epoch": 0.28979967438238835, "grad_norm": 8.70219090378273, "learning_rate": 4.828895462178329e-06, "loss": 1.247, "step": 2047 }, { "epoch": 0.2899412472570256, "grad_norm": 10.120494075369534, "learning_rate": 4.828686989995905e-06, "loss": 1.3166, "step": 2048 }, { "epoch": 0.29008282013166276, "grad_norm": 11.025839886495573, "learning_rate": 4.828478395396143e-06, "loss": 1.259, "step": 2049 }, { "epoch": 0.2902243930063, "grad_norm": 10.895607901600084, "learning_rate": 4.828269678390008e-06, "loss": 1.1549, "step": 2050 }, { "epoch": 0.2903659658809372, "grad_norm": 12.433518275572059, "learning_rate": 4.828060838988473e-06, "loss": 1.2789, "step": 2051 }, { "epoch": 0.29050753875557445, "grad_norm": 11.207987348659653, "learning_rate": 4.827851877202515e-06, "loss": 1.2013, "step": 2052 }, { "epoch": 0.2906491116302117, "grad_norm": 10.262505694630537, "learning_rate": 4.827642793043119e-06, "loss": 1.241, "step": 2053 }, { "epoch": 0.29079068450484885, "grad_norm": 10.241090205093752, "learning_rate": 4.827433586521277e-06, "loss": 1.1386, "step": 2054 }, { "epoch": 0.2909322573794861, "grad_norm": 11.764706113478715, "learning_rate": 4.827224257647987e-06, "loss": 1.1821, "step": 2055 }, { "epoch": 0.2910738302541233, "grad_norm": 9.150389582777422, "learning_rate": 4.827014806434254e-06, "loss": 0.9945, "step": 2056 }, { "epoch": 0.29121540312876054, "grad_norm": 10.077243784148335, "learning_rate": 4.826805232891087e-06, "loss": 1.2511, "step": 2057 }, { "epoch": 0.29135697600339777, "grad_norm": 8.924251954602063, "learning_rate": 4.826595537029503e-06, "loss": 1.0945, "step": 2058 }, { "epoch": 0.29149854887803495, "grad_norm": 12.807425633873011, "learning_rate": 4.826385718860527e-06, "loss": 1.1737, "step": 2059 }, { "epoch": 0.2916401217526722, "grad_norm": 11.872064809946183, "learning_rate": 4.826175778395188e-06, "loss": 1.2112, "step": 2060 }, { "epoch": 0.2917816946273094, "grad_norm": 11.548699798722609, "learning_rate": 4.825965715644523e-06, "loss": 1.1904, "step": 2061 }, { "epoch": 0.29192326750194664, "grad_norm": 8.313775215712916, "learning_rate": 4.825755530619576e-06, "loss": 1.2634, "step": 2062 }, { "epoch": 0.29206484037658387, "grad_norm": 10.56636870372257, "learning_rate": 4.825545223331392e-06, "loss": 1.1228, "step": 2063 }, { "epoch": 0.29220641325122104, "grad_norm": 11.922635235426405, "learning_rate": 4.825334793791032e-06, "loss": 1.1681, "step": 2064 }, { "epoch": 0.2923479861258583, "grad_norm": 13.143557843786771, "learning_rate": 4.825124242009556e-06, "loss": 1.3747, "step": 2065 }, { "epoch": 0.2924895590004955, "grad_norm": 9.021751821038427, "learning_rate": 4.824913567998031e-06, "loss": 1.2517, "step": 2066 }, { "epoch": 0.29263113187513273, "grad_norm": 13.030851289426506, "learning_rate": 4.8247027717675335e-06, "loss": 1.284, "step": 2067 }, { "epoch": 0.29277270474976996, "grad_norm": 10.174518950617019, "learning_rate": 4.8244918533291444e-06, "loss": 1.2078, "step": 2068 }, { "epoch": 0.29291427762440714, "grad_norm": 9.756096132324858, "learning_rate": 4.824280812693952e-06, "loss": 1.1643, "step": 2069 }, { "epoch": 0.29305585049904437, "grad_norm": 9.611962145378003, "learning_rate": 4.824069649873051e-06, "loss": 1.2618, "step": 2070 }, { "epoch": 0.2931974233736816, "grad_norm": 9.487212559463265, "learning_rate": 4.82385836487754e-06, "loss": 1.3131, "step": 2071 }, { "epoch": 0.29333899624831883, "grad_norm": 9.96764671414805, "learning_rate": 4.823646957718529e-06, "loss": 1.0943, "step": 2072 }, { "epoch": 0.29348056912295606, "grad_norm": 10.706099181886128, "learning_rate": 4.823435428407129e-06, "loss": 1.3082, "step": 2073 }, { "epoch": 0.29362214199759323, "grad_norm": 11.097320411915078, "learning_rate": 4.823223776954462e-06, "loss": 1.2733, "step": 2074 }, { "epoch": 0.29376371487223046, "grad_norm": 11.121822289097837, "learning_rate": 4.8230120033716525e-06, "loss": 1.3835, "step": 2075 }, { "epoch": 0.2939052877468677, "grad_norm": 10.493553362441636, "learning_rate": 4.822800107669835e-06, "loss": 1.2, "step": 2076 }, { "epoch": 0.2940468606215049, "grad_norm": 8.60107997057092, "learning_rate": 4.822588089860146e-06, "loss": 1.227, "step": 2077 }, { "epoch": 0.29418843349614215, "grad_norm": 8.514456180537485, "learning_rate": 4.822375949953735e-06, "loss": 1.1557, "step": 2078 }, { "epoch": 0.2943300063707794, "grad_norm": 8.435488764921983, "learning_rate": 4.82216368796175e-06, "loss": 1.1332, "step": 2079 }, { "epoch": 0.29447157924541656, "grad_norm": 8.7924151591616, "learning_rate": 4.8219513038953534e-06, "loss": 1.2745, "step": 2080 }, { "epoch": 0.2946131521200538, "grad_norm": 7.909887389335128, "learning_rate": 4.821738797765707e-06, "loss": 1.1001, "step": 2081 }, { "epoch": 0.294754724994691, "grad_norm": 9.95340478045743, "learning_rate": 4.8215261695839825e-06, "loss": 1.2164, "step": 2082 }, { "epoch": 0.29489629786932825, "grad_norm": 10.802611310827235, "learning_rate": 4.821313419361359e-06, "loss": 1.0639, "step": 2083 }, { "epoch": 0.2950378707439655, "grad_norm": 7.97724945966029, "learning_rate": 4.82110054710902e-06, "loss": 1.1815, "step": 2084 }, { "epoch": 0.29517944361860265, "grad_norm": 9.795714192468425, "learning_rate": 4.820887552838156e-06, "loss": 1.2038, "step": 2085 }, { "epoch": 0.2953210164932399, "grad_norm": 10.906821861686318, "learning_rate": 4.820674436559964e-06, "loss": 1.374, "step": 2086 }, { "epoch": 0.2954625893678771, "grad_norm": 10.705066543792768, "learning_rate": 4.8204611982856465e-06, "loss": 1.2546, "step": 2087 }, { "epoch": 0.29560416224251435, "grad_norm": 11.396213745983383, "learning_rate": 4.820247838026414e-06, "loss": 1.2, "step": 2088 }, { "epoch": 0.2957457351171516, "grad_norm": 8.178922630189444, "learning_rate": 4.820034355793483e-06, "loss": 1.243, "step": 2089 }, { "epoch": 0.29588730799178875, "grad_norm": 8.119498870118493, "learning_rate": 4.819820751598076e-06, "loss": 1.2636, "step": 2090 }, { "epoch": 0.296028880866426, "grad_norm": 9.826151493531338, "learning_rate": 4.819607025451422e-06, "loss": 1.1707, "step": 2091 }, { "epoch": 0.2961704537410632, "grad_norm": 9.617062156474002, "learning_rate": 4.819393177364756e-06, "loss": 1.1871, "step": 2092 }, { "epoch": 0.29631202661570044, "grad_norm": 10.31363112286243, "learning_rate": 4.81917920734932e-06, "loss": 1.2422, "step": 2093 }, { "epoch": 0.29645359949033767, "grad_norm": 9.138566474934505, "learning_rate": 4.818965115416362e-06, "loss": 1.2549, "step": 2094 }, { "epoch": 0.29659517236497485, "grad_norm": 9.85172745326811, "learning_rate": 4.818750901577137e-06, "loss": 1.2347, "step": 2095 }, { "epoch": 0.2967367452396121, "grad_norm": 8.22366893630366, "learning_rate": 4.818536565842907e-06, "loss": 1.1711, "step": 2096 }, { "epoch": 0.2968783181142493, "grad_norm": 8.175611411279416, "learning_rate": 4.8183221082249375e-06, "loss": 1.0131, "step": 2097 }, { "epoch": 0.29701989098888654, "grad_norm": 8.010346874103332, "learning_rate": 4.8181075287345045e-06, "loss": 1.1154, "step": 2098 }, { "epoch": 0.29716146386352377, "grad_norm": 9.626781348322915, "learning_rate": 4.817892827382886e-06, "loss": 1.3354, "step": 2099 }, { "epoch": 0.29730303673816094, "grad_norm": 11.52791160495602, "learning_rate": 4.81767800418137e-06, "loss": 1.2014, "step": 2100 }, { "epoch": 0.29744460961279817, "grad_norm": 8.368175857825886, "learning_rate": 4.8174630591412495e-06, "loss": 1.2245, "step": 2101 }, { "epoch": 0.2975861824874354, "grad_norm": 12.32874446419734, "learning_rate": 4.817247992273824e-06, "loss": 1.2566, "step": 2102 }, { "epoch": 0.29772775536207263, "grad_norm": 10.076369305647201, "learning_rate": 4.8170328035904e-06, "loss": 1.2384, "step": 2103 }, { "epoch": 0.29786932823670986, "grad_norm": 11.00720637692058, "learning_rate": 4.816817493102289e-06, "loss": 1.1792, "step": 2104 }, { "epoch": 0.2980109011113471, "grad_norm": 9.911277482024383, "learning_rate": 4.81660206082081e-06, "loss": 1.2147, "step": 2105 }, { "epoch": 0.29815247398598427, "grad_norm": 10.688212766916596, "learning_rate": 4.816386506757287e-06, "loss": 1.0191, "step": 2106 }, { "epoch": 0.2982940468606215, "grad_norm": 11.505436151452766, "learning_rate": 4.816170830923053e-06, "loss": 1.0626, "step": 2107 }, { "epoch": 0.29843561973525873, "grad_norm": 11.237949953841184, "learning_rate": 4.815955033329446e-06, "loss": 1.1264, "step": 2108 }, { "epoch": 0.29857719260989596, "grad_norm": 14.14289573014625, "learning_rate": 4.815739113987809e-06, "loss": 1.4769, "step": 2109 }, { "epoch": 0.2987187654845332, "grad_norm": 10.05426408636452, "learning_rate": 4.815523072909494e-06, "loss": 1.2036, "step": 2110 }, { "epoch": 0.29886033835917036, "grad_norm": 10.025657544725012, "learning_rate": 4.815306910105857e-06, "loss": 1.1251, "step": 2111 }, { "epoch": 0.2990019112338076, "grad_norm": 10.31196657592944, "learning_rate": 4.815090625588263e-06, "loss": 1.1327, "step": 2112 }, { "epoch": 0.2991434841084448, "grad_norm": 11.980683356204713, "learning_rate": 4.81487421936808e-06, "loss": 1.2476, "step": 2113 }, { "epoch": 0.29928505698308205, "grad_norm": 13.126043950298058, "learning_rate": 4.814657691456685e-06, "loss": 1.2584, "step": 2114 }, { "epoch": 0.2994266298577193, "grad_norm": 9.7369301077066, "learning_rate": 4.814441041865463e-06, "loss": 1.2982, "step": 2115 }, { "epoch": 0.29956820273235646, "grad_norm": 7.764173775050356, "learning_rate": 4.814224270605799e-06, "loss": 1.2004, "step": 2116 }, { "epoch": 0.2997097756069937, "grad_norm": 8.969126411009054, "learning_rate": 4.814007377689093e-06, "loss": 1.2858, "step": 2117 }, { "epoch": 0.2998513484816309, "grad_norm": 8.84804729751169, "learning_rate": 4.813790363126743e-06, "loss": 1.0948, "step": 2118 }, { "epoch": 0.29999292135626815, "grad_norm": 10.470737917599815, "learning_rate": 4.813573226930158e-06, "loss": 1.218, "step": 2119 }, { "epoch": 0.3001344942309054, "grad_norm": 9.047898308787094, "learning_rate": 4.813355969110755e-06, "loss": 1.1304, "step": 2120 }, { "epoch": 0.30027606710554255, "grad_norm": 9.806374745885366, "learning_rate": 4.813138589679953e-06, "loss": 1.1958, "step": 2121 }, { "epoch": 0.3004176399801798, "grad_norm": 11.055679546365912, "learning_rate": 4.812921088649181e-06, "loss": 1.1849, "step": 2122 }, { "epoch": 0.300559212854817, "grad_norm": 9.724575443365687, "learning_rate": 4.812703466029871e-06, "loss": 1.1896, "step": 2123 }, { "epoch": 0.30070078572945425, "grad_norm": 7.92515360686358, "learning_rate": 4.812485721833465e-06, "loss": 1.1648, "step": 2124 }, { "epoch": 0.3008423586040915, "grad_norm": 9.048085502321705, "learning_rate": 4.812267856071407e-06, "loss": 1.2137, "step": 2125 }, { "epoch": 0.30098393147872865, "grad_norm": 9.772460108002488, "learning_rate": 4.812049868755154e-06, "loss": 1.1587, "step": 2126 }, { "epoch": 0.3011255043533659, "grad_norm": 9.817632219877925, "learning_rate": 4.8118317598961625e-06, "loss": 1.2271, "step": 2127 }, { "epoch": 0.3012670772280031, "grad_norm": 9.031094137252582, "learning_rate": 4.811613529505899e-06, "loss": 1.1718, "step": 2128 }, { "epoch": 0.30140865010264034, "grad_norm": 8.678348688656964, "learning_rate": 4.811395177595836e-06, "loss": 1.1695, "step": 2129 }, { "epoch": 0.30155022297727757, "grad_norm": 12.695537257625832, "learning_rate": 4.811176704177452e-06, "loss": 1.2688, "step": 2130 }, { "epoch": 0.30169179585191475, "grad_norm": 8.762127999804667, "learning_rate": 4.810958109262232e-06, "loss": 1.053, "step": 2131 }, { "epoch": 0.301833368726552, "grad_norm": 8.79772272988491, "learning_rate": 4.810739392861667e-06, "loss": 1.2597, "step": 2132 }, { "epoch": 0.3019749416011892, "grad_norm": 12.13297725057171, "learning_rate": 4.810520554987256e-06, "loss": 1.2667, "step": 2133 }, { "epoch": 0.30211651447582644, "grad_norm": 9.20987597585997, "learning_rate": 4.810301595650501e-06, "loss": 1.1689, "step": 2134 }, { "epoch": 0.30225808735046367, "grad_norm": 9.589788059919766, "learning_rate": 4.810082514862915e-06, "loss": 1.3787, "step": 2135 }, { "epoch": 0.3023996602251009, "grad_norm": 7.8920737570067825, "learning_rate": 4.809863312636013e-06, "loss": 1.1501, "step": 2136 }, { "epoch": 0.30254123309973807, "grad_norm": 9.677296242289742, "learning_rate": 4.8096439889813186e-06, "loss": 1.2924, "step": 2137 }, { "epoch": 0.3026828059743753, "grad_norm": 7.539466016969963, "learning_rate": 4.809424543910363e-06, "loss": 1.1711, "step": 2138 }, { "epoch": 0.30282437884901253, "grad_norm": 10.745254888764237, "learning_rate": 4.80920497743468e-06, "loss": 1.1121, "step": 2139 }, { "epoch": 0.30296595172364976, "grad_norm": 9.380276835246356, "learning_rate": 4.808985289565813e-06, "loss": 1.2156, "step": 2140 }, { "epoch": 0.303107524598287, "grad_norm": 9.660712238852478, "learning_rate": 4.808765480315312e-06, "loss": 1.2627, "step": 2141 }, { "epoch": 0.30324909747292417, "grad_norm": 9.024896729953651, "learning_rate": 4.80854554969473e-06, "loss": 1.0978, "step": 2142 }, { "epoch": 0.3033906703475614, "grad_norm": 9.356167608292036, "learning_rate": 4.80832549771563e-06, "loss": 1.0612, "step": 2143 }, { "epoch": 0.3035322432221986, "grad_norm": 8.91437104048083, "learning_rate": 4.808105324389581e-06, "loss": 1.2948, "step": 2144 }, { "epoch": 0.30367381609683586, "grad_norm": 9.051249077518596, "learning_rate": 4.807885029728155e-06, "loss": 1.0464, "step": 2145 }, { "epoch": 0.3038153889714731, "grad_norm": 11.494547339076622, "learning_rate": 4.807664613742934e-06, "loss": 1.2066, "step": 2146 }, { "epoch": 0.30395696184611026, "grad_norm": 10.551580619824698, "learning_rate": 4.807444076445506e-06, "loss": 1.2141, "step": 2147 }, { "epoch": 0.3040985347207475, "grad_norm": 8.865018577663795, "learning_rate": 4.807223417847462e-06, "loss": 1.0731, "step": 2148 }, { "epoch": 0.3042401075953847, "grad_norm": 8.843968688633364, "learning_rate": 4.807002637960403e-06, "loss": 1.2672, "step": 2149 }, { "epoch": 0.30438168047002195, "grad_norm": 9.404368469029142, "learning_rate": 4.806781736795937e-06, "loss": 1.1864, "step": 2150 }, { "epoch": 0.3045232533446592, "grad_norm": 9.579407349414453, "learning_rate": 4.806560714365674e-06, "loss": 1.106, "step": 2151 }, { "epoch": 0.30466482621929636, "grad_norm": 10.548548767651198, "learning_rate": 4.806339570681234e-06, "loss": 1.2189, "step": 2152 }, { "epoch": 0.3048063990939336, "grad_norm": 7.941506643641123, "learning_rate": 4.8061183057542424e-06, "loss": 1.1759, "step": 2153 }, { "epoch": 0.3049479719685708, "grad_norm": 8.20968115983159, "learning_rate": 4.805896919596332e-06, "loss": 1.1252, "step": 2154 }, { "epoch": 0.30508954484320805, "grad_norm": 8.69962918598447, "learning_rate": 4.805675412219139e-06, "loss": 1.2823, "step": 2155 }, { "epoch": 0.3052311177178453, "grad_norm": 8.862838364182712, "learning_rate": 4.805453783634309e-06, "loss": 1.0743, "step": 2156 }, { "epoch": 0.30537269059248245, "grad_norm": 9.533465818893962, "learning_rate": 4.805232033853493e-06, "loss": 1.3235, "step": 2157 }, { "epoch": 0.3055142634671197, "grad_norm": 10.847826804227275, "learning_rate": 4.805010162888347e-06, "loss": 1.2404, "step": 2158 }, { "epoch": 0.3056558363417569, "grad_norm": 8.465865316039253, "learning_rate": 4.804788170750536e-06, "loss": 1.2761, "step": 2159 }, { "epoch": 0.30579740921639414, "grad_norm": 6.896326729294207, "learning_rate": 4.804566057451729e-06, "loss": 1.0506, "step": 2160 }, { "epoch": 0.3059389820910314, "grad_norm": 9.076844957356208, "learning_rate": 4.8043438230036034e-06, "loss": 1.1467, "step": 2161 }, { "epoch": 0.3060805549656686, "grad_norm": 8.169390269654748, "learning_rate": 4.804121467417841e-06, "loss": 1.1067, "step": 2162 }, { "epoch": 0.3062221278403058, "grad_norm": 8.87474339073896, "learning_rate": 4.8038989907061305e-06, "loss": 1.3508, "step": 2163 }, { "epoch": 0.306363700714943, "grad_norm": 7.3808384430934995, "learning_rate": 4.803676392880168e-06, "loss": 1.1407, "step": 2164 }, { "epoch": 0.30650527358958024, "grad_norm": 7.510865796363714, "learning_rate": 4.803453673951656e-06, "loss": 1.2397, "step": 2165 }, { "epoch": 0.30664684646421747, "grad_norm": 9.032354901053974, "learning_rate": 4.803230833932302e-06, "loss": 1.1027, "step": 2166 }, { "epoch": 0.3067884193388547, "grad_norm": 10.225542616495117, "learning_rate": 4.803007872833819e-06, "loss": 1.3034, "step": 2167 }, { "epoch": 0.3069299922134919, "grad_norm": 8.066411452026168, "learning_rate": 4.8027847906679305e-06, "loss": 1.1676, "step": 2168 }, { "epoch": 0.3070715650881291, "grad_norm": 11.045525755761535, "learning_rate": 4.802561587446362e-06, "loss": 1.2162, "step": 2169 }, { "epoch": 0.30721313796276634, "grad_norm": 11.150607009581536, "learning_rate": 4.802338263180848e-06, "loss": 1.3734, "step": 2170 }, { "epoch": 0.30735471083740357, "grad_norm": 8.781057986946514, "learning_rate": 4.802114817883128e-06, "loss": 1.1012, "step": 2171 }, { "epoch": 0.3074962837120408, "grad_norm": 8.987321188133418, "learning_rate": 4.801891251564949e-06, "loss": 1.1374, "step": 2172 }, { "epoch": 0.30763785658667797, "grad_norm": 11.503467866222723, "learning_rate": 4.801667564238063e-06, "loss": 1.2758, "step": 2173 }, { "epoch": 0.3077794294613152, "grad_norm": 11.2666072761129, "learning_rate": 4.801443755914229e-06, "loss": 1.2351, "step": 2174 }, { "epoch": 0.30792100233595243, "grad_norm": 11.122527115545061, "learning_rate": 4.801219826605213e-06, "loss": 1.24, "step": 2175 }, { "epoch": 0.30806257521058966, "grad_norm": 11.30631015101554, "learning_rate": 4.8009957763227875e-06, "loss": 1.2926, "step": 2176 }, { "epoch": 0.3082041480852269, "grad_norm": 10.125048225193794, "learning_rate": 4.800771605078728e-06, "loss": 1.3017, "step": 2177 }, { "epoch": 0.30834572095986407, "grad_norm": 10.27865502906596, "learning_rate": 4.800547312884822e-06, "loss": 1.1554, "step": 2178 }, { "epoch": 0.3084872938345013, "grad_norm": 10.32205294098533, "learning_rate": 4.800322899752859e-06, "loss": 1.2131, "step": 2179 }, { "epoch": 0.3086288667091385, "grad_norm": 10.117732701284831, "learning_rate": 4.800098365694636e-06, "loss": 1.228, "step": 2180 }, { "epoch": 0.30877043958377576, "grad_norm": 12.07318875014715, "learning_rate": 4.799873710721958e-06, "loss": 1.316, "step": 2181 }, { "epoch": 0.308912012458413, "grad_norm": 8.094701998187032, "learning_rate": 4.799648934846633e-06, "loss": 1.1431, "step": 2182 }, { "epoch": 0.30905358533305016, "grad_norm": 8.571321495840568, "learning_rate": 4.799424038080478e-06, "loss": 1.1428, "step": 2183 }, { "epoch": 0.3091951582076874, "grad_norm": 8.888715864192953, "learning_rate": 4.799199020435316e-06, "loss": 1.2063, "step": 2184 }, { "epoch": 0.3093367310823246, "grad_norm": 11.492814846277666, "learning_rate": 4.798973881922975e-06, "loss": 1.2589, "step": 2185 }, { "epoch": 0.30947830395696185, "grad_norm": 8.000476822948196, "learning_rate": 4.798748622555293e-06, "loss": 1.1714, "step": 2186 }, { "epoch": 0.3096198768315991, "grad_norm": 9.823049523489026, "learning_rate": 4.798523242344109e-06, "loss": 1.3461, "step": 2187 }, { "epoch": 0.30976144970623626, "grad_norm": 8.587745594138687, "learning_rate": 4.798297741301271e-06, "loss": 1.2277, "step": 2188 }, { "epoch": 0.3099030225808735, "grad_norm": 8.722750275887089, "learning_rate": 4.798072119438636e-06, "loss": 1.2084, "step": 2189 }, { "epoch": 0.3100445954555107, "grad_norm": 8.476433340749857, "learning_rate": 4.797846376768062e-06, "loss": 1.1166, "step": 2190 }, { "epoch": 0.31018616833014795, "grad_norm": 8.190020690072071, "learning_rate": 4.797620513301418e-06, "loss": 1.1831, "step": 2191 }, { "epoch": 0.3103277412047852, "grad_norm": 8.49522086033202, "learning_rate": 4.797394529050577e-06, "loss": 1.1999, "step": 2192 }, { "epoch": 0.3104693140794224, "grad_norm": 10.28829204713155, "learning_rate": 4.797168424027419e-06, "loss": 1.2601, "step": 2193 }, { "epoch": 0.3106108869540596, "grad_norm": 8.227485220984809, "learning_rate": 4.796942198243828e-06, "loss": 1.1101, "step": 2194 }, { "epoch": 0.3107524598286968, "grad_norm": 8.880761653781141, "learning_rate": 4.796715851711699e-06, "loss": 1.1765, "step": 2195 }, { "epoch": 0.31089403270333404, "grad_norm": 8.37127557184131, "learning_rate": 4.7964893844429315e-06, "loss": 1.23, "step": 2196 }, { "epoch": 0.3110356055779713, "grad_norm": 10.514872236359919, "learning_rate": 4.796262796449428e-06, "loss": 1.0825, "step": 2197 }, { "epoch": 0.3111771784526085, "grad_norm": 9.606444468667366, "learning_rate": 4.7960360877431025e-06, "loss": 1.0473, "step": 2198 }, { "epoch": 0.3113187513272457, "grad_norm": 8.595107314686453, "learning_rate": 4.795809258335872e-06, "loss": 1.3074, "step": 2199 }, { "epoch": 0.3114603242018829, "grad_norm": 10.50336111859426, "learning_rate": 4.795582308239659e-06, "loss": 1.1388, "step": 2200 }, { "epoch": 0.31160189707652014, "grad_norm": 11.780488412055016, "learning_rate": 4.795355237466397e-06, "loss": 1.3267, "step": 2201 }, { "epoch": 0.31174346995115737, "grad_norm": 10.940185217257094, "learning_rate": 4.795128046028021e-06, "loss": 1.3525, "step": 2202 }, { "epoch": 0.3118850428257946, "grad_norm": 8.097551668101792, "learning_rate": 4.794900733936476e-06, "loss": 1.2334, "step": 2203 }, { "epoch": 0.3120266157004318, "grad_norm": 9.400532857523082, "learning_rate": 4.794673301203709e-06, "loss": 1.2593, "step": 2204 }, { "epoch": 0.312168188575069, "grad_norm": 8.970286593244467, "learning_rate": 4.794445747841679e-06, "loss": 1.262, "step": 2205 }, { "epoch": 0.31230976144970624, "grad_norm": 9.929607592125368, "learning_rate": 4.794218073862346e-06, "loss": 1.14, "step": 2206 }, { "epoch": 0.31245133432434347, "grad_norm": 8.480832197992589, "learning_rate": 4.79399027927768e-06, "loss": 1.1471, "step": 2207 }, { "epoch": 0.3125929071989807, "grad_norm": 11.82014597329822, "learning_rate": 4.793762364099655e-06, "loss": 1.3409, "step": 2208 }, { "epoch": 0.31273448007361787, "grad_norm": 9.161825207768118, "learning_rate": 4.793534328340253e-06, "loss": 1.2557, "step": 2209 }, { "epoch": 0.3128760529482551, "grad_norm": 9.577788454400425, "learning_rate": 4.7933061720114615e-06, "loss": 1.2729, "step": 2210 }, { "epoch": 0.31301762582289233, "grad_norm": 10.758464940273484, "learning_rate": 4.793077895125274e-06, "loss": 1.2912, "step": 2211 }, { "epoch": 0.31315919869752956, "grad_norm": 12.868306318048122, "learning_rate": 4.792849497693692e-06, "loss": 1.1913, "step": 2212 }, { "epoch": 0.3133007715721668, "grad_norm": 8.895016730272964, "learning_rate": 4.7926209797287216e-06, "loss": 1.2055, "step": 2213 }, { "epoch": 0.31344234444680397, "grad_norm": 9.022187328845336, "learning_rate": 4.792392341242375e-06, "loss": 1.2267, "step": 2214 }, { "epoch": 0.3135839173214412, "grad_norm": 8.908743311907743, "learning_rate": 4.792163582246674e-06, "loss": 1.2255, "step": 2215 }, { "epoch": 0.3137254901960784, "grad_norm": 11.460139979494551, "learning_rate": 4.791934702753641e-06, "loss": 1.269, "step": 2216 }, { "epoch": 0.31386706307071566, "grad_norm": 9.036632583799667, "learning_rate": 4.79170570277531e-06, "loss": 1.1171, "step": 2217 }, { "epoch": 0.3140086359453529, "grad_norm": 8.463673326299663, "learning_rate": 4.791476582323719e-06, "loss": 1.2984, "step": 2218 }, { "epoch": 0.31415020881999006, "grad_norm": 9.751828144540106, "learning_rate": 4.791247341410913e-06, "loss": 1.3302, "step": 2219 }, { "epoch": 0.3142917816946273, "grad_norm": 7.900793069555086, "learning_rate": 4.791017980048942e-06, "loss": 1.2362, "step": 2220 }, { "epoch": 0.3144333545692645, "grad_norm": 9.539209663074491, "learning_rate": 4.790788498249864e-06, "loss": 1.3595, "step": 2221 }, { "epoch": 0.31457492744390175, "grad_norm": 11.73292632119514, "learning_rate": 4.790558896025743e-06, "loss": 1.3821, "step": 2222 }, { "epoch": 0.314716500318539, "grad_norm": 10.801273934852437, "learning_rate": 4.79032917338865e-06, "loss": 1.3387, "step": 2223 }, { "epoch": 0.3148580731931762, "grad_norm": 8.94835041550628, "learning_rate": 4.790099330350658e-06, "loss": 1.2132, "step": 2224 }, { "epoch": 0.3149996460678134, "grad_norm": 8.238468781204837, "learning_rate": 4.789869366923853e-06, "loss": 1.224, "step": 2225 }, { "epoch": 0.3151412189424506, "grad_norm": 9.912430528972155, "learning_rate": 4.789639283120323e-06, "loss": 1.1858, "step": 2226 }, { "epoch": 0.31528279181708785, "grad_norm": 7.908317205200257, "learning_rate": 4.789409078952162e-06, "loss": 1.0686, "step": 2227 }, { "epoch": 0.3154243646917251, "grad_norm": 8.584843129328327, "learning_rate": 4.789178754431474e-06, "loss": 1.0398, "step": 2228 }, { "epoch": 0.3155659375663623, "grad_norm": 8.540996196453435, "learning_rate": 4.788948309570365e-06, "loss": 1.1908, "step": 2229 }, { "epoch": 0.3157075104409995, "grad_norm": 10.289811393114034, "learning_rate": 4.78871774438095e-06, "loss": 1.2582, "step": 2230 }, { "epoch": 0.3158490833156367, "grad_norm": 9.394297359354587, "learning_rate": 4.78848705887535e-06, "loss": 1.1249, "step": 2231 }, { "epoch": 0.31599065619027394, "grad_norm": 8.691800816184335, "learning_rate": 4.788256253065692e-06, "loss": 1.2266, "step": 2232 }, { "epoch": 0.3161322290649112, "grad_norm": 8.020070648262708, "learning_rate": 4.7880253269641085e-06, "loss": 1.1242, "step": 2233 }, { "epoch": 0.3162738019395484, "grad_norm": 9.694049811384582, "learning_rate": 4.787794280582739e-06, "loss": 1.3218, "step": 2234 }, { "epoch": 0.3164153748141856, "grad_norm": 8.224143923468617, "learning_rate": 4.787563113933731e-06, "loss": 1.1732, "step": 2235 }, { "epoch": 0.3165569476888228, "grad_norm": 9.236432822479665, "learning_rate": 4.787331827029236e-06, "loss": 1.1252, "step": 2236 }, { "epoch": 0.31669852056346004, "grad_norm": 11.796202381487019, "learning_rate": 4.787100419881412e-06, "loss": 1.1093, "step": 2237 }, { "epoch": 0.31684009343809727, "grad_norm": 9.39560885841426, "learning_rate": 4.7868688925024245e-06, "loss": 1.1114, "step": 2238 }, { "epoch": 0.3169816663127345, "grad_norm": 8.414224352116712, "learning_rate": 4.786637244904444e-06, "loss": 1.1204, "step": 2239 }, { "epoch": 0.3171232391873717, "grad_norm": 8.103215048128178, "learning_rate": 4.786405477099648e-06, "loss": 1.1257, "step": 2240 }, { "epoch": 0.3172648120620089, "grad_norm": 10.742834852653498, "learning_rate": 4.786173589100222e-06, "loss": 1.3593, "step": 2241 }, { "epoch": 0.31740638493664614, "grad_norm": 10.562664324142451, "learning_rate": 4.785941580918354e-06, "loss": 1.0311, "step": 2242 }, { "epoch": 0.31754795781128337, "grad_norm": 8.325672697548182, "learning_rate": 4.785709452566243e-06, "loss": 1.2481, "step": 2243 }, { "epoch": 0.3176895306859206, "grad_norm": 8.29179430788284, "learning_rate": 4.785477204056089e-06, "loss": 1.2457, "step": 2244 }, { "epoch": 0.31783110356055777, "grad_norm": 9.7439744376613, "learning_rate": 4.785244835400103e-06, "loss": 1.1732, "step": 2245 }, { "epoch": 0.317972676435195, "grad_norm": 10.329679848214154, "learning_rate": 4.7850123466105e-06, "loss": 1.1916, "step": 2246 }, { "epoch": 0.31811424930983223, "grad_norm": 8.321956447040883, "learning_rate": 4.784779737699502e-06, "loss": 1.0392, "step": 2247 }, { "epoch": 0.31825582218446946, "grad_norm": 9.539086494134443, "learning_rate": 4.7845470086793365e-06, "loss": 1.2911, "step": 2248 }, { "epoch": 0.3183973950591067, "grad_norm": 9.198932618957755, "learning_rate": 4.784314159562238e-06, "loss": 1.1581, "step": 2249 }, { "epoch": 0.3185389679337439, "grad_norm": 8.531940809514023, "learning_rate": 4.7840811903604475e-06, "loss": 1.3097, "step": 2250 }, { "epoch": 0.3186805408083811, "grad_norm": 9.931655412661065, "learning_rate": 4.783848101086212e-06, "loss": 1.1809, "step": 2251 }, { "epoch": 0.3188221136830183, "grad_norm": 7.448440070979077, "learning_rate": 4.783614891751785e-06, "loss": 1.0939, "step": 2252 }, { "epoch": 0.31896368655765556, "grad_norm": 7.889945969777351, "learning_rate": 4.783381562369425e-06, "loss": 1.3002, "step": 2253 }, { "epoch": 0.3191052594322928, "grad_norm": 9.488645493986319, "learning_rate": 4.7831481129514e-06, "loss": 1.3186, "step": 2254 }, { "epoch": 0.31924683230693, "grad_norm": 9.008496406577748, "learning_rate": 4.78291454350998e-06, "loss": 1.2953, "step": 2255 }, { "epoch": 0.3193884051815672, "grad_norm": 10.29152733918268, "learning_rate": 4.782680854057445e-06, "loss": 1.2342, "step": 2256 }, { "epoch": 0.3195299780562044, "grad_norm": 10.415046708345178, "learning_rate": 4.78244704460608e-06, "loss": 1.3273, "step": 2257 }, { "epoch": 0.31967155093084165, "grad_norm": 10.627958357103777, "learning_rate": 4.782213115168176e-06, "loss": 1.3295, "step": 2258 }, { "epoch": 0.3198131238054789, "grad_norm": 9.558768946979432, "learning_rate": 4.781979065756029e-06, "loss": 1.1931, "step": 2259 }, { "epoch": 0.3199546966801161, "grad_norm": 12.470743349827453, "learning_rate": 4.781744896381945e-06, "loss": 1.2722, "step": 2260 }, { "epoch": 0.3200962695547533, "grad_norm": 8.30291424800748, "learning_rate": 4.781510607058233e-06, "loss": 1.1835, "step": 2261 }, { "epoch": 0.3202378424293905, "grad_norm": 10.148834808066677, "learning_rate": 4.781276197797209e-06, "loss": 1.2835, "step": 2262 }, { "epoch": 0.32037941530402775, "grad_norm": 10.559669611993224, "learning_rate": 4.781041668611197e-06, "loss": 1.1497, "step": 2263 }, { "epoch": 0.320520988178665, "grad_norm": 8.218536026058478, "learning_rate": 4.780807019512525e-06, "loss": 1.2487, "step": 2264 }, { "epoch": 0.3206625610533022, "grad_norm": 9.602016602743168, "learning_rate": 4.7805722505135285e-06, "loss": 1.3059, "step": 2265 }, { "epoch": 0.3208041339279394, "grad_norm": 8.030165543423445, "learning_rate": 4.7803373616265495e-06, "loss": 1.2537, "step": 2266 }, { "epoch": 0.3209457068025766, "grad_norm": 9.628204604887364, "learning_rate": 4.780102352863935e-06, "loss": 1.1725, "step": 2267 }, { "epoch": 0.32108727967721384, "grad_norm": 9.378977834483168, "learning_rate": 4.77986722423804e-06, "loss": 1.1321, "step": 2268 }, { "epoch": 0.3212288525518511, "grad_norm": 11.082142089461568, "learning_rate": 4.779631975761226e-06, "loss": 1.3052, "step": 2269 }, { "epoch": 0.3213704254264883, "grad_norm": 9.858468426649155, "learning_rate": 4.779396607445858e-06, "loss": 1.3193, "step": 2270 }, { "epoch": 0.3215119983011255, "grad_norm": 8.754841800046854, "learning_rate": 4.779161119304311e-06, "loss": 1.2678, "step": 2271 }, { "epoch": 0.3216535711757627, "grad_norm": 10.322055527958838, "learning_rate": 4.7789255113489615e-06, "loss": 1.0483, "step": 2272 }, { "epoch": 0.32179514405039994, "grad_norm": 6.929448034370065, "learning_rate": 4.778689783592198e-06, "loss": 1.1261, "step": 2273 }, { "epoch": 0.32193671692503717, "grad_norm": 7.720204081234395, "learning_rate": 4.778453936046412e-06, "loss": 1.1112, "step": 2274 }, { "epoch": 0.3220782897996744, "grad_norm": 11.003469440212013, "learning_rate": 4.778217968724002e-06, "loss": 1.1805, "step": 2275 }, { "epoch": 0.3222198626743116, "grad_norm": 11.54497722056815, "learning_rate": 4.777981881637372e-06, "loss": 1.3244, "step": 2276 }, { "epoch": 0.3223614355489488, "grad_norm": 9.22379570933776, "learning_rate": 4.777745674798931e-06, "loss": 1.1291, "step": 2277 }, { "epoch": 0.32250300842358604, "grad_norm": 8.558709189516994, "learning_rate": 4.7775093482211e-06, "loss": 1.3068, "step": 2278 }, { "epoch": 0.32264458129822327, "grad_norm": 7.469908811939501, "learning_rate": 4.7772729019163e-06, "loss": 1.1386, "step": 2279 }, { "epoch": 0.3227861541728605, "grad_norm": 10.594264555281086, "learning_rate": 4.777036335896962e-06, "loss": 1.3527, "step": 2280 }, { "epoch": 0.3229277270474977, "grad_norm": 11.72139944008327, "learning_rate": 4.776799650175521e-06, "loss": 1.2344, "step": 2281 }, { "epoch": 0.3230692999221349, "grad_norm": 8.171720951882149, "learning_rate": 4.7765628447644214e-06, "loss": 1.2283, "step": 2282 }, { "epoch": 0.32321087279677213, "grad_norm": 9.384114221994812, "learning_rate": 4.776325919676109e-06, "loss": 1.1947, "step": 2283 }, { "epoch": 0.32335244567140936, "grad_norm": 8.729062124520048, "learning_rate": 4.7760888749230414e-06, "loss": 1.1854, "step": 2284 }, { "epoch": 0.3234940185460466, "grad_norm": 8.480513282212648, "learning_rate": 4.775851710517678e-06, "loss": 1.144, "step": 2285 }, { "epoch": 0.3236355914206838, "grad_norm": 8.153110177497066, "learning_rate": 4.775614426472488e-06, "loss": 1.177, "step": 2286 }, { "epoch": 0.323777164295321, "grad_norm": 9.708967676572342, "learning_rate": 4.775377022799944e-06, "loss": 1.2642, "step": 2287 }, { "epoch": 0.3239187371699582, "grad_norm": 8.621504807967609, "learning_rate": 4.7751394995125266e-06, "loss": 1.0098, "step": 2288 }, { "epoch": 0.32406031004459546, "grad_norm": 10.413556257622275, "learning_rate": 4.7749018566227214e-06, "loss": 1.3502, "step": 2289 }, { "epoch": 0.3242018829192327, "grad_norm": 8.405813893630734, "learning_rate": 4.774664094143022e-06, "loss": 1.0244, "step": 2290 }, { "epoch": 0.3243434557938699, "grad_norm": 9.064759702785192, "learning_rate": 4.774426212085928e-06, "loss": 1.1948, "step": 2291 }, { "epoch": 0.3244850286685071, "grad_norm": 11.118426491811176, "learning_rate": 4.774188210463944e-06, "loss": 1.2851, "step": 2292 }, { "epoch": 0.3246266015431443, "grad_norm": 8.983715583817085, "learning_rate": 4.77395008928958e-06, "loss": 1.1925, "step": 2293 }, { "epoch": 0.32476817441778155, "grad_norm": 10.048354922349619, "learning_rate": 4.773711848575357e-06, "loss": 1.1869, "step": 2294 }, { "epoch": 0.3249097472924188, "grad_norm": 9.674801483877662, "learning_rate": 4.773473488333797e-06, "loss": 1.2729, "step": 2295 }, { "epoch": 0.325051320167056, "grad_norm": 9.579696849892477, "learning_rate": 4.77323500857743e-06, "loss": 1.1929, "step": 2296 }, { "epoch": 0.3251928930416932, "grad_norm": 8.753320990169023, "learning_rate": 4.772996409318794e-06, "loss": 1.1713, "step": 2297 }, { "epoch": 0.3253344659163304, "grad_norm": 9.409540861727294, "learning_rate": 4.772757690570432e-06, "loss": 1.3085, "step": 2298 }, { "epoch": 0.32547603879096765, "grad_norm": 8.519661994640966, "learning_rate": 4.772518852344893e-06, "loss": 1.1769, "step": 2299 }, { "epoch": 0.3256176116656049, "grad_norm": 9.608466277535815, "learning_rate": 4.772279894654732e-06, "loss": 1.3325, "step": 2300 }, { "epoch": 0.3257591845402421, "grad_norm": 7.634963982777558, "learning_rate": 4.772040817512511e-06, "loss": 1.038, "step": 2301 }, { "epoch": 0.3259007574148793, "grad_norm": 7.51675831583092, "learning_rate": 4.7718016209307996e-06, "loss": 1.2637, "step": 2302 }, { "epoch": 0.3260423302895165, "grad_norm": 12.728087500712007, "learning_rate": 4.77156230492217e-06, "loss": 1.2589, "step": 2303 }, { "epoch": 0.32618390316415374, "grad_norm": 9.554039497266695, "learning_rate": 4.771322869499203e-06, "loss": 1.1213, "step": 2304 }, { "epoch": 0.326325476038791, "grad_norm": 9.800792926706874, "learning_rate": 4.7710833146744874e-06, "loss": 1.2086, "step": 2305 }, { "epoch": 0.3264670489134282, "grad_norm": 9.475868495300807, "learning_rate": 4.770843640460615e-06, "loss": 1.3566, "step": 2306 }, { "epoch": 0.32660862178806543, "grad_norm": 10.427775895742867, "learning_rate": 4.770603846870185e-06, "loss": 1.2376, "step": 2307 }, { "epoch": 0.3267501946627026, "grad_norm": 10.714942011033223, "learning_rate": 4.770363933915805e-06, "loss": 1.2113, "step": 2308 }, { "epoch": 0.32689176753733984, "grad_norm": 9.674152457401267, "learning_rate": 4.770123901610085e-06, "loss": 1.1423, "step": 2309 }, { "epoch": 0.32703334041197707, "grad_norm": 8.28243100364127, "learning_rate": 4.769883749965645e-06, "loss": 1.1906, "step": 2310 }, { "epoch": 0.3271749132866143, "grad_norm": 9.847709707064347, "learning_rate": 4.7696434789951074e-06, "loss": 1.1379, "step": 2311 }, { "epoch": 0.32731648616125153, "grad_norm": 9.572035877994262, "learning_rate": 4.769403088711105e-06, "loss": 1.1838, "step": 2312 }, { "epoch": 0.3274580590358887, "grad_norm": 9.991283432517879, "learning_rate": 4.7691625791262756e-06, "loss": 1.259, "step": 2313 }, { "epoch": 0.32759963191052593, "grad_norm": 11.495431407185407, "learning_rate": 4.76892195025326e-06, "loss": 1.3063, "step": 2314 }, { "epoch": 0.32774120478516316, "grad_norm": 8.552767157464576, "learning_rate": 4.768681202104709e-06, "loss": 1.1432, "step": 2315 }, { "epoch": 0.3278827776598004, "grad_norm": 9.76360721341611, "learning_rate": 4.7684403346932795e-06, "loss": 1.1783, "step": 2316 }, { "epoch": 0.3280243505344376, "grad_norm": 9.433485781602643, "learning_rate": 4.768199348031633e-06, "loss": 1.1612, "step": 2317 }, { "epoch": 0.3281659234090748, "grad_norm": 10.542462908159642, "learning_rate": 4.7679582421324385e-06, "loss": 1.319, "step": 2318 }, { "epoch": 0.32830749628371203, "grad_norm": 8.462544212056775, "learning_rate": 4.76771701700837e-06, "loss": 1.0713, "step": 2319 }, { "epoch": 0.32844906915834926, "grad_norm": 10.084905286428349, "learning_rate": 4.767475672672108e-06, "loss": 1.206, "step": 2320 }, { "epoch": 0.3285906420329865, "grad_norm": 11.978561324152595, "learning_rate": 4.767234209136341e-06, "loss": 1.2466, "step": 2321 }, { "epoch": 0.3287322149076237, "grad_norm": 8.597287979107314, "learning_rate": 4.7669926264137625e-06, "loss": 1.2023, "step": 2322 }, { "epoch": 0.3288737877822609, "grad_norm": 8.674217993588412, "learning_rate": 4.766750924517071e-06, "loss": 1.1661, "step": 2323 }, { "epoch": 0.3290153606568981, "grad_norm": 9.143998943285409, "learning_rate": 4.766509103458975e-06, "loss": 1.1806, "step": 2324 }, { "epoch": 0.32915693353153536, "grad_norm": 10.163085515233128, "learning_rate": 4.766267163252185e-06, "loss": 1.3502, "step": 2325 }, { "epoch": 0.3292985064061726, "grad_norm": 8.861589642727413, "learning_rate": 4.766025103909419e-06, "loss": 1.1156, "step": 2326 }, { "epoch": 0.3294400792808098, "grad_norm": 9.609291237566843, "learning_rate": 4.765782925443404e-06, "loss": 1.1547, "step": 2327 }, { "epoch": 0.329581652155447, "grad_norm": 6.997554624398123, "learning_rate": 4.76554062786687e-06, "loss": 1.1849, "step": 2328 }, { "epoch": 0.3297232250300842, "grad_norm": 8.452678224387762, "learning_rate": 4.765298211192554e-06, "loss": 1.2054, "step": 2329 }, { "epoch": 0.32986479790472145, "grad_norm": 9.107208465425332, "learning_rate": 4.7650556754332e-06, "loss": 1.0956, "step": 2330 }, { "epoch": 0.3300063707793587, "grad_norm": 7.341836103662821, "learning_rate": 4.7648130206015585e-06, "loss": 1.1306, "step": 2331 }, { "epoch": 0.3301479436539959, "grad_norm": 11.041763439594105, "learning_rate": 4.764570246710385e-06, "loss": 1.2467, "step": 2332 }, { "epoch": 0.3302895165286331, "grad_norm": 9.848688927161035, "learning_rate": 4.764327353772442e-06, "loss": 1.4403, "step": 2333 }, { "epoch": 0.3304310894032703, "grad_norm": 8.47504351669437, "learning_rate": 4.764084341800499e-06, "loss": 1.2506, "step": 2334 }, { "epoch": 0.33057266227790755, "grad_norm": 7.903254367407597, "learning_rate": 4.763841210807329e-06, "loss": 1.0241, "step": 2335 }, { "epoch": 0.3307142351525448, "grad_norm": 9.61636758093513, "learning_rate": 4.763597960805716e-06, "loss": 1.166, "step": 2336 }, { "epoch": 0.330855808027182, "grad_norm": 8.628222761805802, "learning_rate": 4.763354591808446e-06, "loss": 1.3324, "step": 2337 }, { "epoch": 0.33099738090181924, "grad_norm": 9.564484035137413, "learning_rate": 4.763111103828312e-06, "loss": 1.2075, "step": 2338 }, { "epoch": 0.3311389537764564, "grad_norm": 7.860834611838474, "learning_rate": 4.762867496878114e-06, "loss": 1.1242, "step": 2339 }, { "epoch": 0.33128052665109364, "grad_norm": 9.58612294161502, "learning_rate": 4.76262377097066e-06, "loss": 1.4486, "step": 2340 }, { "epoch": 0.3314220995257309, "grad_norm": 10.749858056839052, "learning_rate": 4.762379926118761e-06, "loss": 1.2744, "step": 2341 }, { "epoch": 0.3315636724003681, "grad_norm": 8.288185756383855, "learning_rate": 4.762135962335237e-06, "loss": 1.12, "step": 2342 }, { "epoch": 0.33170524527500533, "grad_norm": 7.1726400927174785, "learning_rate": 4.7618918796329115e-06, "loss": 1.1916, "step": 2343 }, { "epoch": 0.3318468181496425, "grad_norm": 8.225043262495454, "learning_rate": 4.761647678024617e-06, "loss": 1.1791, "step": 2344 }, { "epoch": 0.33198839102427974, "grad_norm": 10.341498645664528, "learning_rate": 4.76140335752319e-06, "loss": 1.127, "step": 2345 }, { "epoch": 0.33212996389891697, "grad_norm": 8.587457301937382, "learning_rate": 4.7611589181414745e-06, "loss": 1.1436, "step": 2346 }, { "epoch": 0.3322715367735542, "grad_norm": 8.990554091167626, "learning_rate": 4.76091435989232e-06, "loss": 1.0828, "step": 2347 }, { "epoch": 0.33241310964819143, "grad_norm": 7.676015571725545, "learning_rate": 4.760669682788584e-06, "loss": 1.2724, "step": 2348 }, { "epoch": 0.3325546825228286, "grad_norm": 7.530251337849331, "learning_rate": 4.760424886843129e-06, "loss": 1.1688, "step": 2349 }, { "epoch": 0.33269625539746583, "grad_norm": 9.697781920734705, "learning_rate": 4.7601799720688235e-06, "loss": 1.2417, "step": 2350 }, { "epoch": 0.33283782827210306, "grad_norm": 9.515635022187867, "learning_rate": 4.759934938478541e-06, "loss": 1.1795, "step": 2351 }, { "epoch": 0.3329794011467403, "grad_norm": 8.368400592646358, "learning_rate": 4.7596897860851644e-06, "loss": 1.1861, "step": 2352 }, { "epoch": 0.3331209740213775, "grad_norm": 7.3912642285167935, "learning_rate": 4.75944451490158e-06, "loss": 1.1418, "step": 2353 }, { "epoch": 0.3332625468960147, "grad_norm": 9.00467856977802, "learning_rate": 4.759199124940683e-06, "loss": 1.1357, "step": 2354 }, { "epoch": 0.33340411977065193, "grad_norm": 8.572019756456358, "learning_rate": 4.7589536162153725e-06, "loss": 1.054, "step": 2355 }, { "epoch": 0.33354569264528916, "grad_norm": 8.72957210160661, "learning_rate": 4.758707988738555e-06, "loss": 1.1768, "step": 2356 }, { "epoch": 0.3336872655199264, "grad_norm": 8.518905707634426, "learning_rate": 4.758462242523141e-06, "loss": 1.214, "step": 2357 }, { "epoch": 0.3338288383945636, "grad_norm": 8.289953875994321, "learning_rate": 4.758216377582052e-06, "loss": 1.0955, "step": 2358 }, { "epoch": 0.3339704112692008, "grad_norm": 6.531834626383753, "learning_rate": 4.757970393928212e-06, "loss": 0.9995, "step": 2359 }, { "epoch": 0.334111984143838, "grad_norm": 10.058456082188966, "learning_rate": 4.757724291574552e-06, "loss": 1.1884, "step": 2360 }, { "epoch": 0.33425355701847526, "grad_norm": 6.162073779116682, "learning_rate": 4.7574780705340094e-06, "loss": 1.1372, "step": 2361 }, { "epoch": 0.3343951298931125, "grad_norm": 8.068325097134279, "learning_rate": 4.757231730819528e-06, "loss": 1.1871, "step": 2362 }, { "epoch": 0.3345367027677497, "grad_norm": 8.809929655489196, "learning_rate": 4.7569852724440565e-06, "loss": 1.2103, "step": 2363 }, { "epoch": 0.3346782756423869, "grad_norm": 8.774969604091764, "learning_rate": 4.7567386954205535e-06, "loss": 1.3678, "step": 2364 }, { "epoch": 0.3348198485170241, "grad_norm": 8.410207051054186, "learning_rate": 4.756491999761979e-06, "loss": 1.1137, "step": 2365 }, { "epoch": 0.33496142139166135, "grad_norm": 9.354305773226757, "learning_rate": 4.756245185481304e-06, "loss": 1.2378, "step": 2366 }, { "epoch": 0.3351029942662986, "grad_norm": 9.370253111393632, "learning_rate": 4.755998252591501e-06, "loss": 1.2158, "step": 2367 }, { "epoch": 0.3352445671409358, "grad_norm": 8.793782152931847, "learning_rate": 4.755751201105552e-06, "loss": 1.2835, "step": 2368 }, { "epoch": 0.33538614001557304, "grad_norm": 9.068071428567032, "learning_rate": 4.755504031036444e-06, "loss": 1.1622, "step": 2369 }, { "epoch": 0.3355277128902102, "grad_norm": 8.930042062897419, "learning_rate": 4.75525674239717e-06, "loss": 1.1962, "step": 2370 }, { "epoch": 0.33566928576484745, "grad_norm": 8.350646636959985, "learning_rate": 4.755009335200732e-06, "loss": 1.2099, "step": 2371 }, { "epoch": 0.3358108586394847, "grad_norm": 9.830323572275967, "learning_rate": 4.754761809460135e-06, "loss": 1.3633, "step": 2372 }, { "epoch": 0.3359524315141219, "grad_norm": 8.505526821498506, "learning_rate": 4.75451416518839e-06, "loss": 1.2075, "step": 2373 }, { "epoch": 0.33609400438875914, "grad_norm": 9.154995881209393, "learning_rate": 4.754266402398517e-06, "loss": 1.238, "step": 2374 }, { "epoch": 0.3362355772633963, "grad_norm": 8.982078510845916, "learning_rate": 4.754018521103539e-06, "loss": 1.1244, "step": 2375 }, { "epoch": 0.33637715013803354, "grad_norm": 10.465897174256462, "learning_rate": 4.75377052131649e-06, "loss": 1.2006, "step": 2376 }, { "epoch": 0.3365187230126708, "grad_norm": 7.6719841415193635, "learning_rate": 4.753522403050403e-06, "loss": 1.1025, "step": 2377 }, { "epoch": 0.336660295887308, "grad_norm": 8.054782219375177, "learning_rate": 4.7532741663183255e-06, "loss": 1.1594, "step": 2378 }, { "epoch": 0.33680186876194523, "grad_norm": 8.86571866276088, "learning_rate": 4.753025811133304e-06, "loss": 1.0668, "step": 2379 }, { "epoch": 0.3369434416365824, "grad_norm": 10.962116550537953, "learning_rate": 4.752777337508395e-06, "loss": 1.1252, "step": 2380 }, { "epoch": 0.33708501451121964, "grad_norm": 9.513077017368193, "learning_rate": 4.752528745456663e-06, "loss": 1.248, "step": 2381 }, { "epoch": 0.33722658738585687, "grad_norm": 9.388127202645205, "learning_rate": 4.752280034991172e-06, "loss": 1.1446, "step": 2382 }, { "epoch": 0.3373681602604941, "grad_norm": 10.80069683793868, "learning_rate": 4.752031206125e-06, "loss": 1.2039, "step": 2383 }, { "epoch": 0.33750973313513133, "grad_norm": 9.321816923323109, "learning_rate": 4.751782258871227e-06, "loss": 1.191, "step": 2384 }, { "epoch": 0.3376513060097685, "grad_norm": 9.637251549671236, "learning_rate": 4.751533193242941e-06, "loss": 1.3989, "step": 2385 }, { "epoch": 0.33779287888440573, "grad_norm": 9.865012997726485, "learning_rate": 4.751284009253232e-06, "loss": 1.1279, "step": 2386 }, { "epoch": 0.33793445175904296, "grad_norm": 10.591629941462953, "learning_rate": 4.7510347069152015e-06, "loss": 1.2469, "step": 2387 }, { "epoch": 0.3380760246336802, "grad_norm": 9.094693793646673, "learning_rate": 4.750785286241955e-06, "loss": 1.2265, "step": 2388 }, { "epoch": 0.3382175975083174, "grad_norm": 9.049147457199956, "learning_rate": 4.750535747246604e-06, "loss": 1.1664, "step": 2389 }, { "epoch": 0.3383591703829546, "grad_norm": 6.245498867465723, "learning_rate": 4.750286089942267e-06, "loss": 1.0357, "step": 2390 }, { "epoch": 0.33850074325759183, "grad_norm": 8.188892508163354, "learning_rate": 4.750036314342069e-06, "loss": 1.1327, "step": 2391 }, { "epoch": 0.33864231613222906, "grad_norm": 11.152963875600477, "learning_rate": 4.7497864204591386e-06, "loss": 1.2156, "step": 2392 }, { "epoch": 0.3387838890068663, "grad_norm": 8.664385226321464, "learning_rate": 4.749536408306614e-06, "loss": 1.2353, "step": 2393 }, { "epoch": 0.3389254618815035, "grad_norm": 9.064255958653439, "learning_rate": 4.749286277897637e-06, "loss": 1.2274, "step": 2394 }, { "epoch": 0.33906703475614075, "grad_norm": 9.419504553852027, "learning_rate": 4.749036029245358e-06, "loss": 1.075, "step": 2395 }, { "epoch": 0.3392086076307779, "grad_norm": 10.039411320544943, "learning_rate": 4.7487856623629325e-06, "loss": 1.1795, "step": 2396 }, { "epoch": 0.33935018050541516, "grad_norm": 8.639919038322612, "learning_rate": 4.748535177263522e-06, "loss": 1.1908, "step": 2397 }, { "epoch": 0.3394917533800524, "grad_norm": 9.77036057055782, "learning_rate": 4.748284573960292e-06, "loss": 1.293, "step": 2398 }, { "epoch": 0.3396333262546896, "grad_norm": 8.886981881406602, "learning_rate": 4.748033852466419e-06, "loss": 1.1691, "step": 2399 }, { "epoch": 0.33977489912932685, "grad_norm": 10.745746569500767, "learning_rate": 4.747783012795083e-06, "loss": 1.2667, "step": 2400 }, { "epoch": 0.339916472003964, "grad_norm": 11.286509632415912, "learning_rate": 4.747532054959469e-06, "loss": 1.169, "step": 2401 }, { "epoch": 0.34005804487860125, "grad_norm": 10.029558746276903, "learning_rate": 4.747280978972772e-06, "loss": 1.2535, "step": 2402 }, { "epoch": 0.3401996177532385, "grad_norm": 9.61035310062799, "learning_rate": 4.747029784848189e-06, "loss": 1.228, "step": 2403 }, { "epoch": 0.3403411906278757, "grad_norm": 10.208815292087948, "learning_rate": 4.746778472598927e-06, "loss": 1.0237, "step": 2404 }, { "epoch": 0.34048276350251294, "grad_norm": 10.965551708864682, "learning_rate": 4.746527042238194e-06, "loss": 1.0711, "step": 2405 }, { "epoch": 0.3406243363771501, "grad_norm": 12.538983401230265, "learning_rate": 4.74627549377921e-06, "loss": 1.2784, "step": 2406 }, { "epoch": 0.34076590925178735, "grad_norm": 9.275721897708706, "learning_rate": 4.746023827235198e-06, "loss": 1.0591, "step": 2407 }, { "epoch": 0.3409074821264246, "grad_norm": 7.5387811637921205, "learning_rate": 4.745772042619389e-06, "loss": 1.1489, "step": 2408 }, { "epoch": 0.3410490550010618, "grad_norm": 10.537648235468513, "learning_rate": 4.745520139945018e-06, "loss": 1.1854, "step": 2409 }, { "epoch": 0.34119062787569904, "grad_norm": 8.553467824725436, "learning_rate": 4.745268119225327e-06, "loss": 1.1702, "step": 2410 }, { "epoch": 0.3413322007503362, "grad_norm": 9.736564573671025, "learning_rate": 4.745015980473565e-06, "loss": 1.3256, "step": 2411 }, { "epoch": 0.34147377362497344, "grad_norm": 8.481343157937621, "learning_rate": 4.744763723702988e-06, "loss": 1.3118, "step": 2412 }, { "epoch": 0.3416153464996107, "grad_norm": 9.001057032924574, "learning_rate": 4.744511348926855e-06, "loss": 1.1869, "step": 2413 }, { "epoch": 0.3417569193742479, "grad_norm": 11.226843844334796, "learning_rate": 4.7442588561584336e-06, "loss": 1.0548, "step": 2414 }, { "epoch": 0.34189849224888513, "grad_norm": 10.448469234216986, "learning_rate": 4.744006245410998e-06, "loss": 1.2012, "step": 2415 }, { "epoch": 0.3420400651235223, "grad_norm": 8.264026624313555, "learning_rate": 4.743753516697827e-06, "loss": 1.1235, "step": 2416 }, { "epoch": 0.34218163799815954, "grad_norm": 10.55902475824424, "learning_rate": 4.743500670032207e-06, "loss": 1.1806, "step": 2417 }, { "epoch": 0.34232321087279677, "grad_norm": 10.464510204300915, "learning_rate": 4.743247705427429e-06, "loss": 1.2101, "step": 2418 }, { "epoch": 0.342464783747434, "grad_norm": 11.623613828646407, "learning_rate": 4.742994622896793e-06, "loss": 1.2533, "step": 2419 }, { "epoch": 0.34260635662207123, "grad_norm": 8.475380641975155, "learning_rate": 4.7427414224536014e-06, "loss": 1.0999, "step": 2420 }, { "epoch": 0.3427479294967084, "grad_norm": 9.851824642750392, "learning_rate": 4.742488104111165e-06, "loss": 1.3558, "step": 2421 }, { "epoch": 0.34288950237134563, "grad_norm": 7.794967632116389, "learning_rate": 4.742234667882802e-06, "loss": 1.1481, "step": 2422 }, { "epoch": 0.34303107524598286, "grad_norm": 9.032019136933966, "learning_rate": 4.7419811137818335e-06, "loss": 1.2972, "step": 2423 }, { "epoch": 0.3431726481206201, "grad_norm": 8.686282285245216, "learning_rate": 4.7417274418215895e-06, "loss": 1.2328, "step": 2424 }, { "epoch": 0.3433142209952573, "grad_norm": 11.273760985916729, "learning_rate": 4.741473652015407e-06, "loss": 1.2094, "step": 2425 }, { "epoch": 0.34345579386989455, "grad_norm": 10.616160945934292, "learning_rate": 4.741219744376624e-06, "loss": 1.2221, "step": 2426 }, { "epoch": 0.34359736674453173, "grad_norm": 10.777661951060985, "learning_rate": 4.740965718918591e-06, "loss": 1.2621, "step": 2427 }, { "epoch": 0.34373893961916896, "grad_norm": 10.984860839517914, "learning_rate": 4.74071157565466e-06, "loss": 1.2751, "step": 2428 }, { "epoch": 0.3438805124938062, "grad_norm": 9.080816871096273, "learning_rate": 4.740457314598194e-06, "loss": 1.2224, "step": 2429 }, { "epoch": 0.3440220853684434, "grad_norm": 8.877499510481188, "learning_rate": 4.740202935762557e-06, "loss": 1.2221, "step": 2430 }, { "epoch": 0.34416365824308065, "grad_norm": 8.520270468130555, "learning_rate": 4.739948439161122e-06, "loss": 1.2345, "step": 2431 }, { "epoch": 0.3443052311177178, "grad_norm": 8.29968558945392, "learning_rate": 4.7396938248072675e-06, "loss": 1.0873, "step": 2432 }, { "epoch": 0.34444680399235506, "grad_norm": 8.67381515106832, "learning_rate": 4.739439092714379e-06, "loss": 1.0455, "step": 2433 }, { "epoch": 0.3445883768669923, "grad_norm": 8.732373603716391, "learning_rate": 4.7391842428958454e-06, "loss": 1.0352, "step": 2434 }, { "epoch": 0.3447299497416295, "grad_norm": 7.846417099001551, "learning_rate": 4.738929275365068e-06, "loss": 1.2723, "step": 2435 }, { "epoch": 0.34487152261626675, "grad_norm": 8.00511244495339, "learning_rate": 4.738674190135447e-06, "loss": 1.1621, "step": 2436 }, { "epoch": 0.3450130954909039, "grad_norm": 7.86669774884479, "learning_rate": 4.7384189872203935e-06, "loss": 1.0868, "step": 2437 }, { "epoch": 0.34515466836554115, "grad_norm": 8.926798355616416, "learning_rate": 4.738163666633322e-06, "loss": 1.1953, "step": 2438 }, { "epoch": 0.3452962412401784, "grad_norm": 9.438160045860078, "learning_rate": 4.737908228387656e-06, "loss": 1.2562, "step": 2439 }, { "epoch": 0.3454378141148156, "grad_norm": 10.612859993480123, "learning_rate": 4.737652672496823e-06, "loss": 1.2477, "step": 2440 }, { "epoch": 0.34557938698945284, "grad_norm": 9.228601396757494, "learning_rate": 4.737396998974257e-06, "loss": 1.0949, "step": 2441 }, { "epoch": 0.34572095986409, "grad_norm": 9.208759234721386, "learning_rate": 4.7371412078334e-06, "loss": 1.2766, "step": 2442 }, { "epoch": 0.34586253273872725, "grad_norm": 9.602988302522236, "learning_rate": 4.736885299087698e-06, "loss": 1.2124, "step": 2443 }, { "epoch": 0.3460041056133645, "grad_norm": 9.593613216655196, "learning_rate": 4.7366292727506025e-06, "loss": 1.372, "step": 2444 }, { "epoch": 0.3461456784880017, "grad_norm": 8.021572115343735, "learning_rate": 4.736373128835574e-06, "loss": 1.1855, "step": 2445 }, { "epoch": 0.34628725136263894, "grad_norm": 11.759784277150546, "learning_rate": 4.736116867356079e-06, "loss": 1.2328, "step": 2446 }, { "epoch": 0.3464288242372761, "grad_norm": 10.179044432393477, "learning_rate": 4.735860488325586e-06, "loss": 1.2361, "step": 2447 }, { "epoch": 0.34657039711191334, "grad_norm": 12.302917779481088, "learning_rate": 4.735603991757576e-06, "loss": 1.3545, "step": 2448 }, { "epoch": 0.34671196998655057, "grad_norm": 9.014403367867075, "learning_rate": 4.735347377665529e-06, "loss": 1.0983, "step": 2449 }, { "epoch": 0.3468535428611878, "grad_norm": 9.231673360219434, "learning_rate": 4.735090646062939e-06, "loss": 1.2304, "step": 2450 }, { "epoch": 0.34699511573582503, "grad_norm": 11.105926622310122, "learning_rate": 4.7348337969632985e-06, "loss": 1.1967, "step": 2451 }, { "epoch": 0.34713668861046226, "grad_norm": 12.217511750916758, "learning_rate": 4.734576830380113e-06, "loss": 1.1681, "step": 2452 }, { "epoch": 0.34727826148509944, "grad_norm": 9.399023581580662, "learning_rate": 4.7343197463268895e-06, "loss": 1.1498, "step": 2453 }, { "epoch": 0.34741983435973667, "grad_norm": 7.794883703082431, "learning_rate": 4.734062544817143e-06, "loss": 1.0833, "step": 2454 }, { "epoch": 0.3475614072343739, "grad_norm": 8.605971866676043, "learning_rate": 4.733805225864393e-06, "loss": 1.2952, "step": 2455 }, { "epoch": 0.34770298010901113, "grad_norm": 12.991122222186846, "learning_rate": 4.733547789482169e-06, "loss": 1.2397, "step": 2456 }, { "epoch": 0.34784455298364836, "grad_norm": 10.363163090049149, "learning_rate": 4.733290235684002e-06, "loss": 1.0658, "step": 2457 }, { "epoch": 0.34798612585828553, "grad_norm": 10.494758751046914, "learning_rate": 4.733032564483434e-06, "loss": 1.3506, "step": 2458 }, { "epoch": 0.34812769873292276, "grad_norm": 8.66467668232793, "learning_rate": 4.732774775894009e-06, "loss": 1.1266, "step": 2459 }, { "epoch": 0.34826927160756, "grad_norm": 8.660746924629565, "learning_rate": 4.732516869929278e-06, "loss": 1.3908, "step": 2460 }, { "epoch": 0.3484108444821972, "grad_norm": 8.709361719861153, "learning_rate": 4.732258846602801e-06, "loss": 1.2655, "step": 2461 }, { "epoch": 0.34855241735683445, "grad_norm": 9.640833921966372, "learning_rate": 4.73200070592814e-06, "loss": 1.096, "step": 2462 }, { "epoch": 0.34869399023147163, "grad_norm": 10.491458143162301, "learning_rate": 4.731742447918866e-06, "loss": 1.204, "step": 2463 }, { "epoch": 0.34883556310610886, "grad_norm": 8.48904251217047, "learning_rate": 4.731484072588556e-06, "loss": 1.1732, "step": 2464 }, { "epoch": 0.3489771359807461, "grad_norm": 10.24325786383605, "learning_rate": 4.731225579950791e-06, "loss": 1.2098, "step": 2465 }, { "epoch": 0.3491187088553833, "grad_norm": 9.928750847100005, "learning_rate": 4.730966970019163e-06, "loss": 1.1685, "step": 2466 }, { "epoch": 0.34926028173002055, "grad_norm": 9.148296977331311, "learning_rate": 4.730708242807263e-06, "loss": 1.2615, "step": 2467 }, { "epoch": 0.3494018546046577, "grad_norm": 8.63002067059531, "learning_rate": 4.730449398328695e-06, "loss": 1.2591, "step": 2468 }, { "epoch": 0.34954342747929495, "grad_norm": 9.521303083209974, "learning_rate": 4.7301904365970656e-06, "loss": 1.2874, "step": 2469 }, { "epoch": 0.3496850003539322, "grad_norm": 8.62001048076373, "learning_rate": 4.7299313576259865e-06, "loss": 1.0308, "step": 2470 }, { "epoch": 0.3498265732285694, "grad_norm": 8.734723515799509, "learning_rate": 4.72967216142908e-06, "loss": 1.1842, "step": 2471 }, { "epoch": 0.34996814610320665, "grad_norm": 9.462010367662566, "learning_rate": 4.729412848019969e-06, "loss": 1.2859, "step": 2472 }, { "epoch": 0.3501097189778438, "grad_norm": 8.960812342467287, "learning_rate": 4.729153417412288e-06, "loss": 1.1513, "step": 2473 }, { "epoch": 0.35025129185248105, "grad_norm": 9.91294620082098, "learning_rate": 4.7288938696196735e-06, "loss": 1.1082, "step": 2474 }, { "epoch": 0.3503928647271183, "grad_norm": 9.219889528353312, "learning_rate": 4.728634204655771e-06, "loss": 1.4026, "step": 2475 }, { "epoch": 0.3505344376017555, "grad_norm": 10.279356066058442, "learning_rate": 4.728374422534229e-06, "loss": 1.2614, "step": 2476 }, { "epoch": 0.35067601047639274, "grad_norm": 9.56138494634983, "learning_rate": 4.728114523268705e-06, "loss": 1.1279, "step": 2477 }, { "epoch": 0.3508175833510299, "grad_norm": 9.043871445225957, "learning_rate": 4.727854506872863e-06, "loss": 1.2071, "step": 2478 }, { "epoch": 0.35095915622566715, "grad_norm": 8.992622318575977, "learning_rate": 4.72759437336037e-06, "loss": 1.3444, "step": 2479 }, { "epoch": 0.3511007291003044, "grad_norm": 10.144158216572313, "learning_rate": 4.727334122744902e-06, "loss": 1.3366, "step": 2480 }, { "epoch": 0.3512423019749416, "grad_norm": 8.475191151311547, "learning_rate": 4.72707375504014e-06, "loss": 1.2134, "step": 2481 }, { "epoch": 0.35138387484957884, "grad_norm": 8.391342388151928, "learning_rate": 4.726813270259772e-06, "loss": 1.1012, "step": 2482 }, { "epoch": 0.35152544772421607, "grad_norm": 9.083771674635711, "learning_rate": 4.7265526684174894e-06, "loss": 1.1402, "step": 2483 }, { "epoch": 0.35166702059885324, "grad_norm": 10.312637235710907, "learning_rate": 4.7262919495269946e-06, "loss": 1.0835, "step": 2484 }, { "epoch": 0.35180859347349047, "grad_norm": 8.379197620225284, "learning_rate": 4.726031113601991e-06, "loss": 1.3028, "step": 2485 }, { "epoch": 0.3519501663481277, "grad_norm": 11.567999114610314, "learning_rate": 4.725770160656191e-06, "loss": 1.0691, "step": 2486 }, { "epoch": 0.35209173922276493, "grad_norm": 9.172187760851415, "learning_rate": 4.725509090703314e-06, "loss": 1.2845, "step": 2487 }, { "epoch": 0.35223331209740216, "grad_norm": 6.993756370780163, "learning_rate": 4.725247903757084e-06, "loss": 1.0183, "step": 2488 }, { "epoch": 0.35237488497203934, "grad_norm": 8.249560720135062, "learning_rate": 4.7249865998312306e-06, "loss": 1.2169, "step": 2489 }, { "epoch": 0.35251645784667657, "grad_norm": 8.95424252337854, "learning_rate": 4.72472517893949e-06, "loss": 1.2643, "step": 2490 }, { "epoch": 0.3526580307213138, "grad_norm": 8.207868792044733, "learning_rate": 4.724463641095606e-06, "loss": 1.0372, "step": 2491 }, { "epoch": 0.35279960359595103, "grad_norm": 8.569788594259077, "learning_rate": 4.7242019863133275e-06, "loss": 1.1759, "step": 2492 }, { "epoch": 0.35294117647058826, "grad_norm": 6.97002012712372, "learning_rate": 4.723940214606408e-06, "loss": 1.1354, "step": 2493 }, { "epoch": 0.35308274934522543, "grad_norm": 8.772184749579875, "learning_rate": 4.723678325988611e-06, "loss": 1.2691, "step": 2494 }, { "epoch": 0.35322432221986266, "grad_norm": 8.426288304747622, "learning_rate": 4.723416320473702e-06, "loss": 1.1693, "step": 2495 }, { "epoch": 0.3533658950944999, "grad_norm": 10.087409422513883, "learning_rate": 4.723154198075454e-06, "loss": 1.2994, "step": 2496 }, { "epoch": 0.3535074679691371, "grad_norm": 8.646384887260822, "learning_rate": 4.7228919588076484e-06, "loss": 1.0768, "step": 2497 }, { "epoch": 0.35364904084377435, "grad_norm": 7.820520297251748, "learning_rate": 4.722629602684069e-06, "loss": 1.1548, "step": 2498 }, { "epoch": 0.35379061371841153, "grad_norm": 9.703012954138709, "learning_rate": 4.72236712971851e-06, "loss": 1.1716, "step": 2499 }, { "epoch": 0.35393218659304876, "grad_norm": 13.006042836641539, "learning_rate": 4.7221045399247666e-06, "loss": 1.4149, "step": 2500 }, { "epoch": 0.354073759467686, "grad_norm": 8.78044673191585, "learning_rate": 4.721841833316645e-06, "loss": 1.3073, "step": 2501 }, { "epoch": 0.3542153323423232, "grad_norm": 7.814420174183841, "learning_rate": 4.721579009907955e-06, "loss": 1.2198, "step": 2502 }, { "epoch": 0.35435690521696045, "grad_norm": 7.948504171659881, "learning_rate": 4.721316069712514e-06, "loss": 1.1155, "step": 2503 }, { "epoch": 0.3544984780915976, "grad_norm": 10.612191771579392, "learning_rate": 4.721053012744142e-06, "loss": 1.1693, "step": 2504 }, { "epoch": 0.35464005096623485, "grad_norm": 7.522984951585847, "learning_rate": 4.7207898390166695e-06, "loss": 1.1256, "step": 2505 }, { "epoch": 0.3547816238408721, "grad_norm": 8.790160521689597, "learning_rate": 4.720526548543931e-06, "loss": 1.2633, "step": 2506 }, { "epoch": 0.3549231967155093, "grad_norm": 8.489997363769698, "learning_rate": 4.720263141339768e-06, "loss": 1.0564, "step": 2507 }, { "epoch": 0.35506476959014655, "grad_norm": 10.511586653759373, "learning_rate": 4.719999617418027e-06, "loss": 1.2305, "step": 2508 }, { "epoch": 0.3552063424647837, "grad_norm": 7.952327545896031, "learning_rate": 4.719735976792562e-06, "loss": 1.1609, "step": 2509 }, { "epoch": 0.35534791533942095, "grad_norm": 7.752061877229094, "learning_rate": 4.71947221947723e-06, "loss": 1.1665, "step": 2510 }, { "epoch": 0.3554894882140582, "grad_norm": 7.598542635874921, "learning_rate": 4.7192083454859e-06, "loss": 1.1571, "step": 2511 }, { "epoch": 0.3556310610886954, "grad_norm": 7.55508967752206, "learning_rate": 4.7189443548324415e-06, "loss": 1.2714, "step": 2512 }, { "epoch": 0.35577263396333264, "grad_norm": 9.565022453764321, "learning_rate": 4.7186802475307325e-06, "loss": 1.2991, "step": 2513 }, { "epoch": 0.35591420683796987, "grad_norm": 10.061643196732797, "learning_rate": 4.7184160235946576e-06, "loss": 1.2083, "step": 2514 }, { "epoch": 0.35605577971260705, "grad_norm": 7.330608670930957, "learning_rate": 4.7181516830381065e-06, "loss": 1.0756, "step": 2515 }, { "epoch": 0.3561973525872443, "grad_norm": 8.731450500589137, "learning_rate": 4.717887225874976e-06, "loss": 1.0937, "step": 2516 }, { "epoch": 0.3563389254618815, "grad_norm": 9.58363111834954, "learning_rate": 4.717622652119166e-06, "loss": 1.1234, "step": 2517 }, { "epoch": 0.35648049833651874, "grad_norm": 9.680097348062608, "learning_rate": 4.717357961784587e-06, "loss": 1.1599, "step": 2518 }, { "epoch": 0.35662207121115597, "grad_norm": 8.987089858160557, "learning_rate": 4.717093154885154e-06, "loss": 1.3428, "step": 2519 }, { "epoch": 0.35676364408579314, "grad_norm": 9.873212435246387, "learning_rate": 4.716828231434787e-06, "loss": 1.1876, "step": 2520 }, { "epoch": 0.35690521696043037, "grad_norm": 10.027196903141725, "learning_rate": 4.716563191447413e-06, "loss": 1.2068, "step": 2521 }, { "epoch": 0.3570467898350676, "grad_norm": 8.304999847136513, "learning_rate": 4.7162980349369645e-06, "loss": 1.1069, "step": 2522 }, { "epoch": 0.35718836270970483, "grad_norm": 8.791742213016526, "learning_rate": 4.716032761917381e-06, "loss": 1.1003, "step": 2523 }, { "epoch": 0.35732993558434206, "grad_norm": 9.269492977560054, "learning_rate": 4.715767372402608e-06, "loss": 1.2081, "step": 2524 }, { "epoch": 0.35747150845897924, "grad_norm": 6.895599023193842, "learning_rate": 4.715501866406595e-06, "loss": 1.1351, "step": 2525 }, { "epoch": 0.35761308133361647, "grad_norm": 8.491577912660128, "learning_rate": 4.715236243943302e-06, "loss": 1.1151, "step": 2526 }, { "epoch": 0.3577546542082537, "grad_norm": 9.31101332708992, "learning_rate": 4.714970505026691e-06, "loss": 1.1274, "step": 2527 }, { "epoch": 0.3578962270828909, "grad_norm": 7.671606241712413, "learning_rate": 4.714704649670732e-06, "loss": 1.2501, "step": 2528 }, { "epoch": 0.35803779995752816, "grad_norm": 8.195003885544047, "learning_rate": 4.7144386778894e-06, "loss": 1.1105, "step": 2529 }, { "epoch": 0.35817937283216533, "grad_norm": 7.641832127611909, "learning_rate": 4.71417258969668e-06, "loss": 1.0465, "step": 2530 }, { "epoch": 0.35832094570680256, "grad_norm": 9.308189674664524, "learning_rate": 4.713906385106556e-06, "loss": 1.4224, "step": 2531 }, { "epoch": 0.3584625185814398, "grad_norm": 8.038753106898598, "learning_rate": 4.7136400641330245e-06, "loss": 1.1246, "step": 2532 }, { "epoch": 0.358604091456077, "grad_norm": 9.081663298997997, "learning_rate": 4.713373626790086e-06, "loss": 1.0628, "step": 2533 }, { "epoch": 0.35874566433071425, "grad_norm": 8.347476198631462, "learning_rate": 4.713107073091746e-06, "loss": 1.251, "step": 2534 }, { "epoch": 0.35888723720535143, "grad_norm": 11.35221979557784, "learning_rate": 4.712840403052018e-06, "loss": 1.3901, "step": 2535 }, { "epoch": 0.35902881007998866, "grad_norm": 7.3162021231701955, "learning_rate": 4.712573616684919e-06, "loss": 1.133, "step": 2536 }, { "epoch": 0.3591703829546259, "grad_norm": 10.223495454782814, "learning_rate": 4.712306714004475e-06, "loss": 1.314, "step": 2537 }, { "epoch": 0.3593119558292631, "grad_norm": 8.955924297195116, "learning_rate": 4.712039695024717e-06, "loss": 1.2397, "step": 2538 }, { "epoch": 0.35945352870390035, "grad_norm": 9.173706911523562, "learning_rate": 4.7117725597596814e-06, "loss": 1.0875, "step": 2539 }, { "epoch": 0.3595951015785376, "grad_norm": 10.430071168621982, "learning_rate": 4.711505308223412e-06, "loss": 1.1592, "step": 2540 }, { "epoch": 0.35973667445317475, "grad_norm": 11.380517313215874, "learning_rate": 4.711237940429956e-06, "loss": 1.2858, "step": 2541 }, { "epoch": 0.359878247327812, "grad_norm": 8.064750416459592, "learning_rate": 4.710970456393371e-06, "loss": 1.2885, "step": 2542 }, { "epoch": 0.3600198202024492, "grad_norm": 8.137485711572998, "learning_rate": 4.710702856127718e-06, "loss": 1.3127, "step": 2543 }, { "epoch": 0.36016139307708644, "grad_norm": 7.104841123840809, "learning_rate": 4.710435139647064e-06, "loss": 1.0098, "step": 2544 }, { "epoch": 0.3603029659517237, "grad_norm": 11.379735610652796, "learning_rate": 4.710167306965483e-06, "loss": 1.2314, "step": 2545 }, { "epoch": 0.36044453882636085, "grad_norm": 8.273910091111746, "learning_rate": 4.709899358097055e-06, "loss": 1.056, "step": 2546 }, { "epoch": 0.3605861117009981, "grad_norm": 8.923609469785099, "learning_rate": 4.709631293055865e-06, "loss": 1.0893, "step": 2547 }, { "epoch": 0.3607276845756353, "grad_norm": 8.239265162469312, "learning_rate": 4.7093631118560054e-06, "loss": 1.2516, "step": 2548 }, { "epoch": 0.36086925745027254, "grad_norm": 8.260544021092924, "learning_rate": 4.709094814511574e-06, "loss": 1.3621, "step": 2549 }, { "epoch": 0.36101083032490977, "grad_norm": 7.824218506225034, "learning_rate": 4.708826401036677e-06, "loss": 1.0098, "step": 2550 }, { "epoch": 0.36115240319954695, "grad_norm": 8.320421617535658, "learning_rate": 4.708557871445422e-06, "loss": 1.2414, "step": 2551 }, { "epoch": 0.3612939760741842, "grad_norm": 8.35265181229071, "learning_rate": 4.708289225751926e-06, "loss": 1.172, "step": 2552 }, { "epoch": 0.3614355489488214, "grad_norm": 9.962884690193782, "learning_rate": 4.7080204639703125e-06, "loss": 1.4082, "step": 2553 }, { "epoch": 0.36157712182345864, "grad_norm": 9.001749080663739, "learning_rate": 4.707751586114709e-06, "loss": 1.3347, "step": 2554 }, { "epoch": 0.36171869469809587, "grad_norm": 9.2452735166387, "learning_rate": 4.7074825921992516e-06, "loss": 1.1559, "step": 2555 }, { "epoch": 0.36186026757273304, "grad_norm": 7.969395510390069, "learning_rate": 4.70721348223808e-06, "loss": 1.1643, "step": 2556 }, { "epoch": 0.36200184044737027, "grad_norm": 8.187643035338137, "learning_rate": 4.706944256245342e-06, "loss": 1.2071, "step": 2557 }, { "epoch": 0.3621434133220075, "grad_norm": 8.33014933476304, "learning_rate": 4.706674914235189e-06, "loss": 1.0884, "step": 2558 }, { "epoch": 0.36228498619664473, "grad_norm": 11.130626155928825, "learning_rate": 4.706405456221782e-06, "loss": 1.3068, "step": 2559 }, { "epoch": 0.36242655907128196, "grad_norm": 13.22437673444947, "learning_rate": 4.706135882219285e-06, "loss": 1.1364, "step": 2560 }, { "epoch": 0.36256813194591914, "grad_norm": 9.130458505572031, "learning_rate": 4.705866192241869e-06, "loss": 1.0782, "step": 2561 }, { "epoch": 0.36270970482055637, "grad_norm": 7.734050952018481, "learning_rate": 4.705596386303713e-06, "loss": 1.1432, "step": 2562 }, { "epoch": 0.3628512776951936, "grad_norm": 11.588717373433875, "learning_rate": 4.705326464418999e-06, "loss": 1.1932, "step": 2563 }, { "epoch": 0.3629928505698308, "grad_norm": 12.022248306606512, "learning_rate": 4.705056426601917e-06, "loss": 1.35, "step": 2564 }, { "epoch": 0.36313442344446806, "grad_norm": 8.615359460877807, "learning_rate": 4.704786272866663e-06, "loss": 1.1939, "step": 2565 }, { "epoch": 0.36327599631910523, "grad_norm": 8.790109312660203, "learning_rate": 4.704516003227439e-06, "loss": 1.1603, "step": 2566 }, { "epoch": 0.36341756919374246, "grad_norm": 9.731969349365635, "learning_rate": 4.704245617698452e-06, "loss": 1.2445, "step": 2567 }, { "epoch": 0.3635591420683797, "grad_norm": 11.42124776513912, "learning_rate": 4.703975116293916e-06, "loss": 1.3862, "step": 2568 }, { "epoch": 0.3637007149430169, "grad_norm": 8.433404225087902, "learning_rate": 4.703704499028052e-06, "loss": 1.222, "step": 2569 }, { "epoch": 0.36384228781765415, "grad_norm": 10.172216266116664, "learning_rate": 4.703433765915086e-06, "loss": 1.2706, "step": 2570 }, { "epoch": 0.3639838606922914, "grad_norm": 8.682153607243173, "learning_rate": 4.7031629169692495e-06, "loss": 1.1801, "step": 2571 }, { "epoch": 0.36412543356692856, "grad_norm": 8.347317622255451, "learning_rate": 4.702891952204781e-06, "loss": 1.1394, "step": 2572 }, { "epoch": 0.3642670064415658, "grad_norm": 9.668761804594443, "learning_rate": 4.702620871635926e-06, "loss": 1.198, "step": 2573 }, { "epoch": 0.364408579316203, "grad_norm": 10.730471238509905, "learning_rate": 4.702349675276933e-06, "loss": 1.1122, "step": 2574 }, { "epoch": 0.36455015219084025, "grad_norm": 7.5464425604786465, "learning_rate": 4.702078363142061e-06, "loss": 1.1964, "step": 2575 }, { "epoch": 0.3646917250654775, "grad_norm": 9.259902190315984, "learning_rate": 4.70180693524557e-06, "loss": 1.2618, "step": 2576 }, { "epoch": 0.36483329794011465, "grad_norm": 9.70662568317746, "learning_rate": 4.7015353916017305e-06, "loss": 1.1999, "step": 2577 }, { "epoch": 0.3649748708147519, "grad_norm": 7.695529868836727, "learning_rate": 4.701263732224817e-06, "loss": 1.1855, "step": 2578 }, { "epoch": 0.3651164436893891, "grad_norm": 11.667018630758621, "learning_rate": 4.700991957129111e-06, "loss": 1.2137, "step": 2579 }, { "epoch": 0.36525801656402634, "grad_norm": 8.869889628208547, "learning_rate": 4.700720066328899e-06, "loss": 1.2016, "step": 2580 }, { "epoch": 0.3653995894386636, "grad_norm": 9.507458619903622, "learning_rate": 4.7004480598384736e-06, "loss": 1.1849, "step": 2581 }, { "epoch": 0.36554116231330075, "grad_norm": 9.960161581299067, "learning_rate": 4.700175937672134e-06, "loss": 1.1552, "step": 2582 }, { "epoch": 0.365682735187938, "grad_norm": 9.55308038852746, "learning_rate": 4.699903699844186e-06, "loss": 1.1318, "step": 2583 }, { "epoch": 0.3658243080625752, "grad_norm": 8.488827711931227, "learning_rate": 4.699631346368941e-06, "loss": 1.1669, "step": 2584 }, { "epoch": 0.36596588093721244, "grad_norm": 8.340308004496194, "learning_rate": 4.699358877260717e-06, "loss": 1.0719, "step": 2585 }, { "epoch": 0.36610745381184967, "grad_norm": 7.027950614777398, "learning_rate": 4.699086292533836e-06, "loss": 1.1611, "step": 2586 }, { "epoch": 0.36624902668648684, "grad_norm": 8.657914755588616, "learning_rate": 4.698813592202628e-06, "loss": 1.1568, "step": 2587 }, { "epoch": 0.3663905995611241, "grad_norm": 9.8578787029448, "learning_rate": 4.69854077628143e-06, "loss": 1.3429, "step": 2588 }, { "epoch": 0.3665321724357613, "grad_norm": 8.3316017895064, "learning_rate": 4.698267844784582e-06, "loss": 1.2758, "step": 2589 }, { "epoch": 0.36667374531039854, "grad_norm": 9.86674134853383, "learning_rate": 4.697994797726433e-06, "loss": 1.0627, "step": 2590 }, { "epoch": 0.36681531818503577, "grad_norm": 7.205148276207014, "learning_rate": 4.6977216351213355e-06, "loss": 1.1755, "step": 2591 }, { "epoch": 0.36695689105967294, "grad_norm": 7.704521625182214, "learning_rate": 4.697448356983651e-06, "loss": 1.2141, "step": 2592 }, { "epoch": 0.36709846393431017, "grad_norm": 10.40754503777807, "learning_rate": 4.697174963327744e-06, "loss": 1.303, "step": 2593 }, { "epoch": 0.3672400368089474, "grad_norm": 8.88597610221086, "learning_rate": 4.696901454167989e-06, "loss": 1.164, "step": 2594 }, { "epoch": 0.36738160968358463, "grad_norm": 8.893536618877137, "learning_rate": 4.696627829518761e-06, "loss": 1.1188, "step": 2595 }, { "epoch": 0.36752318255822186, "grad_norm": 8.796460861837996, "learning_rate": 4.696354089394447e-06, "loss": 1.3198, "step": 2596 }, { "epoch": 0.3676647554328591, "grad_norm": 8.81553565152125, "learning_rate": 4.696080233809436e-06, "loss": 1.2374, "step": 2597 }, { "epoch": 0.36780632830749627, "grad_norm": 10.709077524729386, "learning_rate": 4.695806262778124e-06, "loss": 1.1936, "step": 2598 }, { "epoch": 0.3679479011821335, "grad_norm": 9.028891918847327, "learning_rate": 4.695532176314914e-06, "loss": 1.2072, "step": 2599 }, { "epoch": 0.3680894740567707, "grad_norm": 8.057145580766676, "learning_rate": 4.695257974434215e-06, "loss": 1.1326, "step": 2600 }, { "epoch": 0.36823104693140796, "grad_norm": 8.607044049811215, "learning_rate": 4.694983657150442e-06, "loss": 0.9953, "step": 2601 }, { "epoch": 0.3683726198060452, "grad_norm": 10.293924143278797, "learning_rate": 4.6947092244780134e-06, "loss": 1.339, "step": 2602 }, { "epoch": 0.36851419268068236, "grad_norm": 8.980510802832162, "learning_rate": 4.694434676431358e-06, "loss": 1.2107, "step": 2603 }, { "epoch": 0.3686557655553196, "grad_norm": 9.531608599811472, "learning_rate": 4.694160013024907e-06, "loss": 1.0385, "step": 2604 }, { "epoch": 0.3687973384299568, "grad_norm": 8.337861572844092, "learning_rate": 4.693885234273101e-06, "loss": 1.1071, "step": 2605 }, { "epoch": 0.36893891130459405, "grad_norm": 10.493461026000869, "learning_rate": 4.693610340190384e-06, "loss": 1.412, "step": 2606 }, { "epoch": 0.3690804841792313, "grad_norm": 10.629622945907723, "learning_rate": 4.693335330791207e-06, "loss": 1.1915, "step": 2607 }, { "epoch": 0.36922205705386846, "grad_norm": 11.289922112600856, "learning_rate": 4.693060206090028e-06, "loss": 1.3554, "step": 2608 }, { "epoch": 0.3693636299285057, "grad_norm": 7.615564730988438, "learning_rate": 4.692784966101308e-06, "loss": 1.0888, "step": 2609 }, { "epoch": 0.3695052028031429, "grad_norm": 13.252531565535236, "learning_rate": 4.6925096108395175e-06, "loss": 1.1663, "step": 2610 }, { "epoch": 0.36964677567778015, "grad_norm": 11.085495663763895, "learning_rate": 4.692234140319131e-06, "loss": 1.3691, "step": 2611 }, { "epoch": 0.3697883485524174, "grad_norm": 7.921847552189876, "learning_rate": 4.691958554554631e-06, "loss": 1.1994, "step": 2612 }, { "epoch": 0.36992992142705455, "grad_norm": 9.605153020049926, "learning_rate": 4.6916828535605044e-06, "loss": 1.1187, "step": 2613 }, { "epoch": 0.3700714943016918, "grad_norm": 9.67541537711568, "learning_rate": 4.691407037351244e-06, "loss": 1.1814, "step": 2614 }, { "epoch": 0.370213067176329, "grad_norm": 8.03170549459176, "learning_rate": 4.69113110594135e-06, "loss": 1.3074, "step": 2615 }, { "epoch": 0.37035464005096624, "grad_norm": 9.660556659968863, "learning_rate": 4.690855059345327e-06, "loss": 1.27, "step": 2616 }, { "epoch": 0.3704962129256035, "grad_norm": 8.659854950780336, "learning_rate": 4.690578897577687e-06, "loss": 1.1069, "step": 2617 }, { "epoch": 0.37063778580024065, "grad_norm": 11.727773218853251, "learning_rate": 4.690302620652949e-06, "loss": 1.2598, "step": 2618 }, { "epoch": 0.3707793586748779, "grad_norm": 14.62524883352016, "learning_rate": 4.690026228585634e-06, "loss": 1.1354, "step": 2619 }, { "epoch": 0.3709209315495151, "grad_norm": 10.881634672443807, "learning_rate": 4.689749721390273e-06, "loss": 1.2034, "step": 2620 }, { "epoch": 0.37106250442415234, "grad_norm": 12.865867560942135, "learning_rate": 4.689473099081403e-06, "loss": 1.4026, "step": 2621 }, { "epoch": 0.37120407729878957, "grad_norm": 8.7724130496056, "learning_rate": 4.689196361673565e-06, "loss": 1.1467, "step": 2622 }, { "epoch": 0.37134565017342674, "grad_norm": 17.40206798673203, "learning_rate": 4.688919509181305e-06, "loss": 1.242, "step": 2623 }, { "epoch": 0.371487223048064, "grad_norm": 8.084474882814552, "learning_rate": 4.68864254161918e-06, "loss": 1.08, "step": 2624 }, { "epoch": 0.3716287959227012, "grad_norm": 11.528219346828623, "learning_rate": 4.6883654590017475e-06, "loss": 1.2857, "step": 2625 }, { "epoch": 0.37177036879733844, "grad_norm": 10.523620196075589, "learning_rate": 4.688088261343575e-06, "loss": 1.1279, "step": 2626 }, { "epoch": 0.37191194167197567, "grad_norm": 9.203084793391588, "learning_rate": 4.687810948659234e-06, "loss": 1.2103, "step": 2627 }, { "epoch": 0.3720535145466129, "grad_norm": 7.740436190357245, "learning_rate": 4.687533520963302e-06, "loss": 1.1309, "step": 2628 }, { "epoch": 0.37219508742125007, "grad_norm": 8.904714779963477, "learning_rate": 4.6872559782703655e-06, "loss": 1.2419, "step": 2629 }, { "epoch": 0.3723366602958873, "grad_norm": 8.24686996731563, "learning_rate": 4.686978320595012e-06, "loss": 1.0178, "step": 2630 }, { "epoch": 0.37247823317052453, "grad_norm": 10.748234803520138, "learning_rate": 4.686700547951839e-06, "loss": 1.1695, "step": 2631 }, { "epoch": 0.37261980604516176, "grad_norm": 9.849962774680808, "learning_rate": 4.686422660355448e-06, "loss": 1.3209, "step": 2632 }, { "epoch": 0.372761378919799, "grad_norm": 8.685087239426286, "learning_rate": 4.686144657820449e-06, "loss": 1.2319, "step": 2633 }, { "epoch": 0.37290295179443617, "grad_norm": 10.715526930348293, "learning_rate": 4.685866540361456e-06, "loss": 1.2466, "step": 2634 }, { "epoch": 0.3730445246690734, "grad_norm": 7.641565058319359, "learning_rate": 4.685588307993087e-06, "loss": 1.0218, "step": 2635 }, { "epoch": 0.3731860975437106, "grad_norm": 11.025031652426751, "learning_rate": 4.6853099607299725e-06, "loss": 1.1629, "step": 2636 }, { "epoch": 0.37332767041834786, "grad_norm": 10.44100509540838, "learning_rate": 4.685031498586741e-06, "loss": 1.0349, "step": 2637 }, { "epoch": 0.3734692432929851, "grad_norm": 8.286340379784578, "learning_rate": 4.684752921578033e-06, "loss": 1.2846, "step": 2638 }, { "epoch": 0.37361081616762226, "grad_norm": 8.755934964053044, "learning_rate": 4.684474229718494e-06, "loss": 1.2507, "step": 2639 }, { "epoch": 0.3737523890422595, "grad_norm": 11.999498992633765, "learning_rate": 4.6841954230227725e-06, "loss": 1.3631, "step": 2640 }, { "epoch": 0.3738939619168967, "grad_norm": 13.825960392903374, "learning_rate": 4.683916501505527e-06, "loss": 1.2249, "step": 2641 }, { "epoch": 0.37403553479153395, "grad_norm": 8.08554733735626, "learning_rate": 4.6836374651814186e-06, "loss": 1.1791, "step": 2642 }, { "epoch": 0.3741771076661712, "grad_norm": 7.713161901329006, "learning_rate": 4.6833583140651175e-06, "loss": 1.0248, "step": 2643 }, { "epoch": 0.37431868054080836, "grad_norm": 9.742293221910316, "learning_rate": 4.6830790481712975e-06, "loss": 1.3169, "step": 2644 }, { "epoch": 0.3744602534154456, "grad_norm": 10.049759470020835, "learning_rate": 4.68279966751464e-06, "loss": 1.0528, "step": 2645 }, { "epoch": 0.3746018262900828, "grad_norm": 61.223946148800714, "learning_rate": 4.682520172109831e-06, "loss": 0.9916, "step": 2646 }, { "epoch": 0.37474339916472005, "grad_norm": 50.6096499683793, "learning_rate": 4.682240561971565e-06, "loss": 1.2559, "step": 2647 }, { "epoch": 0.3748849720393573, "grad_norm": 8.557594397668074, "learning_rate": 4.681960837114539e-06, "loss": 1.1218, "step": 2648 }, { "epoch": 0.37502654491399445, "grad_norm": 245.68932840835802, "learning_rate": 4.681680997553459e-06, "loss": 1.8602, "step": 2649 }, { "epoch": 0.3751681177886317, "grad_norm": 719.6138721564503, "learning_rate": 4.681401043303036e-06, "loss": 7.4486, "step": 2650 }, { "epoch": 0.3753096906632689, "grad_norm": 106.4377752492037, "learning_rate": 4.681120974377985e-06, "loss": 2.2904, "step": 2651 }, { "epoch": 0.37545126353790614, "grad_norm": 63.22827257742378, "learning_rate": 4.680840790793032e-06, "loss": 1.6441, "step": 2652 }, { "epoch": 0.3755928364125434, "grad_norm": 340.1762525698994, "learning_rate": 4.680560492562904e-06, "loss": 2.7335, "step": 2653 }, { "epoch": 0.37573440928718055, "grad_norm": 216.10669023077466, "learning_rate": 4.680280079702339e-06, "loss": 4.2543, "step": 2654 }, { "epoch": 0.3758759821618178, "grad_norm": 88.70355298690126, "learning_rate": 4.679999552226073e-06, "loss": 1.9404, "step": 2655 }, { "epoch": 0.376017555036455, "grad_norm": 179.7345155882698, "learning_rate": 4.679718910148858e-06, "loss": 2.4292, "step": 2656 }, { "epoch": 0.37615912791109224, "grad_norm": 54.553626318799836, "learning_rate": 4.679438153485444e-06, "loss": 1.5392, "step": 2657 }, { "epoch": 0.37630070078572947, "grad_norm": 31.83415926782208, "learning_rate": 4.679157282250592e-06, "loss": 1.5953, "step": 2658 }, { "epoch": 0.3764422736603667, "grad_norm": 53.65705990943783, "learning_rate": 4.678876296459066e-06, "loss": 1.8037, "step": 2659 }, { "epoch": 0.3765838465350039, "grad_norm": 40.56912473588674, "learning_rate": 4.678595196125638e-06, "loss": 1.7307, "step": 2660 }, { "epoch": 0.3767254194096411, "grad_norm": 19.438349371012222, "learning_rate": 4.678313981265086e-06, "loss": 1.4986, "step": 2661 }, { "epoch": 0.37686699228427833, "grad_norm": 32.32056082965997, "learning_rate": 4.678032651892191e-06, "loss": 1.5549, "step": 2662 }, { "epoch": 0.37700856515891557, "grad_norm": 20.580846144591426, "learning_rate": 4.677751208021744e-06, "loss": 1.4211, "step": 2663 }, { "epoch": 0.3771501380335528, "grad_norm": 24.88951170611328, "learning_rate": 4.677469649668539e-06, "loss": 1.5143, "step": 2664 }, { "epoch": 0.37729171090818997, "grad_norm": 19.552065470434478, "learning_rate": 4.677187976847379e-06, "loss": 1.2825, "step": 2665 }, { "epoch": 0.3774332837828272, "grad_norm": 16.181284060834663, "learning_rate": 4.67690618957307e-06, "loss": 1.4351, "step": 2666 }, { "epoch": 0.37757485665746443, "grad_norm": 20.30368423478506, "learning_rate": 4.676624287860425e-06, "loss": 1.5557, "step": 2667 }, { "epoch": 0.37771642953210166, "grad_norm": 17.536457559895045, "learning_rate": 4.676342271724266e-06, "loss": 1.4887, "step": 2668 }, { "epoch": 0.3778580024067389, "grad_norm": 11.271654777184033, "learning_rate": 4.676060141179415e-06, "loss": 1.3649, "step": 2669 }, { "epoch": 0.37799957528137607, "grad_norm": 15.382347274262651, "learning_rate": 4.675777896240706e-06, "loss": 1.4251, "step": 2670 }, { "epoch": 0.3781411481560133, "grad_norm": 13.02422423294828, "learning_rate": 4.675495536922975e-06, "loss": 1.4488, "step": 2671 }, { "epoch": 0.3782827210306505, "grad_norm": 12.635321599734253, "learning_rate": 4.675213063241065e-06, "loss": 1.0786, "step": 2672 }, { "epoch": 0.37842429390528776, "grad_norm": 20.737978474786377, "learning_rate": 4.674930475209827e-06, "loss": 1.2599, "step": 2673 }, { "epoch": 0.378565866779925, "grad_norm": 11.106423287657863, "learning_rate": 4.674647772844115e-06, "loss": 1.2303, "step": 2674 }, { "epoch": 0.37870743965456216, "grad_norm": 11.838690248164793, "learning_rate": 4.674364956158791e-06, "loss": 1.3721, "step": 2675 }, { "epoch": 0.3788490125291994, "grad_norm": 12.13145165017876, "learning_rate": 4.674082025168723e-06, "loss": 1.3205, "step": 2676 }, { "epoch": 0.3789905854038366, "grad_norm": 11.793520580621669, "learning_rate": 4.673798979888784e-06, "loss": 1.315, "step": 2677 }, { "epoch": 0.37913215827847385, "grad_norm": 12.680091837360889, "learning_rate": 4.673515820333853e-06, "loss": 1.4251, "step": 2678 }, { "epoch": 0.3792737311531111, "grad_norm": 11.973005449638379, "learning_rate": 4.673232546518817e-06, "loss": 1.338, "step": 2679 }, { "epoch": 0.37941530402774826, "grad_norm": 13.60784616215408, "learning_rate": 4.672949158458565e-06, "loss": 1.2942, "step": 2680 }, { "epoch": 0.3795568769023855, "grad_norm": 13.927675077220915, "learning_rate": 4.672665656167997e-06, "loss": 1.2641, "step": 2681 }, { "epoch": 0.3796984497770227, "grad_norm": 9.963638956592327, "learning_rate": 4.672382039662016e-06, "loss": 1.2523, "step": 2682 }, { "epoch": 0.37984002265165995, "grad_norm": 14.045009196337157, "learning_rate": 4.672098308955529e-06, "loss": 1.2666, "step": 2683 }, { "epoch": 0.3799815955262972, "grad_norm": 12.15753465155296, "learning_rate": 4.671814464063455e-06, "loss": 1.4344, "step": 2684 }, { "epoch": 0.3801231684009344, "grad_norm": 10.436642548752433, "learning_rate": 4.671530505000714e-06, "loss": 1.2374, "step": 2685 }, { "epoch": 0.3802647412755716, "grad_norm": 11.936953547088619, "learning_rate": 4.671246431782234e-06, "loss": 1.3049, "step": 2686 }, { "epoch": 0.3804063141502088, "grad_norm": 9.501871025146343, "learning_rate": 4.670962244422946e-06, "loss": 1.3993, "step": 2687 }, { "epoch": 0.38054788702484604, "grad_norm": 9.497165457829954, "learning_rate": 4.670677942937793e-06, "loss": 1.4469, "step": 2688 }, { "epoch": 0.3806894598994833, "grad_norm": 10.441624357640636, "learning_rate": 4.6703935273417195e-06, "loss": 1.2959, "step": 2689 }, { "epoch": 0.3808310327741205, "grad_norm": 10.336909587933018, "learning_rate": 4.670108997649676e-06, "loss": 1.2105, "step": 2690 }, { "epoch": 0.3809726056487577, "grad_norm": 9.933102192418996, "learning_rate": 4.66982435387662e-06, "loss": 1.3503, "step": 2691 }, { "epoch": 0.3811141785233949, "grad_norm": 8.134942178081086, "learning_rate": 4.669539596037517e-06, "loss": 1.3376, "step": 2692 }, { "epoch": 0.38125575139803214, "grad_norm": 11.785316150100195, "learning_rate": 4.669254724147334e-06, "loss": 1.3857, "step": 2693 }, { "epoch": 0.38139732427266937, "grad_norm": 10.363775224987393, "learning_rate": 4.6689697382210475e-06, "loss": 1.2123, "step": 2694 }, { "epoch": 0.3815388971473066, "grad_norm": 8.723336712150614, "learning_rate": 4.668684638273639e-06, "loss": 1.153, "step": 2695 }, { "epoch": 0.3816804700219438, "grad_norm": 10.629272320046699, "learning_rate": 4.668399424320097e-06, "loss": 1.2295, "step": 2696 }, { "epoch": 0.381822042896581, "grad_norm": 11.01535003535372, "learning_rate": 4.668114096375413e-06, "loss": 1.4107, "step": 2697 }, { "epoch": 0.38196361577121823, "grad_norm": 9.12875999428198, "learning_rate": 4.6678286544545894e-06, "loss": 1.408, "step": 2698 }, { "epoch": 0.38210518864585546, "grad_norm": 9.665480266860126, "learning_rate": 4.667543098572627e-06, "loss": 1.3217, "step": 2699 }, { "epoch": 0.3822467615204927, "grad_norm": 9.11171518303004, "learning_rate": 4.667257428744542e-06, "loss": 1.2478, "step": 2700 }, { "epoch": 0.38238833439512987, "grad_norm": 10.55757197701274, "learning_rate": 4.6669716449853505e-06, "loss": 1.2566, "step": 2701 }, { "epoch": 0.3825299072697671, "grad_norm": 11.33770607757859, "learning_rate": 4.666685747310075e-06, "loss": 1.2531, "step": 2702 }, { "epoch": 0.38267148014440433, "grad_norm": 9.87286237927272, "learning_rate": 4.666399735733745e-06, "loss": 1.1003, "step": 2703 }, { "epoch": 0.38281305301904156, "grad_norm": 9.174280322379175, "learning_rate": 4.666113610271395e-06, "loss": 1.2061, "step": 2704 }, { "epoch": 0.3829546258936788, "grad_norm": 9.760176214284593, "learning_rate": 4.66582737093807e-06, "loss": 1.2878, "step": 2705 }, { "epoch": 0.38309619876831597, "grad_norm": 9.282818286061149, "learning_rate": 4.665541017748813e-06, "loss": 1.2088, "step": 2706 }, { "epoch": 0.3832377716429532, "grad_norm": 10.552986872037069, "learning_rate": 4.665254550718681e-06, "loss": 1.2764, "step": 2707 }, { "epoch": 0.3833793445175904, "grad_norm": 9.790994391122496, "learning_rate": 4.6649679698627306e-06, "loss": 1.178, "step": 2708 }, { "epoch": 0.38352091739222766, "grad_norm": 10.016598087357421, "learning_rate": 4.664681275196028e-06, "loss": 1.2359, "step": 2709 }, { "epoch": 0.3836624902668649, "grad_norm": 12.329081721828095, "learning_rate": 4.664394466733646e-06, "loss": 1.3059, "step": 2710 }, { "epoch": 0.38380406314150206, "grad_norm": 8.492913377441145, "learning_rate": 4.66410754449066e-06, "loss": 1.2905, "step": 2711 }, { "epoch": 0.3839456360161393, "grad_norm": 10.969386416499885, "learning_rate": 4.6638205084821544e-06, "loss": 1.2192, "step": 2712 }, { "epoch": 0.3840872088907765, "grad_norm": 7.251400253299009, "learning_rate": 4.6635333587232175e-06, "loss": 1.2192, "step": 2713 }, { "epoch": 0.38422878176541375, "grad_norm": 9.89555055448494, "learning_rate": 4.663246095228946e-06, "loss": 1.2323, "step": 2714 }, { "epoch": 0.384370354640051, "grad_norm": 10.454333955336754, "learning_rate": 4.66295871801444e-06, "loss": 1.338, "step": 2715 }, { "epoch": 0.3845119275146882, "grad_norm": 9.091805476298294, "learning_rate": 4.662671227094806e-06, "loss": 1.1689, "step": 2716 }, { "epoch": 0.3846535003893254, "grad_norm": 10.100304521558876, "learning_rate": 4.662383622485159e-06, "loss": 1.2532, "step": 2717 }, { "epoch": 0.3847950732639626, "grad_norm": 10.538027249468795, "learning_rate": 4.662095904200617e-06, "loss": 1.1787, "step": 2718 }, { "epoch": 0.38493664613859985, "grad_norm": 8.415051245439859, "learning_rate": 4.661808072256306e-06, "loss": 1.2641, "step": 2719 }, { "epoch": 0.3850782190132371, "grad_norm": 10.237145527908174, "learning_rate": 4.661520126667356e-06, "loss": 1.2255, "step": 2720 }, { "epoch": 0.3852197918878743, "grad_norm": 14.240204238284186, "learning_rate": 4.6612320674489045e-06, "loss": 1.2562, "step": 2721 }, { "epoch": 0.3853613647625115, "grad_norm": 10.696366051332648, "learning_rate": 4.660943894616095e-06, "loss": 1.3444, "step": 2722 }, { "epoch": 0.3855029376371487, "grad_norm": 9.457914995753267, "learning_rate": 4.660655608184076e-06, "loss": 1.2949, "step": 2723 }, { "epoch": 0.38564451051178594, "grad_norm": 9.210007689366149, "learning_rate": 4.660367208168004e-06, "loss": 1.1321, "step": 2724 }, { "epoch": 0.3857860833864232, "grad_norm": 7.9927163345321395, "learning_rate": 4.660078694583037e-06, "loss": 1.2493, "step": 2725 }, { "epoch": 0.3859276562610604, "grad_norm": 9.14588482146372, "learning_rate": 4.6597900674443445e-06, "loss": 1.3643, "step": 2726 }, { "epoch": 0.3860692291356976, "grad_norm": 10.833659905622088, "learning_rate": 4.659501326767098e-06, "loss": 1.4215, "step": 2727 }, { "epoch": 0.3862108020103348, "grad_norm": 8.516369484371609, "learning_rate": 4.6592124725664776e-06, "loss": 1.2873, "step": 2728 }, { "epoch": 0.38635237488497204, "grad_norm": 7.876575130792292, "learning_rate": 4.6589235048576676e-06, "loss": 1.1488, "step": 2729 }, { "epoch": 0.38649394775960927, "grad_norm": 7.932109053361774, "learning_rate": 4.658634423655858e-06, "loss": 1.1527, "step": 2730 }, { "epoch": 0.3866355206342465, "grad_norm": 9.812664054908442, "learning_rate": 4.658345228976246e-06, "loss": 1.2026, "step": 2731 }, { "epoch": 0.3867770935088837, "grad_norm": 8.591980908108034, "learning_rate": 4.658055920834036e-06, "loss": 1.1719, "step": 2732 }, { "epoch": 0.3869186663835209, "grad_norm": 8.889831614835948, "learning_rate": 4.6577664992444345e-06, "loss": 1.0979, "step": 2733 }, { "epoch": 0.38706023925815813, "grad_norm": 9.10288851660302, "learning_rate": 4.657476964222657e-06, "loss": 1.286, "step": 2734 }, { "epoch": 0.38720181213279536, "grad_norm": 8.537601105075977, "learning_rate": 4.657187315783925e-06, "loss": 1.181, "step": 2735 }, { "epoch": 0.3873433850074326, "grad_norm": 8.015437014273468, "learning_rate": 4.656897553943463e-06, "loss": 1.1654, "step": 2736 }, { "epoch": 0.38748495788206977, "grad_norm": 12.311835101614054, "learning_rate": 4.656607678716506e-06, "loss": 1.282, "step": 2737 }, { "epoch": 0.387626530756707, "grad_norm": 8.125226296427938, "learning_rate": 4.656317690118291e-06, "loss": 1.225, "step": 2738 }, { "epoch": 0.38776810363134423, "grad_norm": 7.714850425234454, "learning_rate": 4.6560275881640615e-06, "loss": 1.2273, "step": 2739 }, { "epoch": 0.38790967650598146, "grad_norm": 8.486095949427547, "learning_rate": 4.655737372869071e-06, "loss": 1.2004, "step": 2740 }, { "epoch": 0.3880512493806187, "grad_norm": 8.810018852984447, "learning_rate": 4.655447044248573e-06, "loss": 1.1282, "step": 2741 }, { "epoch": 0.3881928222552559, "grad_norm": 7.66928274329685, "learning_rate": 4.655156602317832e-06, "loss": 1.1494, "step": 2742 }, { "epoch": 0.3883343951298931, "grad_norm": 8.64520991885192, "learning_rate": 4.654866047092115e-06, "loss": 1.1447, "step": 2743 }, { "epoch": 0.3884759680045303, "grad_norm": 8.071294202300995, "learning_rate": 4.654575378586696e-06, "loss": 1.2118, "step": 2744 }, { "epoch": 0.38861754087916756, "grad_norm": 7.999461632733431, "learning_rate": 4.6542845968168575e-06, "loss": 1.048, "step": 2745 }, { "epoch": 0.3887591137538048, "grad_norm": 8.157547303137733, "learning_rate": 4.653993701797883e-06, "loss": 1.2265, "step": 2746 }, { "epoch": 0.388900686628442, "grad_norm": 7.964992220600014, "learning_rate": 4.653702693545066e-06, "loss": 1.1155, "step": 2747 }, { "epoch": 0.3890422595030792, "grad_norm": 10.534052535343784, "learning_rate": 4.653411572073704e-06, "loss": 1.2427, "step": 2748 }, { "epoch": 0.3891838323777164, "grad_norm": 9.036406737664901, "learning_rate": 4.6531203373991015e-06, "loss": 1.29, "step": 2749 }, { "epoch": 0.38932540525235365, "grad_norm": 8.448696816981506, "learning_rate": 4.652828989536567e-06, "loss": 1.1129, "step": 2750 }, { "epoch": 0.3894669781269909, "grad_norm": 9.114326400768523, "learning_rate": 4.6525375285014195e-06, "loss": 1.3077, "step": 2751 }, { "epoch": 0.3896085510016281, "grad_norm": 10.630328469140379, "learning_rate": 4.652245954308979e-06, "loss": 1.3086, "step": 2752 }, { "epoch": 0.3897501238762653, "grad_norm": 11.657812918844652, "learning_rate": 4.651954266974573e-06, "loss": 1.2397, "step": 2753 }, { "epoch": 0.3898916967509025, "grad_norm": 10.484155598603191, "learning_rate": 4.651662466513536e-06, "loss": 1.146, "step": 2754 }, { "epoch": 0.39003326962553975, "grad_norm": 8.122819813414022, "learning_rate": 4.651370552941207e-06, "loss": 1.243, "step": 2755 }, { "epoch": 0.390174842500177, "grad_norm": 13.050919797790476, "learning_rate": 4.651078526272932e-06, "loss": 1.0982, "step": 2756 }, { "epoch": 0.3903164153748142, "grad_norm": 14.27877098286999, "learning_rate": 4.6507863865240635e-06, "loss": 1.1674, "step": 2757 }, { "epoch": 0.3904579882494514, "grad_norm": 9.254013840262058, "learning_rate": 4.650494133709958e-06, "loss": 1.284, "step": 2758 }, { "epoch": 0.3905995611240886, "grad_norm": 8.613117823704696, "learning_rate": 4.650201767845979e-06, "loss": 1.1543, "step": 2759 }, { "epoch": 0.39074113399872584, "grad_norm": 9.714903042516084, "learning_rate": 4.649909288947497e-06, "loss": 1.1786, "step": 2760 }, { "epoch": 0.3908827068733631, "grad_norm": 10.111481106687508, "learning_rate": 4.649616697029886e-06, "loss": 1.0455, "step": 2761 }, { "epoch": 0.3910242797480003, "grad_norm": 10.765283410310055, "learning_rate": 4.649323992108529e-06, "loss": 1.1643, "step": 2762 }, { "epoch": 0.3911658526226375, "grad_norm": 9.49563377832416, "learning_rate": 4.649031174198812e-06, "loss": 1.2291, "step": 2763 }, { "epoch": 0.3913074254972747, "grad_norm": 8.768756055659884, "learning_rate": 4.648738243316128e-06, "loss": 1.4483, "step": 2764 }, { "epoch": 0.39144899837191194, "grad_norm": 10.51403451939359, "learning_rate": 4.648445199475877e-06, "loss": 1.1473, "step": 2765 }, { "epoch": 0.39159057124654917, "grad_norm": 10.951384532612867, "learning_rate": 4.648152042693464e-06, "loss": 1.1828, "step": 2766 }, { "epoch": 0.3917321441211864, "grad_norm": 11.551528921946666, "learning_rate": 4.6478587729843e-06, "loss": 1.19, "step": 2767 }, { "epoch": 0.3918737169958236, "grad_norm": 10.275626920337125, "learning_rate": 4.647565390363802e-06, "loss": 1.1997, "step": 2768 }, { "epoch": 0.3920152898704608, "grad_norm": 10.39191097246642, "learning_rate": 4.6472718948473915e-06, "loss": 1.1123, "step": 2769 }, { "epoch": 0.39215686274509803, "grad_norm": 12.693796183484732, "learning_rate": 4.6469782864504995e-06, "loss": 1.2015, "step": 2770 }, { "epoch": 0.39229843561973526, "grad_norm": 9.426676423509756, "learning_rate": 4.64668456518856e-06, "loss": 1.1307, "step": 2771 }, { "epoch": 0.3924400084943725, "grad_norm": 11.849979423150977, "learning_rate": 4.646390731077013e-06, "loss": 1.1623, "step": 2772 }, { "epoch": 0.3925815813690097, "grad_norm": 10.280516438852269, "learning_rate": 4.646096784131306e-06, "loss": 1.2245, "step": 2773 }, { "epoch": 0.3927231542436469, "grad_norm": 10.564624459867343, "learning_rate": 4.645802724366891e-06, "loss": 1.1631, "step": 2774 }, { "epoch": 0.39286472711828413, "grad_norm": 10.11926710045696, "learning_rate": 4.645508551799227e-06, "loss": 1.2378, "step": 2775 }, { "epoch": 0.39300629999292136, "grad_norm": 9.961308586837728, "learning_rate": 4.645214266443778e-06, "loss": 1.2451, "step": 2776 }, { "epoch": 0.3931478728675586, "grad_norm": 9.635069496842478, "learning_rate": 4.644919868316014e-06, "loss": 1.216, "step": 2777 }, { "epoch": 0.3932894457421958, "grad_norm": 7.901752625181362, "learning_rate": 4.644625357431414e-06, "loss": 1.1152, "step": 2778 }, { "epoch": 0.393431018616833, "grad_norm": 9.601186249874813, "learning_rate": 4.6443307338054565e-06, "loss": 1.2273, "step": 2779 }, { "epoch": 0.3935725914914702, "grad_norm": 7.697847179328066, "learning_rate": 4.644035997453631e-06, "loss": 1.1287, "step": 2780 }, { "epoch": 0.39371416436610746, "grad_norm": 8.803772845820182, "learning_rate": 4.643741148391432e-06, "loss": 1.2706, "step": 2781 }, { "epoch": 0.3938557372407447, "grad_norm": 9.081087820622558, "learning_rate": 4.64344618663436e-06, "loss": 1.3146, "step": 2782 }, { "epoch": 0.3939973101153819, "grad_norm": 8.657258233250728, "learning_rate": 4.643151112197919e-06, "loss": 1.167, "step": 2783 }, { "epoch": 0.3941388829900191, "grad_norm": 10.457811157645962, "learning_rate": 4.642855925097622e-06, "loss": 1.255, "step": 2784 }, { "epoch": 0.3942804558646563, "grad_norm": 9.006597008462828, "learning_rate": 4.642560625348988e-06, "loss": 1.145, "step": 2785 }, { "epoch": 0.39442202873929355, "grad_norm": 9.878556781951414, "learning_rate": 4.642265212967539e-06, "loss": 1.1271, "step": 2786 }, { "epoch": 0.3945636016139308, "grad_norm": 8.540977884449546, "learning_rate": 4.6419696879688046e-06, "loss": 1.0965, "step": 2787 }, { "epoch": 0.394705174488568, "grad_norm": 8.264818235084446, "learning_rate": 4.641674050368321e-06, "loss": 1.0718, "step": 2788 }, { "epoch": 0.3948467473632052, "grad_norm": 9.338277279369398, "learning_rate": 4.641378300181629e-06, "loss": 1.2603, "step": 2789 }, { "epoch": 0.3949883202378424, "grad_norm": 9.840878409899087, "learning_rate": 4.641082437424277e-06, "loss": 1.2714, "step": 2790 }, { "epoch": 0.39512989311247965, "grad_norm": 7.442381467019462, "learning_rate": 4.6407864621118184e-06, "loss": 1.2088, "step": 2791 }, { "epoch": 0.3952714659871169, "grad_norm": 9.584456623974855, "learning_rate": 4.640490374259811e-06, "loss": 1.2332, "step": 2792 }, { "epoch": 0.3954130388617541, "grad_norm": 8.656805688532225, "learning_rate": 4.6401941738838204e-06, "loss": 1.1966, "step": 2793 }, { "epoch": 0.3955546117363913, "grad_norm": 10.954863101551314, "learning_rate": 4.639897860999418e-06, "loss": 1.2107, "step": 2794 }, { "epoch": 0.3956961846110285, "grad_norm": 7.144378810651623, "learning_rate": 4.639601435622182e-06, "loss": 1.22, "step": 2795 }, { "epoch": 0.39583775748566574, "grad_norm": 9.93410141128071, "learning_rate": 4.639304897767692e-06, "loss": 1.311, "step": 2796 }, { "epoch": 0.395979330360303, "grad_norm": 9.569482585189347, "learning_rate": 4.63900824745154e-06, "loss": 1.2189, "step": 2797 }, { "epoch": 0.3961209032349402, "grad_norm": 7.747566825625275, "learning_rate": 4.638711484689319e-06, "loss": 1.0098, "step": 2798 }, { "epoch": 0.3962624761095774, "grad_norm": 9.010948303650103, "learning_rate": 4.638414609496628e-06, "loss": 1.0929, "step": 2799 }, { "epoch": 0.3964040489842146, "grad_norm": 9.970321483575134, "learning_rate": 4.638117621889078e-06, "loss": 1.196, "step": 2800 }, { "epoch": 0.39654562185885184, "grad_norm": 8.822063478354329, "learning_rate": 4.637820521882278e-06, "loss": 1.0744, "step": 2801 }, { "epoch": 0.39668719473348907, "grad_norm": 8.844458669569034, "learning_rate": 4.637523309491847e-06, "loss": 1.3337, "step": 2802 }, { "epoch": 0.3968287676081263, "grad_norm": 10.838144926001485, "learning_rate": 4.63722598473341e-06, "loss": 1.269, "step": 2803 }, { "epoch": 0.39697034048276353, "grad_norm": 8.907012371427673, "learning_rate": 4.636928547622596e-06, "loss": 1.154, "step": 2804 }, { "epoch": 0.3971119133574007, "grad_norm": 11.625988989979183, "learning_rate": 4.636630998175042e-06, "loss": 1.2045, "step": 2805 }, { "epoch": 0.39725348623203793, "grad_norm": 8.95463615832756, "learning_rate": 4.636333336406389e-06, "loss": 1.2185, "step": 2806 }, { "epoch": 0.39739505910667516, "grad_norm": 10.443998038669783, "learning_rate": 4.636035562332286e-06, "loss": 1.4022, "step": 2807 }, { "epoch": 0.3975366319813124, "grad_norm": 10.052606303524094, "learning_rate": 4.6357376759683856e-06, "loss": 1.2283, "step": 2808 }, { "epoch": 0.3976782048559496, "grad_norm": 9.891670159776087, "learning_rate": 4.635439677330349e-06, "loss": 1.1578, "step": 2809 }, { "epoch": 0.3978197777305868, "grad_norm": 9.688912368691524, "learning_rate": 4.635141566433839e-06, "loss": 1.1386, "step": 2810 }, { "epoch": 0.39796135060522403, "grad_norm": 8.268375624112984, "learning_rate": 4.6348433432945314e-06, "loss": 1.019, "step": 2811 }, { "epoch": 0.39810292347986126, "grad_norm": 8.953754289757322, "learning_rate": 4.6345450079281e-06, "loss": 1.2574, "step": 2812 }, { "epoch": 0.3982444963544985, "grad_norm": 8.982027971290826, "learning_rate": 4.634246560350229e-06, "loss": 1.1519, "step": 2813 }, { "epoch": 0.3983860692291357, "grad_norm": 11.96503631905106, "learning_rate": 4.633948000576607e-06, "loss": 1.1311, "step": 2814 }, { "epoch": 0.3985276421037729, "grad_norm": 10.605280989360029, "learning_rate": 4.63364932862293e-06, "loss": 1.3075, "step": 2815 }, { "epoch": 0.3986692149784101, "grad_norm": 7.89890379241488, "learning_rate": 4.633350544504899e-06, "loss": 0.9558, "step": 2816 }, { "epoch": 0.39881078785304735, "grad_norm": 7.961484701201211, "learning_rate": 4.63305164823822e-06, "loss": 1.0873, "step": 2817 }, { "epoch": 0.3989523607276846, "grad_norm": 9.88037967214926, "learning_rate": 4.632752639838607e-06, "loss": 1.0337, "step": 2818 }, { "epoch": 0.3990939336023218, "grad_norm": 8.259987536552577, "learning_rate": 4.632453519321778e-06, "loss": 1.1577, "step": 2819 }, { "epoch": 0.399235506476959, "grad_norm": 8.601049368067457, "learning_rate": 4.632154286703457e-06, "loss": 1.1147, "step": 2820 }, { "epoch": 0.3993770793515962, "grad_norm": 8.598188661956748, "learning_rate": 4.6318549419993765e-06, "loss": 1.1428, "step": 2821 }, { "epoch": 0.39951865222623345, "grad_norm": 10.799185432515703, "learning_rate": 4.63155548522527e-06, "loss": 1.2824, "step": 2822 }, { "epoch": 0.3996602251008707, "grad_norm": 9.022577999514564, "learning_rate": 4.6312559163968805e-06, "loss": 1.1793, "step": 2823 }, { "epoch": 0.3998017979755079, "grad_norm": 9.412006226413935, "learning_rate": 4.630956235529957e-06, "loss": 1.4078, "step": 2824 }, { "epoch": 0.3999433708501451, "grad_norm": 8.36073203792353, "learning_rate": 4.630656442640254e-06, "loss": 1.1883, "step": 2825 }, { "epoch": 0.4000849437247823, "grad_norm": 9.966798214591817, "learning_rate": 4.63035653774353e-06, "loss": 1.159, "step": 2826 }, { "epoch": 0.40022651659941955, "grad_norm": 7.20665464166749, "learning_rate": 4.6300565208555505e-06, "loss": 1.1574, "step": 2827 }, { "epoch": 0.4003680894740568, "grad_norm": 9.98145214394456, "learning_rate": 4.629756391992088e-06, "loss": 1.1879, "step": 2828 }, { "epoch": 0.400509662348694, "grad_norm": 9.346515973337004, "learning_rate": 4.629456151168921e-06, "loss": 1.3265, "step": 2829 }, { "epoch": 0.40065123522333124, "grad_norm": 8.411585366123441, "learning_rate": 4.629155798401832e-06, "loss": 1.3491, "step": 2830 }, { "epoch": 0.4007928080979684, "grad_norm": 9.4170925978035, "learning_rate": 4.628855333706609e-06, "loss": 1.1402, "step": 2831 }, { "epoch": 0.40093438097260564, "grad_norm": 8.32242444953763, "learning_rate": 4.62855475709905e-06, "loss": 1.1697, "step": 2832 }, { "epoch": 0.40107595384724287, "grad_norm": 9.294521104316981, "learning_rate": 4.628254068594953e-06, "loss": 1.2028, "step": 2833 }, { "epoch": 0.4012175267218801, "grad_norm": 8.130509606562331, "learning_rate": 4.627953268210127e-06, "loss": 1.2593, "step": 2834 }, { "epoch": 0.40135909959651733, "grad_norm": 8.710291543454368, "learning_rate": 4.627652355960384e-06, "loss": 1.1375, "step": 2835 }, { "epoch": 0.4015006724711545, "grad_norm": 9.697549836910119, "learning_rate": 4.627351331861544e-06, "loss": 1.1317, "step": 2836 }, { "epoch": 0.40164224534579174, "grad_norm": 8.019630665634397, "learning_rate": 4.6270501959294315e-06, "loss": 1.2234, "step": 2837 }, { "epoch": 0.40178381822042897, "grad_norm": 9.209435260369737, "learning_rate": 4.6267489481798744e-06, "loss": 1.2151, "step": 2838 }, { "epoch": 0.4019253910950662, "grad_norm": 8.700142203736348, "learning_rate": 4.626447588628712e-06, "loss": 1.1766, "step": 2839 }, { "epoch": 0.40206696396970343, "grad_norm": 9.451894935466314, "learning_rate": 4.626146117291784e-06, "loss": 1.0, "step": 2840 }, { "epoch": 0.4022085368443406, "grad_norm": 10.075318695404372, "learning_rate": 4.625844534184941e-06, "loss": 1.0198, "step": 2841 }, { "epoch": 0.40235010971897783, "grad_norm": 10.048489311890215, "learning_rate": 4.625542839324036e-06, "loss": 1.1776, "step": 2842 }, { "epoch": 0.40249168259361506, "grad_norm": 9.235791402715453, "learning_rate": 4.625241032724929e-06, "loss": 1.1723, "step": 2843 }, { "epoch": 0.4026332554682523, "grad_norm": 7.890649414024731, "learning_rate": 4.624939114403485e-06, "loss": 1.166, "step": 2844 }, { "epoch": 0.4027748283428895, "grad_norm": 9.213031611638526, "learning_rate": 4.624637084375576e-06, "loss": 1.3228, "step": 2845 }, { "epoch": 0.4029164012175267, "grad_norm": 7.686263070187675, "learning_rate": 4.62433494265708e-06, "loss": 1.0842, "step": 2846 }, { "epoch": 0.40305797409216393, "grad_norm": 8.527090546801961, "learning_rate": 4.62403268926388e-06, "loss": 1.1274, "step": 2847 }, { "epoch": 0.40319954696680116, "grad_norm": 7.393199383571644, "learning_rate": 4.623730324211865e-06, "loss": 1.0701, "step": 2848 }, { "epoch": 0.4033411198414384, "grad_norm": 8.59267793384036, "learning_rate": 4.623427847516931e-06, "loss": 1.1827, "step": 2849 }, { "epoch": 0.4034826927160756, "grad_norm": 9.435022698452427, "learning_rate": 4.623125259194978e-06, "loss": 1.1899, "step": 2850 }, { "epoch": 0.4036242655907128, "grad_norm": 9.711405343089792, "learning_rate": 4.622822559261913e-06, "loss": 1.1992, "step": 2851 }, { "epoch": 0.40376583846535, "grad_norm": 8.913911007422174, "learning_rate": 4.622519747733649e-06, "loss": 1.2651, "step": 2852 }, { "epoch": 0.40390741133998725, "grad_norm": 7.439152125375742, "learning_rate": 4.622216824626104e-06, "loss": 1.0564, "step": 2853 }, { "epoch": 0.4040489842146245, "grad_norm": 8.737688394897596, "learning_rate": 4.621913789955204e-06, "loss": 1.1889, "step": 2854 }, { "epoch": 0.4041905570892617, "grad_norm": 10.29355318845987, "learning_rate": 4.621610643736878e-06, "loss": 1.215, "step": 2855 }, { "epoch": 0.4043321299638989, "grad_norm": 8.119294496151397, "learning_rate": 4.621307385987062e-06, "loss": 1.0567, "step": 2856 }, { "epoch": 0.4044737028385361, "grad_norm": 7.387413569169074, "learning_rate": 4.621004016721699e-06, "loss": 1.0649, "step": 2857 }, { "epoch": 0.40461527571317335, "grad_norm": 8.48270226949931, "learning_rate": 4.620700535956735e-06, "loss": 1.2155, "step": 2858 }, { "epoch": 0.4047568485878106, "grad_norm": 10.669513759527474, "learning_rate": 4.620396943708127e-06, "loss": 1.4123, "step": 2859 }, { "epoch": 0.4048984214624478, "grad_norm": 8.817730589085789, "learning_rate": 4.6200932399918304e-06, "loss": 1.1006, "step": 2860 }, { "epoch": 0.40503999433708504, "grad_norm": 8.82138284789415, "learning_rate": 4.619789424823815e-06, "loss": 1.1194, "step": 2861 }, { "epoch": 0.4051815672117222, "grad_norm": 9.240134261836124, "learning_rate": 4.619485498220049e-06, "loss": 1.3061, "step": 2862 }, { "epoch": 0.40532314008635945, "grad_norm": 9.333874232649483, "learning_rate": 4.6191814601965115e-06, "loss": 1.1382, "step": 2863 }, { "epoch": 0.4054647129609967, "grad_norm": 10.022797347786929, "learning_rate": 4.618877310769184e-06, "loss": 1.1223, "step": 2864 }, { "epoch": 0.4056062858356339, "grad_norm": 8.761690885488461, "learning_rate": 4.6185730499540565e-06, "loss": 1.2473, "step": 2865 }, { "epoch": 0.40574785871027114, "grad_norm": 7.8547412719464385, "learning_rate": 4.618268677767124e-06, "loss": 1.1864, "step": 2866 }, { "epoch": 0.4058894315849083, "grad_norm": 8.426563550261848, "learning_rate": 4.617964194224386e-06, "loss": 1.1971, "step": 2867 }, { "epoch": 0.40603100445954554, "grad_norm": 8.775912906294547, "learning_rate": 4.617659599341849e-06, "loss": 1.18, "step": 2868 }, { "epoch": 0.40617257733418277, "grad_norm": 7.913469358248349, "learning_rate": 4.617354893135527e-06, "loss": 1.1821, "step": 2869 }, { "epoch": 0.40631415020882, "grad_norm": 10.235066025096563, "learning_rate": 4.617050075621436e-06, "loss": 1.2288, "step": 2870 }, { "epoch": 0.40645572308345723, "grad_norm": 8.429585227310547, "learning_rate": 4.6167451468156015e-06, "loss": 1.2184, "step": 2871 }, { "epoch": 0.4065972959580944, "grad_norm": 7.911032696429374, "learning_rate": 4.616440106734053e-06, "loss": 1.168, "step": 2872 }, { "epoch": 0.40673886883273164, "grad_norm": 9.843509731690757, "learning_rate": 4.6161349553928255e-06, "loss": 1.3132, "step": 2873 }, { "epoch": 0.40688044170736887, "grad_norm": 7.908481690033907, "learning_rate": 4.615829692807962e-06, "loss": 1.2225, "step": 2874 }, { "epoch": 0.4070220145820061, "grad_norm": 8.021351454873278, "learning_rate": 4.61552431899551e-06, "loss": 1.1365, "step": 2875 }, { "epoch": 0.4071635874566433, "grad_norm": 9.477756359001107, "learning_rate": 4.615218833971521e-06, "loss": 1.2487, "step": 2876 }, { "epoch": 0.4073051603312805, "grad_norm": 9.015171826375914, "learning_rate": 4.614913237752054e-06, "loss": 1.2036, "step": 2877 }, { "epoch": 0.40744673320591773, "grad_norm": 8.854263795806101, "learning_rate": 4.614607530353177e-06, "loss": 1.1318, "step": 2878 }, { "epoch": 0.40758830608055496, "grad_norm": 9.587497326230071, "learning_rate": 4.614301711790958e-06, "loss": 1.2028, "step": 2879 }, { "epoch": 0.4077298789551922, "grad_norm": 10.179102894763737, "learning_rate": 4.613995782081474e-06, "loss": 1.1559, "step": 2880 }, { "epoch": 0.4078714518298294, "grad_norm": 9.635432546791531, "learning_rate": 4.6136897412408084e-06, "loss": 1.2069, "step": 2881 }, { "epoch": 0.4080130247044666, "grad_norm": 10.005323137653635, "learning_rate": 4.61338358928505e-06, "loss": 1.1792, "step": 2882 }, { "epoch": 0.40815459757910383, "grad_norm": 9.479095346930075, "learning_rate": 4.6130773262302905e-06, "loss": 1.3267, "step": 2883 }, { "epoch": 0.40829617045374106, "grad_norm": 9.491398581541679, "learning_rate": 4.612770952092632e-06, "loss": 1.1573, "step": 2884 }, { "epoch": 0.4084377433283783, "grad_norm": 10.699491855997765, "learning_rate": 4.612464466888181e-06, "loss": 1.303, "step": 2885 }, { "epoch": 0.4085793162030155, "grad_norm": 11.83471592809193, "learning_rate": 4.612157870633047e-06, "loss": 1.1411, "step": 2886 }, { "epoch": 0.40872088907765275, "grad_norm": 9.833814275154996, "learning_rate": 4.61185116334335e-06, "loss": 1.0162, "step": 2887 }, { "epoch": 0.4088624619522899, "grad_norm": 8.997947882771618, "learning_rate": 4.61154434503521e-06, "loss": 1.1927, "step": 2888 }, { "epoch": 0.40900403482692715, "grad_norm": 9.143450751788988, "learning_rate": 4.611237415724759e-06, "loss": 1.2292, "step": 2889 }, { "epoch": 0.4091456077015644, "grad_norm": 12.263446512352395, "learning_rate": 4.610930375428132e-06, "loss": 1.1798, "step": 2890 }, { "epoch": 0.4092871805762016, "grad_norm": 13.40480044337691, "learning_rate": 4.610623224161468e-06, "loss": 1.3092, "step": 2891 }, { "epoch": 0.40942875345083884, "grad_norm": 9.212456057895329, "learning_rate": 4.610315961940916e-06, "loss": 1.2578, "step": 2892 }, { "epoch": 0.409570326325476, "grad_norm": 8.242674841190672, "learning_rate": 4.610008588782626e-06, "loss": 1.1813, "step": 2893 }, { "epoch": 0.40971189920011325, "grad_norm": 9.693378855070593, "learning_rate": 4.609701104702759e-06, "loss": 1.2324, "step": 2894 }, { "epoch": 0.4098534720747505, "grad_norm": 11.574881857481243, "learning_rate": 4.609393509717478e-06, "loss": 1.1407, "step": 2895 }, { "epoch": 0.4099950449493877, "grad_norm": 8.474002349557882, "learning_rate": 4.6090858038429535e-06, "loss": 1.235, "step": 2896 }, { "epoch": 0.41013661782402494, "grad_norm": 8.307984925683883, "learning_rate": 4.6087779870953595e-06, "loss": 1.2606, "step": 2897 }, { "epoch": 0.4102781906986621, "grad_norm": 8.204780803869825, "learning_rate": 4.608470059490879e-06, "loss": 1.1489, "step": 2898 }, { "epoch": 0.41041976357329935, "grad_norm": 6.851665500401674, "learning_rate": 4.6081620210457e-06, "loss": 1.0395, "step": 2899 }, { "epoch": 0.4105613364479366, "grad_norm": 8.5413695741175, "learning_rate": 4.6078538717760165e-06, "loss": 1.11, "step": 2900 }, { "epoch": 0.4107029093225738, "grad_norm": 8.330502397973168, "learning_rate": 4.607545611698025e-06, "loss": 1.2099, "step": 2901 }, { "epoch": 0.41084448219721104, "grad_norm": 8.484704111365456, "learning_rate": 4.607237240827933e-06, "loss": 1.047, "step": 2902 }, { "epoch": 0.4109860550718482, "grad_norm": 7.424656653894687, "learning_rate": 4.606928759181951e-06, "loss": 1.2439, "step": 2903 }, { "epoch": 0.41112762794648544, "grad_norm": 9.335208114159425, "learning_rate": 4.6066201667762944e-06, "loss": 1.2684, "step": 2904 }, { "epoch": 0.41126920082112267, "grad_norm": 6.92743647160443, "learning_rate": 4.606311463627186e-06, "loss": 1.0702, "step": 2905 }, { "epoch": 0.4114107736957599, "grad_norm": 9.2303724570464, "learning_rate": 4.606002649750856e-06, "loss": 1.2769, "step": 2906 }, { "epoch": 0.41155234657039713, "grad_norm": 9.62960863627349, "learning_rate": 4.605693725163536e-06, "loss": 1.1395, "step": 2907 }, { "epoch": 0.4116939194450343, "grad_norm": 9.42309683853033, "learning_rate": 4.605384689881467e-06, "loss": 1.222, "step": 2908 }, { "epoch": 0.41183549231967154, "grad_norm": 8.358385145103492, "learning_rate": 4.605075543920895e-06, "loss": 1.0766, "step": 2909 }, { "epoch": 0.41197706519430877, "grad_norm": 9.174822514036034, "learning_rate": 4.604766287298071e-06, "loss": 1.2895, "step": 2910 }, { "epoch": 0.412118638068946, "grad_norm": 7.54706783324747, "learning_rate": 4.604456920029252e-06, "loss": 1.1693, "step": 2911 }, { "epoch": 0.4122602109435832, "grad_norm": 8.970188357821963, "learning_rate": 4.604147442130703e-06, "loss": 1.249, "step": 2912 }, { "epoch": 0.4124017838182204, "grad_norm": 7.911665317087143, "learning_rate": 4.603837853618691e-06, "loss": 1.3148, "step": 2913 }, { "epoch": 0.41254335669285763, "grad_norm": 7.913018868595354, "learning_rate": 4.603528154509492e-06, "loss": 1.2026, "step": 2914 }, { "epoch": 0.41268492956749486, "grad_norm": 7.917924667258268, "learning_rate": 4.6032183448193865e-06, "loss": 1.3484, "step": 2915 }, { "epoch": 0.4128265024421321, "grad_norm": 8.14417480859714, "learning_rate": 4.602908424564661e-06, "loss": 1.2093, "step": 2916 }, { "epoch": 0.4129680753167693, "grad_norm": 8.680023708750607, "learning_rate": 4.602598393761607e-06, "loss": 1.1025, "step": 2917 }, { "epoch": 0.41310964819140655, "grad_norm": 8.006387544711615, "learning_rate": 4.602288252426524e-06, "loss": 1.1263, "step": 2918 }, { "epoch": 0.41325122106604373, "grad_norm": 10.045146882505666, "learning_rate": 4.601978000575715e-06, "loss": 1.2776, "step": 2919 }, { "epoch": 0.41339279394068096, "grad_norm": 9.249077415852545, "learning_rate": 4.6016676382254895e-06, "loss": 1.2574, "step": 2920 }, { "epoch": 0.4135343668153182, "grad_norm": 8.649199220492132, "learning_rate": 4.601357165392163e-06, "loss": 1.3033, "step": 2921 }, { "epoch": 0.4136759396899554, "grad_norm": 7.945015060573777, "learning_rate": 4.601046582092058e-06, "loss": 1.0724, "step": 2922 }, { "epoch": 0.41381751256459265, "grad_norm": 9.739101089261547, "learning_rate": 4.6007358883414996e-06, "loss": 1.1954, "step": 2923 }, { "epoch": 0.4139590854392298, "grad_norm": 8.848355122094683, "learning_rate": 4.600425084156823e-06, "loss": 1.1817, "step": 2924 }, { "epoch": 0.41410065831386705, "grad_norm": 8.900021379423716, "learning_rate": 4.6001141695543655e-06, "loss": 1.2535, "step": 2925 }, { "epoch": 0.4142422311885043, "grad_norm": 10.554610155388335, "learning_rate": 4.599803144550472e-06, "loss": 1.254, "step": 2926 }, { "epoch": 0.4143838040631415, "grad_norm": 10.039418160042318, "learning_rate": 4.5994920091614935e-06, "loss": 1.2663, "step": 2927 }, { "epoch": 0.41452537693777874, "grad_norm": 8.029372652661756, "learning_rate": 4.5991807634037846e-06, "loss": 1.0904, "step": 2928 }, { "epoch": 0.4146669498124159, "grad_norm": 8.61534794862511, "learning_rate": 4.598869407293708e-06, "loss": 1.2888, "step": 2929 }, { "epoch": 0.41480852268705315, "grad_norm": 7.329662820469137, "learning_rate": 4.5985579408476324e-06, "loss": 1.129, "step": 2930 }, { "epoch": 0.4149500955616904, "grad_norm": 9.527644842003273, "learning_rate": 4.5982463640819304e-06, "loss": 1.1134, "step": 2931 }, { "epoch": 0.4150916684363276, "grad_norm": 10.868576081756151, "learning_rate": 4.597934677012982e-06, "loss": 1.2502, "step": 2932 }, { "epoch": 0.41523324131096484, "grad_norm": 10.036104637819871, "learning_rate": 4.597622879657171e-06, "loss": 1.082, "step": 2933 }, { "epoch": 0.415374814185602, "grad_norm": 7.941334436545072, "learning_rate": 4.597310972030889e-06, "loss": 1.2384, "step": 2934 }, { "epoch": 0.41551638706023925, "grad_norm": 11.0989744107819, "learning_rate": 4.596998954150534e-06, "loss": 1.2173, "step": 2935 }, { "epoch": 0.4156579599348765, "grad_norm": 8.649763741326217, "learning_rate": 4.596686826032507e-06, "loss": 1.0951, "step": 2936 }, { "epoch": 0.4157995328095137, "grad_norm": 10.599990312104026, "learning_rate": 4.596374587693218e-06, "loss": 1.1918, "step": 2937 }, { "epoch": 0.41594110568415094, "grad_norm": 9.860941863659706, "learning_rate": 4.596062239149079e-06, "loss": 1.2484, "step": 2938 }, { "epoch": 0.4160826785587881, "grad_norm": 8.126157238050439, "learning_rate": 4.595749780416511e-06, "loss": 1.0439, "step": 2939 }, { "epoch": 0.41622425143342534, "grad_norm": 12.065283893860647, "learning_rate": 4.59543721151194e-06, "loss": 1.1341, "step": 2940 }, { "epoch": 0.41636582430806257, "grad_norm": 11.46813131633362, "learning_rate": 4.595124532451797e-06, "loss": 1.2777, "step": 2941 }, { "epoch": 0.4165073971826998, "grad_norm": 10.121058697715258, "learning_rate": 4.5948117432525195e-06, "loss": 1.2422, "step": 2942 }, { "epoch": 0.41664897005733703, "grad_norm": 10.29575204007948, "learning_rate": 4.594498843930551e-06, "loss": 1.1824, "step": 2943 }, { "epoch": 0.4167905429319742, "grad_norm": 11.796127356275395, "learning_rate": 4.59418583450234e-06, "loss": 1.2814, "step": 2944 }, { "epoch": 0.41693211580661144, "grad_norm": 10.890637259565374, "learning_rate": 4.593872714984341e-06, "loss": 1.1721, "step": 2945 }, { "epoch": 0.41707368868124867, "grad_norm": 8.78812408184646, "learning_rate": 4.593559485393015e-06, "loss": 1.1804, "step": 2946 }, { "epoch": 0.4172152615558859, "grad_norm": 9.871101865474504, "learning_rate": 4.593246145744827e-06, "loss": 1.0183, "step": 2947 }, { "epoch": 0.4173568344305231, "grad_norm": 8.462625351038012, "learning_rate": 4.59293269605625e-06, "loss": 1.2207, "step": 2948 }, { "epoch": 0.41749840730516036, "grad_norm": 9.483389939964399, "learning_rate": 4.592619136343762e-06, "loss": 1.0294, "step": 2949 }, { "epoch": 0.41763998017979753, "grad_norm": 10.788715995069369, "learning_rate": 4.592305466623847e-06, "loss": 1.3119, "step": 2950 }, { "epoch": 0.41778155305443476, "grad_norm": 9.9301473419537, "learning_rate": 4.591991686912993e-06, "loss": 1.1032, "step": 2951 }, { "epoch": 0.417923125929072, "grad_norm": 9.94277142517524, "learning_rate": 4.591677797227696e-06, "loss": 1.1042, "step": 2952 }, { "epoch": 0.4180646988037092, "grad_norm": 8.064440116770205, "learning_rate": 4.591363797584457e-06, "loss": 1.117, "step": 2953 }, { "epoch": 0.41820627167834645, "grad_norm": 9.98941395244392, "learning_rate": 4.591049687999782e-06, "loss": 1.1881, "step": 2954 }, { "epoch": 0.4183478445529836, "grad_norm": 8.324905202514884, "learning_rate": 4.590735468490184e-06, "loss": 1.2599, "step": 2955 }, { "epoch": 0.41848941742762086, "grad_norm": 8.298531864342971, "learning_rate": 4.590421139072182e-06, "loss": 1.0672, "step": 2956 }, { "epoch": 0.4186309903022581, "grad_norm": 8.442804322274604, "learning_rate": 4.590106699762299e-06, "loss": 1.1527, "step": 2957 }, { "epoch": 0.4187725631768953, "grad_norm": 8.360798195876274, "learning_rate": 4.589792150577065e-06, "loss": 1.2362, "step": 2958 }, { "epoch": 0.41891413605153255, "grad_norm": 10.250632289830154, "learning_rate": 4.589477491533016e-06, "loss": 1.3016, "step": 2959 }, { "epoch": 0.4190557089261697, "grad_norm": 8.798903344919877, "learning_rate": 4.589162722646694e-06, "loss": 1.1871, "step": 2960 }, { "epoch": 0.41919728180080695, "grad_norm": 9.978404186535204, "learning_rate": 4.588847843934645e-06, "loss": 1.2035, "step": 2961 }, { "epoch": 0.4193388546754442, "grad_norm": 7.809460590407943, "learning_rate": 4.588532855413422e-06, "loss": 1.1652, "step": 2962 }, { "epoch": 0.4194804275500814, "grad_norm": 7.285369100861741, "learning_rate": 4.588217757099584e-06, "loss": 1.059, "step": 2963 }, { "epoch": 0.41962200042471864, "grad_norm": 7.855124687501859, "learning_rate": 4.587902549009696e-06, "loss": 1.2737, "step": 2964 }, { "epoch": 0.4197635732993558, "grad_norm": 8.197987602602353, "learning_rate": 4.587587231160329e-06, "loss": 1.1102, "step": 2965 }, { "epoch": 0.41990514617399305, "grad_norm": 8.62233261658216, "learning_rate": 4.5872718035680554e-06, "loss": 1.2349, "step": 2966 }, { "epoch": 0.4200467190486303, "grad_norm": 7.358739740446523, "learning_rate": 4.586956266249461e-06, "loss": 1.0572, "step": 2967 }, { "epoch": 0.4201882919232675, "grad_norm": 9.090386781426712, "learning_rate": 4.586640619221131e-06, "loss": 1.0759, "step": 2968 }, { "epoch": 0.42032986479790474, "grad_norm": 7.038601571253868, "learning_rate": 4.586324862499661e-06, "loss": 1.1482, "step": 2969 }, { "epoch": 0.4204714376725419, "grad_norm": 7.624071408620856, "learning_rate": 4.586008996101646e-06, "loss": 1.1187, "step": 2970 }, { "epoch": 0.42061301054717914, "grad_norm": 8.12149112934703, "learning_rate": 4.5856930200436955e-06, "loss": 1.0374, "step": 2971 }, { "epoch": 0.4207545834218164, "grad_norm": 8.684135389231106, "learning_rate": 4.585376934342418e-06, "loss": 1.0818, "step": 2972 }, { "epoch": 0.4208961562964536, "grad_norm": 9.276622504858988, "learning_rate": 4.585060739014429e-06, "loss": 1.1967, "step": 2973 }, { "epoch": 0.42103772917109084, "grad_norm": 8.455654929144266, "learning_rate": 4.584744434076352e-06, "loss": 1.0923, "step": 2974 }, { "epoch": 0.42117930204572807, "grad_norm": 10.1024136520279, "learning_rate": 4.584428019544815e-06, "loss": 1.0914, "step": 2975 }, { "epoch": 0.42132087492036524, "grad_norm": 8.012504341969377, "learning_rate": 4.58411149543645e-06, "loss": 1.1675, "step": 2976 }, { "epoch": 0.42146244779500247, "grad_norm": 8.290009094895918, "learning_rate": 4.583794861767899e-06, "loss": 1.1822, "step": 2977 }, { "epoch": 0.4216040206696397, "grad_norm": 7.5831586656960495, "learning_rate": 4.583478118555806e-06, "loss": 1.0576, "step": 2978 }, { "epoch": 0.42174559354427693, "grad_norm": 7.765508332325955, "learning_rate": 4.583161265816821e-06, "loss": 0.9219, "step": 2979 }, { "epoch": 0.42188716641891416, "grad_norm": 10.238789079712877, "learning_rate": 4.582844303567602e-06, "loss": 1.2092, "step": 2980 }, { "epoch": 0.42202873929355134, "grad_norm": 8.470031416113013, "learning_rate": 4.58252723182481e-06, "loss": 1.1072, "step": 2981 }, { "epoch": 0.42217031216818857, "grad_norm": 10.095152766303242, "learning_rate": 4.582210050605115e-06, "loss": 1.0575, "step": 2982 }, { "epoch": 0.4223118850428258, "grad_norm": 8.998350204147668, "learning_rate": 4.58189275992519e-06, "loss": 1.1478, "step": 2983 }, { "epoch": 0.422453457917463, "grad_norm": 9.104700790089193, "learning_rate": 4.581575359801715e-06, "loss": 1.1983, "step": 2984 }, { "epoch": 0.42259503079210026, "grad_norm": 9.962799688065184, "learning_rate": 4.581257850251376e-06, "loss": 1.2851, "step": 2985 }, { "epoch": 0.42273660366673743, "grad_norm": 8.845768027679208, "learning_rate": 4.580940231290864e-06, "loss": 1.2591, "step": 2986 }, { "epoch": 0.42287817654137466, "grad_norm": 8.189737028900199, "learning_rate": 4.580622502936875e-06, "loss": 1.0981, "step": 2987 }, { "epoch": 0.4230197494160119, "grad_norm": 10.002748493138883, "learning_rate": 4.580304665206111e-06, "loss": 1.1443, "step": 2988 }, { "epoch": 0.4231613222906491, "grad_norm": 8.739756228192103, "learning_rate": 4.579986718115283e-06, "loss": 1.1506, "step": 2989 }, { "epoch": 0.42330289516528635, "grad_norm": 10.42799904404004, "learning_rate": 4.579668661681105e-06, "loss": 1.1762, "step": 2990 }, { "epoch": 0.4234444680399235, "grad_norm": 10.061232587714192, "learning_rate": 4.579350495920295e-06, "loss": 1.1511, "step": 2991 }, { "epoch": 0.42358604091456076, "grad_norm": 6.772684761644767, "learning_rate": 4.579032220849581e-06, "loss": 1.0852, "step": 2992 }, { "epoch": 0.423727613789198, "grad_norm": 7.74566430667879, "learning_rate": 4.578713836485692e-06, "loss": 1.188, "step": 2993 }, { "epoch": 0.4238691866638352, "grad_norm": 8.624552397239777, "learning_rate": 4.578395342845367e-06, "loss": 1.1144, "step": 2994 }, { "epoch": 0.42401075953847245, "grad_norm": 8.25031441754078, "learning_rate": 4.578076739945349e-06, "loss": 1.1903, "step": 2995 }, { "epoch": 0.4241523324131096, "grad_norm": 8.482074910515566, "learning_rate": 4.577758027802386e-06, "loss": 1.1843, "step": 2996 }, { "epoch": 0.42429390528774685, "grad_norm": 9.937840869743475, "learning_rate": 4.5774392064332325e-06, "loss": 1.1702, "step": 2997 }, { "epoch": 0.4244354781623841, "grad_norm": 8.669415209274543, "learning_rate": 4.577120275854649e-06, "loss": 1.0938, "step": 2998 }, { "epoch": 0.4245770510370213, "grad_norm": 8.003261854855356, "learning_rate": 4.576801236083402e-06, "loss": 1.1623, "step": 2999 }, { "epoch": 0.42471862391165854, "grad_norm": 8.027306683308582, "learning_rate": 4.576482087136262e-06, "loss": 1.1352, "step": 3000 }, { "epoch": 0.4248601967862957, "grad_norm": 8.305180360051688, "learning_rate": 4.576162829030007e-06, "loss": 1.1365, "step": 3001 }, { "epoch": 0.42500176966093295, "grad_norm": 9.128887863695647, "learning_rate": 4.57584346178142e-06, "loss": 1.1947, "step": 3002 }, { "epoch": 0.4251433425355702, "grad_norm": 8.699625678066647, "learning_rate": 4.5755239854072904e-06, "loss": 1.3001, "step": 3003 }, { "epoch": 0.4252849154102074, "grad_norm": 9.619683320668171, "learning_rate": 4.575204399924412e-06, "loss": 1.2823, "step": 3004 }, { "epoch": 0.42542648828484464, "grad_norm": 7.858691570683061, "learning_rate": 4.574884705349586e-06, "loss": 1.1777, "step": 3005 }, { "epoch": 0.42556806115948187, "grad_norm": 9.883742699553473, "learning_rate": 4.574564901699618e-06, "loss": 1.2202, "step": 3006 }, { "epoch": 0.42570963403411904, "grad_norm": 9.394919430649654, "learning_rate": 4.57424498899132e-06, "loss": 1.0648, "step": 3007 }, { "epoch": 0.4258512069087563, "grad_norm": 7.551535231771712, "learning_rate": 4.573924967241509e-06, "loss": 1.1072, "step": 3008 }, { "epoch": 0.4259927797833935, "grad_norm": 7.966895751638807, "learning_rate": 4.57360483646701e-06, "loss": 1.2169, "step": 3009 }, { "epoch": 0.42613435265803074, "grad_norm": 8.271940245333212, "learning_rate": 4.57328459668465e-06, "loss": 1.1478, "step": 3010 }, { "epoch": 0.42627592553266797, "grad_norm": 10.539151541275082, "learning_rate": 4.572964247911265e-06, "loss": 1.3064, "step": 3011 }, { "epoch": 0.42641749840730514, "grad_norm": 11.31904741356736, "learning_rate": 4.572643790163696e-06, "loss": 1.3014, "step": 3012 }, { "epoch": 0.42655907128194237, "grad_norm": 9.83285064374653, "learning_rate": 4.572323223458786e-06, "loss": 1.2949, "step": 3013 }, { "epoch": 0.4267006441565796, "grad_norm": 7.9235510623702226, "learning_rate": 4.572002547813391e-06, "loss": 1.055, "step": 3014 }, { "epoch": 0.42684221703121683, "grad_norm": 8.132073243298722, "learning_rate": 4.571681763244367e-06, "loss": 1.0693, "step": 3015 }, { "epoch": 0.42698378990585406, "grad_norm": 9.093687915917798, "learning_rate": 4.571360869768578e-06, "loss": 1.0811, "step": 3016 }, { "epoch": 0.42712536278049124, "grad_norm": 10.055036157877826, "learning_rate": 4.571039867402891e-06, "loss": 1.2632, "step": 3017 }, { "epoch": 0.42726693565512847, "grad_norm": 10.092638834446525, "learning_rate": 4.570718756164183e-06, "loss": 1.1013, "step": 3018 }, { "epoch": 0.4274085085297657, "grad_norm": 7.715563405912383, "learning_rate": 4.570397536069335e-06, "loss": 1.1175, "step": 3019 }, { "epoch": 0.4275500814044029, "grad_norm": 8.642599090723158, "learning_rate": 4.570076207135231e-06, "loss": 1.1905, "step": 3020 }, { "epoch": 0.42769165427904016, "grad_norm": 13.249325141274886, "learning_rate": 4.569754769378765e-06, "loss": 1.0621, "step": 3021 }, { "epoch": 0.42783322715367733, "grad_norm": 10.430801892126654, "learning_rate": 4.569433222816834e-06, "loss": 1.3083, "step": 3022 }, { "epoch": 0.42797480002831456, "grad_norm": 7.489670570371273, "learning_rate": 4.569111567466341e-06, "loss": 1.0479, "step": 3023 }, { "epoch": 0.4281163729029518, "grad_norm": 8.90595188645808, "learning_rate": 4.568789803344196e-06, "loss": 1.3397, "step": 3024 }, { "epoch": 0.428257945777589, "grad_norm": 9.425920468822676, "learning_rate": 4.568467930467314e-06, "loss": 1.309, "step": 3025 }, { "epoch": 0.42839951865222625, "grad_norm": 10.632904343835518, "learning_rate": 4.568145948852614e-06, "loss": 0.9721, "step": 3026 }, { "epoch": 0.4285410915268634, "grad_norm": 9.309933712044154, "learning_rate": 4.567823858517024e-06, "loss": 1.2353, "step": 3027 }, { "epoch": 0.42868266440150066, "grad_norm": 9.154941296058503, "learning_rate": 4.567501659477477e-06, "loss": 1.0189, "step": 3028 }, { "epoch": 0.4288242372761379, "grad_norm": 10.173033758163125, "learning_rate": 4.567179351750908e-06, "loss": 1.2306, "step": 3029 }, { "epoch": 0.4289658101507751, "grad_norm": 9.035392260076872, "learning_rate": 4.566856935354262e-06, "loss": 1.1979, "step": 3030 }, { "epoch": 0.42910738302541235, "grad_norm": 7.175539896254105, "learning_rate": 4.566534410304488e-06, "loss": 1.1544, "step": 3031 }, { "epoch": 0.4292489559000496, "grad_norm": 11.43589629723415, "learning_rate": 4.566211776618541e-06, "loss": 1.1544, "step": 3032 }, { "epoch": 0.42939052877468675, "grad_norm": 9.091512608002303, "learning_rate": 4.565889034313382e-06, "loss": 1.2238, "step": 3033 }, { "epoch": 0.429532101649324, "grad_norm": 8.759019834821524, "learning_rate": 4.565566183405976e-06, "loss": 1.3164, "step": 3034 }, { "epoch": 0.4296736745239612, "grad_norm": 8.547156627415589, "learning_rate": 4.565243223913297e-06, "loss": 1.1047, "step": 3035 }, { "epoch": 0.42981524739859844, "grad_norm": 9.033367186010162, "learning_rate": 4.564920155852321e-06, "loss": 1.2139, "step": 3036 }, { "epoch": 0.4299568202732357, "grad_norm": 10.338102983188726, "learning_rate": 4.564596979240031e-06, "loss": 1.0832, "step": 3037 }, { "epoch": 0.43009839314787285, "grad_norm": 10.126974784511553, "learning_rate": 4.564273694093419e-06, "loss": 1.1612, "step": 3038 }, { "epoch": 0.4302399660225101, "grad_norm": 10.341058571084755, "learning_rate": 4.5639503004294774e-06, "loss": 1.2097, "step": 3039 }, { "epoch": 0.4303815388971473, "grad_norm": 9.178658935061886, "learning_rate": 4.5636267982652075e-06, "loss": 1.0992, "step": 3040 }, { "epoch": 0.43052311177178454, "grad_norm": 9.081745207189023, "learning_rate": 4.5633031876176156e-06, "loss": 1.1454, "step": 3041 }, { "epoch": 0.43066468464642177, "grad_norm": 10.117515152150942, "learning_rate": 4.562979468503713e-06, "loss": 1.1002, "step": 3042 }, { "epoch": 0.43080625752105894, "grad_norm": 9.984625632613248, "learning_rate": 4.562655640940519e-06, "loss": 1.1267, "step": 3043 }, { "epoch": 0.4309478303956962, "grad_norm": 9.466696740044478, "learning_rate": 4.562331704945055e-06, "loss": 1.2362, "step": 3044 }, { "epoch": 0.4310894032703334, "grad_norm": 10.066939802022214, "learning_rate": 4.562007660534351e-06, "loss": 1.2333, "step": 3045 }, { "epoch": 0.43123097614497063, "grad_norm": 10.824737855763349, "learning_rate": 4.5616835077254425e-06, "loss": 1.3215, "step": 3046 }, { "epoch": 0.43137254901960786, "grad_norm": 9.660554290727918, "learning_rate": 4.561359246535369e-06, "loss": 1.2297, "step": 3047 }, { "epoch": 0.43151412189424504, "grad_norm": 9.546675210163976, "learning_rate": 4.561034876981177e-06, "loss": 1.2262, "step": 3048 }, { "epoch": 0.43165569476888227, "grad_norm": 7.518658380750887, "learning_rate": 4.560710399079918e-06, "loss": 0.9017, "step": 3049 }, { "epoch": 0.4317972676435195, "grad_norm": 10.652125301436328, "learning_rate": 4.56038581284865e-06, "loss": 1.15, "step": 3050 }, { "epoch": 0.43193884051815673, "grad_norm": 9.565457553637945, "learning_rate": 4.560061118304436e-06, "loss": 1.0438, "step": 3051 }, { "epoch": 0.43208041339279396, "grad_norm": 9.119052568714393, "learning_rate": 4.559736315464345e-06, "loss": 1.1718, "step": 3052 }, { "epoch": 0.43222198626743114, "grad_norm": 8.36164223415776, "learning_rate": 4.559411404345452e-06, "loss": 1.0965, "step": 3053 }, { "epoch": 0.43236355914206837, "grad_norm": 9.901996264859772, "learning_rate": 4.5590863849648364e-06, "loss": 1.3099, "step": 3054 }, { "epoch": 0.4325051320167056, "grad_norm": 9.10784218718356, "learning_rate": 4.5587612573395855e-06, "loss": 1.0407, "step": 3055 }, { "epoch": 0.4326467048913428, "grad_norm": 7.652995262432638, "learning_rate": 4.55843602148679e-06, "loss": 1.1275, "step": 3056 }, { "epoch": 0.43278827776598006, "grad_norm": 9.332398731122677, "learning_rate": 4.558110677423548e-06, "loss": 1.1084, "step": 3057 }, { "epoch": 0.43292985064061723, "grad_norm": 8.201933042121315, "learning_rate": 4.557785225166962e-06, "loss": 0.9501, "step": 3058 }, { "epoch": 0.43307142351525446, "grad_norm": 9.651035122870038, "learning_rate": 4.5574596647341414e-06, "loss": 1.1493, "step": 3059 }, { "epoch": 0.4332129963898917, "grad_norm": 10.858201656484862, "learning_rate": 4.5571339961422e-06, "loss": 1.3044, "step": 3060 }, { "epoch": 0.4333545692645289, "grad_norm": 11.21182385705645, "learning_rate": 4.5568082194082584e-06, "loss": 1.1879, "step": 3061 }, { "epoch": 0.43349614213916615, "grad_norm": 9.70477605216402, "learning_rate": 4.556482334549442e-06, "loss": 1.0551, "step": 3062 }, { "epoch": 0.4336377150138034, "grad_norm": 10.354188935673571, "learning_rate": 4.556156341582884e-06, "loss": 1.1677, "step": 3063 }, { "epoch": 0.43377928788844056, "grad_norm": 7.707814737006821, "learning_rate": 4.555830240525719e-06, "loss": 1.0944, "step": 3064 }, { "epoch": 0.4339208607630778, "grad_norm": 9.700289251989634, "learning_rate": 4.5555040313950915e-06, "loss": 1.1476, "step": 3065 }, { "epoch": 0.434062433637715, "grad_norm": 9.463659548545756, "learning_rate": 4.555177714208149e-06, "loss": 1.1416, "step": 3066 }, { "epoch": 0.43420400651235225, "grad_norm": 11.763258575723647, "learning_rate": 4.554851288982047e-06, "loss": 1.0389, "step": 3067 }, { "epoch": 0.4343455793869895, "grad_norm": 12.147961455705104, "learning_rate": 4.554524755733946e-06, "loss": 1.2952, "step": 3068 }, { "epoch": 0.43448715226162665, "grad_norm": 8.340930933967433, "learning_rate": 4.554198114481009e-06, "loss": 0.9803, "step": 3069 }, { "epoch": 0.4346287251362639, "grad_norm": 8.770781334898812, "learning_rate": 4.553871365240409e-06, "loss": 1.1384, "step": 3070 }, { "epoch": 0.4347702980109011, "grad_norm": 10.187182836919735, "learning_rate": 4.553544508029323e-06, "loss": 1.1702, "step": 3071 }, { "epoch": 0.43491187088553834, "grad_norm": 10.81358834671123, "learning_rate": 4.5532175428649335e-06, "loss": 1.227, "step": 3072 }, { "epoch": 0.4350534437601756, "grad_norm": 11.022913908875479, "learning_rate": 4.5528904697644296e-06, "loss": 1.1639, "step": 3073 }, { "epoch": 0.43519501663481275, "grad_norm": 11.658895320221836, "learning_rate": 4.552563288745004e-06, "loss": 1.1006, "step": 3074 }, { "epoch": 0.43533658950945, "grad_norm": 7.463873256674393, "learning_rate": 4.552235999823856e-06, "loss": 1.0639, "step": 3075 }, { "epoch": 0.4354781623840872, "grad_norm": 10.831477275209458, "learning_rate": 4.551908603018191e-06, "loss": 1.1465, "step": 3076 }, { "epoch": 0.43561973525872444, "grad_norm": 9.035959250146744, "learning_rate": 4.551581098345222e-06, "loss": 1.1104, "step": 3077 }, { "epoch": 0.43576130813336167, "grad_norm": 11.985489655423905, "learning_rate": 4.551253485822164e-06, "loss": 1.2406, "step": 3078 }, { "epoch": 0.43590288100799884, "grad_norm": 10.042934564556754, "learning_rate": 4.55092576546624e-06, "loss": 1.1364, "step": 3079 }, { "epoch": 0.4360444538826361, "grad_norm": 8.541029247287902, "learning_rate": 4.550597937294677e-06, "loss": 1.2247, "step": 3080 }, { "epoch": 0.4361860267572733, "grad_norm": 8.849984603264987, "learning_rate": 4.55027000132471e-06, "loss": 1.0864, "step": 3081 }, { "epoch": 0.43632759963191053, "grad_norm": 14.38738586221424, "learning_rate": 4.549941957573578e-06, "loss": 1.1347, "step": 3082 }, { "epoch": 0.43646917250654776, "grad_norm": 9.53185512935605, "learning_rate": 4.549613806058526e-06, "loss": 1.0988, "step": 3083 }, { "epoch": 0.43661074538118494, "grad_norm": 9.194589383870728, "learning_rate": 4.5492855467968036e-06, "loss": 1.1863, "step": 3084 }, { "epoch": 0.43675231825582217, "grad_norm": 9.028158430893884, "learning_rate": 4.548957179805668e-06, "loss": 1.1157, "step": 3085 }, { "epoch": 0.4368938911304594, "grad_norm": 9.982289462106047, "learning_rate": 4.548628705102382e-06, "loss": 1.3296, "step": 3086 }, { "epoch": 0.43703546400509663, "grad_norm": 7.563940998591659, "learning_rate": 4.5483001227042126e-06, "loss": 1.147, "step": 3087 }, { "epoch": 0.43717703687973386, "grad_norm": 10.553664264450441, "learning_rate": 4.5479714326284316e-06, "loss": 1.0904, "step": 3088 }, { "epoch": 0.43731860975437103, "grad_norm": 7.809884327753557, "learning_rate": 4.547642634892321e-06, "loss": 1.0347, "step": 3089 }, { "epoch": 0.43746018262900827, "grad_norm": 8.784795839372315, "learning_rate": 4.547313729513163e-06, "loss": 1.0787, "step": 3090 }, { "epoch": 0.4376017555036455, "grad_norm": 8.593857865090108, "learning_rate": 4.546984716508249e-06, "loss": 1.3289, "step": 3091 }, { "epoch": 0.4377433283782827, "grad_norm": 9.418620039176801, "learning_rate": 4.546655595894875e-06, "loss": 1.1037, "step": 3092 }, { "epoch": 0.43788490125291996, "grad_norm": 10.442403954741643, "learning_rate": 4.546326367690342e-06, "loss": 1.2573, "step": 3093 }, { "epoch": 0.4380264741275572, "grad_norm": 8.860150446249532, "learning_rate": 4.545997031911958e-06, "loss": 1.1112, "step": 3094 }, { "epoch": 0.43816804700219436, "grad_norm": 8.299249399095011, "learning_rate": 4.545667588577035e-06, "loss": 1.0518, "step": 3095 }, { "epoch": 0.4383096198768316, "grad_norm": 8.997603203260317, "learning_rate": 4.545338037702893e-06, "loss": 1.3171, "step": 3096 }, { "epoch": 0.4384511927514688, "grad_norm": 8.846746469505224, "learning_rate": 4.545008379306854e-06, "loss": 1.2208, "step": 3097 }, { "epoch": 0.43859276562610605, "grad_norm": 10.558992966116422, "learning_rate": 4.5446786134062515e-06, "loss": 1.1858, "step": 3098 }, { "epoch": 0.4387343385007433, "grad_norm": 13.333747348715402, "learning_rate": 4.544348740018417e-06, "loss": 1.2241, "step": 3099 }, { "epoch": 0.43887591137538046, "grad_norm": 9.48305365267907, "learning_rate": 4.544018759160694e-06, "loss": 1.2435, "step": 3100 }, { "epoch": 0.4390174842500177, "grad_norm": 9.165176362933332, "learning_rate": 4.5436886708504295e-06, "loss": 1.179, "step": 3101 }, { "epoch": 0.4391590571246549, "grad_norm": 10.921611296999812, "learning_rate": 4.543358475104975e-06, "loss": 1.2185, "step": 3102 }, { "epoch": 0.43930062999929215, "grad_norm": 9.582663825048165, "learning_rate": 4.543028171941689e-06, "loss": 1.112, "step": 3103 }, { "epoch": 0.4394422028739294, "grad_norm": 11.641493387282098, "learning_rate": 4.5426977613779355e-06, "loss": 1.1861, "step": 3104 }, { "epoch": 0.43958377574856655, "grad_norm": 10.06083599098019, "learning_rate": 4.542367243431084e-06, "loss": 1.3184, "step": 3105 }, { "epoch": 0.4397253486232038, "grad_norm": 8.024425887727839, "learning_rate": 4.54203661811851e-06, "loss": 1.3247, "step": 3106 }, { "epoch": 0.439866921497841, "grad_norm": 9.799622072990239, "learning_rate": 4.541705885457593e-06, "loss": 1.173, "step": 3107 }, { "epoch": 0.44000849437247824, "grad_norm": 9.833939183423968, "learning_rate": 4.541375045465719e-06, "loss": 1.1452, "step": 3108 }, { "epoch": 0.4401500672471155, "grad_norm": 8.8608685669213, "learning_rate": 4.541044098160281e-06, "loss": 1.1697, "step": 3109 }, { "epoch": 0.44029164012175265, "grad_norm": 8.430329617428733, "learning_rate": 4.540713043558678e-06, "loss": 1.2084, "step": 3110 }, { "epoch": 0.4404332129963899, "grad_norm": 7.71400880568904, "learning_rate": 4.54038188167831e-06, "loss": 1.0877, "step": 3111 }, { "epoch": 0.4405747858710271, "grad_norm": 18.184945153192224, "learning_rate": 4.54005061253659e-06, "loss": 1.2248, "step": 3112 }, { "epoch": 0.44071635874566434, "grad_norm": 8.840349062418868, "learning_rate": 4.539719236150929e-06, "loss": 1.0537, "step": 3113 }, { "epoch": 0.44085793162030157, "grad_norm": 8.560562248660048, "learning_rate": 4.53938775253875e-06, "loss": 1.2167, "step": 3114 }, { "epoch": 0.44099950449493874, "grad_norm": 10.259631957950424, "learning_rate": 4.539056161717477e-06, "loss": 1.0788, "step": 3115 }, { "epoch": 0.441141077369576, "grad_norm": 9.739361950539331, "learning_rate": 4.5387244637045414e-06, "loss": 1.1567, "step": 3116 }, { "epoch": 0.4412826502442132, "grad_norm": 8.088398346098922, "learning_rate": 4.53839265851738e-06, "loss": 1.0937, "step": 3117 }, { "epoch": 0.44142422311885043, "grad_norm": 11.787726041501482, "learning_rate": 4.538060746173438e-06, "loss": 1.0576, "step": 3118 }, { "epoch": 0.44156579599348766, "grad_norm": 9.169958691020344, "learning_rate": 4.537728726690162e-06, "loss": 1.2671, "step": 3119 }, { "epoch": 0.4417073688681249, "grad_norm": 9.030974598417695, "learning_rate": 4.537396600085006e-06, "loss": 1.0907, "step": 3120 }, { "epoch": 0.44184894174276207, "grad_norm": 9.346812687203482, "learning_rate": 4.537064366375429e-06, "loss": 1.23, "step": 3121 }, { "epoch": 0.4419905146173993, "grad_norm": 8.904148857826009, "learning_rate": 4.5367320255788985e-06, "loss": 1.1208, "step": 3122 }, { "epoch": 0.44213208749203653, "grad_norm": 10.855796608601517, "learning_rate": 4.536399577712883e-06, "loss": 1.314, "step": 3123 }, { "epoch": 0.44227366036667376, "grad_norm": 8.624374145799699, "learning_rate": 4.536067022794861e-06, "loss": 1.0674, "step": 3124 }, { "epoch": 0.442415233241311, "grad_norm": 9.845175987983913, "learning_rate": 4.535734360842313e-06, "loss": 1.3097, "step": 3125 }, { "epoch": 0.44255680611594816, "grad_norm": 11.711981769443351, "learning_rate": 4.535401591872729e-06, "loss": 1.2274, "step": 3126 }, { "epoch": 0.4426983789905854, "grad_norm": 8.847896399196703, "learning_rate": 4.5350687159036e-06, "loss": 1.0753, "step": 3127 }, { "epoch": 0.4428399518652226, "grad_norm": 10.331196430711277, "learning_rate": 4.5347357329524254e-06, "loss": 1.2787, "step": 3128 }, { "epoch": 0.44298152473985986, "grad_norm": 11.016433577926001, "learning_rate": 4.534402643036711e-06, "loss": 1.0039, "step": 3129 }, { "epoch": 0.4431230976144971, "grad_norm": 12.951685403265332, "learning_rate": 4.534069446173967e-06, "loss": 1.2924, "step": 3130 }, { "epoch": 0.44326467048913426, "grad_norm": 9.944365810696684, "learning_rate": 4.533736142381708e-06, "loss": 1.0408, "step": 3131 }, { "epoch": 0.4434062433637715, "grad_norm": 9.805486614596107, "learning_rate": 4.533402731677457e-06, "loss": 1.1582, "step": 3132 }, { "epoch": 0.4435478162384087, "grad_norm": 9.10648715012172, "learning_rate": 4.53306921407874e-06, "loss": 1.1836, "step": 3133 }, { "epoch": 0.44368938911304595, "grad_norm": 8.398524254639128, "learning_rate": 4.532735589603091e-06, "loss": 1.0296, "step": 3134 }, { "epoch": 0.4438309619876832, "grad_norm": 8.538469662203132, "learning_rate": 4.5324018582680476e-06, "loss": 1.1238, "step": 3135 }, { "epoch": 0.44397253486232036, "grad_norm": 15.937457395010455, "learning_rate": 4.532068020091154e-06, "loss": 1.2434, "step": 3136 }, { "epoch": 0.4441141077369576, "grad_norm": 7.0706367544776, "learning_rate": 4.531734075089959e-06, "loss": 1.0311, "step": 3137 }, { "epoch": 0.4442556806115948, "grad_norm": 8.21959748630759, "learning_rate": 4.53140002328202e-06, "loss": 1.1919, "step": 3138 }, { "epoch": 0.44439725348623205, "grad_norm": 8.76112617465041, "learning_rate": 4.531065864684896e-06, "loss": 1.1062, "step": 3139 }, { "epoch": 0.4445388263608693, "grad_norm": 12.569613895480057, "learning_rate": 4.530731599316153e-06, "loss": 1.1082, "step": 3140 }, { "epoch": 0.44468039923550645, "grad_norm": 9.77364670539482, "learning_rate": 4.530397227193365e-06, "loss": 1.0727, "step": 3141 }, { "epoch": 0.4448219721101437, "grad_norm": 9.623134283586685, "learning_rate": 4.530062748334109e-06, "loss": 1.2862, "step": 3142 }, { "epoch": 0.4449635449847809, "grad_norm": 8.92435710739286, "learning_rate": 4.529728162755966e-06, "loss": 1.0832, "step": 3143 }, { "epoch": 0.44510511785941814, "grad_norm": 8.180556286687843, "learning_rate": 4.5293934704765285e-06, "loss": 0.9974, "step": 3144 }, { "epoch": 0.4452466907340554, "grad_norm": 10.172181014982112, "learning_rate": 4.529058671513389e-06, "loss": 1.0579, "step": 3145 }, { "epoch": 0.44538826360869255, "grad_norm": 10.652600152763101, "learning_rate": 4.528723765884149e-06, "loss": 1.1891, "step": 3146 }, { "epoch": 0.4455298364833298, "grad_norm": 8.419297313953356, "learning_rate": 4.528388753606412e-06, "loss": 1.2474, "step": 3147 }, { "epoch": 0.445671409357967, "grad_norm": 9.09507799447514, "learning_rate": 4.528053634697791e-06, "loss": 1.1658, "step": 3148 }, { "epoch": 0.44581298223260424, "grad_norm": 9.034833256629458, "learning_rate": 4.527718409175903e-06, "loss": 1.0745, "step": 3149 }, { "epoch": 0.44595455510724147, "grad_norm": 9.428313173332418, "learning_rate": 4.52738307705837e-06, "loss": 1.1805, "step": 3150 }, { "epoch": 0.4460961279818787, "grad_norm": 9.718545500636873, "learning_rate": 4.52704763836282e-06, "loss": 1.1943, "step": 3151 }, { "epoch": 0.4462377008565159, "grad_norm": 8.313556345885372, "learning_rate": 4.526712093106888e-06, "loss": 1.138, "step": 3152 }, { "epoch": 0.4463792737311531, "grad_norm": 7.809390005987984, "learning_rate": 4.5263764413082115e-06, "loss": 1.1318, "step": 3153 }, { "epoch": 0.44652084660579033, "grad_norm": 8.70720123768566, "learning_rate": 4.5260406829844364e-06, "loss": 1.1986, "step": 3154 }, { "epoch": 0.44666241948042756, "grad_norm": 9.15096070295952, "learning_rate": 4.525704818153214e-06, "loss": 1.2349, "step": 3155 }, { "epoch": 0.4468039923550648, "grad_norm": 8.742297460769572, "learning_rate": 4.525368846832199e-06, "loss": 1.2559, "step": 3156 }, { "epoch": 0.44694556522970197, "grad_norm": 9.418170055008488, "learning_rate": 4.525032769039054e-06, "loss": 1.277, "step": 3157 }, { "epoch": 0.4470871381043392, "grad_norm": 10.261738452231752, "learning_rate": 4.524696584791447e-06, "loss": 1.1187, "step": 3158 }, { "epoch": 0.44722871097897643, "grad_norm": 8.530663774072304, "learning_rate": 4.524360294107049e-06, "loss": 1.2363, "step": 3159 }, { "epoch": 0.44737028385361366, "grad_norm": 8.927162861479065, "learning_rate": 4.5240238970035414e-06, "loss": 1.2701, "step": 3160 }, { "epoch": 0.4475118567282509, "grad_norm": 7.861228891066229, "learning_rate": 4.523687393498605e-06, "loss": 1.067, "step": 3161 }, { "epoch": 0.44765342960288806, "grad_norm": 8.880230289256826, "learning_rate": 4.523350783609932e-06, "loss": 1.2519, "step": 3162 }, { "epoch": 0.4477950024775253, "grad_norm": 11.584686273468414, "learning_rate": 4.523014067355217e-06, "loss": 1.2617, "step": 3163 }, { "epoch": 0.4479365753521625, "grad_norm": 9.269774461490124, "learning_rate": 4.52267724475216e-06, "loss": 1.305, "step": 3164 }, { "epoch": 0.44807814822679976, "grad_norm": 8.28237941885607, "learning_rate": 4.52234031581847e-06, "loss": 1.1306, "step": 3165 }, { "epoch": 0.448219721101437, "grad_norm": 9.459096286827647, "learning_rate": 4.5220032805718575e-06, "loss": 1.1493, "step": 3166 }, { "epoch": 0.44836129397607416, "grad_norm": 9.971612692658235, "learning_rate": 4.521666139030039e-06, "loss": 1.1331, "step": 3167 }, { "epoch": 0.4485028668507114, "grad_norm": 10.72279179388374, "learning_rate": 4.52132889121074e-06, "loss": 1.1328, "step": 3168 }, { "epoch": 0.4486444397253486, "grad_norm": 8.135807772005638, "learning_rate": 4.520991537131687e-06, "loss": 1.2918, "step": 3169 }, { "epoch": 0.44878601259998585, "grad_norm": 7.872306968767036, "learning_rate": 4.520654076810617e-06, "loss": 0.9967, "step": 3170 }, { "epoch": 0.4489275854746231, "grad_norm": 8.841872592245378, "learning_rate": 4.520316510265268e-06, "loss": 1.1471, "step": 3171 }, { "epoch": 0.44906915834926026, "grad_norm": 12.053388722160253, "learning_rate": 4.519978837513388e-06, "loss": 1.1895, "step": 3172 }, { "epoch": 0.4492107312238975, "grad_norm": 8.50379858969517, "learning_rate": 4.519641058572725e-06, "loss": 1.1853, "step": 3173 }, { "epoch": 0.4493523040985347, "grad_norm": 7.71996788887896, "learning_rate": 4.519303173461038e-06, "loss": 1.2024, "step": 3174 }, { "epoch": 0.44949387697317195, "grad_norm": 9.973170337807034, "learning_rate": 4.5189651821960885e-06, "loss": 1.1141, "step": 3175 }, { "epoch": 0.4496354498478092, "grad_norm": 7.996821725838569, "learning_rate": 4.518627084795646e-06, "loss": 1.1983, "step": 3176 }, { "epoch": 0.4497770227224464, "grad_norm": 8.618057331580601, "learning_rate": 4.5182888812774814e-06, "loss": 1.1283, "step": 3177 }, { "epoch": 0.4499185955970836, "grad_norm": 9.166032480207708, "learning_rate": 4.517950571659376e-06, "loss": 1.2122, "step": 3178 }, { "epoch": 0.4500601684717208, "grad_norm": 9.594751560479521, "learning_rate": 4.517612155959114e-06, "loss": 1.0646, "step": 3179 }, { "epoch": 0.45020174134635804, "grad_norm": 8.121368535665024, "learning_rate": 4.5172736341944845e-06, "loss": 1.1884, "step": 3180 }, { "epoch": 0.45034331422099527, "grad_norm": 8.72849224340961, "learning_rate": 4.516935006383285e-06, "loss": 1.2242, "step": 3181 }, { "epoch": 0.4504848870956325, "grad_norm": 10.631735069516118, "learning_rate": 4.516596272543316e-06, "loss": 1.1331, "step": 3182 }, { "epoch": 0.4506264599702697, "grad_norm": 8.964240771524562, "learning_rate": 4.516257432692383e-06, "loss": 1.0928, "step": 3183 }, { "epoch": 0.4507680328449069, "grad_norm": 8.826892111781323, "learning_rate": 4.515918486848302e-06, "loss": 1.0681, "step": 3184 }, { "epoch": 0.45090960571954414, "grad_norm": 8.700176403844225, "learning_rate": 4.5155794350288885e-06, "loss": 1.3531, "step": 3185 }, { "epoch": 0.45105117859418137, "grad_norm": 7.8108562723947115, "learning_rate": 4.515240277251968e-06, "loss": 1.1038, "step": 3186 }, { "epoch": 0.4511927514688186, "grad_norm": 7.794343158876206, "learning_rate": 4.514901013535368e-06, "loss": 1.0238, "step": 3187 }, { "epoch": 0.4513343243434558, "grad_norm": 7.898812757684349, "learning_rate": 4.514561643896924e-06, "loss": 1.1264, "step": 3188 }, { "epoch": 0.451475897218093, "grad_norm": 9.848295391545719, "learning_rate": 4.514222168354476e-06, "loss": 1.1548, "step": 3189 }, { "epoch": 0.45161747009273023, "grad_norm": 9.21879303485718, "learning_rate": 4.513882586925872e-06, "loss": 1.2331, "step": 3190 }, { "epoch": 0.45175904296736746, "grad_norm": 8.775126538356023, "learning_rate": 4.51354289962896e-06, "loss": 1.0728, "step": 3191 }, { "epoch": 0.4519006158420047, "grad_norm": 10.018607185093106, "learning_rate": 4.5132031064816e-06, "loss": 1.1768, "step": 3192 }, { "epoch": 0.45204218871664187, "grad_norm": 7.672085077396786, "learning_rate": 4.512863207501654e-06, "loss": 1.1108, "step": 3193 }, { "epoch": 0.4521837615912791, "grad_norm": 8.121523069159542, "learning_rate": 4.51252320270699e-06, "loss": 1.104, "step": 3194 }, { "epoch": 0.45232533446591633, "grad_norm": 7.95440819361776, "learning_rate": 4.512183092115482e-06, "loss": 1.1845, "step": 3195 }, { "epoch": 0.45246690734055356, "grad_norm": 8.62608131596433, "learning_rate": 4.511842875745009e-06, "loss": 1.0469, "step": 3196 }, { "epoch": 0.4526084802151908, "grad_norm": 12.846998857269034, "learning_rate": 4.511502553613456e-06, "loss": 1.217, "step": 3197 }, { "epoch": 0.45275005308982796, "grad_norm": 8.24061374193391, "learning_rate": 4.511162125738714e-06, "loss": 1.1742, "step": 3198 }, { "epoch": 0.4528916259644652, "grad_norm": 9.305951369741628, "learning_rate": 4.510821592138678e-06, "loss": 1.272, "step": 3199 }, { "epoch": 0.4530331988391024, "grad_norm": 7.895860683692392, "learning_rate": 4.510480952831251e-06, "loss": 1.1345, "step": 3200 }, { "epoch": 0.45317477171373965, "grad_norm": 9.415343695235668, "learning_rate": 4.510140207834339e-06, "loss": 1.1149, "step": 3201 }, { "epoch": 0.4533163445883769, "grad_norm": 10.25561816066793, "learning_rate": 4.509799357165855e-06, "loss": 1.3144, "step": 3202 }, { "epoch": 0.45345791746301406, "grad_norm": 11.103814687870047, "learning_rate": 4.509458400843717e-06, "loss": 1.2347, "step": 3203 }, { "epoch": 0.4535994903376513, "grad_norm": 9.578185537125169, "learning_rate": 4.50911733888585e-06, "loss": 1.0541, "step": 3204 }, { "epoch": 0.4537410632122885, "grad_norm": 8.073316784859331, "learning_rate": 4.508776171310183e-06, "loss": 1.0701, "step": 3205 }, { "epoch": 0.45388263608692575, "grad_norm": 12.408562048415401, "learning_rate": 4.5084348981346495e-06, "loss": 1.4936, "step": 3206 }, { "epoch": 0.454024208961563, "grad_norm": 7.9748449615427965, "learning_rate": 4.5080935193771905e-06, "loss": 1.1613, "step": 3207 }, { "epoch": 0.4541657818362002, "grad_norm": 7.861728687388134, "learning_rate": 4.5077520350557534e-06, "loss": 1.2243, "step": 3208 }, { "epoch": 0.4543073547108374, "grad_norm": 9.232603054798819, "learning_rate": 4.5074104451882886e-06, "loss": 1.153, "step": 3209 }, { "epoch": 0.4544489275854746, "grad_norm": 9.038251756396903, "learning_rate": 4.507068749792754e-06, "loss": 1.288, "step": 3210 }, { "epoch": 0.45459050046011185, "grad_norm": 8.552249313645733, "learning_rate": 4.50672694888711e-06, "loss": 1.1626, "step": 3211 }, { "epoch": 0.4547320733347491, "grad_norm": 8.561039041222276, "learning_rate": 4.506385042489329e-06, "loss": 1.1209, "step": 3212 }, { "epoch": 0.4548736462093863, "grad_norm": 9.224379653541792, "learning_rate": 4.5060430306173805e-06, "loss": 1.1328, "step": 3213 }, { "epoch": 0.4550152190840235, "grad_norm": 10.211032764833913, "learning_rate": 4.505700913289246e-06, "loss": 1.2404, "step": 3214 }, { "epoch": 0.4551567919586607, "grad_norm": 9.619730510148953, "learning_rate": 4.505358690522911e-06, "loss": 1.281, "step": 3215 }, { "epoch": 0.45529836483329794, "grad_norm": 10.838405380357612, "learning_rate": 4.505016362336364e-06, "loss": 1.0646, "step": 3216 }, { "epoch": 0.45543993770793517, "grad_norm": 8.756537910427717, "learning_rate": 4.504673928747601e-06, "loss": 1.1582, "step": 3217 }, { "epoch": 0.4555815105825724, "grad_norm": 9.34735833859793, "learning_rate": 4.504331389774626e-06, "loss": 1.221, "step": 3218 }, { "epoch": 0.4557230834572096, "grad_norm": 9.941090828738785, "learning_rate": 4.503988745435443e-06, "loss": 1.206, "step": 3219 }, { "epoch": 0.4558646563318468, "grad_norm": 11.077535465625353, "learning_rate": 4.503645995748067e-06, "loss": 1.206, "step": 3220 }, { "epoch": 0.45600622920648404, "grad_norm": 11.17167558825527, "learning_rate": 4.503303140730515e-06, "loss": 1.1947, "step": 3221 }, { "epoch": 0.45614780208112127, "grad_norm": 8.655485814925862, "learning_rate": 4.502960180400809e-06, "loss": 1.2664, "step": 3222 }, { "epoch": 0.4562893749557585, "grad_norm": 9.796782716745385, "learning_rate": 4.5026171147769816e-06, "loss": 1.1554, "step": 3223 }, { "epoch": 0.4564309478303957, "grad_norm": 8.561013197072484, "learning_rate": 4.5022739438770655e-06, "loss": 1.0516, "step": 3224 }, { "epoch": 0.4565725207050329, "grad_norm": 6.639852034517276, "learning_rate": 4.5019306677191e-06, "loss": 1.067, "step": 3225 }, { "epoch": 0.45671409357967013, "grad_norm": 9.852966450565868, "learning_rate": 4.501587286321133e-06, "loss": 1.0409, "step": 3226 }, { "epoch": 0.45685566645430736, "grad_norm": 8.92885400815389, "learning_rate": 4.501243799701215e-06, "loss": 1.2186, "step": 3227 }, { "epoch": 0.4569972393289446, "grad_norm": 11.401167398209369, "learning_rate": 4.500900207877402e-06, "loss": 1.3436, "step": 3228 }, { "epoch": 0.45713881220358177, "grad_norm": 10.303149984212011, "learning_rate": 4.500556510867756e-06, "loss": 1.222, "step": 3229 }, { "epoch": 0.457280385078219, "grad_norm": 10.627661876464137, "learning_rate": 4.500212708690348e-06, "loss": 1.3188, "step": 3230 }, { "epoch": 0.45742195795285623, "grad_norm": 10.685286443259802, "learning_rate": 4.499868801363248e-06, "loss": 1.1843, "step": 3231 }, { "epoch": 0.45756353082749346, "grad_norm": 7.813691559522523, "learning_rate": 4.499524788904537e-06, "loss": 1.1301, "step": 3232 }, { "epoch": 0.4577051037021307, "grad_norm": 8.99207338315404, "learning_rate": 4.4991806713322986e-06, "loss": 1.1598, "step": 3233 }, { "epoch": 0.45784667657676786, "grad_norm": 8.409031290878668, "learning_rate": 4.498836448664622e-06, "loss": 1.1036, "step": 3234 }, { "epoch": 0.4579882494514051, "grad_norm": 8.239411928987826, "learning_rate": 4.498492120919604e-06, "loss": 1.0543, "step": 3235 }, { "epoch": 0.4581298223260423, "grad_norm": 7.677724189528349, "learning_rate": 4.498147688115346e-06, "loss": 0.9732, "step": 3236 }, { "epoch": 0.45827139520067955, "grad_norm": 7.997530079075831, "learning_rate": 4.497803150269954e-06, "loss": 1.1118, "step": 3237 }, { "epoch": 0.4584129680753168, "grad_norm": 9.137477339313342, "learning_rate": 4.4974585074015394e-06, "loss": 1.1061, "step": 3238 }, { "epoch": 0.458554540949954, "grad_norm": 10.133946363410672, "learning_rate": 4.497113759528221e-06, "loss": 1.0815, "step": 3239 }, { "epoch": 0.4586961138245912, "grad_norm": 11.405581770879843, "learning_rate": 4.4967689066681205e-06, "loss": 1.2258, "step": 3240 }, { "epoch": 0.4588376866992284, "grad_norm": 8.409548881488039, "learning_rate": 4.496423948839369e-06, "loss": 1.1309, "step": 3241 }, { "epoch": 0.45897925957386565, "grad_norm": 9.080001873402676, "learning_rate": 4.496078886060098e-06, "loss": 1.1315, "step": 3242 }, { "epoch": 0.4591208324485029, "grad_norm": 7.746957243143892, "learning_rate": 4.495733718348449e-06, "loss": 1.1526, "step": 3243 }, { "epoch": 0.4592624053231401, "grad_norm": 7.415906059896456, "learning_rate": 4.4953884457225645e-06, "loss": 1.0907, "step": 3244 }, { "epoch": 0.4594039781977773, "grad_norm": 8.040093091738933, "learning_rate": 4.4950430682005995e-06, "loss": 1.1811, "step": 3245 }, { "epoch": 0.4595455510724145, "grad_norm": 10.310700970564248, "learning_rate": 4.4946975858007066e-06, "loss": 1.3556, "step": 3246 }, { "epoch": 0.45968712394705175, "grad_norm": 10.190555044224775, "learning_rate": 4.494351998541049e-06, "loss": 0.9682, "step": 3247 }, { "epoch": 0.459828696821689, "grad_norm": 8.20152421136757, "learning_rate": 4.494006306439795e-06, "loss": 1.2113, "step": 3248 }, { "epoch": 0.4599702696963262, "grad_norm": 9.409944638809984, "learning_rate": 4.493660509515115e-06, "loss": 1.1223, "step": 3249 }, { "epoch": 0.4601118425709634, "grad_norm": 8.389312812025059, "learning_rate": 4.493314607785189e-06, "loss": 1.0779, "step": 3250 }, { "epoch": 0.4602534154456006, "grad_norm": 7.917107285401322, "learning_rate": 4.492968601268202e-06, "loss": 1.1278, "step": 3251 }, { "epoch": 0.46039498832023784, "grad_norm": 8.825018898400666, "learning_rate": 4.492622489982339e-06, "loss": 1.2316, "step": 3252 }, { "epoch": 0.46053656119487507, "grad_norm": 9.684920428987546, "learning_rate": 4.4922762739457995e-06, "loss": 1.0688, "step": 3253 }, { "epoch": 0.4606781340695123, "grad_norm": 8.538505403402038, "learning_rate": 4.49192995317678e-06, "loss": 1.1637, "step": 3254 }, { "epoch": 0.4608197069441495, "grad_norm": 8.51185140729576, "learning_rate": 4.491583527693489e-06, "loss": 1.175, "step": 3255 }, { "epoch": 0.4609612798187867, "grad_norm": 9.467792726982026, "learning_rate": 4.491236997514138e-06, "loss": 1.1593, "step": 3256 }, { "epoch": 0.46110285269342394, "grad_norm": 8.699205594680219, "learning_rate": 4.490890362656941e-06, "loss": 1.0473, "step": 3257 }, { "epoch": 0.46124442556806117, "grad_norm": 8.958747416544206, "learning_rate": 4.490543623140123e-06, "loss": 1.2521, "step": 3258 }, { "epoch": 0.4613859984426984, "grad_norm": 8.359222581846446, "learning_rate": 4.490196778981911e-06, "loss": 1.2641, "step": 3259 }, { "epoch": 0.46152757131733557, "grad_norm": 7.907332877685458, "learning_rate": 4.489849830200538e-06, "loss": 1.039, "step": 3260 }, { "epoch": 0.4616691441919728, "grad_norm": 8.868718893092028, "learning_rate": 4.489502776814243e-06, "loss": 1.1367, "step": 3261 }, { "epoch": 0.46181071706661003, "grad_norm": 8.846860736173815, "learning_rate": 4.4891556188412705e-06, "loss": 1.0671, "step": 3262 }, { "epoch": 0.46195228994124726, "grad_norm": 9.393547733081036, "learning_rate": 4.48880835629987e-06, "loss": 1.1449, "step": 3263 }, { "epoch": 0.4620938628158845, "grad_norm": 7.857604174261707, "learning_rate": 4.488460989208298e-06, "loss": 1.0276, "step": 3264 }, { "epoch": 0.4622354356905217, "grad_norm": 8.719888811015014, "learning_rate": 4.4881135175848145e-06, "loss": 1.1177, "step": 3265 }, { "epoch": 0.4623770085651589, "grad_norm": 8.852005949974215, "learning_rate": 4.4877659414476845e-06, "loss": 1.1143, "step": 3266 }, { "epoch": 0.46251858143979613, "grad_norm": 8.575935459548868, "learning_rate": 4.487418260815182e-06, "loss": 1.1766, "step": 3267 }, { "epoch": 0.46266015431443336, "grad_norm": 8.878012562260997, "learning_rate": 4.487070475705584e-06, "loss": 1.2702, "step": 3268 }, { "epoch": 0.4628017271890706, "grad_norm": 8.293785655125411, "learning_rate": 4.486722586137171e-06, "loss": 1.1657, "step": 3269 }, { "epoch": 0.4629433000637078, "grad_norm": 9.043014730579646, "learning_rate": 4.486374592128235e-06, "loss": 1.0801, "step": 3270 }, { "epoch": 0.463084872938345, "grad_norm": 9.616566319550877, "learning_rate": 4.486026493697067e-06, "loss": 1.2837, "step": 3271 }, { "epoch": 0.4632264458129822, "grad_norm": 8.703646577447707, "learning_rate": 4.485678290861967e-06, "loss": 1.1263, "step": 3272 }, { "epoch": 0.46336801868761945, "grad_norm": 8.88661100552816, "learning_rate": 4.485329983641239e-06, "loss": 1.0725, "step": 3273 }, { "epoch": 0.4635095915622567, "grad_norm": 10.519512755855448, "learning_rate": 4.484981572053195e-06, "loss": 1.1961, "step": 3274 }, { "epoch": 0.4636511644368939, "grad_norm": 9.773852393646681, "learning_rate": 4.48463305611615e-06, "loss": 1.3529, "step": 3275 }, { "epoch": 0.4637927373115311, "grad_norm": 9.51472095523337, "learning_rate": 4.484284435848424e-06, "loss": 1.0895, "step": 3276 }, { "epoch": 0.4639343101861683, "grad_norm": 9.382877549732878, "learning_rate": 4.483935711268346e-06, "loss": 1.2111, "step": 3277 }, { "epoch": 0.46407588306080555, "grad_norm": 7.4069458196014715, "learning_rate": 4.483586882394247e-06, "loss": 1.1028, "step": 3278 }, { "epoch": 0.4642174559354428, "grad_norm": 8.52590450735035, "learning_rate": 4.483237949244463e-06, "loss": 1.1654, "step": 3279 }, { "epoch": 0.46435902881008, "grad_norm": 9.672315941836834, "learning_rate": 4.4828889118373395e-06, "loss": 1.0853, "step": 3280 }, { "epoch": 0.4645006016847172, "grad_norm": 9.389250643631033, "learning_rate": 4.482539770191225e-06, "loss": 1.1239, "step": 3281 }, { "epoch": 0.4646421745593544, "grad_norm": 7.968908929174958, "learning_rate": 4.482190524324473e-06, "loss": 1.1103, "step": 3282 }, { "epoch": 0.46478374743399165, "grad_norm": 10.208957658217862, "learning_rate": 4.481841174255443e-06, "loss": 1.1058, "step": 3283 }, { "epoch": 0.4649253203086289, "grad_norm": 7.891791954241029, "learning_rate": 4.481491720002499e-06, "loss": 1.0348, "step": 3284 }, { "epoch": 0.4650668931832661, "grad_norm": 8.881222544875314, "learning_rate": 4.481142161584014e-06, "loss": 1.1057, "step": 3285 }, { "epoch": 0.4652084660579033, "grad_norm": 10.124166572083693, "learning_rate": 4.480792499018362e-06, "loss": 1.3538, "step": 3286 }, { "epoch": 0.4653500389325405, "grad_norm": 9.497166662830658, "learning_rate": 4.4804427323239265e-06, "loss": 1.2417, "step": 3287 }, { "epoch": 0.46549161180717774, "grad_norm": 9.422098084397748, "learning_rate": 4.480092861519092e-06, "loss": 1.1504, "step": 3288 }, { "epoch": 0.46563318468181497, "grad_norm": 10.140702868898831, "learning_rate": 4.479742886622254e-06, "loss": 1.1227, "step": 3289 }, { "epoch": 0.4657747575564522, "grad_norm": 9.9082125575921, "learning_rate": 4.479392807651807e-06, "loss": 1.2773, "step": 3290 }, { "epoch": 0.4659163304310894, "grad_norm": 7.6735246364809235, "learning_rate": 4.479042624626156e-06, "loss": 1.2857, "step": 3291 }, { "epoch": 0.4660579033057266, "grad_norm": 8.448135567559927, "learning_rate": 4.47869233756371e-06, "loss": 1.192, "step": 3292 }, { "epoch": 0.46619947618036384, "grad_norm": 7.619414988811708, "learning_rate": 4.478341946482884e-06, "loss": 1.2241, "step": 3293 }, { "epoch": 0.46634104905500107, "grad_norm": 7.668741553565413, "learning_rate": 4.4779914514020964e-06, "loss": 1.1127, "step": 3294 }, { "epoch": 0.4664826219296383, "grad_norm": 8.760514399784302, "learning_rate": 4.4776408523397725e-06, "loss": 1.1431, "step": 3295 }, { "epoch": 0.4666241948042755, "grad_norm": 9.480086889069973, "learning_rate": 4.477290149314344e-06, "loss": 1.3292, "step": 3296 }, { "epoch": 0.4667657676789127, "grad_norm": 9.653520216774998, "learning_rate": 4.476939342344246e-06, "loss": 1.194, "step": 3297 }, { "epoch": 0.46690734055354993, "grad_norm": 7.524874065777483, "learning_rate": 4.4765884314479226e-06, "loss": 1.158, "step": 3298 }, { "epoch": 0.46704891342818716, "grad_norm": 8.058630672782597, "learning_rate": 4.4762374166438185e-06, "loss": 1.1211, "step": 3299 }, { "epoch": 0.4671904863028244, "grad_norm": 9.338585692665555, "learning_rate": 4.475886297950386e-06, "loss": 1.098, "step": 3300 }, { "epoch": 0.4673320591774616, "grad_norm": 6.9791062385638005, "learning_rate": 4.475535075386085e-06, "loss": 1.0588, "step": 3301 }, { "epoch": 0.4674736320520988, "grad_norm": 10.633229737290364, "learning_rate": 4.475183748969377e-06, "loss": 1.1715, "step": 3302 }, { "epoch": 0.46761520492673603, "grad_norm": 7.550584469839513, "learning_rate": 4.474832318718733e-06, "loss": 1.2794, "step": 3303 }, { "epoch": 0.46775677780137326, "grad_norm": 8.608430287891306, "learning_rate": 4.474480784652627e-06, "loss": 1.2125, "step": 3304 }, { "epoch": 0.4678983506760105, "grad_norm": 9.732915141210063, "learning_rate": 4.474129146789538e-06, "loss": 1.1022, "step": 3305 }, { "epoch": 0.4680399235506477, "grad_norm": 8.73577727660595, "learning_rate": 4.473777405147952e-06, "loss": 1.2014, "step": 3306 }, { "epoch": 0.4681814964252849, "grad_norm": 9.552707020828246, "learning_rate": 4.473425559746358e-06, "loss": 1.3965, "step": 3307 }, { "epoch": 0.4683230692999221, "grad_norm": 7.954271754720806, "learning_rate": 4.473073610603255e-06, "loss": 1.1584, "step": 3308 }, { "epoch": 0.46846464217455935, "grad_norm": 8.796862424363791, "learning_rate": 4.4727215577371445e-06, "loss": 1.1623, "step": 3309 }, { "epoch": 0.4686062150491966, "grad_norm": 8.997487035234824, "learning_rate": 4.472369401166531e-06, "loss": 1.3093, "step": 3310 }, { "epoch": 0.4687477879238338, "grad_norm": 8.284930177991798, "learning_rate": 4.472017140909929e-06, "loss": 1.1583, "step": 3311 }, { "epoch": 0.468889360798471, "grad_norm": 7.086272344279797, "learning_rate": 4.471664776985857e-06, "loss": 1.0733, "step": 3312 }, { "epoch": 0.4690309336731082, "grad_norm": 7.711624377730861, "learning_rate": 4.471312309412837e-06, "loss": 1.188, "step": 3313 }, { "epoch": 0.46917250654774545, "grad_norm": 8.690168012217393, "learning_rate": 4.470959738209399e-06, "loss": 1.2185, "step": 3314 }, { "epoch": 0.4693140794223827, "grad_norm": 9.693880994490884, "learning_rate": 4.470607063394077e-06, "loss": 1.1127, "step": 3315 }, { "epoch": 0.4694556522970199, "grad_norm": 7.660391049674654, "learning_rate": 4.470254284985411e-06, "loss": 1.1435, "step": 3316 }, { "epoch": 0.4695972251716571, "grad_norm": 8.365009327100626, "learning_rate": 4.469901403001947e-06, "loss": 1.1618, "step": 3317 }, { "epoch": 0.4697387980462943, "grad_norm": 8.287139065392843, "learning_rate": 4.469548417462234e-06, "loss": 1.1622, "step": 3318 }, { "epoch": 0.46988037092093154, "grad_norm": 7.5360849143425686, "learning_rate": 4.46919532838483e-06, "loss": 1.2237, "step": 3319 }, { "epoch": 0.4700219437955688, "grad_norm": 8.915112178693626, "learning_rate": 4.468842135788296e-06, "loss": 1.1777, "step": 3320 }, { "epoch": 0.470163516670206, "grad_norm": 9.681668402913045, "learning_rate": 4.468488839691199e-06, "loss": 1.1204, "step": 3321 }, { "epoch": 0.47030508954484324, "grad_norm": 8.004765045129956, "learning_rate": 4.468135440112111e-06, "loss": 1.0963, "step": 3322 }, { "epoch": 0.4704466624194804, "grad_norm": 9.119116990023452, "learning_rate": 4.467781937069611e-06, "loss": 1.1062, "step": 3323 }, { "epoch": 0.47058823529411764, "grad_norm": 7.524668750252425, "learning_rate": 4.467428330582281e-06, "loss": 1.0262, "step": 3324 }, { "epoch": 0.47072980816875487, "grad_norm": 6.574932129948395, "learning_rate": 4.467074620668711e-06, "loss": 1.089, "step": 3325 }, { "epoch": 0.4708713810433921, "grad_norm": 8.603526488003926, "learning_rate": 4.466720807347495e-06, "loss": 1.08, "step": 3326 }, { "epoch": 0.47101295391802933, "grad_norm": 7.417470424118709, "learning_rate": 4.466366890637232e-06, "loss": 0.9012, "step": 3327 }, { "epoch": 0.4711545267926665, "grad_norm": 8.295354384106211, "learning_rate": 4.466012870556529e-06, "loss": 1.0744, "step": 3328 }, { "epoch": 0.47129609966730374, "grad_norm": 9.43096010079841, "learning_rate": 4.4656587471239944e-06, "loss": 1.2295, "step": 3329 }, { "epoch": 0.47143767254194097, "grad_norm": 8.25345100125855, "learning_rate": 4.4653045203582455e-06, "loss": 1.1767, "step": 3330 }, { "epoch": 0.4715792454165782, "grad_norm": 8.59718858763306, "learning_rate": 4.464950190277903e-06, "loss": 1.113, "step": 3331 }, { "epoch": 0.4717208182912154, "grad_norm": 9.125339449486763, "learning_rate": 4.464595756901594e-06, "loss": 1.1394, "step": 3332 }, { "epoch": 0.4718623911658526, "grad_norm": 10.152376439804046, "learning_rate": 4.4642412202479515e-06, "loss": 1.1484, "step": 3333 }, { "epoch": 0.47200396404048983, "grad_norm": 8.649717875383594, "learning_rate": 4.463886580335612e-06, "loss": 1.2619, "step": 3334 }, { "epoch": 0.47214553691512706, "grad_norm": 9.110593946505654, "learning_rate": 4.463531837183221e-06, "loss": 1.0826, "step": 3335 }, { "epoch": 0.4722871097897643, "grad_norm": 7.468616691900003, "learning_rate": 4.463176990809423e-06, "loss": 1.1653, "step": 3336 }, { "epoch": 0.4724286826644015, "grad_norm": 9.004349399344116, "learning_rate": 4.462822041232876e-06, "loss": 1.0877, "step": 3337 }, { "epoch": 0.4725702555390387, "grad_norm": 8.221921359677832, "learning_rate": 4.462466988472237e-06, "loss": 1.064, "step": 3338 }, { "epoch": 0.4727118284136759, "grad_norm": 9.093823829167656, "learning_rate": 4.462111832546172e-06, "loss": 1.2413, "step": 3339 }, { "epoch": 0.47285340128831316, "grad_norm": 9.397572516435304, "learning_rate": 4.461756573473352e-06, "loss": 1.2332, "step": 3340 }, { "epoch": 0.4729949741629504, "grad_norm": 9.74614111023062, "learning_rate": 4.4614012112724494e-06, "loss": 1.0497, "step": 3341 }, { "epoch": 0.4731365470375876, "grad_norm": 7.942784510179442, "learning_rate": 4.461045745962149e-06, "loss": 1.1807, "step": 3342 }, { "epoch": 0.4732781199122248, "grad_norm": 8.510706721903299, "learning_rate": 4.460690177561136e-06, "loss": 1.1679, "step": 3343 }, { "epoch": 0.473419692786862, "grad_norm": 8.736476802264395, "learning_rate": 4.460334506088102e-06, "loss": 1.1258, "step": 3344 }, { "epoch": 0.47356126566149925, "grad_norm": 10.059675685137007, "learning_rate": 4.459978731561745e-06, "loss": 1.2572, "step": 3345 }, { "epoch": 0.4737028385361365, "grad_norm": 9.39576517039496, "learning_rate": 4.459622854000767e-06, "loss": 1.0201, "step": 3346 }, { "epoch": 0.4738444114107737, "grad_norm": 10.476876728485772, "learning_rate": 4.4592668734238775e-06, "loss": 1.2245, "step": 3347 }, { "epoch": 0.4739859842854109, "grad_norm": 7.8616728864619185, "learning_rate": 4.458910789849789e-06, "loss": 1.1296, "step": 3348 }, { "epoch": 0.4741275571600481, "grad_norm": 8.62848847095811, "learning_rate": 4.45855460329722e-06, "loss": 1.2071, "step": 3349 }, { "epoch": 0.47426913003468535, "grad_norm": 10.035203766552549, "learning_rate": 4.458198313784897e-06, "loss": 1.2872, "step": 3350 }, { "epoch": 0.4744107029093226, "grad_norm": 7.146341585670394, "learning_rate": 4.457841921331549e-06, "loss": 1.082, "step": 3351 }, { "epoch": 0.4745522757839598, "grad_norm": 9.126907776275633, "learning_rate": 4.457485425955911e-06, "loss": 1.0717, "step": 3352 }, { "epoch": 0.47469384865859704, "grad_norm": 9.00454427647247, "learning_rate": 4.457128827676722e-06, "loss": 1.1878, "step": 3353 }, { "epoch": 0.4748354215332342, "grad_norm": 10.876601178035129, "learning_rate": 4.456772126512732e-06, "loss": 1.2277, "step": 3354 }, { "epoch": 0.47497699440787144, "grad_norm": 8.085109030625956, "learning_rate": 4.456415322482689e-06, "loss": 1.1028, "step": 3355 }, { "epoch": 0.4751185672825087, "grad_norm": 8.041532472535067, "learning_rate": 4.456058415605352e-06, "loss": 1.1573, "step": 3356 }, { "epoch": 0.4752601401571459, "grad_norm": 7.882729264302871, "learning_rate": 4.4557014058994815e-06, "loss": 1.0207, "step": 3357 }, { "epoch": 0.47540171303178314, "grad_norm": 7.827517792380869, "learning_rate": 4.455344293383847e-06, "loss": 1.0704, "step": 3358 }, { "epoch": 0.4755432859064203, "grad_norm": 9.67758636207449, "learning_rate": 4.454987078077221e-06, "loss": 1.2015, "step": 3359 }, { "epoch": 0.47568485878105754, "grad_norm": 8.290010935519636, "learning_rate": 4.454629759998382e-06, "loss": 1.2228, "step": 3360 }, { "epoch": 0.47582643165569477, "grad_norm": 8.225929518472128, "learning_rate": 4.454272339166114e-06, "loss": 1.119, "step": 3361 }, { "epoch": 0.475968004530332, "grad_norm": 8.276178613725603, "learning_rate": 4.453914815599206e-06, "loss": 1.0442, "step": 3362 }, { "epoch": 0.47610957740496923, "grad_norm": 8.681766511671665, "learning_rate": 4.453557189316454e-06, "loss": 1.2993, "step": 3363 }, { "epoch": 0.4762511502796064, "grad_norm": 8.667662441072727, "learning_rate": 4.453199460336656e-06, "loss": 1.1424, "step": 3364 }, { "epoch": 0.47639272315424364, "grad_norm": 10.060691149652264, "learning_rate": 4.452841628678619e-06, "loss": 1.3006, "step": 3365 }, { "epoch": 0.47653429602888087, "grad_norm": 10.063820319896035, "learning_rate": 4.452483694361154e-06, "loss": 1.165, "step": 3366 }, { "epoch": 0.4766758689035181, "grad_norm": 8.577232884222937, "learning_rate": 4.452125657403077e-06, "loss": 1.2203, "step": 3367 }, { "epoch": 0.4768174417781553, "grad_norm": 9.566841685680139, "learning_rate": 4.45176751782321e-06, "loss": 1.2427, "step": 3368 }, { "epoch": 0.4769590146527925, "grad_norm": 10.08081473009789, "learning_rate": 4.451409275640379e-06, "loss": 1.1478, "step": 3369 }, { "epoch": 0.47710058752742973, "grad_norm": 7.231788650800281, "learning_rate": 4.451050930873418e-06, "loss": 0.922, "step": 3370 }, { "epoch": 0.47724216040206696, "grad_norm": 8.64756410732146, "learning_rate": 4.450692483541165e-06, "loss": 1.128, "step": 3371 }, { "epoch": 0.4773837332767042, "grad_norm": 9.420869637618594, "learning_rate": 4.450333933662462e-06, "loss": 1.1726, "step": 3372 }, { "epoch": 0.4775253061513414, "grad_norm": 8.4385180954054, "learning_rate": 4.449975281256158e-06, "loss": 1.124, "step": 3373 }, { "epoch": 0.4776668790259786, "grad_norm": 9.158934736652178, "learning_rate": 4.4496165263411075e-06, "loss": 1.2325, "step": 3374 }, { "epoch": 0.4778084519006158, "grad_norm": 9.257876779840133, "learning_rate": 4.4492576689361705e-06, "loss": 1.2484, "step": 3375 }, { "epoch": 0.47795002477525306, "grad_norm": 8.027924914333454, "learning_rate": 4.448898709060211e-06, "loss": 1.1525, "step": 3376 }, { "epoch": 0.4780915976498903, "grad_norm": 8.039793702320907, "learning_rate": 4.448539646732099e-06, "loss": 1.1641, "step": 3377 }, { "epoch": 0.4782331705245275, "grad_norm": 8.151093822633321, "learning_rate": 4.448180481970711e-06, "loss": 1.2838, "step": 3378 }, { "epoch": 0.4783747433991647, "grad_norm": 10.323299417543685, "learning_rate": 4.447821214794928e-06, "loss": 1.1882, "step": 3379 }, { "epoch": 0.4785163162738019, "grad_norm": 8.525483918089204, "learning_rate": 4.447461845223636e-06, "loss": 1.288, "step": 3380 }, { "epoch": 0.47865788914843915, "grad_norm": 8.956110857503637, "learning_rate": 4.447102373275727e-06, "loss": 0.982, "step": 3381 }, { "epoch": 0.4787994620230764, "grad_norm": 8.89561796858255, "learning_rate": 4.446742798970097e-06, "loss": 1.1214, "step": 3382 }, { "epoch": 0.4789410348977136, "grad_norm": 7.2673059631416415, "learning_rate": 4.44638312232565e-06, "loss": 1.0143, "step": 3383 }, { "epoch": 0.47908260777235084, "grad_norm": 8.535864619489118, "learning_rate": 4.446023343361294e-06, "loss": 1.266, "step": 3384 }, { "epoch": 0.479224180646988, "grad_norm": 8.559547527868277, "learning_rate": 4.445663462095943e-06, "loss": 1.1782, "step": 3385 }, { "epoch": 0.47936575352162525, "grad_norm": 7.267082872049711, "learning_rate": 4.445303478548513e-06, "loss": 1.1557, "step": 3386 }, { "epoch": 0.4795073263962625, "grad_norm": 8.266257249271451, "learning_rate": 4.4449433927379295e-06, "loss": 1.0609, "step": 3387 }, { "epoch": 0.4796488992708997, "grad_norm": 7.801500313081437, "learning_rate": 4.444583204683123e-06, "loss": 1.0998, "step": 3388 }, { "epoch": 0.47979047214553694, "grad_norm": 8.162168496660131, "learning_rate": 4.444222914403027e-06, "loss": 1.231, "step": 3389 }, { "epoch": 0.4799320450201741, "grad_norm": 9.556979711501677, "learning_rate": 4.443862521916582e-06, "loss": 1.1326, "step": 3390 }, { "epoch": 0.48007361789481134, "grad_norm": 8.27965049411432, "learning_rate": 4.443502027242733e-06, "loss": 1.1153, "step": 3391 }, { "epoch": 0.4802151907694486, "grad_norm": 7.338799029228573, "learning_rate": 4.443141430400432e-06, "loss": 0.9806, "step": 3392 }, { "epoch": 0.4803567636440858, "grad_norm": 7.1708193731384515, "learning_rate": 4.4427807314086355e-06, "loss": 1.1701, "step": 3393 }, { "epoch": 0.48049833651872303, "grad_norm": 8.644855146722385, "learning_rate": 4.442419930286304e-06, "loss": 1.2344, "step": 3394 }, { "epoch": 0.4806399093933602, "grad_norm": 9.645012597470835, "learning_rate": 4.442059027052406e-06, "loss": 1.1179, "step": 3395 }, { "epoch": 0.48078148226799744, "grad_norm": 8.74444360061161, "learning_rate": 4.441698021725911e-06, "loss": 1.2285, "step": 3396 }, { "epoch": 0.48092305514263467, "grad_norm": 7.7558945419909096, "learning_rate": 4.4413369143258e-06, "loss": 1.0357, "step": 3397 }, { "epoch": 0.4810646280172719, "grad_norm": 8.078697317129592, "learning_rate": 4.440975704871055e-06, "loss": 1.1267, "step": 3398 }, { "epoch": 0.48120620089190913, "grad_norm": 8.256008849856343, "learning_rate": 4.4406143933806646e-06, "loss": 1.1351, "step": 3399 }, { "epoch": 0.4813477737665463, "grad_norm": 8.325277274614868, "learning_rate": 4.4402529798736224e-06, "loss": 1.0954, "step": 3400 }, { "epoch": 0.48148934664118354, "grad_norm": 8.072797955696407, "learning_rate": 4.439891464368927e-06, "loss": 1.269, "step": 3401 }, { "epoch": 0.48163091951582077, "grad_norm": 7.771501555019965, "learning_rate": 4.439529846885585e-06, "loss": 1.0582, "step": 3402 }, { "epoch": 0.481772492390458, "grad_norm": 8.676555078614241, "learning_rate": 4.439168127442604e-06, "loss": 1.1685, "step": 3403 }, { "epoch": 0.4819140652650952, "grad_norm": 9.797498375925842, "learning_rate": 4.438806306059001e-06, "loss": 1.2642, "step": 3404 }, { "epoch": 0.4820556381397324, "grad_norm": 7.007827197413517, "learning_rate": 4.438444382753796e-06, "loss": 1.2384, "step": 3405 }, { "epoch": 0.48219721101436963, "grad_norm": 9.31804353734031, "learning_rate": 4.438082357546015e-06, "loss": 1.179, "step": 3406 }, { "epoch": 0.48233878388900686, "grad_norm": 7.803653438520024, "learning_rate": 4.4377202304546905e-06, "loss": 1.1553, "step": 3407 }, { "epoch": 0.4824803567636441, "grad_norm": 9.513579049985974, "learning_rate": 4.437358001498857e-06, "loss": 1.1834, "step": 3408 }, { "epoch": 0.4826219296382813, "grad_norm": 9.23313397170501, "learning_rate": 4.436995670697559e-06, "loss": 1.2957, "step": 3409 }, { "epoch": 0.48276350251291855, "grad_norm": 8.010902605004944, "learning_rate": 4.436633238069843e-06, "loss": 1.0954, "step": 3410 }, { "epoch": 0.4829050753875557, "grad_norm": 8.813438852498965, "learning_rate": 4.436270703634761e-06, "loss": 1.1116, "step": 3411 }, { "epoch": 0.48304664826219296, "grad_norm": 7.497569135275274, "learning_rate": 4.435908067411372e-06, "loss": 1.1024, "step": 3412 }, { "epoch": 0.4831882211368302, "grad_norm": 8.98647945801449, "learning_rate": 4.435545329418739e-06, "loss": 1.2106, "step": 3413 }, { "epoch": 0.4833297940114674, "grad_norm": 9.333371934356792, "learning_rate": 4.435182489675931e-06, "loss": 1.345, "step": 3414 }, { "epoch": 0.48347136688610465, "grad_norm": 8.26854724822876, "learning_rate": 4.434819548202024e-06, "loss": 1.1034, "step": 3415 }, { "epoch": 0.4836129397607418, "grad_norm": 7.6409322914785065, "learning_rate": 4.434456505016094e-06, "loss": 1.1057, "step": 3416 }, { "epoch": 0.48375451263537905, "grad_norm": 9.291894838642053, "learning_rate": 4.43409336013723e-06, "loss": 1.2698, "step": 3417 }, { "epoch": 0.4838960855100163, "grad_norm": 8.188922787577035, "learning_rate": 4.433730113584519e-06, "loss": 1.1348, "step": 3418 }, { "epoch": 0.4840376583846535, "grad_norm": 8.688573098629854, "learning_rate": 4.433366765377057e-06, "loss": 1.2848, "step": 3419 }, { "epoch": 0.48417923125929074, "grad_norm": 11.41912599977042, "learning_rate": 4.433003315533947e-06, "loss": 1.1107, "step": 3420 }, { "epoch": 0.4843208041339279, "grad_norm": 10.574767443397212, "learning_rate": 4.432639764074294e-06, "loss": 1.2418, "step": 3421 }, { "epoch": 0.48446237700856515, "grad_norm": 8.523104513431418, "learning_rate": 4.4322761110172085e-06, "loss": 1.2221, "step": 3422 }, { "epoch": 0.4846039498832024, "grad_norm": 7.92671444034965, "learning_rate": 4.43191235638181e-06, "loss": 1.1454, "step": 3423 }, { "epoch": 0.4847455227578396, "grad_norm": 8.114503624214041, "learning_rate": 4.431548500187218e-06, "loss": 1.1485, "step": 3424 }, { "epoch": 0.48488709563247684, "grad_norm": 7.5149242685122655, "learning_rate": 4.431184542452563e-06, "loss": 1.2816, "step": 3425 }, { "epoch": 0.485028668507114, "grad_norm": 8.337664839045866, "learning_rate": 4.430820483196976e-06, "loss": 1.0692, "step": 3426 }, { "epoch": 0.48517024138175124, "grad_norm": 7.5423962431621945, "learning_rate": 4.430456322439596e-06, "loss": 1.1046, "step": 3427 }, { "epoch": 0.4853118142563885, "grad_norm": 9.074352012177947, "learning_rate": 4.430092060199566e-06, "loss": 1.1298, "step": 3428 }, { "epoch": 0.4854533871310257, "grad_norm": 8.760633274685977, "learning_rate": 4.429727696496036e-06, "loss": 1.1316, "step": 3429 }, { "epoch": 0.48559496000566293, "grad_norm": 9.507883916381331, "learning_rate": 4.42936323134816e-06, "loss": 1.1806, "step": 3430 }, { "epoch": 0.4857365328803001, "grad_norm": 7.18670809156455, "learning_rate": 4.4289986647750975e-06, "loss": 1.0525, "step": 3431 }, { "epoch": 0.48587810575493734, "grad_norm": 9.072355817299252, "learning_rate": 4.428633996796012e-06, "loss": 1.2004, "step": 3432 }, { "epoch": 0.48601967862957457, "grad_norm": 7.083122459713433, "learning_rate": 4.4282692274300775e-06, "loss": 1.1855, "step": 3433 }, { "epoch": 0.4861612515042118, "grad_norm": 7.40133851807434, "learning_rate": 4.427904356696467e-06, "loss": 1.1161, "step": 3434 }, { "epoch": 0.48630282437884903, "grad_norm": 9.132079004441398, "learning_rate": 4.427539384614361e-06, "loss": 1.2616, "step": 3435 }, { "epoch": 0.4864443972534862, "grad_norm": 7.949022235362725, "learning_rate": 4.427174311202948e-06, "loss": 1.1367, "step": 3436 }, { "epoch": 0.48658597012812344, "grad_norm": 8.680107209558738, "learning_rate": 4.426809136481417e-06, "loss": 1.2131, "step": 3437 }, { "epoch": 0.48672754300276067, "grad_norm": 7.473600592120699, "learning_rate": 4.426443860468967e-06, "loss": 1.0865, "step": 3438 }, { "epoch": 0.4868691158773979, "grad_norm": 7.639008208604885, "learning_rate": 4.4260784831848e-06, "loss": 1.0106, "step": 3439 }, { "epoch": 0.4870106887520351, "grad_norm": 9.703285793378873, "learning_rate": 4.425713004648123e-06, "loss": 1.2047, "step": 3440 }, { "epoch": 0.48715226162667236, "grad_norm": 8.327276190181193, "learning_rate": 4.4253474248781494e-06, "loss": 1.1636, "step": 3441 }, { "epoch": 0.48729383450130953, "grad_norm": 9.852640454506663, "learning_rate": 4.424981743894097e-06, "loss": 1.1959, "step": 3442 }, { "epoch": 0.48743540737594676, "grad_norm": 8.999597328502832, "learning_rate": 4.42461596171519e-06, "loss": 1.2354, "step": 3443 }, { "epoch": 0.487576980250584, "grad_norm": 9.007985704779928, "learning_rate": 4.424250078360657e-06, "loss": 0.9897, "step": 3444 }, { "epoch": 0.4877185531252212, "grad_norm": 7.8902401669770565, "learning_rate": 4.4238840938497315e-06, "loss": 1.0925, "step": 3445 }, { "epoch": 0.48786012599985845, "grad_norm": 8.861091999071876, "learning_rate": 4.423518008201655e-06, "loss": 1.1133, "step": 3446 }, { "epoch": 0.4880016988744956, "grad_norm": 8.411217565407352, "learning_rate": 4.42315182143567e-06, "loss": 1.2718, "step": 3447 }, { "epoch": 0.48814327174913286, "grad_norm": 9.746482801603516, "learning_rate": 4.422785533571028e-06, "loss": 1.1475, "step": 3448 }, { "epoch": 0.4882848446237701, "grad_norm": 8.158376832975302, "learning_rate": 4.422419144626984e-06, "loss": 1.1264, "step": 3449 }, { "epoch": 0.4884264174984073, "grad_norm": 8.667556374682588, "learning_rate": 4.4220526546228e-06, "loss": 1.1148, "step": 3450 }, { "epoch": 0.48856799037304455, "grad_norm": 8.947964177803021, "learning_rate": 4.4216860635777395e-06, "loss": 1.039, "step": 3451 }, { "epoch": 0.4887095632476817, "grad_norm": 7.3234913701313715, "learning_rate": 4.4213193715110755e-06, "loss": 1.014, "step": 3452 }, { "epoch": 0.48885113612231895, "grad_norm": 7.71656626340116, "learning_rate": 4.420952578442086e-06, "loss": 1.1924, "step": 3453 }, { "epoch": 0.4889927089969562, "grad_norm": 8.165305303132234, "learning_rate": 4.420585684390051e-06, "loss": 1.2199, "step": 3454 }, { "epoch": 0.4891342818715934, "grad_norm": 8.051786179547513, "learning_rate": 4.420218689374259e-06, "loss": 0.9796, "step": 3455 }, { "epoch": 0.48927585474623064, "grad_norm": 10.088093876522137, "learning_rate": 4.419851593414002e-06, "loss": 1.2525, "step": 3456 }, { "epoch": 0.4894174276208678, "grad_norm": 9.75815847377549, "learning_rate": 4.4194843965285786e-06, "loss": 1.2415, "step": 3457 }, { "epoch": 0.48955900049550505, "grad_norm": 7.256120926056476, "learning_rate": 4.419117098737291e-06, "loss": 1.1347, "step": 3458 }, { "epoch": 0.4897005733701423, "grad_norm": 7.048649803351913, "learning_rate": 4.418749700059449e-06, "loss": 1.0791, "step": 3459 }, { "epoch": 0.4898421462447795, "grad_norm": 8.981539549171949, "learning_rate": 4.418382200514366e-06, "loss": 1.043, "step": 3460 }, { "epoch": 0.48998371911941674, "grad_norm": 8.490689732512257, "learning_rate": 4.418014600121361e-06, "loss": 0.9693, "step": 3461 }, { "epoch": 0.4901252919940539, "grad_norm": 7.700887792360524, "learning_rate": 4.4176468988997586e-06, "loss": 1.3127, "step": 3462 }, { "epoch": 0.49026686486869114, "grad_norm": 9.60444725658339, "learning_rate": 4.4172790968688885e-06, "loss": 1.1534, "step": 3463 }, { "epoch": 0.4904084377433284, "grad_norm": 11.23549479924178, "learning_rate": 4.416911194048086e-06, "loss": 1.2138, "step": 3464 }, { "epoch": 0.4905500106179656, "grad_norm": 8.290222604521517, "learning_rate": 4.4165431904566915e-06, "loss": 1.1476, "step": 3465 }, { "epoch": 0.49069158349260283, "grad_norm": 7.217500652845701, "learning_rate": 4.416175086114049e-06, "loss": 1.0144, "step": 3466 }, { "epoch": 0.49083315636724006, "grad_norm": 7.922757613091592, "learning_rate": 4.415806881039513e-06, "loss": 1.0608, "step": 3467 }, { "epoch": 0.49097472924187724, "grad_norm": 8.528093026374883, "learning_rate": 4.415438575252438e-06, "loss": 1.1595, "step": 3468 }, { "epoch": 0.49111630211651447, "grad_norm": 8.429361218633309, "learning_rate": 4.415070168772184e-06, "loss": 1.0538, "step": 3469 }, { "epoch": 0.4912578749911517, "grad_norm": 11.331714813748059, "learning_rate": 4.414701661618119e-06, "loss": 1.1383, "step": 3470 }, { "epoch": 0.49139944786578893, "grad_norm": 7.825678825793495, "learning_rate": 4.414333053809616e-06, "loss": 1.0748, "step": 3471 }, { "epoch": 0.49154102074042616, "grad_norm": 8.313415476931839, "learning_rate": 4.413964345366051e-06, "loss": 1.1439, "step": 3472 }, { "epoch": 0.49168259361506333, "grad_norm": 8.834795944715736, "learning_rate": 4.413595536306808e-06, "loss": 1.269, "step": 3473 }, { "epoch": 0.49182416648970056, "grad_norm": 8.212612166429839, "learning_rate": 4.4132266266512745e-06, "loss": 1.276, "step": 3474 }, { "epoch": 0.4919657393643378, "grad_norm": 7.828024857845613, "learning_rate": 4.412857616418844e-06, "loss": 1.1117, "step": 3475 }, { "epoch": 0.492107312238975, "grad_norm": 9.77886602337536, "learning_rate": 4.412488505628915e-06, "loss": 1.2728, "step": 3476 }, { "epoch": 0.49224888511361226, "grad_norm": 9.466789420321492, "learning_rate": 4.41211929430089e-06, "loss": 1.0854, "step": 3477 }, { "epoch": 0.49239045798824943, "grad_norm": 8.521007383725006, "learning_rate": 4.411749982454181e-06, "loss": 1.1327, "step": 3478 }, { "epoch": 0.49253203086288666, "grad_norm": 9.234237850610121, "learning_rate": 4.4113805701082e-06, "loss": 1.1742, "step": 3479 }, { "epoch": 0.4926736037375239, "grad_norm": 7.827428607840498, "learning_rate": 4.411011057282368e-06, "loss": 1.0277, "step": 3480 }, { "epoch": 0.4928151766121611, "grad_norm": 10.43218749297921, "learning_rate": 4.41064144399611e-06, "loss": 1.2934, "step": 3481 }, { "epoch": 0.49295674948679835, "grad_norm": 8.244218592307819, "learning_rate": 4.4102717302688556e-06, "loss": 1.2638, "step": 3482 }, { "epoch": 0.4930983223614355, "grad_norm": 7.773021100263815, "learning_rate": 4.40990191612004e-06, "loss": 1.032, "step": 3483 }, { "epoch": 0.49323989523607276, "grad_norm": 9.009555829919774, "learning_rate": 4.409532001569106e-06, "loss": 1.1463, "step": 3484 }, { "epoch": 0.49338146811071, "grad_norm": 9.224963974300845, "learning_rate": 4.4091619866354975e-06, "loss": 1.049, "step": 3485 }, { "epoch": 0.4935230409853472, "grad_norm": 8.856490054003345, "learning_rate": 4.408791871338667e-06, "loss": 1.0777, "step": 3486 }, { "epoch": 0.49366461385998445, "grad_norm": 7.7403575840244265, "learning_rate": 4.4084216556980715e-06, "loss": 1.0282, "step": 3487 }, { "epoch": 0.4938061867346216, "grad_norm": 7.246484364471592, "learning_rate": 4.408051339733172e-06, "loss": 1.0896, "step": 3488 }, { "epoch": 0.49394775960925885, "grad_norm": 8.528095262922228, "learning_rate": 4.407680923463437e-06, "loss": 1.1834, "step": 3489 }, { "epoch": 0.4940893324838961, "grad_norm": 7.600342953623561, "learning_rate": 4.407310406908338e-06, "loss": 1.0968, "step": 3490 }, { "epoch": 0.4942309053585333, "grad_norm": 8.90450743643616, "learning_rate": 4.406939790087353e-06, "loss": 1.1035, "step": 3491 }, { "epoch": 0.49437247823317054, "grad_norm": 11.63680004797046, "learning_rate": 4.406569073019965e-06, "loss": 1.3193, "step": 3492 }, { "epoch": 0.4945140511078077, "grad_norm": 8.798503176227753, "learning_rate": 4.406198255725662e-06, "loss": 1.1953, "step": 3493 }, { "epoch": 0.49465562398244495, "grad_norm": 6.482507496515743, "learning_rate": 4.4058273382239395e-06, "loss": 0.999, "step": 3494 }, { "epoch": 0.4947971968570822, "grad_norm": 7.137174143927146, "learning_rate": 4.4054563205342935e-06, "loss": 1.1715, "step": 3495 }, { "epoch": 0.4949387697317194, "grad_norm": 8.580754111701854, "learning_rate": 4.4050852026762295e-06, "loss": 1.124, "step": 3496 }, { "epoch": 0.49508034260635664, "grad_norm": 9.121608574202087, "learning_rate": 4.404713984669257e-06, "loss": 1.107, "step": 3497 }, { "epoch": 0.49522191548099387, "grad_norm": 8.806970471270807, "learning_rate": 4.404342666532891e-06, "loss": 1.117, "step": 3498 }, { "epoch": 0.49536348835563104, "grad_norm": 8.147388774795042, "learning_rate": 4.403971248286651e-06, "loss": 1.1788, "step": 3499 }, { "epoch": 0.4955050612302683, "grad_norm": 7.169862818130522, "learning_rate": 4.403599729950062e-06, "loss": 1.0443, "step": 3500 }, { "epoch": 0.4956466341049055, "grad_norm": 9.377944687276482, "learning_rate": 4.403228111542654e-06, "loss": 1.2312, "step": 3501 }, { "epoch": 0.49578820697954273, "grad_norm": 8.19104625982337, "learning_rate": 4.402856393083964e-06, "loss": 1.12, "step": 3502 }, { "epoch": 0.49592977985417996, "grad_norm": 6.819202450742241, "learning_rate": 4.402484574593532e-06, "loss": 1.0412, "step": 3503 }, { "epoch": 0.49607135272881714, "grad_norm": 7.3684464124372235, "learning_rate": 4.402112656090904e-06, "loss": 1.1052, "step": 3504 }, { "epoch": 0.49621292560345437, "grad_norm": 8.395794006205065, "learning_rate": 4.401740637595633e-06, "loss": 1.1657, "step": 3505 }, { "epoch": 0.4963544984780916, "grad_norm": 7.95044619669629, "learning_rate": 4.401368519127274e-06, "loss": 1.083, "step": 3506 }, { "epoch": 0.49649607135272883, "grad_norm": 8.659208267352323, "learning_rate": 4.400996300705389e-06, "loss": 1.1633, "step": 3507 }, { "epoch": 0.49663764422736606, "grad_norm": 8.812907324501598, "learning_rate": 4.400623982349547e-06, "loss": 1.1878, "step": 3508 }, { "epoch": 0.49677921710200323, "grad_norm": 8.322217724941359, "learning_rate": 4.400251564079319e-06, "loss": 1.0764, "step": 3509 }, { "epoch": 0.49692078997664046, "grad_norm": 8.848412891901372, "learning_rate": 4.399879045914283e-06, "loss": 1.1087, "step": 3510 }, { "epoch": 0.4970623628512777, "grad_norm": 7.7998550010310685, "learning_rate": 4.399506427874023e-06, "loss": 1.1379, "step": 3511 }, { "epoch": 0.4972039357259149, "grad_norm": 9.291164048175943, "learning_rate": 4.399133709978126e-06, "loss": 1.059, "step": 3512 }, { "epoch": 0.49734550860055216, "grad_norm": 10.525246558846577, "learning_rate": 4.398760892246185e-06, "loss": 0.9611, "step": 3513 }, { "epoch": 0.49748708147518933, "grad_norm": 7.8761523175716865, "learning_rate": 4.398387974697801e-06, "loss": 1.1662, "step": 3514 }, { "epoch": 0.49762865434982656, "grad_norm": 6.953929794540807, "learning_rate": 4.398014957352576e-06, "loss": 1.0406, "step": 3515 }, { "epoch": 0.4977702272244638, "grad_norm": 7.136544495341171, "learning_rate": 4.3976418402301196e-06, "loss": 1.0992, "step": 3516 }, { "epoch": 0.497911800099101, "grad_norm": 8.370738753117864, "learning_rate": 4.397268623350047e-06, "loss": 1.1689, "step": 3517 }, { "epoch": 0.49805337297373825, "grad_norm": 7.789987338916445, "learning_rate": 4.396895306731978e-06, "loss": 1.1818, "step": 3518 }, { "epoch": 0.4981949458483754, "grad_norm": 8.560186588713691, "learning_rate": 4.396521890395536e-06, "loss": 1.1623, "step": 3519 }, { "epoch": 0.49833651872301266, "grad_norm": 9.49371290469629, "learning_rate": 4.396148374360354e-06, "loss": 1.2163, "step": 3520 }, { "epoch": 0.4984780915976499, "grad_norm": 8.167286388731965, "learning_rate": 4.395774758646064e-06, "loss": 1.2024, "step": 3521 }, { "epoch": 0.4986196644722871, "grad_norm": 8.34867250774646, "learning_rate": 4.395401043272309e-06, "loss": 1.1638, "step": 3522 }, { "epoch": 0.49876123734692435, "grad_norm": 7.492405670458114, "learning_rate": 4.395027228258735e-06, "loss": 1.1012, "step": 3523 }, { "epoch": 0.4989028102215615, "grad_norm": 8.489365151688961, "learning_rate": 4.3946533136249926e-06, "loss": 0.9317, "step": 3524 }, { "epoch": 0.49904438309619875, "grad_norm": 7.624140206311663, "learning_rate": 4.394279299390737e-06, "loss": 1.0746, "step": 3525 }, { "epoch": 0.499185955970836, "grad_norm": 6.813547036439496, "learning_rate": 4.393905185575632e-06, "loss": 1.0313, "step": 3526 }, { "epoch": 0.4993275288454732, "grad_norm": 8.051266910539365, "learning_rate": 4.393530972199344e-06, "loss": 1.1109, "step": 3527 }, { "epoch": 0.49946910172011044, "grad_norm": 10.263159887027934, "learning_rate": 4.393156659281545e-06, "loss": 1.1695, "step": 3528 }, { "epoch": 0.4996106745947477, "grad_norm": 8.51478457722122, "learning_rate": 4.39278224684191e-06, "loss": 1.0232, "step": 3529 }, { "epoch": 0.49975224746938485, "grad_norm": 10.158624647269978, "learning_rate": 4.392407734900125e-06, "loss": 1.2264, "step": 3530 }, { "epoch": 0.4998938203440221, "grad_norm": 9.59363150756426, "learning_rate": 4.392033123475876e-06, "loss": 1.1619, "step": 3531 }, { "epoch": 0.5000353932186593, "grad_norm": 8.804369916494016, "learning_rate": 4.3916584125888575e-06, "loss": 1.0716, "step": 3532 }, { "epoch": 0.5001769660932965, "grad_norm": 8.946550814943992, "learning_rate": 4.391283602258765e-06, "loss": 1.0859, "step": 3533 }, { "epoch": 0.5003185389679338, "grad_norm": 8.391083717238615, "learning_rate": 4.390908692505305e-06, "loss": 1.1999, "step": 3534 }, { "epoch": 0.500460111842571, "grad_norm": 9.709130337680223, "learning_rate": 4.390533683348184e-06, "loss": 1.1356, "step": 3535 }, { "epoch": 0.5006016847172082, "grad_norm": 9.809226103421558, "learning_rate": 4.390158574807118e-06, "loss": 1.0937, "step": 3536 }, { "epoch": 0.5007432575918453, "grad_norm": 8.788658844897336, "learning_rate": 4.389783366901824e-06, "loss": 1.1908, "step": 3537 }, { "epoch": 0.5008848304664826, "grad_norm": 9.090046446359482, "learning_rate": 4.3894080596520286e-06, "loss": 1.2141, "step": 3538 }, { "epoch": 0.5010264033411198, "grad_norm": 7.325983120818901, "learning_rate": 4.38903265307746e-06, "loss": 1.0725, "step": 3539 }, { "epoch": 0.501167976215757, "grad_norm": 9.442006228996991, "learning_rate": 4.388657147197852e-06, "loss": 1.1589, "step": 3540 }, { "epoch": 0.5013095490903943, "grad_norm": 9.355416965540398, "learning_rate": 4.388281542032948e-06, "loss": 1.2759, "step": 3541 }, { "epoch": 0.5014511219650315, "grad_norm": 8.696366649797767, "learning_rate": 4.38790583760249e-06, "loss": 1.2143, "step": 3542 }, { "epoch": 0.5015926948396687, "grad_norm": 9.793482142755392, "learning_rate": 4.3875300339262304e-06, "loss": 0.8944, "step": 3543 }, { "epoch": 0.501734267714306, "grad_norm": 9.583455181729335, "learning_rate": 4.387154131023924e-06, "loss": 1.0457, "step": 3544 }, { "epoch": 0.5018758405889432, "grad_norm": 8.105293202165262, "learning_rate": 4.386778128915332e-06, "loss": 1.2049, "step": 3545 }, { "epoch": 0.5020174134635804, "grad_norm": 9.527242449620333, "learning_rate": 4.386402027620221e-06, "loss": 1.1912, "step": 3546 }, { "epoch": 0.5021589863382176, "grad_norm": 9.33721511590075, "learning_rate": 4.386025827158362e-06, "loss": 1.1618, "step": 3547 }, { "epoch": 0.5023005592128548, "grad_norm": 7.097732837454842, "learning_rate": 4.385649527549531e-06, "loss": 1.0142, "step": 3548 }, { "epoch": 0.502442132087492, "grad_norm": 8.405384118393066, "learning_rate": 4.385273128813511e-06, "loss": 1.0878, "step": 3549 }, { "epoch": 0.5025837049621292, "grad_norm": 7.815592649591934, "learning_rate": 4.384896630970088e-06, "loss": 1.2528, "step": 3550 }, { "epoch": 0.5027252778367665, "grad_norm": 6.8090405561046365, "learning_rate": 4.384520034039054e-06, "loss": 1.0197, "step": 3551 }, { "epoch": 0.5028668507114037, "grad_norm": 7.831090917288216, "learning_rate": 4.384143338040207e-06, "loss": 1.1098, "step": 3552 }, { "epoch": 0.5030084235860409, "grad_norm": 10.533539746672368, "learning_rate": 4.3837665429933505e-06, "loss": 1.2245, "step": 3553 }, { "epoch": 0.5031499964606782, "grad_norm": 8.72324531640018, "learning_rate": 4.383389648918291e-06, "loss": 1.1455, "step": 3554 }, { "epoch": 0.5032915693353154, "grad_norm": 7.647257104952602, "learning_rate": 4.3830126558348425e-06, "loss": 1.1379, "step": 3555 }, { "epoch": 0.5034331422099526, "grad_norm": 9.320965701649595, "learning_rate": 4.382635563762822e-06, "loss": 1.2343, "step": 3556 }, { "epoch": 0.5035747150845898, "grad_norm": 8.893338880332273, "learning_rate": 4.382258372722054e-06, "loss": 1.2121, "step": 3557 }, { "epoch": 0.503716287959227, "grad_norm": 10.6938418484193, "learning_rate": 4.381881082732367e-06, "loss": 1.1222, "step": 3558 }, { "epoch": 0.5038578608338642, "grad_norm": 8.698449568879719, "learning_rate": 4.381503693813594e-06, "loss": 1.1382, "step": 3559 }, { "epoch": 0.5039994337085014, "grad_norm": 8.969762658558972, "learning_rate": 4.381126205985575e-06, "loss": 1.218, "step": 3560 }, { "epoch": 0.5041410065831387, "grad_norm": 8.794317439464772, "learning_rate": 4.380748619268154e-06, "loss": 1.0672, "step": 3561 }, { "epoch": 0.5042825794577759, "grad_norm": 10.397831441297777, "learning_rate": 4.3803709336811804e-06, "loss": 1.1655, "step": 3562 }, { "epoch": 0.5044241523324131, "grad_norm": 8.68571223244607, "learning_rate": 4.379993149244509e-06, "loss": 1.1816, "step": 3563 }, { "epoch": 0.5045657252070503, "grad_norm": 10.207870244531874, "learning_rate": 4.379615265978e-06, "loss": 1.0094, "step": 3564 }, { "epoch": 0.5047072980816876, "grad_norm": 6.598396291385056, "learning_rate": 4.379237283901518e-06, "loss": 0.9731, "step": 3565 }, { "epoch": 0.5048488709563248, "grad_norm": 7.653321994724359, "learning_rate": 4.378859203034932e-06, "loss": 1.0916, "step": 3566 }, { "epoch": 0.504990443830962, "grad_norm": 7.9854570764132395, "learning_rate": 4.378481023398119e-06, "loss": 0.9308, "step": 3567 }, { "epoch": 0.5051320167055992, "grad_norm": 8.509708918289945, "learning_rate": 4.37810274501096e-06, "loss": 1.1651, "step": 3568 }, { "epoch": 0.5052735895802364, "grad_norm": 10.97039626067404, "learning_rate": 4.37772436789334e-06, "loss": 1.1935, "step": 3569 }, { "epoch": 0.5054151624548736, "grad_norm": 9.626237665833994, "learning_rate": 4.377345892065149e-06, "loss": 1.2942, "step": 3570 }, { "epoch": 0.5055567353295108, "grad_norm": 7.37048046186743, "learning_rate": 4.376967317546285e-06, "loss": 1.089, "step": 3571 }, { "epoch": 0.5056983082041481, "grad_norm": 9.129106825460203, "learning_rate": 4.376588644356649e-06, "loss": 1.3073, "step": 3572 }, { "epoch": 0.5058398810787853, "grad_norm": 9.343895748366826, "learning_rate": 4.376209872516146e-06, "loss": 1.1139, "step": 3573 }, { "epoch": 0.5059814539534225, "grad_norm": 9.378346977416042, "learning_rate": 4.37583100204469e-06, "loss": 1.2148, "step": 3574 }, { "epoch": 0.5061230268280598, "grad_norm": 10.315061678755171, "learning_rate": 4.375452032962197e-06, "loss": 1.2435, "step": 3575 }, { "epoch": 0.506264599702697, "grad_norm": 9.306676080402033, "learning_rate": 4.375072965288589e-06, "loss": 1.1709, "step": 3576 }, { "epoch": 0.5064061725773342, "grad_norm": 9.152997269134797, "learning_rate": 4.374693799043792e-06, "loss": 1.2563, "step": 3577 }, { "epoch": 0.5065477454519715, "grad_norm": 7.7999430336461435, "learning_rate": 4.374314534247741e-06, "loss": 0.9826, "step": 3578 }, { "epoch": 0.5066893183266086, "grad_norm": 9.29283123387639, "learning_rate": 4.3739351709203725e-06, "loss": 1.0714, "step": 3579 }, { "epoch": 0.5068308912012458, "grad_norm": 9.852142534718734, "learning_rate": 4.3735557090816295e-06, "loss": 1.1539, "step": 3580 }, { "epoch": 0.506972464075883, "grad_norm": 7.8035451608635364, "learning_rate": 4.37317614875146e-06, "loss": 1.1507, "step": 3581 }, { "epoch": 0.5071140369505203, "grad_norm": 8.616908604110964, "learning_rate": 4.372796489949816e-06, "loss": 1.1821, "step": 3582 }, { "epoch": 0.5072556098251575, "grad_norm": 7.794383291105967, "learning_rate": 4.3724167326966575e-06, "loss": 1.041, "step": 3583 }, { "epoch": 0.5073971826997947, "grad_norm": 9.38881387874979, "learning_rate": 4.372036877011948e-06, "loss": 1.1906, "step": 3584 }, { "epoch": 0.507538755574432, "grad_norm": 9.265970825123034, "learning_rate": 4.371656922915655e-06, "loss": 1.2168, "step": 3585 }, { "epoch": 0.5076803284490692, "grad_norm": 9.57667040086464, "learning_rate": 4.3712768704277535e-06, "loss": 1.2322, "step": 3586 }, { "epoch": 0.5078219013237064, "grad_norm": 8.369113504550574, "learning_rate": 4.3708967195682215e-06, "loss": 1.2306, "step": 3587 }, { "epoch": 0.5079634741983436, "grad_norm": 9.855562806105883, "learning_rate": 4.3705164703570444e-06, "loss": 1.0996, "step": 3588 }, { "epoch": 0.5081050470729808, "grad_norm": 9.531686641509655, "learning_rate": 4.3701361228142115e-06, "loss": 1.0023, "step": 3589 }, { "epoch": 0.508246619947618, "grad_norm": 10.425917360482826, "learning_rate": 4.369755676959717e-06, "loss": 1.244, "step": 3590 }, { "epoch": 0.5083881928222552, "grad_norm": 8.748173986364419, "learning_rate": 4.36937513281356e-06, "loss": 1.2984, "step": 3591 }, { "epoch": 0.5085297656968925, "grad_norm": 8.839410910332159, "learning_rate": 4.3689944903957475e-06, "loss": 1.1588, "step": 3592 }, { "epoch": 0.5086713385715297, "grad_norm": 9.122692077571593, "learning_rate": 4.368613749726287e-06, "loss": 1.1337, "step": 3593 }, { "epoch": 0.5088129114461669, "grad_norm": 8.268767309423021, "learning_rate": 4.368232910825196e-06, "loss": 1.0307, "step": 3594 }, { "epoch": 0.5089544843208041, "grad_norm": 8.566122583510152, "learning_rate": 4.367851973712492e-06, "loss": 1.2726, "step": 3595 }, { "epoch": 0.5090960571954414, "grad_norm": 8.73816207101897, "learning_rate": 4.367470938408204e-06, "loss": 0.9946, "step": 3596 }, { "epoch": 0.5092376300700786, "grad_norm": 9.64553149240601, "learning_rate": 4.367089804932362e-06, "loss": 1.2223, "step": 3597 }, { "epoch": 0.5093792029447158, "grad_norm": 8.856347483419466, "learning_rate": 4.366708573304999e-06, "loss": 1.096, "step": 3598 }, { "epoch": 0.5095207758193531, "grad_norm": 8.974327346969599, "learning_rate": 4.36632724354616e-06, "loss": 1.1619, "step": 3599 }, { "epoch": 0.5096623486939902, "grad_norm": 8.428727173924953, "learning_rate": 4.365945815675888e-06, "loss": 1.1207, "step": 3600 }, { "epoch": 0.5098039215686274, "grad_norm": 7.538603805265513, "learning_rate": 4.365564289714237e-06, "loss": 1.0742, "step": 3601 }, { "epoch": 0.5099454944432646, "grad_norm": 8.534168009865137, "learning_rate": 4.365182665681261e-06, "loss": 1.2264, "step": 3602 }, { "epoch": 0.5100870673179019, "grad_norm": 9.550271187172392, "learning_rate": 4.364800943597024e-06, "loss": 1.0526, "step": 3603 }, { "epoch": 0.5102286401925391, "grad_norm": 8.723012231036508, "learning_rate": 4.364419123481592e-06, "loss": 1.0426, "step": 3604 }, { "epoch": 0.5103702130671763, "grad_norm": 8.14742201771014, "learning_rate": 4.364037205355036e-06, "loss": 0.9837, "step": 3605 }, { "epoch": 0.5105117859418136, "grad_norm": 9.516484865321571, "learning_rate": 4.3636551892374346e-06, "loss": 1.2557, "step": 3606 }, { "epoch": 0.5106533588164508, "grad_norm": 7.414975720068085, "learning_rate": 4.3632730751488695e-06, "loss": 1.1891, "step": 3607 }, { "epoch": 0.510794931691088, "grad_norm": 8.502140000171694, "learning_rate": 4.362890863109428e-06, "loss": 1.0738, "step": 3608 }, { "epoch": 0.5109365045657253, "grad_norm": 8.385916811478813, "learning_rate": 4.362508553139203e-06, "loss": 1.0596, "step": 3609 }, { "epoch": 0.5110780774403624, "grad_norm": 8.234409281534427, "learning_rate": 4.362126145258292e-06, "loss": 1.0796, "step": 3610 }, { "epoch": 0.5112196503149996, "grad_norm": 11.529695731761189, "learning_rate": 4.361743639486797e-06, "loss": 1.0707, "step": 3611 }, { "epoch": 0.5113612231896368, "grad_norm": 9.378228610723882, "learning_rate": 4.361361035844829e-06, "loss": 1.2301, "step": 3612 }, { "epoch": 0.5115027960642741, "grad_norm": 8.742914437214019, "learning_rate": 4.360978334352498e-06, "loss": 1.0919, "step": 3613 }, { "epoch": 0.5116443689389113, "grad_norm": 8.601125652366262, "learning_rate": 4.360595535029924e-06, "loss": 1.1204, "step": 3614 }, { "epoch": 0.5117859418135485, "grad_norm": 7.969972284859939, "learning_rate": 4.36021263789723e-06, "loss": 1.1553, "step": 3615 }, { "epoch": 0.5119275146881858, "grad_norm": 8.007367079837762, "learning_rate": 4.359829642974544e-06, "loss": 1.1505, "step": 3616 }, { "epoch": 0.512069087562823, "grad_norm": 10.182681201921277, "learning_rate": 4.359446550282001e-06, "loss": 1.1907, "step": 3617 }, { "epoch": 0.5122106604374602, "grad_norm": 8.638742307323662, "learning_rate": 4.359063359839739e-06, "loss": 1.0824, "step": 3618 }, { "epoch": 0.5123522333120974, "grad_norm": 8.589531546310228, "learning_rate": 4.358680071667903e-06, "loss": 1.107, "step": 3619 }, { "epoch": 0.5124938061867346, "grad_norm": 8.806663799107344, "learning_rate": 4.35829668578664e-06, "loss": 1.1686, "step": 3620 }, { "epoch": 0.5126353790613718, "grad_norm": 7.659508583899242, "learning_rate": 4.357913202216108e-06, "loss": 1.0664, "step": 3621 }, { "epoch": 0.512776951936009, "grad_norm": 9.817626391540227, "learning_rate": 4.357529620976463e-06, "loss": 1.1779, "step": 3622 }, { "epoch": 0.5129185248106463, "grad_norm": 7.629030506940582, "learning_rate": 4.3571459420878705e-06, "loss": 1.1602, "step": 3623 }, { "epoch": 0.5130600976852835, "grad_norm": 8.46806034221962, "learning_rate": 4.3567621655705015e-06, "loss": 1.0971, "step": 3624 }, { "epoch": 0.5132016705599207, "grad_norm": 10.550084147185524, "learning_rate": 4.356378291444529e-06, "loss": 1.096, "step": 3625 }, { "epoch": 0.513343243434558, "grad_norm": 7.887470604408006, "learning_rate": 4.355994319730135e-06, "loss": 0.9759, "step": 3626 }, { "epoch": 0.5134848163091952, "grad_norm": 10.050894732525876, "learning_rate": 4.355610250447503e-06, "loss": 1.1734, "step": 3627 }, { "epoch": 0.5136263891838324, "grad_norm": 8.40575398963575, "learning_rate": 4.355226083616824e-06, "loss": 1.1908, "step": 3628 }, { "epoch": 0.5137679620584696, "grad_norm": 10.75297114986387, "learning_rate": 4.354841819258293e-06, "loss": 1.2617, "step": 3629 }, { "epoch": 0.5139095349331069, "grad_norm": 10.807623496725478, "learning_rate": 4.35445745739211e-06, "loss": 1.2109, "step": 3630 }, { "epoch": 0.514051107807744, "grad_norm": 7.545676442254293, "learning_rate": 4.354072998038482e-06, "loss": 1.099, "step": 3631 }, { "epoch": 0.5141926806823812, "grad_norm": 9.395624692733273, "learning_rate": 4.353688441217618e-06, "loss": 1.2529, "step": 3632 }, { "epoch": 0.5143342535570185, "grad_norm": 10.125138646813065, "learning_rate": 4.353303786949735e-06, "loss": 1.1671, "step": 3633 }, { "epoch": 0.5144758264316557, "grad_norm": 9.283838187369371, "learning_rate": 4.352919035255055e-06, "loss": 1.1617, "step": 3634 }, { "epoch": 0.5146173993062929, "grad_norm": 8.548558824222443, "learning_rate": 4.352534186153802e-06, "loss": 1.1922, "step": 3635 }, { "epoch": 0.5147589721809301, "grad_norm": 9.893942519126405, "learning_rate": 4.352149239666208e-06, "loss": 1.1301, "step": 3636 }, { "epoch": 0.5149005450555674, "grad_norm": 8.129982696927698, "learning_rate": 4.35176419581251e-06, "loss": 1.1379, "step": 3637 }, { "epoch": 0.5150421179302046, "grad_norm": 9.800920591127843, "learning_rate": 4.351379054612949e-06, "loss": 1.1622, "step": 3638 }, { "epoch": 0.5151836908048418, "grad_norm": 8.947028356177821, "learning_rate": 4.35099381608777e-06, "loss": 1.2057, "step": 3639 }, { "epoch": 0.5153252636794791, "grad_norm": 7.88051327461832, "learning_rate": 4.3506084802572276e-06, "loss": 1.0702, "step": 3640 }, { "epoch": 0.5154668365541162, "grad_norm": 9.591220783262225, "learning_rate": 4.350223047141577e-06, "loss": 1.3741, "step": 3641 }, { "epoch": 0.5156084094287534, "grad_norm": 9.4028013884265, "learning_rate": 4.349837516761081e-06, "loss": 1.1037, "step": 3642 }, { "epoch": 0.5157499823033906, "grad_norm": 11.231147397973192, "learning_rate": 4.3494518891360054e-06, "loss": 1.2016, "step": 3643 }, { "epoch": 0.5158915551780279, "grad_norm": 9.352344043678468, "learning_rate": 4.3490661642866225e-06, "loss": 1.1019, "step": 3644 }, { "epoch": 0.5160331280526651, "grad_norm": 9.443331300048753, "learning_rate": 4.3486803422332115e-06, "loss": 1.1282, "step": 3645 }, { "epoch": 0.5161747009273023, "grad_norm": 9.551536108973028, "learning_rate": 4.348294422996052e-06, "loss": 0.9669, "step": 3646 }, { "epoch": 0.5163162738019396, "grad_norm": 7.818031245065994, "learning_rate": 4.347908406595433e-06, "loss": 1.1652, "step": 3647 }, { "epoch": 0.5164578466765768, "grad_norm": 11.247155062916015, "learning_rate": 4.3475222930516484e-06, "loss": 1.2227, "step": 3648 }, { "epoch": 0.516599419551214, "grad_norm": 9.774635686695294, "learning_rate": 4.347136082384993e-06, "loss": 1.1472, "step": 3649 }, { "epoch": 0.5167409924258513, "grad_norm": 6.648672742973899, "learning_rate": 4.3467497746157715e-06, "loss": 0.9994, "step": 3650 }, { "epoch": 0.5168825653004884, "grad_norm": 12.773332974534107, "learning_rate": 4.3463633697642905e-06, "loss": 1.1545, "step": 3651 }, { "epoch": 0.5170241381751256, "grad_norm": 9.031390654316693, "learning_rate": 4.345976867850865e-06, "loss": 1.1499, "step": 3652 }, { "epoch": 0.5171657110497628, "grad_norm": 7.680795863161104, "learning_rate": 4.345590268895812e-06, "loss": 1.0651, "step": 3653 }, { "epoch": 0.5173072839244001, "grad_norm": 7.363275810259927, "learning_rate": 4.3452035729194544e-06, "loss": 0.9594, "step": 3654 }, { "epoch": 0.5174488567990373, "grad_norm": 6.837679186605317, "learning_rate": 4.34481677994212e-06, "loss": 1.0194, "step": 3655 }, { "epoch": 0.5175904296736745, "grad_norm": 7.9317574942140965, "learning_rate": 4.3444298899841445e-06, "loss": 1.1888, "step": 3656 }, { "epoch": 0.5177320025483118, "grad_norm": 8.795083007420788, "learning_rate": 4.344042903065864e-06, "loss": 1.1019, "step": 3657 }, { "epoch": 0.517873575422949, "grad_norm": 9.71348227856901, "learning_rate": 4.3436558192076225e-06, "loss": 1.0377, "step": 3658 }, { "epoch": 0.5180151482975862, "grad_norm": 9.009965677312408, "learning_rate": 4.3432686384297705e-06, "loss": 1.2768, "step": 3659 }, { "epoch": 0.5181567211722234, "grad_norm": 7.565130611686423, "learning_rate": 4.34288136075266e-06, "loss": 1.093, "step": 3660 }, { "epoch": 0.5182982940468607, "grad_norm": 10.729417480697526, "learning_rate": 4.34249398619665e-06, "loss": 1.0897, "step": 3661 }, { "epoch": 0.5184398669214978, "grad_norm": 7.326801888233626, "learning_rate": 4.342106514782106e-06, "loss": 0.9761, "step": 3662 }, { "epoch": 0.518581439796135, "grad_norm": 7.151919458076583, "learning_rate": 4.341718946529395e-06, "loss": 1.107, "step": 3663 }, { "epoch": 0.5187230126707723, "grad_norm": 8.327305508314716, "learning_rate": 4.341331281458893e-06, "loss": 1.0095, "step": 3664 }, { "epoch": 0.5188645855454095, "grad_norm": 8.365913130223229, "learning_rate": 4.3409435195909785e-06, "loss": 1.2272, "step": 3665 }, { "epoch": 0.5190061584200467, "grad_norm": 9.324284617619258, "learning_rate": 4.340555660946035e-06, "loss": 1.1502, "step": 3666 }, { "epoch": 0.519147731294684, "grad_norm": 8.601962074194406, "learning_rate": 4.340167705544454e-06, "loss": 1.2502, "step": 3667 }, { "epoch": 0.5192893041693212, "grad_norm": 8.865022450443936, "learning_rate": 4.339779653406628e-06, "loss": 1.1952, "step": 3668 }, { "epoch": 0.5194308770439584, "grad_norm": 10.075235777715701, "learning_rate": 4.3393915045529575e-06, "loss": 1.1843, "step": 3669 }, { "epoch": 0.5195724499185956, "grad_norm": 8.540556697523487, "learning_rate": 4.339003259003848e-06, "loss": 1.0002, "step": 3670 }, { "epoch": 0.5197140227932329, "grad_norm": 7.705697951146712, "learning_rate": 4.338614916779706e-06, "loss": 1.0842, "step": 3671 }, { "epoch": 0.51985559566787, "grad_norm": 8.098793844206496, "learning_rate": 4.3382264779009504e-06, "loss": 1.1002, "step": 3672 }, { "epoch": 0.5199971685425072, "grad_norm": 8.447635242997325, "learning_rate": 4.337837942388e-06, "loss": 1.2827, "step": 3673 }, { "epoch": 0.5201387414171444, "grad_norm": 9.001701194199264, "learning_rate": 4.337449310261279e-06, "loss": 1.2678, "step": 3674 }, { "epoch": 0.5202803142917817, "grad_norm": 8.276842779937807, "learning_rate": 4.337060581541217e-06, "loss": 1.1977, "step": 3675 }, { "epoch": 0.5204218871664189, "grad_norm": 8.846042726769305, "learning_rate": 4.336671756248251e-06, "loss": 1.1186, "step": 3676 }, { "epoch": 0.5205634600410561, "grad_norm": 8.206519944805605, "learning_rate": 4.33628283440282e-06, "loss": 1.0908, "step": 3677 }, { "epoch": 0.5207050329156934, "grad_norm": 8.786754905400407, "learning_rate": 4.335893816025369e-06, "loss": 1.2027, "step": 3678 }, { "epoch": 0.5208466057903306, "grad_norm": 11.484563003350615, "learning_rate": 4.33550470113635e-06, "loss": 1.0513, "step": 3679 }, { "epoch": 0.5209881786649678, "grad_norm": 9.333348228806008, "learning_rate": 4.335115489756217e-06, "loss": 0.9294, "step": 3680 }, { "epoch": 0.5211297515396051, "grad_norm": 9.263866856015179, "learning_rate": 4.33472618190543e-06, "loss": 1.2133, "step": 3681 }, { "epoch": 0.5212713244142422, "grad_norm": 10.683305240619957, "learning_rate": 4.334336777604458e-06, "loss": 1.3215, "step": 3682 }, { "epoch": 0.5214128972888794, "grad_norm": 8.178819087437933, "learning_rate": 4.333947276873767e-06, "loss": 1.2998, "step": 3683 }, { "epoch": 0.5215544701635166, "grad_norm": 9.507647197104516, "learning_rate": 4.333557679733836e-06, "loss": 1.1565, "step": 3684 }, { "epoch": 0.5216960430381539, "grad_norm": 7.009657328851384, "learning_rate": 4.333167986205145e-06, "loss": 1.1035, "step": 3685 }, { "epoch": 0.5218376159127911, "grad_norm": 8.473955532293125, "learning_rate": 4.33277819630818e-06, "loss": 1.1983, "step": 3686 }, { "epoch": 0.5219791887874283, "grad_norm": 9.387657877360214, "learning_rate": 4.332388310063431e-06, "loss": 1.2268, "step": 3687 }, { "epoch": 0.5221207616620656, "grad_norm": 9.456819074189434, "learning_rate": 4.331998327491396e-06, "loss": 1.1297, "step": 3688 }, { "epoch": 0.5222623345367028, "grad_norm": 8.341568450908463, "learning_rate": 4.331608248612574e-06, "loss": 1.1677, "step": 3689 }, { "epoch": 0.52240390741134, "grad_norm": 8.434540858409989, "learning_rate": 4.331218073447472e-06, "loss": 1.0772, "step": 3690 }, { "epoch": 0.5225454802859772, "grad_norm": 6.98745311523431, "learning_rate": 4.330827802016603e-06, "loss": 0.9104, "step": 3691 }, { "epoch": 0.5226870531606145, "grad_norm": 7.9524093335909445, "learning_rate": 4.3304374343404794e-06, "loss": 1.0451, "step": 3692 }, { "epoch": 0.5228286260352516, "grad_norm": 10.47159949850685, "learning_rate": 4.330046970439625e-06, "loss": 1.2463, "step": 3693 }, { "epoch": 0.5229701989098888, "grad_norm": 9.110311731559541, "learning_rate": 4.329656410334567e-06, "loss": 1.265, "step": 3694 }, { "epoch": 0.5231117717845261, "grad_norm": 8.350397212465825, "learning_rate": 4.329265754045835e-06, "loss": 1.1467, "step": 3695 }, { "epoch": 0.5232533446591633, "grad_norm": 8.728904362078021, "learning_rate": 4.328875001593966e-06, "loss": 1.2231, "step": 3696 }, { "epoch": 0.5233949175338005, "grad_norm": 11.776222350218973, "learning_rate": 4.3284841529995025e-06, "loss": 1.1375, "step": 3697 }, { "epoch": 0.5235364904084377, "grad_norm": 8.439837096752909, "learning_rate": 4.32809320828299e-06, "loss": 1.1956, "step": 3698 }, { "epoch": 0.523678063283075, "grad_norm": 8.613546091449866, "learning_rate": 4.327702167464981e-06, "loss": 1.1618, "step": 3699 }, { "epoch": 0.5238196361577122, "grad_norm": 8.3465685709266, "learning_rate": 4.327311030566033e-06, "loss": 1.0766, "step": 3700 }, { "epoch": 0.5239612090323494, "grad_norm": 9.153601149354545, "learning_rate": 4.326919797606705e-06, "loss": 1.1227, "step": 3701 }, { "epoch": 0.5241027819069867, "grad_norm": 12.392837153764532, "learning_rate": 4.326528468607566e-06, "loss": 1.3172, "step": 3702 }, { "epoch": 0.5242443547816238, "grad_norm": 10.125321371370648, "learning_rate": 4.3261370435891866e-06, "loss": 1.1703, "step": 3703 }, { "epoch": 0.524385927656261, "grad_norm": 7.533109443285786, "learning_rate": 4.325745522572145e-06, "loss": 1.0884, "step": 3704 }, { "epoch": 0.5245275005308982, "grad_norm": 8.442635336828427, "learning_rate": 4.325353905577023e-06, "loss": 1.1442, "step": 3705 }, { "epoch": 0.5246690734055355, "grad_norm": 8.233532741257983, "learning_rate": 4.324962192624407e-06, "loss": 1.1911, "step": 3706 }, { "epoch": 0.5248106462801727, "grad_norm": 8.305812354000256, "learning_rate": 4.324570383734888e-06, "loss": 1.1356, "step": 3707 }, { "epoch": 0.5249522191548099, "grad_norm": 9.502976603730785, "learning_rate": 4.3241784789290665e-06, "loss": 1.1481, "step": 3708 }, { "epoch": 0.5250937920294472, "grad_norm": 7.706302877153469, "learning_rate": 4.323786478227541e-06, "loss": 1.0639, "step": 3709 }, { "epoch": 0.5252353649040844, "grad_norm": 8.37347944889427, "learning_rate": 4.323394381650921e-06, "loss": 1.1737, "step": 3710 }, { "epoch": 0.5253769377787216, "grad_norm": 10.389026031223592, "learning_rate": 4.323002189219818e-06, "loss": 1.1122, "step": 3711 }, { "epoch": 0.5255185106533589, "grad_norm": 8.808436545853311, "learning_rate": 4.322609900954848e-06, "loss": 1.2043, "step": 3712 }, { "epoch": 0.525660083527996, "grad_norm": 8.088128100006168, "learning_rate": 4.322217516876635e-06, "loss": 1.1345, "step": 3713 }, { "epoch": 0.5258016564026332, "grad_norm": 8.62312628254774, "learning_rate": 4.321825037005807e-06, "loss": 1.3017, "step": 3714 }, { "epoch": 0.5259432292772704, "grad_norm": 8.722173859394005, "learning_rate": 4.321432461362994e-06, "loss": 1.134, "step": 3715 }, { "epoch": 0.5260848021519077, "grad_norm": 9.82903980552196, "learning_rate": 4.3210397899688355e-06, "loss": 1.0541, "step": 3716 }, { "epoch": 0.5262263750265449, "grad_norm": 11.980335017186432, "learning_rate": 4.320647022843972e-06, "loss": 1.1425, "step": 3717 }, { "epoch": 0.5263679479011821, "grad_norm": 8.830995886211541, "learning_rate": 4.320254160009053e-06, "loss": 1.1077, "step": 3718 }, { "epoch": 0.5265095207758194, "grad_norm": 8.299631813799266, "learning_rate": 4.31986120148473e-06, "loss": 1.2099, "step": 3719 }, { "epoch": 0.5266510936504566, "grad_norm": 7.279083584546241, "learning_rate": 4.31946814729166e-06, "loss": 1.0551, "step": 3720 }, { "epoch": 0.5267926665250938, "grad_norm": 8.925901770247608, "learning_rate": 4.319074997450506e-06, "loss": 1.0968, "step": 3721 }, { "epoch": 0.526934239399731, "grad_norm": 9.35319771169636, "learning_rate": 4.318681751981937e-06, "loss": 1.0994, "step": 3722 }, { "epoch": 0.5270758122743683, "grad_norm": 9.1945138744897, "learning_rate": 4.318288410906623e-06, "loss": 1.0731, "step": 3723 }, { "epoch": 0.5272173851490054, "grad_norm": 10.483983130600452, "learning_rate": 4.3178949742452435e-06, "loss": 1.2664, "step": 3724 }, { "epoch": 0.5273589580236426, "grad_norm": 9.521474158648365, "learning_rate": 4.317501442018481e-06, "loss": 1.2244, "step": 3725 }, { "epoch": 0.5275005308982799, "grad_norm": 10.377112116170954, "learning_rate": 4.317107814247022e-06, "loss": 1.1959, "step": 3726 }, { "epoch": 0.5276421037729171, "grad_norm": 7.7709014589597825, "learning_rate": 4.316714090951562e-06, "loss": 1.1107, "step": 3727 }, { "epoch": 0.5277836766475543, "grad_norm": 7.2133933005011475, "learning_rate": 4.316320272152795e-06, "loss": 1.1092, "step": 3728 }, { "epoch": 0.5279252495221916, "grad_norm": 7.4233394422821455, "learning_rate": 4.315926357871426e-06, "loss": 1.041, "step": 3729 }, { "epoch": 0.5280668223968288, "grad_norm": 10.665053325078024, "learning_rate": 4.3155323481281625e-06, "loss": 1.2152, "step": 3730 }, { "epoch": 0.528208395271466, "grad_norm": 9.072415524528571, "learning_rate": 4.3151382429437175e-06, "loss": 1.1981, "step": 3731 }, { "epoch": 0.5283499681461032, "grad_norm": 8.157465000200258, "learning_rate": 4.314744042338808e-06, "loss": 1.2499, "step": 3732 }, { "epoch": 0.5284915410207405, "grad_norm": 8.745313860941023, "learning_rate": 4.314349746334158e-06, "loss": 1.1599, "step": 3733 }, { "epoch": 0.5286331138953776, "grad_norm": 9.490759923514094, "learning_rate": 4.313955354950494e-06, "loss": 1.1744, "step": 3734 }, { "epoch": 0.5287746867700148, "grad_norm": 8.983823437553129, "learning_rate": 4.313560868208549e-06, "loss": 1.1528, "step": 3735 }, { "epoch": 0.528916259644652, "grad_norm": 10.799846676338598, "learning_rate": 4.313166286129063e-06, "loss": 1.1044, "step": 3736 }, { "epoch": 0.5290578325192893, "grad_norm": 10.309168051925992, "learning_rate": 4.312771608732776e-06, "loss": 1.2199, "step": 3737 }, { "epoch": 0.5291994053939265, "grad_norm": 7.356345685078897, "learning_rate": 4.312376836040437e-06, "loss": 1.0806, "step": 3738 }, { "epoch": 0.5293409782685637, "grad_norm": 8.450120769292294, "learning_rate": 4.3119819680728e-06, "loss": 1.1864, "step": 3739 }, { "epoch": 0.529482551143201, "grad_norm": 8.117251883316609, "learning_rate": 4.311587004850622e-06, "loss": 1.1194, "step": 3740 }, { "epoch": 0.5296241240178382, "grad_norm": 7.887633105815894, "learning_rate": 4.311191946394665e-06, "loss": 1.1374, "step": 3741 }, { "epoch": 0.5297656968924754, "grad_norm": 8.04640519335464, "learning_rate": 4.3107967927256985e-06, "loss": 1.1042, "step": 3742 }, { "epoch": 0.5299072697671127, "grad_norm": 9.394448414395717, "learning_rate": 4.310401543864495e-06, "loss": 1.2079, "step": 3743 }, { "epoch": 0.5300488426417499, "grad_norm": 8.628381923012855, "learning_rate": 4.3100061998318325e-06, "loss": 1.1908, "step": 3744 }, { "epoch": 0.530190415516387, "grad_norm": 8.998198434866959, "learning_rate": 4.309610760648493e-06, "loss": 1.0729, "step": 3745 }, { "epoch": 0.5303319883910242, "grad_norm": 9.244896795543685, "learning_rate": 4.309215226335265e-06, "loss": 1.315, "step": 3746 }, { "epoch": 0.5304735612656615, "grad_norm": 6.5889690114825505, "learning_rate": 4.308819596912942e-06, "loss": 1.12, "step": 3747 }, { "epoch": 0.5306151341402987, "grad_norm": 7.653769079468375, "learning_rate": 4.308423872402322e-06, "loss": 1.1508, "step": 3748 }, { "epoch": 0.5307567070149359, "grad_norm": 8.77179379916549, "learning_rate": 4.308028052824207e-06, "loss": 1.0936, "step": 3749 }, { "epoch": 0.5308982798895732, "grad_norm": 8.153592550046143, "learning_rate": 4.307632138199405e-06, "loss": 1.1258, "step": 3750 }, { "epoch": 0.5310398527642104, "grad_norm": 8.304565773302386, "learning_rate": 4.30723612854873e-06, "loss": 1.2175, "step": 3751 }, { "epoch": 0.5311814256388476, "grad_norm": 10.382241169499972, "learning_rate": 4.306840023892998e-06, "loss": 1.0859, "step": 3752 }, { "epoch": 0.5313229985134849, "grad_norm": 6.899573503664607, "learning_rate": 4.306443824253035e-06, "loss": 1.1562, "step": 3753 }, { "epoch": 0.5314645713881221, "grad_norm": 7.86761176472067, "learning_rate": 4.306047529649665e-06, "loss": 1.1044, "step": 3754 }, { "epoch": 0.5316061442627592, "grad_norm": 8.603271979736792, "learning_rate": 4.305651140103725e-06, "loss": 1.1258, "step": 3755 }, { "epoch": 0.5317477171373964, "grad_norm": 7.976848720455541, "learning_rate": 4.305254655636049e-06, "loss": 1.1291, "step": 3756 }, { "epoch": 0.5318892900120337, "grad_norm": 7.612973092948959, "learning_rate": 4.304858076267483e-06, "loss": 1.1672, "step": 3757 }, { "epoch": 0.5320308628866709, "grad_norm": 10.83655742695418, "learning_rate": 4.304461402018873e-06, "loss": 1.2013, "step": 3758 }, { "epoch": 0.5321724357613081, "grad_norm": 9.316966374671415, "learning_rate": 4.304064632911073e-06, "loss": 1.1178, "step": 3759 }, { "epoch": 0.5323140086359454, "grad_norm": 8.382042500167211, "learning_rate": 4.303667768964941e-06, "loss": 1.043, "step": 3760 }, { "epoch": 0.5324555815105826, "grad_norm": 7.83887807603657, "learning_rate": 4.303270810201339e-06, "loss": 1.0796, "step": 3761 }, { "epoch": 0.5325971543852198, "grad_norm": 10.847336233801395, "learning_rate": 4.302873756641135e-06, "loss": 1.0975, "step": 3762 }, { "epoch": 0.532738727259857, "grad_norm": 9.81869291972488, "learning_rate": 4.302476608305201e-06, "loss": 1.4123, "step": 3763 }, { "epoch": 0.5328803001344943, "grad_norm": 8.371764967402846, "learning_rate": 4.3020793652144165e-06, "loss": 1.1232, "step": 3764 }, { "epoch": 0.5330218730091314, "grad_norm": 7.9148703160026965, "learning_rate": 4.301682027389663e-06, "loss": 1.1916, "step": 3765 }, { "epoch": 0.5331634458837686, "grad_norm": 7.966718107985833, "learning_rate": 4.301284594851829e-06, "loss": 1.2082, "step": 3766 }, { "epoch": 0.5333050187584059, "grad_norm": 8.168116797588228, "learning_rate": 4.300887067621807e-06, "loss": 1.2331, "step": 3767 }, { "epoch": 0.5334465916330431, "grad_norm": 10.017622868941311, "learning_rate": 4.300489445720495e-06, "loss": 1.2059, "step": 3768 }, { "epoch": 0.5335881645076803, "grad_norm": 7.151467403473864, "learning_rate": 4.300091729168795e-06, "loss": 0.9938, "step": 3769 }, { "epoch": 0.5337297373823175, "grad_norm": 8.242596627878976, "learning_rate": 4.299693917987615e-06, "loss": 1.088, "step": 3770 }, { "epoch": 0.5338713102569548, "grad_norm": 8.25035140705193, "learning_rate": 4.299296012197868e-06, "loss": 1.0849, "step": 3771 }, { "epoch": 0.534012883131592, "grad_norm": 7.842924158155932, "learning_rate": 4.29889801182047e-06, "loss": 1.0433, "step": 3772 }, { "epoch": 0.5341544560062292, "grad_norm": 9.028257725665298, "learning_rate": 4.298499916876347e-06, "loss": 1.119, "step": 3773 }, { "epoch": 0.5342960288808665, "grad_norm": 9.303017108408214, "learning_rate": 4.298101727386422e-06, "loss": 1.0701, "step": 3774 }, { "epoch": 0.5344376017555037, "grad_norm": 8.881416258037632, "learning_rate": 4.297703443371632e-06, "loss": 1.0763, "step": 3775 }, { "epoch": 0.5345791746301408, "grad_norm": 8.131771611013889, "learning_rate": 4.2973050648529114e-06, "loss": 1.1675, "step": 3776 }, { "epoch": 0.534720747504778, "grad_norm": 7.3339344414079095, "learning_rate": 4.296906591851203e-06, "loss": 1.0558, "step": 3777 }, { "epoch": 0.5348623203794153, "grad_norm": 7.88751389008038, "learning_rate": 4.2965080243874555e-06, "loss": 1.0522, "step": 3778 }, { "epoch": 0.5350038932540525, "grad_norm": 8.735598674624514, "learning_rate": 4.296109362482621e-06, "loss": 1.0466, "step": 3779 }, { "epoch": 0.5351454661286897, "grad_norm": 10.663401421817174, "learning_rate": 4.2957106061576565e-06, "loss": 1.2576, "step": 3780 }, { "epoch": 0.535287039003327, "grad_norm": 8.166880494516786, "learning_rate": 4.295311755433525e-06, "loss": 1.0663, "step": 3781 }, { "epoch": 0.5354286118779642, "grad_norm": 8.217090511674481, "learning_rate": 4.294912810331191e-06, "loss": 1.0586, "step": 3782 }, { "epoch": 0.5355701847526014, "grad_norm": 7.820268598465685, "learning_rate": 4.2945137708716315e-06, "loss": 1.1421, "step": 3783 }, { "epoch": 0.5357117576272387, "grad_norm": 8.803053100855657, "learning_rate": 4.294114637075819e-06, "loss": 1.1152, "step": 3784 }, { "epoch": 0.5358533305018759, "grad_norm": 6.990630555097396, "learning_rate": 4.293715408964738e-06, "loss": 1.1034, "step": 3785 }, { "epoch": 0.535994903376513, "grad_norm": 7.696505102700734, "learning_rate": 4.293316086559377e-06, "loss": 1.1376, "step": 3786 }, { "epoch": 0.5361364762511502, "grad_norm": 9.146775691695911, "learning_rate": 4.292916669880726e-06, "loss": 1.0536, "step": 3787 }, { "epoch": 0.5362780491257875, "grad_norm": 8.181783063408592, "learning_rate": 4.292517158949781e-06, "loss": 1.2193, "step": 3788 }, { "epoch": 0.5364196220004247, "grad_norm": 9.250669300506003, "learning_rate": 4.292117553787547e-06, "loss": 1.0537, "step": 3789 }, { "epoch": 0.5365611948750619, "grad_norm": 9.28086692969988, "learning_rate": 4.291717854415029e-06, "loss": 1.2392, "step": 3790 }, { "epoch": 0.5367027677496992, "grad_norm": 9.292400610522844, "learning_rate": 4.29131806085324e-06, "loss": 1.2504, "step": 3791 }, { "epoch": 0.5368443406243364, "grad_norm": 9.244392551170504, "learning_rate": 4.2909181731231955e-06, "loss": 1.2046, "step": 3792 }, { "epoch": 0.5369859134989736, "grad_norm": 11.414275055772404, "learning_rate": 4.290518191245918e-06, "loss": 1.324, "step": 3793 }, { "epoch": 0.5371274863736109, "grad_norm": 8.2812688863287, "learning_rate": 4.290118115242434e-06, "loss": 1.1516, "step": 3794 }, { "epoch": 0.5372690592482481, "grad_norm": 8.58801655333247, "learning_rate": 4.289717945133775e-06, "loss": 1.157, "step": 3795 }, { "epoch": 0.5374106321228852, "grad_norm": 9.374396139406294, "learning_rate": 4.289317680940979e-06, "loss": 1.1288, "step": 3796 }, { "epoch": 0.5375522049975224, "grad_norm": 9.121143100776555, "learning_rate": 4.288917322685087e-06, "loss": 1.2386, "step": 3797 }, { "epoch": 0.5376937778721597, "grad_norm": 9.272603229638205, "learning_rate": 4.288516870387145e-06, "loss": 1.1308, "step": 3798 }, { "epoch": 0.5378353507467969, "grad_norm": 7.902320337567271, "learning_rate": 4.288116324068205e-06, "loss": 1.1916, "step": 3799 }, { "epoch": 0.5379769236214341, "grad_norm": 8.840589410110848, "learning_rate": 4.287715683749322e-06, "loss": 0.9455, "step": 3800 }, { "epoch": 0.5381184964960714, "grad_norm": 10.399570280147096, "learning_rate": 4.287314949451559e-06, "loss": 1.1572, "step": 3801 }, { "epoch": 0.5382600693707086, "grad_norm": 9.557225586596036, "learning_rate": 4.286914121195982e-06, "loss": 1.0947, "step": 3802 }, { "epoch": 0.5384016422453458, "grad_norm": 9.516417522030718, "learning_rate": 4.286513199003661e-06, "loss": 1.1798, "step": 3803 }, { "epoch": 0.538543215119983, "grad_norm": 7.83372239573795, "learning_rate": 4.2861121828956745e-06, "loss": 1.1745, "step": 3804 }, { "epoch": 0.5386847879946203, "grad_norm": 9.696755989566377, "learning_rate": 4.285711072893102e-06, "loss": 1.1992, "step": 3805 }, { "epoch": 0.5388263608692575, "grad_norm": 11.087703636733584, "learning_rate": 4.28530986901703e-06, "loss": 1.2199, "step": 3806 }, { "epoch": 0.5389679337438946, "grad_norm": 10.832497862853723, "learning_rate": 4.2849085712885495e-06, "loss": 1.1026, "step": 3807 }, { "epoch": 0.5391095066185319, "grad_norm": 9.235979744368159, "learning_rate": 4.284507179728756e-06, "loss": 1.0242, "step": 3808 }, { "epoch": 0.5392510794931691, "grad_norm": 9.221348283204374, "learning_rate": 4.2841056943587505e-06, "loss": 1.3404, "step": 3809 }, { "epoch": 0.5393926523678063, "grad_norm": 8.253438522005446, "learning_rate": 4.283704115199639e-06, "loss": 1.13, "step": 3810 }, { "epoch": 0.5395342252424435, "grad_norm": 7.909155993856503, "learning_rate": 4.283302442272532e-06, "loss": 1.1761, "step": 3811 }, { "epoch": 0.5396757981170808, "grad_norm": 9.74249722279819, "learning_rate": 4.282900675598546e-06, "loss": 1.2145, "step": 3812 }, { "epoch": 0.539817370991718, "grad_norm": 9.33355667210347, "learning_rate": 4.2824988151988e-06, "loss": 1.1203, "step": 3813 }, { "epoch": 0.5399589438663552, "grad_norm": 8.253805958779003, "learning_rate": 4.282096861094421e-06, "loss": 1.1646, "step": 3814 }, { "epoch": 0.5401005167409925, "grad_norm": 7.96575129316443, "learning_rate": 4.281694813306538e-06, "loss": 1.1983, "step": 3815 }, { "epoch": 0.5402420896156297, "grad_norm": 10.979698520544668, "learning_rate": 4.281292671856288e-06, "loss": 1.1438, "step": 3816 }, { "epoch": 0.5403836624902668, "grad_norm": 8.975933959310002, "learning_rate": 4.28089043676481e-06, "loss": 1.2343, "step": 3817 }, { "epoch": 0.540525235364904, "grad_norm": 9.023855171610586, "learning_rate": 4.28048810805325e-06, "loss": 1.2512, "step": 3818 }, { "epoch": 0.5406668082395413, "grad_norm": 10.646545824270014, "learning_rate": 4.280085685742758e-06, "loss": 1.3737, "step": 3819 }, { "epoch": 0.5408083811141785, "grad_norm": 8.759654359312867, "learning_rate": 4.279683169854488e-06, "loss": 1.238, "step": 3820 }, { "epoch": 0.5409499539888157, "grad_norm": 9.61584631882426, "learning_rate": 4.279280560409601e-06, "loss": 1.0521, "step": 3821 }, { "epoch": 0.541091526863453, "grad_norm": 8.549606528346832, "learning_rate": 4.278877857429261e-06, "loss": 1.152, "step": 3822 }, { "epoch": 0.5412330997380902, "grad_norm": 7.550194178225237, "learning_rate": 4.278475060934639e-06, "loss": 1.1945, "step": 3823 }, { "epoch": 0.5413746726127274, "grad_norm": 9.757969264778573, "learning_rate": 4.278072170946909e-06, "loss": 1.1793, "step": 3824 }, { "epoch": 0.5415162454873647, "grad_norm": 9.531863133455174, "learning_rate": 4.277669187487251e-06, "loss": 1.0922, "step": 3825 }, { "epoch": 0.5416578183620019, "grad_norm": 10.17240602012529, "learning_rate": 4.2772661105768495e-06, "loss": 1.2368, "step": 3826 }, { "epoch": 0.541799391236639, "grad_norm": 9.022039343113695, "learning_rate": 4.276862940236894e-06, "loss": 1.0196, "step": 3827 }, { "epoch": 0.5419409641112762, "grad_norm": 7.429670813187754, "learning_rate": 4.276459676488578e-06, "loss": 0.9822, "step": 3828 }, { "epoch": 0.5420825369859135, "grad_norm": 8.171212105642017, "learning_rate": 4.276056319353101e-06, "loss": 1.1535, "step": 3829 }, { "epoch": 0.5422241098605507, "grad_norm": 10.047076993012228, "learning_rate": 4.275652868851669e-06, "loss": 0.9619, "step": 3830 }, { "epoch": 0.5423656827351879, "grad_norm": 8.725746765533076, "learning_rate": 4.275249325005488e-06, "loss": 1.3258, "step": 3831 }, { "epoch": 0.5425072556098252, "grad_norm": 8.12919766491706, "learning_rate": 4.2748456878357746e-06, "loss": 1.0862, "step": 3832 }, { "epoch": 0.5426488284844624, "grad_norm": 7.756637192251972, "learning_rate": 4.274441957363747e-06, "loss": 1.115, "step": 3833 }, { "epoch": 0.5427904013590996, "grad_norm": 14.298069238031646, "learning_rate": 4.274038133610629e-06, "loss": 0.971, "step": 3834 }, { "epoch": 0.5429319742337368, "grad_norm": 8.35289660254645, "learning_rate": 4.273634216597648e-06, "loss": 1.0809, "step": 3835 }, { "epoch": 0.5430735471083741, "grad_norm": 9.315273614326399, "learning_rate": 4.273230206346039e-06, "loss": 1.0791, "step": 3836 }, { "epoch": 0.5432151199830113, "grad_norm": 9.690137541024113, "learning_rate": 4.27282610287704e-06, "loss": 1.0668, "step": 3837 }, { "epoch": 0.5433566928576484, "grad_norm": 9.752006911089907, "learning_rate": 4.272421906211895e-06, "loss": 1.1053, "step": 3838 }, { "epoch": 0.5434982657322857, "grad_norm": 7.632642824128015, "learning_rate": 4.272017616371853e-06, "loss": 0.9516, "step": 3839 }, { "epoch": 0.5436398386069229, "grad_norm": 7.394202626579161, "learning_rate": 4.2716132333781646e-06, "loss": 1.1273, "step": 3840 }, { "epoch": 0.5437814114815601, "grad_norm": 7.58928307284782, "learning_rate": 4.27120875725209e-06, "loss": 1.1266, "step": 3841 }, { "epoch": 0.5439229843561973, "grad_norm": 9.853007489685414, "learning_rate": 4.270804188014892e-06, "loss": 1.0735, "step": 3842 }, { "epoch": 0.5440645572308346, "grad_norm": 8.636417958211945, "learning_rate": 4.270399525687839e-06, "loss": 1.1105, "step": 3843 }, { "epoch": 0.5442061301054718, "grad_norm": 9.084100906651498, "learning_rate": 4.269994770292201e-06, "loss": 1.1672, "step": 3844 }, { "epoch": 0.544347702980109, "grad_norm": 8.159734106032644, "learning_rate": 4.269589921849259e-06, "loss": 1.2021, "step": 3845 }, { "epoch": 0.5444892758547463, "grad_norm": 8.297152699566393, "learning_rate": 4.269184980380294e-06, "loss": 1.1082, "step": 3846 }, { "epoch": 0.5446308487293835, "grad_norm": 8.836801771668176, "learning_rate": 4.268779945906594e-06, "loss": 1.2612, "step": 3847 }, { "epoch": 0.5447724216040206, "grad_norm": 8.889771968633216, "learning_rate": 4.26837481844945e-06, "loss": 1.0413, "step": 3848 }, { "epoch": 0.5449139944786578, "grad_norm": 8.771088826738843, "learning_rate": 4.267969598030162e-06, "loss": 1.0399, "step": 3849 }, { "epoch": 0.5450555673532951, "grad_norm": 9.339420604174528, "learning_rate": 4.267564284670029e-06, "loss": 1.1172, "step": 3850 }, { "epoch": 0.5451971402279323, "grad_norm": 7.429986829373789, "learning_rate": 4.267158878390361e-06, "loss": 1.132, "step": 3851 }, { "epoch": 0.5453387131025695, "grad_norm": 9.191549433781681, "learning_rate": 4.266753379212467e-06, "loss": 1.2528, "step": 3852 }, { "epoch": 0.5454802859772068, "grad_norm": 7.70228358503609, "learning_rate": 4.266347787157666e-06, "loss": 1.0659, "step": 3853 }, { "epoch": 0.545621858851844, "grad_norm": 7.870347116805819, "learning_rate": 4.265942102247278e-06, "loss": 1.0365, "step": 3854 }, { "epoch": 0.5457634317264812, "grad_norm": 8.096910014457773, "learning_rate": 4.265536324502631e-06, "loss": 1.2675, "step": 3855 }, { "epoch": 0.5459050046011185, "grad_norm": 8.462253457651984, "learning_rate": 4.265130453945056e-06, "loss": 1.3059, "step": 3856 }, { "epoch": 0.5460465774757557, "grad_norm": 8.938184165110584, "learning_rate": 4.26472449059589e-06, "loss": 1.0278, "step": 3857 }, { "epoch": 0.5461881503503928, "grad_norm": 11.864088968634046, "learning_rate": 4.264318434476472e-06, "loss": 1.0353, "step": 3858 }, { "epoch": 0.54632972322503, "grad_norm": 8.839788513415582, "learning_rate": 4.26391228560815e-06, "loss": 1.1435, "step": 3859 }, { "epoch": 0.5464712960996673, "grad_norm": 9.101543220204901, "learning_rate": 4.263506044012275e-06, "loss": 1.2431, "step": 3860 }, { "epoch": 0.5466128689743045, "grad_norm": 8.074383162331872, "learning_rate": 4.2630997097102e-06, "loss": 0.9147, "step": 3861 }, { "epoch": 0.5467544418489417, "grad_norm": 8.317602900197334, "learning_rate": 4.26269328272329e-06, "loss": 1.0244, "step": 3862 }, { "epoch": 0.546896014723579, "grad_norm": 7.573150701111661, "learning_rate": 4.262286763072908e-06, "loss": 1.0077, "step": 3863 }, { "epoch": 0.5470375875982162, "grad_norm": 8.244275505663385, "learning_rate": 4.261880150780424e-06, "loss": 1.1259, "step": 3864 }, { "epoch": 0.5471791604728534, "grad_norm": 8.737123442518406, "learning_rate": 4.261473445867215e-06, "loss": 1.1164, "step": 3865 }, { "epoch": 0.5473207333474907, "grad_norm": 8.736307820860041, "learning_rate": 4.26106664835466e-06, "loss": 1.1349, "step": 3866 }, { "epoch": 0.5474623062221279, "grad_norm": 10.068768367359876, "learning_rate": 4.260659758264145e-06, "loss": 1.2784, "step": 3867 }, { "epoch": 0.5476038790967651, "grad_norm": 8.316270475054177, "learning_rate": 4.260252775617058e-06, "loss": 1.0585, "step": 3868 }, { "epoch": 0.5477454519714022, "grad_norm": 8.939389329232162, "learning_rate": 4.259845700434797e-06, "loss": 1.1964, "step": 3869 }, { "epoch": 0.5478870248460395, "grad_norm": 6.799228725450139, "learning_rate": 4.259438532738759e-06, "loss": 1.145, "step": 3870 }, { "epoch": 0.5480285977206767, "grad_norm": 9.608506772895836, "learning_rate": 4.259031272550349e-06, "loss": 1.0138, "step": 3871 }, { "epoch": 0.5481701705953139, "grad_norm": 7.391987003694809, "learning_rate": 4.258623919890976e-06, "loss": 1.0292, "step": 3872 }, { "epoch": 0.5483117434699512, "grad_norm": 9.070590492213912, "learning_rate": 4.258216474782056e-06, "loss": 1.1096, "step": 3873 }, { "epoch": 0.5484533163445884, "grad_norm": 9.443617297085297, "learning_rate": 4.257808937245006e-06, "loss": 1.1364, "step": 3874 }, { "epoch": 0.5485948892192256, "grad_norm": 8.400300937893116, "learning_rate": 4.257401307301251e-06, "loss": 1.2205, "step": 3875 }, { "epoch": 0.5487364620938628, "grad_norm": 7.427140915599742, "learning_rate": 4.25699358497222e-06, "loss": 1.086, "step": 3876 }, { "epoch": 0.5488780349685001, "grad_norm": 9.357601857549511, "learning_rate": 4.256585770279345e-06, "loss": 1.3121, "step": 3877 }, { "epoch": 0.5490196078431373, "grad_norm": 6.606767721830745, "learning_rate": 4.256177863244067e-06, "loss": 1.0439, "step": 3878 }, { "epoch": 0.5491611807177744, "grad_norm": 8.418044427589063, "learning_rate": 4.255769863887829e-06, "loss": 1.0389, "step": 3879 }, { "epoch": 0.5493027535924117, "grad_norm": 11.71764968792737, "learning_rate": 4.2553617722320775e-06, "loss": 1.2835, "step": 3880 }, { "epoch": 0.5494443264670489, "grad_norm": 7.897189893584428, "learning_rate": 4.254953588298266e-06, "loss": 1.1057, "step": 3881 }, { "epoch": 0.5495858993416861, "grad_norm": 8.975966258619676, "learning_rate": 4.254545312107854e-06, "loss": 1.1748, "step": 3882 }, { "epoch": 0.5497274722163233, "grad_norm": 7.393419443127227, "learning_rate": 4.254136943682302e-06, "loss": 1.1388, "step": 3883 }, { "epoch": 0.5498690450909606, "grad_norm": 8.973279918872553, "learning_rate": 4.253728483043081e-06, "loss": 1.1365, "step": 3884 }, { "epoch": 0.5500106179655978, "grad_norm": 8.431881086231218, "learning_rate": 4.253319930211659e-06, "loss": 1.1286, "step": 3885 }, { "epoch": 0.550152190840235, "grad_norm": 9.475170011177775, "learning_rate": 4.252911285209516e-06, "loss": 1.1897, "step": 3886 }, { "epoch": 0.5502937637148723, "grad_norm": 8.801164844752751, "learning_rate": 4.252502548058134e-06, "loss": 1.0707, "step": 3887 }, { "epoch": 0.5504353365895095, "grad_norm": 10.095140296448607, "learning_rate": 4.252093718779e-06, "loss": 1.0424, "step": 3888 }, { "epoch": 0.5505769094641467, "grad_norm": 8.231281662216059, "learning_rate": 4.2516847973936045e-06, "loss": 1.0008, "step": 3889 }, { "epoch": 0.5507184823387838, "grad_norm": 7.910215325782487, "learning_rate": 4.251275783923447e-06, "loss": 1.1746, "step": 3890 }, { "epoch": 0.5508600552134211, "grad_norm": 10.939168399537394, "learning_rate": 4.250866678390026e-06, "loss": 1.1996, "step": 3891 }, { "epoch": 0.5510016280880583, "grad_norm": 10.51608098530773, "learning_rate": 4.25045748081485e-06, "loss": 1.1094, "step": 3892 }, { "epoch": 0.5511432009626955, "grad_norm": 9.243810696560573, "learning_rate": 4.250048191219429e-06, "loss": 1.1387, "step": 3893 }, { "epoch": 0.5512847738373328, "grad_norm": 9.318985082678875, "learning_rate": 4.24963880962528e-06, "loss": 1.1561, "step": 3894 }, { "epoch": 0.55142634671197, "grad_norm": 6.665257336741788, "learning_rate": 4.249229336053924e-06, "loss": 0.9909, "step": 3895 }, { "epoch": 0.5515679195866072, "grad_norm": 10.50556108257279, "learning_rate": 4.248819770526884e-06, "loss": 1.1176, "step": 3896 }, { "epoch": 0.5517094924612445, "grad_norm": 9.277198189647562, "learning_rate": 4.248410113065694e-06, "loss": 1.2097, "step": 3897 }, { "epoch": 0.5518510653358817, "grad_norm": 8.447652854202591, "learning_rate": 4.248000363691888e-06, "loss": 1.206, "step": 3898 }, { "epoch": 0.5519926382105189, "grad_norm": 7.536306116499767, "learning_rate": 4.247590522427006e-06, "loss": 1.0711, "step": 3899 }, { "epoch": 0.552134211085156, "grad_norm": 8.214181536849091, "learning_rate": 4.2471805892925935e-06, "loss": 1.0598, "step": 3900 }, { "epoch": 0.5522757839597933, "grad_norm": 9.549731537405826, "learning_rate": 4.2467705643102005e-06, "loss": 1.0486, "step": 3901 }, { "epoch": 0.5524173568344305, "grad_norm": 7.898295747445225, "learning_rate": 4.246360447501381e-06, "loss": 1.1241, "step": 3902 }, { "epoch": 0.5525589297090677, "grad_norm": 8.749906266936565, "learning_rate": 4.245950238887695e-06, "loss": 1.1252, "step": 3903 }, { "epoch": 0.552700502583705, "grad_norm": 8.155470770667439, "learning_rate": 4.245539938490706e-06, "loss": 1.0136, "step": 3904 }, { "epoch": 0.5528420754583422, "grad_norm": 9.102654256542987, "learning_rate": 4.245129546331985e-06, "loss": 1.1387, "step": 3905 }, { "epoch": 0.5529836483329794, "grad_norm": 10.032310928855933, "learning_rate": 4.244719062433105e-06, "loss": 1.1725, "step": 3906 }, { "epoch": 0.5531252212076166, "grad_norm": 10.202238129047807, "learning_rate": 4.2443084868156434e-06, "loss": 1.0716, "step": 3907 }, { "epoch": 0.5532667940822539, "grad_norm": 9.321988385975478, "learning_rate": 4.243897819501187e-06, "loss": 1.1276, "step": 3908 }, { "epoch": 0.5534083669568911, "grad_norm": 7.042481535161857, "learning_rate": 4.243487060511321e-06, "loss": 0.9854, "step": 3909 }, { "epoch": 0.5535499398315282, "grad_norm": 7.116964259242418, "learning_rate": 4.243076209867642e-06, "loss": 1.0357, "step": 3910 }, { "epoch": 0.5536915127061655, "grad_norm": 9.612501873120348, "learning_rate": 4.242665267591744e-06, "loss": 1.1799, "step": 3911 }, { "epoch": 0.5538330855808027, "grad_norm": 9.463189938184287, "learning_rate": 4.242254233705234e-06, "loss": 1.0414, "step": 3912 }, { "epoch": 0.5539746584554399, "grad_norm": 8.410709602093167, "learning_rate": 4.241843108229718e-06, "loss": 1.0317, "step": 3913 }, { "epoch": 0.5541162313300771, "grad_norm": 9.439640838126842, "learning_rate": 4.241431891186808e-06, "loss": 1.1646, "step": 3914 }, { "epoch": 0.5542578042047144, "grad_norm": 8.078908384252934, "learning_rate": 4.241020582598122e-06, "loss": 1.3258, "step": 3915 }, { "epoch": 0.5543993770793516, "grad_norm": 10.684188597470943, "learning_rate": 4.240609182485282e-06, "loss": 1.2725, "step": 3916 }, { "epoch": 0.5545409499539888, "grad_norm": 9.610861959394644, "learning_rate": 4.240197690869916e-06, "loss": 1.1246, "step": 3917 }, { "epoch": 0.5546825228286261, "grad_norm": 8.630704016408375, "learning_rate": 4.239786107773655e-06, "loss": 1.1195, "step": 3918 }, { "epoch": 0.5548240957032633, "grad_norm": 7.113343189855943, "learning_rate": 4.239374433218134e-06, "loss": 1.0688, "step": 3919 }, { "epoch": 0.5549656685779005, "grad_norm": 7.706421678646408, "learning_rate": 4.238962667224997e-06, "loss": 0.9649, "step": 3920 }, { "epoch": 0.5551072414525376, "grad_norm": 7.552998777131091, "learning_rate": 4.238550809815889e-06, "loss": 1.0786, "step": 3921 }, { "epoch": 0.5552488143271749, "grad_norm": 10.068822165952199, "learning_rate": 4.238138861012461e-06, "loss": 1.155, "step": 3922 }, { "epoch": 0.5553903872018121, "grad_norm": 7.4737945503250724, "learning_rate": 4.23772682083637e-06, "loss": 0.9296, "step": 3923 }, { "epoch": 0.5555319600764493, "grad_norm": 8.966678818856364, "learning_rate": 4.237314689309275e-06, "loss": 1.0456, "step": 3924 }, { "epoch": 0.5556735329510866, "grad_norm": 8.28207220638279, "learning_rate": 4.236902466452843e-06, "loss": 1.1369, "step": 3925 }, { "epoch": 0.5558151058257238, "grad_norm": 8.816529996085478, "learning_rate": 4.2364901522887415e-06, "loss": 1.1747, "step": 3926 }, { "epoch": 0.555956678700361, "grad_norm": 7.091447229572709, "learning_rate": 4.236077746838649e-06, "loss": 1.0677, "step": 3927 }, { "epoch": 0.5560982515749983, "grad_norm": 10.630688748802957, "learning_rate": 4.2356652501242435e-06, "loss": 1.1908, "step": 3928 }, { "epoch": 0.5562398244496355, "grad_norm": 8.966986399720557, "learning_rate": 4.235252662167211e-06, "loss": 1.1997, "step": 3929 }, { "epoch": 0.5563813973242727, "grad_norm": 7.534374554450941, "learning_rate": 4.234839982989238e-06, "loss": 1.0869, "step": 3930 }, { "epoch": 0.5565229701989098, "grad_norm": 9.767908717348316, "learning_rate": 4.234427212612021e-06, "loss": 1.2083, "step": 3931 }, { "epoch": 0.5566645430735471, "grad_norm": 9.051819288633752, "learning_rate": 4.23401435105726e-06, "loss": 1.2731, "step": 3932 }, { "epoch": 0.5568061159481843, "grad_norm": 7.137725441707685, "learning_rate": 4.2336013983466565e-06, "loss": 1.0153, "step": 3933 }, { "epoch": 0.5569476888228215, "grad_norm": 7.373869728504106, "learning_rate": 4.233188354501921e-06, "loss": 1.2941, "step": 3934 }, { "epoch": 0.5570892616974588, "grad_norm": 9.642257478021468, "learning_rate": 4.2327752195447645e-06, "loss": 1.1447, "step": 3935 }, { "epoch": 0.557230834572096, "grad_norm": 7.910432576192055, "learning_rate": 4.232361993496908e-06, "loss": 1.0444, "step": 3936 }, { "epoch": 0.5573724074467332, "grad_norm": 7.982500964940338, "learning_rate": 4.231948676380073e-06, "loss": 1.0791, "step": 3937 }, { "epoch": 0.5575139803213705, "grad_norm": 10.573471240202188, "learning_rate": 4.231535268215987e-06, "loss": 1.2, "step": 3938 }, { "epoch": 0.5576555531960077, "grad_norm": 7.829275202815138, "learning_rate": 4.231121769026383e-06, "loss": 1.105, "step": 3939 }, { "epoch": 0.5577971260706449, "grad_norm": 8.19491590734863, "learning_rate": 4.230708178832999e-06, "loss": 1.1092, "step": 3940 }, { "epoch": 0.557938698945282, "grad_norm": 8.52171603405318, "learning_rate": 4.230294497657576e-06, "loss": 1.2635, "step": 3941 }, { "epoch": 0.5580802718199193, "grad_norm": 6.785757228170484, "learning_rate": 4.2298807255218615e-06, "loss": 1.018, "step": 3942 }, { "epoch": 0.5582218446945565, "grad_norm": 8.88972862833558, "learning_rate": 4.229466862447608e-06, "loss": 1.2651, "step": 3943 }, { "epoch": 0.5583634175691937, "grad_norm": 8.24607709614614, "learning_rate": 4.22905290845657e-06, "loss": 1.1477, "step": 3944 }, { "epoch": 0.558504990443831, "grad_norm": 8.426348062761512, "learning_rate": 4.22863886357051e-06, "loss": 1.1502, "step": 3945 }, { "epoch": 0.5586465633184682, "grad_norm": 10.396263302235763, "learning_rate": 4.228224727811194e-06, "loss": 1.1927, "step": 3946 }, { "epoch": 0.5587881361931054, "grad_norm": 8.831871871488769, "learning_rate": 4.227810501200393e-06, "loss": 1.1723, "step": 3947 }, { "epoch": 0.5589297090677426, "grad_norm": 10.602030260959307, "learning_rate": 4.227396183759882e-06, "loss": 1.1421, "step": 3948 }, { "epoch": 0.5590712819423799, "grad_norm": 8.952338858237821, "learning_rate": 4.226981775511442e-06, "loss": 1.1909, "step": 3949 }, { "epoch": 0.5592128548170171, "grad_norm": 8.793624250194755, "learning_rate": 4.2265672764768565e-06, "loss": 1.1282, "step": 3950 }, { "epoch": 0.5593544276916543, "grad_norm": 10.758514580848423, "learning_rate": 4.226152686677918e-06, "loss": 1.1291, "step": 3951 }, { "epoch": 0.5594960005662915, "grad_norm": 7.727866036796058, "learning_rate": 4.22573800613642e-06, "loss": 1.0069, "step": 3952 }, { "epoch": 0.5596375734409287, "grad_norm": 11.422192278211293, "learning_rate": 4.22532323487416e-06, "loss": 1.2244, "step": 3953 }, { "epoch": 0.5597791463155659, "grad_norm": 8.219250333808107, "learning_rate": 4.224908372912946e-06, "loss": 1.3047, "step": 3954 }, { "epoch": 0.5599207191902031, "grad_norm": 8.820783469074659, "learning_rate": 4.224493420274584e-06, "loss": 1.18, "step": 3955 }, { "epoch": 0.5600622920648404, "grad_norm": 9.133677915583897, "learning_rate": 4.224078376980888e-06, "loss": 1.0165, "step": 3956 }, { "epoch": 0.5602038649394776, "grad_norm": 7.739246414128042, "learning_rate": 4.223663243053679e-06, "loss": 1.0955, "step": 3957 }, { "epoch": 0.5603454378141148, "grad_norm": 7.948685341824149, "learning_rate": 4.2232480185147775e-06, "loss": 0.9643, "step": 3958 }, { "epoch": 0.5604870106887521, "grad_norm": 8.516411589244726, "learning_rate": 4.222832703386013e-06, "loss": 1.0937, "step": 3959 }, { "epoch": 0.5606285835633893, "grad_norm": 9.789526219812604, "learning_rate": 4.222417297689217e-06, "loss": 1.266, "step": 3960 }, { "epoch": 0.5607701564380265, "grad_norm": 7.248031875237106, "learning_rate": 4.2220018014462284e-06, "loss": 1.0142, "step": 3961 }, { "epoch": 0.5609117293126636, "grad_norm": 7.6602526108644655, "learning_rate": 4.221586214678889e-06, "loss": 1.1043, "step": 3962 }, { "epoch": 0.5610533021873009, "grad_norm": 8.82381194765605, "learning_rate": 4.221170537409046e-06, "loss": 1.0799, "step": 3963 }, { "epoch": 0.5611948750619381, "grad_norm": 6.220602248495267, "learning_rate": 4.220754769658551e-06, "loss": 1.1745, "step": 3964 }, { "epoch": 0.5613364479365753, "grad_norm": 9.097826067140941, "learning_rate": 4.220338911449262e-06, "loss": 1.0913, "step": 3965 }, { "epoch": 0.5614780208112126, "grad_norm": 7.123091592692784, "learning_rate": 4.219922962803038e-06, "loss": 1.0742, "step": 3966 }, { "epoch": 0.5616195936858498, "grad_norm": 10.500102814670546, "learning_rate": 4.2195069237417466e-06, "loss": 1.1378, "step": 3967 }, { "epoch": 0.561761166560487, "grad_norm": 8.393439646626554, "learning_rate": 4.219090794287258e-06, "loss": 1.2061, "step": 3968 }, { "epoch": 0.5619027394351243, "grad_norm": 7.650252254540726, "learning_rate": 4.218674574461449e-06, "loss": 1.0765, "step": 3969 }, { "epoch": 0.5620443123097615, "grad_norm": 8.628846568518144, "learning_rate": 4.218258264286198e-06, "loss": 1.2486, "step": 3970 }, { "epoch": 0.5621858851843987, "grad_norm": 7.831383185079934, "learning_rate": 4.217841863783393e-06, "loss": 1.0724, "step": 3971 }, { "epoch": 0.5623274580590358, "grad_norm": 8.222657641775417, "learning_rate": 4.21742537297492e-06, "loss": 1.0059, "step": 3972 }, { "epoch": 0.5624690309336731, "grad_norm": 7.928569666053448, "learning_rate": 4.217008791882678e-06, "loss": 1.1093, "step": 3973 }, { "epoch": 0.5626106038083103, "grad_norm": 7.719276564703006, "learning_rate": 4.216592120528562e-06, "loss": 1.1091, "step": 3974 }, { "epoch": 0.5627521766829475, "grad_norm": 8.573602981875258, "learning_rate": 4.216175358934479e-06, "loss": 1.0592, "step": 3975 }, { "epoch": 0.5628937495575848, "grad_norm": 7.750430925755813, "learning_rate": 4.215758507122337e-06, "loss": 0.9791, "step": 3976 }, { "epoch": 0.563035322432222, "grad_norm": 9.190437106923076, "learning_rate": 4.21534156511405e-06, "loss": 1.1744, "step": 3977 }, { "epoch": 0.5631768953068592, "grad_norm": 7.187219498178015, "learning_rate": 4.214924532931534e-06, "loss": 1.0715, "step": 3978 }, { "epoch": 0.5633184681814964, "grad_norm": 9.700517337806977, "learning_rate": 4.214507410596716e-06, "loss": 1.2372, "step": 3979 }, { "epoch": 0.5634600410561337, "grad_norm": 8.643624804204505, "learning_rate": 4.214090198131522e-06, "loss": 1.1534, "step": 3980 }, { "epoch": 0.5636016139307709, "grad_norm": 7.7362324651684915, "learning_rate": 4.2136728955578835e-06, "loss": 1.2468, "step": 3981 }, { "epoch": 0.5637431868054081, "grad_norm": 7.009541139916899, "learning_rate": 4.2132555028977386e-06, "loss": 1.1307, "step": 3982 }, { "epoch": 0.5638847596800453, "grad_norm": 8.083821337674008, "learning_rate": 4.212838020173029e-06, "loss": 1.1285, "step": 3983 }, { "epoch": 0.5640263325546825, "grad_norm": 7.969649918548415, "learning_rate": 4.212420447405703e-06, "loss": 1.262, "step": 3984 }, { "epoch": 0.5641679054293197, "grad_norm": 8.871895260466916, "learning_rate": 4.21200278461771e-06, "loss": 1.0921, "step": 3985 }, { "epoch": 0.564309478303957, "grad_norm": 8.79875854058223, "learning_rate": 4.211585031831007e-06, "loss": 1.1156, "step": 3986 }, { "epoch": 0.5644510511785942, "grad_norm": 8.064889479824094, "learning_rate": 4.211167189067556e-06, "loss": 0.9801, "step": 3987 }, { "epoch": 0.5645926240532314, "grad_norm": 8.308907327621599, "learning_rate": 4.210749256349322e-06, "loss": 1.2174, "step": 3988 }, { "epoch": 0.5647341969278686, "grad_norm": 9.736418434573643, "learning_rate": 4.210331233698274e-06, "loss": 1.0578, "step": 3989 }, { "epoch": 0.5648757698025059, "grad_norm": 10.26158901171793, "learning_rate": 4.209913121136389e-06, "loss": 1.3689, "step": 3990 }, { "epoch": 0.5650173426771431, "grad_norm": 8.904897700926432, "learning_rate": 4.209494918685646e-06, "loss": 1.1457, "step": 3991 }, { "epoch": 0.5651589155517803, "grad_norm": 6.966420421476839, "learning_rate": 4.20907662636803e-06, "loss": 1.1396, "step": 3992 }, { "epoch": 0.5653004884264174, "grad_norm": 10.129622922847723, "learning_rate": 4.208658244205529e-06, "loss": 1.1226, "step": 3993 }, { "epoch": 0.5654420613010547, "grad_norm": 7.641784954368865, "learning_rate": 4.208239772220139e-06, "loss": 1.0809, "step": 3994 }, { "epoch": 0.5655836341756919, "grad_norm": 8.659006499286596, "learning_rate": 4.207821210433858e-06, "loss": 1.1748, "step": 3995 }, { "epoch": 0.5657252070503291, "grad_norm": 9.83625474195503, "learning_rate": 4.20740255886869e-06, "loss": 1.1834, "step": 3996 }, { "epoch": 0.5658667799249664, "grad_norm": 8.513742445294236, "learning_rate": 4.206983817546641e-06, "loss": 1.2097, "step": 3997 }, { "epoch": 0.5660083527996036, "grad_norm": 7.36411969903059, "learning_rate": 4.206564986489726e-06, "loss": 1.0669, "step": 3998 }, { "epoch": 0.5661499256742408, "grad_norm": 8.324792477882237, "learning_rate": 4.206146065719963e-06, "loss": 1.1525, "step": 3999 }, { "epoch": 0.5662914985488781, "grad_norm": 9.763990488212606, "learning_rate": 4.205727055259372e-06, "loss": 1.1929, "step": 4000 }, { "epoch": 0.5664330714235153, "grad_norm": 9.030890117721475, "learning_rate": 4.2053079551299835e-06, "loss": 1.1446, "step": 4001 }, { "epoch": 0.5665746442981525, "grad_norm": 7.760523142926918, "learning_rate": 4.204888765353826e-06, "loss": 1.2068, "step": 4002 }, { "epoch": 0.5667162171727896, "grad_norm": 9.179683759972642, "learning_rate": 4.204469485952938e-06, "loss": 1.1497, "step": 4003 }, { "epoch": 0.5668577900474269, "grad_norm": 7.865516159280603, "learning_rate": 4.204050116949359e-06, "loss": 1.2334, "step": 4004 }, { "epoch": 0.5669993629220641, "grad_norm": 7.431676556253479, "learning_rate": 4.203630658365136e-06, "loss": 1.1451, "step": 4005 }, { "epoch": 0.5671409357967013, "grad_norm": 8.28869984774546, "learning_rate": 4.203211110222321e-06, "loss": 1.0628, "step": 4006 }, { "epoch": 0.5672825086713386, "grad_norm": 6.843480213717789, "learning_rate": 4.202791472542968e-06, "loss": 1.1224, "step": 4007 }, { "epoch": 0.5674240815459758, "grad_norm": 8.384326808704218, "learning_rate": 4.202371745349135e-06, "loss": 1.3326, "step": 4008 }, { "epoch": 0.567565654420613, "grad_norm": 9.488535337787667, "learning_rate": 4.2019519286628895e-06, "loss": 1.0785, "step": 4009 }, { "epoch": 0.5677072272952502, "grad_norm": 7.74729921595917, "learning_rate": 4.2015320225063e-06, "loss": 1.1305, "step": 4010 }, { "epoch": 0.5678488001698875, "grad_norm": 7.577460002233546, "learning_rate": 4.201112026901442e-06, "loss": 1.0066, "step": 4011 }, { "epoch": 0.5679903730445247, "grad_norm": 10.473910298261828, "learning_rate": 4.200691941870392e-06, "loss": 1.271, "step": 4012 }, { "epoch": 0.5681319459191619, "grad_norm": 8.526602013377383, "learning_rate": 4.200271767435235e-06, "loss": 1.0855, "step": 4013 }, { "epoch": 0.5682735187937991, "grad_norm": 10.074839353582185, "learning_rate": 4.199851503618059e-06, "loss": 1.3732, "step": 4014 }, { "epoch": 0.5684150916684363, "grad_norm": 7.37907348895645, "learning_rate": 4.1994311504409566e-06, "loss": 1.0395, "step": 4015 }, { "epoch": 0.5685566645430735, "grad_norm": 8.649566163225282, "learning_rate": 4.199010707926026e-06, "loss": 1.1344, "step": 4016 }, { "epoch": 0.5686982374177108, "grad_norm": 8.973270991403092, "learning_rate": 4.19859017609537e-06, "loss": 1.0333, "step": 4017 }, { "epoch": 0.568839810292348, "grad_norm": 8.355160572419777, "learning_rate": 4.198169554971095e-06, "loss": 1.0633, "step": 4018 }, { "epoch": 0.5689813831669852, "grad_norm": 8.406532254497002, "learning_rate": 4.197748844575311e-06, "loss": 1.1438, "step": 4019 }, { "epoch": 0.5691229560416224, "grad_norm": 9.459684659125676, "learning_rate": 4.197328044930137e-06, "loss": 1.11, "step": 4020 }, { "epoch": 0.5692645289162597, "grad_norm": 8.643976978954496, "learning_rate": 4.196907156057694e-06, "loss": 1.253, "step": 4021 }, { "epoch": 0.5694061017908969, "grad_norm": 9.11346375192315, "learning_rate": 4.196486177980107e-06, "loss": 1.1242, "step": 4022 }, { "epoch": 0.5695476746655341, "grad_norm": 8.549092955188012, "learning_rate": 4.196065110719505e-06, "loss": 1.1184, "step": 4023 }, { "epoch": 0.5696892475401713, "grad_norm": 8.520302703924544, "learning_rate": 4.195643954298026e-06, "loss": 1.1458, "step": 4024 }, { "epoch": 0.5698308204148085, "grad_norm": 9.131218450493067, "learning_rate": 4.195222708737809e-06, "loss": 1.1849, "step": 4025 }, { "epoch": 0.5699723932894457, "grad_norm": 8.365078187384425, "learning_rate": 4.1948013740609976e-06, "loss": 1.2646, "step": 4026 }, { "epoch": 0.5701139661640829, "grad_norm": 8.84112833477343, "learning_rate": 4.194379950289742e-06, "loss": 1.1666, "step": 4027 }, { "epoch": 0.5702555390387202, "grad_norm": 8.374800950147403, "learning_rate": 4.193958437446195e-06, "loss": 1.1589, "step": 4028 }, { "epoch": 0.5703971119133574, "grad_norm": 9.574423543046372, "learning_rate": 4.193536835552517e-06, "loss": 1.2561, "step": 4029 }, { "epoch": 0.5705386847879946, "grad_norm": 7.063825904817744, "learning_rate": 4.19311514463087e-06, "loss": 0.9717, "step": 4030 }, { "epoch": 0.5706802576626319, "grad_norm": 10.440691248784317, "learning_rate": 4.192693364703422e-06, "loss": 0.9885, "step": 4031 }, { "epoch": 0.5708218305372691, "grad_norm": 8.27077988255779, "learning_rate": 4.192271495792346e-06, "loss": 1.2711, "step": 4032 }, { "epoch": 0.5709634034119063, "grad_norm": 9.103620173785519, "learning_rate": 4.191849537919819e-06, "loss": 1.2532, "step": 4033 }, { "epoch": 0.5711049762865436, "grad_norm": 9.12607515872702, "learning_rate": 4.191427491108024e-06, "loss": 1.1615, "step": 4034 }, { "epoch": 0.5712465491611807, "grad_norm": 10.257500486139133, "learning_rate": 4.191005355379147e-06, "loss": 1.2533, "step": 4035 }, { "epoch": 0.5713881220358179, "grad_norm": 10.71139585486728, "learning_rate": 4.190583130755379e-06, "loss": 1.332, "step": 4036 }, { "epoch": 0.5715296949104551, "grad_norm": 7.328551062457628, "learning_rate": 4.190160817258916e-06, "loss": 0.9653, "step": 4037 }, { "epoch": 0.5716712677850924, "grad_norm": 7.877544900130018, "learning_rate": 4.189738414911959e-06, "loss": 1.0564, "step": 4038 }, { "epoch": 0.5718128406597296, "grad_norm": 8.961281035671908, "learning_rate": 4.189315923736715e-06, "loss": 1.1221, "step": 4039 }, { "epoch": 0.5719544135343668, "grad_norm": 8.407394388007964, "learning_rate": 4.18889334375539e-06, "loss": 0.9866, "step": 4040 }, { "epoch": 0.572095986409004, "grad_norm": 7.7574862347209335, "learning_rate": 4.188470674990203e-06, "loss": 1.0269, "step": 4041 }, { "epoch": 0.5722375592836413, "grad_norm": 7.95340007313782, "learning_rate": 4.1880479174633715e-06, "loss": 1.0793, "step": 4042 }, { "epoch": 0.5723791321582785, "grad_norm": 9.101889412047404, "learning_rate": 4.187625071197119e-06, "loss": 1.0093, "step": 4043 }, { "epoch": 0.5725207050329157, "grad_norm": 8.463909046513688, "learning_rate": 4.187202136213675e-06, "loss": 0.9859, "step": 4044 }, { "epoch": 0.5726622779075529, "grad_norm": 11.678478328922033, "learning_rate": 4.186779112535273e-06, "loss": 1.1561, "step": 4045 }, { "epoch": 0.5728038507821901, "grad_norm": 8.404019765432102, "learning_rate": 4.186356000184151e-06, "loss": 1.1224, "step": 4046 }, { "epoch": 0.5729454236568273, "grad_norm": 8.948638590835733, "learning_rate": 4.185932799182551e-06, "loss": 1.124, "step": 4047 }, { "epoch": 0.5730869965314646, "grad_norm": 8.977544958203357, "learning_rate": 4.185509509552721e-06, "loss": 1.1706, "step": 4048 }, { "epoch": 0.5732285694061018, "grad_norm": 8.90232618128477, "learning_rate": 4.185086131316914e-06, "loss": 1.1396, "step": 4049 }, { "epoch": 0.573370142280739, "grad_norm": 9.366089515507525, "learning_rate": 4.184662664497383e-06, "loss": 1.1419, "step": 4050 }, { "epoch": 0.5735117151553762, "grad_norm": 10.383522669584979, "learning_rate": 4.184239109116393e-06, "loss": 1.1283, "step": 4051 }, { "epoch": 0.5736532880300135, "grad_norm": 10.42762444483402, "learning_rate": 4.183815465196209e-06, "loss": 1.0869, "step": 4052 }, { "epoch": 0.5737948609046507, "grad_norm": 10.162951890346047, "learning_rate": 4.183391732759102e-06, "loss": 1.0913, "step": 4053 }, { "epoch": 0.5739364337792879, "grad_norm": 9.257845876126545, "learning_rate": 4.182967911827347e-06, "loss": 1.0669, "step": 4054 }, { "epoch": 0.574078006653925, "grad_norm": 10.071332194786068, "learning_rate": 4.182544002423223e-06, "loss": 1.2565, "step": 4055 }, { "epoch": 0.5742195795285623, "grad_norm": 9.607555086810262, "learning_rate": 4.182120004569015e-06, "loss": 1.1685, "step": 4056 }, { "epoch": 0.5743611524031995, "grad_norm": 8.450932414058947, "learning_rate": 4.181695918287013e-06, "loss": 1.1861, "step": 4057 }, { "epoch": 0.5745027252778367, "grad_norm": 9.38778872191968, "learning_rate": 4.181271743599511e-06, "loss": 0.9754, "step": 4058 }, { "epoch": 0.574644298152474, "grad_norm": 9.65134500410091, "learning_rate": 4.180847480528806e-06, "loss": 1.1911, "step": 4059 }, { "epoch": 0.5747858710271112, "grad_norm": 9.338341822332413, "learning_rate": 4.180423129097203e-06, "loss": 1.1215, "step": 4060 }, { "epoch": 0.5749274439017484, "grad_norm": 7.990902257591496, "learning_rate": 4.179998689327009e-06, "loss": 1.0823, "step": 4061 }, { "epoch": 0.5750690167763857, "grad_norm": 8.871318214630472, "learning_rate": 4.1795741612405365e-06, "loss": 1.1014, "step": 4062 }, { "epoch": 0.5752105896510229, "grad_norm": 10.852700708620802, "learning_rate": 4.179149544860102e-06, "loss": 1.2563, "step": 4063 }, { "epoch": 0.5753521625256601, "grad_norm": 9.80430737248681, "learning_rate": 4.178724840208029e-06, "loss": 1.1569, "step": 4064 }, { "epoch": 0.5754937354002974, "grad_norm": 7.4343878381529285, "learning_rate": 4.178300047306643e-06, "loss": 1.013, "step": 4065 }, { "epoch": 0.5756353082749345, "grad_norm": 8.474590244346349, "learning_rate": 4.177875166178274e-06, "loss": 1.0396, "step": 4066 }, { "epoch": 0.5757768811495717, "grad_norm": 8.343118593706572, "learning_rate": 4.17745019684526e-06, "loss": 1.2058, "step": 4067 }, { "epoch": 0.5759184540242089, "grad_norm": 7.762994270744209, "learning_rate": 4.177025139329939e-06, "loss": 1.0964, "step": 4068 }, { "epoch": 0.5760600268988462, "grad_norm": 9.836095346603777, "learning_rate": 4.176599993654657e-06, "loss": 1.2943, "step": 4069 }, { "epoch": 0.5762015997734834, "grad_norm": 9.36390782608365, "learning_rate": 4.176174759841762e-06, "loss": 0.9977, "step": 4070 }, { "epoch": 0.5763431726481206, "grad_norm": 12.320574441373653, "learning_rate": 4.175749437913611e-06, "loss": 1.2846, "step": 4071 }, { "epoch": 0.5764847455227579, "grad_norm": 8.520947394194433, "learning_rate": 4.175324027892562e-06, "loss": 1.1461, "step": 4072 }, { "epoch": 0.5766263183973951, "grad_norm": 10.218959047938176, "learning_rate": 4.174898529800977e-06, "loss": 1.1832, "step": 4073 }, { "epoch": 0.5767678912720323, "grad_norm": 7.789274314211884, "learning_rate": 4.1744729436612255e-06, "loss": 1.2869, "step": 4074 }, { "epoch": 0.5769094641466695, "grad_norm": 9.093485720225512, "learning_rate": 4.174047269495681e-06, "loss": 1.2063, "step": 4075 }, { "epoch": 0.5770510370213067, "grad_norm": 8.59185881179369, "learning_rate": 4.173621507326719e-06, "loss": 1.0576, "step": 4076 }, { "epoch": 0.5771926098959439, "grad_norm": 9.645023671739638, "learning_rate": 4.1731956571767215e-06, "loss": 1.2116, "step": 4077 }, { "epoch": 0.5773341827705811, "grad_norm": 8.798726457861584, "learning_rate": 4.172769719068076e-06, "loss": 1.0892, "step": 4078 }, { "epoch": 0.5774757556452184, "grad_norm": 10.446653087471626, "learning_rate": 4.172343693023174e-06, "loss": 1.0931, "step": 4079 }, { "epoch": 0.5776173285198556, "grad_norm": 9.831257962116332, "learning_rate": 4.171917579064412e-06, "loss": 1.0907, "step": 4080 }, { "epoch": 0.5777589013944928, "grad_norm": 9.88072174030698, "learning_rate": 4.1714913772141885e-06, "loss": 1.1848, "step": 4081 }, { "epoch": 0.57790047426913, "grad_norm": 7.653691576682233, "learning_rate": 4.171065087494909e-06, "loss": 1.0821, "step": 4082 }, { "epoch": 0.5780420471437673, "grad_norm": 8.300730238813847, "learning_rate": 4.170638709928984e-06, "loss": 1.1967, "step": 4083 }, { "epoch": 0.5781836200184045, "grad_norm": 10.337715163292287, "learning_rate": 4.170212244538829e-06, "loss": 1.1285, "step": 4084 }, { "epoch": 0.5783251928930417, "grad_norm": 10.752569911634605, "learning_rate": 4.169785691346861e-06, "loss": 1.2651, "step": 4085 }, { "epoch": 0.5784667657676789, "grad_norm": 9.667508669608377, "learning_rate": 4.169359050375505e-06, "loss": 1.1189, "step": 4086 }, { "epoch": 0.5786083386423161, "grad_norm": 9.83891211411889, "learning_rate": 4.168932321647186e-06, "loss": 1.1015, "step": 4087 }, { "epoch": 0.5787499115169533, "grad_norm": 7.294079132629863, "learning_rate": 4.168505505184341e-06, "loss": 1.0046, "step": 4088 }, { "epoch": 0.5788914843915905, "grad_norm": 7.040466236368118, "learning_rate": 4.168078601009407e-06, "loss": 1.1133, "step": 4089 }, { "epoch": 0.5790330572662278, "grad_norm": 8.650876356945814, "learning_rate": 4.167651609144822e-06, "loss": 0.9461, "step": 4090 }, { "epoch": 0.579174630140865, "grad_norm": 8.186670625335697, "learning_rate": 4.167224529613038e-06, "loss": 1.1702, "step": 4091 }, { "epoch": 0.5793162030155022, "grad_norm": 9.98669807738121, "learning_rate": 4.166797362436502e-06, "loss": 1.2398, "step": 4092 }, { "epoch": 0.5794577758901395, "grad_norm": 8.561587097063574, "learning_rate": 4.1663701076376715e-06, "loss": 1.2174, "step": 4093 }, { "epoch": 0.5795993487647767, "grad_norm": 9.831203251537772, "learning_rate": 4.1659427652390075e-06, "loss": 1.2686, "step": 4094 }, { "epoch": 0.5797409216394139, "grad_norm": 7.774454726941918, "learning_rate": 4.165515335262974e-06, "loss": 1.0376, "step": 4095 }, { "epoch": 0.5798824945140512, "grad_norm": 6.782949770768868, "learning_rate": 4.165087817732041e-06, "loss": 1.0239, "step": 4096 }, { "epoch": 0.5800240673886883, "grad_norm": 7.547984671507909, "learning_rate": 4.164660212668684e-06, "loss": 1.1463, "step": 4097 }, { "epoch": 0.5801656402633255, "grad_norm": 7.651877637827028, "learning_rate": 4.164232520095379e-06, "loss": 1.1092, "step": 4098 }, { "epoch": 0.5803072131379627, "grad_norm": 8.071314997784254, "learning_rate": 4.163804740034613e-06, "loss": 1.1168, "step": 4099 }, { "epoch": 0.5804487860126, "grad_norm": 8.59097034236179, "learning_rate": 4.163376872508872e-06, "loss": 1.2782, "step": 4100 }, { "epoch": 0.5805903588872372, "grad_norm": 8.739231567820829, "learning_rate": 4.162948917540649e-06, "loss": 1.2478, "step": 4101 }, { "epoch": 0.5807319317618744, "grad_norm": 7.127625901720225, "learning_rate": 4.162520875152441e-06, "loss": 1.0005, "step": 4102 }, { "epoch": 0.5808735046365117, "grad_norm": 9.560725826657698, "learning_rate": 4.1620927453667515e-06, "loss": 1.2837, "step": 4103 }, { "epoch": 0.5810150775111489, "grad_norm": 8.974488871266614, "learning_rate": 4.161664528206084e-06, "loss": 1.1179, "step": 4104 }, { "epoch": 0.5811566503857861, "grad_norm": 7.753438371431725, "learning_rate": 4.1612362236929524e-06, "loss": 1.0979, "step": 4105 }, { "epoch": 0.5812982232604234, "grad_norm": 9.183698840982073, "learning_rate": 4.16080783184987e-06, "loss": 1.2791, "step": 4106 }, { "epoch": 0.5814397961350605, "grad_norm": 7.607469512045973, "learning_rate": 4.16037935269936e-06, "loss": 1.0815, "step": 4107 }, { "epoch": 0.5815813690096977, "grad_norm": 8.25788133027724, "learning_rate": 4.159950786263944e-06, "loss": 0.9603, "step": 4108 }, { "epoch": 0.5817229418843349, "grad_norm": 9.097309058424484, "learning_rate": 4.159522132566153e-06, "loss": 1.1237, "step": 4109 }, { "epoch": 0.5818645147589722, "grad_norm": 6.641643419149277, "learning_rate": 4.159093391628521e-06, "loss": 1.0483, "step": 4110 }, { "epoch": 0.5820060876336094, "grad_norm": 8.971558453467512, "learning_rate": 4.158664563473587e-06, "loss": 1.0921, "step": 4111 }, { "epoch": 0.5821476605082466, "grad_norm": 8.956824265761519, "learning_rate": 4.158235648123894e-06, "loss": 1.1985, "step": 4112 }, { "epoch": 0.5822892333828839, "grad_norm": 10.802479240399299, "learning_rate": 4.1578066456019885e-06, "loss": 1.1879, "step": 4113 }, { "epoch": 0.5824308062575211, "grad_norm": 8.017256721252766, "learning_rate": 4.157377555930424e-06, "loss": 1.0004, "step": 4114 }, { "epoch": 0.5825723791321583, "grad_norm": 8.940021719340796, "learning_rate": 4.156948379131757e-06, "loss": 1.1355, "step": 4115 }, { "epoch": 0.5827139520067955, "grad_norm": 8.923395725393593, "learning_rate": 4.15651911522855e-06, "loss": 1.0651, "step": 4116 }, { "epoch": 0.5828555248814327, "grad_norm": 8.290443010629769, "learning_rate": 4.1560897642433674e-06, "loss": 1.032, "step": 4117 }, { "epoch": 0.5829970977560699, "grad_norm": 8.415771084214121, "learning_rate": 4.155660326198781e-06, "loss": 0.9822, "step": 4118 }, { "epoch": 0.5831386706307071, "grad_norm": 10.12268944266959, "learning_rate": 4.155230801117366e-06, "loss": 1.0923, "step": 4119 }, { "epoch": 0.5832802435053444, "grad_norm": 8.181749960104376, "learning_rate": 4.154801189021701e-06, "loss": 1.0497, "step": 4120 }, { "epoch": 0.5834218163799816, "grad_norm": 10.579451642599699, "learning_rate": 4.154371489934372e-06, "loss": 1.2815, "step": 4121 }, { "epoch": 0.5835633892546188, "grad_norm": 9.646813185641768, "learning_rate": 4.153941703877967e-06, "loss": 1.1538, "step": 4122 }, { "epoch": 0.583704962129256, "grad_norm": 8.707178894132909, "learning_rate": 4.153511830875081e-06, "loss": 1.2126, "step": 4123 }, { "epoch": 0.5838465350038933, "grad_norm": 9.319915072336572, "learning_rate": 4.15308187094831e-06, "loss": 1.1144, "step": 4124 }, { "epoch": 0.5839881078785305, "grad_norm": 8.967692987446094, "learning_rate": 4.152651824120258e-06, "loss": 0.9968, "step": 4125 }, { "epoch": 0.5841296807531677, "grad_norm": 9.544119529340149, "learning_rate": 4.152221690413531e-06, "loss": 1.079, "step": 4126 }, { "epoch": 0.584271253627805, "grad_norm": 8.873493335331242, "learning_rate": 4.151791469850743e-06, "loss": 1.0397, "step": 4127 }, { "epoch": 0.5844128265024421, "grad_norm": 9.688178844662803, "learning_rate": 4.151361162454509e-06, "loss": 1.2388, "step": 4128 }, { "epoch": 0.5845543993770793, "grad_norm": 7.765766963774056, "learning_rate": 4.150930768247449e-06, "loss": 1.2118, "step": 4129 }, { "epoch": 0.5846959722517165, "grad_norm": 8.878942339345924, "learning_rate": 4.15050028725219e-06, "loss": 1.11, "step": 4130 }, { "epoch": 0.5848375451263538, "grad_norm": 9.746810391362693, "learning_rate": 4.1500697194913615e-06, "loss": 1.1845, "step": 4131 }, { "epoch": 0.584979118000991, "grad_norm": 9.643668561432102, "learning_rate": 4.149639064987598e-06, "loss": 1.2216, "step": 4132 }, { "epoch": 0.5851206908756282, "grad_norm": 10.86843674017993, "learning_rate": 4.149208323763539e-06, "loss": 1.1856, "step": 4133 }, { "epoch": 0.5852622637502655, "grad_norm": 7.289481949262448, "learning_rate": 4.148777495841829e-06, "loss": 1.0755, "step": 4134 }, { "epoch": 0.5854038366249027, "grad_norm": 7.287341798926737, "learning_rate": 4.1483465812451144e-06, "loss": 1.1371, "step": 4135 }, { "epoch": 0.5855454094995399, "grad_norm": 10.108463676683614, "learning_rate": 4.147915579996049e-06, "loss": 1.2178, "step": 4136 }, { "epoch": 0.5856869823741772, "grad_norm": 9.00074849724967, "learning_rate": 4.147484492117291e-06, "loss": 1.0328, "step": 4137 }, { "epoch": 0.5858285552488143, "grad_norm": 7.047714555552244, "learning_rate": 4.147053317631501e-06, "loss": 1.0872, "step": 4138 }, { "epoch": 0.5859701281234515, "grad_norm": 9.926110992899453, "learning_rate": 4.146622056561347e-06, "loss": 1.2822, "step": 4139 }, { "epoch": 0.5861117009980887, "grad_norm": 9.76797471731602, "learning_rate": 4.146190708929498e-06, "loss": 1.2612, "step": 4140 }, { "epoch": 0.586253273872726, "grad_norm": 10.375259212335108, "learning_rate": 4.145759274758632e-06, "loss": 1.2155, "step": 4141 }, { "epoch": 0.5863948467473632, "grad_norm": 8.62162515707573, "learning_rate": 4.145327754071427e-06, "loss": 1.0248, "step": 4142 }, { "epoch": 0.5865364196220004, "grad_norm": 8.611983054803956, "learning_rate": 4.1448961468905706e-06, "loss": 1.1078, "step": 4143 }, { "epoch": 0.5866779924966377, "grad_norm": 7.856641897109354, "learning_rate": 4.1444644532387485e-06, "loss": 1.0677, "step": 4144 }, { "epoch": 0.5868195653712749, "grad_norm": 7.322781369373428, "learning_rate": 4.1440326731386575e-06, "loss": 1.0377, "step": 4145 }, { "epoch": 0.5869611382459121, "grad_norm": 10.994524546669775, "learning_rate": 4.143600806612993e-06, "loss": 1.1034, "step": 4146 }, { "epoch": 0.5871027111205493, "grad_norm": 10.929041790924465, "learning_rate": 4.143168853684461e-06, "loss": 1.2634, "step": 4147 }, { "epoch": 0.5872442839951865, "grad_norm": 9.191429906604704, "learning_rate": 4.142736814375768e-06, "loss": 1.1835, "step": 4148 }, { "epoch": 0.5873858568698237, "grad_norm": 7.47293088720362, "learning_rate": 4.142304688709624e-06, "loss": 0.9351, "step": 4149 }, { "epoch": 0.5875274297444609, "grad_norm": 8.741201282061615, "learning_rate": 4.141872476708748e-06, "loss": 1.1003, "step": 4150 }, { "epoch": 0.5876690026190982, "grad_norm": 7.788022690280018, "learning_rate": 4.1414401783958605e-06, "loss": 1.1501, "step": 4151 }, { "epoch": 0.5878105754937354, "grad_norm": 8.389123650887667, "learning_rate": 4.141007793793686e-06, "loss": 1.1231, "step": 4152 }, { "epoch": 0.5879521483683726, "grad_norm": 10.422424759441103, "learning_rate": 4.140575322924955e-06, "loss": 1.1946, "step": 4153 }, { "epoch": 0.5880937212430098, "grad_norm": 11.706238764051268, "learning_rate": 4.140142765812404e-06, "loss": 1.0961, "step": 4154 }, { "epoch": 0.5882352941176471, "grad_norm": 10.516478187899336, "learning_rate": 4.13971012247877e-06, "loss": 0.9921, "step": 4155 }, { "epoch": 0.5883768669922843, "grad_norm": 9.179392864287571, "learning_rate": 4.139277392946797e-06, "loss": 1.2802, "step": 4156 }, { "epoch": 0.5885184398669215, "grad_norm": 7.556543195288164, "learning_rate": 4.138844577239234e-06, "loss": 1.1222, "step": 4157 }, { "epoch": 0.5886600127415588, "grad_norm": 10.364952645678994, "learning_rate": 4.138411675378833e-06, "loss": 1.2151, "step": 4158 }, { "epoch": 0.5888015856161959, "grad_norm": 9.753943794981806, "learning_rate": 4.137978687388352e-06, "loss": 1.0555, "step": 4159 }, { "epoch": 0.5889431584908331, "grad_norm": 8.801420131881594, "learning_rate": 4.137545613290554e-06, "loss": 1.1348, "step": 4160 }, { "epoch": 0.5890847313654703, "grad_norm": 8.289929027368606, "learning_rate": 4.137112453108203e-06, "loss": 1.1467, "step": 4161 }, { "epoch": 0.5892263042401076, "grad_norm": 7.99434175664703, "learning_rate": 4.136679206864072e-06, "loss": 1.2752, "step": 4162 }, { "epoch": 0.5893678771147448, "grad_norm": 8.24725989874227, "learning_rate": 4.136245874580935e-06, "loss": 1.1972, "step": 4163 }, { "epoch": 0.589509449989382, "grad_norm": 8.131861679679593, "learning_rate": 4.135812456281571e-06, "loss": 1.2154, "step": 4164 }, { "epoch": 0.5896510228640193, "grad_norm": 9.077710665835147, "learning_rate": 4.1353789519887685e-06, "loss": 1.248, "step": 4165 }, { "epoch": 0.5897925957386565, "grad_norm": 9.149533252589139, "learning_rate": 4.134945361725312e-06, "loss": 1.1779, "step": 4166 }, { "epoch": 0.5899341686132937, "grad_norm": 6.688449400062564, "learning_rate": 4.134511685513998e-06, "loss": 1.1858, "step": 4167 }, { "epoch": 0.590075741487931, "grad_norm": 9.935129326725924, "learning_rate": 4.134077923377622e-06, "loss": 1.2876, "step": 4168 }, { "epoch": 0.5902173143625681, "grad_norm": 8.615726959322565, "learning_rate": 4.13364407533899e-06, "loss": 1.2473, "step": 4169 }, { "epoch": 0.5903588872372053, "grad_norm": 8.677442698156247, "learning_rate": 4.133210141420905e-06, "loss": 1.1777, "step": 4170 }, { "epoch": 0.5905004601118425, "grad_norm": 8.379037367855215, "learning_rate": 4.132776121646182e-06, "loss": 1.1961, "step": 4171 }, { "epoch": 0.5906420329864798, "grad_norm": 7.288691176986876, "learning_rate": 4.132342016037635e-06, "loss": 1.0755, "step": 4172 }, { "epoch": 0.590783605861117, "grad_norm": 8.57669472365739, "learning_rate": 4.131907824618086e-06, "loss": 0.967, "step": 4173 }, { "epoch": 0.5909251787357542, "grad_norm": 9.51261345573869, "learning_rate": 4.131473547410359e-06, "loss": 1.1579, "step": 4174 }, { "epoch": 0.5910667516103915, "grad_norm": 9.559663239616906, "learning_rate": 4.131039184437283e-06, "loss": 1.3152, "step": 4175 }, { "epoch": 0.5912083244850287, "grad_norm": 9.408271445607758, "learning_rate": 4.130604735721695e-06, "loss": 1.1878, "step": 4176 }, { "epoch": 0.5913498973596659, "grad_norm": 7.9549743061142255, "learning_rate": 4.130170201286432e-06, "loss": 1.1532, "step": 4177 }, { "epoch": 0.5914914702343032, "grad_norm": 9.041749124693006, "learning_rate": 4.129735581154336e-06, "loss": 1.1018, "step": 4178 }, { "epoch": 0.5916330431089404, "grad_norm": 9.821674698557132, "learning_rate": 4.129300875348255e-06, "loss": 1.086, "step": 4179 }, { "epoch": 0.5917746159835775, "grad_norm": 7.353080373905036, "learning_rate": 4.128866083891043e-06, "loss": 1.2214, "step": 4180 }, { "epoch": 0.5919161888582147, "grad_norm": 8.796499891397456, "learning_rate": 4.128431206805556e-06, "loss": 0.9857, "step": 4181 }, { "epoch": 0.592057761732852, "grad_norm": 7.433981952964505, "learning_rate": 4.127996244114654e-06, "loss": 1.0217, "step": 4182 }, { "epoch": 0.5921993346074892, "grad_norm": 10.407013552951135, "learning_rate": 4.127561195841203e-06, "loss": 1.125, "step": 4183 }, { "epoch": 0.5923409074821264, "grad_norm": 9.372043397719116, "learning_rate": 4.1271260620080745e-06, "loss": 1.1811, "step": 4184 }, { "epoch": 0.5924824803567637, "grad_norm": 8.884961622302324, "learning_rate": 4.126690842638141e-06, "loss": 1.1071, "step": 4185 }, { "epoch": 0.5926240532314009, "grad_norm": 8.108978908619116, "learning_rate": 4.1262555377542834e-06, "loss": 1.0033, "step": 4186 }, { "epoch": 0.5927656261060381, "grad_norm": 7.575771320214953, "learning_rate": 4.125820147379384e-06, "loss": 1.0524, "step": 4187 }, { "epoch": 0.5929071989806753, "grad_norm": 8.210040332913499, "learning_rate": 4.125384671536333e-06, "loss": 1.2502, "step": 4188 }, { "epoch": 0.5930487718553126, "grad_norm": 8.55229436418097, "learning_rate": 4.124949110248021e-06, "loss": 1.2134, "step": 4189 }, { "epoch": 0.5931903447299497, "grad_norm": 9.21540880593201, "learning_rate": 4.124513463537346e-06, "loss": 1.089, "step": 4190 }, { "epoch": 0.5933319176045869, "grad_norm": 8.258963137521175, "learning_rate": 4.124077731427209e-06, "loss": 1.1203, "step": 4191 }, { "epoch": 0.5934734904792242, "grad_norm": 8.95103443497201, "learning_rate": 4.123641913940518e-06, "loss": 1.2495, "step": 4192 }, { "epoch": 0.5936150633538614, "grad_norm": 9.416428239442714, "learning_rate": 4.123206011100182e-06, "loss": 1.2453, "step": 4193 }, { "epoch": 0.5937566362284986, "grad_norm": 8.390198084024009, "learning_rate": 4.122770022929114e-06, "loss": 1.1786, "step": 4194 }, { "epoch": 0.5938982091031358, "grad_norm": 7.943414602094712, "learning_rate": 4.1223339494502375e-06, "loss": 1.0761, "step": 4195 }, { "epoch": 0.5940397819777731, "grad_norm": 8.055679156644999, "learning_rate": 4.1218977906864754e-06, "loss": 1.0202, "step": 4196 }, { "epoch": 0.5941813548524103, "grad_norm": 8.653171253459579, "learning_rate": 4.121461546660756e-06, "loss": 1.2089, "step": 4197 }, { "epoch": 0.5943229277270475, "grad_norm": 9.604427794728855, "learning_rate": 4.121025217396011e-06, "loss": 1.1913, "step": 4198 }, { "epoch": 0.5944645006016848, "grad_norm": 10.020875123082908, "learning_rate": 4.12058880291518e-06, "loss": 1.1932, "step": 4199 }, { "epoch": 0.5946060734763219, "grad_norm": 8.962455428051214, "learning_rate": 4.120152303241203e-06, "loss": 1.225, "step": 4200 }, { "epoch": 0.5947476463509591, "grad_norm": 7.960133640183441, "learning_rate": 4.119715718397028e-06, "loss": 1.0764, "step": 4201 }, { "epoch": 0.5948892192255963, "grad_norm": 7.577762807468954, "learning_rate": 4.119279048405606e-06, "loss": 1.0967, "step": 4202 }, { "epoch": 0.5950307921002336, "grad_norm": 9.176147921313612, "learning_rate": 4.1188422932898905e-06, "loss": 1.2163, "step": 4203 }, { "epoch": 0.5951723649748708, "grad_norm": 9.716400156788312, "learning_rate": 4.1184054530728436e-06, "loss": 1.2074, "step": 4204 }, { "epoch": 0.595313937849508, "grad_norm": 8.751408490850514, "learning_rate": 4.117968527777428e-06, "loss": 1.2021, "step": 4205 }, { "epoch": 0.5954555107241453, "grad_norm": 10.054197309826472, "learning_rate": 4.117531517426614e-06, "loss": 1.1043, "step": 4206 }, { "epoch": 0.5955970835987825, "grad_norm": 10.350286993940486, "learning_rate": 4.117094422043374e-06, "loss": 1.1965, "step": 4207 }, { "epoch": 0.5957386564734197, "grad_norm": 8.067584186844064, "learning_rate": 4.116657241650687e-06, "loss": 1.1965, "step": 4208 }, { "epoch": 0.595880229348057, "grad_norm": 8.592962055496756, "learning_rate": 4.116219976271533e-06, "loss": 1.1929, "step": 4209 }, { "epoch": 0.5960218022226942, "grad_norm": 8.543518858031453, "learning_rate": 4.1157826259289e-06, "loss": 1.1736, "step": 4210 }, { "epoch": 0.5961633750973313, "grad_norm": 8.97533555107521, "learning_rate": 4.115345190645779e-06, "loss": 1.1287, "step": 4211 }, { "epoch": 0.5963049479719685, "grad_norm": 8.364866132377593, "learning_rate": 4.114907670445166e-06, "loss": 1.3112, "step": 4212 }, { "epoch": 0.5964465208466058, "grad_norm": 8.589297053004168, "learning_rate": 4.114470065350061e-06, "loss": 1.1868, "step": 4213 }, { "epoch": 0.596588093721243, "grad_norm": 7.235499383212114, "learning_rate": 4.114032375383469e-06, "loss": 1.1515, "step": 4214 }, { "epoch": 0.5967296665958802, "grad_norm": 9.914541535043245, "learning_rate": 4.113594600568398e-06, "loss": 0.9977, "step": 4215 }, { "epoch": 0.5968712394705175, "grad_norm": 7.996490662474325, "learning_rate": 4.113156740927862e-06, "loss": 1.1097, "step": 4216 }, { "epoch": 0.5970128123451547, "grad_norm": 6.823452885258588, "learning_rate": 4.1127187964848785e-06, "loss": 1.0147, "step": 4217 }, { "epoch": 0.5971543852197919, "grad_norm": 9.401698230982786, "learning_rate": 4.112280767262471e-06, "loss": 0.9416, "step": 4218 }, { "epoch": 0.5972959580944291, "grad_norm": 8.567527909780235, "learning_rate": 4.111842653283665e-06, "loss": 1.1709, "step": 4219 }, { "epoch": 0.5974375309690664, "grad_norm": 9.335858230573512, "learning_rate": 4.1114044545714935e-06, "loss": 1.0292, "step": 4220 }, { "epoch": 0.5975791038437035, "grad_norm": 7.653405482431701, "learning_rate": 4.110966171148991e-06, "loss": 1.147, "step": 4221 }, { "epoch": 0.5977206767183407, "grad_norm": 9.53485179447881, "learning_rate": 4.110527803039198e-06, "loss": 1.16, "step": 4222 }, { "epoch": 0.597862249592978, "grad_norm": 9.92842196031526, "learning_rate": 4.11008935026516e-06, "loss": 1.2976, "step": 4223 }, { "epoch": 0.5980038224676152, "grad_norm": 10.835424563720345, "learning_rate": 4.109650812849924e-06, "loss": 1.252, "step": 4224 }, { "epoch": 0.5981453953422524, "grad_norm": 8.451081824037798, "learning_rate": 4.109212190816546e-06, "loss": 1.134, "step": 4225 }, { "epoch": 0.5982869682168896, "grad_norm": 9.932223856707322, "learning_rate": 4.108773484188082e-06, "loss": 1.0353, "step": 4226 }, { "epoch": 0.5984285410915269, "grad_norm": 9.268923399133467, "learning_rate": 4.1083346929875966e-06, "loss": 1.1191, "step": 4227 }, { "epoch": 0.5985701139661641, "grad_norm": 8.382205425653297, "learning_rate": 4.107895817238155e-06, "loss": 1.1333, "step": 4228 }, { "epoch": 0.5987116868408013, "grad_norm": 8.008381744249027, "learning_rate": 4.107456856962829e-06, "loss": 1.0721, "step": 4229 }, { "epoch": 0.5988532597154386, "grad_norm": 9.197036050105142, "learning_rate": 4.107017812184695e-06, "loss": 1.074, "step": 4230 }, { "epoch": 0.5989948325900757, "grad_norm": 9.035891280764337, "learning_rate": 4.106578682926833e-06, "loss": 1.1121, "step": 4231 }, { "epoch": 0.5991364054647129, "grad_norm": 9.90896251617558, "learning_rate": 4.106139469212327e-06, "loss": 1.1301, "step": 4232 }, { "epoch": 0.5992779783393501, "grad_norm": 7.865984160779053, "learning_rate": 4.105700171064267e-06, "loss": 0.9687, "step": 4233 }, { "epoch": 0.5994195512139874, "grad_norm": 8.575871850892462, "learning_rate": 4.105260788505746e-06, "loss": 1.0548, "step": 4234 }, { "epoch": 0.5995611240886246, "grad_norm": 9.044067157912746, "learning_rate": 4.104821321559863e-06, "loss": 1.056, "step": 4235 }, { "epoch": 0.5997026969632618, "grad_norm": 9.327339791072532, "learning_rate": 4.1043817702497195e-06, "loss": 1.2452, "step": 4236 }, { "epoch": 0.5998442698378991, "grad_norm": 10.277363793452519, "learning_rate": 4.103942134598422e-06, "loss": 1.0997, "step": 4237 }, { "epoch": 0.5999858427125363, "grad_norm": 8.840584232132494, "learning_rate": 4.103502414629082e-06, "loss": 1.1038, "step": 4238 }, { "epoch": 0.6001274155871735, "grad_norm": 8.550735299242682, "learning_rate": 4.103062610364817e-06, "loss": 1.2238, "step": 4239 }, { "epoch": 0.6002689884618108, "grad_norm": 9.322283016103095, "learning_rate": 4.102622721828746e-06, "loss": 1.2153, "step": 4240 }, { "epoch": 0.600410561336448, "grad_norm": 9.91576922119746, "learning_rate": 4.102182749043993e-06, "loss": 1.0889, "step": 4241 }, { "epoch": 0.6005521342110851, "grad_norm": 8.607879010225894, "learning_rate": 4.101742692033687e-06, "loss": 1.0126, "step": 4242 }, { "epoch": 0.6006937070857223, "grad_norm": 8.761858941979186, "learning_rate": 4.101302550820962e-06, "loss": 1.188, "step": 4243 }, { "epoch": 0.6008352799603596, "grad_norm": 7.783028640610071, "learning_rate": 4.100862325428957e-06, "loss": 1.1186, "step": 4244 }, { "epoch": 0.6009768528349968, "grad_norm": 8.37039285592379, "learning_rate": 4.1004220158808114e-06, "loss": 1.1089, "step": 4245 }, { "epoch": 0.601118425709634, "grad_norm": 9.429318146634422, "learning_rate": 4.0999816221996755e-06, "loss": 1.1402, "step": 4246 }, { "epoch": 0.6012599985842713, "grad_norm": 11.664621737054086, "learning_rate": 4.099541144408698e-06, "loss": 1.0134, "step": 4247 }, { "epoch": 0.6014015714589085, "grad_norm": 8.277935012359523, "learning_rate": 4.099100582531035e-06, "loss": 1.2281, "step": 4248 }, { "epoch": 0.6015431443335457, "grad_norm": 8.875501591652045, "learning_rate": 4.098659936589847e-06, "loss": 1.288, "step": 4249 }, { "epoch": 0.601684717208183, "grad_norm": 7.559360072991282, "learning_rate": 4.098219206608298e-06, "loss": 1.0889, "step": 4250 }, { "epoch": 0.6018262900828202, "grad_norm": 7.564248883764805, "learning_rate": 4.0977783926095575e-06, "loss": 0.9899, "step": 4251 }, { "epoch": 0.6019678629574573, "grad_norm": 9.815290503767061, "learning_rate": 4.097337494616798e-06, "loss": 1.1256, "step": 4252 }, { "epoch": 0.6021094358320945, "grad_norm": 7.669848763020521, "learning_rate": 4.096896512653197e-06, "loss": 1.0479, "step": 4253 }, { "epoch": 0.6022510087067318, "grad_norm": 7.646144379015223, "learning_rate": 4.096455446741937e-06, "loss": 1.109, "step": 4254 }, { "epoch": 0.602392581581369, "grad_norm": 7.757737020104233, "learning_rate": 4.096014296906205e-06, "loss": 1.0091, "step": 4255 }, { "epoch": 0.6025341544560062, "grad_norm": 8.793566120451139, "learning_rate": 4.095573063169191e-06, "loss": 1.0139, "step": 4256 }, { "epoch": 0.6026757273306435, "grad_norm": 8.856213094544223, "learning_rate": 4.095131745554092e-06, "loss": 1.0785, "step": 4257 }, { "epoch": 0.6028173002052807, "grad_norm": 9.4234300029246, "learning_rate": 4.094690344084105e-06, "loss": 1.1495, "step": 4258 }, { "epoch": 0.6029588730799179, "grad_norm": 7.254879329001216, "learning_rate": 4.094248858782436e-06, "loss": 0.9735, "step": 4259 }, { "epoch": 0.6031004459545551, "grad_norm": 8.055967538171577, "learning_rate": 4.093807289672294e-06, "loss": 1.1259, "step": 4260 }, { "epoch": 0.6032420188291924, "grad_norm": 7.609475011775384, "learning_rate": 4.09336563677689e-06, "loss": 1.1713, "step": 4261 }, { "epoch": 0.6033835917038295, "grad_norm": 8.252320859040012, "learning_rate": 4.092923900119443e-06, "loss": 1.0637, "step": 4262 }, { "epoch": 0.6035251645784667, "grad_norm": 9.99997024531706, "learning_rate": 4.092482079723175e-06, "loss": 1.2031, "step": 4263 }, { "epoch": 0.603666737453104, "grad_norm": 9.318683389160825, "learning_rate": 4.09204017561131e-06, "loss": 1.0698, "step": 4264 }, { "epoch": 0.6038083103277412, "grad_norm": 8.045674591895441, "learning_rate": 4.091598187807082e-06, "loss": 1.0333, "step": 4265 }, { "epoch": 0.6039498832023784, "grad_norm": 10.605602194717548, "learning_rate": 4.091156116333723e-06, "loss": 1.1451, "step": 4266 }, { "epoch": 0.6040914560770156, "grad_norm": 6.725709229689757, "learning_rate": 4.090713961214473e-06, "loss": 0.9742, "step": 4267 }, { "epoch": 0.6042330289516529, "grad_norm": 8.231996717595075, "learning_rate": 4.090271722472577e-06, "loss": 1.0859, "step": 4268 }, { "epoch": 0.6043746018262901, "grad_norm": 8.144402914156752, "learning_rate": 4.089829400131282e-06, "loss": 0.9745, "step": 4269 }, { "epoch": 0.6045161747009273, "grad_norm": 8.473938425920327, "learning_rate": 4.0893869942138405e-06, "loss": 1.1027, "step": 4270 }, { "epoch": 0.6046577475755646, "grad_norm": 7.8730182879039425, "learning_rate": 4.08894450474351e-06, "loss": 1.076, "step": 4271 }, { "epoch": 0.6047993204502018, "grad_norm": 8.104273725912256, "learning_rate": 4.088501931743551e-06, "loss": 1.1352, "step": 4272 }, { "epoch": 0.6049408933248389, "grad_norm": 9.360565466468934, "learning_rate": 4.0880592752372315e-06, "loss": 1.1223, "step": 4273 }, { "epoch": 0.6050824661994761, "grad_norm": 9.405391413135199, "learning_rate": 4.087616535247819e-06, "loss": 1.2615, "step": 4274 }, { "epoch": 0.6052240390741134, "grad_norm": 9.29660131227927, "learning_rate": 4.087173711798589e-06, "loss": 1.0684, "step": 4275 }, { "epoch": 0.6053656119487506, "grad_norm": 9.459485850647155, "learning_rate": 4.086730804912821e-06, "loss": 1.1704, "step": 4276 }, { "epoch": 0.6055071848233878, "grad_norm": 10.921711889094077, "learning_rate": 4.086287814613797e-06, "loss": 1.1733, "step": 4277 }, { "epoch": 0.6056487576980251, "grad_norm": 8.77402793956001, "learning_rate": 4.085844740924805e-06, "loss": 1.1754, "step": 4278 }, { "epoch": 0.6057903305726623, "grad_norm": 8.80836378926546, "learning_rate": 4.085401583869138e-06, "loss": 1.1887, "step": 4279 }, { "epoch": 0.6059319034472995, "grad_norm": 7.267751337461528, "learning_rate": 4.0849583434700915e-06, "loss": 1.0467, "step": 4280 }, { "epoch": 0.6060734763219368, "grad_norm": 9.753783054513347, "learning_rate": 4.0845150197509675e-06, "loss": 1.2038, "step": 4281 }, { "epoch": 0.606215049196574, "grad_norm": 8.37724114792503, "learning_rate": 4.08407161273507e-06, "loss": 1.1442, "step": 4282 }, { "epoch": 0.6063566220712111, "grad_norm": 7.890504137378814, "learning_rate": 4.083628122445708e-06, "loss": 1.0825, "step": 4283 }, { "epoch": 0.6064981949458483, "grad_norm": 9.337988873084377, "learning_rate": 4.083184548906198e-06, "loss": 1.0392, "step": 4284 }, { "epoch": 0.6066397678204856, "grad_norm": 8.311605534435486, "learning_rate": 4.082740892139856e-06, "loss": 1.1079, "step": 4285 }, { "epoch": 0.6067813406951228, "grad_norm": 8.184864814028744, "learning_rate": 4.082297152170005e-06, "loss": 1.0963, "step": 4286 }, { "epoch": 0.60692291356976, "grad_norm": 8.856894494015664, "learning_rate": 4.081853329019973e-06, "loss": 1.2227, "step": 4287 }, { "epoch": 0.6070644864443973, "grad_norm": 8.47269001765401, "learning_rate": 4.081409422713091e-06, "loss": 1.2564, "step": 4288 }, { "epoch": 0.6072060593190345, "grad_norm": 8.916518973675217, "learning_rate": 4.080965433272695e-06, "loss": 1.0399, "step": 4289 }, { "epoch": 0.6073476321936717, "grad_norm": 9.253606969358454, "learning_rate": 4.080521360722124e-06, "loss": 0.9985, "step": 4290 }, { "epoch": 0.607489205068309, "grad_norm": 9.413587984021298, "learning_rate": 4.080077205084725e-06, "loss": 1.1242, "step": 4291 }, { "epoch": 0.6076307779429462, "grad_norm": 7.8164316278948345, "learning_rate": 4.079632966383845e-06, "loss": 1.0935, "step": 4292 }, { "epoch": 0.6077723508175833, "grad_norm": 8.420170372623343, "learning_rate": 4.079188644642838e-06, "loss": 1.1074, "step": 4293 }, { "epoch": 0.6079139236922205, "grad_norm": 9.176765658667632, "learning_rate": 4.07874423988506e-06, "loss": 0.9465, "step": 4294 }, { "epoch": 0.6080554965668578, "grad_norm": 9.724965355818755, "learning_rate": 4.078299752133876e-06, "loss": 1.1533, "step": 4295 }, { "epoch": 0.608197069441495, "grad_norm": 9.179063725482292, "learning_rate": 4.07785518141265e-06, "loss": 1.2897, "step": 4296 }, { "epoch": 0.6083386423161322, "grad_norm": 9.045178084594644, "learning_rate": 4.077410527744754e-06, "loss": 1.2126, "step": 4297 }, { "epoch": 0.6084802151907694, "grad_norm": 9.244525836113517, "learning_rate": 4.076965791153562e-06, "loss": 1.2845, "step": 4298 }, { "epoch": 0.6086217880654067, "grad_norm": 9.906869574103657, "learning_rate": 4.076520971662456e-06, "loss": 1.1374, "step": 4299 }, { "epoch": 0.6087633609400439, "grad_norm": 10.67582555790293, "learning_rate": 4.076076069294817e-06, "loss": 1.1818, "step": 4300 }, { "epoch": 0.6089049338146811, "grad_norm": 10.172087261370189, "learning_rate": 4.075631084074033e-06, "loss": 1.0683, "step": 4301 }, { "epoch": 0.6090465066893184, "grad_norm": 8.399579773337017, "learning_rate": 4.075186016023499e-06, "loss": 1.2178, "step": 4302 }, { "epoch": 0.6091880795639556, "grad_norm": 8.67209626336529, "learning_rate": 4.074740865166611e-06, "loss": 1.229, "step": 4303 }, { "epoch": 0.6093296524385927, "grad_norm": 12.123127271613955, "learning_rate": 4.074295631526769e-06, "loss": 1.196, "step": 4304 }, { "epoch": 0.60947122531323, "grad_norm": 8.979712614970534, "learning_rate": 4.07385031512738e-06, "loss": 1.0388, "step": 4305 }, { "epoch": 0.6096127981878672, "grad_norm": 7.618404099558511, "learning_rate": 4.0734049159918535e-06, "loss": 1.1997, "step": 4306 }, { "epoch": 0.6097543710625044, "grad_norm": 7.9574816449451315, "learning_rate": 4.072959434143603e-06, "loss": 1.1715, "step": 4307 }, { "epoch": 0.6098959439371416, "grad_norm": 8.48789295303767, "learning_rate": 4.0725138696060485e-06, "loss": 1.1426, "step": 4308 }, { "epoch": 0.6100375168117789, "grad_norm": 7.775347451054463, "learning_rate": 4.072068222402612e-06, "loss": 1.3017, "step": 4309 }, { "epoch": 0.6101790896864161, "grad_norm": 8.946095847933341, "learning_rate": 4.0716224925567225e-06, "loss": 1.1442, "step": 4310 }, { "epoch": 0.6103206625610533, "grad_norm": 10.226027204406114, "learning_rate": 4.071176680091809e-06, "loss": 1.2231, "step": 4311 }, { "epoch": 0.6104622354356906, "grad_norm": 10.117180336025456, "learning_rate": 4.07073078503131e-06, "loss": 1.0117, "step": 4312 }, { "epoch": 0.6106038083103278, "grad_norm": 7.479377113151363, "learning_rate": 4.070284807398664e-06, "loss": 1.0606, "step": 4313 }, { "epoch": 0.6107453811849649, "grad_norm": 9.009674806135052, "learning_rate": 4.069838747217317e-06, "loss": 1.1433, "step": 4314 }, { "epoch": 0.6108869540596021, "grad_norm": 10.421866947382572, "learning_rate": 4.069392604510717e-06, "loss": 1.0756, "step": 4315 }, { "epoch": 0.6110285269342394, "grad_norm": 9.45589528751649, "learning_rate": 4.068946379302318e-06, "loss": 1.2524, "step": 4316 }, { "epoch": 0.6111700998088766, "grad_norm": 9.912285443325416, "learning_rate": 4.068500071615578e-06, "loss": 1.175, "step": 4317 }, { "epoch": 0.6113116726835138, "grad_norm": 8.277261255635404, "learning_rate": 4.068053681473959e-06, "loss": 1.1086, "step": 4318 }, { "epoch": 0.6114532455581511, "grad_norm": 9.160835445902002, "learning_rate": 4.067607208900927e-06, "loss": 1.1674, "step": 4319 }, { "epoch": 0.6115948184327883, "grad_norm": 10.037531802078357, "learning_rate": 4.067160653919952e-06, "loss": 1.0876, "step": 4320 }, { "epoch": 0.6117363913074255, "grad_norm": 8.876176782952589, "learning_rate": 4.066714016554511e-06, "loss": 1.1642, "step": 4321 }, { "epoch": 0.6118779641820628, "grad_norm": 9.057762012403497, "learning_rate": 4.066267296828083e-06, "loss": 1.163, "step": 4322 }, { "epoch": 0.6120195370567, "grad_norm": 8.612326335955471, "learning_rate": 4.06582049476415e-06, "loss": 1.1596, "step": 4323 }, { "epoch": 0.6121611099313372, "grad_norm": 9.309769812666893, "learning_rate": 4.065373610386201e-06, "loss": 1.1095, "step": 4324 }, { "epoch": 0.6123026828059743, "grad_norm": 8.136451046514335, "learning_rate": 4.064926643717729e-06, "loss": 1.2355, "step": 4325 }, { "epoch": 0.6124442556806116, "grad_norm": 9.625917465277782, "learning_rate": 4.06447959478223e-06, "loss": 1.1686, "step": 4326 }, { "epoch": 0.6125858285552488, "grad_norm": 9.011246753596264, "learning_rate": 4.0640324636032044e-06, "loss": 1.1941, "step": 4327 }, { "epoch": 0.612727401429886, "grad_norm": 10.060572848381828, "learning_rate": 4.0635852502041595e-06, "loss": 1.129, "step": 4328 }, { "epoch": 0.6128689743045233, "grad_norm": 9.29919589268278, "learning_rate": 4.0631379546086045e-06, "loss": 1.0717, "step": 4329 }, { "epoch": 0.6130105471791605, "grad_norm": 9.490699230678793, "learning_rate": 4.0626905768400515e-06, "loss": 1.0813, "step": 4330 }, { "epoch": 0.6131521200537977, "grad_norm": 8.611894906855213, "learning_rate": 4.06224311692202e-06, "loss": 0.9363, "step": 4331 }, { "epoch": 0.6132936929284349, "grad_norm": 8.204879834621952, "learning_rate": 4.0617955748780336e-06, "loss": 1.0033, "step": 4332 }, { "epoch": 0.6134352658030722, "grad_norm": 7.989879644105718, "learning_rate": 4.061347950731617e-06, "loss": 1.0819, "step": 4333 }, { "epoch": 0.6135768386777094, "grad_norm": 8.19415199347009, "learning_rate": 4.060900244506304e-06, "loss": 1.0436, "step": 4334 }, { "epoch": 0.6137184115523465, "grad_norm": 8.497098988663398, "learning_rate": 4.060452456225629e-06, "loss": 1.0826, "step": 4335 }, { "epoch": 0.6138599844269838, "grad_norm": 9.41257971011754, "learning_rate": 4.060004585913131e-06, "loss": 1.1057, "step": 4336 }, { "epoch": 0.614001557301621, "grad_norm": 7.609781806614898, "learning_rate": 4.059556633592356e-06, "loss": 0.9653, "step": 4337 }, { "epoch": 0.6141431301762582, "grad_norm": 9.539161275466281, "learning_rate": 4.0591085992868504e-06, "loss": 1.1002, "step": 4338 }, { "epoch": 0.6142847030508954, "grad_norm": 8.61963873935969, "learning_rate": 4.05866048302017e-06, "loss": 1.0662, "step": 4339 }, { "epoch": 0.6144262759255327, "grad_norm": 9.083726740259156, "learning_rate": 4.058212284815869e-06, "loss": 1.098, "step": 4340 }, { "epoch": 0.6145678488001699, "grad_norm": 8.965340315087433, "learning_rate": 4.057764004697511e-06, "loss": 1.0846, "step": 4341 }, { "epoch": 0.6147094216748071, "grad_norm": 8.682289372203382, "learning_rate": 4.05731564268866e-06, "loss": 1.1992, "step": 4342 }, { "epoch": 0.6148509945494444, "grad_norm": 10.613388358633253, "learning_rate": 4.056867198812886e-06, "loss": 1.2739, "step": 4343 }, { "epoch": 0.6149925674240816, "grad_norm": 10.56149630320428, "learning_rate": 4.056418673093766e-06, "loss": 1.1884, "step": 4344 }, { "epoch": 0.6151341402987187, "grad_norm": 8.238039073757887, "learning_rate": 4.055970065554876e-06, "loss": 1.2097, "step": 4345 }, { "epoch": 0.6152757131733559, "grad_norm": 7.548478424144523, "learning_rate": 4.0555213762198e-06, "loss": 1.1833, "step": 4346 }, { "epoch": 0.6154172860479932, "grad_norm": 8.798167592784457, "learning_rate": 4.055072605112125e-06, "loss": 1.1171, "step": 4347 }, { "epoch": 0.6155588589226304, "grad_norm": 9.187673554111186, "learning_rate": 4.054623752255443e-06, "loss": 1.1299, "step": 4348 }, { "epoch": 0.6157004317972676, "grad_norm": 8.32329987816229, "learning_rate": 4.0541748176733485e-06, "loss": 0.9756, "step": 4349 }, { "epoch": 0.6158420046719049, "grad_norm": 8.159717275920118, "learning_rate": 4.0537258013894434e-06, "loss": 1.0941, "step": 4350 }, { "epoch": 0.6159835775465421, "grad_norm": 8.197104375424383, "learning_rate": 4.053276703427332e-06, "loss": 1.1075, "step": 4351 }, { "epoch": 0.6161251504211793, "grad_norm": 6.931190164374843, "learning_rate": 4.052827523810622e-06, "loss": 1.0777, "step": 4352 }, { "epoch": 0.6162667232958166, "grad_norm": 9.627392360072646, "learning_rate": 4.052378262562926e-06, "loss": 1.03, "step": 4353 }, { "epoch": 0.6164082961704538, "grad_norm": 8.415725302851019, "learning_rate": 4.051928919707863e-06, "loss": 1.1303, "step": 4354 }, { "epoch": 0.616549869045091, "grad_norm": 7.07157220735861, "learning_rate": 4.051479495269054e-06, "loss": 1.0404, "step": 4355 }, { "epoch": 0.6166914419197281, "grad_norm": 7.983402439314746, "learning_rate": 4.051029989270125e-06, "loss": 1.0877, "step": 4356 }, { "epoch": 0.6168330147943654, "grad_norm": 7.23479999257873, "learning_rate": 4.0505804017347055e-06, "loss": 1.0182, "step": 4357 }, { "epoch": 0.6169745876690026, "grad_norm": 7.29475270679018, "learning_rate": 4.05013073268643e-06, "loss": 1.0842, "step": 4358 }, { "epoch": 0.6171161605436398, "grad_norm": 8.007889195516867, "learning_rate": 4.049680982148938e-06, "loss": 1.1065, "step": 4359 }, { "epoch": 0.617257733418277, "grad_norm": 7.677593764611689, "learning_rate": 4.049231150145873e-06, "loss": 1.0649, "step": 4360 }, { "epoch": 0.6173993062929143, "grad_norm": 8.797190252316316, "learning_rate": 4.048781236700882e-06, "loss": 1.0758, "step": 4361 }, { "epoch": 0.6175408791675515, "grad_norm": 7.955888489138805, "learning_rate": 4.048331241837615e-06, "loss": 1.0085, "step": 4362 }, { "epoch": 0.6176824520421887, "grad_norm": 7.6844895834057, "learning_rate": 4.047881165579729e-06, "loss": 1.0724, "step": 4363 }, { "epoch": 0.617824024916826, "grad_norm": 8.22144670809956, "learning_rate": 4.047431007950885e-06, "loss": 1.1893, "step": 4364 }, { "epoch": 0.6179655977914632, "grad_norm": 9.983759089473294, "learning_rate": 4.046980768974746e-06, "loss": 1.2156, "step": 4365 }, { "epoch": 0.6181071706661003, "grad_norm": 7.551226070462752, "learning_rate": 4.046530448674982e-06, "loss": 0.984, "step": 4366 }, { "epoch": 0.6182487435407376, "grad_norm": 8.68117287301502, "learning_rate": 4.046080047075265e-06, "loss": 1.0524, "step": 4367 }, { "epoch": 0.6183903164153748, "grad_norm": 7.281389357907274, "learning_rate": 4.045629564199274e-06, "loss": 0.9856, "step": 4368 }, { "epoch": 0.618531889290012, "grad_norm": 9.654971139905202, "learning_rate": 4.045179000070688e-06, "loss": 1.1499, "step": 4369 }, { "epoch": 0.6186734621646492, "grad_norm": 8.68542060391404, "learning_rate": 4.044728354713195e-06, "loss": 1.1387, "step": 4370 }, { "epoch": 0.6188150350392865, "grad_norm": 8.105713004602881, "learning_rate": 4.044277628150482e-06, "loss": 1.2205, "step": 4371 }, { "epoch": 0.6189566079139237, "grad_norm": 11.24795411898506, "learning_rate": 4.0438268204062485e-06, "loss": 1.1833, "step": 4372 }, { "epoch": 0.6190981807885609, "grad_norm": 9.3033517027614, "learning_rate": 4.043375931504189e-06, "loss": 1.0748, "step": 4373 }, { "epoch": 0.6192397536631982, "grad_norm": 7.32782072887441, "learning_rate": 4.042924961468007e-06, "loss": 1.12, "step": 4374 }, { "epoch": 0.6193813265378354, "grad_norm": 8.78076213961188, "learning_rate": 4.04247391032141e-06, "loss": 0.9604, "step": 4375 }, { "epoch": 0.6195228994124725, "grad_norm": 8.096646177355725, "learning_rate": 4.042022778088111e-06, "loss": 1.0622, "step": 4376 }, { "epoch": 0.6196644722871097, "grad_norm": 8.18764117170159, "learning_rate": 4.0415715647918235e-06, "loss": 1.0387, "step": 4377 }, { "epoch": 0.619806045161747, "grad_norm": 10.581490503902543, "learning_rate": 4.041120270456268e-06, "loss": 1.1707, "step": 4378 }, { "epoch": 0.6199476180363842, "grad_norm": 9.100517560053376, "learning_rate": 4.04066889510517e-06, "loss": 1.0509, "step": 4379 }, { "epoch": 0.6200891909110214, "grad_norm": 9.684328803031956, "learning_rate": 4.040217438762256e-06, "loss": 1.2911, "step": 4380 }, { "epoch": 0.6202307637856587, "grad_norm": 11.682089792662447, "learning_rate": 4.03976590145126e-06, "loss": 0.9994, "step": 4381 }, { "epoch": 0.6203723366602959, "grad_norm": 8.920816709481265, "learning_rate": 4.0393142831959186e-06, "loss": 1.1033, "step": 4382 }, { "epoch": 0.6205139095349331, "grad_norm": 8.434278084932037, "learning_rate": 4.038862584019973e-06, "loss": 0.9988, "step": 4383 }, { "epoch": 0.6206554824095704, "grad_norm": 9.095314127324412, "learning_rate": 4.038410803947169e-06, "loss": 1.1241, "step": 4384 }, { "epoch": 0.6207970552842076, "grad_norm": 11.148766665822068, "learning_rate": 4.037958943001257e-06, "loss": 1.2077, "step": 4385 }, { "epoch": 0.6209386281588448, "grad_norm": 8.923226436202226, "learning_rate": 4.0375070012059884e-06, "loss": 1.1147, "step": 4386 }, { "epoch": 0.6210802010334819, "grad_norm": 10.274683934012163, "learning_rate": 4.037054978585124e-06, "loss": 1.2581, "step": 4387 }, { "epoch": 0.6212217739081192, "grad_norm": 9.185291673642313, "learning_rate": 4.036602875162426e-06, "loss": 1.0668, "step": 4388 }, { "epoch": 0.6213633467827564, "grad_norm": 8.988082624924013, "learning_rate": 4.03615069096166e-06, "loss": 1.1136, "step": 4389 }, { "epoch": 0.6215049196573936, "grad_norm": 8.881908897532247, "learning_rate": 4.035698426006597e-06, "loss": 0.9572, "step": 4390 }, { "epoch": 0.6216464925320309, "grad_norm": 8.891434188515987, "learning_rate": 4.035246080321014e-06, "loss": 1.1331, "step": 4391 }, { "epoch": 0.6217880654066681, "grad_norm": 7.867901463770598, "learning_rate": 4.034793653928688e-06, "loss": 1.2049, "step": 4392 }, { "epoch": 0.6219296382813053, "grad_norm": 9.8864644777901, "learning_rate": 4.034341146853406e-06, "loss": 1.0553, "step": 4393 }, { "epoch": 0.6220712111559425, "grad_norm": 7.367998582158486, "learning_rate": 4.0338885591189515e-06, "loss": 1.0347, "step": 4394 }, { "epoch": 0.6222127840305798, "grad_norm": 8.294462208758295, "learning_rate": 4.033435890749121e-06, "loss": 1.0119, "step": 4395 }, { "epoch": 0.622354356905217, "grad_norm": 9.504216513251459, "learning_rate": 4.032983141767708e-06, "loss": 1.0702, "step": 4396 }, { "epoch": 0.6224959297798541, "grad_norm": 9.387106440963818, "learning_rate": 4.032530312198515e-06, "loss": 1.0861, "step": 4397 }, { "epoch": 0.6226375026544914, "grad_norm": 9.799402911622545, "learning_rate": 4.032077402065346e-06, "loss": 1.2814, "step": 4398 }, { "epoch": 0.6227790755291286, "grad_norm": 7.724635795229786, "learning_rate": 4.03162441139201e-06, "loss": 1.1998, "step": 4399 }, { "epoch": 0.6229206484037658, "grad_norm": 8.545024776166127, "learning_rate": 4.031171340202321e-06, "loss": 1.1911, "step": 4400 }, { "epoch": 0.623062221278403, "grad_norm": 7.962224944198181, "learning_rate": 4.030718188520096e-06, "loss": 0.9765, "step": 4401 }, { "epoch": 0.6232037941530403, "grad_norm": 13.572653349485963, "learning_rate": 4.030264956369158e-06, "loss": 1.3261, "step": 4402 }, { "epoch": 0.6233453670276775, "grad_norm": 9.48860810534693, "learning_rate": 4.029811643773332e-06, "loss": 1.1403, "step": 4403 }, { "epoch": 0.6234869399023147, "grad_norm": 9.610423358082725, "learning_rate": 4.029358250756448e-06, "loss": 1.0489, "step": 4404 }, { "epoch": 0.623628512776952, "grad_norm": 11.511750561968261, "learning_rate": 4.028904777342341e-06, "loss": 1.0973, "step": 4405 }, { "epoch": 0.6237700856515892, "grad_norm": 11.99144757998285, "learning_rate": 4.02845122355485e-06, "loss": 1.2434, "step": 4406 }, { "epoch": 0.6239116585262263, "grad_norm": 7.957466544303591, "learning_rate": 4.027997589417818e-06, "loss": 1.2398, "step": 4407 }, { "epoch": 0.6240532314008636, "grad_norm": 9.718386137305387, "learning_rate": 4.027543874955092e-06, "loss": 1.0709, "step": 4408 }, { "epoch": 0.6241948042755008, "grad_norm": 7.474900015988403, "learning_rate": 4.027090080190524e-06, "loss": 1.0339, "step": 4409 }, { "epoch": 0.624336377150138, "grad_norm": 9.867756565142162, "learning_rate": 4.026636205147969e-06, "loss": 1.2329, "step": 4410 }, { "epoch": 0.6244779500247752, "grad_norm": 11.178826576848481, "learning_rate": 4.026182249851287e-06, "loss": 1.1175, "step": 4411 }, { "epoch": 0.6246195228994125, "grad_norm": 9.51883436680336, "learning_rate": 4.025728214324341e-06, "loss": 1.0732, "step": 4412 }, { "epoch": 0.6247610957740497, "grad_norm": 8.096745588416074, "learning_rate": 4.025274098591001e-06, "loss": 1.1014, "step": 4413 }, { "epoch": 0.6249026686486869, "grad_norm": 9.842966006106254, "learning_rate": 4.02481990267514e-06, "loss": 1.1316, "step": 4414 }, { "epoch": 0.6250442415233242, "grad_norm": 7.138475758250009, "learning_rate": 4.024365626600632e-06, "loss": 1.0325, "step": 4415 }, { "epoch": 0.6251858143979614, "grad_norm": 9.67514569471009, "learning_rate": 4.023911270391361e-06, "loss": 1.1008, "step": 4416 }, { "epoch": 0.6253273872725986, "grad_norm": 8.34953558995176, "learning_rate": 4.02345683407121e-06, "loss": 1.1221, "step": 4417 }, { "epoch": 0.6254689601472357, "grad_norm": 9.850151378417205, "learning_rate": 4.02300231766407e-06, "loss": 1.1259, "step": 4418 }, { "epoch": 0.625610533021873, "grad_norm": 7.683516548052035, "learning_rate": 4.022547721193833e-06, "loss": 1.0297, "step": 4419 }, { "epoch": 0.6257521058965102, "grad_norm": 8.396485510801854, "learning_rate": 4.022093044684397e-06, "loss": 1.2378, "step": 4420 }, { "epoch": 0.6258936787711474, "grad_norm": 7.4151652956009, "learning_rate": 4.021638288159666e-06, "loss": 1.0221, "step": 4421 }, { "epoch": 0.6260352516457847, "grad_norm": 7.7012976902923285, "learning_rate": 4.021183451643544e-06, "loss": 1.1344, "step": 4422 }, { "epoch": 0.6261768245204219, "grad_norm": 8.19532087851097, "learning_rate": 4.020728535159942e-06, "loss": 1.1157, "step": 4423 }, { "epoch": 0.6263183973950591, "grad_norm": 9.379960438984488, "learning_rate": 4.020273538732775e-06, "loss": 1.1246, "step": 4424 }, { "epoch": 0.6264599702696964, "grad_norm": 11.93374974325213, "learning_rate": 4.019818462385962e-06, "loss": 1.1702, "step": 4425 }, { "epoch": 0.6266015431443336, "grad_norm": 8.042140123202932, "learning_rate": 4.019363306143425e-06, "loss": 1.0416, "step": 4426 }, { "epoch": 0.6267431160189708, "grad_norm": 7.027940573165881, "learning_rate": 4.018908070029093e-06, "loss": 1.0338, "step": 4427 }, { "epoch": 0.6268846888936079, "grad_norm": 11.446159711604356, "learning_rate": 4.018452754066895e-06, "loss": 1.084, "step": 4428 }, { "epoch": 0.6270262617682452, "grad_norm": 7.650531236740734, "learning_rate": 4.017997358280769e-06, "loss": 1.1568, "step": 4429 }, { "epoch": 0.6271678346428824, "grad_norm": 9.867975368139653, "learning_rate": 4.017541882694653e-06, "loss": 1.1802, "step": 4430 }, { "epoch": 0.6273094075175196, "grad_norm": 8.518597173404455, "learning_rate": 4.017086327332492e-06, "loss": 1.0339, "step": 4431 }, { "epoch": 0.6274509803921569, "grad_norm": 9.135343359229342, "learning_rate": 4.0166306922182335e-06, "loss": 1.1999, "step": 4432 }, { "epoch": 0.6275925532667941, "grad_norm": 9.66557577708328, "learning_rate": 4.016174977375831e-06, "loss": 1.2887, "step": 4433 }, { "epoch": 0.6277341261414313, "grad_norm": 7.737992367462393, "learning_rate": 4.01571918282924e-06, "loss": 1.1671, "step": 4434 }, { "epoch": 0.6278756990160685, "grad_norm": 9.524008679805359, "learning_rate": 4.015263308602422e-06, "loss": 1.0389, "step": 4435 }, { "epoch": 0.6280172718907058, "grad_norm": 10.4620128303964, "learning_rate": 4.014807354719342e-06, "loss": 1.2572, "step": 4436 }, { "epoch": 0.628158844765343, "grad_norm": 9.643341819248715, "learning_rate": 4.014351321203969e-06, "loss": 1.2928, "step": 4437 }, { "epoch": 0.6283004176399801, "grad_norm": 9.462176467827092, "learning_rate": 4.013895208080275e-06, "loss": 1.1496, "step": 4438 }, { "epoch": 0.6284419905146174, "grad_norm": 8.302675335372893, "learning_rate": 4.013439015372239e-06, "loss": 1.1354, "step": 4439 }, { "epoch": 0.6285835633892546, "grad_norm": 7.870923637274637, "learning_rate": 4.012982743103844e-06, "loss": 1.1904, "step": 4440 }, { "epoch": 0.6287251362638918, "grad_norm": 9.777547409734682, "learning_rate": 4.012526391299073e-06, "loss": 1.0548, "step": 4441 }, { "epoch": 0.628866709138529, "grad_norm": 8.841830742907854, "learning_rate": 4.012069959981917e-06, "loss": 1.1714, "step": 4442 }, { "epoch": 0.6290082820131663, "grad_norm": 8.616946676145284, "learning_rate": 4.0116134491763716e-06, "loss": 1.144, "step": 4443 }, { "epoch": 0.6291498548878035, "grad_norm": 8.757935767280358, "learning_rate": 4.0111568589064335e-06, "loss": 0.9894, "step": 4444 }, { "epoch": 0.6292914277624407, "grad_norm": 8.6341703011668, "learning_rate": 4.010700189196106e-06, "loss": 1.0733, "step": 4445 }, { "epoch": 0.629433000637078, "grad_norm": 8.127541012667832, "learning_rate": 4.010243440069397e-06, "loss": 1.0693, "step": 4446 }, { "epoch": 0.6295745735117152, "grad_norm": 8.965344144531365, "learning_rate": 4.0097866115503156e-06, "loss": 1.0962, "step": 4447 }, { "epoch": 0.6297161463863524, "grad_norm": 8.709233385022195, "learning_rate": 4.009329703662878e-06, "loss": 1.0706, "step": 4448 }, { "epoch": 0.6298577192609895, "grad_norm": 8.138289162242963, "learning_rate": 4.008872716431104e-06, "loss": 1.1207, "step": 4449 }, { "epoch": 0.6299992921356268, "grad_norm": 8.283850380349916, "learning_rate": 4.008415649879015e-06, "loss": 1.0612, "step": 4450 }, { "epoch": 0.630140865010264, "grad_norm": 9.456184131824049, "learning_rate": 4.007958504030641e-06, "loss": 1.0221, "step": 4451 }, { "epoch": 0.6302824378849012, "grad_norm": 7.807600758757731, "learning_rate": 4.007501278910013e-06, "loss": 0.9836, "step": 4452 }, { "epoch": 0.6304240107595385, "grad_norm": 8.737292408148436, "learning_rate": 4.007043974541166e-06, "loss": 1.1892, "step": 4453 }, { "epoch": 0.6305655836341757, "grad_norm": 7.358829939757773, "learning_rate": 4.006586590948141e-06, "loss": 0.935, "step": 4454 }, { "epoch": 0.6307071565088129, "grad_norm": 8.295456472280238, "learning_rate": 4.006129128154983e-06, "loss": 1.0657, "step": 4455 }, { "epoch": 0.6308487293834502, "grad_norm": 8.797988089409232, "learning_rate": 4.00567158618574e-06, "loss": 1.217, "step": 4456 }, { "epoch": 0.6309903022580874, "grad_norm": 8.448609222986608, "learning_rate": 4.0052139650644625e-06, "loss": 1.2436, "step": 4457 }, { "epoch": 0.6311318751327246, "grad_norm": 9.30337384461025, "learning_rate": 4.004756264815211e-06, "loss": 1.2597, "step": 4458 }, { "epoch": 0.6312734480073617, "grad_norm": 10.354636188712085, "learning_rate": 4.004298485462044e-06, "loss": 1.2937, "step": 4459 }, { "epoch": 0.631415020881999, "grad_norm": 7.67303644467104, "learning_rate": 4.003840627029028e-06, "loss": 1.0581, "step": 4460 }, { "epoch": 0.6315565937566362, "grad_norm": 8.506790590170862, "learning_rate": 4.00338268954023e-06, "loss": 1.1581, "step": 4461 }, { "epoch": 0.6316981666312734, "grad_norm": 11.054545328149384, "learning_rate": 4.002924673019726e-06, "loss": 1.1919, "step": 4462 }, { "epoch": 0.6318397395059107, "grad_norm": 10.264643559796315, "learning_rate": 4.002466577491593e-06, "loss": 1.2592, "step": 4463 }, { "epoch": 0.6319813123805479, "grad_norm": 8.540076081341294, "learning_rate": 4.002008402979911e-06, "loss": 1.1536, "step": 4464 }, { "epoch": 0.6321228852551851, "grad_norm": 7.954181353670538, "learning_rate": 4.001550149508768e-06, "loss": 1.2179, "step": 4465 }, { "epoch": 0.6322644581298223, "grad_norm": 9.603116213157996, "learning_rate": 4.001091817102253e-06, "loss": 1.1842, "step": 4466 }, { "epoch": 0.6324060310044596, "grad_norm": 9.487402745327659, "learning_rate": 4.000633405784461e-06, "loss": 1.0517, "step": 4467 }, { "epoch": 0.6325476038790968, "grad_norm": 8.296846034112573, "learning_rate": 4.000174915579489e-06, "loss": 1.0347, "step": 4468 }, { "epoch": 0.632689176753734, "grad_norm": 9.93147335005396, "learning_rate": 3.999716346511442e-06, "loss": 1.0607, "step": 4469 }, { "epoch": 0.6328307496283712, "grad_norm": 9.741513985339369, "learning_rate": 3.999257698604423e-06, "loss": 1.2406, "step": 4470 }, { "epoch": 0.6329723225030084, "grad_norm": 6.999255276982906, "learning_rate": 3.998798971882545e-06, "loss": 1.1035, "step": 4471 }, { "epoch": 0.6331138953776456, "grad_norm": 7.329352627799194, "learning_rate": 3.998340166369923e-06, "loss": 1.0004, "step": 4472 }, { "epoch": 0.6332554682522828, "grad_norm": 7.321202243403631, "learning_rate": 3.997881282090676e-06, "loss": 1.0899, "step": 4473 }, { "epoch": 0.6333970411269201, "grad_norm": 7.16796529078595, "learning_rate": 3.997422319068926e-06, "loss": 1.1385, "step": 4474 }, { "epoch": 0.6335386140015573, "grad_norm": 7.36564171796867, "learning_rate": 3.996963277328802e-06, "loss": 0.9433, "step": 4475 }, { "epoch": 0.6336801868761945, "grad_norm": 9.689612373653558, "learning_rate": 3.996504156894434e-06, "loss": 1.1226, "step": 4476 }, { "epoch": 0.6338217597508318, "grad_norm": 8.731450937480712, "learning_rate": 3.996044957789959e-06, "loss": 1.0442, "step": 4477 }, { "epoch": 0.633963332625469, "grad_norm": 9.226779143836739, "learning_rate": 3.995585680039515e-06, "loss": 1.2183, "step": 4478 }, { "epoch": 0.6341049055001062, "grad_norm": 9.97281958285263, "learning_rate": 3.995126323667248e-06, "loss": 1.1574, "step": 4479 }, { "epoch": 0.6342464783747433, "grad_norm": 8.945627211985771, "learning_rate": 3.994666888697304e-06, "loss": 1.1176, "step": 4480 }, { "epoch": 0.6343880512493806, "grad_norm": 9.392656306534152, "learning_rate": 3.994207375153836e-06, "loss": 1.1187, "step": 4481 }, { "epoch": 0.6345296241240178, "grad_norm": 8.55200443084288, "learning_rate": 3.993747783061001e-06, "loss": 1.2972, "step": 4482 }, { "epoch": 0.634671196998655, "grad_norm": 8.338906014317766, "learning_rate": 3.99328811244296e-06, "loss": 1.1653, "step": 4483 }, { "epoch": 0.6348127698732923, "grad_norm": 10.440433295601402, "learning_rate": 3.9928283633238755e-06, "loss": 1.0984, "step": 4484 }, { "epoch": 0.6349543427479295, "grad_norm": 7.883475450628616, "learning_rate": 3.992368535727917e-06, "loss": 1.0242, "step": 4485 }, { "epoch": 0.6350959156225667, "grad_norm": 8.427341251332253, "learning_rate": 3.991908629679257e-06, "loss": 1.1639, "step": 4486 }, { "epoch": 0.635237488497204, "grad_norm": 8.172585451012523, "learning_rate": 3.991448645202073e-06, "loss": 1.054, "step": 4487 }, { "epoch": 0.6353790613718412, "grad_norm": 8.870127885277183, "learning_rate": 3.990988582320546e-06, "loss": 1.0905, "step": 4488 }, { "epoch": 0.6355206342464784, "grad_norm": 9.180423840148428, "learning_rate": 3.990528441058861e-06, "loss": 1.0307, "step": 4489 }, { "epoch": 0.6356622071211155, "grad_norm": 8.159510636708944, "learning_rate": 3.990068221441207e-06, "loss": 1.1373, "step": 4490 }, { "epoch": 0.6358037799957528, "grad_norm": 7.146784356747145, "learning_rate": 3.989607923491777e-06, "loss": 1.0383, "step": 4491 }, { "epoch": 0.63594535287039, "grad_norm": 8.490244933640202, "learning_rate": 3.98914754723477e-06, "loss": 1.1905, "step": 4492 }, { "epoch": 0.6360869257450272, "grad_norm": 8.345278710613115, "learning_rate": 3.988687092694386e-06, "loss": 1.1438, "step": 4493 }, { "epoch": 0.6362284986196645, "grad_norm": 9.269881867578853, "learning_rate": 3.988226559894832e-06, "loss": 1.1312, "step": 4494 }, { "epoch": 0.6363700714943017, "grad_norm": 7.184480513851105, "learning_rate": 3.9877659488603186e-06, "loss": 1.0788, "step": 4495 }, { "epoch": 0.6365116443689389, "grad_norm": 8.186193929850972, "learning_rate": 3.9873052596150565e-06, "loss": 1.2218, "step": 4496 }, { "epoch": 0.6366532172435762, "grad_norm": 9.9325691321747, "learning_rate": 3.986844492183267e-06, "loss": 1.129, "step": 4497 }, { "epoch": 0.6367947901182134, "grad_norm": 9.457464864465406, "learning_rate": 3.986383646589171e-06, "loss": 0.9807, "step": 4498 }, { "epoch": 0.6369363629928506, "grad_norm": 7.8385045723444, "learning_rate": 3.985922722856996e-06, "loss": 1.2394, "step": 4499 }, { "epoch": 0.6370779358674878, "grad_norm": 8.95681958087772, "learning_rate": 3.9854617210109705e-06, "loss": 1.1417, "step": 4500 }, { "epoch": 0.637219508742125, "grad_norm": 8.747847810095564, "learning_rate": 3.985000641075329e-06, "loss": 1.133, "step": 4501 }, { "epoch": 0.6373610816167622, "grad_norm": 9.956007894768042, "learning_rate": 3.984539483074313e-06, "loss": 1.1369, "step": 4502 }, { "epoch": 0.6375026544913994, "grad_norm": 8.715590037661814, "learning_rate": 3.984078247032162e-06, "loss": 1.0509, "step": 4503 }, { "epoch": 0.6376442273660367, "grad_norm": 8.273777307262733, "learning_rate": 3.983616932973124e-06, "loss": 1.1627, "step": 4504 }, { "epoch": 0.6377858002406739, "grad_norm": 8.743015662571842, "learning_rate": 3.98315554092145e-06, "loss": 0.9706, "step": 4505 }, { "epoch": 0.6379273731153111, "grad_norm": 7.762113149343604, "learning_rate": 3.982694070901396e-06, "loss": 1.2112, "step": 4506 }, { "epoch": 0.6380689459899483, "grad_norm": 7.145171406360349, "learning_rate": 3.98223252293722e-06, "loss": 1.0382, "step": 4507 }, { "epoch": 0.6382105188645856, "grad_norm": 9.17241317495829, "learning_rate": 3.9817708970531855e-06, "loss": 1.0961, "step": 4508 }, { "epoch": 0.6383520917392228, "grad_norm": 9.94573825065721, "learning_rate": 3.9813091932735596e-06, "loss": 1.1414, "step": 4509 }, { "epoch": 0.63849366461386, "grad_norm": 7.463441374682346, "learning_rate": 3.9808474116226135e-06, "loss": 1.1255, "step": 4510 }, { "epoch": 0.6386352374884972, "grad_norm": 7.685652596159771, "learning_rate": 3.980385552124624e-06, "loss": 1.0734, "step": 4511 }, { "epoch": 0.6387768103631344, "grad_norm": 7.312362800028338, "learning_rate": 3.979923614803869e-06, "loss": 1.1926, "step": 4512 }, { "epoch": 0.6389183832377716, "grad_norm": 9.619838370950061, "learning_rate": 3.979461599684633e-06, "loss": 1.0132, "step": 4513 }, { "epoch": 0.6390599561124088, "grad_norm": 7.879422595949315, "learning_rate": 3.978999506791205e-06, "loss": 1.0535, "step": 4514 }, { "epoch": 0.6392015289870461, "grad_norm": 7.357420319551203, "learning_rate": 3.978537336147875e-06, "loss": 1.0465, "step": 4515 }, { "epoch": 0.6393431018616833, "grad_norm": 7.466312792884544, "learning_rate": 3.97807508777894e-06, "loss": 1.0287, "step": 4516 }, { "epoch": 0.6394846747363205, "grad_norm": 10.212532601835584, "learning_rate": 3.977612761708699e-06, "loss": 1.1195, "step": 4517 }, { "epoch": 0.6396262476109578, "grad_norm": 8.31292448178987, "learning_rate": 3.977150357961457e-06, "loss": 0.9757, "step": 4518 }, { "epoch": 0.639767820485595, "grad_norm": 9.273938108460223, "learning_rate": 3.976687876561523e-06, "loss": 1.1364, "step": 4519 }, { "epoch": 0.6399093933602322, "grad_norm": 8.67609386748226, "learning_rate": 3.976225317533208e-06, "loss": 1.1074, "step": 4520 }, { "epoch": 0.6400509662348693, "grad_norm": 9.168907400287074, "learning_rate": 3.9757626809008274e-06, "loss": 1.1503, "step": 4521 }, { "epoch": 0.6401925391095066, "grad_norm": 6.847180765098264, "learning_rate": 3.975299966688705e-06, "loss": 0.9975, "step": 4522 }, { "epoch": 0.6403341119841438, "grad_norm": 8.116005011920311, "learning_rate": 3.974837174921162e-06, "loss": 1.0876, "step": 4523 }, { "epoch": 0.640475684858781, "grad_norm": 8.260600360176424, "learning_rate": 3.974374305622529e-06, "loss": 1.019, "step": 4524 }, { "epoch": 0.6406172577334183, "grad_norm": 7.685357764509461, "learning_rate": 3.973911358817139e-06, "loss": 1.0596, "step": 4525 }, { "epoch": 0.6407588306080555, "grad_norm": 7.758450484765663, "learning_rate": 3.973448334529326e-06, "loss": 1.0445, "step": 4526 }, { "epoch": 0.6409004034826927, "grad_norm": 8.608292914926844, "learning_rate": 3.972985232783434e-06, "loss": 1.1252, "step": 4527 }, { "epoch": 0.64104197635733, "grad_norm": 9.923978615580548, "learning_rate": 3.972522053603806e-06, "loss": 1.0778, "step": 4528 }, { "epoch": 0.6411835492319672, "grad_norm": 8.573929558050162, "learning_rate": 3.972058797014792e-06, "loss": 1.0279, "step": 4529 }, { "epoch": 0.6413251221066044, "grad_norm": 7.887724511386621, "learning_rate": 3.971595463040744e-06, "loss": 1.0477, "step": 4530 }, { "epoch": 0.6414666949812416, "grad_norm": 11.052529879988874, "learning_rate": 3.97113205170602e-06, "loss": 1.006, "step": 4531 }, { "epoch": 0.6416082678558788, "grad_norm": 10.417992184544321, "learning_rate": 3.970668563034982e-06, "loss": 1.1222, "step": 4532 }, { "epoch": 0.641749840730516, "grad_norm": 7.9952647977281535, "learning_rate": 3.9702049970519925e-06, "loss": 1.047, "step": 4533 }, { "epoch": 0.6418914136051532, "grad_norm": 8.914358202670977, "learning_rate": 3.969741353781424e-06, "loss": 1.1102, "step": 4534 }, { "epoch": 0.6420329864797905, "grad_norm": 8.352549509759777, "learning_rate": 3.969277633247648e-06, "loss": 1.1571, "step": 4535 }, { "epoch": 0.6421745593544277, "grad_norm": 8.555679166266916, "learning_rate": 3.968813835475043e-06, "loss": 0.9944, "step": 4536 }, { "epoch": 0.6423161322290649, "grad_norm": 7.822257578413464, "learning_rate": 3.968349960487988e-06, "loss": 1.1238, "step": 4537 }, { "epoch": 0.6424577051037021, "grad_norm": 6.246773459146917, "learning_rate": 3.967886008310872e-06, "loss": 1.0176, "step": 4538 }, { "epoch": 0.6425992779783394, "grad_norm": 9.490393348959186, "learning_rate": 3.967421978968083e-06, "loss": 1.0816, "step": 4539 }, { "epoch": 0.6427408508529766, "grad_norm": 9.350603020953715, "learning_rate": 3.966957872484013e-06, "loss": 1.0993, "step": 4540 }, { "epoch": 0.6428824237276138, "grad_norm": 9.193407715442804, "learning_rate": 3.966493688883064e-06, "loss": 1.0979, "step": 4541 }, { "epoch": 0.643023996602251, "grad_norm": 11.125652851279673, "learning_rate": 3.966029428189634e-06, "loss": 1.1767, "step": 4542 }, { "epoch": 0.6431655694768882, "grad_norm": 7.782806853541417, "learning_rate": 3.965565090428129e-06, "loss": 1.0561, "step": 4543 }, { "epoch": 0.6433071423515254, "grad_norm": 9.709773490298842, "learning_rate": 3.965100675622962e-06, "loss": 1.2403, "step": 4544 }, { "epoch": 0.6434487152261626, "grad_norm": 7.922466790179931, "learning_rate": 3.9646361837985435e-06, "loss": 1.1273, "step": 4545 }, { "epoch": 0.6435902881007999, "grad_norm": 9.762473319550809, "learning_rate": 3.964171614979294e-06, "loss": 1.1467, "step": 4546 }, { "epoch": 0.6437318609754371, "grad_norm": 7.653281122798252, "learning_rate": 3.963706969189634e-06, "loss": 0.951, "step": 4547 }, { "epoch": 0.6438734338500743, "grad_norm": 10.212460510102849, "learning_rate": 3.963242246453989e-06, "loss": 1.2101, "step": 4548 }, { "epoch": 0.6440150067247116, "grad_norm": 8.686926074932815, "learning_rate": 3.962777446796791e-06, "loss": 1.2115, "step": 4549 }, { "epoch": 0.6441565795993488, "grad_norm": 11.0743933931335, "learning_rate": 3.962312570242473e-06, "loss": 1.1547, "step": 4550 }, { "epoch": 0.644298152473986, "grad_norm": 8.582988645675636, "learning_rate": 3.961847616815474e-06, "loss": 1.0959, "step": 4551 }, { "epoch": 0.6444397253486231, "grad_norm": 8.624027805545406, "learning_rate": 3.961382586540236e-06, "loss": 1.1908, "step": 4552 }, { "epoch": 0.6445812982232604, "grad_norm": 7.7316233662750085, "learning_rate": 3.960917479441204e-06, "loss": 1.1568, "step": 4553 }, { "epoch": 0.6447228710978976, "grad_norm": 8.7542805691668, "learning_rate": 3.96045229554283e-06, "loss": 1.0889, "step": 4554 }, { "epoch": 0.6448644439725348, "grad_norm": 9.098343885026292, "learning_rate": 3.959987034869568e-06, "loss": 1.1185, "step": 4555 }, { "epoch": 0.6450060168471721, "grad_norm": 7.464830718934858, "learning_rate": 3.959521697445876e-06, "loss": 0.9833, "step": 4556 }, { "epoch": 0.6451475897218093, "grad_norm": 7.551666570980712, "learning_rate": 3.9590562832962174e-06, "loss": 1.0103, "step": 4557 }, { "epoch": 0.6452891625964465, "grad_norm": 9.453097962900385, "learning_rate": 3.958590792445057e-06, "loss": 0.9148, "step": 4558 }, { "epoch": 0.6454307354710838, "grad_norm": 7.799164888718906, "learning_rate": 3.958125224916866e-06, "loss": 1.2088, "step": 4559 }, { "epoch": 0.645572308345721, "grad_norm": 7.580242938574759, "learning_rate": 3.95765958073612e-06, "loss": 1.0263, "step": 4560 }, { "epoch": 0.6457138812203582, "grad_norm": 7.532084691815905, "learning_rate": 3.957193859927295e-06, "loss": 1.0017, "step": 4561 }, { "epoch": 0.6458554540949955, "grad_norm": 8.212858808276659, "learning_rate": 3.9567280625148776e-06, "loss": 1.0552, "step": 4562 }, { "epoch": 0.6459970269696326, "grad_norm": 9.117133525306171, "learning_rate": 3.956262188523351e-06, "loss": 1.1757, "step": 4563 }, { "epoch": 0.6461385998442698, "grad_norm": 7.810825259948854, "learning_rate": 3.955796237977207e-06, "loss": 1.2403, "step": 4564 }, { "epoch": 0.646280172718907, "grad_norm": 9.29716016828925, "learning_rate": 3.955330210900941e-06, "loss": 1.1216, "step": 4565 }, { "epoch": 0.6464217455935443, "grad_norm": 6.619939634848868, "learning_rate": 3.95486410731905e-06, "loss": 0.9936, "step": 4566 }, { "epoch": 0.6465633184681815, "grad_norm": 8.165824794843331, "learning_rate": 3.954397927256037e-06, "loss": 1.1515, "step": 4567 }, { "epoch": 0.6467048913428187, "grad_norm": 7.092239555172892, "learning_rate": 3.953931670736411e-06, "loss": 0.9216, "step": 4568 }, { "epoch": 0.646846464217456, "grad_norm": 7.826385609369306, "learning_rate": 3.953465337784681e-06, "loss": 1.1044, "step": 4569 }, { "epoch": 0.6469880370920932, "grad_norm": 8.067313243499969, "learning_rate": 3.952998928425361e-06, "loss": 1.1134, "step": 4570 }, { "epoch": 0.6471296099667304, "grad_norm": 9.040173197749725, "learning_rate": 3.9525324426829716e-06, "loss": 1.0425, "step": 4571 }, { "epoch": 0.6472711828413676, "grad_norm": 8.274796645325152, "learning_rate": 3.952065880582034e-06, "loss": 1.0149, "step": 4572 }, { "epoch": 0.6474127557160048, "grad_norm": 9.477716512458437, "learning_rate": 3.951599242147076e-06, "loss": 1.2345, "step": 4573 }, { "epoch": 0.647554328590642, "grad_norm": 7.5766746155540075, "learning_rate": 3.951132527402629e-06, "loss": 1.1008, "step": 4574 }, { "epoch": 0.6476959014652792, "grad_norm": 7.416817252812765, "learning_rate": 3.950665736373226e-06, "loss": 0.9556, "step": 4575 }, { "epoch": 0.6478374743399165, "grad_norm": 9.047647446513722, "learning_rate": 3.950198869083407e-06, "loss": 1.2194, "step": 4576 }, { "epoch": 0.6479790472145537, "grad_norm": 9.274085776624426, "learning_rate": 3.949731925557715e-06, "loss": 1.0885, "step": 4577 }, { "epoch": 0.6481206200891909, "grad_norm": 8.873328682859803, "learning_rate": 3.949264905820697e-06, "loss": 1.1716, "step": 4578 }, { "epoch": 0.6482621929638281, "grad_norm": 9.315627833701557, "learning_rate": 3.948797809896903e-06, "loss": 1.2035, "step": 4579 }, { "epoch": 0.6484037658384654, "grad_norm": 8.06421968321307, "learning_rate": 3.948330637810888e-06, "loss": 1.0123, "step": 4580 }, { "epoch": 0.6485453387131026, "grad_norm": 8.56475354910657, "learning_rate": 3.947863389587212e-06, "loss": 1.1206, "step": 4581 }, { "epoch": 0.6486869115877398, "grad_norm": 7.768583597624554, "learning_rate": 3.947396065250437e-06, "loss": 1.0284, "step": 4582 }, { "epoch": 0.648828484462377, "grad_norm": 8.650012912630356, "learning_rate": 3.9469286648251304e-06, "loss": 1.0185, "step": 4583 }, { "epoch": 0.6489700573370142, "grad_norm": 6.911395197457562, "learning_rate": 3.946461188335863e-06, "loss": 1.0928, "step": 4584 }, { "epoch": 0.6491116302116514, "grad_norm": 7.645136275639284, "learning_rate": 3.945993635807209e-06, "loss": 1.168, "step": 4585 }, { "epoch": 0.6492532030862886, "grad_norm": 8.069425698494015, "learning_rate": 3.945526007263747e-06, "loss": 1.1322, "step": 4586 }, { "epoch": 0.6493947759609259, "grad_norm": 8.376813094466709, "learning_rate": 3.945058302730061e-06, "loss": 1.2231, "step": 4587 }, { "epoch": 0.6495363488355631, "grad_norm": 8.424660188517528, "learning_rate": 3.944590522230738e-06, "loss": 1.1131, "step": 4588 }, { "epoch": 0.6496779217102003, "grad_norm": 6.925765827676004, "learning_rate": 3.9441226657903686e-06, "loss": 0.9778, "step": 4589 }, { "epoch": 0.6498194945848376, "grad_norm": 9.024669744731098, "learning_rate": 3.943654733433547e-06, "loss": 0.9814, "step": 4590 }, { "epoch": 0.6499610674594748, "grad_norm": 9.525125305472939, "learning_rate": 3.943186725184872e-06, "loss": 1.1224, "step": 4591 }, { "epoch": 0.650102640334112, "grad_norm": 8.916611810913574, "learning_rate": 3.942718641068947e-06, "loss": 1.1076, "step": 4592 }, { "epoch": 0.6502442132087493, "grad_norm": 8.167362987909227, "learning_rate": 3.94225048111038e-06, "loss": 1.1052, "step": 4593 }, { "epoch": 0.6503857860833864, "grad_norm": 7.751142848258027, "learning_rate": 3.941782245333781e-06, "loss": 1.1241, "step": 4594 }, { "epoch": 0.6505273589580236, "grad_norm": 7.226473817023417, "learning_rate": 3.941313933763763e-06, "loss": 1.1839, "step": 4595 }, { "epoch": 0.6506689318326608, "grad_norm": 8.446587987632196, "learning_rate": 3.9408455464249466e-06, "loss": 1.0181, "step": 4596 }, { "epoch": 0.6508105047072981, "grad_norm": 9.190481519568136, "learning_rate": 3.9403770833419535e-06, "loss": 1.0935, "step": 4597 }, { "epoch": 0.6509520775819353, "grad_norm": 9.339278462085966, "learning_rate": 3.939908544539412e-06, "loss": 1.1237, "step": 4598 }, { "epoch": 0.6510936504565725, "grad_norm": 8.07869023424459, "learning_rate": 3.9394399300419516e-06, "loss": 1.1389, "step": 4599 }, { "epoch": 0.6512352233312098, "grad_norm": 9.717897825505919, "learning_rate": 3.938971239874208e-06, "loss": 1.0525, "step": 4600 }, { "epoch": 0.651376796205847, "grad_norm": 7.597998917802742, "learning_rate": 3.938502474060818e-06, "loss": 0.967, "step": 4601 }, { "epoch": 0.6515183690804842, "grad_norm": 8.828724312483647, "learning_rate": 3.938033632626426e-06, "loss": 1.1069, "step": 4602 }, { "epoch": 0.6516599419551214, "grad_norm": 8.819832855509286, "learning_rate": 3.937564715595678e-06, "loss": 1.0858, "step": 4603 }, { "epoch": 0.6518015148297586, "grad_norm": 9.051293330221213, "learning_rate": 3.937095722993225e-06, "loss": 1.1647, "step": 4604 }, { "epoch": 0.6519430877043958, "grad_norm": 8.513571283315466, "learning_rate": 3.936626654843722e-06, "loss": 1.1093, "step": 4605 }, { "epoch": 0.652084660579033, "grad_norm": 7.139927268632866, "learning_rate": 3.936157511171827e-06, "loss": 1.0445, "step": 4606 }, { "epoch": 0.6522262334536703, "grad_norm": 7.552123714645991, "learning_rate": 3.935688292002201e-06, "loss": 1.1392, "step": 4607 }, { "epoch": 0.6523678063283075, "grad_norm": 8.451957918220321, "learning_rate": 3.935218997359513e-06, "loss": 1.1751, "step": 4608 }, { "epoch": 0.6525093792029447, "grad_norm": 10.110315671194021, "learning_rate": 3.934749627268433e-06, "loss": 1.2279, "step": 4609 }, { "epoch": 0.652650952077582, "grad_norm": 9.034695611501279, "learning_rate": 3.934280181753634e-06, "loss": 1.1886, "step": 4610 }, { "epoch": 0.6527925249522192, "grad_norm": 10.822730720300706, "learning_rate": 3.9338106608397955e-06, "loss": 1.1897, "step": 4611 }, { "epoch": 0.6529340978268564, "grad_norm": 9.907414027997818, "learning_rate": 3.9333410645516e-06, "loss": 1.2075, "step": 4612 }, { "epoch": 0.6530756707014936, "grad_norm": 7.502153723154428, "learning_rate": 3.932871392913733e-06, "loss": 1.0748, "step": 4613 }, { "epoch": 0.6532172435761309, "grad_norm": 7.981361611824823, "learning_rate": 3.932401645950885e-06, "loss": 1.2064, "step": 4614 }, { "epoch": 0.653358816450768, "grad_norm": 9.401803318494194, "learning_rate": 3.931931823687751e-06, "loss": 1.2603, "step": 4615 }, { "epoch": 0.6535003893254052, "grad_norm": 8.978772879784731, "learning_rate": 3.931461926149029e-06, "loss": 1.1745, "step": 4616 }, { "epoch": 0.6536419622000424, "grad_norm": 8.896053647984173, "learning_rate": 3.930991953359421e-06, "loss": 1.1456, "step": 4617 }, { "epoch": 0.6537835350746797, "grad_norm": 11.247059416577274, "learning_rate": 3.930521905343632e-06, "loss": 1.3293, "step": 4618 }, { "epoch": 0.6539251079493169, "grad_norm": 8.300239872051515, "learning_rate": 3.930051782126374e-06, "loss": 1.0233, "step": 4619 }, { "epoch": 0.6540666808239541, "grad_norm": 10.505231008409051, "learning_rate": 3.92958158373236e-06, "loss": 1.1499, "step": 4620 }, { "epoch": 0.6542082536985914, "grad_norm": 7.5424981545814935, "learning_rate": 3.929111310186307e-06, "loss": 1.0771, "step": 4621 }, { "epoch": 0.6543498265732286, "grad_norm": 8.933760280749997, "learning_rate": 3.928640961512939e-06, "loss": 1.212, "step": 4622 }, { "epoch": 0.6544913994478658, "grad_norm": 8.711920137962375, "learning_rate": 3.9281705377369814e-06, "loss": 1.0082, "step": 4623 }, { "epoch": 0.6546329723225031, "grad_norm": 8.992493359942685, "learning_rate": 3.927700038883162e-06, "loss": 1.1348, "step": 4624 }, { "epoch": 0.6547745451971402, "grad_norm": 11.019419780763753, "learning_rate": 3.927229464976218e-06, "loss": 1.1828, "step": 4625 }, { "epoch": 0.6549161180717774, "grad_norm": 10.6787255390418, "learning_rate": 3.9267588160408845e-06, "loss": 1.0746, "step": 4626 }, { "epoch": 0.6550576909464146, "grad_norm": 7.314125336864236, "learning_rate": 3.926288092101903e-06, "loss": 1.0959, "step": 4627 }, { "epoch": 0.6551992638210519, "grad_norm": 10.734192337192209, "learning_rate": 3.92581729318402e-06, "loss": 1.1636, "step": 4628 }, { "epoch": 0.6553408366956891, "grad_norm": 8.523017684150135, "learning_rate": 3.925346419311986e-06, "loss": 1.0939, "step": 4629 }, { "epoch": 0.6554824095703263, "grad_norm": 8.195281778719826, "learning_rate": 3.924875470510553e-06, "loss": 1.0428, "step": 4630 }, { "epoch": 0.6556239824449636, "grad_norm": 7.4645248656379035, "learning_rate": 3.924404446804479e-06, "loss": 1.1035, "step": 4631 }, { "epoch": 0.6557655553196008, "grad_norm": 8.618897864032558, "learning_rate": 3.923933348218525e-06, "loss": 1.0629, "step": 4632 }, { "epoch": 0.655907128194238, "grad_norm": 8.049287127093587, "learning_rate": 3.923462174777458e-06, "loss": 1.0445, "step": 4633 }, { "epoch": 0.6560487010688753, "grad_norm": 9.58124972926349, "learning_rate": 3.922990926506044e-06, "loss": 1.0711, "step": 4634 }, { "epoch": 0.6561902739435124, "grad_norm": 8.098933736062595, "learning_rate": 3.922519603429059e-06, "loss": 1.1809, "step": 4635 }, { "epoch": 0.6563318468181496, "grad_norm": 9.67412051755021, "learning_rate": 3.922048205571279e-06, "loss": 1.2144, "step": 4636 }, { "epoch": 0.6564734196927868, "grad_norm": 8.76752656652528, "learning_rate": 3.921576732957486e-06, "loss": 1.1386, "step": 4637 }, { "epoch": 0.6566149925674241, "grad_norm": 8.673517404998083, "learning_rate": 3.9211051856124625e-06, "loss": 1.1687, "step": 4638 }, { "epoch": 0.6567565654420613, "grad_norm": 8.277280151065249, "learning_rate": 3.920633563560999e-06, "loss": 1.0566, "step": 4639 }, { "epoch": 0.6568981383166985, "grad_norm": 10.04760587653087, "learning_rate": 3.92016186682789e-06, "loss": 0.9722, "step": 4640 }, { "epoch": 0.6570397111913358, "grad_norm": 8.721795100656289, "learning_rate": 3.919690095437929e-06, "loss": 1.1006, "step": 4641 }, { "epoch": 0.657181284065973, "grad_norm": 8.698372822447931, "learning_rate": 3.9192182494159196e-06, "loss": 1.1095, "step": 4642 }, { "epoch": 0.6573228569406102, "grad_norm": 8.184297590556032, "learning_rate": 3.918746328786665e-06, "loss": 1.186, "step": 4643 }, { "epoch": 0.6574644298152474, "grad_norm": 7.761752907495928, "learning_rate": 3.918274333574972e-06, "loss": 1.0639, "step": 4644 }, { "epoch": 0.6576060026898847, "grad_norm": 8.122919373773941, "learning_rate": 3.9178022638056565e-06, "loss": 1.1491, "step": 4645 }, { "epoch": 0.6577475755645218, "grad_norm": 7.810371780436115, "learning_rate": 3.9173301195035326e-06, "loss": 1.1113, "step": 4646 }, { "epoch": 0.657889148439159, "grad_norm": 10.507095482623019, "learning_rate": 3.916857900693421e-06, "loss": 1.1018, "step": 4647 }, { "epoch": 0.6580307213137963, "grad_norm": 7.337541008855802, "learning_rate": 3.916385607400146e-06, "loss": 1.1561, "step": 4648 }, { "epoch": 0.6581722941884335, "grad_norm": 6.76143073973204, "learning_rate": 3.915913239648535e-06, "loss": 0.9705, "step": 4649 }, { "epoch": 0.6583138670630707, "grad_norm": 8.134150590120267, "learning_rate": 3.915440797463422e-06, "loss": 1.0862, "step": 4650 }, { "epoch": 0.6584554399377079, "grad_norm": 7.816097560355682, "learning_rate": 3.914968280869642e-06, "loss": 0.9738, "step": 4651 }, { "epoch": 0.6585970128123452, "grad_norm": 8.643549777654378, "learning_rate": 3.9144956898920336e-06, "loss": 1.2058, "step": 4652 }, { "epoch": 0.6587385856869824, "grad_norm": 9.26971067567603, "learning_rate": 3.914023024555441e-06, "loss": 1.1286, "step": 4653 }, { "epoch": 0.6588801585616196, "grad_norm": 7.7136886002281075, "learning_rate": 3.913550284884714e-06, "loss": 1.1396, "step": 4654 }, { "epoch": 0.6590217314362569, "grad_norm": 7.4045624560817656, "learning_rate": 3.913077470904701e-06, "loss": 1.0711, "step": 4655 }, { "epoch": 0.659163304310894, "grad_norm": 7.326994005457208, "learning_rate": 3.912604582640259e-06, "loss": 0.9717, "step": 4656 }, { "epoch": 0.6593048771855312, "grad_norm": 9.494665555522769, "learning_rate": 3.912131620116249e-06, "loss": 1.0579, "step": 4657 }, { "epoch": 0.6594464500601684, "grad_norm": 8.120649845822502, "learning_rate": 3.9116585833575305e-06, "loss": 1.0356, "step": 4658 }, { "epoch": 0.6595880229348057, "grad_norm": 7.96737742851303, "learning_rate": 3.911185472388974e-06, "loss": 1.1967, "step": 4659 }, { "epoch": 0.6597295958094429, "grad_norm": 7.859260694760856, "learning_rate": 3.91071228723545e-06, "loss": 1.1311, "step": 4660 }, { "epoch": 0.6598711686840801, "grad_norm": 10.425676238816411, "learning_rate": 3.9102390279218315e-06, "loss": 1.1979, "step": 4661 }, { "epoch": 0.6600127415587174, "grad_norm": 10.02027363843236, "learning_rate": 3.909765694473e-06, "loss": 0.9958, "step": 4662 }, { "epoch": 0.6601543144333546, "grad_norm": 8.473137989128496, "learning_rate": 3.909292286913836e-06, "loss": 1.0823, "step": 4663 }, { "epoch": 0.6602958873079918, "grad_norm": 9.02228161563126, "learning_rate": 3.908818805269229e-06, "loss": 1.0971, "step": 4664 }, { "epoch": 0.660437460182629, "grad_norm": 8.695610377169793, "learning_rate": 3.908345249564066e-06, "loss": 1.2205, "step": 4665 }, { "epoch": 0.6605790330572662, "grad_norm": 9.523205573596144, "learning_rate": 3.907871619823244e-06, "loss": 1.0747, "step": 4666 }, { "epoch": 0.6607206059319034, "grad_norm": 9.335400988114367, "learning_rate": 3.907397916071661e-06, "loss": 1.3447, "step": 4667 }, { "epoch": 0.6608621788065406, "grad_norm": 9.366274829782208, "learning_rate": 3.90692413833422e-06, "loss": 1.1007, "step": 4668 }, { "epoch": 0.6610037516811779, "grad_norm": 9.153006854846296, "learning_rate": 3.906450286635824e-06, "loss": 1.1178, "step": 4669 }, { "epoch": 0.6611453245558151, "grad_norm": 7.942449033079558, "learning_rate": 3.905976361001385e-06, "loss": 1.1205, "step": 4670 }, { "epoch": 0.6612868974304523, "grad_norm": 8.691322418837284, "learning_rate": 3.905502361455819e-06, "loss": 1.0372, "step": 4671 }, { "epoch": 0.6614284703050896, "grad_norm": 8.45482839742951, "learning_rate": 3.9050282880240405e-06, "loss": 1.0399, "step": 4672 }, { "epoch": 0.6615700431797268, "grad_norm": 9.136631489402674, "learning_rate": 3.904554140730973e-06, "loss": 1.1255, "step": 4673 }, { "epoch": 0.661711616054364, "grad_norm": 7.408957457311255, "learning_rate": 3.904079919601543e-06, "loss": 0.9265, "step": 4674 }, { "epoch": 0.6618531889290012, "grad_norm": 7.405490755848726, "learning_rate": 3.903605624660676e-06, "loss": 1.0235, "step": 4675 }, { "epoch": 0.6619947618036385, "grad_norm": 9.191705480812367, "learning_rate": 3.903131255933309e-06, "loss": 1.2201, "step": 4676 }, { "epoch": 0.6621363346782756, "grad_norm": 8.714977254832402, "learning_rate": 3.902656813444378e-06, "loss": 1.277, "step": 4677 }, { "epoch": 0.6622779075529128, "grad_norm": 8.76339916453611, "learning_rate": 3.902182297218824e-06, "loss": 1.1077, "step": 4678 }, { "epoch": 0.6624194804275501, "grad_norm": 10.035626463900957, "learning_rate": 3.901707707281592e-06, "loss": 1.0135, "step": 4679 }, { "epoch": 0.6625610533021873, "grad_norm": 10.450849390873579, "learning_rate": 3.901233043657632e-06, "loss": 1.0097, "step": 4680 }, { "epoch": 0.6627026261768245, "grad_norm": 8.826901187297414, "learning_rate": 3.900758306371895e-06, "loss": 1.1019, "step": 4681 }, { "epoch": 0.6628441990514617, "grad_norm": 6.772621396012807, "learning_rate": 3.900283495449339e-06, "loss": 0.971, "step": 4682 }, { "epoch": 0.662985771926099, "grad_norm": 9.570355149777415, "learning_rate": 3.899808610914923e-06, "loss": 1.3069, "step": 4683 }, { "epoch": 0.6631273448007362, "grad_norm": 7.8908645442445176, "learning_rate": 3.899333652793612e-06, "loss": 0.9989, "step": 4684 }, { "epoch": 0.6632689176753734, "grad_norm": 9.88226205853471, "learning_rate": 3.898858621110374e-06, "loss": 1.2244, "step": 4685 }, { "epoch": 0.6634104905500107, "grad_norm": 9.058207580573207, "learning_rate": 3.898383515890182e-06, "loss": 1.0709, "step": 4686 }, { "epoch": 0.6635520634246478, "grad_norm": 7.651075459598903, "learning_rate": 3.89790833715801e-06, "loss": 1.1767, "step": 4687 }, { "epoch": 0.663693636299285, "grad_norm": 10.639066114241565, "learning_rate": 3.897433084938841e-06, "loss": 1.246, "step": 4688 }, { "epoch": 0.6638352091739222, "grad_norm": 9.417109611243557, "learning_rate": 3.8969577592576555e-06, "loss": 1.1443, "step": 4689 }, { "epoch": 0.6639767820485595, "grad_norm": 10.390648496214036, "learning_rate": 3.896482360139443e-06, "loss": 0.9974, "step": 4690 }, { "epoch": 0.6641183549231967, "grad_norm": 9.06215061961227, "learning_rate": 3.896006887609193e-06, "loss": 1.1168, "step": 4691 }, { "epoch": 0.6642599277978339, "grad_norm": 9.260784563810168, "learning_rate": 3.8955313416919026e-06, "loss": 1.3047, "step": 4692 }, { "epoch": 0.6644015006724712, "grad_norm": 9.554783718469942, "learning_rate": 3.89505572241257e-06, "loss": 1.1068, "step": 4693 }, { "epoch": 0.6645430735471084, "grad_norm": 9.000757821284932, "learning_rate": 3.894580029796198e-06, "loss": 1.1215, "step": 4694 }, { "epoch": 0.6646846464217456, "grad_norm": 9.500057019514326, "learning_rate": 3.894104263867794e-06, "loss": 1.1501, "step": 4695 }, { "epoch": 0.6648262192963829, "grad_norm": 9.350481447278447, "learning_rate": 3.893628424652368e-06, "loss": 1.0922, "step": 4696 }, { "epoch": 0.66496779217102, "grad_norm": 8.224914791512234, "learning_rate": 3.893152512174935e-06, "loss": 1.133, "step": 4697 }, { "epoch": 0.6651093650456572, "grad_norm": 9.264611328668304, "learning_rate": 3.892676526460513e-06, "loss": 1.0232, "step": 4698 }, { "epoch": 0.6652509379202944, "grad_norm": 9.575714355593627, "learning_rate": 3.8922004675341244e-06, "loss": 1.3603, "step": 4699 }, { "epoch": 0.6653925107949317, "grad_norm": 9.563820741486792, "learning_rate": 3.891724335420796e-06, "loss": 1.1566, "step": 4700 }, { "epoch": 0.6655340836695689, "grad_norm": 11.46974514585819, "learning_rate": 3.891248130145556e-06, "loss": 1.0704, "step": 4701 }, { "epoch": 0.6656756565442061, "grad_norm": 9.489744573759365, "learning_rate": 3.8907718517334405e-06, "loss": 1.0818, "step": 4702 }, { "epoch": 0.6658172294188434, "grad_norm": 9.244724728337474, "learning_rate": 3.890295500209485e-06, "loss": 0.9451, "step": 4703 }, { "epoch": 0.6659588022934806, "grad_norm": 10.4155455532758, "learning_rate": 3.8898190755987314e-06, "loss": 1.1608, "step": 4704 }, { "epoch": 0.6661003751681178, "grad_norm": 10.927090470584735, "learning_rate": 3.889342577926225e-06, "loss": 1.1445, "step": 4705 }, { "epoch": 0.666241948042755, "grad_norm": 9.78657050658061, "learning_rate": 3.888866007217017e-06, "loss": 1.2347, "step": 4706 }, { "epoch": 0.6663835209173923, "grad_norm": 8.105485222121816, "learning_rate": 3.888389363496157e-06, "loss": 1.1776, "step": 4707 }, { "epoch": 0.6665250937920294, "grad_norm": 10.719486667242032, "learning_rate": 3.887912646788704e-06, "loss": 1.0673, "step": 4708 }, { "epoch": 0.6666666666666666, "grad_norm": 9.377136800755235, "learning_rate": 3.8874358571197164e-06, "loss": 1.1834, "step": 4709 }, { "epoch": 0.6668082395413039, "grad_norm": 7.917273273985855, "learning_rate": 3.886958994514263e-06, "loss": 1.0045, "step": 4710 }, { "epoch": 0.6669498124159411, "grad_norm": 8.77487135572769, "learning_rate": 3.8864820589974075e-06, "loss": 1.0233, "step": 4711 }, { "epoch": 0.6670913852905783, "grad_norm": 6.4069716628585045, "learning_rate": 3.886005050594225e-06, "loss": 0.9902, "step": 4712 }, { "epoch": 0.6672329581652156, "grad_norm": 8.116092905474467, "learning_rate": 3.88552796932979e-06, "loss": 1.0342, "step": 4713 }, { "epoch": 0.6673745310398528, "grad_norm": 9.87185271122642, "learning_rate": 3.885050815229182e-06, "loss": 1.1945, "step": 4714 }, { "epoch": 0.66751610391449, "grad_norm": 9.653079601908463, "learning_rate": 3.884573588317486e-06, "loss": 1.1121, "step": 4715 }, { "epoch": 0.6676576767891272, "grad_norm": 9.450376505138152, "learning_rate": 3.88409628861979e-06, "loss": 1.0107, "step": 4716 }, { "epoch": 0.6677992496637645, "grad_norm": 10.09615446097683, "learning_rate": 3.883618916161183e-06, "loss": 1.2966, "step": 4717 }, { "epoch": 0.6679408225384016, "grad_norm": 8.602873796646977, "learning_rate": 3.883141470966761e-06, "loss": 1.0299, "step": 4718 }, { "epoch": 0.6680823954130388, "grad_norm": 8.938063383685598, "learning_rate": 3.8826639530616235e-06, "loss": 1.1468, "step": 4719 }, { "epoch": 0.668223968287676, "grad_norm": 6.570896553633658, "learning_rate": 3.8821863624708725e-06, "loss": 1.0734, "step": 4720 }, { "epoch": 0.6683655411623133, "grad_norm": 9.230896476944501, "learning_rate": 3.881708699219616e-06, "loss": 1.0848, "step": 4721 }, { "epoch": 0.6685071140369505, "grad_norm": 7.991457908643155, "learning_rate": 3.881230963332963e-06, "loss": 1.0446, "step": 4722 }, { "epoch": 0.6686486869115877, "grad_norm": 7.207324742076226, "learning_rate": 3.880753154836028e-06, "loss": 1.0346, "step": 4723 }, { "epoch": 0.668790259786225, "grad_norm": 9.625369374507304, "learning_rate": 3.880275273753929e-06, "loss": 1.1944, "step": 4724 }, { "epoch": 0.6689318326608622, "grad_norm": 9.25977077455826, "learning_rate": 3.879797320111788e-06, "loss": 1.1732, "step": 4725 }, { "epoch": 0.6690734055354994, "grad_norm": 7.85443238905886, "learning_rate": 3.879319293934732e-06, "loss": 1.1645, "step": 4726 }, { "epoch": 0.6692149784101367, "grad_norm": 9.143848757102965, "learning_rate": 3.878841195247888e-06, "loss": 1.1468, "step": 4727 }, { "epoch": 0.6693565512847738, "grad_norm": 10.814141474141806, "learning_rate": 3.87836302407639e-06, "loss": 1.1165, "step": 4728 }, { "epoch": 0.669498124159411, "grad_norm": 9.640756368128455, "learning_rate": 3.877884780445377e-06, "loss": 1.2162, "step": 4729 }, { "epoch": 0.6696396970340482, "grad_norm": 7.042954125920757, "learning_rate": 3.877406464379987e-06, "loss": 0.9623, "step": 4730 }, { "epoch": 0.6697812699086855, "grad_norm": 8.456937878381785, "learning_rate": 3.876928075905368e-06, "loss": 1.0962, "step": 4731 }, { "epoch": 0.6699228427833227, "grad_norm": 10.763107824939297, "learning_rate": 3.876449615046665e-06, "loss": 1.1912, "step": 4732 }, { "epoch": 0.6700644156579599, "grad_norm": 9.181299725443969, "learning_rate": 3.875971081829033e-06, "loss": 1.2129, "step": 4733 }, { "epoch": 0.6702059885325972, "grad_norm": 8.518009181006724, "learning_rate": 3.875492476277627e-06, "loss": 1.1236, "step": 4734 }, { "epoch": 0.6703475614072344, "grad_norm": 8.711936339174663, "learning_rate": 3.875013798417606e-06, "loss": 1.1436, "step": 4735 }, { "epoch": 0.6704891342818716, "grad_norm": 8.627367220011315, "learning_rate": 3.874535048274136e-06, "loss": 0.956, "step": 4736 }, { "epoch": 0.6706307071565089, "grad_norm": 7.54717877963271, "learning_rate": 3.8740562258723845e-06, "loss": 1.0182, "step": 4737 }, { "epoch": 0.6707722800311461, "grad_norm": 9.188156415884196, "learning_rate": 3.87357733123752e-06, "loss": 1.2544, "step": 4738 }, { "epoch": 0.6709138529057832, "grad_norm": 9.693172639811761, "learning_rate": 3.87309836439472e-06, "loss": 1.1277, "step": 4739 }, { "epoch": 0.6710554257804204, "grad_norm": 9.645202044817493, "learning_rate": 3.872619325369162e-06, "loss": 1.0936, "step": 4740 }, { "epoch": 0.6711969986550577, "grad_norm": 8.460083979196577, "learning_rate": 3.872140214186031e-06, "loss": 0.9064, "step": 4741 }, { "epoch": 0.6713385715296949, "grad_norm": 8.312841422734543, "learning_rate": 3.871661030870512e-06, "loss": 1.0934, "step": 4742 }, { "epoch": 0.6714801444043321, "grad_norm": 8.964745029412873, "learning_rate": 3.871181775447794e-06, "loss": 1.1419, "step": 4743 }, { "epoch": 0.6716217172789694, "grad_norm": 8.23924201293747, "learning_rate": 3.870702447943073e-06, "loss": 1.2373, "step": 4744 }, { "epoch": 0.6717632901536066, "grad_norm": 7.819726150283633, "learning_rate": 3.870223048381546e-06, "loss": 1.0077, "step": 4745 }, { "epoch": 0.6719048630282438, "grad_norm": 6.638293191244261, "learning_rate": 3.869743576788416e-06, "loss": 0.9905, "step": 4746 }, { "epoch": 0.672046435902881, "grad_norm": 7.960168863163528, "learning_rate": 3.869264033188887e-06, "loss": 1.0802, "step": 4747 }, { "epoch": 0.6721880087775183, "grad_norm": 8.550262393605925, "learning_rate": 3.868784417608169e-06, "loss": 0.9385, "step": 4748 }, { "epoch": 0.6723295816521554, "grad_norm": 7.552966200864167, "learning_rate": 3.868304730071475e-06, "loss": 1.0115, "step": 4749 }, { "epoch": 0.6724711545267926, "grad_norm": 9.220062472779299, "learning_rate": 3.86782497060402e-06, "loss": 1.1036, "step": 4750 }, { "epoch": 0.6726127274014299, "grad_norm": 9.771340905340418, "learning_rate": 3.867345139231028e-06, "loss": 1.1681, "step": 4751 }, { "epoch": 0.6727543002760671, "grad_norm": 9.426007884241827, "learning_rate": 3.86686523597772e-06, "loss": 1.2268, "step": 4752 }, { "epoch": 0.6728958731507043, "grad_norm": 8.43063278484077, "learning_rate": 3.866385260869327e-06, "loss": 1.1205, "step": 4753 }, { "epoch": 0.6730374460253415, "grad_norm": 8.439735851041284, "learning_rate": 3.86590521393108e-06, "loss": 1.0024, "step": 4754 }, { "epoch": 0.6731790188999788, "grad_norm": 7.475153902950944, "learning_rate": 3.865425095188214e-06, "loss": 0.9605, "step": 4755 }, { "epoch": 0.673320591774616, "grad_norm": 6.991440035117832, "learning_rate": 3.864944904665967e-06, "loss": 1.1105, "step": 4756 }, { "epoch": 0.6734621646492532, "grad_norm": 9.790376835273511, "learning_rate": 3.864464642389586e-06, "loss": 1.144, "step": 4757 }, { "epoch": 0.6736037375238905, "grad_norm": 9.172854421593428, "learning_rate": 3.863984308384317e-06, "loss": 0.9729, "step": 4758 }, { "epoch": 0.6737453103985277, "grad_norm": 8.632878782268168, "learning_rate": 3.8635039026754075e-06, "loss": 1.0163, "step": 4759 }, { "epoch": 0.6738868832731648, "grad_norm": 9.155136301056253, "learning_rate": 3.863023425288116e-06, "loss": 1.1526, "step": 4760 }, { "epoch": 0.674028456147802, "grad_norm": 8.600266527880745, "learning_rate": 3.862542876247699e-06, "loss": 1.079, "step": 4761 }, { "epoch": 0.6741700290224393, "grad_norm": 9.352960748119248, "learning_rate": 3.862062255579419e-06, "loss": 1.2278, "step": 4762 }, { "epoch": 0.6743116018970765, "grad_norm": 8.140770264718793, "learning_rate": 3.861581563308542e-06, "loss": 1.0872, "step": 4763 }, { "epoch": 0.6744531747717137, "grad_norm": 11.11023066105276, "learning_rate": 3.861100799460336e-06, "loss": 1.083, "step": 4764 }, { "epoch": 0.674594747646351, "grad_norm": 8.706551060670943, "learning_rate": 3.860619964060078e-06, "loss": 1.0058, "step": 4765 }, { "epoch": 0.6747363205209882, "grad_norm": 8.955586519442853, "learning_rate": 3.860139057133042e-06, "loss": 1.1573, "step": 4766 }, { "epoch": 0.6748778933956254, "grad_norm": 8.10814667735506, "learning_rate": 3.85965807870451e-06, "loss": 1.2137, "step": 4767 }, { "epoch": 0.6750194662702627, "grad_norm": 11.008646254576375, "learning_rate": 3.859177028799766e-06, "loss": 1.1503, "step": 4768 }, { "epoch": 0.6751610391448999, "grad_norm": 8.359233077796418, "learning_rate": 3.858695907444101e-06, "loss": 1.1752, "step": 4769 }, { "epoch": 0.675302612019537, "grad_norm": 7.473927510870315, "learning_rate": 3.858214714662804e-06, "loss": 1.1304, "step": 4770 }, { "epoch": 0.6754441848941742, "grad_norm": 7.636521686635807, "learning_rate": 3.857733450481172e-06, "loss": 1.1864, "step": 4771 }, { "epoch": 0.6755857577688115, "grad_norm": 8.640706674144962, "learning_rate": 3.857252114924504e-06, "loss": 1.0738, "step": 4772 }, { "epoch": 0.6757273306434487, "grad_norm": 8.523987525934395, "learning_rate": 3.8567707080181054e-06, "loss": 1.1283, "step": 4773 }, { "epoch": 0.6758689035180859, "grad_norm": 8.305404962246818, "learning_rate": 3.856289229787283e-06, "loss": 1.1578, "step": 4774 }, { "epoch": 0.6760104763927232, "grad_norm": 8.412013917571349, "learning_rate": 3.855807680257347e-06, "loss": 1.1477, "step": 4775 }, { "epoch": 0.6761520492673604, "grad_norm": 8.512137778665727, "learning_rate": 3.85532605945361e-06, "loss": 1.0887, "step": 4776 }, { "epoch": 0.6762936221419976, "grad_norm": 8.278522546135573, "learning_rate": 3.854844367401395e-06, "loss": 1.0213, "step": 4777 }, { "epoch": 0.6764351950166348, "grad_norm": 8.010165907990618, "learning_rate": 3.854362604126021e-06, "loss": 0.9886, "step": 4778 }, { "epoch": 0.6765767678912721, "grad_norm": 9.633370858969633, "learning_rate": 3.853880769652815e-06, "loss": 1.1345, "step": 4779 }, { "epoch": 0.6767183407659092, "grad_norm": 9.328976795149273, "learning_rate": 3.853398864007105e-06, "loss": 1.1698, "step": 4780 }, { "epoch": 0.6768599136405464, "grad_norm": 8.846641687828892, "learning_rate": 3.852916887214227e-06, "loss": 1.1331, "step": 4781 }, { "epoch": 0.6770014865151837, "grad_norm": 7.1790166796756045, "learning_rate": 3.852434839299517e-06, "loss": 1.0213, "step": 4782 }, { "epoch": 0.6771430593898209, "grad_norm": 9.394922272917437, "learning_rate": 3.851952720288316e-06, "loss": 1.0912, "step": 4783 }, { "epoch": 0.6772846322644581, "grad_norm": 10.151767715838837, "learning_rate": 3.851470530205969e-06, "loss": 1.2276, "step": 4784 }, { "epoch": 0.6774262051390953, "grad_norm": 7.883914080379532, "learning_rate": 3.8509882690778234e-06, "loss": 1.119, "step": 4785 }, { "epoch": 0.6775677780137326, "grad_norm": 7.509442552380953, "learning_rate": 3.850505936929232e-06, "loss": 0.9439, "step": 4786 }, { "epoch": 0.6777093508883698, "grad_norm": 8.903832251326708, "learning_rate": 3.8500235337855495e-06, "loss": 1.2259, "step": 4787 }, { "epoch": 0.677850923763007, "grad_norm": 8.24210048828479, "learning_rate": 3.849541059672137e-06, "loss": 1.0467, "step": 4788 }, { "epoch": 0.6779924966376443, "grad_norm": 9.629532576612583, "learning_rate": 3.8490585146143574e-06, "loss": 1.0536, "step": 4789 }, { "epoch": 0.6781340695122815, "grad_norm": 8.349887376585396, "learning_rate": 3.848575898637579e-06, "loss": 1.1109, "step": 4790 }, { "epoch": 0.6782756423869186, "grad_norm": 8.21478802584524, "learning_rate": 3.84809321176717e-06, "loss": 1.2404, "step": 4791 }, { "epoch": 0.6784172152615559, "grad_norm": 9.943867880400038, "learning_rate": 3.8476104540285054e-06, "loss": 1.183, "step": 4792 }, { "epoch": 0.6785587881361931, "grad_norm": 9.899663715238244, "learning_rate": 3.847127625446964e-06, "loss": 1.1007, "step": 4793 }, { "epoch": 0.6787003610108303, "grad_norm": 9.20613833772129, "learning_rate": 3.846644726047928e-06, "loss": 1.1412, "step": 4794 }, { "epoch": 0.6788419338854675, "grad_norm": 8.445907360091864, "learning_rate": 3.846161755856784e-06, "loss": 1.2098, "step": 4795 }, { "epoch": 0.6789835067601048, "grad_norm": 8.170973077020301, "learning_rate": 3.84567871489892e-06, "loss": 1.1198, "step": 4796 }, { "epoch": 0.679125079634742, "grad_norm": 8.036912636185594, "learning_rate": 3.845195603199728e-06, "loss": 1.1692, "step": 4797 }, { "epoch": 0.6792666525093792, "grad_norm": 8.071360842183093, "learning_rate": 3.844712420784607e-06, "loss": 1.0426, "step": 4798 }, { "epoch": 0.6794082253840165, "grad_norm": 8.166216727597293, "learning_rate": 3.844229167678957e-06, "loss": 1.0426, "step": 4799 }, { "epoch": 0.6795497982586537, "grad_norm": 9.377821434164453, "learning_rate": 3.843745843908181e-06, "loss": 1.1291, "step": 4800 }, { "epoch": 0.6796913711332908, "grad_norm": 9.02250739253992, "learning_rate": 3.843262449497689e-06, "loss": 1.0097, "step": 4801 }, { "epoch": 0.679832944007928, "grad_norm": 7.358778619595564, "learning_rate": 3.842778984472891e-06, "loss": 1.1275, "step": 4802 }, { "epoch": 0.6799745168825653, "grad_norm": 8.388591612319749, "learning_rate": 3.842295448859203e-06, "loss": 1.2084, "step": 4803 }, { "epoch": 0.6801160897572025, "grad_norm": 8.622509582992011, "learning_rate": 3.841811842682044e-06, "loss": 1.1292, "step": 4804 }, { "epoch": 0.6802576626318397, "grad_norm": 7.6221968704459435, "learning_rate": 3.841328165966837e-06, "loss": 1.0921, "step": 4805 }, { "epoch": 0.680399235506477, "grad_norm": 8.493045430157014, "learning_rate": 3.84084441873901e-06, "loss": 1.1625, "step": 4806 }, { "epoch": 0.6805408083811142, "grad_norm": 7.924057278082857, "learning_rate": 3.840360601023989e-06, "loss": 1.2214, "step": 4807 }, { "epoch": 0.6806823812557514, "grad_norm": 8.734433960417052, "learning_rate": 3.839876712847211e-06, "loss": 1.1974, "step": 4808 }, { "epoch": 0.6808239541303887, "grad_norm": 10.773907712726324, "learning_rate": 3.839392754234115e-06, "loss": 1.1751, "step": 4809 }, { "epoch": 0.6809655270050259, "grad_norm": 9.102256545425975, "learning_rate": 3.8389087252101395e-06, "loss": 1.0731, "step": 4810 }, { "epoch": 0.681107099879663, "grad_norm": 9.65543616362128, "learning_rate": 3.838424625800732e-06, "loss": 1.2076, "step": 4811 }, { "epoch": 0.6812486727543002, "grad_norm": 7.822158580338105, "learning_rate": 3.837940456031338e-06, "loss": 1.1092, "step": 4812 }, { "epoch": 0.6813902456289375, "grad_norm": 8.841581799971424, "learning_rate": 3.837456215927413e-06, "loss": 1.2099, "step": 4813 }, { "epoch": 0.6815318185035747, "grad_norm": 9.893155947771852, "learning_rate": 3.8369719055144115e-06, "loss": 1.211, "step": 4814 }, { "epoch": 0.6816733913782119, "grad_norm": 8.22394586086636, "learning_rate": 3.836487524817794e-06, "loss": 1.2056, "step": 4815 }, { "epoch": 0.6818149642528492, "grad_norm": 8.548834149029748, "learning_rate": 3.836003073863024e-06, "loss": 1.2335, "step": 4816 }, { "epoch": 0.6819565371274864, "grad_norm": 7.244629811266751, "learning_rate": 3.8355185526755676e-06, "loss": 1.0268, "step": 4817 }, { "epoch": 0.6820981100021236, "grad_norm": 9.409332480028887, "learning_rate": 3.835033961280898e-06, "loss": 1.1199, "step": 4818 }, { "epoch": 0.6822396828767608, "grad_norm": 7.648982619906317, "learning_rate": 3.834549299704487e-06, "loss": 1.0811, "step": 4819 }, { "epoch": 0.6823812557513981, "grad_norm": 8.53549725791892, "learning_rate": 3.8340645679718155e-06, "loss": 1.0916, "step": 4820 }, { "epoch": 0.6825228286260353, "grad_norm": 8.461258055302295, "learning_rate": 3.833579766108365e-06, "loss": 1.11, "step": 4821 }, { "epoch": 0.6826644015006724, "grad_norm": 6.733217073450337, "learning_rate": 3.83309489413962e-06, "loss": 0.9908, "step": 4822 }, { "epoch": 0.6828059743753097, "grad_norm": 7.937379610845756, "learning_rate": 3.83260995209107e-06, "loss": 1.0872, "step": 4823 }, { "epoch": 0.6829475472499469, "grad_norm": 8.538277997471702, "learning_rate": 3.832124939988208e-06, "loss": 1.0791, "step": 4824 }, { "epoch": 0.6830891201245841, "grad_norm": 9.844588955444516, "learning_rate": 3.831639857856532e-06, "loss": 1.0699, "step": 4825 }, { "epoch": 0.6832306929992213, "grad_norm": 8.684236421196644, "learning_rate": 3.831154705721542e-06, "loss": 1.0721, "step": 4826 }, { "epoch": 0.6833722658738586, "grad_norm": 10.056018708617561, "learning_rate": 3.830669483608741e-06, "loss": 1.2255, "step": 4827 }, { "epoch": 0.6835138387484958, "grad_norm": 10.349048928317188, "learning_rate": 3.830184191543638e-06, "loss": 1.2033, "step": 4828 }, { "epoch": 0.683655411623133, "grad_norm": 8.838879217844122, "learning_rate": 3.829698829551743e-06, "loss": 1.1447, "step": 4829 }, { "epoch": 0.6837969844977703, "grad_norm": 7.989481448270336, "learning_rate": 3.829213397658572e-06, "loss": 1.2007, "step": 4830 }, { "epoch": 0.6839385573724075, "grad_norm": 9.059881845002735, "learning_rate": 3.828727895889644e-06, "loss": 1.0979, "step": 4831 }, { "epoch": 0.6840801302470446, "grad_norm": 8.19932156989485, "learning_rate": 3.828242324270482e-06, "loss": 1.1942, "step": 4832 }, { "epoch": 0.6842217031216818, "grad_norm": 9.346557603636, "learning_rate": 3.82775668282661e-06, "loss": 1.1049, "step": 4833 }, { "epoch": 0.6843632759963191, "grad_norm": 9.09575575898718, "learning_rate": 3.827270971583561e-06, "loss": 1.0488, "step": 4834 }, { "epoch": 0.6845048488709563, "grad_norm": 9.747237376539537, "learning_rate": 3.826785190566865e-06, "loss": 0.9938, "step": 4835 }, { "epoch": 0.6846464217455935, "grad_norm": 8.54446940766342, "learning_rate": 3.826299339802062e-06, "loss": 1.0234, "step": 4836 }, { "epoch": 0.6847879946202308, "grad_norm": 7.397664562482129, "learning_rate": 3.825813419314691e-06, "loss": 1.0524, "step": 4837 }, { "epoch": 0.684929567494868, "grad_norm": 10.590850883499561, "learning_rate": 3.825327429130297e-06, "loss": 1.1921, "step": 4838 }, { "epoch": 0.6850711403695052, "grad_norm": 10.233073330867999, "learning_rate": 3.824841369274429e-06, "loss": 1.0568, "step": 4839 }, { "epoch": 0.6852127132441425, "grad_norm": 10.304373203484976, "learning_rate": 3.824355239772637e-06, "loss": 1.1652, "step": 4840 }, { "epoch": 0.6853542861187797, "grad_norm": 7.9029893748093585, "learning_rate": 3.823869040650478e-06, "loss": 1.1438, "step": 4841 }, { "epoch": 0.6854958589934168, "grad_norm": 8.593843216823982, "learning_rate": 3.823382771933512e-06, "loss": 1.0739, "step": 4842 }, { "epoch": 0.685637431868054, "grad_norm": 8.529795763761543, "learning_rate": 3.822896433647299e-06, "loss": 1.0454, "step": 4843 }, { "epoch": 0.6857790047426913, "grad_norm": 8.839722919523288, "learning_rate": 3.8224100258174066e-06, "loss": 0.9791, "step": 4844 }, { "epoch": 0.6859205776173285, "grad_norm": 10.289703140647754, "learning_rate": 3.821923548469405e-06, "loss": 1.159, "step": 4845 }, { "epoch": 0.6860621504919657, "grad_norm": 9.777278593595323, "learning_rate": 3.82143700162887e-06, "loss": 1.2481, "step": 4846 }, { "epoch": 0.686203723366603, "grad_norm": 7.174209381034256, "learning_rate": 3.820950385321375e-06, "loss": 1.0709, "step": 4847 }, { "epoch": 0.6863452962412402, "grad_norm": 7.422848157827959, "learning_rate": 3.820463699572505e-06, "loss": 0.9769, "step": 4848 }, { "epoch": 0.6864868691158774, "grad_norm": 8.192871661214333, "learning_rate": 3.819976944407841e-06, "loss": 1.0328, "step": 4849 }, { "epoch": 0.6866284419905146, "grad_norm": 8.878315480923654, "learning_rate": 3.819490119852975e-06, "loss": 1.1219, "step": 4850 }, { "epoch": 0.6867700148651519, "grad_norm": 9.03628220326971, "learning_rate": 3.819003225933497e-06, "loss": 1.1772, "step": 4851 }, { "epoch": 0.6869115877397891, "grad_norm": 8.82928253957105, "learning_rate": 3.818516262675001e-06, "loss": 1.0793, "step": 4852 }, { "epoch": 0.6870531606144262, "grad_norm": 7.844365190796641, "learning_rate": 3.81802923010309e-06, "loss": 1.0044, "step": 4853 }, { "epoch": 0.6871947334890635, "grad_norm": 7.464703473220838, "learning_rate": 3.817542128243365e-06, "loss": 1.0927, "step": 4854 }, { "epoch": 0.6873363063637007, "grad_norm": 7.771925889459446, "learning_rate": 3.817054957121432e-06, "loss": 1.1207, "step": 4855 }, { "epoch": 0.6874778792383379, "grad_norm": 7.539301576924899, "learning_rate": 3.8165677167629025e-06, "loss": 0.9364, "step": 4856 }, { "epoch": 0.6876194521129751, "grad_norm": 7.292999349242851, "learning_rate": 3.81608040719339e-06, "loss": 1.0794, "step": 4857 }, { "epoch": 0.6877610249876124, "grad_norm": 9.761883267546239, "learning_rate": 3.8155930284385116e-06, "loss": 1.4016, "step": 4858 }, { "epoch": 0.6879025978622496, "grad_norm": 8.318969503554765, "learning_rate": 3.815105580523888e-06, "loss": 1.1474, "step": 4859 }, { "epoch": 0.6880441707368868, "grad_norm": 10.161427466369913, "learning_rate": 3.814618063475145e-06, "loss": 1.064, "step": 4860 }, { "epoch": 0.6881857436115241, "grad_norm": 8.247423174160028, "learning_rate": 3.814130477317911e-06, "loss": 1.2272, "step": 4861 }, { "epoch": 0.6883273164861613, "grad_norm": 7.952011899866299, "learning_rate": 3.8136428220778177e-06, "loss": 1.0287, "step": 4862 }, { "epoch": 0.6884688893607984, "grad_norm": 8.564353129761173, "learning_rate": 3.8131550977805005e-06, "loss": 1.0998, "step": 4863 }, { "epoch": 0.6886104622354356, "grad_norm": 8.77341575993522, "learning_rate": 3.8126673044515993e-06, "loss": 1.2352, "step": 4864 }, { "epoch": 0.6887520351100729, "grad_norm": 8.016620536983423, "learning_rate": 3.812179442116756e-06, "loss": 1.1072, "step": 4865 }, { "epoch": 0.6888936079847101, "grad_norm": 8.072274367509783, "learning_rate": 3.811691510801618e-06, "loss": 1.0349, "step": 4866 }, { "epoch": 0.6890351808593473, "grad_norm": 8.811481389906465, "learning_rate": 3.8112035105318353e-06, "loss": 1.2745, "step": 4867 }, { "epoch": 0.6891767537339846, "grad_norm": 8.287469565219144, "learning_rate": 3.8107154413330616e-06, "loss": 1.1419, "step": 4868 }, { "epoch": 0.6893183266086218, "grad_norm": 8.853661041782289, "learning_rate": 3.8102273032309554e-06, "loss": 1.1568, "step": 4869 }, { "epoch": 0.689459899483259, "grad_norm": 9.475730412897475, "learning_rate": 3.809739096251176e-06, "loss": 1.0669, "step": 4870 }, { "epoch": 0.6896014723578963, "grad_norm": 7.305242343984606, "learning_rate": 3.809250820419389e-06, "loss": 1.0508, "step": 4871 }, { "epoch": 0.6897430452325335, "grad_norm": 9.402125877071674, "learning_rate": 3.808762475761263e-06, "loss": 1.1415, "step": 4872 }, { "epoch": 0.6898846181071706, "grad_norm": 7.458752952435732, "learning_rate": 3.808274062302469e-06, "loss": 1.0342, "step": 4873 }, { "epoch": 0.6900261909818078, "grad_norm": 8.01895661299728, "learning_rate": 3.807785580068683e-06, "loss": 1.06, "step": 4874 }, { "epoch": 0.6901677638564451, "grad_norm": 6.975423995499642, "learning_rate": 3.8072970290855843e-06, "loss": 1.0674, "step": 4875 }, { "epoch": 0.6903093367310823, "grad_norm": 9.100771994909021, "learning_rate": 3.8068084093788554e-06, "loss": 1.1837, "step": 4876 }, { "epoch": 0.6904509096057195, "grad_norm": 6.9609336638948465, "learning_rate": 3.806319720974183e-06, "loss": 1.1786, "step": 4877 }, { "epoch": 0.6905924824803568, "grad_norm": 11.114660365878398, "learning_rate": 3.8058309638972567e-06, "loss": 1.2427, "step": 4878 }, { "epoch": 0.690734055354994, "grad_norm": 7.995091362885036, "learning_rate": 3.805342138173771e-06, "loss": 1.1776, "step": 4879 }, { "epoch": 0.6908756282296312, "grad_norm": 7.10644148784166, "learning_rate": 3.8048532438294215e-06, "loss": 1.1696, "step": 4880 }, { "epoch": 0.6910172011042685, "grad_norm": 8.271397902071604, "learning_rate": 3.8043642808899106e-06, "loss": 1.0932, "step": 4881 }, { "epoch": 0.6911587739789057, "grad_norm": 9.674720653670704, "learning_rate": 3.8038752493809416e-06, "loss": 1.1385, "step": 4882 }, { "epoch": 0.6913003468535429, "grad_norm": 8.564534412028726, "learning_rate": 3.803386149328223e-06, "loss": 1.1291, "step": 4883 }, { "epoch": 0.69144191972818, "grad_norm": 14.964870770498967, "learning_rate": 3.8028969807574665e-06, "loss": 1.1192, "step": 4884 }, { "epoch": 0.6915834926028173, "grad_norm": 7.665119125525454, "learning_rate": 3.8024077436943875e-06, "loss": 1.1473, "step": 4885 }, { "epoch": 0.6917250654774545, "grad_norm": 8.867498101663616, "learning_rate": 3.8019184381647044e-06, "loss": 1.1385, "step": 4886 }, { "epoch": 0.6918666383520917, "grad_norm": 8.58979045856593, "learning_rate": 3.8014290641941392e-06, "loss": 1.2033, "step": 4887 }, { "epoch": 0.692008211226729, "grad_norm": 6.915552562777262, "learning_rate": 3.800939621808419e-06, "loss": 1.0466, "step": 4888 }, { "epoch": 0.6921497841013662, "grad_norm": 9.171579697386258, "learning_rate": 3.8004501110332726e-06, "loss": 1.2018, "step": 4889 }, { "epoch": 0.6922913569760034, "grad_norm": 7.640057564164045, "learning_rate": 3.799960531894434e-06, "loss": 1.1634, "step": 4890 }, { "epoch": 0.6924329298506406, "grad_norm": 8.565529837178875, "learning_rate": 3.7994708844176385e-06, "loss": 1.1633, "step": 4891 }, { "epoch": 0.6925745027252779, "grad_norm": 9.308953141011237, "learning_rate": 3.7989811686286283e-06, "loss": 1.1969, "step": 4892 }, { "epoch": 0.6927160755999151, "grad_norm": 10.342409352437125, "learning_rate": 3.7984913845531466e-06, "loss": 1.3083, "step": 4893 }, { "epoch": 0.6928576484745522, "grad_norm": 6.32037255408351, "learning_rate": 3.798001532216941e-06, "loss": 0.8954, "step": 4894 }, { "epoch": 0.6929992213491895, "grad_norm": 9.834889132597697, "learning_rate": 3.7975116116457626e-06, "loss": 1.0865, "step": 4895 }, { "epoch": 0.6931407942238267, "grad_norm": 9.922866507422192, "learning_rate": 3.7970216228653667e-06, "loss": 1.0855, "step": 4896 }, { "epoch": 0.6932823670984639, "grad_norm": 7.95828313628605, "learning_rate": 3.7965315659015108e-06, "loss": 1.1208, "step": 4897 }, { "epoch": 0.6934239399731011, "grad_norm": 8.623445232517144, "learning_rate": 3.7960414407799565e-06, "loss": 1.0864, "step": 4898 }, { "epoch": 0.6935655128477384, "grad_norm": 6.994330426250412, "learning_rate": 3.795551247526471e-06, "loss": 1.1132, "step": 4899 }, { "epoch": 0.6937070857223756, "grad_norm": 7.404773677687484, "learning_rate": 3.795060986166822e-06, "loss": 1.0228, "step": 4900 }, { "epoch": 0.6938486585970128, "grad_norm": 7.439398771624758, "learning_rate": 3.794570656726784e-06, "loss": 1.1589, "step": 4901 }, { "epoch": 0.6939902314716501, "grad_norm": 8.077632453703803, "learning_rate": 3.79408025923213e-06, "loss": 1.0707, "step": 4902 }, { "epoch": 0.6941318043462873, "grad_norm": 7.8282392725722545, "learning_rate": 3.793589793708642e-06, "loss": 1.1184, "step": 4903 }, { "epoch": 0.6942733772209245, "grad_norm": 8.85975089108327, "learning_rate": 3.7930992601821028e-06, "loss": 1.1327, "step": 4904 }, { "epoch": 0.6944149500955616, "grad_norm": 9.31784743831906, "learning_rate": 3.7926086586783008e-06, "loss": 1.256, "step": 4905 }, { "epoch": 0.6945565229701989, "grad_norm": 8.410067347416781, "learning_rate": 3.7921179892230246e-06, "loss": 1.1266, "step": 4906 }, { "epoch": 0.6946980958448361, "grad_norm": 7.182556044203554, "learning_rate": 3.7916272518420694e-06, "loss": 1.1838, "step": 4907 }, { "epoch": 0.6948396687194733, "grad_norm": 8.95531347666468, "learning_rate": 3.791136446561233e-06, "loss": 1.1337, "step": 4908 }, { "epoch": 0.6949812415941106, "grad_norm": 8.594131738680579, "learning_rate": 3.7906455734063156e-06, "loss": 1.0086, "step": 4909 }, { "epoch": 0.6951228144687478, "grad_norm": 8.862371782201254, "learning_rate": 3.7901546324031236e-06, "loss": 1.2083, "step": 4910 }, { "epoch": 0.695264387343385, "grad_norm": 8.521741997380692, "learning_rate": 3.7896636235774636e-06, "loss": 1.213, "step": 4911 }, { "epoch": 0.6954059602180223, "grad_norm": 9.140365831662379, "learning_rate": 3.789172546955149e-06, "loss": 1.0522, "step": 4912 }, { "epoch": 0.6955475330926595, "grad_norm": 10.887341391528217, "learning_rate": 3.7886814025619944e-06, "loss": 1.042, "step": 4913 }, { "epoch": 0.6956891059672967, "grad_norm": 8.838602569735908, "learning_rate": 3.7881901904238203e-06, "loss": 1.0975, "step": 4914 }, { "epoch": 0.6958306788419338, "grad_norm": 7.84109707532859, "learning_rate": 3.7876989105664476e-06, "loss": 0.9993, "step": 4915 }, { "epoch": 0.6959722517165711, "grad_norm": 8.81075491434443, "learning_rate": 3.7872075630157035e-06, "loss": 1.0707, "step": 4916 }, { "epoch": 0.6961138245912083, "grad_norm": 10.076833432258587, "learning_rate": 3.786716147797418e-06, "loss": 1.0328, "step": 4917 }, { "epoch": 0.6962553974658455, "grad_norm": 8.291609362942559, "learning_rate": 3.786224664937424e-06, "loss": 1.162, "step": 4918 }, { "epoch": 0.6963969703404828, "grad_norm": 9.426447377259898, "learning_rate": 3.7857331144615576e-06, "loss": 1.0966, "step": 4919 }, { "epoch": 0.69653854321512, "grad_norm": 7.712414075474814, "learning_rate": 3.785241496395661e-06, "loss": 1.1907, "step": 4920 }, { "epoch": 0.6966801160897572, "grad_norm": 8.9626312118566, "learning_rate": 3.7847498107655768e-06, "loss": 1.1098, "step": 4921 }, { "epoch": 0.6968216889643944, "grad_norm": 9.545813268881572, "learning_rate": 3.7842580575971533e-06, "loss": 1.0484, "step": 4922 }, { "epoch": 0.6969632618390317, "grad_norm": 7.346350416425694, "learning_rate": 3.783766236916241e-06, "loss": 1.1839, "step": 4923 }, { "epoch": 0.6971048347136689, "grad_norm": 8.187718046357032, "learning_rate": 3.7832743487486945e-06, "loss": 1.0983, "step": 4924 }, { "epoch": 0.697246407588306, "grad_norm": 7.4297622051153525, "learning_rate": 3.782782393120373e-06, "loss": 1.0588, "step": 4925 }, { "epoch": 0.6973879804629433, "grad_norm": 7.411734952252503, "learning_rate": 3.7822903700571372e-06, "loss": 1.141, "step": 4926 }, { "epoch": 0.6975295533375805, "grad_norm": 9.559595801455574, "learning_rate": 3.781798279584853e-06, "loss": 1.223, "step": 4927 }, { "epoch": 0.6976711262122177, "grad_norm": 9.843254730844318, "learning_rate": 3.7813061217293887e-06, "loss": 1.2506, "step": 4928 }, { "epoch": 0.697812699086855, "grad_norm": 9.820889330836346, "learning_rate": 3.7808138965166167e-06, "loss": 1.1526, "step": 4929 }, { "epoch": 0.6979542719614922, "grad_norm": 8.484334984223084, "learning_rate": 3.780321603972414e-06, "loss": 1.1017, "step": 4930 }, { "epoch": 0.6980958448361294, "grad_norm": 7.784013004837072, "learning_rate": 3.7798292441226584e-06, "loss": 1.0439, "step": 4931 }, { "epoch": 0.6982374177107666, "grad_norm": 10.21785403182219, "learning_rate": 3.7793368169932343e-06, "loss": 1.1404, "step": 4932 }, { "epoch": 0.6983789905854039, "grad_norm": 11.39000877105402, "learning_rate": 3.7788443226100274e-06, "loss": 1.1756, "step": 4933 }, { "epoch": 0.6985205634600411, "grad_norm": 8.236991567941054, "learning_rate": 3.7783517609989284e-06, "loss": 0.9966, "step": 4934 }, { "epoch": 0.6986621363346783, "grad_norm": 9.412631180091031, "learning_rate": 3.77785913218583e-06, "loss": 1.1699, "step": 4935 }, { "epoch": 0.6988037092093154, "grad_norm": 7.658535612283159, "learning_rate": 3.77736643619663e-06, "loss": 1.1637, "step": 4936 }, { "epoch": 0.6989452820839527, "grad_norm": 7.4750043786077525, "learning_rate": 3.776873673057229e-06, "loss": 1.0885, "step": 4937 }, { "epoch": 0.6990868549585899, "grad_norm": 9.538111483868855, "learning_rate": 3.776380842793531e-06, "loss": 1.0756, "step": 4938 }, { "epoch": 0.6992284278332271, "grad_norm": 8.831176446499029, "learning_rate": 3.775887945431444e-06, "loss": 1.1748, "step": 4939 }, { "epoch": 0.6993700007078644, "grad_norm": 8.879582471224717, "learning_rate": 3.775394980996879e-06, "loss": 1.168, "step": 4940 }, { "epoch": 0.6995115735825016, "grad_norm": 7.561172242687886, "learning_rate": 3.77490194951575e-06, "loss": 1.1564, "step": 4941 }, { "epoch": 0.6996531464571388, "grad_norm": 9.170230334601658, "learning_rate": 3.7744088510139763e-06, "loss": 0.9583, "step": 4942 }, { "epoch": 0.6997947193317761, "grad_norm": 8.554202772368726, "learning_rate": 3.773915685517481e-06, "loss": 0.9482, "step": 4943 }, { "epoch": 0.6999362922064133, "grad_norm": 9.982382705423543, "learning_rate": 3.7734224530521867e-06, "loss": 1.035, "step": 4944 }, { "epoch": 0.7000778650810505, "grad_norm": 8.669191677335258, "learning_rate": 3.772929153644024e-06, "loss": 1.0772, "step": 4945 }, { "epoch": 0.7002194379556876, "grad_norm": 9.042238513307586, "learning_rate": 3.772435787318925e-06, "loss": 1.1087, "step": 4946 }, { "epoch": 0.7003610108303249, "grad_norm": 10.245520752778038, "learning_rate": 3.771942354102825e-06, "loss": 1.1749, "step": 4947 }, { "epoch": 0.7005025837049621, "grad_norm": 7.337631988637996, "learning_rate": 3.7714488540216637e-06, "loss": 1.1822, "step": 4948 }, { "epoch": 0.7006441565795993, "grad_norm": 10.189303560276189, "learning_rate": 3.7709552871013844e-06, "loss": 1.0398, "step": 4949 }, { "epoch": 0.7007857294542366, "grad_norm": 12.513935713281127, "learning_rate": 3.770461653367934e-06, "loss": 1.1413, "step": 4950 }, { "epoch": 0.7009273023288738, "grad_norm": 14.368508108657442, "learning_rate": 3.769967952847261e-06, "loss": 1.189, "step": 4951 }, { "epoch": 0.701068875203511, "grad_norm": 9.750305953482867, "learning_rate": 3.7694741855653195e-06, "loss": 0.9765, "step": 4952 }, { "epoch": 0.7012104480781483, "grad_norm": 12.255353108318435, "learning_rate": 3.7689803515480674e-06, "loss": 1.2479, "step": 4953 }, { "epoch": 0.7013520209527855, "grad_norm": 9.430392588981872, "learning_rate": 3.7684864508214638e-06, "loss": 1.0484, "step": 4954 }, { "epoch": 0.7014935938274227, "grad_norm": 9.135821053324072, "learning_rate": 3.7679924834114735e-06, "loss": 1.2197, "step": 4955 }, { "epoch": 0.7016351667020598, "grad_norm": 9.406576866984727, "learning_rate": 3.7674984493440632e-06, "loss": 0.9782, "step": 4956 }, { "epoch": 0.7017767395766971, "grad_norm": 7.730095189780421, "learning_rate": 3.7670043486452047e-06, "loss": 1.1603, "step": 4957 }, { "epoch": 0.7019183124513343, "grad_norm": 9.73251182886635, "learning_rate": 3.7665101813408726e-06, "loss": 1.0917, "step": 4958 }, { "epoch": 0.7020598853259715, "grad_norm": 7.963710013872843, "learning_rate": 3.766015947457046e-06, "loss": 1.2439, "step": 4959 }, { "epoch": 0.7022014582006088, "grad_norm": 8.37378604118676, "learning_rate": 3.7655216470197033e-06, "loss": 1.0659, "step": 4960 }, { "epoch": 0.702343031075246, "grad_norm": 8.172266642592906, "learning_rate": 3.7650272800548316e-06, "loss": 1.0117, "step": 4961 }, { "epoch": 0.7024846039498832, "grad_norm": 7.433801837520387, "learning_rate": 3.764532846588419e-06, "loss": 1.1711, "step": 4962 }, { "epoch": 0.7026261768245204, "grad_norm": 8.89316258477699, "learning_rate": 3.764038346646457e-06, "loss": 1.1811, "step": 4963 }, { "epoch": 0.7027677496991577, "grad_norm": 7.967569181430976, "learning_rate": 3.7635437802549426e-06, "loss": 1.0467, "step": 4964 }, { "epoch": 0.7029093225737949, "grad_norm": 8.643060765125306, "learning_rate": 3.7630491474398734e-06, "loss": 1.0561, "step": 4965 }, { "epoch": 0.7030508954484321, "grad_norm": 8.557393799952234, "learning_rate": 3.7625544482272523e-06, "loss": 1.0932, "step": 4966 }, { "epoch": 0.7031924683230693, "grad_norm": 8.191093296954568, "learning_rate": 3.762059682643085e-06, "loss": 1.0218, "step": 4967 }, { "epoch": 0.7033340411977065, "grad_norm": 8.604059866795804, "learning_rate": 3.7615648507133816e-06, "loss": 1.1859, "step": 4968 }, { "epoch": 0.7034756140723437, "grad_norm": 8.011411634014877, "learning_rate": 3.7610699524641547e-06, "loss": 1.1541, "step": 4969 }, { "epoch": 0.7036171869469809, "grad_norm": 8.982056001667427, "learning_rate": 3.7605749879214203e-06, "loss": 1.1212, "step": 4970 }, { "epoch": 0.7037587598216182, "grad_norm": 7.760260404169911, "learning_rate": 3.760079957111199e-06, "loss": 1.2346, "step": 4971 }, { "epoch": 0.7039003326962554, "grad_norm": 7.648769913055388, "learning_rate": 3.7595848600595135e-06, "loss": 1.1191, "step": 4972 }, { "epoch": 0.7040419055708926, "grad_norm": 7.9736382544569615, "learning_rate": 3.7590896967923917e-06, "loss": 1.0451, "step": 4973 }, { "epoch": 0.7041834784455299, "grad_norm": 8.283937414031437, "learning_rate": 3.7585944673358632e-06, "loss": 1.0736, "step": 4974 }, { "epoch": 0.7043250513201671, "grad_norm": 7.754322446323889, "learning_rate": 3.758099171715962e-06, "loss": 1.0731, "step": 4975 }, { "epoch": 0.7044666241948043, "grad_norm": 7.8202261600815755, "learning_rate": 3.7576038099587252e-06, "loss": 0.963, "step": 4976 }, { "epoch": 0.7046081970694414, "grad_norm": 8.316603486681778, "learning_rate": 3.7571083820901943e-06, "loss": 1.2323, "step": 4977 }, { "epoch": 0.7047497699440787, "grad_norm": 9.835279713906527, "learning_rate": 3.7566128881364116e-06, "loss": 1.189, "step": 4978 }, { "epoch": 0.7048913428187159, "grad_norm": 7.897524878114627, "learning_rate": 3.7561173281234276e-06, "loss": 1.0478, "step": 4979 }, { "epoch": 0.7050329156933531, "grad_norm": 8.669723656286122, "learning_rate": 3.755621702077293e-06, "loss": 1.2081, "step": 4980 }, { "epoch": 0.7051744885679904, "grad_norm": 10.200532218661206, "learning_rate": 3.7551260100240604e-06, "loss": 1.2444, "step": 4981 }, { "epoch": 0.7053160614426276, "grad_norm": 9.702119686120584, "learning_rate": 3.7546302519897904e-06, "loss": 1.1109, "step": 4982 }, { "epoch": 0.7054576343172648, "grad_norm": 8.969112375627848, "learning_rate": 3.7541344280005427e-06, "loss": 1.0649, "step": 4983 }, { "epoch": 0.7055992071919021, "grad_norm": 8.718381593639078, "learning_rate": 3.7536385380823835e-06, "loss": 1.2099, "step": 4984 }, { "epoch": 0.7057407800665393, "grad_norm": 9.151593062336921, "learning_rate": 3.753142582261381e-06, "loss": 1.2526, "step": 4985 }, { "epoch": 0.7058823529411765, "grad_norm": 8.080980515615044, "learning_rate": 3.7526465605636075e-06, "loss": 1.117, "step": 4986 }, { "epoch": 0.7060239258158136, "grad_norm": 8.852925531501358, "learning_rate": 3.7521504730151382e-06, "loss": 1.0225, "step": 4987 }, { "epoch": 0.7061654986904509, "grad_norm": 7.175124685021541, "learning_rate": 3.751654319642052e-06, "loss": 1.0808, "step": 4988 }, { "epoch": 0.7063070715650881, "grad_norm": 9.472803639317268, "learning_rate": 3.7511581004704317e-06, "loss": 1.0923, "step": 4989 }, { "epoch": 0.7064486444397253, "grad_norm": 7.896914311663256, "learning_rate": 3.750661815526363e-06, "loss": 1.0632, "step": 4990 }, { "epoch": 0.7065902173143626, "grad_norm": 8.101079854686764, "learning_rate": 3.7501654648359353e-06, "loss": 1.1181, "step": 4991 }, { "epoch": 0.7067317901889998, "grad_norm": 6.043060913566744, "learning_rate": 3.7496690484252413e-06, "loss": 0.8978, "step": 4992 }, { "epoch": 0.706873363063637, "grad_norm": 8.251742554635856, "learning_rate": 3.7491725663203765e-06, "loss": 1.0974, "step": 4993 }, { "epoch": 0.7070149359382742, "grad_norm": 9.909807883978543, "learning_rate": 3.748676018547442e-06, "loss": 1.1907, "step": 4994 }, { "epoch": 0.7071565088129115, "grad_norm": 10.188670834151981, "learning_rate": 3.7481794051325404e-06, "loss": 1.1162, "step": 4995 }, { "epoch": 0.7072980816875487, "grad_norm": 9.019444547412421, "learning_rate": 3.7476827261017777e-06, "loss": 1.2134, "step": 4996 }, { "epoch": 0.7074396545621859, "grad_norm": 8.62963167856629, "learning_rate": 3.747185981481265e-06, "loss": 1.0984, "step": 4997 }, { "epoch": 0.7075812274368231, "grad_norm": 8.281330612078168, "learning_rate": 3.7466891712971144e-06, "loss": 1.0287, "step": 4998 }, { "epoch": 0.7077228003114603, "grad_norm": 8.709712550503738, "learning_rate": 3.7461922955754445e-06, "loss": 1.1163, "step": 4999 }, { "epoch": 0.7078643731860975, "grad_norm": 8.363029009361727, "learning_rate": 3.745695354342374e-06, "loss": 1.0906, "step": 5000 }, { "epoch": 0.7080059460607347, "grad_norm": 7.097964207167466, "learning_rate": 3.745198347624027e-06, "loss": 1.0745, "step": 5001 }, { "epoch": 0.708147518935372, "grad_norm": 9.146528792533084, "learning_rate": 3.744701275446533e-06, "loss": 1.297, "step": 5002 }, { "epoch": 0.7082890918100092, "grad_norm": 9.514779891145833, "learning_rate": 3.7442041378360204e-06, "loss": 1.2018, "step": 5003 }, { "epoch": 0.7084306646846464, "grad_norm": 8.522950547501507, "learning_rate": 3.743706934818624e-06, "loss": 1.1652, "step": 5004 }, { "epoch": 0.7085722375592837, "grad_norm": 7.259303996034051, "learning_rate": 3.743209666420481e-06, "loss": 0.9424, "step": 5005 }, { "epoch": 0.7087138104339209, "grad_norm": 8.847847680048291, "learning_rate": 3.7427123326677326e-06, "loss": 1.1268, "step": 5006 }, { "epoch": 0.7088553833085581, "grad_norm": 7.85774350533316, "learning_rate": 3.7422149335865244e-06, "loss": 1.0288, "step": 5007 }, { "epoch": 0.7089969561831952, "grad_norm": 7.919834935173247, "learning_rate": 3.7417174692030027e-06, "loss": 1.0766, "step": 5008 }, { "epoch": 0.7091385290578325, "grad_norm": 7.838095766144849, "learning_rate": 3.74121993954332e-06, "loss": 1.1546, "step": 5009 }, { "epoch": 0.7092801019324697, "grad_norm": 9.029612250805808, "learning_rate": 3.74072234463363e-06, "loss": 1.1247, "step": 5010 }, { "epoch": 0.7094216748071069, "grad_norm": 10.738256382974118, "learning_rate": 3.7402246845000916e-06, "loss": 1.2685, "step": 5011 }, { "epoch": 0.7095632476817442, "grad_norm": 8.919591928906987, "learning_rate": 3.7397269591688666e-06, "loss": 1.0041, "step": 5012 }, { "epoch": 0.7097048205563814, "grad_norm": 9.842578830379088, "learning_rate": 3.73922916866612e-06, "loss": 1.1222, "step": 5013 }, { "epoch": 0.7098463934310186, "grad_norm": 8.533946299392678, "learning_rate": 3.7387313130180192e-06, "loss": 1.2543, "step": 5014 }, { "epoch": 0.7099879663056559, "grad_norm": 9.703148195297356, "learning_rate": 3.7382333922507375e-06, "loss": 1.0676, "step": 5015 }, { "epoch": 0.7101295391802931, "grad_norm": 10.707470890052802, "learning_rate": 3.7377354063904484e-06, "loss": 1.069, "step": 5016 }, { "epoch": 0.7102711120549303, "grad_norm": 8.658363277125295, "learning_rate": 3.7372373554633334e-06, "loss": 1.1133, "step": 5017 }, { "epoch": 0.7104126849295674, "grad_norm": 9.268753424223913, "learning_rate": 3.7367392394955726e-06, "loss": 1.1118, "step": 5018 }, { "epoch": 0.7105542578042047, "grad_norm": 9.039110188535611, "learning_rate": 3.7362410585133523e-06, "loss": 1.0933, "step": 5019 }, { "epoch": 0.7106958306788419, "grad_norm": 8.508939193789862, "learning_rate": 3.7357428125428612e-06, "loss": 1.025, "step": 5020 }, { "epoch": 0.7108374035534791, "grad_norm": 8.589801116879196, "learning_rate": 3.7352445016102917e-06, "loss": 1.1578, "step": 5021 }, { "epoch": 0.7109789764281164, "grad_norm": 7.739916733089332, "learning_rate": 3.7347461257418403e-06, "loss": 1.0643, "step": 5022 }, { "epoch": 0.7111205493027536, "grad_norm": 8.164818483240017, "learning_rate": 3.7342476849637053e-06, "loss": 1.141, "step": 5023 }, { "epoch": 0.7112621221773908, "grad_norm": 10.1517369029145, "learning_rate": 3.7337491793020898e-06, "loss": 1.199, "step": 5024 }, { "epoch": 0.711403695052028, "grad_norm": 9.469110966716991, "learning_rate": 3.7332506087832e-06, "loss": 1.0474, "step": 5025 }, { "epoch": 0.7115452679266653, "grad_norm": 9.684307138279701, "learning_rate": 3.7327519734332453e-06, "loss": 1.1344, "step": 5026 }, { "epoch": 0.7116868408013025, "grad_norm": 9.48896469852242, "learning_rate": 3.732253273278438e-06, "loss": 1.1844, "step": 5027 }, { "epoch": 0.7118284136759397, "grad_norm": 8.110586674892113, "learning_rate": 3.731754508344996e-06, "loss": 1.0136, "step": 5028 }, { "epoch": 0.7119699865505769, "grad_norm": 8.015613578208237, "learning_rate": 3.731255678659137e-06, "loss": 1.0689, "step": 5029 }, { "epoch": 0.7121115594252141, "grad_norm": 8.954199921155846, "learning_rate": 3.730756784247085e-06, "loss": 0.9064, "step": 5030 }, { "epoch": 0.7122531322998513, "grad_norm": 7.457718751047572, "learning_rate": 3.730257825135067e-06, "loss": 1.1136, "step": 5031 }, { "epoch": 0.7123947051744886, "grad_norm": 9.591822924585376, "learning_rate": 3.7297588013493124e-06, "loss": 1.0727, "step": 5032 }, { "epoch": 0.7125362780491258, "grad_norm": 8.622593198357178, "learning_rate": 3.7292597129160547e-06, "loss": 1.0417, "step": 5033 }, { "epoch": 0.712677850923763, "grad_norm": 8.703062759259039, "learning_rate": 3.72876055986153e-06, "loss": 1.0783, "step": 5034 }, { "epoch": 0.7128194237984002, "grad_norm": 8.523279512026853, "learning_rate": 3.7282613422119794e-06, "loss": 1.1039, "step": 5035 }, { "epoch": 0.7129609966730375, "grad_norm": 8.02914650529654, "learning_rate": 3.7277620599936453e-06, "loss": 1.0146, "step": 5036 }, { "epoch": 0.7131025695476747, "grad_norm": 9.43975601010288, "learning_rate": 3.7272627132327753e-06, "loss": 1.1974, "step": 5037 }, { "epoch": 0.7132441424223119, "grad_norm": 7.126198383405994, "learning_rate": 3.7267633019556194e-06, "loss": 0.9589, "step": 5038 }, { "epoch": 0.713385715296949, "grad_norm": 9.778216881958848, "learning_rate": 3.726263826188432e-06, "loss": 1.0857, "step": 5039 }, { "epoch": 0.7135272881715863, "grad_norm": 9.029104010824177, "learning_rate": 3.7257642859574694e-06, "loss": 1.0878, "step": 5040 }, { "epoch": 0.7136688610462235, "grad_norm": 8.299563329768848, "learning_rate": 3.7252646812889926e-06, "loss": 1.2265, "step": 5041 }, { "epoch": 0.7138104339208607, "grad_norm": 7.02627102359924, "learning_rate": 3.724765012209264e-06, "loss": 0.9399, "step": 5042 }, { "epoch": 0.713952006795498, "grad_norm": 8.358522517864065, "learning_rate": 3.7242652787445527e-06, "loss": 1.2433, "step": 5043 }, { "epoch": 0.7140935796701352, "grad_norm": 9.692395751329524, "learning_rate": 3.723765480921129e-06, "loss": 1.203, "step": 5044 }, { "epoch": 0.7142351525447724, "grad_norm": 8.342642149795877, "learning_rate": 3.7232656187652655e-06, "loss": 1.056, "step": 5045 }, { "epoch": 0.7143767254194097, "grad_norm": 8.433192531128993, "learning_rate": 3.7227656923032406e-06, "loss": 0.9798, "step": 5046 }, { "epoch": 0.7145182982940469, "grad_norm": 7.902899835229095, "learning_rate": 3.7222657015613354e-06, "loss": 1.181, "step": 5047 }, { "epoch": 0.7146598711686841, "grad_norm": 6.873214489821753, "learning_rate": 3.7217656465658335e-06, "loss": 1.0224, "step": 5048 }, { "epoch": 0.7148014440433214, "grad_norm": 8.48073369073133, "learning_rate": 3.721265527343023e-06, "loss": 1.1544, "step": 5049 }, { "epoch": 0.7149430169179585, "grad_norm": 8.583564631731353, "learning_rate": 3.7207653439191944e-06, "loss": 1.1175, "step": 5050 }, { "epoch": 0.7150845897925957, "grad_norm": 8.259690575519176, "learning_rate": 3.720265096320641e-06, "loss": 1.1614, "step": 5051 }, { "epoch": 0.7152261626672329, "grad_norm": 10.421627928222941, "learning_rate": 3.7197647845736616e-06, "loss": 1.1661, "step": 5052 }, { "epoch": 0.7153677355418702, "grad_norm": 9.693595297665457, "learning_rate": 3.719264408704557e-06, "loss": 0.9401, "step": 5053 }, { "epoch": 0.7155093084165074, "grad_norm": 12.77253974769487, "learning_rate": 3.718763968739632e-06, "loss": 1.0935, "step": 5054 }, { "epoch": 0.7156508812911446, "grad_norm": 9.414931235849853, "learning_rate": 3.718263464705194e-06, "loss": 1.0306, "step": 5055 }, { "epoch": 0.7157924541657819, "grad_norm": 10.820703117949261, "learning_rate": 3.7177628966275535e-06, "loss": 1.0095, "step": 5056 }, { "epoch": 0.7159340270404191, "grad_norm": 10.19730995459384, "learning_rate": 3.717262264533026e-06, "loss": 1.1338, "step": 5057 }, { "epoch": 0.7160755999150563, "grad_norm": 10.187237882460002, "learning_rate": 3.716761568447928e-06, "loss": 1.0479, "step": 5058 }, { "epoch": 0.7162171727896935, "grad_norm": 10.091460638570823, "learning_rate": 3.7162608083985824e-06, "loss": 1.2518, "step": 5059 }, { "epoch": 0.7163587456643307, "grad_norm": 10.336146023146945, "learning_rate": 3.715759984411313e-06, "loss": 1.2271, "step": 5060 }, { "epoch": 0.7165003185389679, "grad_norm": 10.589299079961942, "learning_rate": 3.715259096512447e-06, "loss": 1.1715, "step": 5061 }, { "epoch": 0.7166418914136051, "grad_norm": 7.954661642439804, "learning_rate": 3.7147581447283172e-06, "loss": 0.9573, "step": 5062 }, { "epoch": 0.7167834642882424, "grad_norm": 9.174857439393984, "learning_rate": 3.714257129085257e-06, "loss": 1.1174, "step": 5063 }, { "epoch": 0.7169250371628796, "grad_norm": 7.710256250776411, "learning_rate": 3.7137560496096054e-06, "loss": 1.2053, "step": 5064 }, { "epoch": 0.7170666100375168, "grad_norm": 10.943931214702005, "learning_rate": 3.7132549063277033e-06, "loss": 1.0972, "step": 5065 }, { "epoch": 0.717208182912154, "grad_norm": 10.634600800777363, "learning_rate": 3.712753699265895e-06, "loss": 1.1086, "step": 5066 }, { "epoch": 0.7173497557867913, "grad_norm": 7.914538234559777, "learning_rate": 3.712252428450529e-06, "loss": 1.0145, "step": 5067 }, { "epoch": 0.7174913286614285, "grad_norm": 9.764672219146082, "learning_rate": 3.7117510939079563e-06, "loss": 1.2602, "step": 5068 }, { "epoch": 0.7176329015360657, "grad_norm": 8.865371854312395, "learning_rate": 3.7112496956645326e-06, "loss": 1.1039, "step": 5069 }, { "epoch": 0.7177744744107029, "grad_norm": 8.7146730359232, "learning_rate": 3.710748233746616e-06, "loss": 0.9601, "step": 5070 }, { "epoch": 0.7179160472853401, "grad_norm": 9.602783324161843, "learning_rate": 3.7102467081805676e-06, "loss": 1.0697, "step": 5071 }, { "epoch": 0.7180576201599773, "grad_norm": 8.503896381195467, "learning_rate": 3.709745118992751e-06, "loss": 1.0435, "step": 5072 }, { "epoch": 0.7181991930346145, "grad_norm": 8.79174915534157, "learning_rate": 3.709243466209537e-06, "loss": 1.1341, "step": 5073 }, { "epoch": 0.7183407659092518, "grad_norm": 6.80411258200248, "learning_rate": 3.7087417498572946e-06, "loss": 1.1102, "step": 5074 }, { "epoch": 0.718482338783889, "grad_norm": 8.96037427733582, "learning_rate": 3.7082399699623996e-06, "loss": 1.0543, "step": 5075 }, { "epoch": 0.7186239116585262, "grad_norm": 7.915895872906813, "learning_rate": 3.707738126551231e-06, "loss": 1.0759, "step": 5076 }, { "epoch": 0.7187654845331635, "grad_norm": 8.895012870551968, "learning_rate": 3.707236219650169e-06, "loss": 1.0, "step": 5077 }, { "epoch": 0.7189070574078007, "grad_norm": 10.132328724838313, "learning_rate": 3.7067342492855997e-06, "loss": 1.2919, "step": 5078 }, { "epoch": 0.7190486302824379, "grad_norm": 9.494965273214579, "learning_rate": 3.7062322154839098e-06, "loss": 1.1503, "step": 5079 }, { "epoch": 0.7191902031570752, "grad_norm": 8.448304894522943, "learning_rate": 3.7057301182714924e-06, "loss": 1.1097, "step": 5080 }, { "epoch": 0.7193317760317123, "grad_norm": 12.113274321789376, "learning_rate": 3.705227957674742e-06, "loss": 1.0556, "step": 5081 }, { "epoch": 0.7194733489063495, "grad_norm": 9.442606977847058, "learning_rate": 3.7047257337200554e-06, "loss": 1.1174, "step": 5082 }, { "epoch": 0.7196149217809867, "grad_norm": 11.133439538262577, "learning_rate": 3.704223446433836e-06, "loss": 1.2008, "step": 5083 }, { "epoch": 0.719756494655624, "grad_norm": 7.177420007996704, "learning_rate": 3.703721095842488e-06, "loss": 1.0998, "step": 5084 }, { "epoch": 0.7198980675302612, "grad_norm": 11.682185142644816, "learning_rate": 3.703218681972419e-06, "loss": 1.1395, "step": 5085 }, { "epoch": 0.7200396404048984, "grad_norm": 7.370127215794986, "learning_rate": 3.702716204850042e-06, "loss": 1.116, "step": 5086 }, { "epoch": 0.7201812132795357, "grad_norm": 7.8319935024807315, "learning_rate": 3.7022136645017704e-06, "loss": 1.0143, "step": 5087 }, { "epoch": 0.7203227861541729, "grad_norm": 9.214144108931455, "learning_rate": 3.701711060954023e-06, "loss": 1.1123, "step": 5088 }, { "epoch": 0.7204643590288101, "grad_norm": 9.453801709378821, "learning_rate": 3.701208394233221e-06, "loss": 1.1876, "step": 5089 }, { "epoch": 0.7206059319034473, "grad_norm": 9.620495423310663, "learning_rate": 3.7007056643657884e-06, "loss": 1.0139, "step": 5090 }, { "epoch": 0.7207475047780845, "grad_norm": 8.120963634569724, "learning_rate": 3.700202871378156e-06, "loss": 0.9487, "step": 5091 }, { "epoch": 0.7208890776527217, "grad_norm": 9.285713329943933, "learning_rate": 3.6997000152967526e-06, "loss": 1.0912, "step": 5092 }, { "epoch": 0.7210306505273589, "grad_norm": 9.608308264596818, "learning_rate": 3.699197096148014e-06, "loss": 1.1442, "step": 5093 }, { "epoch": 0.7211722234019962, "grad_norm": 7.735837979139814, "learning_rate": 3.698694113958379e-06, "loss": 1.0821, "step": 5094 }, { "epoch": 0.7213137962766334, "grad_norm": 9.041191358129547, "learning_rate": 3.6981910687542873e-06, "loss": 1.1314, "step": 5095 }, { "epoch": 0.7214553691512706, "grad_norm": 8.498999592869142, "learning_rate": 3.697687960562185e-06, "loss": 1.1147, "step": 5096 }, { "epoch": 0.7215969420259079, "grad_norm": 9.792419101672118, "learning_rate": 3.697184789408519e-06, "loss": 1.136, "step": 5097 }, { "epoch": 0.7217385149005451, "grad_norm": 8.54649606035526, "learning_rate": 3.6966815553197416e-06, "loss": 1.1321, "step": 5098 }, { "epoch": 0.7218800877751823, "grad_norm": 7.9544295344638005, "learning_rate": 3.696178258322307e-06, "loss": 1.0256, "step": 5099 }, { "epoch": 0.7220216606498195, "grad_norm": 9.58514794018101, "learning_rate": 3.6956748984426736e-06, "loss": 1.1255, "step": 5100 }, { "epoch": 0.7221632335244567, "grad_norm": 8.87266316340974, "learning_rate": 3.695171475707302e-06, "loss": 1.1819, "step": 5101 }, { "epoch": 0.7223048063990939, "grad_norm": 9.263742085109753, "learning_rate": 3.694667990142658e-06, "loss": 1.1547, "step": 5102 }, { "epoch": 0.7224463792737311, "grad_norm": 7.509526623716351, "learning_rate": 3.6941644417752077e-06, "loss": 1.0012, "step": 5103 }, { "epoch": 0.7225879521483684, "grad_norm": 7.828682946215364, "learning_rate": 3.6936608306314227e-06, "loss": 1.0457, "step": 5104 }, { "epoch": 0.7227295250230056, "grad_norm": 8.741283762073053, "learning_rate": 3.6931571567377785e-06, "loss": 1.1096, "step": 5105 }, { "epoch": 0.7228710978976428, "grad_norm": 8.23404236905249, "learning_rate": 3.692653420120752e-06, "loss": 1.1266, "step": 5106 }, { "epoch": 0.72301267077228, "grad_norm": 8.565250149986044, "learning_rate": 3.6921496208068253e-06, "loss": 0.9, "step": 5107 }, { "epoch": 0.7231542436469173, "grad_norm": 8.013118478409229, "learning_rate": 3.691645758822481e-06, "loss": 1.1576, "step": 5108 }, { "epoch": 0.7232958165215545, "grad_norm": 9.689954877340881, "learning_rate": 3.6911418341942078e-06, "loss": 1.0381, "step": 5109 }, { "epoch": 0.7234373893961917, "grad_norm": 8.799456527573964, "learning_rate": 3.690637846948497e-06, "loss": 1.1963, "step": 5110 }, { "epoch": 0.723578962270829, "grad_norm": 8.76391019044421, "learning_rate": 3.6901337971118415e-06, "loss": 1.1983, "step": 5111 }, { "epoch": 0.7237205351454661, "grad_norm": 7.5658274494675855, "learning_rate": 3.6896296847107406e-06, "loss": 1.0822, "step": 5112 }, { "epoch": 0.7238621080201033, "grad_norm": 8.322995551113053, "learning_rate": 3.6891255097716937e-06, "loss": 1.1471, "step": 5113 }, { "epoch": 0.7240036808947405, "grad_norm": 9.876636997177908, "learning_rate": 3.6886212723212057e-06, "loss": 1.2122, "step": 5114 }, { "epoch": 0.7241452537693778, "grad_norm": 7.982030236829485, "learning_rate": 3.6881169723857833e-06, "loss": 1.2163, "step": 5115 }, { "epoch": 0.724286826644015, "grad_norm": 9.6329166502632, "learning_rate": 3.687612609991938e-06, "loss": 1.1234, "step": 5116 }, { "epoch": 0.7244283995186522, "grad_norm": 7.397301785107268, "learning_rate": 3.6871081851661825e-06, "loss": 0.994, "step": 5117 }, { "epoch": 0.7245699723932895, "grad_norm": 8.280426329910684, "learning_rate": 3.686603697935036e-06, "loss": 1.0401, "step": 5118 }, { "epoch": 0.7247115452679267, "grad_norm": 8.717013278388732, "learning_rate": 3.6860991483250167e-06, "loss": 1.0816, "step": 5119 }, { "epoch": 0.7248531181425639, "grad_norm": 8.489819881833569, "learning_rate": 3.6855945363626504e-06, "loss": 1.0455, "step": 5120 }, { "epoch": 0.7249946910172012, "grad_norm": 10.87814811653677, "learning_rate": 3.685089862074463e-06, "loss": 1.3652, "step": 5121 }, { "epoch": 0.7251362638918383, "grad_norm": 7.653407725372471, "learning_rate": 3.684585125486985e-06, "loss": 1.1447, "step": 5122 }, { "epoch": 0.7252778367664755, "grad_norm": 8.462677640191481, "learning_rate": 3.684080326626751e-06, "loss": 0.9878, "step": 5123 }, { "epoch": 0.7254194096411127, "grad_norm": 7.658901207105058, "learning_rate": 3.683575465520297e-06, "loss": 1.1342, "step": 5124 }, { "epoch": 0.72556098251575, "grad_norm": 6.34840530436166, "learning_rate": 3.6830705421941624e-06, "loss": 1.0917, "step": 5125 }, { "epoch": 0.7257025553903872, "grad_norm": 8.36397726698586, "learning_rate": 3.6825655566748927e-06, "loss": 1.122, "step": 5126 }, { "epoch": 0.7258441282650244, "grad_norm": 8.968036689703666, "learning_rate": 3.6820605089890323e-06, "loss": 1.2923, "step": 5127 }, { "epoch": 0.7259857011396617, "grad_norm": 7.905640107405124, "learning_rate": 3.6815553991631323e-06, "loss": 1.0954, "step": 5128 }, { "epoch": 0.7261272740142989, "grad_norm": 7.561098835929171, "learning_rate": 3.681050227223747e-06, "loss": 1.0319, "step": 5129 }, { "epoch": 0.7262688468889361, "grad_norm": 7.880141744355416, "learning_rate": 3.6805449931974313e-06, "loss": 1.1366, "step": 5130 }, { "epoch": 0.7264104197635733, "grad_norm": 8.72804557745036, "learning_rate": 3.6800396971107456e-06, "loss": 1.0419, "step": 5131 }, { "epoch": 0.7265519926382105, "grad_norm": 7.637442770585886, "learning_rate": 3.6795343389902534e-06, "loss": 1.1928, "step": 5132 }, { "epoch": 0.7266935655128477, "grad_norm": 8.130747478887146, "learning_rate": 3.6790289188625196e-06, "loss": 0.9632, "step": 5133 }, { "epoch": 0.7268351383874849, "grad_norm": 8.895973460562658, "learning_rate": 3.678523436754115e-06, "loss": 1.033, "step": 5134 }, { "epoch": 0.7269767112621222, "grad_norm": 7.671535383426199, "learning_rate": 3.678017892691612e-06, "loss": 1.109, "step": 5135 }, { "epoch": 0.7271182841367594, "grad_norm": 9.739432060712492, "learning_rate": 3.677512286701587e-06, "loss": 1.2023, "step": 5136 }, { "epoch": 0.7272598570113966, "grad_norm": 10.5195580845833, "learning_rate": 3.677006618810619e-06, "loss": 1.1876, "step": 5137 }, { "epoch": 0.7274014298860338, "grad_norm": 7.9782854063327635, "learning_rate": 3.676500889045291e-06, "loss": 1.0866, "step": 5138 }, { "epoch": 0.7275430027606711, "grad_norm": 7.975353900387479, "learning_rate": 3.6759950974321883e-06, "loss": 1.0424, "step": 5139 }, { "epoch": 0.7276845756353083, "grad_norm": 10.408380330460433, "learning_rate": 3.6754892439979e-06, "loss": 1.2062, "step": 5140 }, { "epoch": 0.7278261485099455, "grad_norm": 8.331261593466474, "learning_rate": 3.6749833287690183e-06, "loss": 1.1907, "step": 5141 }, { "epoch": 0.7279677213845828, "grad_norm": 10.201887397424388, "learning_rate": 3.6744773517721394e-06, "loss": 1.2772, "step": 5142 }, { "epoch": 0.7281092942592199, "grad_norm": 9.060079317061485, "learning_rate": 3.6739713130338617e-06, "loss": 0.9812, "step": 5143 }, { "epoch": 0.7282508671338571, "grad_norm": 8.543732283266195, "learning_rate": 3.673465212580788e-06, "loss": 1.1804, "step": 5144 }, { "epoch": 0.7283924400084943, "grad_norm": 8.12595303154642, "learning_rate": 3.672959050439523e-06, "loss": 1.1318, "step": 5145 }, { "epoch": 0.7285340128831316, "grad_norm": 7.041549260291191, "learning_rate": 3.672452826636675e-06, "loss": 1.0939, "step": 5146 }, { "epoch": 0.7286755857577688, "grad_norm": 8.409560675468995, "learning_rate": 3.671946541198856e-06, "loss": 1.0497, "step": 5147 }, { "epoch": 0.728817158632406, "grad_norm": 9.05564842358525, "learning_rate": 3.671440194152681e-06, "loss": 1.0544, "step": 5148 }, { "epoch": 0.7289587315070433, "grad_norm": 7.174256970148927, "learning_rate": 3.670933785524769e-06, "loss": 0.9429, "step": 5149 }, { "epoch": 0.7291003043816805, "grad_norm": 10.621080460983778, "learning_rate": 3.6704273153417407e-06, "loss": 1.1255, "step": 5150 }, { "epoch": 0.7292418772563177, "grad_norm": 9.174296122896074, "learning_rate": 3.669920783630221e-06, "loss": 1.1582, "step": 5151 }, { "epoch": 0.729383450130955, "grad_norm": 7.844818164086909, "learning_rate": 3.669414190416838e-06, "loss": 1.0367, "step": 5152 }, { "epoch": 0.7295250230055921, "grad_norm": 7.680835098719455, "learning_rate": 3.6689075357282235e-06, "loss": 1.1886, "step": 5153 }, { "epoch": 0.7296665958802293, "grad_norm": 12.352425378027045, "learning_rate": 3.668400819591011e-06, "loss": 1.1494, "step": 5154 }, { "epoch": 0.7298081687548665, "grad_norm": 10.896134459283491, "learning_rate": 3.6678940420318385e-06, "loss": 1.1649, "step": 5155 }, { "epoch": 0.7299497416295038, "grad_norm": 10.596528445679635, "learning_rate": 3.6673872030773473e-06, "loss": 1.1653, "step": 5156 }, { "epoch": 0.730091314504141, "grad_norm": 7.860731489393328, "learning_rate": 3.666880302754181e-06, "loss": 0.9802, "step": 5157 }, { "epoch": 0.7302328873787782, "grad_norm": 8.597176163617984, "learning_rate": 3.6663733410889875e-06, "loss": 1.1113, "step": 5158 }, { "epoch": 0.7303744602534155, "grad_norm": 10.502242439233699, "learning_rate": 3.665866318108417e-06, "loss": 1.1166, "step": 5159 }, { "epoch": 0.7305160331280527, "grad_norm": 11.705618293528499, "learning_rate": 3.665359233839124e-06, "loss": 1.125, "step": 5160 }, { "epoch": 0.7306576060026899, "grad_norm": 10.326341622221692, "learning_rate": 3.6648520883077644e-06, "loss": 1.0664, "step": 5161 }, { "epoch": 0.7307991788773271, "grad_norm": 9.286150425320477, "learning_rate": 3.6643448815409994e-06, "loss": 1.2837, "step": 5162 }, { "epoch": 0.7309407517519643, "grad_norm": 6.804168366032715, "learning_rate": 3.663837613565492e-06, "loss": 0.897, "step": 5163 }, { "epoch": 0.7310823246266015, "grad_norm": 12.26920541393198, "learning_rate": 3.663330284407908e-06, "loss": 1.1826, "step": 5164 }, { "epoch": 0.7312238975012387, "grad_norm": 10.819520644535924, "learning_rate": 3.6628228940949195e-06, "loss": 0.9948, "step": 5165 }, { "epoch": 0.731365470375876, "grad_norm": 8.760369396346778, "learning_rate": 3.662315442653199e-06, "loss": 1.0833, "step": 5166 }, { "epoch": 0.7315070432505132, "grad_norm": 11.9629733734383, "learning_rate": 3.661807930109422e-06, "loss": 1.2287, "step": 5167 }, { "epoch": 0.7316486161251504, "grad_norm": 7.629855001729201, "learning_rate": 3.6613003564902678e-06, "loss": 1.1439, "step": 5168 }, { "epoch": 0.7317901889997876, "grad_norm": 8.003677476598746, "learning_rate": 3.66079272182242e-06, "loss": 1.132, "step": 5169 }, { "epoch": 0.7319317618744249, "grad_norm": 10.27264619397846, "learning_rate": 3.6602850261325645e-06, "loss": 1.1859, "step": 5170 }, { "epoch": 0.7320733347490621, "grad_norm": 11.433711190462791, "learning_rate": 3.6597772694473902e-06, "loss": 1.1557, "step": 5171 }, { "epoch": 0.7322149076236993, "grad_norm": 9.570794790143557, "learning_rate": 3.6592694517935895e-06, "loss": 1.1124, "step": 5172 }, { "epoch": 0.7323564804983366, "grad_norm": 7.554525916058945, "learning_rate": 3.6587615731978583e-06, "loss": 1.0089, "step": 5173 }, { "epoch": 0.7324980533729737, "grad_norm": 8.22667796170754, "learning_rate": 3.658253633686895e-06, "loss": 1.0621, "step": 5174 }, { "epoch": 0.7326396262476109, "grad_norm": 8.221847588650382, "learning_rate": 3.6577456332874025e-06, "loss": 1.0364, "step": 5175 }, { "epoch": 0.7327811991222482, "grad_norm": 8.035626714320081, "learning_rate": 3.657237572026085e-06, "loss": 1.061, "step": 5176 }, { "epoch": 0.7329227719968854, "grad_norm": 10.555860249544333, "learning_rate": 3.656729449929651e-06, "loss": 1.2469, "step": 5177 }, { "epoch": 0.7330643448715226, "grad_norm": 12.549441406463389, "learning_rate": 3.656221267024812e-06, "loss": 1.0987, "step": 5178 }, { "epoch": 0.7332059177461598, "grad_norm": 9.184248783662975, "learning_rate": 3.6557130233382833e-06, "loss": 1.2239, "step": 5179 }, { "epoch": 0.7333474906207971, "grad_norm": 7.3621123906484245, "learning_rate": 3.6552047188967827e-06, "loss": 1.0592, "step": 5180 }, { "epoch": 0.7334890634954343, "grad_norm": 9.840140707359067, "learning_rate": 3.6546963537270314e-06, "loss": 1.0191, "step": 5181 }, { "epoch": 0.7336306363700715, "grad_norm": 8.56237659226192, "learning_rate": 3.654187927855754e-06, "loss": 1.1353, "step": 5182 }, { "epoch": 0.7337722092447088, "grad_norm": 11.166729789289777, "learning_rate": 3.6536794413096775e-06, "loss": 0.9758, "step": 5183 }, { "epoch": 0.7339137821193459, "grad_norm": 9.085683909294302, "learning_rate": 3.6531708941155337e-06, "loss": 1.0087, "step": 5184 }, { "epoch": 0.7340553549939831, "grad_norm": 8.045891740594525, "learning_rate": 3.652662286300055e-06, "loss": 1.0108, "step": 5185 }, { "epoch": 0.7341969278686203, "grad_norm": 8.57179546479233, "learning_rate": 3.6521536178899798e-06, "loss": 1.0154, "step": 5186 }, { "epoch": 0.7343385007432576, "grad_norm": 10.448242506966183, "learning_rate": 3.6516448889120475e-06, "loss": 1.1474, "step": 5187 }, { "epoch": 0.7344800736178948, "grad_norm": 7.697139000401637, "learning_rate": 3.651136099393003e-06, "loss": 1.1977, "step": 5188 }, { "epoch": 0.734621646492532, "grad_norm": 8.83201699675431, "learning_rate": 3.650627249359591e-06, "loss": 1.1733, "step": 5189 }, { "epoch": 0.7347632193671693, "grad_norm": 9.054881293763085, "learning_rate": 3.650118338838563e-06, "loss": 1.0866, "step": 5190 }, { "epoch": 0.7349047922418065, "grad_norm": 9.357654445228707, "learning_rate": 3.6496093678566713e-06, "loss": 1.1565, "step": 5191 }, { "epoch": 0.7350463651164437, "grad_norm": 8.865675635154572, "learning_rate": 3.649100336440673e-06, "loss": 1.0372, "step": 5192 }, { "epoch": 0.735187937991081, "grad_norm": 7.563499952049858, "learning_rate": 3.648591244617326e-06, "loss": 1.1248, "step": 5193 }, { "epoch": 0.7353295108657182, "grad_norm": 11.395311937602825, "learning_rate": 3.648082092413394e-06, "loss": 1.1464, "step": 5194 }, { "epoch": 0.7354710837403553, "grad_norm": 10.970313849188956, "learning_rate": 3.6475728798556426e-06, "loss": 1.1447, "step": 5195 }, { "epoch": 0.7356126566149925, "grad_norm": 9.947872099185725, "learning_rate": 3.6470636069708405e-06, "loss": 1.1422, "step": 5196 }, { "epoch": 0.7357542294896298, "grad_norm": 7.7782278051820155, "learning_rate": 3.6465542737857603e-06, "loss": 1.033, "step": 5197 }, { "epoch": 0.735895802364267, "grad_norm": 8.551096652902782, "learning_rate": 3.646044880327176e-06, "loss": 1.0457, "step": 5198 }, { "epoch": 0.7360373752389042, "grad_norm": 8.877035418004144, "learning_rate": 3.6455354266218675e-06, "loss": 1.0912, "step": 5199 }, { "epoch": 0.7361789481135415, "grad_norm": 7.665862359136676, "learning_rate": 3.645025912696615e-06, "loss": 1.1483, "step": 5200 }, { "epoch": 0.7363205209881787, "grad_norm": 7.879884930462753, "learning_rate": 3.644516338578204e-06, "loss": 1.1731, "step": 5201 }, { "epoch": 0.7364620938628159, "grad_norm": 9.132835472998266, "learning_rate": 3.644006704293423e-06, "loss": 1.1726, "step": 5202 }, { "epoch": 0.7366036667374531, "grad_norm": 8.091574122341322, "learning_rate": 3.643497009869063e-06, "loss": 1.1134, "step": 5203 }, { "epoch": 0.7367452396120904, "grad_norm": 8.574342877091269, "learning_rate": 3.642987255331917e-06, "loss": 1.1121, "step": 5204 }, { "epoch": 0.7368868124867275, "grad_norm": 10.416425249481097, "learning_rate": 3.642477440708784e-06, "loss": 1.2453, "step": 5205 }, { "epoch": 0.7370283853613647, "grad_norm": 10.675981706062613, "learning_rate": 3.641967566026463e-06, "loss": 1.0475, "step": 5206 }, { "epoch": 0.737169958236002, "grad_norm": 9.733703688103155, "learning_rate": 3.641457631311759e-06, "loss": 1.1035, "step": 5207 }, { "epoch": 0.7373115311106392, "grad_norm": 7.752389600964756, "learning_rate": 3.6409476365914786e-06, "loss": 1.0669, "step": 5208 }, { "epoch": 0.7374531039852764, "grad_norm": 7.658002380370724, "learning_rate": 3.6404375818924315e-06, "loss": 1.0557, "step": 5209 }, { "epoch": 0.7375946768599136, "grad_norm": 7.793883336447768, "learning_rate": 3.639927467241431e-06, "loss": 0.9864, "step": 5210 }, { "epoch": 0.7377362497345509, "grad_norm": 10.978976185911886, "learning_rate": 3.639417292665293e-06, "loss": 1.0733, "step": 5211 }, { "epoch": 0.7378778226091881, "grad_norm": 10.269948388613507, "learning_rate": 3.638907058190838e-06, "loss": 1.1678, "step": 5212 }, { "epoch": 0.7380193954838253, "grad_norm": 9.877879435194098, "learning_rate": 3.638396763844889e-06, "loss": 1.1968, "step": 5213 }, { "epoch": 0.7381609683584626, "grad_norm": 7.536001139290434, "learning_rate": 3.63788640965427e-06, "loss": 0.9757, "step": 5214 }, { "epoch": 0.7383025412330997, "grad_norm": 7.430811078044434, "learning_rate": 3.637375995645811e-06, "loss": 1.0918, "step": 5215 }, { "epoch": 0.7384441141077369, "grad_norm": 9.144043581480762, "learning_rate": 3.6368655218463435e-06, "loss": 1.2069, "step": 5216 }, { "epoch": 0.7385856869823741, "grad_norm": 7.765854154770989, "learning_rate": 3.636354988282704e-06, "loss": 1.0047, "step": 5217 }, { "epoch": 0.7387272598570114, "grad_norm": 7.764232487620793, "learning_rate": 3.635844394981729e-06, "loss": 1.0048, "step": 5218 }, { "epoch": 0.7388688327316486, "grad_norm": 7.002135359814745, "learning_rate": 3.6353337419702627e-06, "loss": 0.9906, "step": 5219 }, { "epoch": 0.7390104056062858, "grad_norm": 9.155911696501917, "learning_rate": 3.6348230292751476e-06, "loss": 1.0514, "step": 5220 }, { "epoch": 0.7391519784809231, "grad_norm": 8.245790274496226, "learning_rate": 3.6343122569232313e-06, "loss": 1.0322, "step": 5221 }, { "epoch": 0.7392935513555603, "grad_norm": 6.849524165423801, "learning_rate": 3.6338014249413657e-06, "loss": 0.9, "step": 5222 }, { "epoch": 0.7394351242301975, "grad_norm": 9.058581537696599, "learning_rate": 3.6332905333564046e-06, "loss": 1.1634, "step": 5223 }, { "epoch": 0.7395766971048348, "grad_norm": 8.829009479337445, "learning_rate": 3.632779582195205e-06, "loss": 1.183, "step": 5224 }, { "epoch": 0.739718269979472, "grad_norm": 8.758361309187103, "learning_rate": 3.6322685714846277e-06, "loss": 1.1821, "step": 5225 }, { "epoch": 0.7398598428541091, "grad_norm": 8.141320841331824, "learning_rate": 3.631757501251536e-06, "loss": 1.0805, "step": 5226 }, { "epoch": 0.7400014157287463, "grad_norm": 8.526974679226846, "learning_rate": 3.631246371522796e-06, "loss": 1.1936, "step": 5227 }, { "epoch": 0.7401429886033836, "grad_norm": 8.674816945368041, "learning_rate": 3.6307351823252778e-06, "loss": 1.1923, "step": 5228 }, { "epoch": 0.7402845614780208, "grad_norm": 8.307700700565457, "learning_rate": 3.6302239336858547e-06, "loss": 0.9917, "step": 5229 }, { "epoch": 0.740426134352658, "grad_norm": 8.483064722431449, "learning_rate": 3.6297126256314013e-06, "loss": 1.0832, "step": 5230 }, { "epoch": 0.7405677072272953, "grad_norm": 7.113592820777994, "learning_rate": 3.629201258188798e-06, "loss": 0.9923, "step": 5231 }, { "epoch": 0.7407092801019325, "grad_norm": 6.520660723106911, "learning_rate": 3.6286898313849267e-06, "loss": 1.1606, "step": 5232 }, { "epoch": 0.7408508529765697, "grad_norm": 7.321875145880013, "learning_rate": 3.6281783452466725e-06, "loss": 1.0152, "step": 5233 }, { "epoch": 0.740992425851207, "grad_norm": 7.573386435300174, "learning_rate": 3.6276667998009242e-06, "loss": 1.0662, "step": 5234 }, { "epoch": 0.7411339987258442, "grad_norm": 8.649593506892256, "learning_rate": 3.627155195074572e-06, "loss": 1.032, "step": 5235 }, { "epoch": 0.7412755716004813, "grad_norm": 9.80812509769981, "learning_rate": 3.6266435310945125e-06, "loss": 1.0465, "step": 5236 }, { "epoch": 0.7414171444751185, "grad_norm": 8.831599323250876, "learning_rate": 3.6261318078876416e-06, "loss": 1.0849, "step": 5237 }, { "epoch": 0.7415587173497558, "grad_norm": 9.561836568719135, "learning_rate": 3.625620025480862e-06, "loss": 1.2379, "step": 5238 }, { "epoch": 0.741700290224393, "grad_norm": 7.6792570752186, "learning_rate": 3.625108183901077e-06, "loss": 1.1332, "step": 5239 }, { "epoch": 0.7418418630990302, "grad_norm": 10.384325730938455, "learning_rate": 3.624596283175194e-06, "loss": 1.1815, "step": 5240 }, { "epoch": 0.7419834359736674, "grad_norm": 9.099240248268156, "learning_rate": 3.6240843233301228e-06, "loss": 1.0879, "step": 5241 }, { "epoch": 0.7421250088483047, "grad_norm": 8.139048008092061, "learning_rate": 3.623572304392776e-06, "loss": 1.1099, "step": 5242 }, { "epoch": 0.7422665817229419, "grad_norm": 8.197893141858586, "learning_rate": 3.6230602263900714e-06, "loss": 1.2346, "step": 5243 }, { "epoch": 0.7424081545975791, "grad_norm": 7.812591552197939, "learning_rate": 3.6225480893489283e-06, "loss": 0.9295, "step": 5244 }, { "epoch": 0.7425497274722164, "grad_norm": 8.86035193921869, "learning_rate": 3.6220358932962696e-06, "loss": 1.1098, "step": 5245 }, { "epoch": 0.7426913003468535, "grad_norm": 8.969018380539156, "learning_rate": 3.6215236382590197e-06, "loss": 1.2684, "step": 5246 }, { "epoch": 0.7428328732214907, "grad_norm": 8.66597138574746, "learning_rate": 3.621011324264109e-06, "loss": 1.1225, "step": 5247 }, { "epoch": 0.742974446096128, "grad_norm": 7.633687807650882, "learning_rate": 3.620498951338468e-06, "loss": 1.1122, "step": 5248 }, { "epoch": 0.7431160189707652, "grad_norm": 8.114115305287447, "learning_rate": 3.6199865195090333e-06, "loss": 1.0141, "step": 5249 }, { "epoch": 0.7432575918454024, "grad_norm": 10.5781924359695, "learning_rate": 3.619474028802743e-06, "loss": 1.1199, "step": 5250 }, { "epoch": 0.7433991647200396, "grad_norm": 10.564652263155955, "learning_rate": 3.618961479246537e-06, "loss": 1.0867, "step": 5251 }, { "epoch": 0.7435407375946769, "grad_norm": 8.85168144503048, "learning_rate": 3.6184488708673605e-06, "loss": 1.1119, "step": 5252 }, { "epoch": 0.7436823104693141, "grad_norm": 8.539020953552862, "learning_rate": 3.61793620369216e-06, "loss": 1.1717, "step": 5253 }, { "epoch": 0.7438238833439513, "grad_norm": 10.512224256820419, "learning_rate": 3.617423477747888e-06, "loss": 1.1381, "step": 5254 }, { "epoch": 0.7439654562185886, "grad_norm": 10.524729354169429, "learning_rate": 3.616910693061496e-06, "loss": 1.0804, "step": 5255 }, { "epoch": 0.7441070290932258, "grad_norm": 8.15719002754673, "learning_rate": 3.6163978496599428e-06, "loss": 1.1593, "step": 5256 }, { "epoch": 0.7442486019678629, "grad_norm": 7.728077553843689, "learning_rate": 3.6158849475701863e-06, "loss": 1.0236, "step": 5257 }, { "epoch": 0.7443901748425001, "grad_norm": 10.076367412754282, "learning_rate": 3.6153719868191905e-06, "loss": 1.1329, "step": 5258 }, { "epoch": 0.7445317477171374, "grad_norm": 9.519328081156738, "learning_rate": 3.614858967433921e-06, "loss": 1.0576, "step": 5259 }, { "epoch": 0.7446733205917746, "grad_norm": 9.356764083733882, "learning_rate": 3.6143458894413463e-06, "loss": 1.158, "step": 5260 }, { "epoch": 0.7448148934664118, "grad_norm": 6.1999112891957155, "learning_rate": 3.613832752868439e-06, "loss": 0.9886, "step": 5261 }, { "epoch": 0.7449564663410491, "grad_norm": 8.117398036249439, "learning_rate": 3.613319557742175e-06, "loss": 1.1862, "step": 5262 }, { "epoch": 0.7450980392156863, "grad_norm": 8.18463690398053, "learning_rate": 3.6128063040895318e-06, "loss": 1.1315, "step": 5263 }, { "epoch": 0.7452396120903235, "grad_norm": 7.8165236220729115, "learning_rate": 3.612292991937491e-06, "loss": 1.1604, "step": 5264 }, { "epoch": 0.7453811849649608, "grad_norm": 9.105772899521284, "learning_rate": 3.6117796213130367e-06, "loss": 1.1389, "step": 5265 }, { "epoch": 0.745522757839598, "grad_norm": 8.081711229466466, "learning_rate": 3.6112661922431576e-06, "loss": 1.1404, "step": 5266 }, { "epoch": 0.7456643307142351, "grad_norm": 9.45562257166462, "learning_rate": 3.610752704754842e-06, "loss": 1.1923, "step": 5267 }, { "epoch": 0.7458059035888723, "grad_norm": 8.110707079899846, "learning_rate": 3.610239158875085e-06, "loss": 1.0553, "step": 5268 }, { "epoch": 0.7459474764635096, "grad_norm": 8.663315296200798, "learning_rate": 3.609725554630884e-06, "loss": 1.1501, "step": 5269 }, { "epoch": 0.7460890493381468, "grad_norm": 7.5676226960018935, "learning_rate": 3.609211892049238e-06, "loss": 1.1206, "step": 5270 }, { "epoch": 0.746230622212784, "grad_norm": 7.313813181284604, "learning_rate": 3.60869817115715e-06, "loss": 0.9761, "step": 5271 }, { "epoch": 0.7463721950874213, "grad_norm": 7.872856211642584, "learning_rate": 3.6081843919816263e-06, "loss": 1.0803, "step": 5272 }, { "epoch": 0.7465137679620585, "grad_norm": 8.350521925644173, "learning_rate": 3.6076705545496743e-06, "loss": 1.0607, "step": 5273 }, { "epoch": 0.7466553408366957, "grad_norm": 8.587641650146338, "learning_rate": 3.6071566588883077e-06, "loss": 1.0307, "step": 5274 }, { "epoch": 0.7467969137113329, "grad_norm": 9.613356644614955, "learning_rate": 3.606642705024541e-06, "loss": 1.0044, "step": 5275 }, { "epoch": 0.7469384865859702, "grad_norm": 8.26974252062432, "learning_rate": 3.6061286929853915e-06, "loss": 1.2861, "step": 5276 }, { "epoch": 0.7470800594606073, "grad_norm": 9.509796712890756, "learning_rate": 3.6056146227978827e-06, "loss": 1.2296, "step": 5277 }, { "epoch": 0.7472216323352445, "grad_norm": 8.986679816458087, "learning_rate": 3.6051004944890373e-06, "loss": 1.2352, "step": 5278 }, { "epoch": 0.7473632052098818, "grad_norm": 7.844436433095806, "learning_rate": 3.6045863080858824e-06, "loss": 1.0066, "step": 5279 }, { "epoch": 0.747504778084519, "grad_norm": 8.171371298559734, "learning_rate": 3.604072063615449e-06, "loss": 1.056, "step": 5280 }, { "epoch": 0.7476463509591562, "grad_norm": 9.757889123498195, "learning_rate": 3.6035577611047713e-06, "loss": 0.9919, "step": 5281 }, { "epoch": 0.7477879238337934, "grad_norm": 15.96250862234864, "learning_rate": 3.603043400580884e-06, "loss": 0.9543, "step": 5282 }, { "epoch": 0.7479294967084307, "grad_norm": 7.693777854986767, "learning_rate": 3.6025289820708277e-06, "loss": 0.9993, "step": 5283 }, { "epoch": 0.7480710695830679, "grad_norm": 7.988727734313411, "learning_rate": 3.6020145056016454e-06, "loss": 1.0119, "step": 5284 }, { "epoch": 0.7482126424577051, "grad_norm": 7.169750289505708, "learning_rate": 3.601499971200382e-06, "loss": 1.0773, "step": 5285 }, { "epoch": 0.7483542153323424, "grad_norm": 9.288959833063842, "learning_rate": 3.600985378894086e-06, "loss": 1.1455, "step": 5286 }, { "epoch": 0.7484957882069796, "grad_norm": 8.023969029071099, "learning_rate": 3.6004707287098104e-06, "loss": 0.9931, "step": 5287 }, { "epoch": 0.7486373610816167, "grad_norm": 10.20414637642387, "learning_rate": 3.5999560206746088e-06, "loss": 1.0653, "step": 5288 }, { "epoch": 0.748778933956254, "grad_norm": 9.079785928765448, "learning_rate": 3.5994412548155387e-06, "loss": 1.0922, "step": 5289 }, { "epoch": 0.7489205068308912, "grad_norm": 12.384383526129414, "learning_rate": 3.5989264311596617e-06, "loss": 1.0341, "step": 5290 }, { "epoch": 0.7490620797055284, "grad_norm": 8.455887714979418, "learning_rate": 3.598411549734042e-06, "loss": 1.1943, "step": 5291 }, { "epoch": 0.7492036525801656, "grad_norm": 7.20945240073454, "learning_rate": 3.5978966105657465e-06, "loss": 1.1056, "step": 5292 }, { "epoch": 0.7493452254548029, "grad_norm": 6.865189366851042, "learning_rate": 3.597381613681845e-06, "loss": 1.1327, "step": 5293 }, { "epoch": 0.7494867983294401, "grad_norm": 8.893269820932577, "learning_rate": 3.5968665591094097e-06, "loss": 1.1772, "step": 5294 }, { "epoch": 0.7496283712040773, "grad_norm": 7.399455241178971, "learning_rate": 3.5963514468755172e-06, "loss": 1.0916, "step": 5295 }, { "epoch": 0.7497699440787146, "grad_norm": 7.428392235198083, "learning_rate": 3.5958362770072465e-06, "loss": 1.0978, "step": 5296 }, { "epoch": 0.7499115169533518, "grad_norm": 7.004303290649424, "learning_rate": 3.59532104953168e-06, "loss": 1.0879, "step": 5297 }, { "epoch": 0.7500530898279889, "grad_norm": 6.855184122029388, "learning_rate": 3.5948057644759025e-06, "loss": 1.0444, "step": 5298 }, { "epoch": 0.7501946627026261, "grad_norm": 9.619829250414423, "learning_rate": 3.5942904218670025e-06, "loss": 1.1277, "step": 5299 }, { "epoch": 0.7503362355772634, "grad_norm": 7.737650229824398, "learning_rate": 3.5937750217320712e-06, "loss": 1.1729, "step": 5300 }, { "epoch": 0.7504778084519006, "grad_norm": 17.063862840490206, "learning_rate": 3.5932595640982023e-06, "loss": 1.0752, "step": 5301 }, { "epoch": 0.7506193813265378, "grad_norm": 7.326006812989912, "learning_rate": 3.592744048992493e-06, "loss": 1.0879, "step": 5302 }, { "epoch": 0.7507609542011751, "grad_norm": 7.602224887635692, "learning_rate": 3.5922284764420445e-06, "loss": 1.2089, "step": 5303 }, { "epoch": 0.7509025270758123, "grad_norm": 6.939025436264588, "learning_rate": 3.5917128464739586e-06, "loss": 1.1059, "step": 5304 }, { "epoch": 0.7510440999504495, "grad_norm": 7.389110966582167, "learning_rate": 3.5911971591153426e-06, "loss": 1.1048, "step": 5305 }, { "epoch": 0.7511856728250867, "grad_norm": 9.202654115712352, "learning_rate": 3.590681414393306e-06, "loss": 1.1241, "step": 5306 }, { "epoch": 0.751327245699724, "grad_norm": 8.02846659954075, "learning_rate": 3.5901656123349606e-06, "loss": 1.0733, "step": 5307 }, { "epoch": 0.7514688185743611, "grad_norm": 7.665014614183363, "learning_rate": 3.5896497529674213e-06, "loss": 1.0642, "step": 5308 }, { "epoch": 0.7516103914489983, "grad_norm": 9.475593938807428, "learning_rate": 3.589133836317808e-06, "loss": 1.1453, "step": 5309 }, { "epoch": 0.7517519643236356, "grad_norm": 9.444677995773475, "learning_rate": 3.5886178624132407e-06, "loss": 1.1513, "step": 5310 }, { "epoch": 0.7518935371982728, "grad_norm": 10.585936418934446, "learning_rate": 3.5881018312808435e-06, "loss": 1.1898, "step": 5311 }, { "epoch": 0.75203511007291, "grad_norm": 11.643449479062653, "learning_rate": 3.5875857429477447e-06, "loss": 1.1272, "step": 5312 }, { "epoch": 0.7521766829475472, "grad_norm": 9.358255309823923, "learning_rate": 3.5870695974410734e-06, "loss": 1.1763, "step": 5313 }, { "epoch": 0.7523182558221845, "grad_norm": 9.726705257486506, "learning_rate": 3.586553394787965e-06, "loss": 1.0752, "step": 5314 }, { "epoch": 0.7524598286968217, "grad_norm": 10.278990523353897, "learning_rate": 3.5860371350155547e-06, "loss": 1.2672, "step": 5315 }, { "epoch": 0.7526014015714589, "grad_norm": 9.153881196224356, "learning_rate": 3.5855208181509817e-06, "loss": 1.0447, "step": 5316 }, { "epoch": 0.7527429744460962, "grad_norm": 11.612338482377133, "learning_rate": 3.585004444221389e-06, "loss": 1.1161, "step": 5317 }, { "epoch": 0.7528845473207334, "grad_norm": 10.526960365236016, "learning_rate": 3.584488013253921e-06, "loss": 1.0519, "step": 5318 }, { "epoch": 0.7530261201953705, "grad_norm": 8.131755661250365, "learning_rate": 3.5839715252757273e-06, "loss": 1.0963, "step": 5319 }, { "epoch": 0.7531676930700077, "grad_norm": 8.057918695974632, "learning_rate": 3.583454980313959e-06, "loss": 0.9911, "step": 5320 }, { "epoch": 0.753309265944645, "grad_norm": 8.44510291136785, "learning_rate": 3.58293837839577e-06, "loss": 0.9436, "step": 5321 }, { "epoch": 0.7534508388192822, "grad_norm": 7.63215126757793, "learning_rate": 3.5824217195483178e-06, "loss": 1.0454, "step": 5322 }, { "epoch": 0.7535924116939194, "grad_norm": 7.4721153689673585, "learning_rate": 3.581905003798763e-06, "loss": 1.008, "step": 5323 }, { "epoch": 0.7537339845685567, "grad_norm": 8.168513757373157, "learning_rate": 3.581388231174269e-06, "loss": 1.1301, "step": 5324 }, { "epoch": 0.7538755574431939, "grad_norm": 9.316052879173593, "learning_rate": 3.580871401702002e-06, "loss": 1.1678, "step": 5325 }, { "epoch": 0.7540171303178311, "grad_norm": 7.332206061448291, "learning_rate": 3.5803545154091312e-06, "loss": 1.0493, "step": 5326 }, { "epoch": 0.7541587031924684, "grad_norm": 8.155158777427856, "learning_rate": 3.5798375723228283e-06, "loss": 1.2085, "step": 5327 }, { "epoch": 0.7543002760671056, "grad_norm": 7.437001444030761, "learning_rate": 3.57932057247027e-06, "loss": 1.0472, "step": 5328 }, { "epoch": 0.7544418489417427, "grad_norm": 8.577202196635723, "learning_rate": 3.5788035158786346e-06, "loss": 1.1532, "step": 5329 }, { "epoch": 0.7545834218163799, "grad_norm": 11.584384641805402, "learning_rate": 3.5782864025751025e-06, "loss": 1.1408, "step": 5330 }, { "epoch": 0.7547249946910172, "grad_norm": 7.8150098507478, "learning_rate": 3.577769232586858e-06, "loss": 1.0509, "step": 5331 }, { "epoch": 0.7548665675656544, "grad_norm": 7.19163168865139, "learning_rate": 3.5772520059410887e-06, "loss": 1.0656, "step": 5332 }, { "epoch": 0.7550081404402916, "grad_norm": 8.098560214709126, "learning_rate": 3.576734722664984e-06, "loss": 1.037, "step": 5333 }, { "epoch": 0.7551497133149289, "grad_norm": 9.15303269454033, "learning_rate": 3.576217382785738e-06, "loss": 1.1058, "step": 5334 }, { "epoch": 0.7552912861895661, "grad_norm": 8.679516533954157, "learning_rate": 3.5756999863305475e-06, "loss": 1.1862, "step": 5335 }, { "epoch": 0.7554328590642033, "grad_norm": 7.422997448017967, "learning_rate": 3.57518253332661e-06, "loss": 1.1153, "step": 5336 }, { "epoch": 0.7555744319388406, "grad_norm": 7.801570724400457, "learning_rate": 3.574665023801129e-06, "loss": 1.1054, "step": 5337 }, { "epoch": 0.7557160048134778, "grad_norm": 9.457232933619759, "learning_rate": 3.5741474577813086e-06, "loss": 1.2698, "step": 5338 }, { "epoch": 0.755857577688115, "grad_norm": 7.831786009642378, "learning_rate": 3.573629835294357e-06, "loss": 1.0646, "step": 5339 }, { "epoch": 0.7559991505627521, "grad_norm": 10.012170633334993, "learning_rate": 3.5731121563674863e-06, "loss": 1.2181, "step": 5340 }, { "epoch": 0.7561407234373894, "grad_norm": 7.9338132451326695, "learning_rate": 3.572594421027909e-06, "loss": 1.011, "step": 5341 }, { "epoch": 0.7562822963120266, "grad_norm": 7.6035615305736, "learning_rate": 3.572076629302843e-06, "loss": 1.114, "step": 5342 }, { "epoch": 0.7564238691866638, "grad_norm": 8.208675112765372, "learning_rate": 3.571558781219508e-06, "loss": 0.9219, "step": 5343 }, { "epoch": 0.756565442061301, "grad_norm": 9.64745258447166, "learning_rate": 3.5710408768051262e-06, "loss": 1.1225, "step": 5344 }, { "epoch": 0.7567070149359383, "grad_norm": 7.588858076597446, "learning_rate": 3.5705229160869247e-06, "loss": 1.0143, "step": 5345 }, { "epoch": 0.7568485878105755, "grad_norm": 9.177163466162513, "learning_rate": 3.570004899092133e-06, "loss": 1.0835, "step": 5346 }, { "epoch": 0.7569901606852127, "grad_norm": 9.040367302648542, "learning_rate": 3.5694868258479798e-06, "loss": 1.0369, "step": 5347 }, { "epoch": 0.75713173355985, "grad_norm": 9.10106917587395, "learning_rate": 3.5689686963817023e-06, "loss": 1.0089, "step": 5348 }, { "epoch": 0.7572733064344872, "grad_norm": 9.511992263777966, "learning_rate": 3.568450510720537e-06, "loss": 1.1276, "step": 5349 }, { "epoch": 0.7574148793091243, "grad_norm": 8.928932618601147, "learning_rate": 3.567932268891725e-06, "loss": 1.0191, "step": 5350 }, { "epoch": 0.7575564521837616, "grad_norm": 9.475710284099213, "learning_rate": 3.5674139709225104e-06, "loss": 0.9808, "step": 5351 }, { "epoch": 0.7576980250583988, "grad_norm": 8.439077724308152, "learning_rate": 3.5668956168401392e-06, "loss": 1.0242, "step": 5352 }, { "epoch": 0.757839597933036, "grad_norm": 9.035746474722979, "learning_rate": 3.5663772066718606e-06, "loss": 0.9275, "step": 5353 }, { "epoch": 0.7579811708076732, "grad_norm": 8.18546275644263, "learning_rate": 3.565858740444927e-06, "loss": 1.2257, "step": 5354 }, { "epoch": 0.7581227436823105, "grad_norm": 10.303454321705544, "learning_rate": 3.5653402181865954e-06, "loss": 1.1379, "step": 5355 }, { "epoch": 0.7582643165569477, "grad_norm": 7.396992623893151, "learning_rate": 3.564821639924122e-06, "loss": 1.0321, "step": 5356 }, { "epoch": 0.7584058894315849, "grad_norm": 7.983655684406624, "learning_rate": 3.5643030056847695e-06, "loss": 1.2303, "step": 5357 }, { "epoch": 0.7585474623062222, "grad_norm": 9.37432289221467, "learning_rate": 3.5637843154958006e-06, "loss": 0.996, "step": 5358 }, { "epoch": 0.7586890351808594, "grad_norm": 9.347921913666369, "learning_rate": 3.563265569384484e-06, "loss": 1.2734, "step": 5359 }, { "epoch": 0.7588306080554965, "grad_norm": 9.078505700117741, "learning_rate": 3.56274676737809e-06, "loss": 1.1965, "step": 5360 }, { "epoch": 0.7589721809301337, "grad_norm": 7.842646912599287, "learning_rate": 3.5622279095038896e-06, "loss": 1.2077, "step": 5361 }, { "epoch": 0.759113753804771, "grad_norm": 9.058427408590259, "learning_rate": 3.5617089957891614e-06, "loss": 1.1168, "step": 5362 }, { "epoch": 0.7592553266794082, "grad_norm": 7.652977317920999, "learning_rate": 3.561190026261182e-06, "loss": 1.0448, "step": 5363 }, { "epoch": 0.7593968995540454, "grad_norm": 7.902885112990752, "learning_rate": 3.5606710009472335e-06, "loss": 1.1813, "step": 5364 }, { "epoch": 0.7595384724286827, "grad_norm": 9.4667652429454, "learning_rate": 3.560151919874602e-06, "loss": 1.1601, "step": 5365 }, { "epoch": 0.7596800453033199, "grad_norm": 7.187493631111309, "learning_rate": 3.5596327830705746e-06, "loss": 1.1378, "step": 5366 }, { "epoch": 0.7598216181779571, "grad_norm": 8.064962794578143, "learning_rate": 3.559113590562443e-06, "loss": 1.0565, "step": 5367 }, { "epoch": 0.7599631910525944, "grad_norm": 8.684381817310207, "learning_rate": 3.558594342377498e-06, "loss": 1.1979, "step": 5368 }, { "epoch": 0.7601047639272316, "grad_norm": 8.976711235440971, "learning_rate": 3.5580750385430385e-06, "loss": 1.1435, "step": 5369 }, { "epoch": 0.7602463368018688, "grad_norm": 6.07428940121693, "learning_rate": 3.557555679086363e-06, "loss": 1.0463, "step": 5370 }, { "epoch": 0.7603879096765059, "grad_norm": 8.635401990730015, "learning_rate": 3.5570362640347743e-06, "loss": 1.0851, "step": 5371 }, { "epoch": 0.7605294825511432, "grad_norm": 9.630603298727497, "learning_rate": 3.556516793415577e-06, "loss": 1.0708, "step": 5372 }, { "epoch": 0.7606710554257804, "grad_norm": 8.15416097379267, "learning_rate": 3.5559972672560795e-06, "loss": 1.1045, "step": 5373 }, { "epoch": 0.7608126283004176, "grad_norm": 9.145694624443472, "learning_rate": 3.5554776855835934e-06, "loss": 0.9742, "step": 5374 }, { "epoch": 0.7609542011750549, "grad_norm": 8.628977425153492, "learning_rate": 3.554958048425432e-06, "loss": 1.116, "step": 5375 }, { "epoch": 0.7610957740496921, "grad_norm": 9.059104546427614, "learning_rate": 3.5544383558089128e-06, "loss": 1.1518, "step": 5376 }, { "epoch": 0.7612373469243293, "grad_norm": 8.39816905457602, "learning_rate": 3.5539186077613562e-06, "loss": 1.0925, "step": 5377 }, { "epoch": 0.7613789197989665, "grad_norm": 8.84179881647593, "learning_rate": 3.553398804310083e-06, "loss": 1.0434, "step": 5378 }, { "epoch": 0.7615204926736038, "grad_norm": 8.233416449041057, "learning_rate": 3.5528789454824205e-06, "loss": 0.9681, "step": 5379 }, { "epoch": 0.761662065548241, "grad_norm": 9.554006357313456, "learning_rate": 3.5523590313056965e-06, "loss": 1.2374, "step": 5380 }, { "epoch": 0.7618036384228781, "grad_norm": 7.881473121237616, "learning_rate": 3.551839061807244e-06, "loss": 1.0644, "step": 5381 }, { "epoch": 0.7619452112975154, "grad_norm": 8.215354537139492, "learning_rate": 3.551319037014396e-06, "loss": 0.9204, "step": 5382 }, { "epoch": 0.7620867841721526, "grad_norm": 7.303722101563369, "learning_rate": 3.5507989569544896e-06, "loss": 0.9662, "step": 5383 }, { "epoch": 0.7622283570467898, "grad_norm": 8.47221185513333, "learning_rate": 3.550278821654866e-06, "loss": 1.1741, "step": 5384 }, { "epoch": 0.762369929921427, "grad_norm": 7.987120990387456, "learning_rate": 3.5497586311428676e-06, "loss": 1.0132, "step": 5385 }, { "epoch": 0.7625115027960643, "grad_norm": 11.521025965886919, "learning_rate": 3.5492383854458405e-06, "loss": 1.0562, "step": 5386 }, { "epoch": 0.7626530756707015, "grad_norm": 9.36311258061472, "learning_rate": 3.548718084591134e-06, "loss": 1.1013, "step": 5387 }, { "epoch": 0.7627946485453387, "grad_norm": 8.075512225554371, "learning_rate": 3.5481977286061e-06, "loss": 1.0791, "step": 5388 }, { "epoch": 0.762936221419976, "grad_norm": 10.181632944378936, "learning_rate": 3.547677317518093e-06, "loss": 1.0885, "step": 5389 }, { "epoch": 0.7630777942946132, "grad_norm": 8.010588314629855, "learning_rate": 3.54715685135447e-06, "loss": 1.0487, "step": 5390 }, { "epoch": 0.7632193671692503, "grad_norm": 6.820894441542547, "learning_rate": 3.546636330142593e-06, "loss": 1.0511, "step": 5391 }, { "epoch": 0.7633609400438875, "grad_norm": 9.134827638700663, "learning_rate": 3.5461157539098236e-06, "loss": 1.1692, "step": 5392 }, { "epoch": 0.7635025129185248, "grad_norm": 8.051432265344937, "learning_rate": 3.5455951226835296e-06, "loss": 0.9738, "step": 5393 }, { "epoch": 0.763644085793162, "grad_norm": 9.381885898902178, "learning_rate": 3.5450744364910794e-06, "loss": 1.1847, "step": 5394 }, { "epoch": 0.7637856586677992, "grad_norm": 8.039161199588472, "learning_rate": 3.544553695359845e-06, "loss": 1.0505, "step": 5395 }, { "epoch": 0.7639272315424365, "grad_norm": 9.61122116094463, "learning_rate": 3.5440328993172023e-06, "loss": 1.1345, "step": 5396 }, { "epoch": 0.7640688044170737, "grad_norm": 8.29100803334458, "learning_rate": 3.5435120483905285e-06, "loss": 1.1463, "step": 5397 }, { "epoch": 0.7642103772917109, "grad_norm": 7.4209063409822695, "learning_rate": 3.542991142607204e-06, "loss": 1.1611, "step": 5398 }, { "epoch": 0.7643519501663482, "grad_norm": 8.805463863287173, "learning_rate": 3.5424701819946137e-06, "loss": 1.0381, "step": 5399 }, { "epoch": 0.7644935230409854, "grad_norm": 9.289774156007718, "learning_rate": 3.5419491665801424e-06, "loss": 1.1736, "step": 5400 }, { "epoch": 0.7646350959156226, "grad_norm": 6.909965240788571, "learning_rate": 3.54142809639118e-06, "loss": 1.1819, "step": 5401 }, { "epoch": 0.7647766687902597, "grad_norm": 7.9314258793417585, "learning_rate": 3.54090697145512e-06, "loss": 1.1773, "step": 5402 }, { "epoch": 0.764918241664897, "grad_norm": 8.99973890137757, "learning_rate": 3.5403857917993554e-06, "loss": 1.2373, "step": 5403 }, { "epoch": 0.7650598145395342, "grad_norm": 9.827477559656236, "learning_rate": 3.5398645574512876e-06, "loss": 1.1583, "step": 5404 }, { "epoch": 0.7652013874141714, "grad_norm": 8.207692645737566, "learning_rate": 3.5393432684383137e-06, "loss": 1.0903, "step": 5405 }, { "epoch": 0.7653429602888087, "grad_norm": 9.121496076512237, "learning_rate": 3.5388219247878395e-06, "loss": 0.9879, "step": 5406 }, { "epoch": 0.7654845331634459, "grad_norm": 7.156250533058113, "learning_rate": 3.5383005265272713e-06, "loss": 1.1353, "step": 5407 }, { "epoch": 0.7656261060380831, "grad_norm": 9.388241381265724, "learning_rate": 3.537779073684019e-06, "loss": 1.1032, "step": 5408 }, { "epoch": 0.7657676789127204, "grad_norm": 7.633107861960567, "learning_rate": 3.5372575662854937e-06, "loss": 1.0249, "step": 5409 }, { "epoch": 0.7659092517873576, "grad_norm": 9.197529203923803, "learning_rate": 3.536736004359112e-06, "loss": 1.3011, "step": 5410 }, { "epoch": 0.7660508246619948, "grad_norm": 8.609790161978443, "learning_rate": 3.536214387932292e-06, "loss": 1.2173, "step": 5411 }, { "epoch": 0.7661923975366319, "grad_norm": 7.986992513328092, "learning_rate": 3.535692717032454e-06, "loss": 1.0756, "step": 5412 }, { "epoch": 0.7663339704112692, "grad_norm": 7.917540438434039, "learning_rate": 3.535170991687022e-06, "loss": 1.0936, "step": 5413 }, { "epoch": 0.7664755432859064, "grad_norm": 8.713378126422807, "learning_rate": 3.5346492119234225e-06, "loss": 1.1394, "step": 5414 }, { "epoch": 0.7666171161605436, "grad_norm": 7.210124886498586, "learning_rate": 3.5341273777690867e-06, "loss": 1.0607, "step": 5415 }, { "epoch": 0.7667586890351809, "grad_norm": 7.7495980927636, "learning_rate": 3.5336054892514437e-06, "loss": 1.0855, "step": 5416 }, { "epoch": 0.7669002619098181, "grad_norm": 8.480198852798667, "learning_rate": 3.5330835463979318e-06, "loss": 0.9438, "step": 5417 }, { "epoch": 0.7670418347844553, "grad_norm": 7.816853767330192, "learning_rate": 3.532561549235988e-06, "loss": 1.0866, "step": 5418 }, { "epoch": 0.7671834076590925, "grad_norm": 9.404338452335402, "learning_rate": 3.532039497793054e-06, "loss": 0.957, "step": 5419 }, { "epoch": 0.7673249805337298, "grad_norm": 7.345717925320772, "learning_rate": 3.5315173920965736e-06, "loss": 1.1221, "step": 5420 }, { "epoch": 0.767466553408367, "grad_norm": 8.892167800562435, "learning_rate": 3.5309952321739922e-06, "loss": 1.0111, "step": 5421 }, { "epoch": 0.7676081262830041, "grad_norm": 8.984295600871983, "learning_rate": 3.53047301805276e-06, "loss": 1.1601, "step": 5422 }, { "epoch": 0.7677496991576414, "grad_norm": 8.062243553637167, "learning_rate": 3.5299507497603303e-06, "loss": 1.1424, "step": 5423 }, { "epoch": 0.7678912720322786, "grad_norm": 5.747517588929902, "learning_rate": 3.5294284273241565e-06, "loss": 1.027, "step": 5424 }, { "epoch": 0.7680328449069158, "grad_norm": 7.65853461608814, "learning_rate": 3.5289060507716986e-06, "loss": 1.2068, "step": 5425 }, { "epoch": 0.768174417781553, "grad_norm": 7.734434431984411, "learning_rate": 3.528383620130417e-06, "loss": 1.0315, "step": 5426 }, { "epoch": 0.7683159906561903, "grad_norm": 10.027018477510643, "learning_rate": 3.527861135427775e-06, "loss": 1.1253, "step": 5427 }, { "epoch": 0.7684575635308275, "grad_norm": 7.339103884759966, "learning_rate": 3.5273385966912398e-06, "loss": 1.0106, "step": 5428 }, { "epoch": 0.7685991364054647, "grad_norm": 8.699045975215112, "learning_rate": 3.52681600394828e-06, "loss": 1.0326, "step": 5429 }, { "epoch": 0.768740709280102, "grad_norm": 7.596916135223885, "learning_rate": 3.526293357226369e-06, "loss": 1.0605, "step": 5430 }, { "epoch": 0.7688822821547392, "grad_norm": 8.398454305942487, "learning_rate": 3.5257706565529813e-06, "loss": 1.0447, "step": 5431 }, { "epoch": 0.7690238550293764, "grad_norm": 10.372700999517466, "learning_rate": 3.525247901955595e-06, "loss": 1.1565, "step": 5432 }, { "epoch": 0.7691654279040135, "grad_norm": 7.997927397230309, "learning_rate": 3.5247250934616907e-06, "loss": 1.1088, "step": 5433 }, { "epoch": 0.7693070007786508, "grad_norm": 9.845227908649052, "learning_rate": 3.524202231098753e-06, "loss": 1.1674, "step": 5434 }, { "epoch": 0.769448573653288, "grad_norm": 9.77847747996445, "learning_rate": 3.5236793148942673e-06, "loss": 1.1284, "step": 5435 }, { "epoch": 0.7695901465279252, "grad_norm": 8.113746714199767, "learning_rate": 3.5231563448757233e-06, "loss": 1.0934, "step": 5436 }, { "epoch": 0.7697317194025625, "grad_norm": 8.975420129660018, "learning_rate": 3.5226333210706133e-06, "loss": 1.1613, "step": 5437 }, { "epoch": 0.7698732922771997, "grad_norm": 8.775515166807649, "learning_rate": 3.5221102435064314e-06, "loss": 0.985, "step": 5438 }, { "epoch": 0.7700148651518369, "grad_norm": 8.10747198513483, "learning_rate": 3.5215871122106767e-06, "loss": 1.0893, "step": 5439 }, { "epoch": 0.7701564380264742, "grad_norm": 8.460184079595699, "learning_rate": 3.5210639272108487e-06, "loss": 1.1556, "step": 5440 }, { "epoch": 0.7702980109011114, "grad_norm": 9.392855311139211, "learning_rate": 3.520540688534453e-06, "loss": 1.1177, "step": 5441 }, { "epoch": 0.7704395837757486, "grad_norm": 9.655965165069878, "learning_rate": 3.520017396208993e-06, "loss": 1.0251, "step": 5442 }, { "epoch": 0.7705811566503857, "grad_norm": 8.85842422146851, "learning_rate": 3.519494050261979e-06, "loss": 1.0949, "step": 5443 }, { "epoch": 0.770722729525023, "grad_norm": 7.290062806077375, "learning_rate": 3.518970650720923e-06, "loss": 1.0549, "step": 5444 }, { "epoch": 0.7708643023996602, "grad_norm": 7.716936771068045, "learning_rate": 3.5184471976133396e-06, "loss": 1.2266, "step": 5445 }, { "epoch": 0.7710058752742974, "grad_norm": 9.364501364366532, "learning_rate": 3.517923690966747e-06, "loss": 1.2178, "step": 5446 }, { "epoch": 0.7711474481489347, "grad_norm": 10.293645094595796, "learning_rate": 3.5174001308086643e-06, "loss": 1.0354, "step": 5447 }, { "epoch": 0.7712890210235719, "grad_norm": 8.007401380452698, "learning_rate": 3.516876517166615e-06, "loss": 1.0228, "step": 5448 }, { "epoch": 0.7714305938982091, "grad_norm": 7.73282911476544, "learning_rate": 3.5163528500681266e-06, "loss": 0.9658, "step": 5449 }, { "epoch": 0.7715721667728463, "grad_norm": 8.828659932631604, "learning_rate": 3.515829129540726e-06, "loss": 1.193, "step": 5450 }, { "epoch": 0.7717137396474836, "grad_norm": 8.917685144554182, "learning_rate": 3.5153053556119454e-06, "loss": 1.2118, "step": 5451 }, { "epoch": 0.7718553125221208, "grad_norm": 7.91396609640198, "learning_rate": 3.51478152830932e-06, "loss": 1.1391, "step": 5452 }, { "epoch": 0.7719968853967579, "grad_norm": 7.9778223191732485, "learning_rate": 3.514257647660385e-06, "loss": 1.1796, "step": 5453 }, { "epoch": 0.7721384582713952, "grad_norm": 6.999600262808679, "learning_rate": 3.5137337136926825e-06, "loss": 0.9797, "step": 5454 }, { "epoch": 0.7722800311460324, "grad_norm": 8.406700150586733, "learning_rate": 3.5132097264337546e-06, "loss": 1.1291, "step": 5455 }, { "epoch": 0.7724216040206696, "grad_norm": 8.155859459190454, "learning_rate": 3.512685685911147e-06, "loss": 1.0174, "step": 5456 }, { "epoch": 0.7725631768953068, "grad_norm": 7.528401322794288, "learning_rate": 3.5121615921524084e-06, "loss": 1.0402, "step": 5457 }, { "epoch": 0.7727047497699441, "grad_norm": 7.082644200928995, "learning_rate": 3.5116374451850887e-06, "loss": 0.9703, "step": 5458 }, { "epoch": 0.7728463226445813, "grad_norm": 8.460319348523665, "learning_rate": 3.511113245036743e-06, "loss": 1.049, "step": 5459 }, { "epoch": 0.7729878955192185, "grad_norm": 9.224545070109869, "learning_rate": 3.510588991734928e-06, "loss": 1.0129, "step": 5460 }, { "epoch": 0.7731294683938558, "grad_norm": 7.912785057007326, "learning_rate": 3.510064685307203e-06, "loss": 0.991, "step": 5461 }, { "epoch": 0.773271041268493, "grad_norm": 8.99603692855481, "learning_rate": 3.5095403257811313e-06, "loss": 1.0889, "step": 5462 }, { "epoch": 0.7734126141431302, "grad_norm": 9.347736643641964, "learning_rate": 3.5090159131842773e-06, "loss": 1.2529, "step": 5463 }, { "epoch": 0.7735541870177673, "grad_norm": 9.812360823306378, "learning_rate": 3.5084914475442085e-06, "loss": 1.2245, "step": 5464 }, { "epoch": 0.7736957598924046, "grad_norm": 8.406595329367192, "learning_rate": 3.5079669288884965e-06, "loss": 1.1618, "step": 5465 }, { "epoch": 0.7738373327670418, "grad_norm": 8.107709592356178, "learning_rate": 3.507442357244715e-06, "loss": 1.0158, "step": 5466 }, { "epoch": 0.773978905641679, "grad_norm": 7.684818947817976, "learning_rate": 3.5069177326404393e-06, "loss": 1.0989, "step": 5467 }, { "epoch": 0.7741204785163163, "grad_norm": 8.747996509608866, "learning_rate": 3.5063930551032494e-06, "loss": 1.0078, "step": 5468 }, { "epoch": 0.7742620513909535, "grad_norm": 8.911378902851977, "learning_rate": 3.5058683246607273e-06, "loss": 1.0455, "step": 5469 }, { "epoch": 0.7744036242655907, "grad_norm": 9.134386643236892, "learning_rate": 3.505343541340457e-06, "loss": 1.1449, "step": 5470 }, { "epoch": 0.774545197140228, "grad_norm": 8.561276536394393, "learning_rate": 3.5048187051700265e-06, "loss": 1.0411, "step": 5471 }, { "epoch": 0.7746867700148652, "grad_norm": 9.22954752002049, "learning_rate": 3.5042938161770257e-06, "loss": 1.1085, "step": 5472 }, { "epoch": 0.7748283428895024, "grad_norm": 9.330266403205492, "learning_rate": 3.5037688743890484e-06, "loss": 1.0362, "step": 5473 }, { "epoch": 0.7749699157641395, "grad_norm": 7.282007268074849, "learning_rate": 3.50324387983369e-06, "loss": 1.0555, "step": 5474 }, { "epoch": 0.7751114886387768, "grad_norm": 9.697003041195616, "learning_rate": 3.502718832538548e-06, "loss": 1.0411, "step": 5475 }, { "epoch": 0.775253061513414, "grad_norm": 7.5101503985465605, "learning_rate": 3.502193732531225e-06, "loss": 1.0867, "step": 5476 }, { "epoch": 0.7753946343880512, "grad_norm": 9.167008780828835, "learning_rate": 3.5016685798393244e-06, "loss": 1.2577, "step": 5477 }, { "epoch": 0.7755362072626885, "grad_norm": 9.843605839718206, "learning_rate": 3.5011433744904543e-06, "loss": 1.058, "step": 5478 }, { "epoch": 0.7756777801373257, "grad_norm": 8.862544816392106, "learning_rate": 3.5006181165122233e-06, "loss": 1.1088, "step": 5479 }, { "epoch": 0.7758193530119629, "grad_norm": 7.780301323475976, "learning_rate": 3.500092805932244e-06, "loss": 1.0589, "step": 5480 }, { "epoch": 0.7759609258866002, "grad_norm": 9.325483654253123, "learning_rate": 3.499567442778131e-06, "loss": 0.9913, "step": 5481 }, { "epoch": 0.7761024987612374, "grad_norm": 9.4369785594254, "learning_rate": 3.4990420270775026e-06, "loss": 1.1466, "step": 5482 }, { "epoch": 0.7762440716358746, "grad_norm": 7.591077294975399, "learning_rate": 3.4985165588579806e-06, "loss": 0.9708, "step": 5483 }, { "epoch": 0.7763856445105118, "grad_norm": 10.080688339830031, "learning_rate": 3.497991038147187e-06, "loss": 1.1013, "step": 5484 }, { "epoch": 0.776527217385149, "grad_norm": 9.197451644898923, "learning_rate": 3.497465464972749e-06, "loss": 1.136, "step": 5485 }, { "epoch": 0.7766687902597862, "grad_norm": 7.677643698984388, "learning_rate": 3.496939839362295e-06, "loss": 1.0587, "step": 5486 }, { "epoch": 0.7768103631344234, "grad_norm": 7.9458266910497874, "learning_rate": 3.496414161343457e-06, "loss": 1.088, "step": 5487 }, { "epoch": 0.7769519360090607, "grad_norm": 8.60714510028405, "learning_rate": 3.49588843094387e-06, "loss": 1.1992, "step": 5488 }, { "epoch": 0.7770935088836979, "grad_norm": 9.565328341809344, "learning_rate": 3.4953626481911707e-06, "loss": 1.3203, "step": 5489 }, { "epoch": 0.7772350817583351, "grad_norm": 8.435930572354305, "learning_rate": 3.4948368131129984e-06, "loss": 1.0031, "step": 5490 }, { "epoch": 0.7773766546329723, "grad_norm": 8.152261394528917, "learning_rate": 3.4943109257369973e-06, "loss": 1.1636, "step": 5491 }, { "epoch": 0.7775182275076096, "grad_norm": 9.086575224153444, "learning_rate": 3.493784986090812e-06, "loss": 1.099, "step": 5492 }, { "epoch": 0.7776598003822468, "grad_norm": 8.970408216562797, "learning_rate": 3.4932589942020912e-06, "loss": 1.0213, "step": 5493 }, { "epoch": 0.777801373256884, "grad_norm": 8.994320666968346, "learning_rate": 3.4927329500984857e-06, "loss": 1.2645, "step": 5494 }, { "epoch": 0.7779429461315212, "grad_norm": 6.669166318845837, "learning_rate": 3.4922068538076493e-06, "loss": 1.0621, "step": 5495 }, { "epoch": 0.7780845190061584, "grad_norm": 7.564670219822406, "learning_rate": 3.4916807053572376e-06, "loss": 1.2021, "step": 5496 }, { "epoch": 0.7782260918807956, "grad_norm": 7.747232496746343, "learning_rate": 3.4911545047749113e-06, "loss": 1.0978, "step": 5497 }, { "epoch": 0.7783676647554328, "grad_norm": 8.590786509627852, "learning_rate": 3.4906282520883312e-06, "loss": 1.063, "step": 5498 }, { "epoch": 0.7785092376300701, "grad_norm": 7.29492710424846, "learning_rate": 3.4901019473251635e-06, "loss": 1.1594, "step": 5499 }, { "epoch": 0.7786508105047073, "grad_norm": 7.360521231314455, "learning_rate": 3.489575590513074e-06, "loss": 1.0605, "step": 5500 }, { "epoch": 0.7787923833793445, "grad_norm": 8.32854960148257, "learning_rate": 3.4890491816797333e-06, "loss": 1.0657, "step": 5501 }, { "epoch": 0.7789339562539818, "grad_norm": 8.146947239832432, "learning_rate": 3.4885227208528148e-06, "loss": 1.089, "step": 5502 }, { "epoch": 0.779075529128619, "grad_norm": 7.29343164790325, "learning_rate": 3.487996208059994e-06, "loss": 1.091, "step": 5503 }, { "epoch": 0.7792171020032562, "grad_norm": 8.940322963675541, "learning_rate": 3.48746964332895e-06, "loss": 0.9906, "step": 5504 }, { "epoch": 0.7793586748778933, "grad_norm": 9.70007885773056, "learning_rate": 3.486943026687362e-06, "loss": 1.2531, "step": 5505 }, { "epoch": 0.7795002477525306, "grad_norm": 8.834194021459403, "learning_rate": 3.486416358162916e-06, "loss": 1.1038, "step": 5506 }, { "epoch": 0.7796418206271678, "grad_norm": 7.975374228567168, "learning_rate": 3.4858896377832966e-06, "loss": 1.0694, "step": 5507 }, { "epoch": 0.779783393501805, "grad_norm": 8.756992271051397, "learning_rate": 3.4853628655761946e-06, "loss": 1.0048, "step": 5508 }, { "epoch": 0.7799249663764423, "grad_norm": 9.087740141202511, "learning_rate": 3.4848360415693013e-06, "loss": 1.1079, "step": 5509 }, { "epoch": 0.7800665392510795, "grad_norm": 10.677743127797308, "learning_rate": 3.484309165790312e-06, "loss": 1.2054, "step": 5510 }, { "epoch": 0.7802081121257167, "grad_norm": 8.025012492232538, "learning_rate": 3.4837822382669235e-06, "loss": 1.0353, "step": 5511 }, { "epoch": 0.780349685000354, "grad_norm": 7.862940682826027, "learning_rate": 3.4832552590268363e-06, "loss": 1.1313, "step": 5512 }, { "epoch": 0.7804912578749912, "grad_norm": 8.409919023157965, "learning_rate": 3.4827282280977527e-06, "loss": 1.3043, "step": 5513 }, { "epoch": 0.7806328307496284, "grad_norm": 8.160931292478015, "learning_rate": 3.4822011455073788e-06, "loss": 1.1889, "step": 5514 }, { "epoch": 0.7807744036242656, "grad_norm": 8.484169074100874, "learning_rate": 3.4816740112834248e-06, "loss": 0.9772, "step": 5515 }, { "epoch": 0.7809159764989028, "grad_norm": 7.085470737279751, "learning_rate": 3.4811468254535984e-06, "loss": 0.9391, "step": 5516 }, { "epoch": 0.78105754937354, "grad_norm": 7.150607487407266, "learning_rate": 3.4806195880456158e-06, "loss": 0.9471, "step": 5517 }, { "epoch": 0.7811991222481772, "grad_norm": 7.6817660152968426, "learning_rate": 3.4800922990871924e-06, "loss": 1.0284, "step": 5518 }, { "epoch": 0.7813406951228145, "grad_norm": 7.049226694103636, "learning_rate": 3.479564958606047e-06, "loss": 1.044, "step": 5519 }, { "epoch": 0.7814822679974517, "grad_norm": 9.088111623884446, "learning_rate": 3.4790375666299026e-06, "loss": 1.0306, "step": 5520 }, { "epoch": 0.7816238408720889, "grad_norm": 7.653075513761506, "learning_rate": 3.478510123186483e-06, "loss": 1.0986, "step": 5521 }, { "epoch": 0.7817654137467261, "grad_norm": 8.236339009137343, "learning_rate": 3.477982628303516e-06, "loss": 1.1194, "step": 5522 }, { "epoch": 0.7819069866213634, "grad_norm": 8.216023155944939, "learning_rate": 3.4774550820087317e-06, "loss": 1.0338, "step": 5523 }, { "epoch": 0.7820485594960006, "grad_norm": 11.186444962691244, "learning_rate": 3.476927484329863e-06, "loss": 1.1225, "step": 5524 }, { "epoch": 0.7821901323706378, "grad_norm": 8.230803379232073, "learning_rate": 3.4763998352946436e-06, "loss": 1.1302, "step": 5525 }, { "epoch": 0.782331705245275, "grad_norm": 7.800186585370991, "learning_rate": 3.4758721349308146e-06, "loss": 0.9985, "step": 5526 }, { "epoch": 0.7824732781199122, "grad_norm": 8.289483580109339, "learning_rate": 3.4753443832661134e-06, "loss": 1.186, "step": 5527 }, { "epoch": 0.7826148509945494, "grad_norm": 8.749987356994215, "learning_rate": 3.4748165803282856e-06, "loss": 0.8949, "step": 5528 }, { "epoch": 0.7827564238691866, "grad_norm": 8.847437652514808, "learning_rate": 3.4742887261450776e-06, "loss": 1.0504, "step": 5529 }, { "epoch": 0.7828979967438239, "grad_norm": 8.636508064340017, "learning_rate": 3.4737608207442373e-06, "loss": 1.0421, "step": 5530 }, { "epoch": 0.7830395696184611, "grad_norm": 12.691929837806876, "learning_rate": 3.4732328641535174e-06, "loss": 1.2091, "step": 5531 }, { "epoch": 0.7831811424930983, "grad_norm": 9.93079116174258, "learning_rate": 3.472704856400671e-06, "loss": 1.0713, "step": 5532 }, { "epoch": 0.7833227153677356, "grad_norm": 10.075942638589819, "learning_rate": 3.4721767975134557e-06, "loss": 1.1969, "step": 5533 }, { "epoch": 0.7834642882423728, "grad_norm": 8.728554302820436, "learning_rate": 3.471648687519631e-06, "loss": 0.9071, "step": 5534 }, { "epoch": 0.78360586111701, "grad_norm": 7.494948148032728, "learning_rate": 3.4711205264469583e-06, "loss": 1.0374, "step": 5535 }, { "epoch": 0.7837474339916471, "grad_norm": 6.860313125448372, "learning_rate": 3.470592314323205e-06, "loss": 1.0552, "step": 5536 }, { "epoch": 0.7838890068662844, "grad_norm": 10.512057692649606, "learning_rate": 3.4700640511761373e-06, "loss": 1.1439, "step": 5537 }, { "epoch": 0.7840305797409216, "grad_norm": 7.879672405191396, "learning_rate": 3.4695357370335255e-06, "loss": 1.0787, "step": 5538 }, { "epoch": 0.7841721526155588, "grad_norm": 7.797742409321361, "learning_rate": 3.4690073719231426e-06, "loss": 1.0996, "step": 5539 }, { "epoch": 0.7843137254901961, "grad_norm": 8.046609048244669, "learning_rate": 3.468478955872765e-06, "loss": 1.1234, "step": 5540 }, { "epoch": 0.7844552983648333, "grad_norm": 9.432327166525088, "learning_rate": 3.4679504889101704e-06, "loss": 1.1015, "step": 5541 }, { "epoch": 0.7845968712394705, "grad_norm": 8.537838358618101, "learning_rate": 3.4674219710631406e-06, "loss": 1.0584, "step": 5542 }, { "epoch": 0.7847384441141078, "grad_norm": 9.552774906949894, "learning_rate": 3.466893402359459e-06, "loss": 1.0017, "step": 5543 }, { "epoch": 0.784880016988745, "grad_norm": 8.813055359135308, "learning_rate": 3.4663647828269124e-06, "loss": 1.0845, "step": 5544 }, { "epoch": 0.7850215898633822, "grad_norm": 8.32406477156978, "learning_rate": 3.46583611249329e-06, "loss": 1.0721, "step": 5545 }, { "epoch": 0.7851631627380194, "grad_norm": 11.28727852740313, "learning_rate": 3.465307391386383e-06, "loss": 1.1545, "step": 5546 }, { "epoch": 0.7853047356126566, "grad_norm": 8.83144857584269, "learning_rate": 3.464778619533987e-06, "loss": 1.1652, "step": 5547 }, { "epoch": 0.7854463084872938, "grad_norm": 6.646001672157355, "learning_rate": 3.4642497969638973e-06, "loss": 1.1399, "step": 5548 }, { "epoch": 0.785587881361931, "grad_norm": 8.787966945608602, "learning_rate": 3.463720923703915e-06, "loss": 1.084, "step": 5549 }, { "epoch": 0.7857294542365683, "grad_norm": 11.212643122105128, "learning_rate": 3.4631919997818415e-06, "loss": 1.2781, "step": 5550 }, { "epoch": 0.7858710271112055, "grad_norm": 9.393145687845257, "learning_rate": 3.4626630252254835e-06, "loss": 1.1633, "step": 5551 }, { "epoch": 0.7860125999858427, "grad_norm": 9.318087751163267, "learning_rate": 3.462134000062649e-06, "loss": 1.0613, "step": 5552 }, { "epoch": 0.78615417286048, "grad_norm": 8.009479152018631, "learning_rate": 3.4616049243211463e-06, "loss": 1.2113, "step": 5553 }, { "epoch": 0.7862957457351172, "grad_norm": 9.195273918647821, "learning_rate": 3.46107579802879e-06, "loss": 1.0006, "step": 5554 }, { "epoch": 0.7864373186097544, "grad_norm": 9.974978614726842, "learning_rate": 3.4605466212133957e-06, "loss": 1.1951, "step": 5555 }, { "epoch": 0.7865788914843916, "grad_norm": 9.001343520867541, "learning_rate": 3.460017393902782e-06, "loss": 1.088, "step": 5556 }, { "epoch": 0.7867204643590288, "grad_norm": 8.506492379215786, "learning_rate": 3.4594881161247694e-06, "loss": 1.0943, "step": 5557 }, { "epoch": 0.786862037233666, "grad_norm": 10.607415581555397, "learning_rate": 3.458958787907182e-06, "loss": 1.1138, "step": 5558 }, { "epoch": 0.7870036101083032, "grad_norm": 8.171722352332663, "learning_rate": 3.458429409277846e-06, "loss": 1.0265, "step": 5559 }, { "epoch": 0.7871451829829405, "grad_norm": 10.073426563463507, "learning_rate": 3.4578999802645905e-06, "loss": 1.2502, "step": 5560 }, { "epoch": 0.7872867558575777, "grad_norm": 10.478391665448054, "learning_rate": 3.457370500895247e-06, "loss": 1.0778, "step": 5561 }, { "epoch": 0.7874283287322149, "grad_norm": 8.433105228440192, "learning_rate": 3.4568409711976515e-06, "loss": 1.0737, "step": 5562 }, { "epoch": 0.7875699016068521, "grad_norm": 9.880156510692593, "learning_rate": 3.4563113911996395e-06, "loss": 1.2813, "step": 5563 }, { "epoch": 0.7877114744814894, "grad_norm": 7.274120851861101, "learning_rate": 3.455781760929049e-06, "loss": 1.039, "step": 5564 }, { "epoch": 0.7878530473561266, "grad_norm": 7.439753856073791, "learning_rate": 3.4552520804137248e-06, "loss": 1.0514, "step": 5565 }, { "epoch": 0.7879946202307638, "grad_norm": 8.995759600828517, "learning_rate": 3.4547223496815115e-06, "loss": 1.134, "step": 5566 }, { "epoch": 0.788136193105401, "grad_norm": 8.384537461591082, "learning_rate": 3.4541925687602553e-06, "loss": 1.0806, "step": 5567 }, { "epoch": 0.7882777659800382, "grad_norm": 9.10953454847949, "learning_rate": 3.453662737677808e-06, "loss": 1.0723, "step": 5568 }, { "epoch": 0.7884193388546754, "grad_norm": 8.70474135012961, "learning_rate": 3.4531328564620215e-06, "loss": 1.2189, "step": 5569 }, { "epoch": 0.7885609117293126, "grad_norm": 7.130515924863805, "learning_rate": 3.452602925140751e-06, "loss": 1.1064, "step": 5570 }, { "epoch": 0.7887024846039499, "grad_norm": 8.302242058960122, "learning_rate": 3.4520729437418553e-06, "loss": 1.1785, "step": 5571 }, { "epoch": 0.7888440574785871, "grad_norm": 9.245345723200614, "learning_rate": 3.4515429122931955e-06, "loss": 1.3302, "step": 5572 }, { "epoch": 0.7889856303532243, "grad_norm": 8.283347961699414, "learning_rate": 3.451012830822633e-06, "loss": 1.1548, "step": 5573 }, { "epoch": 0.7891272032278616, "grad_norm": 8.801105464511883, "learning_rate": 3.4504826993580364e-06, "loss": 1.0621, "step": 5574 }, { "epoch": 0.7892687761024988, "grad_norm": 7.088171021251792, "learning_rate": 3.449952517927272e-06, "loss": 1.1383, "step": 5575 }, { "epoch": 0.789410348977136, "grad_norm": 6.956101232756145, "learning_rate": 3.4494222865582126e-06, "loss": 1.0211, "step": 5576 }, { "epoch": 0.7895519218517733, "grad_norm": 8.04172032260948, "learning_rate": 3.4488920052787313e-06, "loss": 1.1414, "step": 5577 }, { "epoch": 0.7896934947264104, "grad_norm": 9.0790002502717, "learning_rate": 3.4483616741167046e-06, "loss": 1.0839, "step": 5578 }, { "epoch": 0.7898350676010476, "grad_norm": 9.102770758721755, "learning_rate": 3.4478312931000123e-06, "loss": 1.196, "step": 5579 }, { "epoch": 0.7899766404756848, "grad_norm": 6.761392092954525, "learning_rate": 3.4473008622565353e-06, "loss": 1.0538, "step": 5580 }, { "epoch": 0.7901182133503221, "grad_norm": 9.282293087692302, "learning_rate": 3.4467703816141584e-06, "loss": 1.0644, "step": 5581 }, { "epoch": 0.7902597862249593, "grad_norm": 10.202722702156978, "learning_rate": 3.4462398512007684e-06, "loss": 1.3302, "step": 5582 }, { "epoch": 0.7904013590995965, "grad_norm": 8.24405710430626, "learning_rate": 3.445709271044255e-06, "loss": 1.0942, "step": 5583 }, { "epoch": 0.7905429319742338, "grad_norm": 8.873005817577802, "learning_rate": 3.445178641172511e-06, "loss": 1.2696, "step": 5584 }, { "epoch": 0.790684504848871, "grad_norm": 7.99317092289336, "learning_rate": 3.44464796161343e-06, "loss": 1.1154, "step": 5585 }, { "epoch": 0.7908260777235082, "grad_norm": 8.201397697608375, "learning_rate": 3.44411723239491e-06, "loss": 1.1794, "step": 5586 }, { "epoch": 0.7909676505981454, "grad_norm": 6.6480651105682975, "learning_rate": 3.4435864535448504e-06, "loss": 1.0866, "step": 5587 }, { "epoch": 0.7911092234727826, "grad_norm": 7.786543795691807, "learning_rate": 3.443055625091155e-06, "loss": 1.0183, "step": 5588 }, { "epoch": 0.7912507963474198, "grad_norm": 8.05700685720636, "learning_rate": 3.4425247470617294e-06, "loss": 1.0649, "step": 5589 }, { "epoch": 0.791392369222057, "grad_norm": 9.02274161954158, "learning_rate": 3.44199381948448e-06, "loss": 1.0333, "step": 5590 }, { "epoch": 0.7915339420966943, "grad_norm": 9.211455171256619, "learning_rate": 3.441462842387318e-06, "loss": 1.0797, "step": 5591 }, { "epoch": 0.7916755149713315, "grad_norm": 10.2797772590463, "learning_rate": 3.4409318157981565e-06, "loss": 1.0155, "step": 5592 }, { "epoch": 0.7918170878459687, "grad_norm": 8.491802975425037, "learning_rate": 3.4404007397449104e-06, "loss": 1.0478, "step": 5593 }, { "epoch": 0.791958660720606, "grad_norm": 8.552918356593308, "learning_rate": 3.439869614255499e-06, "loss": 1.0001, "step": 5594 }, { "epoch": 0.7921002335952432, "grad_norm": 9.19697632236384, "learning_rate": 3.4393384393578427e-06, "loss": 1.0995, "step": 5595 }, { "epoch": 0.7922418064698804, "grad_norm": 6.842065891353363, "learning_rate": 3.438807215079865e-06, "loss": 1.0165, "step": 5596 }, { "epoch": 0.7923833793445176, "grad_norm": 8.080735985501741, "learning_rate": 3.438275941449492e-06, "loss": 1.1671, "step": 5597 }, { "epoch": 0.7925249522191548, "grad_norm": 8.029270031957685, "learning_rate": 3.437744618494653e-06, "loss": 1.0674, "step": 5598 }, { "epoch": 0.792666525093792, "grad_norm": 8.029914715267694, "learning_rate": 3.437213246243277e-06, "loss": 0.9947, "step": 5599 }, { "epoch": 0.7928080979684292, "grad_norm": 8.310728873961892, "learning_rate": 3.4366818247233015e-06, "loss": 1.1161, "step": 5600 }, { "epoch": 0.7929496708430664, "grad_norm": 6.755564621171947, "learning_rate": 3.4361503539626593e-06, "loss": 0.8917, "step": 5601 }, { "epoch": 0.7930912437177037, "grad_norm": 10.17824616259054, "learning_rate": 3.4356188339892915e-06, "loss": 0.9672, "step": 5602 }, { "epoch": 0.7932328165923409, "grad_norm": 8.033335849028177, "learning_rate": 3.4350872648311396e-06, "loss": 0.9801, "step": 5603 }, { "epoch": 0.7933743894669781, "grad_norm": 9.424805446407726, "learning_rate": 3.434555646516147e-06, "loss": 1.1814, "step": 5604 }, { "epoch": 0.7935159623416154, "grad_norm": 8.481218119528327, "learning_rate": 3.434023979072262e-06, "loss": 1.129, "step": 5605 }, { "epoch": 0.7936575352162526, "grad_norm": 8.595280403503061, "learning_rate": 3.4334922625274312e-06, "loss": 1.1037, "step": 5606 }, { "epoch": 0.7937991080908898, "grad_norm": 8.536521540546069, "learning_rate": 3.432960496909609e-06, "loss": 1.3211, "step": 5607 }, { "epoch": 0.7939406809655271, "grad_norm": 8.977304027928612, "learning_rate": 3.4324286822467496e-06, "loss": 1.2027, "step": 5608 }, { "epoch": 0.7940822538401642, "grad_norm": 7.395640826518232, "learning_rate": 3.431896818566809e-06, "loss": 0.9859, "step": 5609 }, { "epoch": 0.7942238267148014, "grad_norm": 8.207453285198213, "learning_rate": 3.4313649058977473e-06, "loss": 1.0337, "step": 5610 }, { "epoch": 0.7943653995894386, "grad_norm": 8.55188577836555, "learning_rate": 3.4308329442675276e-06, "loss": 1.0339, "step": 5611 }, { "epoch": 0.7945069724640759, "grad_norm": 8.151771455864814, "learning_rate": 3.430300933704114e-06, "loss": 1.1543, "step": 5612 }, { "epoch": 0.7946485453387131, "grad_norm": 7.662719235543194, "learning_rate": 3.4297688742354728e-06, "loss": 1.0397, "step": 5613 }, { "epoch": 0.7947901182133503, "grad_norm": 6.899820916989327, "learning_rate": 3.4292367658895764e-06, "loss": 1.1051, "step": 5614 }, { "epoch": 0.7949316910879876, "grad_norm": 7.1989045263425, "learning_rate": 3.4287046086943956e-06, "loss": 1.0892, "step": 5615 }, { "epoch": 0.7950732639626248, "grad_norm": 8.472056063775423, "learning_rate": 3.428172402677906e-06, "loss": 0.9257, "step": 5616 }, { "epoch": 0.795214836837262, "grad_norm": 7.430576467831221, "learning_rate": 3.4276401478680856e-06, "loss": 1.171, "step": 5617 }, { "epoch": 0.7953564097118992, "grad_norm": 8.549084477179367, "learning_rate": 3.427107844292914e-06, "loss": 1.0454, "step": 5618 }, { "epoch": 0.7954979825865364, "grad_norm": 9.238531890759594, "learning_rate": 3.426575491980374e-06, "loss": 0.9256, "step": 5619 }, { "epoch": 0.7956395554611736, "grad_norm": 8.599081265367738, "learning_rate": 3.426043090958452e-06, "loss": 1.053, "step": 5620 }, { "epoch": 0.7957811283358108, "grad_norm": 6.898132524844842, "learning_rate": 3.4255106412551352e-06, "loss": 1.0283, "step": 5621 }, { "epoch": 0.7959227012104481, "grad_norm": 8.042850650996716, "learning_rate": 3.4249781428984143e-06, "loss": 1.0174, "step": 5622 }, { "epoch": 0.7960642740850853, "grad_norm": 7.962158828205225, "learning_rate": 3.424445595916281e-06, "loss": 1.0444, "step": 5623 }, { "epoch": 0.7962058469597225, "grad_norm": 10.508671767260205, "learning_rate": 3.423913000336732e-06, "loss": 1.3066, "step": 5624 }, { "epoch": 0.7963474198343597, "grad_norm": 9.144215040186438, "learning_rate": 3.423380356187766e-06, "loss": 0.9956, "step": 5625 }, { "epoch": 0.796488992708997, "grad_norm": 9.706414247242002, "learning_rate": 3.422847663497384e-06, "loss": 1.1917, "step": 5626 }, { "epoch": 0.7966305655836342, "grad_norm": 8.04232843341152, "learning_rate": 3.4223149222935875e-06, "loss": 1.1068, "step": 5627 }, { "epoch": 0.7967721384582714, "grad_norm": 7.738063603151374, "learning_rate": 3.421782132604383e-06, "loss": 1.1233, "step": 5628 }, { "epoch": 0.7969137113329087, "grad_norm": 8.862594315601369, "learning_rate": 3.4212492944577796e-06, "loss": 1.128, "step": 5629 }, { "epoch": 0.7970552842075458, "grad_norm": 8.70931047375294, "learning_rate": 3.420716407881788e-06, "loss": 1.2904, "step": 5630 }, { "epoch": 0.797196857082183, "grad_norm": 8.208125800851109, "learning_rate": 3.4201834729044208e-06, "loss": 1.1093, "step": 5631 }, { "epoch": 0.7973384299568202, "grad_norm": 7.552112602089475, "learning_rate": 3.4196504895536948e-06, "loss": 0.9763, "step": 5632 }, { "epoch": 0.7974800028314575, "grad_norm": 8.616576573425384, "learning_rate": 3.419117457857628e-06, "loss": 1.0073, "step": 5633 }, { "epoch": 0.7976215757060947, "grad_norm": 8.460294098487957, "learning_rate": 3.4185843778442417e-06, "loss": 0.9023, "step": 5634 }, { "epoch": 0.7977631485807319, "grad_norm": 9.266678902963294, "learning_rate": 3.4180512495415603e-06, "loss": 1.0352, "step": 5635 }, { "epoch": 0.7979047214553692, "grad_norm": 9.705421065522197, "learning_rate": 3.417518072977609e-06, "loss": 1.1209, "step": 5636 }, { "epoch": 0.7980462943300064, "grad_norm": 9.603604402998744, "learning_rate": 3.4169848481804165e-06, "loss": 1.1866, "step": 5637 }, { "epoch": 0.7981878672046436, "grad_norm": 9.494818227915285, "learning_rate": 3.416451575178014e-06, "loss": 1.0623, "step": 5638 }, { "epoch": 0.7983294400792809, "grad_norm": 9.365995838764707, "learning_rate": 3.4159182539984352e-06, "loss": 1.2038, "step": 5639 }, { "epoch": 0.798471012953918, "grad_norm": 7.46864248541535, "learning_rate": 3.4153848846697174e-06, "loss": 1.0498, "step": 5640 }, { "epoch": 0.7986125858285552, "grad_norm": 9.43451122932346, "learning_rate": 3.4148514672198986e-06, "loss": 1.0165, "step": 5641 }, { "epoch": 0.7987541587031924, "grad_norm": 10.528457588339002, "learning_rate": 3.414318001677021e-06, "loss": 1.197, "step": 5642 }, { "epoch": 0.7988957315778297, "grad_norm": 9.2588899524649, "learning_rate": 3.4137844880691275e-06, "loss": 1.1126, "step": 5643 }, { "epoch": 0.7990373044524669, "grad_norm": 9.128919621834452, "learning_rate": 3.413250926424264e-06, "loss": 1.0147, "step": 5644 }, { "epoch": 0.7991788773271041, "grad_norm": 9.05768367773776, "learning_rate": 3.4127173167704807e-06, "loss": 1.0413, "step": 5645 }, { "epoch": 0.7993204502017414, "grad_norm": 8.805113815100574, "learning_rate": 3.4121836591358288e-06, "loss": 1.1925, "step": 5646 }, { "epoch": 0.7994620230763786, "grad_norm": 7.979427589021502, "learning_rate": 3.4116499535483623e-06, "loss": 1.1524, "step": 5647 }, { "epoch": 0.7996035959510158, "grad_norm": 8.97626204689194, "learning_rate": 3.4111162000361363e-06, "loss": 1.0222, "step": 5648 }, { "epoch": 0.799745168825653, "grad_norm": 9.354959455735607, "learning_rate": 3.4105823986272125e-06, "loss": 1.1091, "step": 5649 }, { "epoch": 0.7998867417002902, "grad_norm": 9.302246593539204, "learning_rate": 3.41004854934965e-06, "loss": 1.1077, "step": 5650 }, { "epoch": 0.8000283145749274, "grad_norm": 10.476553032946878, "learning_rate": 3.4095146522315144e-06, "loss": 1.1277, "step": 5651 }, { "epoch": 0.8001698874495646, "grad_norm": 7.899822049611456, "learning_rate": 3.408980707300871e-06, "loss": 1.1201, "step": 5652 }, { "epoch": 0.8003114603242019, "grad_norm": 8.940622491179523, "learning_rate": 3.4084467145857903e-06, "loss": 0.9897, "step": 5653 }, { "epoch": 0.8004530331988391, "grad_norm": 10.33098522292378, "learning_rate": 3.4079126741143427e-06, "loss": 0.9701, "step": 5654 }, { "epoch": 0.8005946060734763, "grad_norm": 9.046390922390161, "learning_rate": 3.407378585914603e-06, "loss": 1.0497, "step": 5655 }, { "epoch": 0.8007361789481136, "grad_norm": 8.858791110057886, "learning_rate": 3.4068444500146476e-06, "loss": 1.0976, "step": 5656 }, { "epoch": 0.8008777518227508, "grad_norm": 7.35658758874845, "learning_rate": 3.4063102664425557e-06, "loss": 1.1224, "step": 5657 }, { "epoch": 0.801019324697388, "grad_norm": 8.576945572948922, "learning_rate": 3.405776035226409e-06, "loss": 1.1077, "step": 5658 }, { "epoch": 0.8011608975720252, "grad_norm": 8.429764429966388, "learning_rate": 3.405241756394291e-06, "loss": 1.0322, "step": 5659 }, { "epoch": 0.8013024704466625, "grad_norm": 8.386803806479493, "learning_rate": 3.4047074299742894e-06, "loss": 1.0047, "step": 5660 }, { "epoch": 0.8014440433212996, "grad_norm": 11.035329782927592, "learning_rate": 3.4041730559944918e-06, "loss": 1.2271, "step": 5661 }, { "epoch": 0.8015856161959368, "grad_norm": 9.175725125546746, "learning_rate": 3.403638634482992e-06, "loss": 1.0866, "step": 5662 }, { "epoch": 0.801727189070574, "grad_norm": 8.57165705966677, "learning_rate": 3.403104165467883e-06, "loss": 0.9397, "step": 5663 }, { "epoch": 0.8018687619452113, "grad_norm": 9.282382677622602, "learning_rate": 3.4025696489772607e-06, "loss": 0.9473, "step": 5664 }, { "epoch": 0.8020103348198485, "grad_norm": 9.205609178997625, "learning_rate": 3.402035085039225e-06, "loss": 1.0834, "step": 5665 }, { "epoch": 0.8021519076944857, "grad_norm": 7.606761444375054, "learning_rate": 3.401500473681878e-06, "loss": 1.0236, "step": 5666 }, { "epoch": 0.802293480569123, "grad_norm": 7.979967307881862, "learning_rate": 3.4009658149333223e-06, "loss": 1.1142, "step": 5667 }, { "epoch": 0.8024350534437602, "grad_norm": 8.72195167958836, "learning_rate": 3.4004311088216667e-06, "loss": 1.0677, "step": 5668 }, { "epoch": 0.8025766263183974, "grad_norm": 7.321897548847588, "learning_rate": 3.3998963553750186e-06, "loss": 1.1572, "step": 5669 }, { "epoch": 0.8027181991930347, "grad_norm": 9.406398429840108, "learning_rate": 3.3993615546214898e-06, "loss": 1.1326, "step": 5670 }, { "epoch": 0.8028597720676718, "grad_norm": 8.542249805424872, "learning_rate": 3.3988267065891945e-06, "loss": 1.1605, "step": 5671 }, { "epoch": 0.803001344942309, "grad_norm": 7.107219528584314, "learning_rate": 3.39829181130625e-06, "loss": 1.0001, "step": 5672 }, { "epoch": 0.8031429178169462, "grad_norm": 6.344656141987413, "learning_rate": 3.3977568688007745e-06, "loss": 0.9684, "step": 5673 }, { "epoch": 0.8032844906915835, "grad_norm": 7.933459116902936, "learning_rate": 3.3972218791008902e-06, "loss": 1.1227, "step": 5674 }, { "epoch": 0.8034260635662207, "grad_norm": 9.998105441870365, "learning_rate": 3.3966868422347204e-06, "loss": 1.0835, "step": 5675 }, { "epoch": 0.8035676364408579, "grad_norm": 7.47861713172167, "learning_rate": 3.3961517582303916e-06, "loss": 1.0923, "step": 5676 }, { "epoch": 0.8037092093154952, "grad_norm": 10.300772362055323, "learning_rate": 3.395616627116033e-06, "loss": 0.9826, "step": 5677 }, { "epoch": 0.8038507821901324, "grad_norm": 9.607904089024137, "learning_rate": 3.395081448919777e-06, "loss": 1.2588, "step": 5678 }, { "epoch": 0.8039923550647696, "grad_norm": 8.484638919554257, "learning_rate": 3.394546223669756e-06, "loss": 1.06, "step": 5679 }, { "epoch": 0.8041339279394069, "grad_norm": 7.587460020359044, "learning_rate": 3.394010951394107e-06, "loss": 1.0396, "step": 5680 }, { "epoch": 0.804275500814044, "grad_norm": 7.183511174501607, "learning_rate": 3.3934756321209693e-06, "loss": 0.9103, "step": 5681 }, { "epoch": 0.8044170736886812, "grad_norm": 8.457304141815403, "learning_rate": 3.3929402658784837e-06, "loss": 1.1483, "step": 5682 }, { "epoch": 0.8045586465633184, "grad_norm": 9.04444718273641, "learning_rate": 3.3924048526947937e-06, "loss": 1.1516, "step": 5683 }, { "epoch": 0.8047002194379557, "grad_norm": 8.21677157300894, "learning_rate": 3.3918693925980455e-06, "loss": 1.0619, "step": 5684 }, { "epoch": 0.8048417923125929, "grad_norm": 9.473839729549546, "learning_rate": 3.3913338856163897e-06, "loss": 1.0121, "step": 5685 }, { "epoch": 0.8049833651872301, "grad_norm": 7.940032795182537, "learning_rate": 3.390798331777976e-06, "loss": 1.1991, "step": 5686 }, { "epoch": 0.8051249380618674, "grad_norm": 9.822739622125898, "learning_rate": 3.390262731110957e-06, "loss": 1.1425, "step": 5687 }, { "epoch": 0.8052665109365046, "grad_norm": 8.251237718618956, "learning_rate": 3.3897270836434914e-06, "loss": 1.1266, "step": 5688 }, { "epoch": 0.8054080838111418, "grad_norm": 8.931927417561011, "learning_rate": 3.3891913894037354e-06, "loss": 0.9794, "step": 5689 }, { "epoch": 0.805549656685779, "grad_norm": 8.088194601191187, "learning_rate": 3.3886556484198517e-06, "loss": 0.9515, "step": 5690 }, { "epoch": 0.8056912295604163, "grad_norm": 8.388692111078619, "learning_rate": 3.388119860720003e-06, "loss": 1.0752, "step": 5691 }, { "epoch": 0.8058328024350534, "grad_norm": 9.890582959897818, "learning_rate": 3.3875840263323552e-06, "loss": 1.0595, "step": 5692 }, { "epoch": 0.8059743753096906, "grad_norm": 8.680696087187103, "learning_rate": 3.3870481452850765e-06, "loss": 1.2476, "step": 5693 }, { "epoch": 0.8061159481843279, "grad_norm": 8.188203970660277, "learning_rate": 3.386512217606339e-06, "loss": 1.0736, "step": 5694 }, { "epoch": 0.8062575210589651, "grad_norm": 9.531986795932527, "learning_rate": 3.385976243324316e-06, "loss": 1.3296, "step": 5695 }, { "epoch": 0.8063990939336023, "grad_norm": 7.544067871678398, "learning_rate": 3.3854402224671813e-06, "loss": 1.0104, "step": 5696 }, { "epoch": 0.8065406668082395, "grad_norm": 8.829275626757935, "learning_rate": 3.3849041550631145e-06, "loss": 1.0883, "step": 5697 }, { "epoch": 0.8066822396828768, "grad_norm": 8.083453724806851, "learning_rate": 3.384368041140296e-06, "loss": 1.0983, "step": 5698 }, { "epoch": 0.806823812557514, "grad_norm": 7.7398977579466, "learning_rate": 3.383831880726909e-06, "loss": 1.0529, "step": 5699 }, { "epoch": 0.8069653854321512, "grad_norm": 8.799516352454228, "learning_rate": 3.3832956738511395e-06, "loss": 1.1258, "step": 5700 }, { "epoch": 0.8071069583067885, "grad_norm": 8.40038858150696, "learning_rate": 3.3827594205411746e-06, "loss": 1.0278, "step": 5701 }, { "epoch": 0.8072485311814256, "grad_norm": 7.898149645471756, "learning_rate": 3.3822231208252053e-06, "loss": 0.9506, "step": 5702 }, { "epoch": 0.8073901040560628, "grad_norm": 7.586085089240772, "learning_rate": 3.3816867747314242e-06, "loss": 1.0962, "step": 5703 }, { "epoch": 0.8075316769307, "grad_norm": 7.717991594747418, "learning_rate": 3.381150382288027e-06, "loss": 1.0869, "step": 5704 }, { "epoch": 0.8076732498053373, "grad_norm": 8.499469516132407, "learning_rate": 3.380613943523211e-06, "loss": 1.1324, "step": 5705 }, { "epoch": 0.8078148226799745, "grad_norm": 10.472827083299867, "learning_rate": 3.3800774584651767e-06, "loss": 1.0636, "step": 5706 }, { "epoch": 0.8079563955546117, "grad_norm": 8.208595181337317, "learning_rate": 3.379540927142127e-06, "loss": 1.0807, "step": 5707 }, { "epoch": 0.808097968429249, "grad_norm": 7.869329681738628, "learning_rate": 3.3790043495822663e-06, "loss": 1.1245, "step": 5708 }, { "epoch": 0.8082395413038862, "grad_norm": 9.183609949874057, "learning_rate": 3.378467725813802e-06, "loss": 1.0552, "step": 5709 }, { "epoch": 0.8083811141785234, "grad_norm": 9.053905542162799, "learning_rate": 3.3779310558649447e-06, "loss": 0.9911, "step": 5710 }, { "epoch": 0.8085226870531607, "grad_norm": 9.724475413003288, "learning_rate": 3.3773943397639068e-06, "loss": 1.0769, "step": 5711 }, { "epoch": 0.8086642599277978, "grad_norm": 9.138421626128581, "learning_rate": 3.3768575775389022e-06, "loss": 1.1712, "step": 5712 }, { "epoch": 0.808805832802435, "grad_norm": 7.149150675233896, "learning_rate": 3.3763207692181483e-06, "loss": 1.0057, "step": 5713 }, { "epoch": 0.8089474056770722, "grad_norm": 8.62968428192272, "learning_rate": 3.375783914829865e-06, "loss": 1.018, "step": 5714 }, { "epoch": 0.8090889785517095, "grad_norm": 11.165902714194297, "learning_rate": 3.3752470144022745e-06, "loss": 1.0394, "step": 5715 }, { "epoch": 0.8092305514263467, "grad_norm": 10.898619860145459, "learning_rate": 3.374710067963602e-06, "loss": 1.0865, "step": 5716 }, { "epoch": 0.8093721243009839, "grad_norm": 8.083050699962488, "learning_rate": 3.374173075542072e-06, "loss": 1.1117, "step": 5717 }, { "epoch": 0.8095136971756212, "grad_norm": 10.511124666763967, "learning_rate": 3.373636037165916e-06, "loss": 1.0239, "step": 5718 }, { "epoch": 0.8096552700502584, "grad_norm": 10.529111197873613, "learning_rate": 3.373098952863365e-06, "loss": 1.0553, "step": 5719 }, { "epoch": 0.8097968429248956, "grad_norm": 8.875026219288955, "learning_rate": 3.372561822662652e-06, "loss": 1.0346, "step": 5720 }, { "epoch": 0.8099384157995329, "grad_norm": 8.839645673460232, "learning_rate": 3.3720246465920154e-06, "loss": 1.1067, "step": 5721 }, { "epoch": 0.8100799886741701, "grad_norm": 7.325761035534524, "learning_rate": 3.3714874246796935e-06, "loss": 1.1573, "step": 5722 }, { "epoch": 0.8102215615488072, "grad_norm": 9.152232463712021, "learning_rate": 3.3709501569539277e-06, "loss": 1.1517, "step": 5723 }, { "epoch": 0.8103631344234444, "grad_norm": 9.34180402107452, "learning_rate": 3.370412843442961e-06, "loss": 1.1541, "step": 5724 }, { "epoch": 0.8105047072980817, "grad_norm": 8.811337658135885, "learning_rate": 3.3698754841750403e-06, "loss": 1.017, "step": 5725 }, { "epoch": 0.8106462801727189, "grad_norm": 8.301811057443796, "learning_rate": 3.369338079178414e-06, "loss": 1.0758, "step": 5726 }, { "epoch": 0.8107878530473561, "grad_norm": 7.573952570345407, "learning_rate": 3.368800628481333e-06, "loss": 1.0144, "step": 5727 }, { "epoch": 0.8109294259219934, "grad_norm": 9.863861755813135, "learning_rate": 3.3682631321120507e-06, "loss": 0.989, "step": 5728 }, { "epoch": 0.8110709987966306, "grad_norm": 8.977082638324797, "learning_rate": 3.3677255900988236e-06, "loss": 1.2229, "step": 5729 }, { "epoch": 0.8112125716712678, "grad_norm": 8.192382288655514, "learning_rate": 3.3671880024699085e-06, "loss": 1.2535, "step": 5730 }, { "epoch": 0.811354144545905, "grad_norm": 7.819287578785301, "learning_rate": 3.3666503692535667e-06, "loss": 0.9993, "step": 5731 }, { "epoch": 0.8114957174205423, "grad_norm": 9.803033429327062, "learning_rate": 3.3661126904780624e-06, "loss": 1.1904, "step": 5732 }, { "epoch": 0.8116372902951794, "grad_norm": 8.512845374899813, "learning_rate": 3.3655749661716585e-06, "loss": 1.1013, "step": 5733 }, { "epoch": 0.8117788631698166, "grad_norm": 7.339724991970998, "learning_rate": 3.3650371963626243e-06, "loss": 1.0624, "step": 5734 }, { "epoch": 0.8119204360444539, "grad_norm": 8.166833317931198, "learning_rate": 3.3644993810792297e-06, "loss": 1.021, "step": 5735 }, { "epoch": 0.8120620089190911, "grad_norm": 7.701009154142559, "learning_rate": 3.3639615203497467e-06, "loss": 1.1175, "step": 5736 }, { "epoch": 0.8122035817937283, "grad_norm": 9.55951918474235, "learning_rate": 3.3634236142024516e-06, "loss": 1.0247, "step": 5737 }, { "epoch": 0.8123451546683655, "grad_norm": 8.829910286850492, "learning_rate": 3.362885662665621e-06, "loss": 0.9459, "step": 5738 }, { "epoch": 0.8124867275430028, "grad_norm": 8.693800583122526, "learning_rate": 3.3623476657675342e-06, "loss": 0.9748, "step": 5739 }, { "epoch": 0.81262830041764, "grad_norm": 7.033061018940656, "learning_rate": 3.3618096235364734e-06, "loss": 0.9157, "step": 5740 }, { "epoch": 0.8127698732922772, "grad_norm": 9.180577583177552, "learning_rate": 3.361271536000723e-06, "loss": 1.0477, "step": 5741 }, { "epoch": 0.8129114461669145, "grad_norm": 8.985057294065337, "learning_rate": 3.3607334031885707e-06, "loss": 1.257, "step": 5742 }, { "epoch": 0.8130530190415516, "grad_norm": 8.11636645044506, "learning_rate": 3.3601952251283056e-06, "loss": 1.1328, "step": 5743 }, { "epoch": 0.8131945919161888, "grad_norm": 8.414492285332601, "learning_rate": 3.359657001848218e-06, "loss": 1.1496, "step": 5744 }, { "epoch": 0.813336164790826, "grad_norm": 9.590582408543217, "learning_rate": 3.359118733376603e-06, "loss": 1.0317, "step": 5745 }, { "epoch": 0.8134777376654633, "grad_norm": 11.106842997213299, "learning_rate": 3.358580419741757e-06, "loss": 1.1252, "step": 5746 }, { "epoch": 0.8136193105401005, "grad_norm": 8.241684392968514, "learning_rate": 3.3580420609719783e-06, "loss": 1.1352, "step": 5747 }, { "epoch": 0.8137608834147377, "grad_norm": 10.516125603333109, "learning_rate": 3.3575036570955687e-06, "loss": 1.2536, "step": 5748 }, { "epoch": 0.813902456289375, "grad_norm": 7.487854087239355, "learning_rate": 3.356965208140831e-06, "loss": 1.0901, "step": 5749 }, { "epoch": 0.8140440291640122, "grad_norm": 9.797004662344706, "learning_rate": 3.3564267141360706e-06, "loss": 1.09, "step": 5750 }, { "epoch": 0.8141856020386494, "grad_norm": 9.294170595360518, "learning_rate": 3.3558881751095975e-06, "loss": 1.0247, "step": 5751 }, { "epoch": 0.8143271749132867, "grad_norm": 8.018335787510368, "learning_rate": 3.3553495910897206e-06, "loss": 1.1492, "step": 5752 }, { "epoch": 0.8144687477879239, "grad_norm": 9.50666535101512, "learning_rate": 3.354810962104754e-06, "loss": 1.2007, "step": 5753 }, { "epoch": 0.814610320662561, "grad_norm": 10.577457468898075, "learning_rate": 3.354272288183012e-06, "loss": 1.11, "step": 5754 }, { "epoch": 0.8147518935371982, "grad_norm": 8.606391181979012, "learning_rate": 3.353733569352813e-06, "loss": 1.0576, "step": 5755 }, { "epoch": 0.8148934664118355, "grad_norm": 8.463132452046281, "learning_rate": 3.3531948056424766e-06, "loss": 1.1322, "step": 5756 }, { "epoch": 0.8150350392864727, "grad_norm": 8.76019608578631, "learning_rate": 3.352655997080325e-06, "loss": 1.0418, "step": 5757 }, { "epoch": 0.8151766121611099, "grad_norm": 9.960197582812897, "learning_rate": 3.3521171436946844e-06, "loss": 1.1198, "step": 5758 }, { "epoch": 0.8153181850357472, "grad_norm": 9.103758871924526, "learning_rate": 3.35157824551388e-06, "loss": 1.1243, "step": 5759 }, { "epoch": 0.8154597579103844, "grad_norm": 8.4181554503658, "learning_rate": 3.351039302566243e-06, "loss": 1.0576, "step": 5760 }, { "epoch": 0.8156013307850216, "grad_norm": 8.25164824695672, "learning_rate": 3.350500314880104e-06, "loss": 1.0853, "step": 5761 }, { "epoch": 0.8157429036596588, "grad_norm": 7.281857182804516, "learning_rate": 3.3499612824837978e-06, "loss": 1.0222, "step": 5762 }, { "epoch": 0.8158844765342961, "grad_norm": 9.660504536533825, "learning_rate": 3.3494222054056606e-06, "loss": 1.0368, "step": 5763 }, { "epoch": 0.8160260494089332, "grad_norm": 7.248425279801485, "learning_rate": 3.3488830836740315e-06, "loss": 0.901, "step": 5764 }, { "epoch": 0.8161676222835704, "grad_norm": 8.630908656017374, "learning_rate": 3.3483439173172517e-06, "loss": 1.1323, "step": 5765 }, { "epoch": 0.8163091951582077, "grad_norm": 9.035361861987088, "learning_rate": 3.347804706363664e-06, "loss": 1.0977, "step": 5766 }, { "epoch": 0.8164507680328449, "grad_norm": 8.27102110014613, "learning_rate": 3.3472654508416157e-06, "loss": 1.1208, "step": 5767 }, { "epoch": 0.8165923409074821, "grad_norm": 8.85572204092986, "learning_rate": 3.346726150779455e-06, "loss": 1.2279, "step": 5768 }, { "epoch": 0.8167339137821193, "grad_norm": 9.71015771142579, "learning_rate": 3.3461868062055313e-06, "loss": 1.0966, "step": 5769 }, { "epoch": 0.8168754866567566, "grad_norm": 9.86392053930696, "learning_rate": 3.345647417148198e-06, "loss": 1.2013, "step": 5770 }, { "epoch": 0.8170170595313938, "grad_norm": 8.283273816678422, "learning_rate": 3.3451079836358107e-06, "loss": 1.1122, "step": 5771 }, { "epoch": 0.817158632406031, "grad_norm": 9.181570618019112, "learning_rate": 3.344568505696727e-06, "loss": 1.06, "step": 5772 }, { "epoch": 0.8173002052806683, "grad_norm": 7.22720146035008, "learning_rate": 3.3440289833593053e-06, "loss": 1.0137, "step": 5773 }, { "epoch": 0.8174417781553055, "grad_norm": 9.471096440520734, "learning_rate": 3.3434894166519104e-06, "loss": 1.0743, "step": 5774 }, { "epoch": 0.8175833510299426, "grad_norm": 8.803353832278301, "learning_rate": 3.3429498056029066e-06, "loss": 1.0517, "step": 5775 }, { "epoch": 0.8177249239045798, "grad_norm": 9.430365891219978, "learning_rate": 3.342410150240659e-06, "loss": 1.0052, "step": 5776 }, { "epoch": 0.8178664967792171, "grad_norm": 8.060364906248042, "learning_rate": 3.3418704505935383e-06, "loss": 1.107, "step": 5777 }, { "epoch": 0.8180080696538543, "grad_norm": 9.913973231473594, "learning_rate": 3.341330706689916e-06, "loss": 1.067, "step": 5778 }, { "epoch": 0.8181496425284915, "grad_norm": 7.493306542797774, "learning_rate": 3.3407909185581656e-06, "loss": 1.057, "step": 5779 }, { "epoch": 0.8182912154031288, "grad_norm": 6.806697789394104, "learning_rate": 3.340251086226663e-06, "loss": 0.9067, "step": 5780 }, { "epoch": 0.818432788277766, "grad_norm": 8.38183360503652, "learning_rate": 3.339711209723788e-06, "loss": 0.9755, "step": 5781 }, { "epoch": 0.8185743611524032, "grad_norm": 8.330913141398801, "learning_rate": 3.33917128907792e-06, "loss": 1.0306, "step": 5782 }, { "epoch": 0.8187159340270405, "grad_norm": 7.964673723816656, "learning_rate": 3.3386313243174436e-06, "loss": 1.134, "step": 5783 }, { "epoch": 0.8188575069016777, "grad_norm": 7.091168845694236, "learning_rate": 3.338091315470744e-06, "loss": 1.02, "step": 5784 }, { "epoch": 0.8189990797763148, "grad_norm": 8.952373799337604, "learning_rate": 3.337551262566209e-06, "loss": 1.116, "step": 5785 }, { "epoch": 0.819140652650952, "grad_norm": 8.38323706185468, "learning_rate": 3.337011165632228e-06, "loss": 1.2647, "step": 5786 }, { "epoch": 0.8192822255255893, "grad_norm": 7.806523592990042, "learning_rate": 3.3364710246971937e-06, "loss": 1.086, "step": 5787 }, { "epoch": 0.8194237984002265, "grad_norm": 8.864747909171703, "learning_rate": 3.335930839789502e-06, "loss": 0.9581, "step": 5788 }, { "epoch": 0.8195653712748637, "grad_norm": 9.950221334324718, "learning_rate": 3.335390610937549e-06, "loss": 1.1919, "step": 5789 }, { "epoch": 0.819706944149501, "grad_norm": 7.294448612493115, "learning_rate": 3.3348503381697358e-06, "loss": 1.0551, "step": 5790 }, { "epoch": 0.8198485170241382, "grad_norm": 9.283483576857199, "learning_rate": 3.3343100215144614e-06, "loss": 1.062, "step": 5791 }, { "epoch": 0.8199900898987754, "grad_norm": 8.249982429254489, "learning_rate": 3.3337696610001314e-06, "loss": 1.0983, "step": 5792 }, { "epoch": 0.8201316627734127, "grad_norm": 8.568864893125614, "learning_rate": 3.333229256655153e-06, "loss": 1.1097, "step": 5793 }, { "epoch": 0.8202732356480499, "grad_norm": 8.518822865723768, "learning_rate": 3.332688808507932e-06, "loss": 0.9026, "step": 5794 }, { "epoch": 0.820414808522687, "grad_norm": 9.158328292424137, "learning_rate": 3.332148316586882e-06, "loss": 1.2841, "step": 5795 }, { "epoch": 0.8205563813973242, "grad_norm": 7.99193428660155, "learning_rate": 3.3316077809204168e-06, "loss": 1.0807, "step": 5796 }, { "epoch": 0.8206979542719615, "grad_norm": 8.716162075319744, "learning_rate": 3.3310672015369495e-06, "loss": 1.1437, "step": 5797 }, { "epoch": 0.8208395271465987, "grad_norm": 8.591134633988482, "learning_rate": 3.330526578464899e-06, "loss": 1.0351, "step": 5798 }, { "epoch": 0.8209811000212359, "grad_norm": 9.305109765554807, "learning_rate": 3.329985911732686e-06, "loss": 1.2023, "step": 5799 }, { "epoch": 0.8211226728958732, "grad_norm": 9.533293758212379, "learning_rate": 3.329445201368732e-06, "loss": 1.2418, "step": 5800 }, { "epoch": 0.8212642457705104, "grad_norm": 10.309606157655905, "learning_rate": 3.3289044474014624e-06, "loss": 1.2308, "step": 5801 }, { "epoch": 0.8214058186451476, "grad_norm": 7.501442071241593, "learning_rate": 3.3283636498593043e-06, "loss": 1.0677, "step": 5802 }, { "epoch": 0.8215473915197848, "grad_norm": 7.236622273879884, "learning_rate": 3.3278228087706863e-06, "loss": 1.0628, "step": 5803 }, { "epoch": 0.8216889643944221, "grad_norm": 7.381689884473413, "learning_rate": 3.327281924164041e-06, "loss": 0.9292, "step": 5804 }, { "epoch": 0.8218305372690593, "grad_norm": 9.064812726854825, "learning_rate": 3.3267409960678015e-06, "loss": 1.1355, "step": 5805 }, { "epoch": 0.8219721101436964, "grad_norm": 8.190268943758426, "learning_rate": 3.326200024510405e-06, "loss": 0.985, "step": 5806 }, { "epoch": 0.8221136830183337, "grad_norm": 8.554128299360144, "learning_rate": 3.3256590095202883e-06, "loss": 1.0197, "step": 5807 }, { "epoch": 0.8222552558929709, "grad_norm": 6.512168058696507, "learning_rate": 3.3251179511258934e-06, "loss": 1.1107, "step": 5808 }, { "epoch": 0.8223968287676081, "grad_norm": 9.351715061151133, "learning_rate": 3.324576849355663e-06, "loss": 1.0951, "step": 5809 }, { "epoch": 0.8225384016422453, "grad_norm": 8.587465297845414, "learning_rate": 3.3240357042380423e-06, "loss": 1.0995, "step": 5810 }, { "epoch": 0.8226799745168826, "grad_norm": 7.856949479336723, "learning_rate": 3.3234945158014792e-06, "loss": 1.1388, "step": 5811 }, { "epoch": 0.8228215473915198, "grad_norm": 7.7316359476821095, "learning_rate": 3.322953284074424e-06, "loss": 1.0367, "step": 5812 }, { "epoch": 0.822963120266157, "grad_norm": 9.814615786898313, "learning_rate": 3.3224120090853275e-06, "loss": 1.0635, "step": 5813 }, { "epoch": 0.8231046931407943, "grad_norm": 7.439179046621698, "learning_rate": 3.321870690862645e-06, "loss": 1.0304, "step": 5814 }, { "epoch": 0.8232462660154315, "grad_norm": 8.124911263788276, "learning_rate": 3.3213293294348335e-06, "loss": 1.1262, "step": 5815 }, { "epoch": 0.8233878388900686, "grad_norm": 7.66666572681366, "learning_rate": 3.3207879248303513e-06, "loss": 1.095, "step": 5816 }, { "epoch": 0.8235294117647058, "grad_norm": 9.098325017685475, "learning_rate": 3.3202464770776597e-06, "loss": 0.9118, "step": 5817 }, { "epoch": 0.8236709846393431, "grad_norm": 8.400244627524016, "learning_rate": 3.319704986205223e-06, "loss": 1.1113, "step": 5818 }, { "epoch": 0.8238125575139803, "grad_norm": 8.093401220163676, "learning_rate": 3.3191634522415064e-06, "loss": 1.0884, "step": 5819 }, { "epoch": 0.8239541303886175, "grad_norm": 6.47218091712327, "learning_rate": 3.3186218752149767e-06, "loss": 0.9797, "step": 5820 }, { "epoch": 0.8240957032632548, "grad_norm": 7.959444484399909, "learning_rate": 3.3180802551541063e-06, "loss": 1.0655, "step": 5821 }, { "epoch": 0.824237276137892, "grad_norm": 8.569931480819289, "learning_rate": 3.3175385920873674e-06, "loss": 1.1435, "step": 5822 }, { "epoch": 0.8243788490125292, "grad_norm": 7.517549962193935, "learning_rate": 3.316996886043234e-06, "loss": 1.1108, "step": 5823 }, { "epoch": 0.8245204218871665, "grad_norm": 7.746254292650497, "learning_rate": 3.3164551370501826e-06, "loss": 1.205, "step": 5824 }, { "epoch": 0.8246619947618037, "grad_norm": 8.638727293584958, "learning_rate": 3.3159133451366937e-06, "loss": 1.0618, "step": 5825 }, { "epoch": 0.8248035676364408, "grad_norm": 9.03408842790154, "learning_rate": 3.315371510331249e-06, "loss": 1.2326, "step": 5826 }, { "epoch": 0.824945140511078, "grad_norm": 8.076024739817512, "learning_rate": 3.3148296326623327e-06, "loss": 0.9986, "step": 5827 }, { "epoch": 0.8250867133857153, "grad_norm": 8.326346202066075, "learning_rate": 3.3142877121584295e-06, "loss": 0.9523, "step": 5828 }, { "epoch": 0.8252282862603525, "grad_norm": 7.5948553340512035, "learning_rate": 3.313745748848028e-06, "loss": 1.1722, "step": 5829 }, { "epoch": 0.8253698591349897, "grad_norm": 8.503082165370104, "learning_rate": 3.3132037427596193e-06, "loss": 1.0519, "step": 5830 }, { "epoch": 0.825511432009627, "grad_norm": 8.141212603253104, "learning_rate": 3.3126616939216967e-06, "loss": 1.0284, "step": 5831 }, { "epoch": 0.8256530048842642, "grad_norm": 8.289305486542208, "learning_rate": 3.3121196023627543e-06, "loss": 1.0591, "step": 5832 }, { "epoch": 0.8257945777589014, "grad_norm": 8.103016854302679, "learning_rate": 3.31157746811129e-06, "loss": 1.0324, "step": 5833 }, { "epoch": 0.8259361506335386, "grad_norm": 7.7194003553192605, "learning_rate": 3.311035291195803e-06, "loss": 1.1522, "step": 5834 }, { "epoch": 0.8260777235081759, "grad_norm": 9.20175704763345, "learning_rate": 3.3104930716447965e-06, "loss": 1.1537, "step": 5835 }, { "epoch": 0.8262192963828131, "grad_norm": 8.216349087884739, "learning_rate": 3.3099508094867727e-06, "loss": 0.9836, "step": 5836 }, { "epoch": 0.8263608692574502, "grad_norm": 7.098424774366757, "learning_rate": 3.3094085047502395e-06, "loss": 1.1296, "step": 5837 }, { "epoch": 0.8265024421320875, "grad_norm": 8.682117139270785, "learning_rate": 3.308866157463705e-06, "loss": 1.0824, "step": 5838 }, { "epoch": 0.8266440150067247, "grad_norm": 10.052727354828033, "learning_rate": 3.3083237676556777e-06, "loss": 1.1579, "step": 5839 }, { "epoch": 0.8267855878813619, "grad_norm": 8.3584590802253, "learning_rate": 3.3077813353546744e-06, "loss": 1.0756, "step": 5840 }, { "epoch": 0.8269271607559991, "grad_norm": 9.179492185422163, "learning_rate": 3.307238860589208e-06, "loss": 1.0468, "step": 5841 }, { "epoch": 0.8270687336306364, "grad_norm": 6.817296850397197, "learning_rate": 3.3066963433877967e-06, "loss": 1.0757, "step": 5842 }, { "epoch": 0.8272103065052736, "grad_norm": 7.899735854356367, "learning_rate": 3.306153783778961e-06, "loss": 1.0294, "step": 5843 }, { "epoch": 0.8273518793799108, "grad_norm": 8.448051580127117, "learning_rate": 3.305611181791221e-06, "loss": 1.0531, "step": 5844 }, { "epoch": 0.8274934522545481, "grad_norm": 10.048168140681089, "learning_rate": 3.305068537453102e-06, "loss": 1.0512, "step": 5845 }, { "epoch": 0.8276350251291853, "grad_norm": 8.224989926410291, "learning_rate": 3.3045258507931306e-06, "loss": 1.0471, "step": 5846 }, { "epoch": 0.8277765980038224, "grad_norm": 11.630522697950383, "learning_rate": 3.3039831218398346e-06, "loss": 1.0276, "step": 5847 }, { "epoch": 0.8279181708784596, "grad_norm": 8.277429008674876, "learning_rate": 3.303440350621745e-06, "loss": 0.9822, "step": 5848 }, { "epoch": 0.8280597437530969, "grad_norm": 11.917717805044054, "learning_rate": 3.3028975371673966e-06, "loss": 1.3002, "step": 5849 }, { "epoch": 0.8282013166277341, "grad_norm": 9.018689988241302, "learning_rate": 3.3023546815053227e-06, "loss": 1.0967, "step": 5850 }, { "epoch": 0.8283428895023713, "grad_norm": 8.390105332632945, "learning_rate": 3.301811783664061e-06, "loss": 1.1096, "step": 5851 }, { "epoch": 0.8284844623770086, "grad_norm": 9.173766790765267, "learning_rate": 3.3012688436721518e-06, "loss": 1.1641, "step": 5852 }, { "epoch": 0.8286260352516458, "grad_norm": 7.629680760255829, "learning_rate": 3.3007258615581372e-06, "loss": 1.107, "step": 5853 }, { "epoch": 0.828767608126283, "grad_norm": 8.633495182989739, "learning_rate": 3.300182837350561e-06, "loss": 1.1715, "step": 5854 }, { "epoch": 0.8289091810009203, "grad_norm": 8.012228203268426, "learning_rate": 3.29963977107797e-06, "loss": 1.0705, "step": 5855 }, { "epoch": 0.8290507538755575, "grad_norm": 10.038609927177145, "learning_rate": 3.2990966627689126e-06, "loss": 1.0829, "step": 5856 }, { "epoch": 0.8291923267501946, "grad_norm": 11.677189976491361, "learning_rate": 3.2985535124519387e-06, "loss": 1.1964, "step": 5857 }, { "epoch": 0.8293338996248318, "grad_norm": 9.719488281469003, "learning_rate": 3.2980103201556023e-06, "loss": 1.0716, "step": 5858 }, { "epoch": 0.8294754724994691, "grad_norm": 10.49823128471753, "learning_rate": 3.297467085908459e-06, "loss": 1.0967, "step": 5859 }, { "epoch": 0.8296170453741063, "grad_norm": 6.810590843975504, "learning_rate": 3.2969238097390655e-06, "loss": 1.0095, "step": 5860 }, { "epoch": 0.8297586182487435, "grad_norm": 7.770138816035334, "learning_rate": 3.2963804916759805e-06, "loss": 0.9905, "step": 5861 }, { "epoch": 0.8299001911233808, "grad_norm": 9.321149457522536, "learning_rate": 3.295837131747768e-06, "loss": 1.158, "step": 5862 }, { "epoch": 0.830041763998018, "grad_norm": 9.279362444845686, "learning_rate": 3.2952937299829902e-06, "loss": 1.062, "step": 5863 }, { "epoch": 0.8301833368726552, "grad_norm": 8.806768190124561, "learning_rate": 3.294750286410214e-06, "loss": 1.0698, "step": 5864 }, { "epoch": 0.8303249097472925, "grad_norm": 8.882553969137955, "learning_rate": 3.2942068010580088e-06, "loss": 1.0807, "step": 5865 }, { "epoch": 0.8304664826219297, "grad_norm": 9.318399698285738, "learning_rate": 3.2936632739549437e-06, "loss": 0.9908, "step": 5866 }, { "epoch": 0.8306080554965669, "grad_norm": 8.432321273126556, "learning_rate": 3.2931197051295915e-06, "loss": 1.0879, "step": 5867 }, { "epoch": 0.830749628371204, "grad_norm": 9.67480069529354, "learning_rate": 3.2925760946105277e-06, "loss": 1.0606, "step": 5868 }, { "epoch": 0.8308912012458413, "grad_norm": 7.338354328075523, "learning_rate": 3.2920324424263305e-06, "loss": 1.0043, "step": 5869 }, { "epoch": 0.8310327741204785, "grad_norm": 10.542696293140315, "learning_rate": 3.291488748605578e-06, "loss": 1.1245, "step": 5870 }, { "epoch": 0.8311743469951157, "grad_norm": 8.885265592041314, "learning_rate": 3.290945013176852e-06, "loss": 1.1386, "step": 5871 }, { "epoch": 0.831315919869753, "grad_norm": 8.170736842645088, "learning_rate": 3.2904012361687367e-06, "loss": 1.0079, "step": 5872 }, { "epoch": 0.8314574927443902, "grad_norm": 8.206150390764225, "learning_rate": 3.2898574176098176e-06, "loss": 1.1211, "step": 5873 }, { "epoch": 0.8315990656190274, "grad_norm": 8.436315382696888, "learning_rate": 3.2893135575286828e-06, "loss": 1.0891, "step": 5874 }, { "epoch": 0.8317406384936646, "grad_norm": 8.214635246848733, "learning_rate": 3.288769655953923e-06, "loss": 1.0654, "step": 5875 }, { "epoch": 0.8318822113683019, "grad_norm": 7.869950143736146, "learning_rate": 3.2882257129141305e-06, "loss": 1.0706, "step": 5876 }, { "epoch": 0.8320237842429391, "grad_norm": 7.523407584474532, "learning_rate": 3.287681728437899e-06, "loss": 1.0848, "step": 5877 }, { "epoch": 0.8321653571175762, "grad_norm": 11.140311002653375, "learning_rate": 3.2871377025538274e-06, "loss": 1.1883, "step": 5878 }, { "epoch": 0.8323069299922135, "grad_norm": 9.790372549260846, "learning_rate": 3.2865936352905144e-06, "loss": 1.2142, "step": 5879 }, { "epoch": 0.8324485028668507, "grad_norm": 7.838554698303813, "learning_rate": 3.28604952667656e-06, "loss": 1.1105, "step": 5880 }, { "epoch": 0.8325900757414879, "grad_norm": 7.759794629319064, "learning_rate": 3.2855053767405674e-06, "loss": 1.0451, "step": 5881 }, { "epoch": 0.8327316486161251, "grad_norm": 8.25802638012402, "learning_rate": 3.2849611855111433e-06, "loss": 1.0984, "step": 5882 }, { "epoch": 0.8328732214907624, "grad_norm": 6.827425504073397, "learning_rate": 3.284416953016895e-06, "loss": 1.1459, "step": 5883 }, { "epoch": 0.8330147943653996, "grad_norm": 9.498726207263102, "learning_rate": 3.2838726792864315e-06, "loss": 1.099, "step": 5884 }, { "epoch": 0.8331563672400368, "grad_norm": 8.652164748430556, "learning_rate": 3.2833283643483672e-06, "loss": 0.9982, "step": 5885 }, { "epoch": 0.8332979401146741, "grad_norm": 8.083160188793446, "learning_rate": 3.2827840082313147e-06, "loss": 1.0731, "step": 5886 }, { "epoch": 0.8334395129893113, "grad_norm": 7.770616489142794, "learning_rate": 3.28223961096389e-06, "loss": 1.1212, "step": 5887 }, { "epoch": 0.8335810858639484, "grad_norm": 8.551118512078826, "learning_rate": 3.281695172574712e-06, "loss": 1.2505, "step": 5888 }, { "epoch": 0.8337226587385856, "grad_norm": 8.45174262683003, "learning_rate": 3.281150693092402e-06, "loss": 1.0124, "step": 5889 }, { "epoch": 0.8338642316132229, "grad_norm": 7.759597741938639, "learning_rate": 3.280606172545582e-06, "loss": 1.1775, "step": 5890 }, { "epoch": 0.8340058044878601, "grad_norm": 8.89194214622632, "learning_rate": 3.280061610962878e-06, "loss": 1.1932, "step": 5891 }, { "epoch": 0.8341473773624973, "grad_norm": 7.945531191296478, "learning_rate": 3.279517008372917e-06, "loss": 0.9984, "step": 5892 }, { "epoch": 0.8342889502371346, "grad_norm": 8.047619238945085, "learning_rate": 3.2789723648043276e-06, "loss": 1.0445, "step": 5893 }, { "epoch": 0.8344305231117718, "grad_norm": 9.554523407345021, "learning_rate": 3.2784276802857418e-06, "loss": 1.147, "step": 5894 }, { "epoch": 0.834572095986409, "grad_norm": 9.184735563455568, "learning_rate": 3.2778829548457935e-06, "loss": 1.1255, "step": 5895 }, { "epoch": 0.8347136688610463, "grad_norm": 7.303829693533295, "learning_rate": 3.277338188513119e-06, "loss": 1.0325, "step": 5896 }, { "epoch": 0.8348552417356835, "grad_norm": 8.007591460363956, "learning_rate": 3.2767933813163542e-06, "loss": 1.0662, "step": 5897 }, { "epoch": 0.8349968146103207, "grad_norm": 8.454930364772055, "learning_rate": 3.276248533284141e-06, "loss": 1.056, "step": 5898 }, { "epoch": 0.8351383874849578, "grad_norm": 7.849351319159552, "learning_rate": 3.2757036444451212e-06, "loss": 1.1203, "step": 5899 }, { "epoch": 0.8352799603595951, "grad_norm": 9.443253336077346, "learning_rate": 3.2751587148279395e-06, "loss": 1.2128, "step": 5900 }, { "epoch": 0.8354215332342323, "grad_norm": 9.26717740670817, "learning_rate": 3.274613744461242e-06, "loss": 1.0, "step": 5901 }, { "epoch": 0.8355631061088695, "grad_norm": 11.867274290951553, "learning_rate": 3.2740687333736776e-06, "loss": 1.0337, "step": 5902 }, { "epoch": 0.8357046789835068, "grad_norm": 8.027151761694396, "learning_rate": 3.2735236815938975e-06, "loss": 1.0505, "step": 5903 }, { "epoch": 0.835846251858144, "grad_norm": 7.1865106316343645, "learning_rate": 3.2729785891505533e-06, "loss": 0.9024, "step": 5904 }, { "epoch": 0.8359878247327812, "grad_norm": 8.143624424935991, "learning_rate": 3.2724334560723015e-06, "loss": 0.9415, "step": 5905 }, { "epoch": 0.8361293976074184, "grad_norm": 8.754129361837707, "learning_rate": 3.271888282387799e-06, "loss": 1.0486, "step": 5906 }, { "epoch": 0.8362709704820557, "grad_norm": 9.702560826115581, "learning_rate": 3.2713430681257046e-06, "loss": 1.1897, "step": 5907 }, { "epoch": 0.8364125433566929, "grad_norm": 8.347344585021858, "learning_rate": 3.2707978133146805e-06, "loss": 0.9889, "step": 5908 }, { "epoch": 0.83655411623133, "grad_norm": 10.504336960070498, "learning_rate": 3.27025251798339e-06, "loss": 1.0706, "step": 5909 }, { "epoch": 0.8366956891059673, "grad_norm": 7.8595759402121015, "learning_rate": 3.2697071821604986e-06, "loss": 1.0477, "step": 5910 }, { "epoch": 0.8368372619806045, "grad_norm": 8.544208675593653, "learning_rate": 3.2691618058746757e-06, "loss": 1.0172, "step": 5911 }, { "epoch": 0.8369788348552417, "grad_norm": 8.17370654964843, "learning_rate": 3.26861638915459e-06, "loss": 1.0923, "step": 5912 }, { "epoch": 0.837120407729879, "grad_norm": 9.926551017352283, "learning_rate": 3.2680709320289123e-06, "loss": 0.9406, "step": 5913 }, { "epoch": 0.8372619806045162, "grad_norm": 9.964782499321752, "learning_rate": 3.26752543452632e-06, "loss": 1.0546, "step": 5914 }, { "epoch": 0.8374035534791534, "grad_norm": 9.195274748356022, "learning_rate": 3.266979896675487e-06, "loss": 1.0701, "step": 5915 }, { "epoch": 0.8375451263537906, "grad_norm": 10.577152720272538, "learning_rate": 3.266434318505093e-06, "loss": 1.1416, "step": 5916 }, { "epoch": 0.8376866992284279, "grad_norm": 7.40900585553379, "learning_rate": 3.2658887000438183e-06, "loss": 1.0943, "step": 5917 }, { "epoch": 0.8378282721030651, "grad_norm": 9.920513101659376, "learning_rate": 3.265343041320346e-06, "loss": 1.1432, "step": 5918 }, { "epoch": 0.8379698449777023, "grad_norm": 8.694415735990221, "learning_rate": 3.26479734236336e-06, "loss": 0.9907, "step": 5919 }, { "epoch": 0.8381114178523394, "grad_norm": 8.833489278700352, "learning_rate": 3.2642516032015486e-06, "loss": 1.1789, "step": 5920 }, { "epoch": 0.8382529907269767, "grad_norm": 7.725898181833906, "learning_rate": 3.2637058238635995e-06, "loss": 1.0315, "step": 5921 }, { "epoch": 0.8383945636016139, "grad_norm": 8.562372137075656, "learning_rate": 3.2631600043782054e-06, "loss": 0.9892, "step": 5922 }, { "epoch": 0.8385361364762511, "grad_norm": 9.264131628310349, "learning_rate": 3.262614144774059e-06, "loss": 1.0688, "step": 5923 }, { "epoch": 0.8386777093508884, "grad_norm": 7.8428670074615265, "learning_rate": 3.2620682450798557e-06, "loss": 1.0567, "step": 5924 }, { "epoch": 0.8388192822255256, "grad_norm": 10.001063862477213, "learning_rate": 3.2615223053242924e-06, "loss": 1.1965, "step": 5925 }, { "epoch": 0.8389608551001628, "grad_norm": 7.500943696732614, "learning_rate": 3.2609763255360696e-06, "loss": 1.0267, "step": 5926 }, { "epoch": 0.8391024279748001, "grad_norm": 7.207313627174759, "learning_rate": 3.2604303057438883e-06, "loss": 0.9523, "step": 5927 }, { "epoch": 0.8392440008494373, "grad_norm": 8.387604750591699, "learning_rate": 3.2598842459764535e-06, "loss": 1.1073, "step": 5928 }, { "epoch": 0.8393855737240745, "grad_norm": 9.005711332753606, "learning_rate": 3.2593381462624705e-06, "loss": 1.1287, "step": 5929 }, { "epoch": 0.8395271465987116, "grad_norm": 7.1439836812617905, "learning_rate": 3.2587920066306474e-06, "loss": 1.1055, "step": 5930 }, { "epoch": 0.8396687194733489, "grad_norm": 8.269401624958318, "learning_rate": 3.258245827109693e-06, "loss": 1.0421, "step": 5931 }, { "epoch": 0.8398102923479861, "grad_norm": 7.7468055786616965, "learning_rate": 3.2576996077283222e-06, "loss": 0.9156, "step": 5932 }, { "epoch": 0.8399518652226233, "grad_norm": 7.3426206329068275, "learning_rate": 3.2571533485152485e-06, "loss": 1.0501, "step": 5933 }, { "epoch": 0.8400934380972606, "grad_norm": 8.637810964548677, "learning_rate": 3.256607049499187e-06, "loss": 1.0797, "step": 5934 }, { "epoch": 0.8402350109718978, "grad_norm": 10.978421634148892, "learning_rate": 3.256060710708857e-06, "loss": 1.1578, "step": 5935 }, { "epoch": 0.840376583846535, "grad_norm": 8.64315389148513, "learning_rate": 3.255514332172979e-06, "loss": 1.0596, "step": 5936 }, { "epoch": 0.8405181567211722, "grad_norm": 9.15256881925566, "learning_rate": 3.2549679139202756e-06, "loss": 1.0259, "step": 5937 }, { "epoch": 0.8406597295958095, "grad_norm": 7.45125813965464, "learning_rate": 3.254421455979472e-06, "loss": 1.049, "step": 5938 }, { "epoch": 0.8408013024704467, "grad_norm": 8.802359732778159, "learning_rate": 3.253874958379296e-06, "loss": 1.0744, "step": 5939 }, { "epoch": 0.8409428753450838, "grad_norm": 8.452916056365225, "learning_rate": 3.253328421148475e-06, "loss": 1.0281, "step": 5940 }, { "epoch": 0.8410844482197211, "grad_norm": 12.016576761516971, "learning_rate": 3.2527818443157406e-06, "loss": 1.103, "step": 5941 }, { "epoch": 0.8412260210943583, "grad_norm": 7.228195814156061, "learning_rate": 3.2522352279098256e-06, "loss": 1.0113, "step": 5942 }, { "epoch": 0.8413675939689955, "grad_norm": 7.163789334180041, "learning_rate": 3.251688571959466e-06, "loss": 1.1809, "step": 5943 }, { "epoch": 0.8415091668436327, "grad_norm": 7.94389722253979, "learning_rate": 3.2511418764933983e-06, "loss": 1.1226, "step": 5944 }, { "epoch": 0.84165073971827, "grad_norm": 10.059072349050757, "learning_rate": 3.2505951415403625e-06, "loss": 1.0941, "step": 5945 }, { "epoch": 0.8417923125929072, "grad_norm": 7.73229434608462, "learning_rate": 3.2500483671290993e-06, "loss": 1.1759, "step": 5946 }, { "epoch": 0.8419338854675444, "grad_norm": 8.977915051903711, "learning_rate": 3.2495015532883533e-06, "loss": 1.2655, "step": 5947 }, { "epoch": 0.8420754583421817, "grad_norm": 8.329553077312939, "learning_rate": 3.248954700046869e-06, "loss": 1.0575, "step": 5948 }, { "epoch": 0.8422170312168189, "grad_norm": 7.509371687895909, "learning_rate": 3.248407807433396e-06, "loss": 1.0478, "step": 5949 }, { "epoch": 0.8423586040914561, "grad_norm": 8.42158102962705, "learning_rate": 3.2478608754766804e-06, "loss": 1.1151, "step": 5950 }, { "epoch": 0.8425001769660933, "grad_norm": 8.482512492680254, "learning_rate": 3.2473139042054773e-06, "loss": 1.2028, "step": 5951 }, { "epoch": 0.8426417498407305, "grad_norm": 9.484572884570731, "learning_rate": 3.2467668936485397e-06, "loss": 1.1473, "step": 5952 }, { "epoch": 0.8427833227153677, "grad_norm": 8.050645259798813, "learning_rate": 3.2462198438346227e-06, "loss": 1.0029, "step": 5953 }, { "epoch": 0.8429248955900049, "grad_norm": 9.222695130424187, "learning_rate": 3.2456727547924855e-06, "loss": 1.0408, "step": 5954 }, { "epoch": 0.8430664684646422, "grad_norm": 7.513028336669419, "learning_rate": 3.245125626550888e-06, "loss": 1.0423, "step": 5955 }, { "epoch": 0.8432080413392794, "grad_norm": 7.1991999393575465, "learning_rate": 3.244578459138591e-06, "loss": 1.0383, "step": 5956 }, { "epoch": 0.8433496142139166, "grad_norm": 7.627211203220047, "learning_rate": 3.2440312525843596e-06, "loss": 1.042, "step": 5957 }, { "epoch": 0.8434911870885539, "grad_norm": 7.406211885120807, "learning_rate": 3.24348400691696e-06, "loss": 0.8553, "step": 5958 }, { "epoch": 0.8436327599631911, "grad_norm": 7.690936553237061, "learning_rate": 3.2429367221651603e-06, "loss": 0.9665, "step": 5959 }, { "epoch": 0.8437743328378283, "grad_norm": 9.346497198827768, "learning_rate": 3.242389398357732e-06, "loss": 1.147, "step": 5960 }, { "epoch": 0.8439159057124654, "grad_norm": 9.210276080869663, "learning_rate": 3.2418420355234466e-06, "loss": 1.1733, "step": 5961 }, { "epoch": 0.8440574785871027, "grad_norm": 9.949461449851938, "learning_rate": 3.2412946336910778e-06, "loss": 1.2102, "step": 5962 }, { "epoch": 0.8441990514617399, "grad_norm": 8.07655611427248, "learning_rate": 3.240747192889403e-06, "loss": 1.0574, "step": 5963 }, { "epoch": 0.8443406243363771, "grad_norm": 9.281914170259414, "learning_rate": 3.240199713147201e-06, "loss": 1.1244, "step": 5964 }, { "epoch": 0.8444821972110144, "grad_norm": 7.500326276675689, "learning_rate": 3.239652194493251e-06, "loss": 1.1338, "step": 5965 }, { "epoch": 0.8446237700856516, "grad_norm": 8.572474107099685, "learning_rate": 3.2391046369563374e-06, "loss": 1.0437, "step": 5966 }, { "epoch": 0.8447653429602888, "grad_norm": 7.504184572776373, "learning_rate": 3.2385570405652444e-06, "loss": 0.9641, "step": 5967 }, { "epoch": 0.844906915834926, "grad_norm": 9.47642523280119, "learning_rate": 3.2380094053487576e-06, "loss": 1.1001, "step": 5968 }, { "epoch": 0.8450484887095633, "grad_norm": 9.547687302415545, "learning_rate": 3.237461731335667e-06, "loss": 1.2942, "step": 5969 }, { "epoch": 0.8451900615842005, "grad_norm": 6.665246176383651, "learning_rate": 3.2369140185547643e-06, "loss": 0.8893, "step": 5970 }, { "epoch": 0.8453316344588376, "grad_norm": 8.183413818129326, "learning_rate": 3.23636626703484e-06, "loss": 1.1958, "step": 5971 }, { "epoch": 0.8454732073334749, "grad_norm": 8.176867387839938, "learning_rate": 3.2358184768046895e-06, "loss": 1.0877, "step": 5972 }, { "epoch": 0.8456147802081121, "grad_norm": 9.525774474209229, "learning_rate": 3.235270647893111e-06, "loss": 0.9724, "step": 5973 }, { "epoch": 0.8457563530827493, "grad_norm": 9.29252007047894, "learning_rate": 3.2347227803289027e-06, "loss": 1.1555, "step": 5974 }, { "epoch": 0.8458979259573866, "grad_norm": 6.956622737690378, "learning_rate": 3.234174874140866e-06, "loss": 0.9655, "step": 5975 }, { "epoch": 0.8460394988320238, "grad_norm": 8.444567171956162, "learning_rate": 3.2336269293578032e-06, "loss": 1.0971, "step": 5976 }, { "epoch": 0.846181071706661, "grad_norm": 8.900809141945897, "learning_rate": 3.23307894600852e-06, "loss": 1.1588, "step": 5977 }, { "epoch": 0.8463226445812982, "grad_norm": 9.5838763097, "learning_rate": 3.2325309241218227e-06, "loss": 1.1154, "step": 5978 }, { "epoch": 0.8464642174559355, "grad_norm": 10.901827989636645, "learning_rate": 3.2319828637265217e-06, "loss": 1.0834, "step": 5979 }, { "epoch": 0.8466057903305727, "grad_norm": 9.087045826640733, "learning_rate": 3.2314347648514265e-06, "loss": 1.1508, "step": 5980 }, { "epoch": 0.8467473632052099, "grad_norm": 7.866019077909308, "learning_rate": 3.2308866275253516e-06, "loss": 1.0575, "step": 5981 }, { "epoch": 0.846888936079847, "grad_norm": 7.27048125047616, "learning_rate": 3.230338451777112e-06, "loss": 1.0243, "step": 5982 }, { "epoch": 0.8470305089544843, "grad_norm": 9.168823774440115, "learning_rate": 3.2297902376355238e-06, "loss": 1.1362, "step": 5983 }, { "epoch": 0.8471720818291215, "grad_norm": 8.86506591152726, "learning_rate": 3.2292419851294072e-06, "loss": 0.9775, "step": 5984 }, { "epoch": 0.8473136547037587, "grad_norm": 9.697587600080263, "learning_rate": 3.2286936942875837e-06, "loss": 1.042, "step": 5985 }, { "epoch": 0.847455227578396, "grad_norm": 9.143606368088621, "learning_rate": 3.2281453651388755e-06, "loss": 1.2957, "step": 5986 }, { "epoch": 0.8475968004530332, "grad_norm": 8.112357294975835, "learning_rate": 3.227596997712108e-06, "loss": 1.0239, "step": 5987 }, { "epoch": 0.8477383733276704, "grad_norm": 7.98820866409565, "learning_rate": 3.2270485920361093e-06, "loss": 1.0548, "step": 5988 }, { "epoch": 0.8478799462023077, "grad_norm": 8.550757605466396, "learning_rate": 3.2265001481397084e-06, "loss": 1.1283, "step": 5989 }, { "epoch": 0.8480215190769449, "grad_norm": 7.4013629998380095, "learning_rate": 3.225951666051736e-06, "loss": 0.9926, "step": 5990 }, { "epoch": 0.8481630919515821, "grad_norm": 8.204757557041171, "learning_rate": 3.225403145801026e-06, "loss": 1.165, "step": 5991 }, { "epoch": 0.8483046648262192, "grad_norm": 7.91455004320995, "learning_rate": 3.2248545874164145e-06, "loss": 1.1177, "step": 5992 }, { "epoch": 0.8484462377008565, "grad_norm": 7.622075755223073, "learning_rate": 3.2243059909267367e-06, "loss": 1.1461, "step": 5993 }, { "epoch": 0.8485878105754937, "grad_norm": 8.450789321059055, "learning_rate": 3.2237573563608333e-06, "loss": 1.1717, "step": 5994 }, { "epoch": 0.8487293834501309, "grad_norm": 9.843250855400555, "learning_rate": 3.2232086837475444e-06, "loss": 1.2512, "step": 5995 }, { "epoch": 0.8488709563247682, "grad_norm": 10.027352880303596, "learning_rate": 3.222659973115715e-06, "loss": 1.0931, "step": 5996 }, { "epoch": 0.8490125291994054, "grad_norm": 7.814707207738226, "learning_rate": 3.2221112244941905e-06, "loss": 1.0558, "step": 5997 }, { "epoch": 0.8491541020740426, "grad_norm": 8.733627696815455, "learning_rate": 3.2215624379118164e-06, "loss": 0.9684, "step": 5998 }, { "epoch": 0.8492956749486799, "grad_norm": 8.329090971330029, "learning_rate": 3.2210136133974434e-06, "loss": 1.054, "step": 5999 }, { "epoch": 0.8494372478233171, "grad_norm": 10.143934089085503, "learning_rate": 3.220464750979922e-06, "loss": 1.1375, "step": 6000 }, { "epoch": 0.8495788206979543, "grad_norm": 8.244714141757546, "learning_rate": 3.219915850688106e-06, "loss": 1.1345, "step": 6001 }, { "epoch": 0.8497203935725914, "grad_norm": 7.701661998178035, "learning_rate": 3.2193669125508504e-06, "loss": 1.0384, "step": 6002 }, { "epoch": 0.8498619664472287, "grad_norm": 8.311621139072248, "learning_rate": 3.218817936597013e-06, "loss": 0.9925, "step": 6003 }, { "epoch": 0.8500035393218659, "grad_norm": 7.994756410668056, "learning_rate": 3.218268922855452e-06, "loss": 1.1174, "step": 6004 }, { "epoch": 0.8501451121965031, "grad_norm": 8.534025865507118, "learning_rate": 3.2177198713550295e-06, "loss": 1.0434, "step": 6005 }, { "epoch": 0.8502866850711404, "grad_norm": 8.981721330246181, "learning_rate": 3.2171707821246083e-06, "loss": 0.9554, "step": 6006 }, { "epoch": 0.8504282579457776, "grad_norm": 8.031380618922011, "learning_rate": 3.216621655193055e-06, "loss": 1.0175, "step": 6007 }, { "epoch": 0.8505698308204148, "grad_norm": 10.416472125462024, "learning_rate": 3.216072490589235e-06, "loss": 1.1521, "step": 6008 }, { "epoch": 0.850711403695052, "grad_norm": 8.64135518360284, "learning_rate": 3.2155232883420172e-06, "loss": 1.1938, "step": 6009 }, { "epoch": 0.8508529765696893, "grad_norm": 8.452545992329258, "learning_rate": 3.2149740484802736e-06, "loss": 1.0214, "step": 6010 }, { "epoch": 0.8509945494443265, "grad_norm": 7.349926238435949, "learning_rate": 3.2144247710328787e-06, "loss": 1.0969, "step": 6011 }, { "epoch": 0.8511361223189637, "grad_norm": 8.870791444788543, "learning_rate": 3.2138754560287057e-06, "loss": 0.9664, "step": 6012 }, { "epoch": 0.8512776951936009, "grad_norm": 11.743997542212123, "learning_rate": 3.2133261034966325e-06, "loss": 1.0586, "step": 6013 }, { "epoch": 0.8514192680682381, "grad_norm": 8.246501758913633, "learning_rate": 3.2127767134655374e-06, "loss": 1.1541, "step": 6014 }, { "epoch": 0.8515608409428753, "grad_norm": 7.788973609776761, "learning_rate": 3.2122272859643022e-06, "loss": 1.0591, "step": 6015 }, { "epoch": 0.8517024138175125, "grad_norm": 7.868835458450505, "learning_rate": 3.2116778210218103e-06, "loss": 0.991, "step": 6016 }, { "epoch": 0.8518439866921498, "grad_norm": 8.87818529161594, "learning_rate": 3.211128318666945e-06, "loss": 1.1935, "step": 6017 }, { "epoch": 0.851985559566787, "grad_norm": 7.673812217627951, "learning_rate": 3.2105787789285947e-06, "loss": 0.9962, "step": 6018 }, { "epoch": 0.8521271324414242, "grad_norm": 9.560638046998886, "learning_rate": 3.2100292018356477e-06, "loss": 1.1923, "step": 6019 }, { "epoch": 0.8522687053160615, "grad_norm": 7.767648107162117, "learning_rate": 3.209479587416995e-06, "loss": 1.0041, "step": 6020 }, { "epoch": 0.8524102781906987, "grad_norm": 7.238644092109815, "learning_rate": 3.208929935701529e-06, "loss": 1.1255, "step": 6021 }, { "epoch": 0.8525518510653359, "grad_norm": 9.563133069194484, "learning_rate": 3.2083802467181452e-06, "loss": 1.1191, "step": 6022 }, { "epoch": 0.852693423939973, "grad_norm": 9.102402807909646, "learning_rate": 3.2078305204957406e-06, "loss": 1.0991, "step": 6023 }, { "epoch": 0.8528349968146103, "grad_norm": 10.058112094050472, "learning_rate": 3.2072807570632125e-06, "loss": 1.1735, "step": 6024 }, { "epoch": 0.8529765696892475, "grad_norm": 9.48419199331779, "learning_rate": 3.2067309564494626e-06, "loss": 0.9196, "step": 6025 }, { "epoch": 0.8531181425638847, "grad_norm": 6.726731212950262, "learning_rate": 3.206181118683393e-06, "loss": 1.0244, "step": 6026 }, { "epoch": 0.853259715438522, "grad_norm": 8.313064441304027, "learning_rate": 3.205631243793909e-06, "loss": 1.1124, "step": 6027 }, { "epoch": 0.8534012883131592, "grad_norm": 7.765531666000987, "learning_rate": 3.2050813318099166e-06, "loss": 1.1645, "step": 6028 }, { "epoch": 0.8535428611877964, "grad_norm": 8.492678013087767, "learning_rate": 3.204531382760325e-06, "loss": 1.0785, "step": 6029 }, { "epoch": 0.8536844340624337, "grad_norm": 7.9990088325658695, "learning_rate": 3.203981396674043e-06, "loss": 1.1207, "step": 6030 }, { "epoch": 0.8538260069370709, "grad_norm": 8.69741058400138, "learning_rate": 3.2034313735799837e-06, "loss": 1.1965, "step": 6031 }, { "epoch": 0.8539675798117081, "grad_norm": 7.654595145614281, "learning_rate": 3.2028813135070625e-06, "loss": 1.0205, "step": 6032 }, { "epoch": 0.8541091526863452, "grad_norm": 8.895211000732843, "learning_rate": 3.2023312164841937e-06, "loss": 1.1214, "step": 6033 }, { "epoch": 0.8542507255609825, "grad_norm": 7.151305777017666, "learning_rate": 3.201781082540297e-06, "loss": 1.012, "step": 6034 }, { "epoch": 0.8543922984356197, "grad_norm": 6.778675627865835, "learning_rate": 3.201230911704292e-06, "loss": 1.0506, "step": 6035 }, { "epoch": 0.8545338713102569, "grad_norm": 10.699111074439928, "learning_rate": 3.2006807040051013e-06, "loss": 1.2564, "step": 6036 }, { "epoch": 0.8546754441848942, "grad_norm": 10.249633410341403, "learning_rate": 3.2001304594716476e-06, "loss": 1.213, "step": 6037 }, { "epoch": 0.8548170170595314, "grad_norm": 10.09014922834124, "learning_rate": 3.1995801781328585e-06, "loss": 1.0901, "step": 6038 }, { "epoch": 0.8549585899341686, "grad_norm": 9.21318688077041, "learning_rate": 3.1990298600176607e-06, "loss": 1.241, "step": 6039 }, { "epoch": 0.8551001628088059, "grad_norm": 7.5377361810983965, "learning_rate": 3.198479505154984e-06, "loss": 1.0826, "step": 6040 }, { "epoch": 0.8552417356834431, "grad_norm": 7.577675214180886, "learning_rate": 3.197929113573761e-06, "loss": 1.0489, "step": 6041 }, { "epoch": 0.8553833085580803, "grad_norm": 7.634940250065505, "learning_rate": 3.197378685302925e-06, "loss": 1.0287, "step": 6042 }, { "epoch": 0.8555248814327175, "grad_norm": 7.118768476641408, "learning_rate": 3.196828220371411e-06, "loss": 1.067, "step": 6043 }, { "epoch": 0.8556664543073547, "grad_norm": 8.055412074890338, "learning_rate": 3.196277718808157e-06, "loss": 1.0218, "step": 6044 }, { "epoch": 0.8558080271819919, "grad_norm": 8.42170876531788, "learning_rate": 3.195727180642104e-06, "loss": 1.209, "step": 6045 }, { "epoch": 0.8559496000566291, "grad_norm": 7.72241766275986, "learning_rate": 3.1951766059021905e-06, "loss": 1.1006, "step": 6046 }, { "epoch": 0.8560911729312664, "grad_norm": 8.8662698279277, "learning_rate": 3.1946259946173607e-06, "loss": 1.0299, "step": 6047 }, { "epoch": 0.8562327458059036, "grad_norm": 10.891471227883425, "learning_rate": 3.1940753468165607e-06, "loss": 1.2827, "step": 6048 }, { "epoch": 0.8563743186805408, "grad_norm": 8.017140146761344, "learning_rate": 3.193524662528738e-06, "loss": 1.178, "step": 6049 }, { "epoch": 0.856515891555178, "grad_norm": 9.990441522473612, "learning_rate": 3.192973941782841e-06, "loss": 1.1697, "step": 6050 }, { "epoch": 0.8566574644298153, "grad_norm": 8.330336171633284, "learning_rate": 3.1924231846078198e-06, "loss": 0.9907, "step": 6051 }, { "epoch": 0.8567990373044525, "grad_norm": 8.072419917092464, "learning_rate": 3.1918723910326283e-06, "loss": 1.166, "step": 6052 }, { "epoch": 0.8569406101790897, "grad_norm": 8.068809228941213, "learning_rate": 3.1913215610862208e-06, "loss": 1.0095, "step": 6053 }, { "epoch": 0.8570821830537269, "grad_norm": 7.761346939619352, "learning_rate": 3.1907706947975546e-06, "loss": 1.0803, "step": 6054 }, { "epoch": 0.8572237559283641, "grad_norm": 8.141686778465063, "learning_rate": 3.190219792195588e-06, "loss": 0.9777, "step": 6055 }, { "epoch": 0.8573653288030013, "grad_norm": 7.797542810986045, "learning_rate": 3.189668853309282e-06, "loss": 1.0177, "step": 6056 }, { "epoch": 0.8575069016776385, "grad_norm": 7.259662109162449, "learning_rate": 3.189117878167598e-06, "loss": 1.0588, "step": 6057 }, { "epoch": 0.8576484745522758, "grad_norm": 7.864987017311008, "learning_rate": 3.1885668667995006e-06, "loss": 1.1372, "step": 6058 }, { "epoch": 0.857790047426913, "grad_norm": 7.377609050052067, "learning_rate": 3.1880158192339574e-06, "loss": 1.0593, "step": 6059 }, { "epoch": 0.8579316203015502, "grad_norm": 9.029017400160571, "learning_rate": 3.1874647354999354e-06, "loss": 1.0816, "step": 6060 }, { "epoch": 0.8580731931761875, "grad_norm": 8.269828318943791, "learning_rate": 3.186913615626405e-06, "loss": 1.1624, "step": 6061 }, { "epoch": 0.8582147660508247, "grad_norm": 7.3535815070089905, "learning_rate": 3.186362459642337e-06, "loss": 1.0307, "step": 6062 }, { "epoch": 0.8583563389254619, "grad_norm": 7.911532962757646, "learning_rate": 3.1858112675767074e-06, "loss": 1.127, "step": 6063 }, { "epoch": 0.8584979118000992, "grad_norm": 9.169192804150619, "learning_rate": 3.18526003945849e-06, "loss": 1.1658, "step": 6064 }, { "epoch": 0.8586394846747363, "grad_norm": 8.021146958069602, "learning_rate": 3.184708775316663e-06, "loss": 0.9976, "step": 6065 }, { "epoch": 0.8587810575493735, "grad_norm": 8.130204162661753, "learning_rate": 3.184157475180208e-06, "loss": 1.1572, "step": 6066 }, { "epoch": 0.8589226304240107, "grad_norm": 8.62566472684746, "learning_rate": 3.183606139078103e-06, "loss": 1.1331, "step": 6067 }, { "epoch": 0.859064203298648, "grad_norm": 8.239223493265166, "learning_rate": 3.1830547670393337e-06, "loss": 1.1181, "step": 6068 }, { "epoch": 0.8592057761732852, "grad_norm": 7.810430145726886, "learning_rate": 3.1825033590928844e-06, "loss": 1.0889, "step": 6069 }, { "epoch": 0.8593473490479224, "grad_norm": 8.656312136650856, "learning_rate": 3.181951915267742e-06, "loss": 1.0738, "step": 6070 }, { "epoch": 0.8594889219225597, "grad_norm": 8.078204805728856, "learning_rate": 3.181400435592897e-06, "loss": 0.9287, "step": 6071 }, { "epoch": 0.8596304947971969, "grad_norm": 8.514009486954153, "learning_rate": 3.180848920097338e-06, "loss": 1.1244, "step": 6072 }, { "epoch": 0.8597720676718341, "grad_norm": 8.036461708791688, "learning_rate": 3.1802973688100596e-06, "loss": 1.1641, "step": 6073 }, { "epoch": 0.8599136405464713, "grad_norm": 7.851392692893254, "learning_rate": 3.179745781760055e-06, "loss": 0.9761, "step": 6074 }, { "epoch": 0.8600552134211085, "grad_norm": 8.093291398536264, "learning_rate": 3.1791941589763225e-06, "loss": 0.9995, "step": 6075 }, { "epoch": 0.8601967862957457, "grad_norm": 8.215643814314944, "learning_rate": 3.178642500487859e-06, "loss": 1.067, "step": 6076 }, { "epoch": 0.8603383591703829, "grad_norm": 6.752467234368539, "learning_rate": 3.1780908063236653e-06, "loss": 1.0087, "step": 6077 }, { "epoch": 0.8604799320450202, "grad_norm": 9.308716690333767, "learning_rate": 3.1775390765127433e-06, "loss": 1.0808, "step": 6078 }, { "epoch": 0.8606215049196574, "grad_norm": 9.054592708061843, "learning_rate": 3.1769873110840977e-06, "loss": 1.0687, "step": 6079 }, { "epoch": 0.8607630777942946, "grad_norm": 9.091998478900667, "learning_rate": 3.176435510066734e-06, "loss": 0.937, "step": 6080 }, { "epoch": 0.8609046506689318, "grad_norm": 8.793370038183198, "learning_rate": 3.175883673489659e-06, "loss": 1.0211, "step": 6081 }, { "epoch": 0.8610462235435691, "grad_norm": 8.125422540094645, "learning_rate": 3.1753318013818848e-06, "loss": 1.114, "step": 6082 }, { "epoch": 0.8611877964182063, "grad_norm": 9.17394434705228, "learning_rate": 3.1747798937724207e-06, "loss": 1.0592, "step": 6083 }, { "epoch": 0.8613293692928435, "grad_norm": 7.804686522459899, "learning_rate": 3.1742279506902798e-06, "loss": 1.0483, "step": 6084 }, { "epoch": 0.8614709421674807, "grad_norm": 9.033792421656484, "learning_rate": 3.173675972164479e-06, "loss": 0.9886, "step": 6085 }, { "epoch": 0.8616125150421179, "grad_norm": 9.92985768626286, "learning_rate": 3.1731239582240343e-06, "loss": 1.1587, "step": 6086 }, { "epoch": 0.8617540879167551, "grad_norm": 8.488597626984776, "learning_rate": 3.1725719088979655e-06, "loss": 1.0844, "step": 6087 }, { "epoch": 0.8618956607913923, "grad_norm": 7.731847608285108, "learning_rate": 3.172019824215293e-06, "loss": 1.1277, "step": 6088 }, { "epoch": 0.8620372336660296, "grad_norm": 8.425923862231897, "learning_rate": 3.171467704205039e-06, "loss": 1.109, "step": 6089 }, { "epoch": 0.8621788065406668, "grad_norm": 8.589925462890106, "learning_rate": 3.1709155488962283e-06, "loss": 1.0864, "step": 6090 }, { "epoch": 0.862320379415304, "grad_norm": 6.845903449725883, "learning_rate": 3.1703633583178885e-06, "loss": 0.9306, "step": 6091 }, { "epoch": 0.8624619522899413, "grad_norm": 7.677924170097513, "learning_rate": 3.1698111324990454e-06, "loss": 0.9616, "step": 6092 }, { "epoch": 0.8626035251645785, "grad_norm": 6.902208068930308, "learning_rate": 3.169258871468731e-06, "loss": 0.9473, "step": 6093 }, { "epoch": 0.8627450980392157, "grad_norm": 9.758960225736141, "learning_rate": 3.1687065752559777e-06, "loss": 1.1281, "step": 6094 }, { "epoch": 0.862886670913853, "grad_norm": 8.407545479144714, "learning_rate": 3.168154243889817e-06, "loss": 0.9833, "step": 6095 }, { "epoch": 0.8630282437884901, "grad_norm": 8.42937660527831, "learning_rate": 3.1676018773992866e-06, "loss": 1.1268, "step": 6096 }, { "epoch": 0.8631698166631273, "grad_norm": 8.368246059614751, "learning_rate": 3.1670494758134234e-06, "loss": 1.0646, "step": 6097 }, { "epoch": 0.8633113895377645, "grad_norm": 9.333781458677901, "learning_rate": 3.1664970391612666e-06, "loss": 1.1142, "step": 6098 }, { "epoch": 0.8634529624124018, "grad_norm": 9.069575632213311, "learning_rate": 3.1659445674718563e-06, "loss": 1.0351, "step": 6099 }, { "epoch": 0.863594535287039, "grad_norm": 8.191240926592288, "learning_rate": 3.165392060774238e-06, "loss": 1.1148, "step": 6100 }, { "epoch": 0.8637361081616762, "grad_norm": 7.549061586784443, "learning_rate": 3.1648395190974546e-06, "loss": 1.0582, "step": 6101 }, { "epoch": 0.8638776810363135, "grad_norm": 8.853007832407247, "learning_rate": 3.1642869424705537e-06, "loss": 1.2245, "step": 6102 }, { "epoch": 0.8640192539109507, "grad_norm": 7.973447125918891, "learning_rate": 3.1637343309225833e-06, "loss": 0.9378, "step": 6103 }, { "epoch": 0.8641608267855879, "grad_norm": 8.538413369511478, "learning_rate": 3.163181684482594e-06, "loss": 1.1206, "step": 6104 }, { "epoch": 0.8643023996602252, "grad_norm": 7.260720580940448, "learning_rate": 3.162629003179638e-06, "loss": 0.8665, "step": 6105 }, { "epoch": 0.8644439725348623, "grad_norm": 10.585695698922002, "learning_rate": 3.1620762870427703e-06, "loss": 1.0667, "step": 6106 }, { "epoch": 0.8645855454094995, "grad_norm": 8.07921057366698, "learning_rate": 3.1615235361010442e-06, "loss": 1.1115, "step": 6107 }, { "epoch": 0.8647271182841367, "grad_norm": 8.37787362518033, "learning_rate": 3.1609707503835203e-06, "loss": 1.2319, "step": 6108 }, { "epoch": 0.864868691158774, "grad_norm": 10.294594125699692, "learning_rate": 3.1604179299192565e-06, "loss": 1.0665, "step": 6109 }, { "epoch": 0.8650102640334112, "grad_norm": 7.3914895066194095, "learning_rate": 3.1598650747373144e-06, "loss": 1.0033, "step": 6110 }, { "epoch": 0.8651518369080484, "grad_norm": 8.46485141107752, "learning_rate": 3.1593121848667575e-06, "loss": 1.0614, "step": 6111 }, { "epoch": 0.8652934097826857, "grad_norm": 8.285760306244628, "learning_rate": 3.158759260336651e-06, "loss": 1.004, "step": 6112 }, { "epoch": 0.8654349826573229, "grad_norm": 8.102592674980066, "learning_rate": 3.1582063011760604e-06, "loss": 1.1137, "step": 6113 }, { "epoch": 0.8655765555319601, "grad_norm": 9.247968295930683, "learning_rate": 3.1576533074140564e-06, "loss": 1.0833, "step": 6114 }, { "epoch": 0.8657181284065973, "grad_norm": 9.342329548829124, "learning_rate": 3.157100279079708e-06, "loss": 1.0801, "step": 6115 }, { "epoch": 0.8658597012812345, "grad_norm": 7.56920181803269, "learning_rate": 3.1565472162020876e-06, "loss": 0.9795, "step": 6116 }, { "epoch": 0.8660012741558717, "grad_norm": 7.966905088594556, "learning_rate": 3.15599411881027e-06, "loss": 1.0239, "step": 6117 }, { "epoch": 0.8661428470305089, "grad_norm": 8.145471224984519, "learning_rate": 3.15544098693333e-06, "loss": 1.0322, "step": 6118 }, { "epoch": 0.8662844199051462, "grad_norm": 8.008721842423821, "learning_rate": 3.1548878206003477e-06, "loss": 1.1117, "step": 6119 }, { "epoch": 0.8664259927797834, "grad_norm": 7.751100031400533, "learning_rate": 3.1543346198403998e-06, "loss": 1.0745, "step": 6120 }, { "epoch": 0.8665675656544206, "grad_norm": 7.666410469529054, "learning_rate": 3.1537813846825684e-06, "loss": 0.9179, "step": 6121 }, { "epoch": 0.8667091385290578, "grad_norm": 9.558989235662917, "learning_rate": 3.1532281151559372e-06, "loss": 1.2635, "step": 6122 }, { "epoch": 0.8668507114036951, "grad_norm": 7.795279360616821, "learning_rate": 3.152674811289591e-06, "loss": 1.0952, "step": 6123 }, { "epoch": 0.8669922842783323, "grad_norm": 8.16157118462249, "learning_rate": 3.152121473112618e-06, "loss": 1.1223, "step": 6124 }, { "epoch": 0.8671338571529695, "grad_norm": 7.788360411013714, "learning_rate": 3.151568100654104e-06, "loss": 1.0228, "step": 6125 }, { "epoch": 0.8672754300276068, "grad_norm": 10.508721861789713, "learning_rate": 3.1510146939431414e-06, "loss": 1.0812, "step": 6126 }, { "epoch": 0.8674170029022439, "grad_norm": 8.92502133663416, "learning_rate": 3.150461253008822e-06, "loss": 1.1074, "step": 6127 }, { "epoch": 0.8675585757768811, "grad_norm": 9.852023665232082, "learning_rate": 3.149907777880239e-06, "loss": 1.1565, "step": 6128 }, { "epoch": 0.8677001486515183, "grad_norm": 8.051966210254127, "learning_rate": 3.1493542685864886e-06, "loss": 1.1346, "step": 6129 }, { "epoch": 0.8678417215261556, "grad_norm": 9.416741790021717, "learning_rate": 3.1488007251566687e-06, "loss": 1.0279, "step": 6130 }, { "epoch": 0.8679832944007928, "grad_norm": 7.916914792104274, "learning_rate": 3.1482471476198784e-06, "loss": 1.1341, "step": 6131 }, { "epoch": 0.86812486727543, "grad_norm": 8.564338431030762, "learning_rate": 3.1476935360052184e-06, "loss": 1.1733, "step": 6132 }, { "epoch": 0.8682664401500673, "grad_norm": 9.360618037498663, "learning_rate": 3.1471398903417926e-06, "loss": 1.0384, "step": 6133 }, { "epoch": 0.8684080130247045, "grad_norm": 7.4100157101525115, "learning_rate": 3.146586210658706e-06, "loss": 1.027, "step": 6134 }, { "epoch": 0.8685495858993417, "grad_norm": 8.459024736179975, "learning_rate": 3.1460324969850643e-06, "loss": 1.1453, "step": 6135 }, { "epoch": 0.868691158773979, "grad_norm": 8.155564319537747, "learning_rate": 3.1454787493499746e-06, "loss": 1.0862, "step": 6136 }, { "epoch": 0.8688327316486161, "grad_norm": 9.478624086606416, "learning_rate": 3.144924967782549e-06, "loss": 1.1798, "step": 6137 }, { "epoch": 0.8689743045232533, "grad_norm": 7.3423289121204425, "learning_rate": 3.144371152311899e-06, "loss": 0.8901, "step": 6138 }, { "epoch": 0.8691158773978905, "grad_norm": 8.349163685107788, "learning_rate": 3.143817302967138e-06, "loss": 1.108, "step": 6139 }, { "epoch": 0.8692574502725278, "grad_norm": 7.720210010656613, "learning_rate": 3.1432634197773814e-06, "loss": 1.1083, "step": 6140 }, { "epoch": 0.869399023147165, "grad_norm": 8.625017691331493, "learning_rate": 3.142709502771747e-06, "loss": 1.2336, "step": 6141 }, { "epoch": 0.8695405960218022, "grad_norm": 8.973641261371716, "learning_rate": 3.142155551979352e-06, "loss": 1.2307, "step": 6142 }, { "epoch": 0.8696821688964395, "grad_norm": 7.004935295900731, "learning_rate": 3.1416015674293195e-06, "loss": 0.9585, "step": 6143 }, { "epoch": 0.8698237417710767, "grad_norm": 8.06579711683122, "learning_rate": 3.14104754915077e-06, "loss": 1.0587, "step": 6144 }, { "epoch": 0.8699653146457139, "grad_norm": 11.484493914137758, "learning_rate": 3.1404934971728297e-06, "loss": 0.9737, "step": 6145 }, { "epoch": 0.8701068875203511, "grad_norm": 8.494457008292093, "learning_rate": 3.1399394115246235e-06, "loss": 1.125, "step": 6146 }, { "epoch": 0.8702484603949883, "grad_norm": 8.485592918023475, "learning_rate": 3.1393852922352795e-06, "loss": 1.0522, "step": 6147 }, { "epoch": 0.8703900332696255, "grad_norm": 6.956853865386688, "learning_rate": 3.138831139333928e-06, "loss": 0.9955, "step": 6148 }, { "epoch": 0.8705316061442627, "grad_norm": 8.217384834190474, "learning_rate": 3.1382769528496993e-06, "loss": 1.0351, "step": 6149 }, { "epoch": 0.8706731790189, "grad_norm": 8.202000015865272, "learning_rate": 3.1377227328117264e-06, "loss": 1.0764, "step": 6150 }, { "epoch": 0.8708147518935372, "grad_norm": 8.036824351340327, "learning_rate": 3.137168479249146e-06, "loss": 1.1487, "step": 6151 }, { "epoch": 0.8709563247681744, "grad_norm": 8.65661223717481, "learning_rate": 3.1366141921910936e-06, "loss": 1.0758, "step": 6152 }, { "epoch": 0.8710978976428116, "grad_norm": 8.525352367826533, "learning_rate": 3.136059871666708e-06, "loss": 1.0752, "step": 6153 }, { "epoch": 0.8712394705174489, "grad_norm": 8.26553408134661, "learning_rate": 3.1355055177051286e-06, "loss": 1.1221, "step": 6154 }, { "epoch": 0.8713810433920861, "grad_norm": 7.104346339246066, "learning_rate": 3.1349511303354983e-06, "loss": 1.1588, "step": 6155 }, { "epoch": 0.8715226162667233, "grad_norm": 7.144623085795775, "learning_rate": 3.134396709586961e-06, "loss": 0.9618, "step": 6156 }, { "epoch": 0.8716641891413606, "grad_norm": 8.901450272599764, "learning_rate": 3.133842255488661e-06, "loss": 1.0089, "step": 6157 }, { "epoch": 0.8718057620159977, "grad_norm": 8.811466237571326, "learning_rate": 3.133287768069746e-06, "loss": 1.0826, "step": 6158 }, { "epoch": 0.8719473348906349, "grad_norm": 8.919499122686732, "learning_rate": 3.1327332473593657e-06, "loss": 1.0955, "step": 6159 }, { "epoch": 0.8720889077652721, "grad_norm": 7.552118663485966, "learning_rate": 3.1321786933866705e-06, "loss": 1.1241, "step": 6160 }, { "epoch": 0.8722304806399094, "grad_norm": 10.0129246158549, "learning_rate": 3.131624106180813e-06, "loss": 1.1584, "step": 6161 }, { "epoch": 0.8723720535145466, "grad_norm": 9.263912975528433, "learning_rate": 3.1310694857709467e-06, "loss": 1.0696, "step": 6162 }, { "epoch": 0.8725136263891838, "grad_norm": 7.927734134408102, "learning_rate": 3.130514832186228e-06, "loss": 0.9506, "step": 6163 }, { "epoch": 0.8726551992638211, "grad_norm": 10.390381957918839, "learning_rate": 3.129960145455815e-06, "loss": 1.1371, "step": 6164 }, { "epoch": 0.8727967721384583, "grad_norm": 8.061290088328517, "learning_rate": 3.129405425608867e-06, "loss": 0.9961, "step": 6165 }, { "epoch": 0.8729383450130955, "grad_norm": 8.176510023360173, "learning_rate": 3.128850672674545e-06, "loss": 1.1897, "step": 6166 }, { "epoch": 0.8730799178877328, "grad_norm": 10.345375509549847, "learning_rate": 3.1282958866820113e-06, "loss": 1.1799, "step": 6167 }, { "epoch": 0.8732214907623699, "grad_norm": 8.967619821403586, "learning_rate": 3.127741067660432e-06, "loss": 1.1706, "step": 6168 }, { "epoch": 0.8733630636370071, "grad_norm": 8.812517314920933, "learning_rate": 3.127186215638973e-06, "loss": 0.9879, "step": 6169 }, { "epoch": 0.8735046365116443, "grad_norm": 10.896009824258238, "learning_rate": 3.1266313306468018e-06, "loss": 1.0939, "step": 6170 }, { "epoch": 0.8736462093862816, "grad_norm": 10.025547581508055, "learning_rate": 3.1260764127130887e-06, "loss": 1.1294, "step": 6171 }, { "epoch": 0.8737877822609188, "grad_norm": 7.158974474555958, "learning_rate": 3.125521461867006e-06, "loss": 1.0489, "step": 6172 }, { "epoch": 0.873929355135556, "grad_norm": 9.099692588895646, "learning_rate": 3.1249664781377257e-06, "loss": 1.0541, "step": 6173 }, { "epoch": 0.8740709280101933, "grad_norm": 8.971230193870083, "learning_rate": 3.1244114615544242e-06, "loss": 1.0137, "step": 6174 }, { "epoch": 0.8742125008848305, "grad_norm": 8.493365671923687, "learning_rate": 3.1238564121462776e-06, "loss": 1.1168, "step": 6175 }, { "epoch": 0.8743540737594677, "grad_norm": 9.599751437466118, "learning_rate": 3.1233013299424646e-06, "loss": 1.206, "step": 6176 }, { "epoch": 0.874495646634105, "grad_norm": 8.131100755710245, "learning_rate": 3.122746214972166e-06, "loss": 1.0671, "step": 6177 }, { "epoch": 0.8746372195087421, "grad_norm": 10.466475601234183, "learning_rate": 3.122191067264563e-06, "loss": 1.1627, "step": 6178 }, { "epoch": 0.8747787923833793, "grad_norm": 9.172428562804187, "learning_rate": 3.121635886848839e-06, "loss": 1.1145, "step": 6179 }, { "epoch": 0.8749203652580165, "grad_norm": 9.715331037361894, "learning_rate": 3.12108067375418e-06, "loss": 1.0263, "step": 6180 }, { "epoch": 0.8750619381326538, "grad_norm": 12.3744130621542, "learning_rate": 3.120525428009773e-06, "loss": 1.1423, "step": 6181 }, { "epoch": 0.875203511007291, "grad_norm": 8.074389304106989, "learning_rate": 3.1199701496448074e-06, "loss": 1.1334, "step": 6182 }, { "epoch": 0.8753450838819282, "grad_norm": 8.21009562465722, "learning_rate": 3.119414838688473e-06, "loss": 0.9272, "step": 6183 }, { "epoch": 0.8754866567565655, "grad_norm": 8.374100366355826, "learning_rate": 3.1188594951699623e-06, "loss": 1.1351, "step": 6184 }, { "epoch": 0.8756282296312027, "grad_norm": 10.887242934545931, "learning_rate": 3.1183041191184695e-06, "loss": 1.1635, "step": 6185 }, { "epoch": 0.8757698025058399, "grad_norm": 8.564330413530815, "learning_rate": 3.11774871056319e-06, "loss": 0.852, "step": 6186 }, { "epoch": 0.8759113753804771, "grad_norm": 9.986876077463918, "learning_rate": 3.1171932695333216e-06, "loss": 1.0499, "step": 6187 }, { "epoch": 0.8760529482551144, "grad_norm": 8.578133449307087, "learning_rate": 3.1166377960580635e-06, "loss": 1.1021, "step": 6188 }, { "epoch": 0.8761945211297515, "grad_norm": 7.449026968477132, "learning_rate": 3.116082290166616e-06, "loss": 0.9516, "step": 6189 }, { "epoch": 0.8763360940043887, "grad_norm": 9.125176833020099, "learning_rate": 3.1155267518881816e-06, "loss": 0.9308, "step": 6190 }, { "epoch": 0.876477666879026, "grad_norm": 9.051498154197969, "learning_rate": 3.114971181251965e-06, "loss": 1.182, "step": 6191 }, { "epoch": 0.8766192397536632, "grad_norm": 8.899882077925334, "learning_rate": 3.1144155782871723e-06, "loss": 1.1855, "step": 6192 }, { "epoch": 0.8767608126283004, "grad_norm": 8.986798246573102, "learning_rate": 3.113859943023011e-06, "loss": 1.0521, "step": 6193 }, { "epoch": 0.8769023855029376, "grad_norm": 8.915935831036698, "learning_rate": 3.1133042754886896e-06, "loss": 1.2242, "step": 6194 }, { "epoch": 0.8770439583775749, "grad_norm": 11.052986494021049, "learning_rate": 3.1127485757134194e-06, "loss": 1.0728, "step": 6195 }, { "epoch": 0.8771855312522121, "grad_norm": 8.910731636945648, "learning_rate": 3.1121928437264138e-06, "loss": 1.0483, "step": 6196 }, { "epoch": 0.8773271041268493, "grad_norm": 8.686253739577282, "learning_rate": 3.111637079556887e-06, "loss": 1.0293, "step": 6197 }, { "epoch": 0.8774686770014866, "grad_norm": 8.206783503370332, "learning_rate": 3.1110812832340552e-06, "loss": 0.968, "step": 6198 }, { "epoch": 0.8776102498761237, "grad_norm": 7.559057035220646, "learning_rate": 3.1105254547871354e-06, "loss": 1.0963, "step": 6199 }, { "epoch": 0.8777518227507609, "grad_norm": 11.266367894411516, "learning_rate": 3.1099695942453485e-06, "loss": 1.123, "step": 6200 }, { "epoch": 0.8778933956253981, "grad_norm": 13.753425864942194, "learning_rate": 3.109413701637914e-06, "loss": 1.0057, "step": 6201 }, { "epoch": 0.8780349685000354, "grad_norm": 9.82655406823557, "learning_rate": 3.108857776994056e-06, "loss": 1.0134, "step": 6202 }, { "epoch": 0.8781765413746726, "grad_norm": 7.5676242082457135, "learning_rate": 3.108301820342998e-06, "loss": 1.1894, "step": 6203 }, { "epoch": 0.8783181142493098, "grad_norm": 8.038213064027062, "learning_rate": 3.107745831713968e-06, "loss": 1.1084, "step": 6204 }, { "epoch": 0.8784596871239471, "grad_norm": 8.549289731868583, "learning_rate": 3.107189811136192e-06, "loss": 1.0658, "step": 6205 }, { "epoch": 0.8786012599985843, "grad_norm": 10.82297744709433, "learning_rate": 3.1066337586389007e-06, "loss": 1.0988, "step": 6206 }, { "epoch": 0.8787428328732215, "grad_norm": 11.869928501752602, "learning_rate": 3.1060776742513247e-06, "loss": 1.1206, "step": 6207 }, { "epoch": 0.8788844057478588, "grad_norm": 8.087689461471387, "learning_rate": 3.1055215580026976e-06, "loss": 1.056, "step": 6208 }, { "epoch": 0.879025978622496, "grad_norm": 9.023273469829457, "learning_rate": 3.1049654099222542e-06, "loss": 0.9502, "step": 6209 }, { "epoch": 0.8791675514971331, "grad_norm": 9.671807555424175, "learning_rate": 3.104409230039229e-06, "loss": 0.972, "step": 6210 }, { "epoch": 0.8793091243717703, "grad_norm": 9.626863769810003, "learning_rate": 3.103853018382862e-06, "loss": 1.2636, "step": 6211 }, { "epoch": 0.8794506972464076, "grad_norm": 10.013388254528909, "learning_rate": 3.1032967749823917e-06, "loss": 1.0451, "step": 6212 }, { "epoch": 0.8795922701210448, "grad_norm": 11.358299655729107, "learning_rate": 3.10274049986706e-06, "loss": 1.2689, "step": 6213 }, { "epoch": 0.879733842995682, "grad_norm": 9.435078493407344, "learning_rate": 3.1021841930661108e-06, "loss": 1.1193, "step": 6214 }, { "epoch": 0.8798754158703193, "grad_norm": 10.31500546623969, "learning_rate": 3.1016278546087864e-06, "loss": 0.9329, "step": 6215 }, { "epoch": 0.8800169887449565, "grad_norm": 7.525209909343891, "learning_rate": 3.101071484524334e-06, "loss": 1.0998, "step": 6216 }, { "epoch": 0.8801585616195937, "grad_norm": 11.878707146473461, "learning_rate": 3.100515082842002e-06, "loss": 1.2177, "step": 6217 }, { "epoch": 0.880300134494231, "grad_norm": 9.058464046074446, "learning_rate": 3.09995864959104e-06, "loss": 1.1217, "step": 6218 }, { "epoch": 0.8804417073688682, "grad_norm": 11.97039576437886, "learning_rate": 3.0994021848006996e-06, "loss": 1.09, "step": 6219 }, { "epoch": 0.8805832802435053, "grad_norm": 9.232148549145865, "learning_rate": 3.0988456885002327e-06, "loss": 1.0476, "step": 6220 }, { "epoch": 0.8807248531181425, "grad_norm": 9.931644658000655, "learning_rate": 3.0982891607188948e-06, "loss": 1.2999, "step": 6221 }, { "epoch": 0.8808664259927798, "grad_norm": 9.996992803453681, "learning_rate": 3.0977326014859415e-06, "loss": 1.1412, "step": 6222 }, { "epoch": 0.881007998867417, "grad_norm": 8.640538027223903, "learning_rate": 3.0971760108306316e-06, "loss": 1.0346, "step": 6223 }, { "epoch": 0.8811495717420542, "grad_norm": 8.705721620188797, "learning_rate": 3.0966193887822232e-06, "loss": 1.0248, "step": 6224 }, { "epoch": 0.8812911446166914, "grad_norm": 9.307262612019796, "learning_rate": 3.096062735369979e-06, "loss": 0.9095, "step": 6225 }, { "epoch": 0.8814327174913287, "grad_norm": 9.441284633448202, "learning_rate": 3.095506050623161e-06, "loss": 1.147, "step": 6226 }, { "epoch": 0.8815742903659659, "grad_norm": 7.435693088593939, "learning_rate": 3.0949493345710343e-06, "loss": 1.0315, "step": 6227 }, { "epoch": 0.8817158632406031, "grad_norm": 7.243512638340384, "learning_rate": 3.094392587242864e-06, "loss": 0.9675, "step": 6228 }, { "epoch": 0.8818574361152404, "grad_norm": 8.475663744835996, "learning_rate": 3.093835808667919e-06, "loss": 0.9588, "step": 6229 }, { "epoch": 0.8819990089898775, "grad_norm": 9.142000847976034, "learning_rate": 3.0932789988754695e-06, "loss": 1.157, "step": 6230 }, { "epoch": 0.8821405818645147, "grad_norm": 11.037168303848613, "learning_rate": 3.0927221578947843e-06, "loss": 1.2779, "step": 6231 }, { "epoch": 0.882282154739152, "grad_norm": 10.693876806830533, "learning_rate": 3.092165285755137e-06, "loss": 1.2722, "step": 6232 }, { "epoch": 0.8824237276137892, "grad_norm": 6.183200151323595, "learning_rate": 3.0916083824858017e-06, "loss": 0.8695, "step": 6233 }, { "epoch": 0.8825653004884264, "grad_norm": 10.40986749130475, "learning_rate": 3.091051448116056e-06, "loss": 1.1105, "step": 6234 }, { "epoch": 0.8827068733630636, "grad_norm": 11.440231247278655, "learning_rate": 3.090494482675176e-06, "loss": 1.2432, "step": 6235 }, { "epoch": 0.8828484462377009, "grad_norm": 10.420604439628587, "learning_rate": 3.0899374861924413e-06, "loss": 1.1129, "step": 6236 }, { "epoch": 0.8829900191123381, "grad_norm": 11.189348291913783, "learning_rate": 3.0893804586971327e-06, "loss": 1.1257, "step": 6237 }, { "epoch": 0.8831315919869753, "grad_norm": 8.284819672117898, "learning_rate": 3.088823400218533e-06, "loss": 1.0917, "step": 6238 }, { "epoch": 0.8832731648616126, "grad_norm": 10.653382214691478, "learning_rate": 3.0882663107859256e-06, "loss": 1.0676, "step": 6239 }, { "epoch": 0.8834147377362498, "grad_norm": 9.066877899327231, "learning_rate": 3.0877091904285976e-06, "loss": 1.1089, "step": 6240 }, { "epoch": 0.8835563106108869, "grad_norm": 9.744701804408175, "learning_rate": 3.087152039175835e-06, "loss": 1.0357, "step": 6241 }, { "epoch": 0.8836978834855241, "grad_norm": 7.785572490675837, "learning_rate": 3.0865948570569283e-06, "loss": 1.0983, "step": 6242 }, { "epoch": 0.8838394563601614, "grad_norm": 10.83791121568372, "learning_rate": 3.086037644101167e-06, "loss": 1.1356, "step": 6243 }, { "epoch": 0.8839810292347986, "grad_norm": 8.633292813985387, "learning_rate": 3.0854804003378437e-06, "loss": 1.1709, "step": 6244 }, { "epoch": 0.8841226021094358, "grad_norm": 7.292987580318486, "learning_rate": 3.084923125796252e-06, "loss": 1.0514, "step": 6245 }, { "epoch": 0.8842641749840731, "grad_norm": 9.65457365923032, "learning_rate": 3.0843658205056886e-06, "loss": 1.0838, "step": 6246 }, { "epoch": 0.8844057478587103, "grad_norm": 8.77904246537666, "learning_rate": 3.0838084844954485e-06, "loss": 1.2505, "step": 6247 }, { "epoch": 0.8845473207333475, "grad_norm": 9.643981843783337, "learning_rate": 3.0832511177948326e-06, "loss": 1.0828, "step": 6248 }, { "epoch": 0.8846888936079847, "grad_norm": 7.526773602402055, "learning_rate": 3.0826937204331403e-06, "loss": 1.0141, "step": 6249 }, { "epoch": 0.884830466482622, "grad_norm": 8.83579287085988, "learning_rate": 3.0821362924396732e-06, "loss": 1.1212, "step": 6250 }, { "epoch": 0.8849720393572591, "grad_norm": 10.75304919623274, "learning_rate": 3.081578833843736e-06, "loss": 1.0978, "step": 6251 }, { "epoch": 0.8851136122318963, "grad_norm": 7.489535597294052, "learning_rate": 3.0810213446746323e-06, "loss": 1.1126, "step": 6252 }, { "epoch": 0.8852551851065336, "grad_norm": 7.4983381337511315, "learning_rate": 3.0804638249616704e-06, "loss": 1.0768, "step": 6253 }, { "epoch": 0.8853967579811708, "grad_norm": 7.2387855876982545, "learning_rate": 3.0799062747341574e-06, "loss": 1.0298, "step": 6254 }, { "epoch": 0.885538330855808, "grad_norm": 11.497560325243663, "learning_rate": 3.0793486940214034e-06, "loss": 1.2826, "step": 6255 }, { "epoch": 0.8856799037304453, "grad_norm": 8.583772172789505, "learning_rate": 3.0787910828527217e-06, "loss": 0.9278, "step": 6256 }, { "epoch": 0.8858214766050825, "grad_norm": 9.911349070286366, "learning_rate": 3.0782334412574244e-06, "loss": 1.0103, "step": 6257 }, { "epoch": 0.8859630494797197, "grad_norm": 7.782592902678282, "learning_rate": 3.0776757692648256e-06, "loss": 0.9924, "step": 6258 }, { "epoch": 0.8861046223543569, "grad_norm": 7.330158268779831, "learning_rate": 3.0771180669042422e-06, "loss": 0.938, "step": 6259 }, { "epoch": 0.8862461952289942, "grad_norm": 10.81972055296431, "learning_rate": 3.076560334204993e-06, "loss": 1.261, "step": 6260 }, { "epoch": 0.8863877681036313, "grad_norm": 7.889932190346234, "learning_rate": 3.0760025711963964e-06, "loss": 1.0055, "step": 6261 }, { "epoch": 0.8865293409782685, "grad_norm": 8.655408687539824, "learning_rate": 3.0754447779077745e-06, "loss": 1.0263, "step": 6262 }, { "epoch": 0.8866709138529058, "grad_norm": 8.457919356514852, "learning_rate": 3.0748869543684495e-06, "loss": 1.0746, "step": 6263 }, { "epoch": 0.886812486727543, "grad_norm": 8.01701500572693, "learning_rate": 3.0743291006077458e-06, "loss": 1.1426, "step": 6264 }, { "epoch": 0.8869540596021802, "grad_norm": 11.506935350354002, "learning_rate": 3.0737712166549897e-06, "loss": 1.1267, "step": 6265 }, { "epoch": 0.8870956324768174, "grad_norm": 8.565659879705981, "learning_rate": 3.073213302539508e-06, "loss": 1.2007, "step": 6266 }, { "epoch": 0.8872372053514547, "grad_norm": 9.911560752982277, "learning_rate": 3.072655358290632e-06, "loss": 1.0938, "step": 6267 }, { "epoch": 0.8873787782260919, "grad_norm": 8.017289076321296, "learning_rate": 3.07209738393769e-06, "loss": 0.9832, "step": 6268 }, { "epoch": 0.8875203511007291, "grad_norm": 8.677178048437023, "learning_rate": 3.0715393795100146e-06, "loss": 1.0212, "step": 6269 }, { "epoch": 0.8876619239753664, "grad_norm": 8.156387503330883, "learning_rate": 3.07098134503694e-06, "loss": 1.0577, "step": 6270 }, { "epoch": 0.8878034968500036, "grad_norm": 7.835629823221774, "learning_rate": 3.0704232805478025e-06, "loss": 1.0409, "step": 6271 }, { "epoch": 0.8879450697246407, "grad_norm": 8.438457519453255, "learning_rate": 3.0698651860719387e-06, "loss": 1.014, "step": 6272 }, { "epoch": 0.8880866425992779, "grad_norm": 8.163445700384427, "learning_rate": 3.0693070616386862e-06, "loss": 1.1522, "step": 6273 }, { "epoch": 0.8882282154739152, "grad_norm": 9.488232200624559, "learning_rate": 3.0687489072773864e-06, "loss": 1.2127, "step": 6274 }, { "epoch": 0.8883697883485524, "grad_norm": 7.814551000308705, "learning_rate": 3.0681907230173803e-06, "loss": 0.9201, "step": 6275 }, { "epoch": 0.8885113612231896, "grad_norm": 8.524352250982366, "learning_rate": 3.0676325088880122e-06, "loss": 1.0685, "step": 6276 }, { "epoch": 0.8886529340978269, "grad_norm": 8.321907399222276, "learning_rate": 3.067074264918626e-06, "loss": 1.0066, "step": 6277 }, { "epoch": 0.8887945069724641, "grad_norm": 8.425903036469176, "learning_rate": 3.0665159911385677e-06, "loss": 1.0374, "step": 6278 }, { "epoch": 0.8889360798471013, "grad_norm": 8.400925776010817, "learning_rate": 3.0659576875771868e-06, "loss": 1.1128, "step": 6279 }, { "epoch": 0.8890776527217386, "grad_norm": 8.774701374765332, "learning_rate": 3.065399354263833e-06, "loss": 1.1847, "step": 6280 }, { "epoch": 0.8892192255963758, "grad_norm": 9.585132020967421, "learning_rate": 3.0648409912278553e-06, "loss": 1.1093, "step": 6281 }, { "epoch": 0.8893607984710129, "grad_norm": 7.976299225500307, "learning_rate": 3.064282598498609e-06, "loss": 0.9977, "step": 6282 }, { "epoch": 0.8895023713456501, "grad_norm": 8.741144112919775, "learning_rate": 3.063724176105447e-06, "loss": 0.9579, "step": 6283 }, { "epoch": 0.8896439442202874, "grad_norm": 7.247597394948727, "learning_rate": 3.0631657240777254e-06, "loss": 1.038, "step": 6284 }, { "epoch": 0.8897855170949246, "grad_norm": 8.497195061523197, "learning_rate": 3.062607242444801e-06, "loss": 1.0656, "step": 6285 }, { "epoch": 0.8899270899695618, "grad_norm": 9.020028187777507, "learning_rate": 3.0620487312360337e-06, "loss": 0.9031, "step": 6286 }, { "epoch": 0.890068662844199, "grad_norm": 9.85437368507495, "learning_rate": 3.0614901904807836e-06, "loss": 1.1415, "step": 6287 }, { "epoch": 0.8902102357188363, "grad_norm": 8.371498400742414, "learning_rate": 3.060931620208414e-06, "loss": 1.0413, "step": 6288 }, { "epoch": 0.8903518085934735, "grad_norm": 11.32568130970081, "learning_rate": 3.060373020448286e-06, "loss": 1.0009, "step": 6289 }, { "epoch": 0.8904933814681107, "grad_norm": 7.634305185255636, "learning_rate": 3.0598143912297667e-06, "loss": 1.0465, "step": 6290 }, { "epoch": 0.890634954342748, "grad_norm": 8.223783974775209, "learning_rate": 3.0592557325822225e-06, "loss": 0.8821, "step": 6291 }, { "epoch": 0.8907765272173851, "grad_norm": 9.274314472539029, "learning_rate": 3.0586970445350206e-06, "loss": 1.1206, "step": 6292 }, { "epoch": 0.8909181000920223, "grad_norm": 9.960469504937459, "learning_rate": 3.0581383271175324e-06, "loss": 1.1073, "step": 6293 }, { "epoch": 0.8910596729666596, "grad_norm": 10.19439576218496, "learning_rate": 3.0575795803591278e-06, "loss": 0.9893, "step": 6294 }, { "epoch": 0.8912012458412968, "grad_norm": 6.740864823999918, "learning_rate": 3.0570208042891815e-06, "loss": 1.0331, "step": 6295 }, { "epoch": 0.891342818715934, "grad_norm": 7.831512023000811, "learning_rate": 3.0564619989370656e-06, "loss": 0.967, "step": 6296 }, { "epoch": 0.8914843915905712, "grad_norm": 8.682449739081688, "learning_rate": 3.055903164332158e-06, "loss": 1.2301, "step": 6297 }, { "epoch": 0.8916259644652085, "grad_norm": 8.85904172240191, "learning_rate": 3.055344300503836e-06, "loss": 1.0507, "step": 6298 }, { "epoch": 0.8917675373398457, "grad_norm": 7.647769138953815, "learning_rate": 3.0547854074814777e-06, "loss": 1.0132, "step": 6299 }, { "epoch": 0.8919091102144829, "grad_norm": 9.254539406895898, "learning_rate": 3.0542264852944635e-06, "loss": 1.0997, "step": 6300 }, { "epoch": 0.8920506830891202, "grad_norm": 7.965960564173095, "learning_rate": 3.0536675339721774e-06, "loss": 1.1085, "step": 6301 }, { "epoch": 0.8921922559637574, "grad_norm": 8.395703134087261, "learning_rate": 3.053108553544001e-06, "loss": 1.0911, "step": 6302 }, { "epoch": 0.8923338288383945, "grad_norm": 8.565519148664185, "learning_rate": 3.052549544039321e-06, "loss": 1.1918, "step": 6303 }, { "epoch": 0.8924754017130317, "grad_norm": 9.55821022102959, "learning_rate": 3.0519905054875237e-06, "loss": 1.0731, "step": 6304 }, { "epoch": 0.892616974587669, "grad_norm": 9.1056095146777, "learning_rate": 3.0514314379179967e-06, "loss": 1.1716, "step": 6305 }, { "epoch": 0.8927585474623062, "grad_norm": 9.150369572080903, "learning_rate": 3.05087234136013e-06, "loss": 0.9898, "step": 6306 }, { "epoch": 0.8929001203369434, "grad_norm": 7.9839292278631415, "learning_rate": 3.0503132158433145e-06, "loss": 1.1352, "step": 6307 }, { "epoch": 0.8930416932115807, "grad_norm": 8.931978240598626, "learning_rate": 3.049754061396944e-06, "loss": 1.2081, "step": 6308 }, { "epoch": 0.8931832660862179, "grad_norm": 8.029380254143218, "learning_rate": 3.049194878050413e-06, "loss": 0.9822, "step": 6309 }, { "epoch": 0.8933248389608551, "grad_norm": 8.178347531571886, "learning_rate": 3.048635665833116e-06, "loss": 0.934, "step": 6310 }, { "epoch": 0.8934664118354924, "grad_norm": 8.868304668748921, "learning_rate": 3.048076424774452e-06, "loss": 1.006, "step": 6311 }, { "epoch": 0.8936079847101296, "grad_norm": 7.678148986754097, "learning_rate": 3.0475171549038187e-06, "loss": 1.071, "step": 6312 }, { "epoch": 0.8937495575847667, "grad_norm": 7.778819980125721, "learning_rate": 3.0469578562506165e-06, "loss": 1.0794, "step": 6313 }, { "epoch": 0.8938911304594039, "grad_norm": 7.316149200469818, "learning_rate": 3.046398528844248e-06, "loss": 1.0564, "step": 6314 }, { "epoch": 0.8940327033340412, "grad_norm": 7.18211361943547, "learning_rate": 3.0458391727141156e-06, "loss": 1.0605, "step": 6315 }, { "epoch": 0.8941742762086784, "grad_norm": 6.898287640819878, "learning_rate": 3.045279787889625e-06, "loss": 1.0295, "step": 6316 }, { "epoch": 0.8943158490833156, "grad_norm": 7.281343778734877, "learning_rate": 3.044720374400183e-06, "loss": 1.1441, "step": 6317 }, { "epoch": 0.8944574219579529, "grad_norm": 8.475684898406165, "learning_rate": 3.044160932275197e-06, "loss": 1.2873, "step": 6318 }, { "epoch": 0.8945989948325901, "grad_norm": 9.579888783488245, "learning_rate": 3.043601461544076e-06, "loss": 1.2012, "step": 6319 }, { "epoch": 0.8947405677072273, "grad_norm": 7.851851333695454, "learning_rate": 3.0430419622362327e-06, "loss": 1.0093, "step": 6320 }, { "epoch": 0.8948821405818645, "grad_norm": 10.342223086413318, "learning_rate": 3.0424824343810773e-06, "loss": 1.1171, "step": 6321 }, { "epoch": 0.8950237134565018, "grad_norm": 7.767281246291033, "learning_rate": 3.0419228780080246e-06, "loss": 1.0055, "step": 6322 }, { "epoch": 0.8951652863311389, "grad_norm": 9.866508986098067, "learning_rate": 3.041363293146491e-06, "loss": 1.1251, "step": 6323 }, { "epoch": 0.8953068592057761, "grad_norm": 8.990375883160159, "learning_rate": 3.0408036798258924e-06, "loss": 1.0293, "step": 6324 }, { "epoch": 0.8954484320804134, "grad_norm": 7.700882838777774, "learning_rate": 3.040244038075648e-06, "loss": 0.9636, "step": 6325 }, { "epoch": 0.8955900049550506, "grad_norm": 8.945116758911892, "learning_rate": 3.0396843679251777e-06, "loss": 1.1701, "step": 6326 }, { "epoch": 0.8957315778296878, "grad_norm": 8.95029712375604, "learning_rate": 3.0391246694039016e-06, "loss": 0.9206, "step": 6327 }, { "epoch": 0.895873150704325, "grad_norm": 9.870422074744004, "learning_rate": 3.038564942541244e-06, "loss": 1.1569, "step": 6328 }, { "epoch": 0.8960147235789623, "grad_norm": 9.508718405127361, "learning_rate": 3.0380051873666287e-06, "loss": 0.9453, "step": 6329 }, { "epoch": 0.8961562964535995, "grad_norm": 7.75578559757783, "learning_rate": 3.037445403909482e-06, "loss": 0.998, "step": 6330 }, { "epoch": 0.8962978693282367, "grad_norm": 10.334693655079281, "learning_rate": 3.0368855921992314e-06, "loss": 1.0561, "step": 6331 }, { "epoch": 0.896439442202874, "grad_norm": 8.097334962572177, "learning_rate": 3.036325752265305e-06, "loss": 1.085, "step": 6332 }, { "epoch": 0.8965810150775112, "grad_norm": 7.600713856951458, "learning_rate": 3.035765884137134e-06, "loss": 1.081, "step": 6333 }, { "epoch": 0.8967225879521483, "grad_norm": 6.782968610976336, "learning_rate": 3.0352059878441496e-06, "loss": 0.9455, "step": 6334 }, { "epoch": 0.8968641608267856, "grad_norm": 8.15213411625533, "learning_rate": 3.0346460634157865e-06, "loss": 1.1506, "step": 6335 }, { "epoch": 0.8970057337014228, "grad_norm": 10.475842615437575, "learning_rate": 3.034086110881478e-06, "loss": 1.1762, "step": 6336 }, { "epoch": 0.89714730657606, "grad_norm": 8.67663113872432, "learning_rate": 3.0335261302706605e-06, "loss": 1.1886, "step": 6337 }, { "epoch": 0.8972888794506972, "grad_norm": 8.305514275637151, "learning_rate": 3.032966121612772e-06, "loss": 1.1521, "step": 6338 }, { "epoch": 0.8974304523253345, "grad_norm": 8.046745580725041, "learning_rate": 3.0324060849372526e-06, "loss": 0.9484, "step": 6339 }, { "epoch": 0.8975720251999717, "grad_norm": 9.366635265727597, "learning_rate": 3.0318460202735417e-06, "loss": 0.999, "step": 6340 }, { "epoch": 0.8977135980746089, "grad_norm": 7.728751556134735, "learning_rate": 3.0312859276510833e-06, "loss": 1.0025, "step": 6341 }, { "epoch": 0.8978551709492462, "grad_norm": 10.312237360528647, "learning_rate": 3.0307258070993186e-06, "loss": 1.0561, "step": 6342 }, { "epoch": 0.8979967438238834, "grad_norm": 9.51021548695919, "learning_rate": 3.0301656586476943e-06, "loss": 1.0189, "step": 6343 }, { "epoch": 0.8981383166985205, "grad_norm": 8.953691234957626, "learning_rate": 3.029605482325656e-06, "loss": 1.1622, "step": 6344 }, { "epoch": 0.8982798895731577, "grad_norm": 9.337460649808317, "learning_rate": 3.0290452781626526e-06, "loss": 1.0721, "step": 6345 }, { "epoch": 0.898421462447795, "grad_norm": 8.884992105612524, "learning_rate": 3.028485046188134e-06, "loss": 1.1288, "step": 6346 }, { "epoch": 0.8985630353224322, "grad_norm": 9.842355587001343, "learning_rate": 3.0279247864315508e-06, "loss": 1.0741, "step": 6347 }, { "epoch": 0.8987046081970694, "grad_norm": 7.861910644178728, "learning_rate": 3.0273644989223543e-06, "loss": 1.1437, "step": 6348 }, { "epoch": 0.8988461810717067, "grad_norm": 11.905044614795589, "learning_rate": 3.0268041836900002e-06, "loss": 1.3265, "step": 6349 }, { "epoch": 0.8989877539463439, "grad_norm": 9.873397117613058, "learning_rate": 3.026243840763942e-06, "loss": 1.1203, "step": 6350 }, { "epoch": 0.8991293268209811, "grad_norm": 8.054254618278136, "learning_rate": 3.025683470173638e-06, "loss": 1.1554, "step": 6351 }, { "epoch": 0.8992708996956184, "grad_norm": 9.780061204714864, "learning_rate": 3.0251230719485465e-06, "loss": 1.0278, "step": 6352 }, { "epoch": 0.8994124725702556, "grad_norm": 10.937411760246736, "learning_rate": 3.0245626461181256e-06, "loss": 1.0109, "step": 6353 }, { "epoch": 0.8995540454448928, "grad_norm": 8.78048800491705, "learning_rate": 3.024002192711838e-06, "loss": 1.0777, "step": 6354 }, { "epoch": 0.8996956183195299, "grad_norm": 7.8033070913876, "learning_rate": 3.023441711759146e-06, "loss": 1.1204, "step": 6355 }, { "epoch": 0.8998371911941672, "grad_norm": 9.747186890724668, "learning_rate": 3.0228812032895133e-06, "loss": 1.0203, "step": 6356 }, { "epoch": 0.8999787640688044, "grad_norm": 10.299280189620847, "learning_rate": 3.022320667332406e-06, "loss": 1.0897, "step": 6357 }, { "epoch": 0.9001203369434416, "grad_norm": 9.160672626703391, "learning_rate": 3.02176010391729e-06, "loss": 0.9828, "step": 6358 }, { "epoch": 0.9002619098180789, "grad_norm": 7.8993739205966405, "learning_rate": 3.021199513073635e-06, "loss": 0.9602, "step": 6359 }, { "epoch": 0.9004034826927161, "grad_norm": 7.7934244658420315, "learning_rate": 3.0206388948309094e-06, "loss": 1.1094, "step": 6360 }, { "epoch": 0.9005450555673533, "grad_norm": 9.204001626198172, "learning_rate": 3.020078249218586e-06, "loss": 1.133, "step": 6361 }, { "epoch": 0.9006866284419905, "grad_norm": 9.78222222417107, "learning_rate": 3.019517576266137e-06, "loss": 1.1261, "step": 6362 }, { "epoch": 0.9008282013166278, "grad_norm": 9.172405688969873, "learning_rate": 3.0189568760030363e-06, "loss": 0.9699, "step": 6363 }, { "epoch": 0.900969774191265, "grad_norm": 8.407310901049994, "learning_rate": 3.018396148458759e-06, "loss": 1.0276, "step": 6364 }, { "epoch": 0.9011113470659021, "grad_norm": 11.280574028072863, "learning_rate": 3.0178353936627835e-06, "loss": 1.0892, "step": 6365 }, { "epoch": 0.9012529199405394, "grad_norm": 8.62812682154884, "learning_rate": 3.017274611644587e-06, "loss": 1.1274, "step": 6366 }, { "epoch": 0.9013944928151766, "grad_norm": 8.398961193701325, "learning_rate": 3.016713802433649e-06, "loss": 1.0066, "step": 6367 }, { "epoch": 0.9015360656898138, "grad_norm": 9.35500390285653, "learning_rate": 3.016152966059453e-06, "loss": 1.1604, "step": 6368 }, { "epoch": 0.901677638564451, "grad_norm": 9.498401306555282, "learning_rate": 3.01559210255148e-06, "loss": 0.9756, "step": 6369 }, { "epoch": 0.9018192114390883, "grad_norm": 9.148685181585265, "learning_rate": 3.0150312119392144e-06, "loss": 1.1424, "step": 6370 }, { "epoch": 0.9019607843137255, "grad_norm": 10.328289728971875, "learning_rate": 3.0144702942521424e-06, "loss": 1.1236, "step": 6371 }, { "epoch": 0.9021023571883627, "grad_norm": 8.874319560870966, "learning_rate": 3.0139093495197504e-06, "loss": 1.0094, "step": 6372 }, { "epoch": 0.902243930063, "grad_norm": 11.203771596468746, "learning_rate": 3.0133483777715272e-06, "loss": 1.1624, "step": 6373 }, { "epoch": 0.9023855029376372, "grad_norm": 9.316973744508427, "learning_rate": 3.0127873790369627e-06, "loss": 1.0362, "step": 6374 }, { "epoch": 0.9025270758122743, "grad_norm": 9.374036815755767, "learning_rate": 3.0122263533455474e-06, "loss": 1.0057, "step": 6375 }, { "epoch": 0.9026686486869115, "grad_norm": 9.955423182598965, "learning_rate": 3.0116653007267753e-06, "loss": 1.2206, "step": 6376 }, { "epoch": 0.9028102215615488, "grad_norm": 10.20542519595219, "learning_rate": 3.0111042212101394e-06, "loss": 0.988, "step": 6377 }, { "epoch": 0.902951794436186, "grad_norm": 8.38099342084307, "learning_rate": 3.0105431148251364e-06, "loss": 1.0175, "step": 6378 }, { "epoch": 0.9030933673108232, "grad_norm": 7.984289239769978, "learning_rate": 3.0099819816012623e-06, "loss": 1.0524, "step": 6379 }, { "epoch": 0.9032349401854605, "grad_norm": 11.595968111392411, "learning_rate": 3.0094208215680156e-06, "loss": 0.9909, "step": 6380 }, { "epoch": 0.9033765130600977, "grad_norm": 7.296311415936159, "learning_rate": 3.008859634754895e-06, "loss": 1.0189, "step": 6381 }, { "epoch": 0.9035180859347349, "grad_norm": 9.211329690415349, "learning_rate": 3.0082984211914033e-06, "loss": 1.007, "step": 6382 }, { "epoch": 0.9036596588093722, "grad_norm": 8.564565145005721, "learning_rate": 3.007737180907044e-06, "loss": 1.1053, "step": 6383 }, { "epoch": 0.9038012316840094, "grad_norm": 10.177042643147823, "learning_rate": 3.007175913931319e-06, "loss": 1.0223, "step": 6384 }, { "epoch": 0.9039428045586466, "grad_norm": 8.606187732390936, "learning_rate": 3.006614620293734e-06, "loss": 1.0177, "step": 6385 }, { "epoch": 0.9040843774332837, "grad_norm": 7.168104190222459, "learning_rate": 3.0060533000237964e-06, "loss": 1.0003, "step": 6386 }, { "epoch": 0.904225950307921, "grad_norm": 9.336190830767954, "learning_rate": 3.005491953151014e-06, "loss": 1.0991, "step": 6387 }, { "epoch": 0.9043675231825582, "grad_norm": 8.128546776950854, "learning_rate": 3.0049305797048965e-06, "loss": 1.0971, "step": 6388 }, { "epoch": 0.9045090960571954, "grad_norm": 9.46277795179309, "learning_rate": 3.0043691797149548e-06, "loss": 0.9728, "step": 6389 }, { "epoch": 0.9046506689318327, "grad_norm": 9.313254921031696, "learning_rate": 3.003807753210702e-06, "loss": 0.8682, "step": 6390 }, { "epoch": 0.9047922418064699, "grad_norm": 8.419218929062735, "learning_rate": 3.0032463002216504e-06, "loss": 1.159, "step": 6391 }, { "epoch": 0.9049338146811071, "grad_norm": 9.801496617226372, "learning_rate": 3.0026848207773163e-06, "loss": 1.1631, "step": 6392 }, { "epoch": 0.9050753875557443, "grad_norm": 9.263758556612691, "learning_rate": 3.0021233149072164e-06, "loss": 1.188, "step": 6393 }, { "epoch": 0.9052169604303816, "grad_norm": 7.763622739624388, "learning_rate": 3.0015617826408684e-06, "loss": 1.1189, "step": 6394 }, { "epoch": 0.9053585333050188, "grad_norm": 7.318614783206635, "learning_rate": 3.001000224007791e-06, "loss": 1.1755, "step": 6395 }, { "epoch": 0.9055001061796559, "grad_norm": 10.514461186526333, "learning_rate": 3.000438639037505e-06, "loss": 1.1055, "step": 6396 }, { "epoch": 0.9056416790542932, "grad_norm": 8.233385406617757, "learning_rate": 2.9998770277595337e-06, "loss": 1.0376, "step": 6397 }, { "epoch": 0.9057832519289304, "grad_norm": 8.927057314419006, "learning_rate": 2.999315390203399e-06, "loss": 1.0805, "step": 6398 }, { "epoch": 0.9059248248035676, "grad_norm": 9.417969965133684, "learning_rate": 2.9987537263986277e-06, "loss": 1.034, "step": 6399 }, { "epoch": 0.9060663976782048, "grad_norm": 8.837283516785838, "learning_rate": 2.998192036374744e-06, "loss": 1.1783, "step": 6400 }, { "epoch": 0.9062079705528421, "grad_norm": 9.091768134017407, "learning_rate": 2.9976303201612765e-06, "loss": 0.9648, "step": 6401 }, { "epoch": 0.9063495434274793, "grad_norm": 8.755730632488154, "learning_rate": 2.9970685777877545e-06, "loss": 1.0497, "step": 6402 }, { "epoch": 0.9064911163021165, "grad_norm": 7.189625036147601, "learning_rate": 2.9965068092837074e-06, "loss": 0.9832, "step": 6403 }, { "epoch": 0.9066326891767538, "grad_norm": 8.62465855019327, "learning_rate": 2.9959450146786674e-06, "loss": 1.1483, "step": 6404 }, { "epoch": 0.906774262051391, "grad_norm": 10.206340943098919, "learning_rate": 2.995383194002169e-06, "loss": 1.1143, "step": 6405 }, { "epoch": 0.9069158349260281, "grad_norm": 7.891741199671823, "learning_rate": 2.9948213472837443e-06, "loss": 1.0657, "step": 6406 }, { "epoch": 0.9070574078006653, "grad_norm": 7.491465481046478, "learning_rate": 2.994259474552931e-06, "loss": 1.0422, "step": 6407 }, { "epoch": 0.9071989806753026, "grad_norm": 8.06628329204107, "learning_rate": 2.993697575839265e-06, "loss": 1.0315, "step": 6408 }, { "epoch": 0.9073405535499398, "grad_norm": 7.547468647798542, "learning_rate": 2.9931356511722857e-06, "loss": 1.1327, "step": 6409 }, { "epoch": 0.907482126424577, "grad_norm": 7.10584078963473, "learning_rate": 2.9925737005815337e-06, "loss": 1.0325, "step": 6410 }, { "epoch": 0.9076236992992143, "grad_norm": 8.79290454219116, "learning_rate": 2.9920117240965487e-06, "loss": 1.0224, "step": 6411 }, { "epoch": 0.9077652721738515, "grad_norm": 8.67000261317723, "learning_rate": 2.991449721746875e-06, "loss": 1.083, "step": 6412 }, { "epoch": 0.9079068450484887, "grad_norm": 7.933986577085387, "learning_rate": 2.9908876935620544e-06, "loss": 1.1675, "step": 6413 }, { "epoch": 0.908048417923126, "grad_norm": 9.500503928220688, "learning_rate": 2.990325639571635e-06, "loss": 1.1307, "step": 6414 }, { "epoch": 0.9081899907977632, "grad_norm": 9.165045670032914, "learning_rate": 2.9897635598051626e-06, "loss": 0.9851, "step": 6415 }, { "epoch": 0.9083315636724004, "grad_norm": 7.097090014648637, "learning_rate": 2.9892014542921845e-06, "loss": 1.0369, "step": 6416 }, { "epoch": 0.9084731365470375, "grad_norm": 9.361624980623398, "learning_rate": 2.9886393230622507e-06, "loss": 0.9633, "step": 6417 }, { "epoch": 0.9086147094216748, "grad_norm": 8.03549236657233, "learning_rate": 2.9880771661449115e-06, "loss": 1.0988, "step": 6418 }, { "epoch": 0.908756282296312, "grad_norm": 9.084783689099833, "learning_rate": 2.9875149835697203e-06, "loss": 1.1021, "step": 6419 }, { "epoch": 0.9088978551709492, "grad_norm": 6.375254906438668, "learning_rate": 2.98695277536623e-06, "loss": 0.9932, "step": 6420 }, { "epoch": 0.9090394280455865, "grad_norm": 8.016832762627976, "learning_rate": 2.9863905415639954e-06, "loss": 1.0546, "step": 6421 }, { "epoch": 0.9091810009202237, "grad_norm": 9.30610303897247, "learning_rate": 2.9858282821925723e-06, "loss": 1.0577, "step": 6422 }, { "epoch": 0.9093225737948609, "grad_norm": 8.013204644175362, "learning_rate": 2.985265997281519e-06, "loss": 1.0345, "step": 6423 }, { "epoch": 0.9094641466694982, "grad_norm": 7.476676533466189, "learning_rate": 2.984703686860394e-06, "loss": 1.1004, "step": 6424 }, { "epoch": 0.9096057195441354, "grad_norm": 7.107757317242236, "learning_rate": 2.984141350958757e-06, "loss": 0.9875, "step": 6425 }, { "epoch": 0.9097472924187726, "grad_norm": 7.46322005742081, "learning_rate": 2.9835789896061707e-06, "loss": 1.088, "step": 6426 }, { "epoch": 0.9098888652934097, "grad_norm": 9.606580275130144, "learning_rate": 2.9830166028321975e-06, "loss": 1.0655, "step": 6427 }, { "epoch": 0.910030438168047, "grad_norm": 9.623161635770442, "learning_rate": 2.9824541906664018e-06, "loss": 1.0648, "step": 6428 }, { "epoch": 0.9101720110426842, "grad_norm": 7.428524981343032, "learning_rate": 2.9818917531383483e-06, "loss": 0.837, "step": 6429 }, { "epoch": 0.9103135839173214, "grad_norm": 9.78236026982908, "learning_rate": 2.981329290277605e-06, "loss": 1.1648, "step": 6430 }, { "epoch": 0.9104551567919587, "grad_norm": 8.660328479201917, "learning_rate": 2.980766802113741e-06, "loss": 1.0729, "step": 6431 }, { "epoch": 0.9105967296665959, "grad_norm": 9.211498654317662, "learning_rate": 2.9802042886763234e-06, "loss": 1.1263, "step": 6432 }, { "epoch": 0.9107383025412331, "grad_norm": 8.057864727082537, "learning_rate": 2.9796417499949244e-06, "loss": 1.1455, "step": 6433 }, { "epoch": 0.9108798754158703, "grad_norm": 9.517390345790206, "learning_rate": 2.9790791860991165e-06, "loss": 0.9813, "step": 6434 }, { "epoch": 0.9110214482905076, "grad_norm": 7.362341669578989, "learning_rate": 2.9785165970184724e-06, "loss": 0.9432, "step": 6435 }, { "epoch": 0.9111630211651448, "grad_norm": 8.369666835484654, "learning_rate": 2.977953982782569e-06, "loss": 0.9887, "step": 6436 }, { "epoch": 0.9113045940397819, "grad_norm": 7.92369621454782, "learning_rate": 2.97739134342098e-06, "loss": 1.045, "step": 6437 }, { "epoch": 0.9114461669144192, "grad_norm": 9.48824788037051, "learning_rate": 2.9768286789632845e-06, "loss": 1.1037, "step": 6438 }, { "epoch": 0.9115877397890564, "grad_norm": 9.627510436990212, "learning_rate": 2.9762659894390603e-06, "loss": 0.9338, "step": 6439 }, { "epoch": 0.9117293126636936, "grad_norm": 9.436656383678134, "learning_rate": 2.9757032748778886e-06, "loss": 1.0463, "step": 6440 }, { "epoch": 0.9118708855383308, "grad_norm": 8.844327550570329, "learning_rate": 2.97514053530935e-06, "loss": 1.1595, "step": 6441 }, { "epoch": 0.9120124584129681, "grad_norm": 8.294394601869593, "learning_rate": 2.9745777707630284e-06, "loss": 1.1429, "step": 6442 }, { "epoch": 0.9121540312876053, "grad_norm": 9.895936814151932, "learning_rate": 2.974014981268507e-06, "loss": 1.0647, "step": 6443 }, { "epoch": 0.9122956041622425, "grad_norm": 8.690563512513965, "learning_rate": 2.973452166855372e-06, "loss": 1.1295, "step": 6444 }, { "epoch": 0.9124371770368798, "grad_norm": 11.08597122198007, "learning_rate": 2.972889327553209e-06, "loss": 1.2085, "step": 6445 }, { "epoch": 0.912578749911517, "grad_norm": 11.626248856599705, "learning_rate": 2.972326463391606e-06, "loss": 1.1912, "step": 6446 }, { "epoch": 0.9127203227861542, "grad_norm": 8.266486139274674, "learning_rate": 2.971763574400154e-06, "loss": 1.0656, "step": 6447 }, { "epoch": 0.9128618956607913, "grad_norm": 9.388298266892916, "learning_rate": 2.971200660608442e-06, "loss": 1.1477, "step": 6448 }, { "epoch": 0.9130034685354286, "grad_norm": 11.212936722873803, "learning_rate": 2.970637722046063e-06, "loss": 1.169, "step": 6449 }, { "epoch": 0.9131450414100658, "grad_norm": 9.797248796696703, "learning_rate": 2.9700747587426097e-06, "loss": 0.9957, "step": 6450 }, { "epoch": 0.913286614284703, "grad_norm": 7.237762646694268, "learning_rate": 2.9695117707276774e-06, "loss": 0.8915, "step": 6451 }, { "epoch": 0.9134281871593403, "grad_norm": 8.589147382528086, "learning_rate": 2.9689487580308613e-06, "loss": 1.0588, "step": 6452 }, { "epoch": 0.9135697600339775, "grad_norm": 7.931795488361349, "learning_rate": 2.9683857206817583e-06, "loss": 0.9761, "step": 6453 }, { "epoch": 0.9137113329086147, "grad_norm": 8.718924571925024, "learning_rate": 2.9678226587099674e-06, "loss": 1.0061, "step": 6454 }, { "epoch": 0.913852905783252, "grad_norm": 10.049324081237806, "learning_rate": 2.967259572145088e-06, "loss": 1.1487, "step": 6455 }, { "epoch": 0.9139944786578892, "grad_norm": 8.584206792838057, "learning_rate": 2.966696461016721e-06, "loss": 1.1074, "step": 6456 }, { "epoch": 0.9141360515325264, "grad_norm": 8.690063098463293, "learning_rate": 2.966133325354469e-06, "loss": 1.1882, "step": 6457 }, { "epoch": 0.9142776244071635, "grad_norm": 8.094529044463313, "learning_rate": 2.9655701651879364e-06, "loss": 1.0163, "step": 6458 }, { "epoch": 0.9144191972818008, "grad_norm": 9.32567141163745, "learning_rate": 2.965006980546727e-06, "loss": 1.1238, "step": 6459 }, { "epoch": 0.914560770156438, "grad_norm": 8.74450380185878, "learning_rate": 2.9644437714604475e-06, "loss": 1.0956, "step": 6460 }, { "epoch": 0.9147023430310752, "grad_norm": 8.141962743221791, "learning_rate": 2.963880537958705e-06, "loss": 1.1203, "step": 6461 }, { "epoch": 0.9148439159057125, "grad_norm": 9.624268342980432, "learning_rate": 2.9633172800711085e-06, "loss": 1.0429, "step": 6462 }, { "epoch": 0.9149854887803497, "grad_norm": 9.293059878966803, "learning_rate": 2.962753997827268e-06, "loss": 1.0457, "step": 6463 }, { "epoch": 0.9151270616549869, "grad_norm": 7.486490671337371, "learning_rate": 2.962190691256795e-06, "loss": 1.1487, "step": 6464 }, { "epoch": 0.9152686345296241, "grad_norm": 8.37489159357311, "learning_rate": 2.961627360389302e-06, "loss": 1.0583, "step": 6465 }, { "epoch": 0.9154102074042614, "grad_norm": 8.619139518538905, "learning_rate": 2.9610640052544026e-06, "loss": 1.1821, "step": 6466 }, { "epoch": 0.9155517802788986, "grad_norm": 7.401432321445326, "learning_rate": 2.960500625881712e-06, "loss": 1.0364, "step": 6467 }, { "epoch": 0.9156933531535357, "grad_norm": 10.411969237063404, "learning_rate": 2.9599372223008483e-06, "loss": 1.1524, "step": 6468 }, { "epoch": 0.915834926028173, "grad_norm": 9.48814455438791, "learning_rate": 2.9593737945414264e-06, "loss": 1.1179, "step": 6469 }, { "epoch": 0.9159764989028102, "grad_norm": 9.411450947412247, "learning_rate": 2.9588103426330665e-06, "loss": 0.997, "step": 6470 }, { "epoch": 0.9161180717774474, "grad_norm": 7.432085390353889, "learning_rate": 2.95824686660539e-06, "loss": 0.9532, "step": 6471 }, { "epoch": 0.9162596446520846, "grad_norm": 9.489365900077143, "learning_rate": 2.957683366488017e-06, "loss": 1.1635, "step": 6472 }, { "epoch": 0.9164012175267219, "grad_norm": 7.393721272661859, "learning_rate": 2.9571198423105708e-06, "loss": 1.0279, "step": 6473 }, { "epoch": 0.9165427904013591, "grad_norm": 8.211461073191723, "learning_rate": 2.956556294102675e-06, "loss": 1.113, "step": 6474 }, { "epoch": 0.9166843632759963, "grad_norm": 8.019091237776582, "learning_rate": 2.9559927218939555e-06, "loss": 1.0279, "step": 6475 }, { "epoch": 0.9168259361506336, "grad_norm": 10.094786221723801, "learning_rate": 2.9554291257140384e-06, "loss": 1.1953, "step": 6476 }, { "epoch": 0.9169675090252708, "grad_norm": 7.560887440663355, "learning_rate": 2.9548655055925516e-06, "loss": 1.0631, "step": 6477 }, { "epoch": 0.917109081899908, "grad_norm": 8.625694357523576, "learning_rate": 2.954301861559124e-06, "loss": 1.0294, "step": 6478 }, { "epoch": 0.9172506547745451, "grad_norm": 9.523388631693733, "learning_rate": 2.9537381936433873e-06, "loss": 1.1821, "step": 6479 }, { "epoch": 0.9173922276491824, "grad_norm": 7.601488981450109, "learning_rate": 2.953174501874971e-06, "loss": 1.1505, "step": 6480 }, { "epoch": 0.9175338005238196, "grad_norm": 8.281129771385505, "learning_rate": 2.9526107862835103e-06, "loss": 1.0834, "step": 6481 }, { "epoch": 0.9176753733984568, "grad_norm": 8.191499854168335, "learning_rate": 2.952047046898637e-06, "loss": 1.0042, "step": 6482 }, { "epoch": 0.9178169462730941, "grad_norm": 9.389876299318534, "learning_rate": 2.9514832837499884e-06, "loss": 1.2229, "step": 6483 }, { "epoch": 0.9179585191477313, "grad_norm": 8.079498115286734, "learning_rate": 2.9509194968671995e-06, "loss": 1.1173, "step": 6484 }, { "epoch": 0.9181000920223685, "grad_norm": 8.092774792691, "learning_rate": 2.9503556862799094e-06, "loss": 1.1826, "step": 6485 }, { "epoch": 0.9182416648970058, "grad_norm": 8.308980325691858, "learning_rate": 2.949791852017756e-06, "loss": 1.0668, "step": 6486 }, { "epoch": 0.918383237771643, "grad_norm": 8.426509678903379, "learning_rate": 2.949227994110381e-06, "loss": 1.1042, "step": 6487 }, { "epoch": 0.9185248106462802, "grad_norm": 8.596637476555605, "learning_rate": 2.948664112587425e-06, "loss": 1.06, "step": 6488 }, { "epoch": 0.9186663835209173, "grad_norm": 7.457978593570005, "learning_rate": 2.9481002074785315e-06, "loss": 1.0767, "step": 6489 }, { "epoch": 0.9188079563955546, "grad_norm": 7.015931936231842, "learning_rate": 2.9475362788133437e-06, "loss": 0.9008, "step": 6490 }, { "epoch": 0.9189495292701918, "grad_norm": 7.637168804673249, "learning_rate": 2.946972326621507e-06, "loss": 1.1497, "step": 6491 }, { "epoch": 0.919091102144829, "grad_norm": 7.5122219954605995, "learning_rate": 2.946408350932669e-06, "loss": 1.0905, "step": 6492 }, { "epoch": 0.9192326750194663, "grad_norm": 8.251135545666957, "learning_rate": 2.9458443517764767e-06, "loss": 1.1293, "step": 6493 }, { "epoch": 0.9193742478941035, "grad_norm": 8.77293746488001, "learning_rate": 2.9452803291825793e-06, "loss": 1.0509, "step": 6494 }, { "epoch": 0.9195158207687407, "grad_norm": 9.50578493363856, "learning_rate": 2.9447162831806275e-06, "loss": 1.1184, "step": 6495 }, { "epoch": 0.919657393643378, "grad_norm": 8.988845694739014, "learning_rate": 2.944152213800272e-06, "loss": 0.9839, "step": 6496 }, { "epoch": 0.9197989665180152, "grad_norm": 7.690704670183, "learning_rate": 2.9435881210711652e-06, "loss": 0.9704, "step": 6497 }, { "epoch": 0.9199405393926524, "grad_norm": 7.666959038287304, "learning_rate": 2.943024005022962e-06, "loss": 1.0734, "step": 6498 }, { "epoch": 0.9200821122672896, "grad_norm": 8.280212106982592, "learning_rate": 2.9424598656853176e-06, "loss": 1.1559, "step": 6499 }, { "epoch": 0.9202236851419268, "grad_norm": 9.032039409838672, "learning_rate": 2.9418957030878876e-06, "loss": 1.0879, "step": 6500 }, { "epoch": 0.920365258016564, "grad_norm": 7.641462221672885, "learning_rate": 2.9413315172603296e-06, "loss": 1.1094, "step": 6501 }, { "epoch": 0.9205068308912012, "grad_norm": 8.948656068626747, "learning_rate": 2.9407673082323033e-06, "loss": 1.1738, "step": 6502 }, { "epoch": 0.9206484037658385, "grad_norm": 9.276195241408962, "learning_rate": 2.9402030760334684e-06, "loss": 1.0399, "step": 6503 }, { "epoch": 0.9207899766404757, "grad_norm": 7.480013601933316, "learning_rate": 2.9396388206934858e-06, "loss": 1.0856, "step": 6504 }, { "epoch": 0.9209315495151129, "grad_norm": 7.711655047113676, "learning_rate": 2.9390745422420186e-06, "loss": 1.1056, "step": 6505 }, { "epoch": 0.9210731223897501, "grad_norm": 7.979217476014864, "learning_rate": 2.9385102407087296e-06, "loss": 1.0357, "step": 6506 }, { "epoch": 0.9212146952643874, "grad_norm": 7.619230244922809, "learning_rate": 2.937945916123284e-06, "loss": 1.0925, "step": 6507 }, { "epoch": 0.9213562681390246, "grad_norm": 8.23472197813192, "learning_rate": 2.9373815685153485e-06, "loss": 0.9739, "step": 6508 }, { "epoch": 0.9214978410136618, "grad_norm": 7.902846979852444, "learning_rate": 2.9368171979145898e-06, "loss": 1.066, "step": 6509 }, { "epoch": 0.921639413888299, "grad_norm": 7.819867863624104, "learning_rate": 2.936252804350677e-06, "loss": 0.9647, "step": 6510 }, { "epoch": 0.9217809867629362, "grad_norm": 9.118651389411763, "learning_rate": 2.9356883878532794e-06, "loss": 1.1396, "step": 6511 }, { "epoch": 0.9219225596375734, "grad_norm": 8.22064674450552, "learning_rate": 2.9351239484520684e-06, "loss": 0.9577, "step": 6512 }, { "epoch": 0.9220641325122106, "grad_norm": 8.745332181280338, "learning_rate": 2.9345594861767157e-06, "loss": 1.1101, "step": 6513 }, { "epoch": 0.9222057053868479, "grad_norm": 9.052849202009078, "learning_rate": 2.9339950010568945e-06, "loss": 1.0478, "step": 6514 }, { "epoch": 0.9223472782614851, "grad_norm": 8.329514149651123, "learning_rate": 2.9334304931222795e-06, "loss": 1.0559, "step": 6515 }, { "epoch": 0.9224888511361223, "grad_norm": 8.247884768194453, "learning_rate": 2.932865962402548e-06, "loss": 1.1332, "step": 6516 }, { "epoch": 0.9226304240107596, "grad_norm": 8.946512866377699, "learning_rate": 2.9323014089273743e-06, "loss": 1.0612, "step": 6517 }, { "epoch": 0.9227719968853968, "grad_norm": 7.3422595520445775, "learning_rate": 2.9317368327264383e-06, "loss": 1.0027, "step": 6518 }, { "epoch": 0.922913569760034, "grad_norm": 7.2992807216845055, "learning_rate": 2.9311722338294193e-06, "loss": 1.0775, "step": 6519 }, { "epoch": 0.9230551426346711, "grad_norm": 10.120054190871306, "learning_rate": 2.930607612265997e-06, "loss": 1.1267, "step": 6520 }, { "epoch": 0.9231967155093084, "grad_norm": 11.144767772091425, "learning_rate": 2.9300429680658538e-06, "loss": 1.003, "step": 6521 }, { "epoch": 0.9233382883839456, "grad_norm": 11.473146349550463, "learning_rate": 2.9294783012586725e-06, "loss": 1.1177, "step": 6522 }, { "epoch": 0.9234798612585828, "grad_norm": 7.77616207355777, "learning_rate": 2.9289136118741367e-06, "loss": 1.0434, "step": 6523 }, { "epoch": 0.9236214341332201, "grad_norm": 7.308910042928767, "learning_rate": 2.9283488999419324e-06, "loss": 0.9724, "step": 6524 }, { "epoch": 0.9237630070078573, "grad_norm": 8.720895810791216, "learning_rate": 2.927784165491746e-06, "loss": 0.9593, "step": 6525 }, { "epoch": 0.9239045798824945, "grad_norm": 8.648330315646781, "learning_rate": 2.927219408553265e-06, "loss": 1.0627, "step": 6526 }, { "epoch": 0.9240461527571318, "grad_norm": 9.78821878534323, "learning_rate": 2.926654629156178e-06, "loss": 1.0475, "step": 6527 }, { "epoch": 0.924187725631769, "grad_norm": 8.070550255906067, "learning_rate": 2.926089827330175e-06, "loss": 1.0084, "step": 6528 }, { "epoch": 0.9243292985064062, "grad_norm": 9.138326033007734, "learning_rate": 2.925525003104948e-06, "loss": 1.1335, "step": 6529 }, { "epoch": 0.9244708713810434, "grad_norm": 8.115702312396476, "learning_rate": 2.924960156510188e-06, "loss": 1.1291, "step": 6530 }, { "epoch": 0.9246124442556806, "grad_norm": 8.039073888475897, "learning_rate": 2.9243952875755905e-06, "loss": 1.166, "step": 6531 }, { "epoch": 0.9247540171303178, "grad_norm": 8.487905087586624, "learning_rate": 2.923830396330849e-06, "loss": 1.2357, "step": 6532 }, { "epoch": 0.924895590004955, "grad_norm": 9.03667437526928, "learning_rate": 2.9232654828056596e-06, "loss": 1.2265, "step": 6533 }, { "epoch": 0.9250371628795923, "grad_norm": 7.964274267741939, "learning_rate": 2.9227005470297194e-06, "loss": 1.1328, "step": 6534 }, { "epoch": 0.9251787357542295, "grad_norm": 9.458369542079186, "learning_rate": 2.922135589032726e-06, "loss": 1.1752, "step": 6535 }, { "epoch": 0.9253203086288667, "grad_norm": 8.431195651841374, "learning_rate": 2.9215706088443794e-06, "loss": 1.193, "step": 6536 }, { "epoch": 0.925461881503504, "grad_norm": 7.800085350939026, "learning_rate": 2.921005606494381e-06, "loss": 1.0747, "step": 6537 }, { "epoch": 0.9256034543781412, "grad_norm": 9.334499104673815, "learning_rate": 2.9204405820124315e-06, "loss": 1.1968, "step": 6538 }, { "epoch": 0.9257450272527784, "grad_norm": 11.540345778549122, "learning_rate": 2.9198755354282337e-06, "loss": 1.2967, "step": 6539 }, { "epoch": 0.9258866001274156, "grad_norm": 8.335519681861427, "learning_rate": 2.9193104667714926e-06, "loss": 1.1008, "step": 6540 }, { "epoch": 0.9260281730020528, "grad_norm": 7.794092572475664, "learning_rate": 2.9187453760719126e-06, "loss": 1.0318, "step": 6541 }, { "epoch": 0.92616974587669, "grad_norm": 7.216686928289689, "learning_rate": 2.918180263359201e-06, "loss": 1.0618, "step": 6542 }, { "epoch": 0.9263113187513272, "grad_norm": 7.794139068607949, "learning_rate": 2.9176151286630642e-06, "loss": 1.0595, "step": 6543 }, { "epoch": 0.9264528916259644, "grad_norm": 8.269724991825889, "learning_rate": 2.917049972013211e-06, "loss": 1.1298, "step": 6544 }, { "epoch": 0.9265944645006017, "grad_norm": 8.56724027303986, "learning_rate": 2.9164847934393523e-06, "loss": 1.1841, "step": 6545 }, { "epoch": 0.9267360373752389, "grad_norm": 7.456691056769972, "learning_rate": 2.9159195929711985e-06, "loss": 0.904, "step": 6546 }, { "epoch": 0.9268776102498761, "grad_norm": 9.438591698162863, "learning_rate": 2.915354370638462e-06, "loss": 1.0933, "step": 6547 }, { "epoch": 0.9270191831245134, "grad_norm": 9.229528920890951, "learning_rate": 2.914789126470856e-06, "loss": 1.0151, "step": 6548 }, { "epoch": 0.9271607559991506, "grad_norm": 7.638343517217049, "learning_rate": 2.914223860498095e-06, "loss": 1.1203, "step": 6549 }, { "epoch": 0.9273023288737878, "grad_norm": 8.573953583599048, "learning_rate": 2.9136585727498946e-06, "loss": 1.0698, "step": 6550 }, { "epoch": 0.927443901748425, "grad_norm": 8.676617509505071, "learning_rate": 2.9130932632559707e-06, "loss": 1.0099, "step": 6551 }, { "epoch": 0.9275854746230622, "grad_norm": 8.164519462390778, "learning_rate": 2.912527932046042e-06, "loss": 1.0003, "step": 6552 }, { "epoch": 0.9277270474976994, "grad_norm": 8.400413557498924, "learning_rate": 2.911962579149828e-06, "loss": 1.0169, "step": 6553 }, { "epoch": 0.9278686203723366, "grad_norm": 8.158255261160317, "learning_rate": 2.9113972045970483e-06, "loss": 0.9153, "step": 6554 }, { "epoch": 0.9280101932469739, "grad_norm": 7.857479405316979, "learning_rate": 2.910831808417424e-06, "loss": 1.1988, "step": 6555 }, { "epoch": 0.9281517661216111, "grad_norm": 9.842587356935484, "learning_rate": 2.910266390640678e-06, "loss": 1.1683, "step": 6556 }, { "epoch": 0.9282933389962483, "grad_norm": 9.001075680346228, "learning_rate": 2.909700951296534e-06, "loss": 1.0996, "step": 6557 }, { "epoch": 0.9284349118708856, "grad_norm": 8.026569590863376, "learning_rate": 2.9091354904147175e-06, "loss": 1.036, "step": 6558 }, { "epoch": 0.9285764847455228, "grad_norm": 9.204666811638608, "learning_rate": 2.908570008024951e-06, "loss": 1.0772, "step": 6559 }, { "epoch": 0.92871805762016, "grad_norm": 7.568518395506811, "learning_rate": 2.9080045041569647e-06, "loss": 0.8921, "step": 6560 }, { "epoch": 0.9288596304947973, "grad_norm": 11.002429000393114, "learning_rate": 2.9074389788404867e-06, "loss": 1.1864, "step": 6561 }, { "epoch": 0.9290012033694344, "grad_norm": 7.891382766445481, "learning_rate": 2.9068734321052445e-06, "loss": 1.0428, "step": 6562 }, { "epoch": 0.9291427762440716, "grad_norm": 9.444839554045382, "learning_rate": 2.9063078639809707e-06, "loss": 1.061, "step": 6563 }, { "epoch": 0.9292843491187088, "grad_norm": 8.048230221892876, "learning_rate": 2.905742274497394e-06, "loss": 0.8918, "step": 6564 }, { "epoch": 0.9294259219933461, "grad_norm": 8.993200064487743, "learning_rate": 2.9051766636842488e-06, "loss": 1.0871, "step": 6565 }, { "epoch": 0.9295674948679833, "grad_norm": 11.100859778845503, "learning_rate": 2.9046110315712682e-06, "loss": 1.1701, "step": 6566 }, { "epoch": 0.9297090677426205, "grad_norm": 8.815724317335349, "learning_rate": 2.904045378188187e-06, "loss": 1.058, "step": 6567 }, { "epoch": 0.9298506406172578, "grad_norm": 8.797970745893934, "learning_rate": 2.9034797035647427e-06, "loss": 1.1417, "step": 6568 }, { "epoch": 0.929992213491895, "grad_norm": 8.128917512606325, "learning_rate": 2.9029140077306717e-06, "loss": 1.1275, "step": 6569 }, { "epoch": 0.9301337863665322, "grad_norm": 7.238268865089918, "learning_rate": 2.902348290715711e-06, "loss": 1.1304, "step": 6570 }, { "epoch": 0.9302753592411694, "grad_norm": 9.648466361925456, "learning_rate": 2.9017825525496e-06, "loss": 1.0899, "step": 6571 }, { "epoch": 0.9304169321158066, "grad_norm": 9.073050415127716, "learning_rate": 2.9012167932620806e-06, "loss": 0.9582, "step": 6572 }, { "epoch": 0.9305585049904438, "grad_norm": 8.555485657685786, "learning_rate": 2.900651012882893e-06, "loss": 1.1098, "step": 6573 }, { "epoch": 0.930700077865081, "grad_norm": 9.731578149573172, "learning_rate": 2.9000852114417804e-06, "loss": 1.1864, "step": 6574 }, { "epoch": 0.9308416507397183, "grad_norm": 9.168741395994386, "learning_rate": 2.899519388968487e-06, "loss": 1.0536, "step": 6575 }, { "epoch": 0.9309832236143555, "grad_norm": 7.8101830060278665, "learning_rate": 2.898953545492757e-06, "loss": 1.0107, "step": 6576 }, { "epoch": 0.9311247964889927, "grad_norm": 9.841136960970024, "learning_rate": 2.8983876810443364e-06, "loss": 1.1133, "step": 6577 }, { "epoch": 0.9312663693636299, "grad_norm": 7.277951520766201, "learning_rate": 2.8978217956529726e-06, "loss": 1.1062, "step": 6578 }, { "epoch": 0.9314079422382672, "grad_norm": 7.934439482094468, "learning_rate": 2.8972558893484145e-06, "loss": 1.1441, "step": 6579 }, { "epoch": 0.9315495151129044, "grad_norm": 9.160936633869138, "learning_rate": 2.8966899621604094e-06, "loss": 1.1033, "step": 6580 }, { "epoch": 0.9316910879875416, "grad_norm": 8.51429533770448, "learning_rate": 2.8961240141187085e-06, "loss": 0.9874, "step": 6581 }, { "epoch": 0.9318326608621788, "grad_norm": 9.368968193053139, "learning_rate": 2.8955580452530642e-06, "loss": 1.1418, "step": 6582 }, { "epoch": 0.931974233736816, "grad_norm": 8.292597068331206, "learning_rate": 2.8949920555932283e-06, "loss": 1.1827, "step": 6583 }, { "epoch": 0.9321158066114532, "grad_norm": 8.937899920712683, "learning_rate": 2.8944260451689544e-06, "loss": 1.1111, "step": 6584 }, { "epoch": 0.9322573794860904, "grad_norm": 9.384746723771288, "learning_rate": 2.8938600140099975e-06, "loss": 1.0748, "step": 6585 }, { "epoch": 0.9323989523607277, "grad_norm": 10.17416763837657, "learning_rate": 2.893293962146114e-06, "loss": 1.1683, "step": 6586 }, { "epoch": 0.9325405252353649, "grad_norm": 10.3280865874853, "learning_rate": 2.8927278896070593e-06, "loss": 1.1374, "step": 6587 }, { "epoch": 0.9326820981100021, "grad_norm": 10.187086600124566, "learning_rate": 2.8921617964225923e-06, "loss": 1.0043, "step": 6588 }, { "epoch": 0.9328236709846394, "grad_norm": 8.53971604815438, "learning_rate": 2.8915956826224724e-06, "loss": 0.9394, "step": 6589 }, { "epoch": 0.9329652438592766, "grad_norm": 8.900402411460815, "learning_rate": 2.8910295482364594e-06, "loss": 1.1652, "step": 6590 }, { "epoch": 0.9331068167339138, "grad_norm": 11.326534775236794, "learning_rate": 2.8904633932943145e-06, "loss": 1.208, "step": 6591 }, { "epoch": 0.933248389608551, "grad_norm": 9.845551437968103, "learning_rate": 2.8898972178258e-06, "loss": 1.0462, "step": 6592 }, { "epoch": 0.9333899624831882, "grad_norm": 6.744906729130357, "learning_rate": 2.8893310218606797e-06, "loss": 1.0789, "step": 6593 }, { "epoch": 0.9335315353578254, "grad_norm": 7.420237278190532, "learning_rate": 2.8887648054287176e-06, "loss": 1.0418, "step": 6594 }, { "epoch": 0.9336731082324626, "grad_norm": 12.39733904928512, "learning_rate": 2.888198568559681e-06, "loss": 1.1914, "step": 6595 }, { "epoch": 0.9338146811070999, "grad_norm": 7.177109879295594, "learning_rate": 2.887632311283333e-06, "loss": 1.0078, "step": 6596 }, { "epoch": 0.9339562539817371, "grad_norm": 9.262102612247647, "learning_rate": 2.8870660336294444e-06, "loss": 1.1389, "step": 6597 }, { "epoch": 0.9340978268563743, "grad_norm": 11.011328672365536, "learning_rate": 2.886499735627783e-06, "loss": 1.1465, "step": 6598 }, { "epoch": 0.9342393997310116, "grad_norm": 7.939501652680723, "learning_rate": 2.885933417308118e-06, "loss": 1.0248, "step": 6599 }, { "epoch": 0.9343809726056488, "grad_norm": 7.484185138755061, "learning_rate": 2.8853670787002224e-06, "loss": 1.0817, "step": 6600 }, { "epoch": 0.934522545480286, "grad_norm": 8.370262969886824, "learning_rate": 2.8848007198338663e-06, "loss": 1.084, "step": 6601 }, { "epoch": 0.9346641183549232, "grad_norm": 8.052202139138757, "learning_rate": 2.884234340738823e-06, "loss": 1.0643, "step": 6602 }, { "epoch": 0.9348056912295604, "grad_norm": 8.834389197625221, "learning_rate": 2.883667941444867e-06, "loss": 1.1844, "step": 6603 }, { "epoch": 0.9349472641041976, "grad_norm": 8.900258401269875, "learning_rate": 2.8831015219817725e-06, "loss": 1.0581, "step": 6604 }, { "epoch": 0.9350888369788348, "grad_norm": 9.97542030768335, "learning_rate": 2.882535082379318e-06, "loss": 0.9966, "step": 6605 }, { "epoch": 0.9352304098534721, "grad_norm": 8.536487131650121, "learning_rate": 2.8819686226672794e-06, "loss": 1.1144, "step": 6606 }, { "epoch": 0.9353719827281093, "grad_norm": 9.431916663166504, "learning_rate": 2.881402142875435e-06, "loss": 1.1491, "step": 6607 }, { "epoch": 0.9355135556027465, "grad_norm": 8.967218248308278, "learning_rate": 2.880835643033564e-06, "loss": 1.2388, "step": 6608 }, { "epoch": 0.9356551284773837, "grad_norm": 9.054999253153442, "learning_rate": 2.8802691231714463e-06, "loss": 1.249, "step": 6609 }, { "epoch": 0.935796701352021, "grad_norm": 8.868635017450764, "learning_rate": 2.879702583318866e-06, "loss": 1.2058, "step": 6610 }, { "epoch": 0.9359382742266582, "grad_norm": 11.703587195687923, "learning_rate": 2.8791360235056024e-06, "loss": 1.2161, "step": 6611 }, { "epoch": 0.9360798471012954, "grad_norm": 7.856528523195917, "learning_rate": 2.878569443761442e-06, "loss": 1.173, "step": 6612 }, { "epoch": 0.9362214199759326, "grad_norm": 7.475765492720357, "learning_rate": 2.878002844116168e-06, "loss": 1.0295, "step": 6613 }, { "epoch": 0.9363629928505698, "grad_norm": 8.935291364174718, "learning_rate": 2.877436224599566e-06, "loss": 1.1985, "step": 6614 }, { "epoch": 0.936504565725207, "grad_norm": 8.11436588124945, "learning_rate": 2.876869585241423e-06, "loss": 1.0756, "step": 6615 }, { "epoch": 0.9366461385998442, "grad_norm": 8.537131493408204, "learning_rate": 2.8763029260715282e-06, "loss": 1.1369, "step": 6616 }, { "epoch": 0.9367877114744815, "grad_norm": 10.575159555949455, "learning_rate": 2.8757362471196677e-06, "loss": 1.2429, "step": 6617 }, { "epoch": 0.9369292843491187, "grad_norm": 10.29609623975613, "learning_rate": 2.875169548415633e-06, "loss": 1.164, "step": 6618 }, { "epoch": 0.9370708572237559, "grad_norm": 9.381678529389536, "learning_rate": 2.874602829989215e-06, "loss": 1.0103, "step": 6619 }, { "epoch": 0.9372124300983932, "grad_norm": 9.824100319241428, "learning_rate": 2.8740360918702053e-06, "loss": 0.957, "step": 6620 }, { "epoch": 0.9373540029730304, "grad_norm": 10.345522633584624, "learning_rate": 2.873469334088398e-06, "loss": 1.1704, "step": 6621 }, { "epoch": 0.9374955758476676, "grad_norm": 6.574250953853834, "learning_rate": 2.8729025566735856e-06, "loss": 1.0567, "step": 6622 }, { "epoch": 0.9376371487223049, "grad_norm": 9.156764097395518, "learning_rate": 2.8723357596555644e-06, "loss": 1.1365, "step": 6623 }, { "epoch": 0.937778721596942, "grad_norm": 10.48405662996963, "learning_rate": 2.871768943064129e-06, "loss": 0.9955, "step": 6624 }, { "epoch": 0.9379202944715792, "grad_norm": 10.825434891420247, "learning_rate": 2.8712021069290786e-06, "loss": 1.1922, "step": 6625 }, { "epoch": 0.9380618673462164, "grad_norm": 10.380487668262846, "learning_rate": 2.87063525128021e-06, "loss": 1.0783, "step": 6626 }, { "epoch": 0.9382034402208537, "grad_norm": 7.6807558824176265, "learning_rate": 2.870068376147322e-06, "loss": 1.1767, "step": 6627 }, { "epoch": 0.9383450130954909, "grad_norm": 9.55538775668806, "learning_rate": 2.8695014815602157e-06, "loss": 0.9802, "step": 6628 }, { "epoch": 0.9384865859701281, "grad_norm": 8.554308906434185, "learning_rate": 2.8689345675486917e-06, "loss": 1.1675, "step": 6629 }, { "epoch": 0.9386281588447654, "grad_norm": 7.814040131394823, "learning_rate": 2.868367634142553e-06, "loss": 1.0068, "step": 6630 }, { "epoch": 0.9387697317194026, "grad_norm": 8.399489396334793, "learning_rate": 2.8678006813716024e-06, "loss": 1.0565, "step": 6631 }, { "epoch": 0.9389113045940398, "grad_norm": 8.483546769830319, "learning_rate": 2.8672337092656444e-06, "loss": 1.1373, "step": 6632 }, { "epoch": 0.939052877468677, "grad_norm": 7.157119365008556, "learning_rate": 2.8666667178544833e-06, "loss": 0.8932, "step": 6633 }, { "epoch": 0.9391944503433142, "grad_norm": 8.546529089920849, "learning_rate": 2.866099707167927e-06, "loss": 0.9442, "step": 6634 }, { "epoch": 0.9393360232179514, "grad_norm": 6.607343356945533, "learning_rate": 2.8655326772357816e-06, "loss": 1.0295, "step": 6635 }, { "epoch": 0.9394775960925886, "grad_norm": 8.985917836278693, "learning_rate": 2.8649656280878563e-06, "loss": 1.0854, "step": 6636 }, { "epoch": 0.9396191689672259, "grad_norm": 9.098026069930809, "learning_rate": 2.8643985597539597e-06, "loss": 1.0498, "step": 6637 }, { "epoch": 0.9397607418418631, "grad_norm": 8.117437511188298, "learning_rate": 2.863831472263904e-06, "loss": 1.0727, "step": 6638 }, { "epoch": 0.9399023147165003, "grad_norm": 8.211402074155028, "learning_rate": 2.8632643656474974e-06, "loss": 0.9858, "step": 6639 }, { "epoch": 0.9400438875911376, "grad_norm": 7.89439329303322, "learning_rate": 2.8626972399345543e-06, "loss": 1.0406, "step": 6640 }, { "epoch": 0.9401854604657748, "grad_norm": 8.50964302148024, "learning_rate": 2.8621300951548877e-06, "loss": 1.0848, "step": 6641 }, { "epoch": 0.940327033340412, "grad_norm": 10.631173528164117, "learning_rate": 2.861562931338312e-06, "loss": 1.1663, "step": 6642 }, { "epoch": 0.9404686062150492, "grad_norm": 8.48769565182608, "learning_rate": 2.8609957485146433e-06, "loss": 0.9944, "step": 6643 }, { "epoch": 0.9406101790896865, "grad_norm": 8.137609937543422, "learning_rate": 2.8604285467136966e-06, "loss": 1.1522, "step": 6644 }, { "epoch": 0.9407517519643236, "grad_norm": 9.453432894650089, "learning_rate": 2.8598613259652895e-06, "loss": 1.1343, "step": 6645 }, { "epoch": 0.9408933248389608, "grad_norm": 8.493200387358362, "learning_rate": 2.8592940862992417e-06, "loss": 1.192, "step": 6646 }, { "epoch": 0.941034897713598, "grad_norm": 8.836557867396538, "learning_rate": 2.858726827745372e-06, "loss": 1.1195, "step": 6647 }, { "epoch": 0.9411764705882353, "grad_norm": 8.819193577306482, "learning_rate": 2.858159550333499e-06, "loss": 1.0589, "step": 6648 }, { "epoch": 0.9413180434628725, "grad_norm": 11.147596748584109, "learning_rate": 2.8575922540934464e-06, "loss": 1.0989, "step": 6649 }, { "epoch": 0.9414596163375097, "grad_norm": 8.051459745184118, "learning_rate": 2.857024939055036e-06, "loss": 0.9897, "step": 6650 }, { "epoch": 0.941601189212147, "grad_norm": 8.129135722523941, "learning_rate": 2.8564576052480895e-06, "loss": 0.9537, "step": 6651 }, { "epoch": 0.9417427620867842, "grad_norm": 7.866393214931101, "learning_rate": 2.8558902527024337e-06, "loss": 1.021, "step": 6652 }, { "epoch": 0.9418843349614214, "grad_norm": 8.655984261630659, "learning_rate": 2.8553228814478927e-06, "loss": 1.0543, "step": 6653 }, { "epoch": 0.9420259078360587, "grad_norm": 11.248164726603042, "learning_rate": 2.8547554915142923e-06, "loss": 0.985, "step": 6654 }, { "epoch": 0.9421674807106958, "grad_norm": 11.384678338842352, "learning_rate": 2.8541880829314604e-06, "loss": 1.1301, "step": 6655 }, { "epoch": 0.942309053585333, "grad_norm": 10.023249873414429, "learning_rate": 2.8536206557292244e-06, "loss": 1.1284, "step": 6656 }, { "epoch": 0.9424506264599702, "grad_norm": 8.67709364018433, "learning_rate": 2.8530532099374146e-06, "loss": 1.2328, "step": 6657 }, { "epoch": 0.9425921993346075, "grad_norm": 7.164640480280216, "learning_rate": 2.8524857455858606e-06, "loss": 1.0693, "step": 6658 }, { "epoch": 0.9427337722092447, "grad_norm": 9.562046015316149, "learning_rate": 2.8519182627043953e-06, "loss": 1.0793, "step": 6659 }, { "epoch": 0.9428753450838819, "grad_norm": 8.300132787170815, "learning_rate": 2.8513507613228474e-06, "loss": 1.0061, "step": 6660 }, { "epoch": 0.9430169179585192, "grad_norm": 8.630577605931814, "learning_rate": 2.8507832414710528e-06, "loss": 1.0634, "step": 6661 }, { "epoch": 0.9431584908331564, "grad_norm": 7.563851480075907, "learning_rate": 2.850215703178845e-06, "loss": 1.0484, "step": 6662 }, { "epoch": 0.9433000637077936, "grad_norm": 9.108144163335233, "learning_rate": 2.8496481464760585e-06, "loss": 1.0833, "step": 6663 }, { "epoch": 0.9434416365824309, "grad_norm": 9.142881248646702, "learning_rate": 2.8490805713925298e-06, "loss": 1.1405, "step": 6664 }, { "epoch": 0.943583209457068, "grad_norm": 9.236893725082938, "learning_rate": 2.848512977958095e-06, "loss": 1.1105, "step": 6665 }, { "epoch": 0.9437247823317052, "grad_norm": 8.82893991675733, "learning_rate": 2.8479453662025937e-06, "loss": 1.023, "step": 6666 }, { "epoch": 0.9438663552063424, "grad_norm": 7.750157262375566, "learning_rate": 2.847377736155863e-06, "loss": 1.0132, "step": 6667 }, { "epoch": 0.9440079280809797, "grad_norm": 8.829034106951832, "learning_rate": 2.8468100878477443e-06, "loss": 1.1048, "step": 6668 }, { "epoch": 0.9441495009556169, "grad_norm": 7.622334999508127, "learning_rate": 2.8462424213080786e-06, "loss": 1.0084, "step": 6669 }, { "epoch": 0.9442910738302541, "grad_norm": 8.40991267282549, "learning_rate": 2.845674736566706e-06, "loss": 0.9983, "step": 6670 }, { "epoch": 0.9444326467048914, "grad_norm": 9.745950493686628, "learning_rate": 2.8451070336534703e-06, "loss": 1.0822, "step": 6671 }, { "epoch": 0.9445742195795286, "grad_norm": 7.950640517049407, "learning_rate": 2.8445393125982152e-06, "loss": 1.1871, "step": 6672 }, { "epoch": 0.9447157924541658, "grad_norm": 8.326386518975617, "learning_rate": 2.8439715734307856e-06, "loss": 1.095, "step": 6673 }, { "epoch": 0.944857365328803, "grad_norm": 7.712310205037818, "learning_rate": 2.8434038161810266e-06, "loss": 1.0148, "step": 6674 }, { "epoch": 0.9449989382034403, "grad_norm": 7.2573504839826555, "learning_rate": 2.8428360408787857e-06, "loss": 1.0314, "step": 6675 }, { "epoch": 0.9451405110780774, "grad_norm": 8.244142244506474, "learning_rate": 2.84226824755391e-06, "loss": 0.9977, "step": 6676 }, { "epoch": 0.9452820839527146, "grad_norm": 8.781530192510452, "learning_rate": 2.8417004362362465e-06, "loss": 1.0646, "step": 6677 }, { "epoch": 0.9454236568273519, "grad_norm": 8.39492113886333, "learning_rate": 2.8411326069556456e-06, "loss": 1.0532, "step": 6678 }, { "epoch": 0.9455652297019891, "grad_norm": 8.246793181493054, "learning_rate": 2.840564759741959e-06, "loss": 0.8945, "step": 6679 }, { "epoch": 0.9457068025766263, "grad_norm": 7.483722570728086, "learning_rate": 2.8399968946250373e-06, "loss": 1.1015, "step": 6680 }, { "epoch": 0.9458483754512635, "grad_norm": 7.949716853251756, "learning_rate": 2.839429011634731e-06, "loss": 0.9963, "step": 6681 }, { "epoch": 0.9459899483259008, "grad_norm": 7.781960328553197, "learning_rate": 2.8388611108008957e-06, "loss": 1.0007, "step": 6682 }, { "epoch": 0.946131521200538, "grad_norm": 7.677013160526334, "learning_rate": 2.838293192153384e-06, "loss": 0.9249, "step": 6683 }, { "epoch": 0.9462730940751752, "grad_norm": 8.530890935810362, "learning_rate": 2.8377252557220513e-06, "loss": 1.0392, "step": 6684 }, { "epoch": 0.9464146669498125, "grad_norm": 6.76494892786206, "learning_rate": 2.8371573015367543e-06, "loss": 0.8825, "step": 6685 }, { "epoch": 0.9465562398244496, "grad_norm": 7.059201710985933, "learning_rate": 2.836589329627349e-06, "loss": 0.9172, "step": 6686 }, { "epoch": 0.9466978126990868, "grad_norm": 7.631186555165985, "learning_rate": 2.8360213400236936e-06, "loss": 1.1795, "step": 6687 }, { "epoch": 0.946839385573724, "grad_norm": 8.42494952359346, "learning_rate": 2.8354533327556465e-06, "loss": 1.191, "step": 6688 }, { "epoch": 0.9469809584483613, "grad_norm": 8.821256142779408, "learning_rate": 2.834885307853068e-06, "loss": 0.893, "step": 6689 }, { "epoch": 0.9471225313229985, "grad_norm": 8.379455292326211, "learning_rate": 2.8343172653458194e-06, "loss": 1.1982, "step": 6690 }, { "epoch": 0.9472641041976357, "grad_norm": 8.419740424062594, "learning_rate": 2.833749205263761e-06, "loss": 1.0559, "step": 6691 }, { "epoch": 0.947405677072273, "grad_norm": 9.398273926387237, "learning_rate": 2.8331811276367554e-06, "loss": 1.147, "step": 6692 }, { "epoch": 0.9475472499469102, "grad_norm": 8.327725570862295, "learning_rate": 2.832613032494666e-06, "loss": 1.044, "step": 6693 }, { "epoch": 0.9476888228215474, "grad_norm": 8.090209186428728, "learning_rate": 2.8320449198673585e-06, "loss": 1.1074, "step": 6694 }, { "epoch": 0.9478303956961847, "grad_norm": 8.932541545490514, "learning_rate": 2.8314767897846963e-06, "loss": 1.1802, "step": 6695 }, { "epoch": 0.9479719685708218, "grad_norm": 9.453608426430272, "learning_rate": 2.830908642276547e-06, "loss": 1.11, "step": 6696 }, { "epoch": 0.948113541445459, "grad_norm": 7.913321849211555, "learning_rate": 2.830340477372777e-06, "loss": 1.0758, "step": 6697 }, { "epoch": 0.9482551143200962, "grad_norm": 7.824254584837434, "learning_rate": 2.829772295103254e-06, "loss": 1.1447, "step": 6698 }, { "epoch": 0.9483966871947335, "grad_norm": 8.368457573132597, "learning_rate": 2.829204095497848e-06, "loss": 1.1582, "step": 6699 }, { "epoch": 0.9485382600693707, "grad_norm": 8.506337664150358, "learning_rate": 2.828635878586428e-06, "loss": 1.1343, "step": 6700 }, { "epoch": 0.9486798329440079, "grad_norm": 10.790008260389671, "learning_rate": 2.828067644398864e-06, "loss": 1.1472, "step": 6701 }, { "epoch": 0.9488214058186452, "grad_norm": 8.97679707591675, "learning_rate": 2.8274993929650297e-06, "loss": 1.0948, "step": 6702 }, { "epoch": 0.9489629786932824, "grad_norm": 8.777840926617879, "learning_rate": 2.826931124314796e-06, "loss": 1.0521, "step": 6703 }, { "epoch": 0.9491045515679196, "grad_norm": 8.476424340031896, "learning_rate": 2.826362838478037e-06, "loss": 0.9689, "step": 6704 }, { "epoch": 0.9492461244425568, "grad_norm": 9.405634356150657, "learning_rate": 2.825794535484627e-06, "loss": 1.1423, "step": 6705 }, { "epoch": 0.9493876973171941, "grad_norm": 7.768359924982381, "learning_rate": 2.825226215364441e-06, "loss": 1.0523, "step": 6706 }, { "epoch": 0.9495292701918312, "grad_norm": 7.727557265810892, "learning_rate": 2.824657878147355e-06, "loss": 1.0206, "step": 6707 }, { "epoch": 0.9496708430664684, "grad_norm": 8.095209998928862, "learning_rate": 2.8240895238632473e-06, "loss": 1.0531, "step": 6708 }, { "epoch": 0.9498124159411057, "grad_norm": 8.638875221931247, "learning_rate": 2.8235211525419937e-06, "loss": 0.9701, "step": 6709 }, { "epoch": 0.9499539888157429, "grad_norm": 10.285993693356302, "learning_rate": 2.8229527642134753e-06, "loss": 1.1802, "step": 6710 }, { "epoch": 0.9500955616903801, "grad_norm": 9.768368364666255, "learning_rate": 2.8223843589075705e-06, "loss": 1.1591, "step": 6711 }, { "epoch": 0.9502371345650173, "grad_norm": 8.643183903557915, "learning_rate": 2.8218159366541615e-06, "loss": 0.9041, "step": 6712 }, { "epoch": 0.9503787074396546, "grad_norm": 11.446848899651174, "learning_rate": 2.8212474974831277e-06, "loss": 1.0244, "step": 6713 }, { "epoch": 0.9505202803142918, "grad_norm": 8.891235545133362, "learning_rate": 2.8206790414243525e-06, "loss": 1.0157, "step": 6714 }, { "epoch": 0.950661853188929, "grad_norm": 9.660961791626018, "learning_rate": 2.8201105685077184e-06, "loss": 1.1892, "step": 6715 }, { "epoch": 0.9508034260635663, "grad_norm": 7.796852494018119, "learning_rate": 2.8195420787631113e-06, "loss": 1.0632, "step": 6716 }, { "epoch": 0.9509449989382034, "grad_norm": 9.476023885919846, "learning_rate": 2.818973572220416e-06, "loss": 1.0335, "step": 6717 }, { "epoch": 0.9510865718128406, "grad_norm": 9.567394325504072, "learning_rate": 2.818405048909517e-06, "loss": 1.0692, "step": 6718 }, { "epoch": 0.9512281446874779, "grad_norm": 8.907369121710975, "learning_rate": 2.817836508860302e-06, "loss": 0.9877, "step": 6719 }, { "epoch": 0.9513697175621151, "grad_norm": 9.63712171710176, "learning_rate": 2.817267952102659e-06, "loss": 1.204, "step": 6720 }, { "epoch": 0.9515112904367523, "grad_norm": 7.096996219998971, "learning_rate": 2.8166993786664757e-06, "loss": 1.0821, "step": 6721 }, { "epoch": 0.9516528633113895, "grad_norm": 9.869816059226673, "learning_rate": 2.816130788581643e-06, "loss": 1.1791, "step": 6722 }, { "epoch": 0.9517944361860268, "grad_norm": 11.289801149061553, "learning_rate": 2.8155621818780497e-06, "loss": 1.2117, "step": 6723 }, { "epoch": 0.951936009060664, "grad_norm": 8.901508983459893, "learning_rate": 2.8149935585855885e-06, "loss": 1.1203, "step": 6724 }, { "epoch": 0.9520775819353012, "grad_norm": 9.260041843262094, "learning_rate": 2.8144249187341506e-06, "loss": 0.8864, "step": 6725 }, { "epoch": 0.9522191548099385, "grad_norm": 7.674917204469309, "learning_rate": 2.8138562623536293e-06, "loss": 1.075, "step": 6726 }, { "epoch": 0.9523607276845756, "grad_norm": 8.640466064315845, "learning_rate": 2.8132875894739175e-06, "loss": 1.0587, "step": 6727 }, { "epoch": 0.9525023005592128, "grad_norm": 8.35445423217927, "learning_rate": 2.812718900124912e-06, "loss": 0.9975, "step": 6728 }, { "epoch": 0.95264387343385, "grad_norm": 10.620351335421676, "learning_rate": 2.8121501943365066e-06, "loss": 1.1609, "step": 6729 }, { "epoch": 0.9527854463084873, "grad_norm": 8.860150446249532, "learning_rate": 2.8115814721385975e-06, "loss": 1.1768, "step": 6730 }, { "epoch": 0.9529270191831245, "grad_norm": 9.684872375498166, "learning_rate": 2.8110127335610833e-06, "loss": 0.9969, "step": 6731 }, { "epoch": 0.9530685920577617, "grad_norm": 8.347716113934856, "learning_rate": 2.8104439786338617e-06, "loss": 0.9929, "step": 6732 }, { "epoch": 0.953210164932399, "grad_norm": 11.056282322018541, "learning_rate": 2.809875207386832e-06, "loss": 1.1537, "step": 6733 }, { "epoch": 0.9533517378070362, "grad_norm": 9.629824135383677, "learning_rate": 2.809306419849893e-06, "loss": 1.2215, "step": 6734 }, { "epoch": 0.9534933106816734, "grad_norm": 8.701113785229776, "learning_rate": 2.8087376160529463e-06, "loss": 1.1107, "step": 6735 }, { "epoch": 0.9536348835563107, "grad_norm": 10.027545375602427, "learning_rate": 2.808168796025893e-06, "loss": 1.0519, "step": 6736 }, { "epoch": 0.9537764564309479, "grad_norm": 12.407300928606826, "learning_rate": 2.8075999597986364e-06, "loss": 1.078, "step": 6737 }, { "epoch": 0.953918029305585, "grad_norm": 9.680239214502913, "learning_rate": 2.8070311074010793e-06, "loss": 1.1529, "step": 6738 }, { "epoch": 0.9540596021802222, "grad_norm": 9.384291050370635, "learning_rate": 2.806462238863125e-06, "loss": 1.0868, "step": 6739 }, { "epoch": 0.9542011750548595, "grad_norm": 9.164136595235409, "learning_rate": 2.8058933542146804e-06, "loss": 0.8951, "step": 6740 }, { "epoch": 0.9543427479294967, "grad_norm": 15.118535449432335, "learning_rate": 2.80532445348565e-06, "loss": 1.1424, "step": 6741 }, { "epoch": 0.9544843208041339, "grad_norm": 10.867179777360663, "learning_rate": 2.8047555367059404e-06, "loss": 1.2285, "step": 6742 }, { "epoch": 0.9546258936787712, "grad_norm": 9.834930635022452, "learning_rate": 2.80418660390546e-06, "loss": 0.9461, "step": 6743 }, { "epoch": 0.9547674665534084, "grad_norm": 9.578091545108784, "learning_rate": 2.803617655114116e-06, "loss": 1.1105, "step": 6744 }, { "epoch": 0.9549090394280456, "grad_norm": 8.772703960457804, "learning_rate": 2.803048690361818e-06, "loss": 1.0131, "step": 6745 }, { "epoch": 0.9550506123026828, "grad_norm": 10.294919095937303, "learning_rate": 2.8024797096784766e-06, "loss": 1.0347, "step": 6746 }, { "epoch": 0.9551921851773201, "grad_norm": 8.021641546046652, "learning_rate": 2.8019107130940025e-06, "loss": 0.9769, "step": 6747 }, { "epoch": 0.9553337580519572, "grad_norm": 9.563601361845901, "learning_rate": 2.8013417006383078e-06, "loss": 1.1519, "step": 6748 }, { "epoch": 0.9554753309265944, "grad_norm": 9.885888772081387, "learning_rate": 2.8007726723413046e-06, "loss": 0.9898, "step": 6749 }, { "epoch": 0.9556169038012317, "grad_norm": 13.214454620342298, "learning_rate": 2.800203628232906e-06, "loss": 1.1474, "step": 6750 }, { "epoch": 0.9557584766758689, "grad_norm": 7.2003351133561075, "learning_rate": 2.7996345683430266e-06, "loss": 0.8921, "step": 6751 }, { "epoch": 0.9559000495505061, "grad_norm": 7.375786690860984, "learning_rate": 2.799065492701581e-06, "loss": 0.9864, "step": 6752 }, { "epoch": 0.9560416224251433, "grad_norm": 11.369410220830773, "learning_rate": 2.7984964013384853e-06, "loss": 1.1279, "step": 6753 }, { "epoch": 0.9561831952997806, "grad_norm": 9.263139207948882, "learning_rate": 2.7979272942836566e-06, "loss": 1.1557, "step": 6754 }, { "epoch": 0.9563247681744178, "grad_norm": 11.026005608678838, "learning_rate": 2.7973581715670124e-06, "loss": 1.1003, "step": 6755 }, { "epoch": 0.956466341049055, "grad_norm": 8.747028827302678, "learning_rate": 2.7967890332184705e-06, "loss": 1.0836, "step": 6756 }, { "epoch": 0.9566079139236923, "grad_norm": 8.485357350617344, "learning_rate": 2.7962198792679506e-06, "loss": 0.9652, "step": 6757 }, { "epoch": 0.9567494867983294, "grad_norm": 8.738375980851067, "learning_rate": 2.795650709745373e-06, "loss": 1.0358, "step": 6758 }, { "epoch": 0.9568910596729666, "grad_norm": 9.118023857713041, "learning_rate": 2.7950815246806575e-06, "loss": 1.0956, "step": 6759 }, { "epoch": 0.9570326325476038, "grad_norm": 11.022068774102037, "learning_rate": 2.794512324103726e-06, "loss": 1.1038, "step": 6760 }, { "epoch": 0.9571742054222411, "grad_norm": 9.048918550905293, "learning_rate": 2.7939431080445016e-06, "loss": 1.0563, "step": 6761 }, { "epoch": 0.9573157782968783, "grad_norm": 8.89589541649577, "learning_rate": 2.7933738765329073e-06, "loss": 1.0117, "step": 6762 }, { "epoch": 0.9574573511715155, "grad_norm": 7.427505060307831, "learning_rate": 2.7928046295988666e-06, "loss": 0.9927, "step": 6763 }, { "epoch": 0.9575989240461528, "grad_norm": 8.235532617721734, "learning_rate": 2.792235367272305e-06, "loss": 0.9461, "step": 6764 }, { "epoch": 0.95774049692079, "grad_norm": 7.960217504165514, "learning_rate": 2.7916660895831487e-06, "loss": 0.9825, "step": 6765 }, { "epoch": 0.9578820697954272, "grad_norm": 8.649609383820508, "learning_rate": 2.791096796561323e-06, "loss": 1.129, "step": 6766 }, { "epoch": 0.9580236426700645, "grad_norm": 8.920089302471276, "learning_rate": 2.790527488236755e-06, "loss": 1.2166, "step": 6767 }, { "epoch": 0.9581652155447017, "grad_norm": 8.382664603628944, "learning_rate": 2.7899581646393746e-06, "loss": 0.9505, "step": 6768 }, { "epoch": 0.9583067884193388, "grad_norm": 8.201148850512453, "learning_rate": 2.789388825799109e-06, "loss": 1.1481, "step": 6769 }, { "epoch": 0.958448361293976, "grad_norm": 10.008228349942353, "learning_rate": 2.78881947174589e-06, "loss": 1.0003, "step": 6770 }, { "epoch": 0.9585899341686133, "grad_norm": 8.989440657513732, "learning_rate": 2.788250102509646e-06, "loss": 1.1477, "step": 6771 }, { "epoch": 0.9587315070432505, "grad_norm": 6.86169783554182, "learning_rate": 2.7876807181203085e-06, "loss": 0.9834, "step": 6772 }, { "epoch": 0.9588730799178877, "grad_norm": 8.992637165668333, "learning_rate": 2.7871113186078102e-06, "loss": 1.1783, "step": 6773 }, { "epoch": 0.959014652792525, "grad_norm": 7.950420766767778, "learning_rate": 2.786541904002085e-06, "loss": 0.9953, "step": 6774 }, { "epoch": 0.9591562256671622, "grad_norm": 7.219212636944012, "learning_rate": 2.785972474333064e-06, "loss": 1.1056, "step": 6775 }, { "epoch": 0.9592977985417994, "grad_norm": 9.67617943644743, "learning_rate": 2.7854030296306846e-06, "loss": 1.0372, "step": 6776 }, { "epoch": 0.9594393714164366, "grad_norm": 7.8312531272595765, "learning_rate": 2.7848335699248796e-06, "loss": 1.0071, "step": 6777 }, { "epoch": 0.9595809442910739, "grad_norm": 7.441915788001308, "learning_rate": 2.7842640952455867e-06, "loss": 1.0684, "step": 6778 }, { "epoch": 0.959722517165711, "grad_norm": 7.545464867692386, "learning_rate": 2.783694605622743e-06, "loss": 1.0376, "step": 6779 }, { "epoch": 0.9598640900403482, "grad_norm": 7.368010231267761, "learning_rate": 2.7831251010862847e-06, "loss": 0.9594, "step": 6780 }, { "epoch": 0.9600056629149855, "grad_norm": 7.420890405497199, "learning_rate": 2.7825555816661503e-06, "loss": 1.0254, "step": 6781 }, { "epoch": 0.9601472357896227, "grad_norm": 8.386582748809953, "learning_rate": 2.7819860473922805e-06, "loss": 1.0884, "step": 6782 }, { "epoch": 0.9602888086642599, "grad_norm": 7.4760573644013935, "learning_rate": 2.781416498294614e-06, "loss": 1.0022, "step": 6783 }, { "epoch": 0.9604303815388971, "grad_norm": 7.869587567197102, "learning_rate": 2.7808469344030923e-06, "loss": 1.1033, "step": 6784 }, { "epoch": 0.9605719544135344, "grad_norm": 7.701577795303335, "learning_rate": 2.780277355747657e-06, "loss": 1.0286, "step": 6785 }, { "epoch": 0.9607135272881716, "grad_norm": 9.057527848700245, "learning_rate": 2.7797077623582503e-06, "loss": 1.1316, "step": 6786 }, { "epoch": 0.9608551001628088, "grad_norm": 10.910364637286634, "learning_rate": 2.779138154264814e-06, "loss": 1.0381, "step": 6787 }, { "epoch": 0.9609966730374461, "grad_norm": 9.615403580866964, "learning_rate": 2.778568531497294e-06, "loss": 1.0422, "step": 6788 }, { "epoch": 0.9611382459120833, "grad_norm": 8.294566607429115, "learning_rate": 2.777998894085634e-06, "loss": 0.968, "step": 6789 }, { "epoch": 0.9612798187867204, "grad_norm": 10.255566829823525, "learning_rate": 2.7774292420597784e-06, "loss": 0.9516, "step": 6790 }, { "epoch": 0.9614213916613576, "grad_norm": 10.08177547137772, "learning_rate": 2.776859575449675e-06, "loss": 1.0549, "step": 6791 }, { "epoch": 0.9615629645359949, "grad_norm": 9.118169867276855, "learning_rate": 2.7762898942852705e-06, "loss": 1.0388, "step": 6792 }, { "epoch": 0.9617045374106321, "grad_norm": 9.404455679149844, "learning_rate": 2.775720198596512e-06, "loss": 1.0751, "step": 6793 }, { "epoch": 0.9618461102852693, "grad_norm": 8.036496834571038, "learning_rate": 2.7751504884133484e-06, "loss": 1.1189, "step": 6794 }, { "epoch": 0.9619876831599066, "grad_norm": 9.338302197907986, "learning_rate": 2.7745807637657287e-06, "loss": 1.0452, "step": 6795 }, { "epoch": 0.9621292560345438, "grad_norm": 8.907704017137839, "learning_rate": 2.774011024683603e-06, "loss": 1.0056, "step": 6796 }, { "epoch": 0.962270828909181, "grad_norm": 9.699921157359187, "learning_rate": 2.7734412711969215e-06, "loss": 1.0168, "step": 6797 }, { "epoch": 0.9624124017838183, "grad_norm": 10.009830220828158, "learning_rate": 2.7728715033356366e-06, "loss": 1.0109, "step": 6798 }, { "epoch": 0.9625539746584555, "grad_norm": 8.217792875886843, "learning_rate": 2.7723017211297006e-06, "loss": 1.0539, "step": 6799 }, { "epoch": 0.9626955475330926, "grad_norm": 8.960900463769365, "learning_rate": 2.7717319246090657e-06, "loss": 1.1875, "step": 6800 }, { "epoch": 0.9628371204077298, "grad_norm": 8.053789031333666, "learning_rate": 2.7711621138036864e-06, "loss": 1.0185, "step": 6801 }, { "epoch": 0.9629786932823671, "grad_norm": 11.179044287816595, "learning_rate": 2.7705922887435172e-06, "loss": 1.1977, "step": 6802 }, { "epoch": 0.9631202661570043, "grad_norm": 7.416470328600376, "learning_rate": 2.770022449458513e-06, "loss": 1.0028, "step": 6803 }, { "epoch": 0.9632618390316415, "grad_norm": 7.639594946704114, "learning_rate": 2.7694525959786297e-06, "loss": 1.0931, "step": 6804 }, { "epoch": 0.9634034119062788, "grad_norm": 6.9449175796396805, "learning_rate": 2.7688827283338236e-06, "loss": 1.0065, "step": 6805 }, { "epoch": 0.963544984780916, "grad_norm": 9.14675567309315, "learning_rate": 2.7683128465540545e-06, "loss": 1.0101, "step": 6806 }, { "epoch": 0.9636865576555532, "grad_norm": 8.587313374319674, "learning_rate": 2.7677429506692788e-06, "loss": 1.0504, "step": 6807 }, { "epoch": 0.9638281305301905, "grad_norm": 8.456526038604833, "learning_rate": 2.7671730407094553e-06, "loss": 1.1542, "step": 6808 }, { "epoch": 0.9639697034048277, "grad_norm": 9.14066339468432, "learning_rate": 2.7666031167045444e-06, "loss": 1.2123, "step": 6809 }, { "epoch": 0.9641112762794648, "grad_norm": 7.904692364472708, "learning_rate": 2.766033178684506e-06, "loss": 1.0945, "step": 6810 }, { "epoch": 0.964252849154102, "grad_norm": 7.348962630160937, "learning_rate": 2.7654632266793025e-06, "loss": 1.1182, "step": 6811 }, { "epoch": 0.9643944220287393, "grad_norm": 9.804662599569975, "learning_rate": 2.764893260718895e-06, "loss": 1.0504, "step": 6812 }, { "epoch": 0.9645359949033765, "grad_norm": 9.010278977828643, "learning_rate": 2.764323280833246e-06, "loss": 0.9698, "step": 6813 }, { "epoch": 0.9646775677780137, "grad_norm": 9.062121153180522, "learning_rate": 2.7637532870523193e-06, "loss": 1.0247, "step": 6814 }, { "epoch": 0.964819140652651, "grad_norm": 10.358497087611818, "learning_rate": 2.7631832794060787e-06, "loss": 1.1343, "step": 6815 }, { "epoch": 0.9649607135272882, "grad_norm": 8.591975136321338, "learning_rate": 2.7626132579244896e-06, "loss": 1.047, "step": 6816 }, { "epoch": 0.9651022864019254, "grad_norm": 8.587779797655932, "learning_rate": 2.7620432226375175e-06, "loss": 1.0199, "step": 6817 }, { "epoch": 0.9652438592765626, "grad_norm": 8.49462541191028, "learning_rate": 2.761473173575129e-06, "loss": 1.1377, "step": 6818 }, { "epoch": 0.9653854321511999, "grad_norm": 8.89332772791894, "learning_rate": 2.7609031107672896e-06, "loss": 1.097, "step": 6819 }, { "epoch": 0.9655270050258371, "grad_norm": 6.689984296885765, "learning_rate": 2.7603330342439686e-06, "loss": 1.0617, "step": 6820 }, { "epoch": 0.9656685779004742, "grad_norm": 8.212233596266513, "learning_rate": 2.7597629440351346e-06, "loss": 1.0096, "step": 6821 }, { "epoch": 0.9658101507751115, "grad_norm": 9.649868235899122, "learning_rate": 2.7591928401707555e-06, "loss": 1.1108, "step": 6822 }, { "epoch": 0.9659517236497487, "grad_norm": 7.287044724234117, "learning_rate": 2.7586227226808038e-06, "loss": 0.8387, "step": 6823 }, { "epoch": 0.9660932965243859, "grad_norm": 7.431220473093673, "learning_rate": 2.7580525915952465e-06, "loss": 1.06, "step": 6824 }, { "epoch": 0.9662348693990231, "grad_norm": 8.560513231096904, "learning_rate": 2.757482446944058e-06, "loss": 0.9155, "step": 6825 }, { "epoch": 0.9663764422736604, "grad_norm": 8.21270878031444, "learning_rate": 2.756912288757209e-06, "loss": 0.9496, "step": 6826 }, { "epoch": 0.9665180151482976, "grad_norm": 8.21859218895544, "learning_rate": 2.7563421170646714e-06, "loss": 1.0566, "step": 6827 }, { "epoch": 0.9666595880229348, "grad_norm": 8.189180856805445, "learning_rate": 2.7557719318964216e-06, "loss": 1.1015, "step": 6828 }, { "epoch": 0.9668011608975721, "grad_norm": 9.392532434191736, "learning_rate": 2.755201733282431e-06, "loss": 1.0893, "step": 6829 }, { "epoch": 0.9669427337722093, "grad_norm": 9.598674889345281, "learning_rate": 2.754631521252676e-06, "loss": 1.1965, "step": 6830 }, { "epoch": 0.9670843066468464, "grad_norm": 9.229027556898727, "learning_rate": 2.7540612958371315e-06, "loss": 1.0371, "step": 6831 }, { "epoch": 0.9672258795214836, "grad_norm": 8.166955229144074, "learning_rate": 2.7534910570657743e-06, "loss": 0.9999, "step": 6832 }, { "epoch": 0.9673674523961209, "grad_norm": 8.041161503185501, "learning_rate": 2.752920804968581e-06, "loss": 1.0807, "step": 6833 }, { "epoch": 0.9675090252707581, "grad_norm": 7.401869367632615, "learning_rate": 2.7523505395755296e-06, "loss": 0.9934, "step": 6834 }, { "epoch": 0.9676505981453953, "grad_norm": 7.668750756099698, "learning_rate": 2.7517802609165985e-06, "loss": 0.9987, "step": 6835 }, { "epoch": 0.9677921710200326, "grad_norm": 10.149305769005752, "learning_rate": 2.751209969021767e-06, "loss": 1.0916, "step": 6836 }, { "epoch": 0.9679337438946698, "grad_norm": 8.889378036179876, "learning_rate": 2.750639663921014e-06, "loss": 1.113, "step": 6837 }, { "epoch": 0.968075316769307, "grad_norm": 8.123674020407499, "learning_rate": 2.7500693456443217e-06, "loss": 1.0535, "step": 6838 }, { "epoch": 0.9682168896439443, "grad_norm": 9.688989537073086, "learning_rate": 2.749499014221671e-06, "loss": 1.1138, "step": 6839 }, { "epoch": 0.9683584625185815, "grad_norm": 8.244271341284836, "learning_rate": 2.748928669683042e-06, "loss": 0.9556, "step": 6840 }, { "epoch": 0.9685000353932186, "grad_norm": 8.552903638211484, "learning_rate": 2.748358312058418e-06, "loss": 1.1052, "step": 6841 }, { "epoch": 0.9686416082678558, "grad_norm": 8.825518576106846, "learning_rate": 2.7477879413777834e-06, "loss": 1.0285, "step": 6842 }, { "epoch": 0.9687831811424931, "grad_norm": 9.536456377719169, "learning_rate": 2.7472175576711213e-06, "loss": 1.0613, "step": 6843 }, { "epoch": 0.9689247540171303, "grad_norm": 8.710825829581303, "learning_rate": 2.7466471609684175e-06, "loss": 1.1135, "step": 6844 }, { "epoch": 0.9690663268917675, "grad_norm": 8.692764114435565, "learning_rate": 2.7460767512996556e-06, "loss": 1.0388, "step": 6845 }, { "epoch": 0.9692078997664048, "grad_norm": 8.129837708677437, "learning_rate": 2.745506328694822e-06, "loss": 0.9016, "step": 6846 }, { "epoch": 0.969349472641042, "grad_norm": 8.667576619819938, "learning_rate": 2.7449358931839042e-06, "loss": 0.9704, "step": 6847 }, { "epoch": 0.9694910455156792, "grad_norm": 7.23821853470537, "learning_rate": 2.7443654447968894e-06, "loss": 1.1075, "step": 6848 }, { "epoch": 0.9696326183903164, "grad_norm": 10.384449527644087, "learning_rate": 2.7437949835637644e-06, "loss": 1.1385, "step": 6849 }, { "epoch": 0.9697741912649537, "grad_norm": 9.167026258409173, "learning_rate": 2.7432245095145193e-06, "loss": 1.1572, "step": 6850 }, { "epoch": 0.9699157641395909, "grad_norm": 8.103771471392001, "learning_rate": 2.7426540226791437e-06, "loss": 1.0021, "step": 6851 }, { "epoch": 0.970057337014228, "grad_norm": 8.246382874046724, "learning_rate": 2.7420835230876264e-06, "loss": 1.1867, "step": 6852 }, { "epoch": 0.9701989098888653, "grad_norm": 8.97732824872476, "learning_rate": 2.7415130107699588e-06, "loss": 1.2361, "step": 6853 }, { "epoch": 0.9703404827635025, "grad_norm": 8.095130360845465, "learning_rate": 2.740942485756133e-06, "loss": 1.0583, "step": 6854 }, { "epoch": 0.9704820556381397, "grad_norm": 7.329301101235258, "learning_rate": 2.7403719480761406e-06, "loss": 1.0118, "step": 6855 }, { "epoch": 0.970623628512777, "grad_norm": 7.8485491535644965, "learning_rate": 2.7398013977599722e-06, "loss": 1.037, "step": 6856 }, { "epoch": 0.9707652013874142, "grad_norm": 9.263763086270865, "learning_rate": 2.7392308348376243e-06, "loss": 1.1817, "step": 6857 }, { "epoch": 0.9709067742620514, "grad_norm": 8.330031185755793, "learning_rate": 2.73866025933909e-06, "loss": 1.1056, "step": 6858 }, { "epoch": 0.9710483471366886, "grad_norm": 7.520408329530458, "learning_rate": 2.738089671294364e-06, "loss": 1.0285, "step": 6859 }, { "epoch": 0.9711899200113259, "grad_norm": 8.089766416820138, "learning_rate": 2.7375190707334416e-06, "loss": 1.1786, "step": 6860 }, { "epoch": 0.9713314928859631, "grad_norm": 8.68071586223506, "learning_rate": 2.736948457686318e-06, "loss": 1.1393, "step": 6861 }, { "epoch": 0.9714730657606002, "grad_norm": 10.754881349385286, "learning_rate": 2.736377832182991e-06, "loss": 0.9926, "step": 6862 }, { "epoch": 0.9716146386352374, "grad_norm": 10.902614916795226, "learning_rate": 2.7358071942534574e-06, "loss": 1.1776, "step": 6863 }, { "epoch": 0.9717562115098747, "grad_norm": 7.960699345523873, "learning_rate": 2.735236543927715e-06, "loss": 0.9896, "step": 6864 }, { "epoch": 0.9718977843845119, "grad_norm": 7.920049995110436, "learning_rate": 2.734665881235764e-06, "loss": 1.0117, "step": 6865 }, { "epoch": 0.9720393572591491, "grad_norm": 7.936227050594255, "learning_rate": 2.7340952062076022e-06, "loss": 1.113, "step": 6866 }, { "epoch": 0.9721809301337864, "grad_norm": 8.983093913420042, "learning_rate": 2.73352451887323e-06, "loss": 1.014, "step": 6867 }, { "epoch": 0.9723225030084236, "grad_norm": 8.155576480812071, "learning_rate": 2.7329538192626478e-06, "loss": 1.0765, "step": 6868 }, { "epoch": 0.9724640758830608, "grad_norm": 6.855400028590743, "learning_rate": 2.7323831074058572e-06, "loss": 1.1169, "step": 6869 }, { "epoch": 0.9726056487576981, "grad_norm": 8.519052582122772, "learning_rate": 2.7318123833328598e-06, "loss": 1.0496, "step": 6870 }, { "epoch": 0.9727472216323353, "grad_norm": 7.596488804104098, "learning_rate": 2.731241647073658e-06, "loss": 1.0043, "step": 6871 }, { "epoch": 0.9728887945069724, "grad_norm": 8.095349010274019, "learning_rate": 2.730670898658255e-06, "loss": 1.124, "step": 6872 }, { "epoch": 0.9730303673816096, "grad_norm": 8.519394234627248, "learning_rate": 2.7301001381166553e-06, "loss": 0.984, "step": 6873 }, { "epoch": 0.9731719402562469, "grad_norm": 8.300496318639157, "learning_rate": 2.729529365478863e-06, "loss": 1.1434, "step": 6874 }, { "epoch": 0.9733135131308841, "grad_norm": 8.523890412462602, "learning_rate": 2.7289585807748832e-06, "loss": 1.1282, "step": 6875 }, { "epoch": 0.9734550860055213, "grad_norm": 7.2176148153698865, "learning_rate": 2.7283877840347217e-06, "loss": 0.8637, "step": 6876 }, { "epoch": 0.9735966588801586, "grad_norm": 6.894532909879404, "learning_rate": 2.7278169752883845e-06, "loss": 1.0681, "step": 6877 }, { "epoch": 0.9737382317547958, "grad_norm": 8.863890235661952, "learning_rate": 2.727246154565878e-06, "loss": 0.996, "step": 6878 }, { "epoch": 0.973879804629433, "grad_norm": 9.410087740384693, "learning_rate": 2.726675321897211e-06, "loss": 1.0739, "step": 6879 }, { "epoch": 0.9740213775040703, "grad_norm": 7.7662122416134824, "learning_rate": 2.7261044773123913e-06, "loss": 1.2061, "step": 6880 }, { "epoch": 0.9741629503787075, "grad_norm": 9.091714847575727, "learning_rate": 2.725533620841429e-06, "loss": 1.0451, "step": 6881 }, { "epoch": 0.9743045232533447, "grad_norm": 8.85669550666904, "learning_rate": 2.7249627525143313e-06, "loss": 1.0927, "step": 6882 }, { "epoch": 0.9744460961279818, "grad_norm": 9.00673635362179, "learning_rate": 2.7243918723611095e-06, "loss": 1.0133, "step": 6883 }, { "epoch": 0.9745876690026191, "grad_norm": 7.210669813219396, "learning_rate": 2.7238209804117744e-06, "loss": 1.0924, "step": 6884 }, { "epoch": 0.9747292418772563, "grad_norm": 8.628571144183397, "learning_rate": 2.7232500766963373e-06, "loss": 1.0491, "step": 6885 }, { "epoch": 0.9748708147518935, "grad_norm": 10.390384895017199, "learning_rate": 2.72267916124481e-06, "loss": 1.0958, "step": 6886 }, { "epoch": 0.9750123876265308, "grad_norm": 7.152352816931719, "learning_rate": 2.722108234087205e-06, "loss": 1.0114, "step": 6887 }, { "epoch": 0.975153960501168, "grad_norm": 11.531535819628495, "learning_rate": 2.7215372952535364e-06, "loss": 1.1944, "step": 6888 }, { "epoch": 0.9752955333758052, "grad_norm": 10.959254662446149, "learning_rate": 2.7209663447738164e-06, "loss": 1.1576, "step": 6889 }, { "epoch": 0.9754371062504424, "grad_norm": 8.563343312856706, "learning_rate": 2.7203953826780615e-06, "loss": 1.0065, "step": 6890 }, { "epoch": 0.9755786791250797, "grad_norm": 7.678353427567826, "learning_rate": 2.719824408996285e-06, "loss": 1.0601, "step": 6891 }, { "epoch": 0.9757202519997169, "grad_norm": 8.365038512985489, "learning_rate": 2.7192534237585037e-06, "loss": 0.9841, "step": 6892 }, { "epoch": 0.975861824874354, "grad_norm": 8.26599512551508, "learning_rate": 2.7186824269947334e-06, "loss": 1.0519, "step": 6893 }, { "epoch": 0.9760033977489913, "grad_norm": 7.88567125790469, "learning_rate": 2.71811141873499e-06, "loss": 1.0979, "step": 6894 }, { "epoch": 0.9761449706236285, "grad_norm": 9.967927235100476, "learning_rate": 2.717540399009293e-06, "loss": 1.1707, "step": 6895 }, { "epoch": 0.9762865434982657, "grad_norm": 8.773796203291624, "learning_rate": 2.716969367847659e-06, "loss": 1.0968, "step": 6896 }, { "epoch": 0.9764281163729029, "grad_norm": 7.995596866049762, "learning_rate": 2.7163983252801076e-06, "loss": 1.0716, "step": 6897 }, { "epoch": 0.9765696892475402, "grad_norm": 7.356861892181663, "learning_rate": 2.7158272713366573e-06, "loss": 0.861, "step": 6898 }, { "epoch": 0.9767112621221774, "grad_norm": 10.600700686346347, "learning_rate": 2.715256206047328e-06, "loss": 1.1269, "step": 6899 }, { "epoch": 0.9768528349968146, "grad_norm": 8.17357820518465, "learning_rate": 2.7146851294421404e-06, "loss": 1.0984, "step": 6900 }, { "epoch": 0.9769944078714519, "grad_norm": 7.487785820449188, "learning_rate": 2.714114041551115e-06, "loss": 1.0213, "step": 6901 }, { "epoch": 0.9771359807460891, "grad_norm": 9.456190989752056, "learning_rate": 2.7135429424042758e-06, "loss": 1.0389, "step": 6902 }, { "epoch": 0.9772775536207262, "grad_norm": 8.73105816312944, "learning_rate": 2.712971832031642e-06, "loss": 1.0005, "step": 6903 }, { "epoch": 0.9774191264953634, "grad_norm": 10.581878401904612, "learning_rate": 2.712400710463239e-06, "loss": 1.036, "step": 6904 }, { "epoch": 0.9775606993700007, "grad_norm": 10.042401636433459, "learning_rate": 2.7118295777290875e-06, "loss": 1.1762, "step": 6905 }, { "epoch": 0.9777022722446379, "grad_norm": 11.286279798690796, "learning_rate": 2.711258433859214e-06, "loss": 1.1853, "step": 6906 }, { "epoch": 0.9778438451192751, "grad_norm": 8.92160178065125, "learning_rate": 2.710687278883642e-06, "loss": 1.1607, "step": 6907 }, { "epoch": 0.9779854179939124, "grad_norm": 8.176851526040219, "learning_rate": 2.7101161128323967e-06, "loss": 1.0815, "step": 6908 }, { "epoch": 0.9781269908685496, "grad_norm": 8.339633340437077, "learning_rate": 2.7095449357355042e-06, "loss": 0.9311, "step": 6909 }, { "epoch": 0.9782685637431868, "grad_norm": 11.897040221838331, "learning_rate": 2.7089737476229906e-06, "loss": 1.114, "step": 6910 }, { "epoch": 0.9784101366178241, "grad_norm": 10.702543680039083, "learning_rate": 2.7084025485248827e-06, "loss": 1.0285, "step": 6911 }, { "epoch": 0.9785517094924613, "grad_norm": 8.660462824411164, "learning_rate": 2.7078313384712084e-06, "loss": 1.1781, "step": 6912 }, { "epoch": 0.9786932823670985, "grad_norm": 7.593504850723989, "learning_rate": 2.7072601174919965e-06, "loss": 0.9685, "step": 6913 }, { "epoch": 0.9788348552417356, "grad_norm": 8.800994071300492, "learning_rate": 2.7066888856172737e-06, "loss": 1.2369, "step": 6914 }, { "epoch": 0.9789764281163729, "grad_norm": 8.02582862830365, "learning_rate": 2.70611764287707e-06, "loss": 0.9572, "step": 6915 }, { "epoch": 0.9791180009910101, "grad_norm": 8.20011849666668, "learning_rate": 2.7055463893014156e-06, "loss": 0.9622, "step": 6916 }, { "epoch": 0.9792595738656473, "grad_norm": 8.603455989081056, "learning_rate": 2.7049751249203414e-06, "loss": 0.917, "step": 6917 }, { "epoch": 0.9794011467402846, "grad_norm": 7.689854703207683, "learning_rate": 2.7044038497638782e-06, "loss": 1.0584, "step": 6918 }, { "epoch": 0.9795427196149218, "grad_norm": 8.079852216521061, "learning_rate": 2.7038325638620563e-06, "loss": 0.9833, "step": 6919 }, { "epoch": 0.979684292489559, "grad_norm": 8.844785595975152, "learning_rate": 2.7032612672449084e-06, "loss": 1.0911, "step": 6920 }, { "epoch": 0.9798258653641962, "grad_norm": 6.82481321421587, "learning_rate": 2.7026899599424674e-06, "loss": 1.002, "step": 6921 }, { "epoch": 0.9799674382388335, "grad_norm": 7.377146004560803, "learning_rate": 2.702118641984766e-06, "loss": 0.9734, "step": 6922 }, { "epoch": 0.9801090111134707, "grad_norm": 9.100532231119697, "learning_rate": 2.7015473134018382e-06, "loss": 1.1018, "step": 6923 }, { "epoch": 0.9802505839881078, "grad_norm": 7.680450929278227, "learning_rate": 2.700975974223719e-06, "loss": 1.0427, "step": 6924 }, { "epoch": 0.9803921568627451, "grad_norm": 8.929082147432931, "learning_rate": 2.700404624480443e-06, "loss": 1.0011, "step": 6925 }, { "epoch": 0.9805337297373823, "grad_norm": 9.560348368406508, "learning_rate": 2.699833264202044e-06, "loss": 0.9979, "step": 6926 }, { "epoch": 0.9806753026120195, "grad_norm": 7.855329864199888, "learning_rate": 2.6992618934185604e-06, "loss": 1.0243, "step": 6927 }, { "epoch": 0.9808168754866567, "grad_norm": 9.537715132500576, "learning_rate": 2.698690512160027e-06, "loss": 1.1916, "step": 6928 }, { "epoch": 0.980958448361294, "grad_norm": 7.466403991756355, "learning_rate": 2.6981191204564825e-06, "loss": 0.9799, "step": 6929 }, { "epoch": 0.9811000212359312, "grad_norm": 6.870683736967358, "learning_rate": 2.6975477183379624e-06, "loss": 0.9164, "step": 6930 }, { "epoch": 0.9812415941105684, "grad_norm": 7.173631639954879, "learning_rate": 2.6969763058345067e-06, "loss": 0.9503, "step": 6931 }, { "epoch": 0.9813831669852057, "grad_norm": 8.304809684249337, "learning_rate": 2.696404882976153e-06, "loss": 1.0299, "step": 6932 }, { "epoch": 0.9815247398598429, "grad_norm": 7.735004807848629, "learning_rate": 2.6958334497929416e-06, "loss": 1.0413, "step": 6933 }, { "epoch": 0.9816663127344801, "grad_norm": 6.837537480169641, "learning_rate": 2.695262006314912e-06, "loss": 0.9742, "step": 6934 }, { "epoch": 0.9818078856091172, "grad_norm": 9.165298729755843, "learning_rate": 2.694690552572104e-06, "loss": 1.0992, "step": 6935 }, { "epoch": 0.9819494584837545, "grad_norm": 8.006629581074371, "learning_rate": 2.6941190885945582e-06, "loss": 1.061, "step": 6936 }, { "epoch": 0.9820910313583917, "grad_norm": 8.56181878491724, "learning_rate": 2.6935476144123173e-06, "loss": 1.0922, "step": 6937 }, { "epoch": 0.9822326042330289, "grad_norm": 7.391768191678613, "learning_rate": 2.692976130055422e-06, "loss": 0.9732, "step": 6938 }, { "epoch": 0.9823741771076662, "grad_norm": 8.35583580890803, "learning_rate": 2.692404635553917e-06, "loss": 1.1586, "step": 6939 }, { "epoch": 0.9825157499823034, "grad_norm": 7.563103013390873, "learning_rate": 2.691833130937842e-06, "loss": 0.9469, "step": 6940 }, { "epoch": 0.9826573228569406, "grad_norm": 8.173013465600476, "learning_rate": 2.6912616162372434e-06, "loss": 1.0501, "step": 6941 }, { "epoch": 0.9827988957315779, "grad_norm": 7.28257431626173, "learning_rate": 2.690690091482164e-06, "loss": 1.0039, "step": 6942 }, { "epoch": 0.9829404686062151, "grad_norm": 7.164098442095636, "learning_rate": 2.6901185567026484e-06, "loss": 0.9185, "step": 6943 }, { "epoch": 0.9830820414808523, "grad_norm": 8.747320146262565, "learning_rate": 2.689547011928742e-06, "loss": 1.0151, "step": 6944 }, { "epoch": 0.9832236143554894, "grad_norm": 6.969654520631772, "learning_rate": 2.6889754571904907e-06, "loss": 1.0174, "step": 6945 }, { "epoch": 0.9833651872301267, "grad_norm": 9.10897255690676, "learning_rate": 2.68840389251794e-06, "loss": 1.0558, "step": 6946 }, { "epoch": 0.9835067601047639, "grad_norm": 8.526452584560463, "learning_rate": 2.687832317941138e-06, "loss": 1.1412, "step": 6947 }, { "epoch": 0.9836483329794011, "grad_norm": 10.355908948937868, "learning_rate": 2.687260733490131e-06, "loss": 1.144, "step": 6948 }, { "epoch": 0.9837899058540384, "grad_norm": 8.215963261062509, "learning_rate": 2.6866891391949664e-06, "loss": 1.1439, "step": 6949 }, { "epoch": 0.9839314787286756, "grad_norm": 7.8281384009210395, "learning_rate": 2.6861175350856937e-06, "loss": 0.9624, "step": 6950 }, { "epoch": 0.9840730516033128, "grad_norm": 6.6034751875055875, "learning_rate": 2.6855459211923603e-06, "loss": 1.0885, "step": 6951 }, { "epoch": 0.98421462447795, "grad_norm": 7.483740666213044, "learning_rate": 2.6849742975450165e-06, "loss": 0.9876, "step": 6952 }, { "epoch": 0.9843561973525873, "grad_norm": 8.857718391880795, "learning_rate": 2.684402664173711e-06, "loss": 1.0666, "step": 6953 }, { "epoch": 0.9844977702272245, "grad_norm": 9.035364395165143, "learning_rate": 2.6838310211084954e-06, "loss": 1.2538, "step": 6954 }, { "epoch": 0.9846393431018616, "grad_norm": 12.732618863764156, "learning_rate": 2.6832593683794206e-06, "loss": 1.1711, "step": 6955 }, { "epoch": 0.9847809159764989, "grad_norm": 7.764480107300805, "learning_rate": 2.6826877060165373e-06, "loss": 1.1393, "step": 6956 }, { "epoch": 0.9849224888511361, "grad_norm": 7.740787075349537, "learning_rate": 2.6821160340498975e-06, "loss": 1.0126, "step": 6957 }, { "epoch": 0.9850640617257733, "grad_norm": 9.170400055756412, "learning_rate": 2.681544352509553e-06, "loss": 1.2324, "step": 6958 }, { "epoch": 0.9852056346004106, "grad_norm": 7.402026038284082, "learning_rate": 2.6809726614255575e-06, "loss": 1.0752, "step": 6959 }, { "epoch": 0.9853472074750478, "grad_norm": 8.793438146900186, "learning_rate": 2.680400960827965e-06, "loss": 1.1268, "step": 6960 }, { "epoch": 0.985488780349685, "grad_norm": 8.150387582748653, "learning_rate": 2.679829250746827e-06, "loss": 1.0565, "step": 6961 }, { "epoch": 0.9856303532243222, "grad_norm": 9.977789054397897, "learning_rate": 2.6792575312122005e-06, "loss": 1.1121, "step": 6962 }, { "epoch": 0.9857719260989595, "grad_norm": 11.556136736152288, "learning_rate": 2.6786858022541385e-06, "loss": 1.1126, "step": 6963 }, { "epoch": 0.9859134989735967, "grad_norm": 7.7471532207723275, "learning_rate": 2.6781140639026975e-06, "loss": 1.0456, "step": 6964 }, { "epoch": 0.9860550718482339, "grad_norm": 6.532773073009463, "learning_rate": 2.6775423161879333e-06, "loss": 1.0431, "step": 6965 }, { "epoch": 0.986196644722871, "grad_norm": 8.19041565675217, "learning_rate": 2.676970559139902e-06, "loss": 1.0344, "step": 6966 }, { "epoch": 0.9863382175975083, "grad_norm": 8.571370006542857, "learning_rate": 2.676398792788659e-06, "loss": 1.0979, "step": 6967 }, { "epoch": 0.9864797904721455, "grad_norm": 8.372267092144579, "learning_rate": 2.675827017164264e-06, "loss": 1.085, "step": 6968 }, { "epoch": 0.9866213633467827, "grad_norm": 8.480615840358428, "learning_rate": 2.675255232296774e-06, "loss": 0.9799, "step": 6969 }, { "epoch": 0.98676293622142, "grad_norm": 8.851862014322494, "learning_rate": 2.674683438216247e-06, "loss": 0.9891, "step": 6970 }, { "epoch": 0.9869045090960572, "grad_norm": 8.927217130129442, "learning_rate": 2.674111634952742e-06, "loss": 1.0794, "step": 6971 }, { "epoch": 0.9870460819706944, "grad_norm": 8.61908375271572, "learning_rate": 2.673539822536318e-06, "loss": 0.9803, "step": 6972 }, { "epoch": 0.9871876548453317, "grad_norm": 7.316899467825537, "learning_rate": 2.672968000997035e-06, "loss": 1.0163, "step": 6973 }, { "epoch": 0.9873292277199689, "grad_norm": 7.0529134046787, "learning_rate": 2.6723961703649525e-06, "loss": 0.981, "step": 6974 }, { "epoch": 0.9874708005946061, "grad_norm": 6.983082081987117, "learning_rate": 2.6718243306701317e-06, "loss": 1.0858, "step": 6975 }, { "epoch": 0.9876123734692432, "grad_norm": 6.802437726807913, "learning_rate": 2.6712524819426355e-06, "loss": 1.0379, "step": 6976 }, { "epoch": 0.9877539463438805, "grad_norm": 9.97008923948825, "learning_rate": 2.6706806242125232e-06, "loss": 1.2116, "step": 6977 }, { "epoch": 0.9878955192185177, "grad_norm": 8.003978693547216, "learning_rate": 2.670108757509858e-06, "loss": 1.1232, "step": 6978 }, { "epoch": 0.9880370920931549, "grad_norm": 8.902363461200204, "learning_rate": 2.6695368818647015e-06, "loss": 1.0596, "step": 6979 }, { "epoch": 0.9881786649677922, "grad_norm": 10.073828344630945, "learning_rate": 2.668964997307118e-06, "loss": 1.0649, "step": 6980 }, { "epoch": 0.9883202378424294, "grad_norm": 7.523508992553618, "learning_rate": 2.6683931038671705e-06, "loss": 1.1066, "step": 6981 }, { "epoch": 0.9884618107170666, "grad_norm": 8.05595285890153, "learning_rate": 2.6678212015749234e-06, "loss": 1.0922, "step": 6982 }, { "epoch": 0.9886033835917039, "grad_norm": 8.615355475868999, "learning_rate": 2.6672492904604403e-06, "loss": 1.0542, "step": 6983 }, { "epoch": 0.9887449564663411, "grad_norm": 10.093274557059255, "learning_rate": 2.6666773705537873e-06, "loss": 1.0643, "step": 6984 }, { "epoch": 0.9888865293409783, "grad_norm": 8.657755696692606, "learning_rate": 2.6661054418850286e-06, "loss": 1.073, "step": 6985 }, { "epoch": 0.9890281022156154, "grad_norm": 8.628835074259417, "learning_rate": 2.665533504484231e-06, "loss": 1.0525, "step": 6986 }, { "epoch": 0.9891696750902527, "grad_norm": 10.128728109574064, "learning_rate": 2.6649615583814613e-06, "loss": 1.0776, "step": 6987 }, { "epoch": 0.9893112479648899, "grad_norm": 9.306343655086163, "learning_rate": 2.6643896036067847e-06, "loss": 1.1046, "step": 6988 }, { "epoch": 0.9894528208395271, "grad_norm": 9.880494725740743, "learning_rate": 2.6638176401902693e-06, "loss": 1.0851, "step": 6989 }, { "epoch": 0.9895943937141644, "grad_norm": 7.697413061976062, "learning_rate": 2.6632456681619817e-06, "loss": 1.0049, "step": 6990 }, { "epoch": 0.9897359665888016, "grad_norm": 8.907774249364245, "learning_rate": 2.662673687551992e-06, "loss": 1.0973, "step": 6991 }, { "epoch": 0.9898775394634388, "grad_norm": 9.431641636143556, "learning_rate": 2.6621016983903686e-06, "loss": 0.9141, "step": 6992 }, { "epoch": 0.990019112338076, "grad_norm": 9.083743118235232, "learning_rate": 2.661529700707179e-06, "loss": 1.0509, "step": 6993 }, { "epoch": 0.9901606852127133, "grad_norm": 7.305386465974016, "learning_rate": 2.6609576945324933e-06, "loss": 0.9542, "step": 6994 }, { "epoch": 0.9903022580873505, "grad_norm": 7.255701388337226, "learning_rate": 2.6603856798963817e-06, "loss": 0.968, "step": 6995 }, { "epoch": 0.9904438309619877, "grad_norm": 9.431060008466746, "learning_rate": 2.6598136568289144e-06, "loss": 1.2035, "step": 6996 }, { "epoch": 0.9905854038366249, "grad_norm": 9.427346533935115, "learning_rate": 2.6592416253601626e-06, "loss": 1.1352, "step": 6997 }, { "epoch": 0.9907269767112621, "grad_norm": 10.89011954361918, "learning_rate": 2.658669585520197e-06, "loss": 1.1269, "step": 6998 }, { "epoch": 0.9908685495858993, "grad_norm": 10.929535325513186, "learning_rate": 2.65809753733909e-06, "loss": 1.0482, "step": 6999 }, { "epoch": 0.9910101224605365, "grad_norm": 12.208696761379903, "learning_rate": 2.657525480846913e-06, "loss": 1.1484, "step": 7000 }, { "epoch": 0.9911516953351738, "grad_norm": 8.39778658451398, "learning_rate": 2.6569534160737386e-06, "loss": 0.963, "step": 7001 }, { "epoch": 0.991293268209811, "grad_norm": 8.155117614010226, "learning_rate": 2.656381343049641e-06, "loss": 1.1892, "step": 7002 }, { "epoch": 0.9914348410844482, "grad_norm": 7.522029820280395, "learning_rate": 2.655809261804693e-06, "loss": 0.995, "step": 7003 }, { "epoch": 0.9915764139590855, "grad_norm": 9.078825879247995, "learning_rate": 2.655237172368967e-06, "loss": 1.0115, "step": 7004 }, { "epoch": 0.9917179868337227, "grad_norm": 9.25569183912788, "learning_rate": 2.654665074772539e-06, "loss": 1.0756, "step": 7005 }, { "epoch": 0.9918595597083599, "grad_norm": 9.611393017269759, "learning_rate": 2.6540929690454835e-06, "loss": 1.0591, "step": 7006 }, { "epoch": 0.992001132582997, "grad_norm": 9.819814494880603, "learning_rate": 2.653520855217876e-06, "loss": 1.1373, "step": 7007 }, { "epoch": 0.9921427054576343, "grad_norm": 10.417720853232673, "learning_rate": 2.652948733319792e-06, "loss": 1.0961, "step": 7008 }, { "epoch": 0.9922842783322715, "grad_norm": 8.737574010270867, "learning_rate": 2.652376603381306e-06, "loss": 1.0603, "step": 7009 }, { "epoch": 0.9924258512069087, "grad_norm": 8.756109665938041, "learning_rate": 2.651804465432496e-06, "loss": 1.1342, "step": 7010 }, { "epoch": 0.992567424081546, "grad_norm": 9.629252497577856, "learning_rate": 2.6512323195034384e-06, "loss": 1.0487, "step": 7011 }, { "epoch": 0.9927089969561832, "grad_norm": 8.398442496364792, "learning_rate": 2.6506601656242105e-06, "loss": 1.0074, "step": 7012 }, { "epoch": 0.9928505698308204, "grad_norm": 11.766132075311406, "learning_rate": 2.65008800382489e-06, "loss": 1.1793, "step": 7013 }, { "epoch": 0.9929921427054577, "grad_norm": 7.051025251145053, "learning_rate": 2.6495158341355548e-06, "loss": 0.9507, "step": 7014 }, { "epoch": 0.9931337155800949, "grad_norm": 8.030727028054253, "learning_rate": 2.648943656586284e-06, "loss": 0.8775, "step": 7015 }, { "epoch": 0.9932752884547321, "grad_norm": 9.701887706061907, "learning_rate": 2.648371471207156e-06, "loss": 1.187, "step": 7016 }, { "epoch": 0.9934168613293692, "grad_norm": 8.347787401724755, "learning_rate": 2.6477992780282507e-06, "loss": 0.9609, "step": 7017 }, { "epoch": 0.9935584342040065, "grad_norm": 8.80918313230552, "learning_rate": 2.6472270770796475e-06, "loss": 1.1719, "step": 7018 }, { "epoch": 0.9937000070786437, "grad_norm": 8.523736013190378, "learning_rate": 2.646654868391427e-06, "loss": 1.0406, "step": 7019 }, { "epoch": 0.9938415799532809, "grad_norm": 9.194691445015595, "learning_rate": 2.646082651993668e-06, "loss": 1.0778, "step": 7020 }, { "epoch": 0.9939831528279182, "grad_norm": 8.738210965156542, "learning_rate": 2.6455104279164546e-06, "loss": 1.0593, "step": 7021 }, { "epoch": 0.9941247257025554, "grad_norm": 7.226909303333845, "learning_rate": 2.6449381961898658e-06, "loss": 0.8749, "step": 7022 }, { "epoch": 0.9942662985771926, "grad_norm": 8.903912796402349, "learning_rate": 2.644365956843984e-06, "loss": 1.0536, "step": 7023 }, { "epoch": 0.9944078714518299, "grad_norm": 7.865962095012649, "learning_rate": 2.643793709908892e-06, "loss": 0.9549, "step": 7024 }, { "epoch": 0.9945494443264671, "grad_norm": 8.501401449121262, "learning_rate": 2.6432214554146717e-06, "loss": 0.9873, "step": 7025 }, { "epoch": 0.9946910172011043, "grad_norm": 7.77985194871947, "learning_rate": 2.6426491933914062e-06, "loss": 0.9433, "step": 7026 }, { "epoch": 0.9948325900757415, "grad_norm": 8.220198471012244, "learning_rate": 2.642076923869178e-06, "loss": 0.998, "step": 7027 }, { "epoch": 0.9949741629503787, "grad_norm": 9.96080690784492, "learning_rate": 2.6415046468780726e-06, "loss": 1.1308, "step": 7028 }, { "epoch": 0.9951157358250159, "grad_norm": 9.508929021721464, "learning_rate": 2.6409323624481743e-06, "loss": 1.1805, "step": 7029 }, { "epoch": 0.9952573086996531, "grad_norm": 8.754343753011897, "learning_rate": 2.6403600706095655e-06, "loss": 1.0067, "step": 7030 }, { "epoch": 0.9953988815742904, "grad_norm": 8.237262487388199, "learning_rate": 2.6397877713923333e-06, "loss": 1.2711, "step": 7031 }, { "epoch": 0.9955404544489276, "grad_norm": 8.69294447413367, "learning_rate": 2.6392154648265617e-06, "loss": 1.0196, "step": 7032 }, { "epoch": 0.9956820273235648, "grad_norm": 8.004576327795244, "learning_rate": 2.6386431509423373e-06, "loss": 1.0807, "step": 7033 }, { "epoch": 0.995823600198202, "grad_norm": 9.058522581517545, "learning_rate": 2.6380708297697456e-06, "loss": 1.1527, "step": 7034 }, { "epoch": 0.9959651730728393, "grad_norm": 7.622729604433995, "learning_rate": 2.637498501338873e-06, "loss": 0.9001, "step": 7035 }, { "epoch": 0.9961067459474765, "grad_norm": 7.171700801686865, "learning_rate": 2.6369261656798067e-06, "loss": 1.1448, "step": 7036 }, { "epoch": 0.9962483188221137, "grad_norm": 9.907306217721304, "learning_rate": 2.636353822822635e-06, "loss": 1.0617, "step": 7037 }, { "epoch": 0.9963898916967509, "grad_norm": 7.371428319861322, "learning_rate": 2.6357814727974434e-06, "loss": 1.0969, "step": 7038 }, { "epoch": 0.9965314645713881, "grad_norm": 9.475054061856198, "learning_rate": 2.6352091156343213e-06, "loss": 1.1422, "step": 7039 }, { "epoch": 0.9966730374460253, "grad_norm": 7.539796404126164, "learning_rate": 2.6346367513633574e-06, "loss": 1.0613, "step": 7040 }, { "epoch": 0.9968146103206625, "grad_norm": 8.184264031321021, "learning_rate": 2.6340643800146387e-06, "loss": 1.0586, "step": 7041 }, { "epoch": 0.9969561831952998, "grad_norm": 8.044249704484905, "learning_rate": 2.6334920016182565e-06, "loss": 1.0706, "step": 7042 }, { "epoch": 0.997097756069937, "grad_norm": 9.377741705215925, "learning_rate": 2.6329196162042987e-06, "loss": 1.0123, "step": 7043 }, { "epoch": 0.9972393289445742, "grad_norm": 8.365996574326374, "learning_rate": 2.6323472238028564e-06, "loss": 0.9281, "step": 7044 }, { "epoch": 0.9973809018192115, "grad_norm": 8.812662758913662, "learning_rate": 2.6317748244440194e-06, "loss": 1.0961, "step": 7045 }, { "epoch": 0.9975224746938487, "grad_norm": 8.907282612150286, "learning_rate": 2.6312024181578776e-06, "loss": 1.0924, "step": 7046 }, { "epoch": 0.9976640475684859, "grad_norm": 9.579756580676753, "learning_rate": 2.6306300049745227e-06, "loss": 1.068, "step": 7047 }, { "epoch": 0.997805620443123, "grad_norm": 6.6976865389748745, "learning_rate": 2.6300575849240455e-06, "loss": 0.9372, "step": 7048 }, { "epoch": 0.9979471933177603, "grad_norm": 10.002128374575301, "learning_rate": 2.629485158036538e-06, "loss": 1.0228, "step": 7049 }, { "epoch": 0.9980887661923975, "grad_norm": 8.650715404760474, "learning_rate": 2.6289127243420924e-06, "loss": 1.0045, "step": 7050 }, { "epoch": 0.9982303390670347, "grad_norm": 8.305547344950767, "learning_rate": 2.628340283870801e-06, "loss": 1.0716, "step": 7051 }, { "epoch": 0.998371911941672, "grad_norm": 8.754893215362614, "learning_rate": 2.627767836652757e-06, "loss": 1.0905, "step": 7052 }, { "epoch": 0.9985134848163092, "grad_norm": 7.77311851575421, "learning_rate": 2.627195382718053e-06, "loss": 1.0509, "step": 7053 }, { "epoch": 0.9986550576909464, "grad_norm": 7.968439073667562, "learning_rate": 2.626622922096782e-06, "loss": 1.072, "step": 7054 }, { "epoch": 0.9987966305655837, "grad_norm": 7.574106436925157, "learning_rate": 2.626050454819039e-06, "loss": 1.064, "step": 7055 }, { "epoch": 0.9989382034402209, "grad_norm": 8.962607376966389, "learning_rate": 2.6254779809149174e-06, "loss": 1.1013, "step": 7056 }, { "epoch": 0.9990797763148581, "grad_norm": 9.455863417282876, "learning_rate": 2.6249055004145118e-06, "loss": 1.1063, "step": 7057 }, { "epoch": 0.9992213491894953, "grad_norm": 9.13094146854807, "learning_rate": 2.6243330133479173e-06, "loss": 1.2489, "step": 7058 }, { "epoch": 0.9993629220641325, "grad_norm": 11.131713213270453, "learning_rate": 2.6237605197452287e-06, "loss": 1.1396, "step": 7059 }, { "epoch": 0.9995044949387697, "grad_norm": 7.121787702319152, "learning_rate": 2.6231880196365423e-06, "loss": 0.8894, "step": 7060 }, { "epoch": 0.9996460678134069, "grad_norm": 8.811549358632078, "learning_rate": 2.6226155130519536e-06, "loss": 1.03, "step": 7061 }, { "epoch": 0.9997876406880442, "grad_norm": 7.905069738052699, "learning_rate": 2.6220430000215584e-06, "loss": 1.1043, "step": 7062 }, { "epoch": 0.9999292135626814, "grad_norm": 7.652145344224757, "learning_rate": 2.6214704805754537e-06, "loss": 1.0135, "step": 7063 }, { "epoch": 1.0000707864373186, "grad_norm": 8.30702292905513, "learning_rate": 2.620897954743736e-06, "loss": 0.9589, "step": 7064 }, { "epoch": 1.0002123593119558, "grad_norm": 7.426739770790873, "learning_rate": 2.6203254225565034e-06, "loss": 0.8964, "step": 7065 }, { "epoch": 1.000353932186593, "grad_norm": 8.974230856068269, "learning_rate": 2.619752884043854e-06, "loss": 0.9368, "step": 7066 }, { "epoch": 1.0004955050612303, "grad_norm": 7.81879068024052, "learning_rate": 2.619180339235884e-06, "loss": 0.8705, "step": 7067 }, { "epoch": 1.0006370779358675, "grad_norm": 7.917744961301595, "learning_rate": 2.618607788162692e-06, "loss": 0.8741, "step": 7068 }, { "epoch": 1.0007786508105048, "grad_norm": 7.77727925050163, "learning_rate": 2.618035230854378e-06, "loss": 0.7775, "step": 7069 }, { "epoch": 1.000920223685142, "grad_norm": 7.680232388636234, "learning_rate": 2.6174626673410385e-06, "loss": 0.8965, "step": 7070 }, { "epoch": 1.0010617965597792, "grad_norm": 9.226508338245983, "learning_rate": 2.616890097652775e-06, "loss": 0.9571, "step": 7071 }, { "epoch": 1.0012033694344165, "grad_norm": 11.164584938462538, "learning_rate": 2.6163175218196862e-06, "loss": 0.8772, "step": 7072 }, { "epoch": 1.0013449423090537, "grad_norm": 10.766352436531031, "learning_rate": 2.615744939871872e-06, "loss": 0.9848, "step": 7073 }, { "epoch": 1.0014865151836907, "grad_norm": 8.303834916254432, "learning_rate": 2.6151723518394327e-06, "loss": 0.9016, "step": 7074 }, { "epoch": 1.001628088058328, "grad_norm": 9.511668618322716, "learning_rate": 2.6145997577524683e-06, "loss": 0.905, "step": 7075 }, { "epoch": 1.0017696609329652, "grad_norm": 7.893912961701809, "learning_rate": 2.6140271576410807e-06, "loss": 0.7883, "step": 7076 }, { "epoch": 1.0019112338076024, "grad_norm": 8.000888298308615, "learning_rate": 2.613454551535371e-06, "loss": 0.8509, "step": 7077 }, { "epoch": 1.0020528066822396, "grad_norm": 9.89298744371605, "learning_rate": 2.6128819394654385e-06, "loss": 0.8814, "step": 7078 }, { "epoch": 1.0021943795568768, "grad_norm": 7.909224964468656, "learning_rate": 2.6123093214613875e-06, "loss": 0.8522, "step": 7079 }, { "epoch": 1.002335952431514, "grad_norm": 10.637073075258929, "learning_rate": 2.6117366975533187e-06, "loss": 0.8534, "step": 7080 }, { "epoch": 1.0024775253061513, "grad_norm": 8.905132451129964, "learning_rate": 2.6111640677713356e-06, "loss": 0.8865, "step": 7081 }, { "epoch": 1.0026190981807885, "grad_norm": 9.133303691880885, "learning_rate": 2.6105914321455405e-06, "loss": 0.8919, "step": 7082 }, { "epoch": 1.0027606710554258, "grad_norm": 7.839179785894451, "learning_rate": 2.6100187907060365e-06, "loss": 0.8468, "step": 7083 }, { "epoch": 1.002902243930063, "grad_norm": 10.692554015049263, "learning_rate": 2.609446143482926e-06, "loss": 0.9015, "step": 7084 }, { "epoch": 1.0030438168047002, "grad_norm": 9.221089728908087, "learning_rate": 2.6088734905063134e-06, "loss": 0.9756, "step": 7085 }, { "epoch": 1.0031853896793375, "grad_norm": 7.989462827116047, "learning_rate": 2.6083008318063023e-06, "loss": 0.8176, "step": 7086 }, { "epoch": 1.0033269625539747, "grad_norm": 9.173029084519968, "learning_rate": 2.6077281674129974e-06, "loss": 0.9392, "step": 7087 }, { "epoch": 1.003468535428612, "grad_norm": 8.084189405696032, "learning_rate": 2.607155497356504e-06, "loss": 1.0132, "step": 7088 }, { "epoch": 1.0036101083032491, "grad_norm": 7.673508977042507, "learning_rate": 2.6065828216669254e-06, "loss": 0.7512, "step": 7089 }, { "epoch": 1.0037516811778864, "grad_norm": 9.057584284391046, "learning_rate": 2.606010140374367e-06, "loss": 0.9648, "step": 7090 }, { "epoch": 1.0038932540525236, "grad_norm": 7.139189928620338, "learning_rate": 2.6054374535089345e-06, "loss": 0.8842, "step": 7091 }, { "epoch": 1.0040348269271608, "grad_norm": 8.041786733202061, "learning_rate": 2.604864761100734e-06, "loss": 0.8256, "step": 7092 }, { "epoch": 1.004176399801798, "grad_norm": 10.488128490507657, "learning_rate": 2.604292063179871e-06, "loss": 0.8701, "step": 7093 }, { "epoch": 1.0043179726764353, "grad_norm": 8.677306857365078, "learning_rate": 2.6037193597764524e-06, "loss": 0.9637, "step": 7094 }, { "epoch": 1.0044595455510723, "grad_norm": 7.247119199677616, "learning_rate": 2.6031466509205843e-06, "loss": 0.7998, "step": 7095 }, { "epoch": 1.0046011184257095, "grad_norm": 7.446655536576129, "learning_rate": 2.6025739366423735e-06, "loss": 0.9304, "step": 7096 }, { "epoch": 1.0047426913003468, "grad_norm": 8.564652889275171, "learning_rate": 2.602001216971927e-06, "loss": 0.9392, "step": 7097 }, { "epoch": 1.004884264174984, "grad_norm": 9.24142925025141, "learning_rate": 2.601428491939354e-06, "loss": 0.8654, "step": 7098 }, { "epoch": 1.0050258370496212, "grad_norm": 9.686408787458534, "learning_rate": 2.600855761574759e-06, "loss": 0.8917, "step": 7099 }, { "epoch": 1.0051674099242585, "grad_norm": 8.225257067156065, "learning_rate": 2.6002830259082527e-06, "loss": 0.8538, "step": 7100 }, { "epoch": 1.0053089827988957, "grad_norm": 9.859364166495633, "learning_rate": 2.5997102849699424e-06, "loss": 0.9203, "step": 7101 }, { "epoch": 1.005450555673533, "grad_norm": 9.898656010191761, "learning_rate": 2.5991375387899364e-06, "loss": 0.9922, "step": 7102 }, { "epoch": 1.0055921285481701, "grad_norm": 8.18105242835231, "learning_rate": 2.598564787398345e-06, "loss": 0.9161, "step": 7103 }, { "epoch": 1.0057337014228074, "grad_norm": 8.929085137984025, "learning_rate": 2.5979920308252753e-06, "loss": 0.8405, "step": 7104 }, { "epoch": 1.0058752742974446, "grad_norm": 9.497785209677051, "learning_rate": 2.597419269100838e-06, "loss": 0.9632, "step": 7105 }, { "epoch": 1.0060168471720818, "grad_norm": 9.986365369148718, "learning_rate": 2.596846502255142e-06, "loss": 0.8548, "step": 7106 }, { "epoch": 1.006158420046719, "grad_norm": 7.4952817698745955, "learning_rate": 2.596273730318298e-06, "loss": 0.6667, "step": 7107 }, { "epoch": 1.0062999929213563, "grad_norm": 8.978803044602857, "learning_rate": 2.595700953320415e-06, "loss": 0.8455, "step": 7108 }, { "epoch": 1.0064415657959935, "grad_norm": 7.813021222847286, "learning_rate": 2.595128171291605e-06, "loss": 0.9085, "step": 7109 }, { "epoch": 1.0065831386706308, "grad_norm": 9.009606215056829, "learning_rate": 2.5945553842619776e-06, "loss": 0.7858, "step": 7110 }, { "epoch": 1.006724711545268, "grad_norm": 9.572862383522871, "learning_rate": 2.5939825922616443e-06, "loss": 0.9732, "step": 7111 }, { "epoch": 1.0068662844199052, "grad_norm": 8.385066117883003, "learning_rate": 2.593409795320716e-06, "loss": 0.7779, "step": 7112 }, { "epoch": 1.0070078572945425, "grad_norm": 9.136132960726558, "learning_rate": 2.5928369934693043e-06, "loss": 0.8302, "step": 7113 }, { "epoch": 1.0071494301691797, "grad_norm": 8.742348077166897, "learning_rate": 2.592264186737522e-06, "loss": 0.9205, "step": 7114 }, { "epoch": 1.007291003043817, "grad_norm": 8.833991500038582, "learning_rate": 2.5916913751554795e-06, "loss": 0.8328, "step": 7115 }, { "epoch": 1.007432575918454, "grad_norm": 8.38913956605154, "learning_rate": 2.5911185587532895e-06, "loss": 0.9386, "step": 7116 }, { "epoch": 1.0075741487930912, "grad_norm": 8.678645389876387, "learning_rate": 2.5905457375610647e-06, "loss": 0.9546, "step": 7117 }, { "epoch": 1.0077157216677284, "grad_norm": 10.0648603810359, "learning_rate": 2.5899729116089183e-06, "loss": 0.8191, "step": 7118 }, { "epoch": 1.0078572945423656, "grad_norm": 7.433648658986642, "learning_rate": 2.589400080926964e-06, "loss": 0.9062, "step": 7119 }, { "epoch": 1.0079988674170028, "grad_norm": 10.825978956821187, "learning_rate": 2.5888272455453136e-06, "loss": 0.9311, "step": 7120 }, { "epoch": 1.00814044029164, "grad_norm": 8.102445784178187, "learning_rate": 2.5882544054940806e-06, "loss": 0.7978, "step": 7121 }, { "epoch": 1.0082820131662773, "grad_norm": 10.462759188377815, "learning_rate": 2.5876815608033797e-06, "loss": 0.9307, "step": 7122 }, { "epoch": 1.0084235860409145, "grad_norm": 10.89870911399889, "learning_rate": 2.587108711503324e-06, "loss": 0.8599, "step": 7123 }, { "epoch": 1.0085651589155518, "grad_norm": 8.989830205245292, "learning_rate": 2.586535857624028e-06, "loss": 0.8942, "step": 7124 }, { "epoch": 1.008706731790189, "grad_norm": 10.312859916186145, "learning_rate": 2.5859629991956075e-06, "loss": 0.7593, "step": 7125 }, { "epoch": 1.0088483046648262, "grad_norm": 7.893841682680903, "learning_rate": 2.585390136248176e-06, "loss": 0.7902, "step": 7126 }, { "epoch": 1.0089898775394635, "grad_norm": 7.956152439043685, "learning_rate": 2.5848172688118482e-06, "loss": 0.9003, "step": 7127 }, { "epoch": 1.0091314504141007, "grad_norm": 7.971105848148223, "learning_rate": 2.5842443969167402e-06, "loss": 0.8742, "step": 7128 }, { "epoch": 1.009273023288738, "grad_norm": 9.682196759105407, "learning_rate": 2.583671520592967e-06, "loss": 0.8074, "step": 7129 }, { "epoch": 1.0094145961633751, "grad_norm": 8.160679809077738, "learning_rate": 2.583098639870644e-06, "loss": 0.805, "step": 7130 }, { "epoch": 1.0095561690380124, "grad_norm": 10.167185368881366, "learning_rate": 2.582525754779888e-06, "loss": 0.8332, "step": 7131 }, { "epoch": 1.0096977419126496, "grad_norm": 9.876202473885181, "learning_rate": 2.581952865350815e-06, "loss": 0.9131, "step": 7132 }, { "epoch": 1.0098393147872868, "grad_norm": 9.48153015406796, "learning_rate": 2.58137997161354e-06, "loss": 1.0051, "step": 7133 }, { "epoch": 1.009980887661924, "grad_norm": 8.232181611633338, "learning_rate": 2.580807073598181e-06, "loss": 0.887, "step": 7134 }, { "epoch": 1.0101224605365613, "grad_norm": 9.140612062624392, "learning_rate": 2.580234171334855e-06, "loss": 0.8602, "step": 7135 }, { "epoch": 1.0102640334111985, "grad_norm": 8.98467985216493, "learning_rate": 2.5796612648536776e-06, "loss": 0.9659, "step": 7136 }, { "epoch": 1.0104056062858355, "grad_norm": 8.710464971068252, "learning_rate": 2.579088354184767e-06, "loss": 0.8528, "step": 7137 }, { "epoch": 1.0105471791604728, "grad_norm": 7.3408150509867065, "learning_rate": 2.5785154393582405e-06, "loss": 0.8295, "step": 7138 }, { "epoch": 1.01068875203511, "grad_norm": 8.917031491620886, "learning_rate": 2.577942520404216e-06, "loss": 0.9134, "step": 7139 }, { "epoch": 1.0108303249097472, "grad_norm": 8.608749339221514, "learning_rate": 2.577369597352812e-06, "loss": 0.9149, "step": 7140 }, { "epoch": 1.0109718977843845, "grad_norm": 9.463661563990648, "learning_rate": 2.5767966702341454e-06, "loss": 1.0393, "step": 7141 }, { "epoch": 1.0111134706590217, "grad_norm": 10.3684760114399, "learning_rate": 2.576223739078335e-06, "loss": 0.907, "step": 7142 }, { "epoch": 1.011255043533659, "grad_norm": 7.633338496221715, "learning_rate": 2.5756508039155e-06, "loss": 0.7841, "step": 7143 }, { "epoch": 1.0113966164082961, "grad_norm": 8.78169048467063, "learning_rate": 2.575077864775758e-06, "loss": 0.9038, "step": 7144 }, { "epoch": 1.0115381892829334, "grad_norm": 8.254047210255166, "learning_rate": 2.5745049216892286e-06, "loss": 0.8629, "step": 7145 }, { "epoch": 1.0116797621575706, "grad_norm": 9.064752548720303, "learning_rate": 2.5739319746860312e-06, "loss": 0.8056, "step": 7146 }, { "epoch": 1.0118213350322078, "grad_norm": 8.828287470850094, "learning_rate": 2.5733590237962854e-06, "loss": 0.8989, "step": 7147 }, { "epoch": 1.011962907906845, "grad_norm": 7.158136244145266, "learning_rate": 2.57278606905011e-06, "loss": 0.8417, "step": 7148 }, { "epoch": 1.0121044807814823, "grad_norm": 10.17395092104095, "learning_rate": 2.572213110477625e-06, "loss": 0.8041, "step": 7149 }, { "epoch": 1.0122460536561195, "grad_norm": 9.817584038849033, "learning_rate": 2.571640148108951e-06, "loss": 0.8735, "step": 7150 }, { "epoch": 1.0123876265307568, "grad_norm": 9.723427974470065, "learning_rate": 2.5710671819742083e-06, "loss": 0.9513, "step": 7151 }, { "epoch": 1.012529199405394, "grad_norm": 9.456985668906057, "learning_rate": 2.5704942121035163e-06, "loss": 0.9416, "step": 7152 }, { "epoch": 1.0126707722800312, "grad_norm": 8.289366232064841, "learning_rate": 2.5699212385269954e-06, "loss": 0.9181, "step": 7153 }, { "epoch": 1.0128123451546684, "grad_norm": 9.556262406614943, "learning_rate": 2.569348261274768e-06, "loss": 0.8066, "step": 7154 }, { "epoch": 1.0129539180293057, "grad_norm": 8.5437309437939, "learning_rate": 2.5687752803769538e-06, "loss": 0.9146, "step": 7155 }, { "epoch": 1.013095490903943, "grad_norm": 10.173815189147565, "learning_rate": 2.5682022958636752e-06, "loss": 0.9045, "step": 7156 }, { "epoch": 1.01323706377858, "grad_norm": 7.582920971512566, "learning_rate": 2.5676293077650528e-06, "loss": 0.752, "step": 7157 }, { "epoch": 1.0133786366532171, "grad_norm": 9.453089488569663, "learning_rate": 2.5670563161112073e-06, "loss": 0.9852, "step": 7158 }, { "epoch": 1.0135202095278544, "grad_norm": 7.802958528929858, "learning_rate": 2.5664833209322614e-06, "loss": 0.7804, "step": 7159 }, { "epoch": 1.0136617824024916, "grad_norm": 8.32526581944117, "learning_rate": 2.565910322258337e-06, "loss": 0.9441, "step": 7160 }, { "epoch": 1.0138033552771288, "grad_norm": 8.285945842119363, "learning_rate": 2.5653373201195554e-06, "loss": 0.9237, "step": 7161 }, { "epoch": 1.013944928151766, "grad_norm": 7.872651007164945, "learning_rate": 2.564764314546041e-06, "loss": 0.841, "step": 7162 }, { "epoch": 1.0140865010264033, "grad_norm": 10.38985289953259, "learning_rate": 2.564191305567914e-06, "loss": 0.9374, "step": 7163 }, { "epoch": 1.0142280739010405, "grad_norm": 10.308409093292134, "learning_rate": 2.563618293215298e-06, "loss": 0.8615, "step": 7164 }, { "epoch": 1.0143696467756778, "grad_norm": 8.82895028636925, "learning_rate": 2.563045277518316e-06, "loss": 0.9526, "step": 7165 }, { "epoch": 1.014511219650315, "grad_norm": 8.257890569196956, "learning_rate": 2.5624722585070907e-06, "loss": 0.7853, "step": 7166 }, { "epoch": 1.0146527925249522, "grad_norm": 9.96020715766169, "learning_rate": 2.5618992362117453e-06, "loss": 0.8883, "step": 7167 }, { "epoch": 1.0147943653995894, "grad_norm": 9.869280353812451, "learning_rate": 2.561326210662403e-06, "loss": 0.9166, "step": 7168 }, { "epoch": 1.0149359382742267, "grad_norm": 8.196326703062772, "learning_rate": 2.5607531818891877e-06, "loss": 0.9829, "step": 7169 }, { "epoch": 1.015077511148864, "grad_norm": 7.059857979877847, "learning_rate": 2.5601801499222227e-06, "loss": 0.8357, "step": 7170 }, { "epoch": 1.0152190840235011, "grad_norm": 9.429665250206765, "learning_rate": 2.5596071147916325e-06, "loss": 0.9555, "step": 7171 }, { "epoch": 1.0153606568981384, "grad_norm": 11.26301500029955, "learning_rate": 2.5590340765275414e-06, "loss": 0.8151, "step": 7172 }, { "epoch": 1.0155022297727756, "grad_norm": 7.914313866858851, "learning_rate": 2.558461035160072e-06, "loss": 0.8678, "step": 7173 }, { "epoch": 1.0156438026474128, "grad_norm": 8.241599227377357, "learning_rate": 2.5578879907193495e-06, "loss": 0.8785, "step": 7174 }, { "epoch": 1.01578537552205, "grad_norm": 8.723663804885197, "learning_rate": 2.557314943235498e-06, "loss": 0.8856, "step": 7175 }, { "epoch": 1.0159269483966873, "grad_norm": 7.369563798560583, "learning_rate": 2.556741892738643e-06, "loss": 0.8482, "step": 7176 }, { "epoch": 1.0160685212713245, "grad_norm": 9.103832200902403, "learning_rate": 2.5561688392589095e-06, "loss": 0.9403, "step": 7177 }, { "epoch": 1.0162100941459615, "grad_norm": 7.615767345577391, "learning_rate": 2.555595782826423e-06, "loss": 0.8544, "step": 7178 }, { "epoch": 1.0163516670205988, "grad_norm": 8.370091608382483, "learning_rate": 2.555022723471306e-06, "loss": 0.9075, "step": 7179 }, { "epoch": 1.016493239895236, "grad_norm": 8.205600909836154, "learning_rate": 2.554449661223686e-06, "loss": 0.8109, "step": 7180 }, { "epoch": 1.0166348127698732, "grad_norm": 11.341625844941307, "learning_rate": 2.553876596113688e-06, "loss": 0.8745, "step": 7181 }, { "epoch": 1.0167763856445104, "grad_norm": 8.95050042314434, "learning_rate": 2.5533035281714368e-06, "loss": 0.9547, "step": 7182 }, { "epoch": 1.0169179585191477, "grad_norm": 9.576399531061536, "learning_rate": 2.5527304574270596e-06, "loss": 0.8738, "step": 7183 }, { "epoch": 1.017059531393785, "grad_norm": 8.720769395394896, "learning_rate": 2.5521573839106815e-06, "loss": 0.9074, "step": 7184 }, { "epoch": 1.0172011042684221, "grad_norm": 9.5193950031284, "learning_rate": 2.551584307652428e-06, "loss": 0.8595, "step": 7185 }, { "epoch": 1.0173426771430594, "grad_norm": 7.54322767875109, "learning_rate": 2.551011228682427e-06, "loss": 0.7828, "step": 7186 }, { "epoch": 1.0174842500176966, "grad_norm": 10.184844062821206, "learning_rate": 2.5504381470308034e-06, "loss": 0.8271, "step": 7187 }, { "epoch": 1.0176258228923338, "grad_norm": 7.759402816052765, "learning_rate": 2.549865062727684e-06, "loss": 0.8594, "step": 7188 }, { "epoch": 1.017767395766971, "grad_norm": 11.181216046797246, "learning_rate": 2.5492919758031953e-06, "loss": 0.9333, "step": 7189 }, { "epoch": 1.0179089686416083, "grad_norm": 8.303398943866066, "learning_rate": 2.5487188862874635e-06, "loss": 0.8688, "step": 7190 }, { "epoch": 1.0180505415162455, "grad_norm": 8.921246454709346, "learning_rate": 2.5481457942106165e-06, "loss": 0.8794, "step": 7191 }, { "epoch": 1.0181921143908828, "grad_norm": 8.654884203087159, "learning_rate": 2.547572699602781e-06, "loss": 0.8933, "step": 7192 }, { "epoch": 1.01833368726552, "grad_norm": 10.428802704159532, "learning_rate": 2.5469996024940853e-06, "loss": 0.9102, "step": 7193 }, { "epoch": 1.0184752601401572, "grad_norm": 7.725047149020506, "learning_rate": 2.5464265029146546e-06, "loss": 0.8798, "step": 7194 }, { "epoch": 1.0186168330147944, "grad_norm": 10.19347894192425, "learning_rate": 2.545853400894617e-06, "loss": 0.9125, "step": 7195 }, { "epoch": 1.0187584058894317, "grad_norm": 9.812297843259945, "learning_rate": 2.545280296464101e-06, "loss": 0.884, "step": 7196 }, { "epoch": 1.018899978764069, "grad_norm": 9.133540924757426, "learning_rate": 2.544707189653233e-06, "loss": 0.8561, "step": 7197 }, { "epoch": 1.019041551638706, "grad_norm": 8.10166938439016, "learning_rate": 2.5441340804921413e-06, "loss": 0.7912, "step": 7198 }, { "epoch": 1.0191831245133431, "grad_norm": 9.082957782039532, "learning_rate": 2.5435609690109545e-06, "loss": 0.8995, "step": 7199 }, { "epoch": 1.0193246973879804, "grad_norm": 8.357997203190196, "learning_rate": 2.5429878552398e-06, "loss": 0.9858, "step": 7200 }, { "epoch": 1.0194662702626176, "grad_norm": 7.045095688526739, "learning_rate": 2.5424147392088057e-06, "loss": 0.9253, "step": 7201 }, { "epoch": 1.0196078431372548, "grad_norm": 10.012474290526644, "learning_rate": 2.5418416209481002e-06, "loss": 1.0321, "step": 7202 }, { "epoch": 1.019749416011892, "grad_norm": 9.674394171620806, "learning_rate": 2.541268500487812e-06, "loss": 0.8671, "step": 7203 }, { "epoch": 1.0198909888865293, "grad_norm": 9.793680403315454, "learning_rate": 2.540695377858069e-06, "loss": 0.8603, "step": 7204 }, { "epoch": 1.0200325617611665, "grad_norm": 10.515886912832249, "learning_rate": 2.540122253089001e-06, "loss": 1.0195, "step": 7205 }, { "epoch": 1.0201741346358038, "grad_norm": 8.82067491920394, "learning_rate": 2.539549126210735e-06, "loss": 0.9284, "step": 7206 }, { "epoch": 1.020315707510441, "grad_norm": 9.953155661255757, "learning_rate": 2.5389759972534024e-06, "loss": 0.9439, "step": 7207 }, { "epoch": 1.0204572803850782, "grad_norm": 9.095357746220776, "learning_rate": 2.53840286624713e-06, "loss": 0.796, "step": 7208 }, { "epoch": 1.0205988532597154, "grad_norm": 8.671479967072283, "learning_rate": 2.5378297332220474e-06, "loss": 0.9742, "step": 7209 }, { "epoch": 1.0207404261343527, "grad_norm": 7.775219399646963, "learning_rate": 2.5372565982082843e-06, "loss": 0.8223, "step": 7210 }, { "epoch": 1.02088199900899, "grad_norm": 8.091445418010105, "learning_rate": 2.5366834612359697e-06, "loss": 0.851, "step": 7211 }, { "epoch": 1.0210235718836271, "grad_norm": 8.242956680637548, "learning_rate": 2.5361103223352325e-06, "loss": 0.7516, "step": 7212 }, { "epoch": 1.0211651447582644, "grad_norm": 11.660938128098604, "learning_rate": 2.5355371815362017e-06, "loss": 0.8969, "step": 7213 }, { "epoch": 1.0213067176329016, "grad_norm": 8.82707016305944, "learning_rate": 2.534964038869009e-06, "loss": 1.0166, "step": 7214 }, { "epoch": 1.0214482905075388, "grad_norm": 7.876473667375606, "learning_rate": 2.534390894363783e-06, "loss": 0.8604, "step": 7215 }, { "epoch": 1.021589863382176, "grad_norm": 9.3379476131, "learning_rate": 2.533817748050653e-06, "loss": 0.9005, "step": 7216 }, { "epoch": 1.0217314362568133, "grad_norm": 8.192748273172937, "learning_rate": 2.533244599959749e-06, "loss": 0.8639, "step": 7217 }, { "epoch": 1.0218730091314505, "grad_norm": 12.75549915875747, "learning_rate": 2.5326714501212014e-06, "loss": 0.9105, "step": 7218 }, { "epoch": 1.0220145820060877, "grad_norm": 10.61892670231234, "learning_rate": 2.53209829856514e-06, "loss": 0.8501, "step": 7219 }, { "epoch": 1.0221561548807248, "grad_norm": 9.784676728519988, "learning_rate": 2.531525145321695e-06, "loss": 0.9411, "step": 7220 }, { "epoch": 1.022297727755362, "grad_norm": 8.311474729710557, "learning_rate": 2.5309519904209962e-06, "loss": 0.9205, "step": 7221 }, { "epoch": 1.0224393006299992, "grad_norm": 9.402796520049817, "learning_rate": 2.5303788338931744e-06, "loss": 0.9503, "step": 7222 }, { "epoch": 1.0225808735046364, "grad_norm": 7.801011083557134, "learning_rate": 2.5298056757683604e-06, "loss": 0.8139, "step": 7223 }, { "epoch": 1.0227224463792737, "grad_norm": 9.267172878718865, "learning_rate": 2.529232516076684e-06, "loss": 0.8483, "step": 7224 }, { "epoch": 1.022864019253911, "grad_norm": 10.030700478401414, "learning_rate": 2.528659354848277e-06, "loss": 0.8662, "step": 7225 }, { "epoch": 1.0230055921285481, "grad_norm": 9.738029761343538, "learning_rate": 2.5280861921132677e-06, "loss": 0.8298, "step": 7226 }, { "epoch": 1.0231471650031854, "grad_norm": 8.772771359951447, "learning_rate": 2.5275130279017884e-06, "loss": 0.9338, "step": 7227 }, { "epoch": 1.0232887378778226, "grad_norm": 8.906111224247757, "learning_rate": 2.52693986224397e-06, "loss": 0.8549, "step": 7228 }, { "epoch": 1.0234303107524598, "grad_norm": 10.553438712786974, "learning_rate": 2.526366695169943e-06, "loss": 1.0038, "step": 7229 }, { "epoch": 1.023571883627097, "grad_norm": 9.474801222998403, "learning_rate": 2.5257935267098395e-06, "loss": 0.9253, "step": 7230 }, { "epoch": 1.0237134565017343, "grad_norm": 10.230115257352233, "learning_rate": 2.5252203568937884e-06, "loss": 0.8231, "step": 7231 }, { "epoch": 1.0238550293763715, "grad_norm": 10.744602356409626, "learning_rate": 2.524647185751922e-06, "loss": 0.9412, "step": 7232 }, { "epoch": 1.0239966022510087, "grad_norm": 7.269954974469274, "learning_rate": 2.5240740133143714e-06, "loss": 0.8131, "step": 7233 }, { "epoch": 1.024138175125646, "grad_norm": 9.955791409665705, "learning_rate": 2.5235008396112688e-06, "loss": 0.8906, "step": 7234 }, { "epoch": 1.0242797480002832, "grad_norm": 8.58606412341229, "learning_rate": 2.5229276646727428e-06, "loss": 0.9399, "step": 7235 }, { "epoch": 1.0244213208749204, "grad_norm": 7.764453085695779, "learning_rate": 2.5223544885289287e-06, "loss": 0.9522, "step": 7236 }, { "epoch": 1.0245628937495577, "grad_norm": 10.476206751393809, "learning_rate": 2.5217813112099543e-06, "loss": 0.9298, "step": 7237 }, { "epoch": 1.024704466624195, "grad_norm": 8.959886803593175, "learning_rate": 2.521208132745953e-06, "loss": 0.9081, "step": 7238 }, { "epoch": 1.0248460394988321, "grad_norm": 8.802397869441776, "learning_rate": 2.520634953167056e-06, "loss": 0.7769, "step": 7239 }, { "epoch": 1.0249876123734691, "grad_norm": 8.133701301547344, "learning_rate": 2.5200617725033947e-06, "loss": 0.7881, "step": 7240 }, { "epoch": 1.0251291852481064, "grad_norm": 9.080106062752783, "learning_rate": 2.519488590785102e-06, "loss": 0.9467, "step": 7241 }, { "epoch": 1.0252707581227436, "grad_norm": 9.87434597505367, "learning_rate": 2.5189154080423073e-06, "loss": 0.9483, "step": 7242 }, { "epoch": 1.0254123309973808, "grad_norm": 8.214654750708629, "learning_rate": 2.518342224305144e-06, "loss": 0.8498, "step": 7243 }, { "epoch": 1.025553903872018, "grad_norm": 8.68820209547726, "learning_rate": 2.517769039603744e-06, "loss": 0.8074, "step": 7244 }, { "epoch": 1.0256954767466553, "grad_norm": 7.108245424514879, "learning_rate": 2.517195853968239e-06, "loss": 0.7873, "step": 7245 }, { "epoch": 1.0258370496212925, "grad_norm": 8.371342102154744, "learning_rate": 2.516622667428761e-06, "loss": 0.8132, "step": 7246 }, { "epoch": 1.0259786224959297, "grad_norm": 7.415698241825806, "learning_rate": 2.516049480015441e-06, "loss": 0.8019, "step": 7247 }, { "epoch": 1.026120195370567, "grad_norm": 8.131066038614888, "learning_rate": 2.5154762917584125e-06, "loss": 0.8819, "step": 7248 }, { "epoch": 1.0262617682452042, "grad_norm": 8.875061464768454, "learning_rate": 2.5149031026878063e-06, "loss": 0.9687, "step": 7249 }, { "epoch": 1.0264033411198414, "grad_norm": 9.42852275389951, "learning_rate": 2.5143299128337543e-06, "loss": 0.9083, "step": 7250 }, { "epoch": 1.0265449139944787, "grad_norm": 8.7012488159477, "learning_rate": 2.513756722226391e-06, "loss": 0.8086, "step": 7251 }, { "epoch": 1.026686486869116, "grad_norm": 9.026112292953151, "learning_rate": 2.5131835308958467e-06, "loss": 0.832, "step": 7252 }, { "epoch": 1.0268280597437531, "grad_norm": 7.0330840706911895, "learning_rate": 2.512610338872254e-06, "loss": 0.7147, "step": 7253 }, { "epoch": 1.0269696326183904, "grad_norm": 8.614860877686565, "learning_rate": 2.512037146185745e-06, "loss": 0.8314, "step": 7254 }, { "epoch": 1.0271112054930276, "grad_norm": 9.751326641891115, "learning_rate": 2.511463952866452e-06, "loss": 0.8772, "step": 7255 }, { "epoch": 1.0272527783676648, "grad_norm": 7.778941597225606, "learning_rate": 2.510890758944508e-06, "loss": 0.8455, "step": 7256 }, { "epoch": 1.027394351242302, "grad_norm": 8.300673712538714, "learning_rate": 2.5103175644500444e-06, "loss": 0.9191, "step": 7257 }, { "epoch": 1.0275359241169393, "grad_norm": 9.499242652017795, "learning_rate": 2.5097443694131947e-06, "loss": 0.9692, "step": 7258 }, { "epoch": 1.0276774969915765, "grad_norm": 7.683379767133478, "learning_rate": 2.50917117386409e-06, "loss": 0.8511, "step": 7259 }, { "epoch": 1.0278190698662137, "grad_norm": 9.658035063944773, "learning_rate": 2.508597977832864e-06, "loss": 0.8959, "step": 7260 }, { "epoch": 1.0279606427408507, "grad_norm": 7.892395183554764, "learning_rate": 2.508024781349649e-06, "loss": 0.8442, "step": 7261 }, { "epoch": 1.028102215615488, "grad_norm": 7.669649069266672, "learning_rate": 2.5074515844445774e-06, "loss": 0.8964, "step": 7262 }, { "epoch": 1.0282437884901252, "grad_norm": 7.797207200306512, "learning_rate": 2.5068783871477807e-06, "loss": 0.9005, "step": 7263 }, { "epoch": 1.0283853613647624, "grad_norm": 8.076720006716497, "learning_rate": 2.5063051894893925e-06, "loss": 0.8298, "step": 7264 }, { "epoch": 1.0285269342393997, "grad_norm": 9.446438024208641, "learning_rate": 2.5057319914995454e-06, "loss": 0.9158, "step": 7265 }, { "epoch": 1.028668507114037, "grad_norm": 9.013402919538292, "learning_rate": 2.5051587932083715e-06, "loss": 0.8761, "step": 7266 }, { "epoch": 1.0288100799886741, "grad_norm": 9.535645918909758, "learning_rate": 2.504585594646004e-06, "loss": 0.9034, "step": 7267 }, { "epoch": 1.0289516528633114, "grad_norm": 7.1867452473965265, "learning_rate": 2.504012395842576e-06, "loss": 0.8511, "step": 7268 }, { "epoch": 1.0290932257379486, "grad_norm": 8.960176310848151, "learning_rate": 2.5034391968282186e-06, "loss": 0.9298, "step": 7269 }, { "epoch": 1.0292347986125858, "grad_norm": 9.139582439208366, "learning_rate": 2.502865997633065e-06, "loss": 0.9206, "step": 7270 }, { "epoch": 1.029376371487223, "grad_norm": 8.528935268625636, "learning_rate": 2.502292798287248e-06, "loss": 0.9048, "step": 7271 }, { "epoch": 1.0295179443618603, "grad_norm": 8.917612851527394, "learning_rate": 2.5017195988208997e-06, "loss": 0.8111, "step": 7272 }, { "epoch": 1.0296595172364975, "grad_norm": 7.757587287453551, "learning_rate": 2.5011463992641548e-06, "loss": 0.8376, "step": 7273 }, { "epoch": 1.0298010901111347, "grad_norm": 7.241992869637262, "learning_rate": 2.500573199647144e-06, "loss": 0.9062, "step": 7274 }, { "epoch": 1.029942662985772, "grad_norm": 10.008547753655723, "learning_rate": 2.5e-06, "loss": 0.9474, "step": 7275 }, { "epoch": 1.0300842358604092, "grad_norm": 8.485778580092198, "learning_rate": 2.499426800352857e-06, "loss": 0.845, "step": 7276 }, { "epoch": 1.0302258087350464, "grad_norm": 8.482483261276265, "learning_rate": 2.4988536007358456e-06, "loss": 0.7964, "step": 7277 }, { "epoch": 1.0303673816096837, "grad_norm": 9.220868814373976, "learning_rate": 2.4982804011791003e-06, "loss": 1.0507, "step": 7278 }, { "epoch": 1.030508954484321, "grad_norm": 7.849484478976525, "learning_rate": 2.497707201712753e-06, "loss": 0.8474, "step": 7279 }, { "epoch": 1.0306505273589581, "grad_norm": 7.30129511592793, "learning_rate": 2.4971340023669356e-06, "loss": 0.8217, "step": 7280 }, { "epoch": 1.0307921002335951, "grad_norm": 9.153123549659538, "learning_rate": 2.4965608031717827e-06, "loss": 0.9749, "step": 7281 }, { "epoch": 1.0309336731082324, "grad_norm": 8.864792662560998, "learning_rate": 2.4959876041574256e-06, "loss": 0.7941, "step": 7282 }, { "epoch": 1.0310752459828696, "grad_norm": 10.170288524754733, "learning_rate": 2.4954144053539966e-06, "loss": 0.9185, "step": 7283 }, { "epoch": 1.0312168188575068, "grad_norm": 7.9820677527274135, "learning_rate": 2.494841206791629e-06, "loss": 0.8381, "step": 7284 }, { "epoch": 1.031358391732144, "grad_norm": 7.768601029579721, "learning_rate": 2.4942680085004554e-06, "loss": 0.8649, "step": 7285 }, { "epoch": 1.0314999646067813, "grad_norm": 7.314949081984199, "learning_rate": 2.4936948105106084e-06, "loss": 0.7646, "step": 7286 }, { "epoch": 1.0316415374814185, "grad_norm": 8.572729974678428, "learning_rate": 2.4931216128522197e-06, "loss": 0.8249, "step": 7287 }, { "epoch": 1.0317831103560557, "grad_norm": 7.808862678663809, "learning_rate": 2.4925484155554235e-06, "loss": 0.8135, "step": 7288 }, { "epoch": 1.031924683230693, "grad_norm": 9.85426761717968, "learning_rate": 2.491975218650351e-06, "loss": 0.8166, "step": 7289 }, { "epoch": 1.0320662561053302, "grad_norm": 9.468397480159204, "learning_rate": 2.491402022167136e-06, "loss": 0.8204, "step": 7290 }, { "epoch": 1.0322078289799674, "grad_norm": 9.037142510948003, "learning_rate": 2.4908288261359108e-06, "loss": 0.9574, "step": 7291 }, { "epoch": 1.0323494018546047, "grad_norm": 8.636561509197305, "learning_rate": 2.4902556305868065e-06, "loss": 0.8625, "step": 7292 }, { "epoch": 1.032490974729242, "grad_norm": 9.30409793603444, "learning_rate": 2.4896824355499565e-06, "loss": 0.896, "step": 7293 }, { "epoch": 1.0326325476038791, "grad_norm": 8.965402011484988, "learning_rate": 2.489109241055493e-06, "loss": 0.9157, "step": 7294 }, { "epoch": 1.0327741204785164, "grad_norm": 9.355416150034804, "learning_rate": 2.4885360471335483e-06, "loss": 0.865, "step": 7295 }, { "epoch": 1.0329156933531536, "grad_norm": 9.95200004987318, "learning_rate": 2.4879628538142557e-06, "loss": 0.8927, "step": 7296 }, { "epoch": 1.0330572662277908, "grad_norm": 11.185940937543123, "learning_rate": 2.4873896611277467e-06, "loss": 0.989, "step": 7297 }, { "epoch": 1.033198839102428, "grad_norm": 9.416400691864492, "learning_rate": 2.4868164691041537e-06, "loss": 0.9111, "step": 7298 }, { "epoch": 1.0333404119770653, "grad_norm": 9.238872536574489, "learning_rate": 2.4862432777736094e-06, "loss": 0.8829, "step": 7299 }, { "epoch": 1.0334819848517025, "grad_norm": 8.476061153088647, "learning_rate": 2.4856700871662452e-06, "loss": 0.8312, "step": 7300 }, { "epoch": 1.0336235577263397, "grad_norm": 10.098866208334858, "learning_rate": 2.4850968973121945e-06, "loss": 0.9646, "step": 7301 }, { "epoch": 1.033765130600977, "grad_norm": 9.059317615402293, "learning_rate": 2.4845237082415887e-06, "loss": 0.872, "step": 7302 }, { "epoch": 1.033906703475614, "grad_norm": 7.901005750929479, "learning_rate": 2.48395051998456e-06, "loss": 0.8839, "step": 7303 }, { "epoch": 1.0340482763502512, "grad_norm": 9.714828435980088, "learning_rate": 2.48337733257124e-06, "loss": 0.8649, "step": 7304 }, { "epoch": 1.0341898492248884, "grad_norm": 9.064934344382685, "learning_rate": 2.482804146031762e-06, "loss": 0.8329, "step": 7305 }, { "epoch": 1.0343314220995257, "grad_norm": 8.584545852299607, "learning_rate": 2.482230960396256e-06, "loss": 0.8196, "step": 7306 }, { "epoch": 1.034472994974163, "grad_norm": 8.20977362633133, "learning_rate": 2.4816577756948564e-06, "loss": 0.9231, "step": 7307 }, { "epoch": 1.0346145678488001, "grad_norm": 8.136421040671028, "learning_rate": 2.481084591957693e-06, "loss": 0.9323, "step": 7308 }, { "epoch": 1.0347561407234374, "grad_norm": 10.463539398228558, "learning_rate": 2.480511409214899e-06, "loss": 0.9352, "step": 7309 }, { "epoch": 1.0348977135980746, "grad_norm": 11.2334248826761, "learning_rate": 2.479938227496605e-06, "loss": 0.9128, "step": 7310 }, { "epoch": 1.0350392864727118, "grad_norm": 8.151042810669516, "learning_rate": 2.479365046832944e-06, "loss": 0.8567, "step": 7311 }, { "epoch": 1.035180859347349, "grad_norm": 8.60069632261753, "learning_rate": 2.478791867254047e-06, "loss": 0.9005, "step": 7312 }, { "epoch": 1.0353224322219863, "grad_norm": 8.007994948378883, "learning_rate": 2.478218688790047e-06, "loss": 0.7745, "step": 7313 }, { "epoch": 1.0354640050966235, "grad_norm": 9.24289368537793, "learning_rate": 2.477645511471073e-06, "loss": 0.8237, "step": 7314 }, { "epoch": 1.0356055779712607, "grad_norm": 8.920599477150528, "learning_rate": 2.4770723353272576e-06, "loss": 0.9275, "step": 7315 }, { "epoch": 1.035747150845898, "grad_norm": 10.901513412822158, "learning_rate": 2.4764991603887325e-06, "loss": 0.9034, "step": 7316 }, { "epoch": 1.0358887237205352, "grad_norm": 9.683620143545111, "learning_rate": 2.475925986685629e-06, "loss": 0.9448, "step": 7317 }, { "epoch": 1.0360302965951724, "grad_norm": 8.886851819058663, "learning_rate": 2.4753528142480784e-06, "loss": 0.861, "step": 7318 }, { "epoch": 1.0361718694698097, "grad_norm": 8.261196976726874, "learning_rate": 2.4747796431062124e-06, "loss": 0.8997, "step": 7319 }, { "epoch": 1.036313442344447, "grad_norm": 8.959564502895383, "learning_rate": 2.4742064732901618e-06, "loss": 0.9454, "step": 7320 }, { "epoch": 1.0364550152190841, "grad_norm": 7.801309368076958, "learning_rate": 2.473633304830057e-06, "loss": 0.9057, "step": 7321 }, { "epoch": 1.0365965880937213, "grad_norm": 9.110533233345809, "learning_rate": 2.4730601377560305e-06, "loss": 0.8823, "step": 7322 }, { "epoch": 1.0367381609683584, "grad_norm": 7.939222013856441, "learning_rate": 2.4724869720982124e-06, "loss": 0.8316, "step": 7323 }, { "epoch": 1.0368797338429956, "grad_norm": 9.497943856435455, "learning_rate": 2.471913807886733e-06, "loss": 0.8908, "step": 7324 }, { "epoch": 1.0370213067176328, "grad_norm": 9.060817799153293, "learning_rate": 2.4713406451517247e-06, "loss": 0.8655, "step": 7325 }, { "epoch": 1.03716287959227, "grad_norm": 10.08932010533853, "learning_rate": 2.4707674839233168e-06, "loss": 0.9485, "step": 7326 }, { "epoch": 1.0373044524669073, "grad_norm": 9.25228482457699, "learning_rate": 2.4701943242316405e-06, "loss": 0.8228, "step": 7327 }, { "epoch": 1.0374460253415445, "grad_norm": 8.708470783031098, "learning_rate": 2.469621166106826e-06, "loss": 0.8856, "step": 7328 }, { "epoch": 1.0375875982161817, "grad_norm": 7.897280947022283, "learning_rate": 2.4690480095790046e-06, "loss": 0.7686, "step": 7329 }, { "epoch": 1.037729171090819, "grad_norm": 8.64630635511059, "learning_rate": 2.468474854678306e-06, "loss": 0.8197, "step": 7330 }, { "epoch": 1.0378707439654562, "grad_norm": 10.119870240355882, "learning_rate": 2.4679017014348606e-06, "loss": 0.8695, "step": 7331 }, { "epoch": 1.0380123168400934, "grad_norm": 8.570753588666326, "learning_rate": 2.467328549878799e-06, "loss": 0.8396, "step": 7332 }, { "epoch": 1.0381538897147307, "grad_norm": 7.387916245973524, "learning_rate": 2.4667554000402513e-06, "loss": 0.799, "step": 7333 }, { "epoch": 1.038295462589368, "grad_norm": 8.666909385607989, "learning_rate": 2.4661822519493485e-06, "loss": 0.8311, "step": 7334 }, { "epoch": 1.0384370354640051, "grad_norm": 9.493181693048257, "learning_rate": 2.465609105636218e-06, "loss": 0.8815, "step": 7335 }, { "epoch": 1.0385786083386424, "grad_norm": 9.417162271696546, "learning_rate": 2.465035961130992e-06, "loss": 0.8824, "step": 7336 }, { "epoch": 1.0387201812132796, "grad_norm": 8.139865363879789, "learning_rate": 2.4644628184637987e-06, "loss": 0.8075, "step": 7337 }, { "epoch": 1.0388617540879168, "grad_norm": 8.514143900728051, "learning_rate": 2.4638896776647684e-06, "loss": 0.9046, "step": 7338 }, { "epoch": 1.039003326962554, "grad_norm": 9.551484988132245, "learning_rate": 2.463316538764031e-06, "loss": 0.9894, "step": 7339 }, { "epoch": 1.0391448998371913, "grad_norm": 9.089202897612356, "learning_rate": 2.462743401791716e-06, "loss": 0.8807, "step": 7340 }, { "epoch": 1.0392864727118285, "grad_norm": 9.366838081212274, "learning_rate": 2.462170266777953e-06, "loss": 0.8515, "step": 7341 }, { "epoch": 1.0394280455864657, "grad_norm": 8.240662347783447, "learning_rate": 2.4615971337528704e-06, "loss": 0.7387, "step": 7342 }, { "epoch": 1.039569618461103, "grad_norm": 10.164904665821238, "learning_rate": 2.461024002746598e-06, "loss": 0.8456, "step": 7343 }, { "epoch": 1.03971119133574, "grad_norm": 8.19739383127345, "learning_rate": 2.4604508737892653e-06, "loss": 0.7623, "step": 7344 }, { "epoch": 1.0398527642103772, "grad_norm": 9.871209298270092, "learning_rate": 2.459877746911e-06, "loss": 1.099, "step": 7345 }, { "epoch": 1.0399943370850144, "grad_norm": 9.327431811162949, "learning_rate": 2.4593046221419317e-06, "loss": 0.9978, "step": 7346 }, { "epoch": 1.0401359099596517, "grad_norm": 9.861553066420925, "learning_rate": 2.4587314995121893e-06, "loss": 0.8851, "step": 7347 }, { "epoch": 1.040277482834289, "grad_norm": 8.81968623219528, "learning_rate": 2.458158379051901e-06, "loss": 0.8816, "step": 7348 }, { "epoch": 1.0404190557089261, "grad_norm": 8.901292994153907, "learning_rate": 2.4575852607911956e-06, "loss": 0.9732, "step": 7349 }, { "epoch": 1.0405606285835634, "grad_norm": 8.508644196463484, "learning_rate": 2.457012144760201e-06, "loss": 0.7609, "step": 7350 }, { "epoch": 1.0407022014582006, "grad_norm": 9.3735163213993, "learning_rate": 2.4564390309890463e-06, "loss": 0.9524, "step": 7351 }, { "epoch": 1.0408437743328378, "grad_norm": 8.261318880689542, "learning_rate": 2.455865919507859e-06, "loss": 0.8668, "step": 7352 }, { "epoch": 1.040985347207475, "grad_norm": 8.585618489580865, "learning_rate": 2.4552928103467677e-06, "loss": 0.9947, "step": 7353 }, { "epoch": 1.0411269200821123, "grad_norm": 8.137759474969812, "learning_rate": 2.4547197035359e-06, "loss": 0.8578, "step": 7354 }, { "epoch": 1.0412684929567495, "grad_norm": 9.081507461374246, "learning_rate": 2.454146599105384e-06, "loss": 0.8573, "step": 7355 }, { "epoch": 1.0414100658313867, "grad_norm": 10.64061460340443, "learning_rate": 2.4535734970853466e-06, "loss": 0.9866, "step": 7356 }, { "epoch": 1.041551638706024, "grad_norm": 7.715383683637658, "learning_rate": 2.453000397505916e-06, "loss": 0.8262, "step": 7357 }, { "epoch": 1.0416932115806612, "grad_norm": 8.990561728574722, "learning_rate": 2.4524273003972194e-06, "loss": 0.9018, "step": 7358 }, { "epoch": 1.0418347844552984, "grad_norm": 7.994214349999034, "learning_rate": 2.451854205789384e-06, "loss": 0.8197, "step": 7359 }, { "epoch": 1.0419763573299357, "grad_norm": 8.301708587906205, "learning_rate": 2.4512811137125374e-06, "loss": 0.8421, "step": 7360 }, { "epoch": 1.0421179302045729, "grad_norm": 8.645663511760896, "learning_rate": 2.4507080241968055e-06, "loss": 0.8601, "step": 7361 }, { "epoch": 1.0422595030792101, "grad_norm": 7.7478967089052775, "learning_rate": 2.450134937272317e-06, "loss": 0.9709, "step": 7362 }, { "epoch": 1.0424010759538473, "grad_norm": 8.386155172173797, "learning_rate": 2.449561852969197e-06, "loss": 0.811, "step": 7363 }, { "epoch": 1.0425426488284844, "grad_norm": 9.59650314539775, "learning_rate": 2.448988771317573e-06, "loss": 0.9109, "step": 7364 }, { "epoch": 1.0426842217031216, "grad_norm": 8.23879799232864, "learning_rate": 2.448415692347572e-06, "loss": 0.9003, "step": 7365 }, { "epoch": 1.0428257945777588, "grad_norm": 9.09613991496058, "learning_rate": 2.4478426160893197e-06, "loss": 0.9505, "step": 7366 }, { "epoch": 1.042967367452396, "grad_norm": 8.426824300877024, "learning_rate": 2.4472695425729412e-06, "loss": 0.9749, "step": 7367 }, { "epoch": 1.0431089403270333, "grad_norm": 7.983492509480158, "learning_rate": 2.4466964718285636e-06, "loss": 0.9365, "step": 7368 }, { "epoch": 1.0432505132016705, "grad_norm": 8.691910097755322, "learning_rate": 2.446123403886313e-06, "loss": 0.9111, "step": 7369 }, { "epoch": 1.0433920860763077, "grad_norm": 8.247281175608771, "learning_rate": 2.445550338776315e-06, "loss": 0.7282, "step": 7370 }, { "epoch": 1.043533658950945, "grad_norm": 8.20736079255684, "learning_rate": 2.4449772765286947e-06, "loss": 0.7587, "step": 7371 }, { "epoch": 1.0436752318255822, "grad_norm": 9.713746184104217, "learning_rate": 2.4444042171735784e-06, "loss": 1.0258, "step": 7372 }, { "epoch": 1.0438168047002194, "grad_norm": 7.840190672599413, "learning_rate": 2.4438311607410905e-06, "loss": 0.8484, "step": 7373 }, { "epoch": 1.0439583775748567, "grad_norm": 9.34127642190454, "learning_rate": 2.443258107261357e-06, "loss": 0.8777, "step": 7374 }, { "epoch": 1.0440999504494939, "grad_norm": 11.466180581132527, "learning_rate": 2.4426850567645022e-06, "loss": 0.8172, "step": 7375 }, { "epoch": 1.0442415233241311, "grad_norm": 9.24765727301042, "learning_rate": 2.442112009280652e-06, "loss": 0.9648, "step": 7376 }, { "epoch": 1.0443830961987683, "grad_norm": 8.496312856370128, "learning_rate": 2.4415389648399294e-06, "loss": 0.9323, "step": 7377 }, { "epoch": 1.0445246690734056, "grad_norm": 7.085135855115921, "learning_rate": 2.44096592347246e-06, "loss": 0.849, "step": 7378 }, { "epoch": 1.0446662419480428, "grad_norm": 8.384145269897122, "learning_rate": 2.440392885208368e-06, "loss": 0.9632, "step": 7379 }, { "epoch": 1.04480781482268, "grad_norm": 8.911563827333158, "learning_rate": 2.4398198500777777e-06, "loss": 0.8742, "step": 7380 }, { "epoch": 1.0449493876973173, "grad_norm": 7.591324281421729, "learning_rate": 2.4392468181108127e-06, "loss": 0.7853, "step": 7381 }, { "epoch": 1.0450909605719545, "grad_norm": 10.731546220817892, "learning_rate": 2.438673789337598e-06, "loss": 0.8847, "step": 7382 }, { "epoch": 1.0452325334465917, "grad_norm": 8.359700818708784, "learning_rate": 2.4381007637882555e-06, "loss": 0.9984, "step": 7383 }, { "epoch": 1.045374106321229, "grad_norm": 9.16659388657779, "learning_rate": 2.4375277414929098e-06, "loss": 0.9939, "step": 7384 }, { "epoch": 1.045515679195866, "grad_norm": 8.906497135414378, "learning_rate": 2.4369547224816843e-06, "loss": 0.9088, "step": 7385 }, { "epoch": 1.0456572520705032, "grad_norm": 8.162390023815261, "learning_rate": 2.436381706784702e-06, "loss": 0.8831, "step": 7386 }, { "epoch": 1.0457988249451404, "grad_norm": 8.257719647508958, "learning_rate": 2.435808694432087e-06, "loss": 0.8534, "step": 7387 }, { "epoch": 1.0459403978197777, "grad_norm": 7.841525913749559, "learning_rate": 2.4352356854539607e-06, "loss": 0.8553, "step": 7388 }, { "epoch": 1.0460819706944149, "grad_norm": 9.09696269227314, "learning_rate": 2.434662679880445e-06, "loss": 0.9233, "step": 7389 }, { "epoch": 1.0462235435690521, "grad_norm": 9.550457720747275, "learning_rate": 2.4340896777416636e-06, "loss": 0.8721, "step": 7390 }, { "epoch": 1.0463651164436893, "grad_norm": 8.14750020825742, "learning_rate": 2.4335166790677395e-06, "loss": 0.883, "step": 7391 }, { "epoch": 1.0465066893183266, "grad_norm": 6.81905336762856, "learning_rate": 2.4329436838887936e-06, "loss": 0.8481, "step": 7392 }, { "epoch": 1.0466482621929638, "grad_norm": 7.446132746822392, "learning_rate": 2.432370692234948e-06, "loss": 0.845, "step": 7393 }, { "epoch": 1.046789835067601, "grad_norm": 9.551707841044138, "learning_rate": 2.431797704136325e-06, "loss": 0.8414, "step": 7394 }, { "epoch": 1.0469314079422383, "grad_norm": 7.997027798706295, "learning_rate": 2.431224719623046e-06, "loss": 0.7872, "step": 7395 }, { "epoch": 1.0470729808168755, "grad_norm": 7.504331736538019, "learning_rate": 2.430651738725232e-06, "loss": 0.8715, "step": 7396 }, { "epoch": 1.0472145536915127, "grad_norm": 8.958442959373446, "learning_rate": 2.430078761473005e-06, "loss": 0.9274, "step": 7397 }, { "epoch": 1.04735612656615, "grad_norm": 7.811152227494594, "learning_rate": 2.429505787896485e-06, "loss": 0.8923, "step": 7398 }, { "epoch": 1.0474976994407872, "grad_norm": 8.93798271956345, "learning_rate": 2.428932818025793e-06, "loss": 0.9345, "step": 7399 }, { "epoch": 1.0476392723154244, "grad_norm": 8.593707386257982, "learning_rate": 2.42835985189105e-06, "loss": 0.9387, "step": 7400 }, { "epoch": 1.0477808451900616, "grad_norm": 8.926422296454971, "learning_rate": 2.427786889522376e-06, "loss": 0.8882, "step": 7401 }, { "epoch": 1.0479224180646989, "grad_norm": 8.716924442056133, "learning_rate": 2.427213930949891e-06, "loss": 0.7686, "step": 7402 }, { "epoch": 1.048063990939336, "grad_norm": 7.131528038870033, "learning_rate": 2.426640976203716e-06, "loss": 0.7644, "step": 7403 }, { "epoch": 1.0482055638139733, "grad_norm": 8.52785952760472, "learning_rate": 2.4260680253139696e-06, "loss": 0.8799, "step": 7404 }, { "epoch": 1.0483471366886103, "grad_norm": 9.335290658161817, "learning_rate": 2.425495078310772e-06, "loss": 0.8578, "step": 7405 }, { "epoch": 1.0484887095632476, "grad_norm": 8.702293041193492, "learning_rate": 2.424922135224243e-06, "loss": 0.7833, "step": 7406 }, { "epoch": 1.0486302824378848, "grad_norm": 9.778953404506026, "learning_rate": 2.4243491960845004e-06, "loss": 0.972, "step": 7407 }, { "epoch": 1.048771855312522, "grad_norm": 9.145632058769614, "learning_rate": 2.4237762609216666e-06, "loss": 0.8511, "step": 7408 }, { "epoch": 1.0489134281871593, "grad_norm": 8.21005891841512, "learning_rate": 2.423203329765856e-06, "loss": 0.7959, "step": 7409 }, { "epoch": 1.0490550010617965, "grad_norm": 9.349479832179126, "learning_rate": 2.4226304026471894e-06, "loss": 0.8869, "step": 7410 }, { "epoch": 1.0491965739364337, "grad_norm": 8.592916330301971, "learning_rate": 2.4220574795957844e-06, "loss": 0.9663, "step": 7411 }, { "epoch": 1.049338146811071, "grad_norm": 9.408055737310853, "learning_rate": 2.4214845606417604e-06, "loss": 0.9059, "step": 7412 }, { "epoch": 1.0494797196857082, "grad_norm": 8.227138401316346, "learning_rate": 2.4209116458152334e-06, "loss": 0.8133, "step": 7413 }, { "epoch": 1.0496212925603454, "grad_norm": 8.07117509986469, "learning_rate": 2.4203387351463228e-06, "loss": 0.8106, "step": 7414 }, { "epoch": 1.0497628654349827, "grad_norm": 7.713392120154892, "learning_rate": 2.4197658286651456e-06, "loss": 0.9148, "step": 7415 }, { "epoch": 1.0499044383096199, "grad_norm": 8.251891872923558, "learning_rate": 2.419192926401819e-06, "loss": 0.8839, "step": 7416 }, { "epoch": 1.050046011184257, "grad_norm": 9.526991396210903, "learning_rate": 2.41862002838646e-06, "loss": 0.8454, "step": 7417 }, { "epoch": 1.0501875840588943, "grad_norm": 10.028812861069039, "learning_rate": 2.4180471346491864e-06, "loss": 0.9069, "step": 7418 }, { "epoch": 1.0503291569335316, "grad_norm": 7.76865921765391, "learning_rate": 2.4174742452201123e-06, "loss": 0.9049, "step": 7419 }, { "epoch": 1.0504707298081688, "grad_norm": 8.679105146839872, "learning_rate": 2.4169013601293563e-06, "loss": 0.8625, "step": 7420 }, { "epoch": 1.050612302682806, "grad_norm": 8.992584564430713, "learning_rate": 2.4163284794070333e-06, "loss": 0.8804, "step": 7421 }, { "epoch": 1.0507538755574433, "grad_norm": 8.412893626737752, "learning_rate": 2.4157556030832606e-06, "loss": 0.8605, "step": 7422 }, { "epoch": 1.0508954484320805, "grad_norm": 8.444486762939068, "learning_rate": 2.415182731188152e-06, "loss": 0.8132, "step": 7423 }, { "epoch": 1.0510370213067177, "grad_norm": 8.400332271725365, "learning_rate": 2.4146098637518248e-06, "loss": 0.892, "step": 7424 }, { "epoch": 1.051178594181355, "grad_norm": 8.916592986851203, "learning_rate": 2.414037000804393e-06, "loss": 0.8362, "step": 7425 }, { "epoch": 1.0513201670559922, "grad_norm": 8.214880434862216, "learning_rate": 2.413464142375972e-06, "loss": 0.8303, "step": 7426 }, { "epoch": 1.0514617399306292, "grad_norm": 10.656752946456834, "learning_rate": 2.412891288496677e-06, "loss": 0.7775, "step": 7427 }, { "epoch": 1.0516033128052664, "grad_norm": 9.921665459770626, "learning_rate": 2.4123184391966216e-06, "loss": 0.8621, "step": 7428 }, { "epoch": 1.0517448856799037, "grad_norm": 7.38737535710404, "learning_rate": 2.411745594505921e-06, "loss": 0.8014, "step": 7429 }, { "epoch": 1.0518864585545409, "grad_norm": 7.801072941858027, "learning_rate": 2.411172754454688e-06, "loss": 0.798, "step": 7430 }, { "epoch": 1.0520280314291781, "grad_norm": 8.203682086738457, "learning_rate": 2.410599919073037e-06, "loss": 0.9117, "step": 7431 }, { "epoch": 1.0521696043038153, "grad_norm": 8.177553147994898, "learning_rate": 2.410027088391082e-06, "loss": 0.8967, "step": 7432 }, { "epoch": 1.0523111771784526, "grad_norm": 10.494431608600708, "learning_rate": 2.4094542624389357e-06, "loss": 0.8397, "step": 7433 }, { "epoch": 1.0524527500530898, "grad_norm": 8.45344855910911, "learning_rate": 2.4088814412467117e-06, "loss": 0.9872, "step": 7434 }, { "epoch": 1.052594322927727, "grad_norm": 9.272488449883486, "learning_rate": 2.4083086248445213e-06, "loss": 0.9643, "step": 7435 }, { "epoch": 1.0527358958023643, "grad_norm": 7.1408669996260015, "learning_rate": 2.4077358132624786e-06, "loss": 0.8758, "step": 7436 }, { "epoch": 1.0528774686770015, "grad_norm": 8.721182753817757, "learning_rate": 2.4071630065306956e-06, "loss": 0.9425, "step": 7437 }, { "epoch": 1.0530190415516387, "grad_norm": 8.627885418763766, "learning_rate": 2.406590204679284e-06, "loss": 0.9316, "step": 7438 }, { "epoch": 1.053160614426276, "grad_norm": 9.501475169441207, "learning_rate": 2.406017407738356e-06, "loss": 0.7686, "step": 7439 }, { "epoch": 1.0533021873009132, "grad_norm": 9.016681047335267, "learning_rate": 2.4054446157380237e-06, "loss": 0.8342, "step": 7440 }, { "epoch": 1.0534437601755504, "grad_norm": 7.918672836156573, "learning_rate": 2.404871828708396e-06, "loss": 0.9009, "step": 7441 }, { "epoch": 1.0535853330501876, "grad_norm": 9.247493507401538, "learning_rate": 2.4042990466795857e-06, "loss": 0.8808, "step": 7442 }, { "epoch": 1.0537269059248249, "grad_norm": 9.009146810732092, "learning_rate": 2.4037262696817034e-06, "loss": 0.8954, "step": 7443 }, { "epoch": 1.053868478799462, "grad_norm": 6.981085427170697, "learning_rate": 2.403153497744859e-06, "loss": 0.9077, "step": 7444 }, { "epoch": 1.0540100516740993, "grad_norm": 8.39191061787239, "learning_rate": 2.402580730899163e-06, "loss": 0.8208, "step": 7445 }, { "epoch": 1.0541516245487366, "grad_norm": 8.279427957723222, "learning_rate": 2.4020079691747256e-06, "loss": 0.8699, "step": 7446 }, { "epoch": 1.0542931974233736, "grad_norm": 8.726026117272191, "learning_rate": 2.4014352126016562e-06, "loss": 1.0066, "step": 7447 }, { "epoch": 1.0544347702980108, "grad_norm": 8.683685123788262, "learning_rate": 2.4008624612100636e-06, "loss": 0.8223, "step": 7448 }, { "epoch": 1.054576343172648, "grad_norm": 9.505250383397284, "learning_rate": 2.400289715030058e-06, "loss": 0.873, "step": 7449 }, { "epoch": 1.0547179160472853, "grad_norm": 9.003194030264039, "learning_rate": 2.3997169740917485e-06, "loss": 0.8986, "step": 7450 }, { "epoch": 1.0548594889219225, "grad_norm": 9.914099053690492, "learning_rate": 2.3991442384252417e-06, "loss": 0.8106, "step": 7451 }, { "epoch": 1.0550010617965597, "grad_norm": 7.862185027055659, "learning_rate": 2.3985715080606473e-06, "loss": 0.7949, "step": 7452 }, { "epoch": 1.055142634671197, "grad_norm": 8.55790197169999, "learning_rate": 2.3979987830280733e-06, "loss": 0.9006, "step": 7453 }, { "epoch": 1.0552842075458342, "grad_norm": 9.337635910495369, "learning_rate": 2.3974260633576274e-06, "loss": 0.7394, "step": 7454 }, { "epoch": 1.0554257804204714, "grad_norm": 8.408838944093015, "learning_rate": 2.3968533490794165e-06, "loss": 0.8746, "step": 7455 }, { "epoch": 1.0555673532951086, "grad_norm": 8.594338580696506, "learning_rate": 2.3962806402235484e-06, "loss": 0.7674, "step": 7456 }, { "epoch": 1.0557089261697459, "grad_norm": 9.324060011216865, "learning_rate": 2.3957079368201293e-06, "loss": 0.9069, "step": 7457 }, { "epoch": 1.055850499044383, "grad_norm": 7.518182204473896, "learning_rate": 2.395135238899266e-06, "loss": 0.783, "step": 7458 }, { "epoch": 1.0559920719190203, "grad_norm": 11.341027135333686, "learning_rate": 2.3945625464910654e-06, "loss": 0.9042, "step": 7459 }, { "epoch": 1.0561336447936576, "grad_norm": 9.667144456653205, "learning_rate": 2.3939898596256334e-06, "loss": 0.9014, "step": 7460 }, { "epoch": 1.0562752176682948, "grad_norm": 9.951225350997138, "learning_rate": 2.3934171783330763e-06, "loss": 0.8687, "step": 7461 }, { "epoch": 1.056416790542932, "grad_norm": 7.77911641847564, "learning_rate": 2.3928445026434973e-06, "loss": 0.8162, "step": 7462 }, { "epoch": 1.0565583634175693, "grad_norm": 8.623496988889299, "learning_rate": 2.3922718325870034e-06, "loss": 0.9713, "step": 7463 }, { "epoch": 1.0566999362922065, "grad_norm": 8.436391800053142, "learning_rate": 2.391699168193698e-06, "loss": 0.8809, "step": 7464 }, { "epoch": 1.0568415091668437, "grad_norm": 11.13356459904699, "learning_rate": 2.3911265094936874e-06, "loss": 0.9558, "step": 7465 }, { "epoch": 1.056983082041481, "grad_norm": 10.10537286749062, "learning_rate": 2.390553856517075e-06, "loss": 0.8401, "step": 7466 }, { "epoch": 1.0571246549161182, "grad_norm": 7.233667854039097, "learning_rate": 2.3899812092939644e-06, "loss": 0.8045, "step": 7467 }, { "epoch": 1.0572662277907552, "grad_norm": 10.432032449402687, "learning_rate": 2.38940856785446e-06, "loss": 0.8281, "step": 7468 }, { "epoch": 1.0574078006653924, "grad_norm": 10.235383567909244, "learning_rate": 2.3888359322286644e-06, "loss": 1.0463, "step": 7469 }, { "epoch": 1.0575493735400296, "grad_norm": 8.56483817371318, "learning_rate": 2.3882633024466813e-06, "loss": 1.0214, "step": 7470 }, { "epoch": 1.0576909464146669, "grad_norm": 8.515480306253512, "learning_rate": 2.3876906785386133e-06, "loss": 0.7856, "step": 7471 }, { "epoch": 1.057832519289304, "grad_norm": 9.28613317194565, "learning_rate": 2.3871180605345623e-06, "loss": 0.9239, "step": 7472 }, { "epoch": 1.0579740921639413, "grad_norm": 9.522330290948842, "learning_rate": 2.3865454484646307e-06, "loss": 0.7849, "step": 7473 }, { "epoch": 1.0581156650385786, "grad_norm": 9.373535448755824, "learning_rate": 2.3859728423589197e-06, "loss": 0.8815, "step": 7474 }, { "epoch": 1.0582572379132158, "grad_norm": 8.134471361349382, "learning_rate": 2.385400242247532e-06, "loss": 0.9418, "step": 7475 }, { "epoch": 1.058398810787853, "grad_norm": 10.543470587699831, "learning_rate": 2.384827648160568e-06, "loss": 0.9364, "step": 7476 }, { "epoch": 1.0585403836624903, "grad_norm": 7.087236951894912, "learning_rate": 2.3842550601281288e-06, "loss": 0.8126, "step": 7477 }, { "epoch": 1.0586819565371275, "grad_norm": 8.910146403025028, "learning_rate": 2.3836824781803146e-06, "loss": 0.8868, "step": 7478 }, { "epoch": 1.0588235294117647, "grad_norm": 8.210038938999181, "learning_rate": 2.3831099023472253e-06, "loss": 0.8495, "step": 7479 }, { "epoch": 1.058965102286402, "grad_norm": 10.320524295825415, "learning_rate": 2.382537332658962e-06, "loss": 0.9851, "step": 7480 }, { "epoch": 1.0591066751610392, "grad_norm": 7.023144742403783, "learning_rate": 2.3819647691456226e-06, "loss": 0.8352, "step": 7481 }, { "epoch": 1.0592482480356764, "grad_norm": 8.60458833586404, "learning_rate": 2.3813922118373094e-06, "loss": 0.7647, "step": 7482 }, { "epoch": 1.0593898209103136, "grad_norm": 8.65834873798996, "learning_rate": 2.3808196607641176e-06, "loss": 0.9389, "step": 7483 }, { "epoch": 1.0595313937849509, "grad_norm": 7.954539354990813, "learning_rate": 2.3802471159561473e-06, "loss": 0.7965, "step": 7484 }, { "epoch": 1.059672966659588, "grad_norm": 9.558100068345377, "learning_rate": 2.379674577443497e-06, "loss": 0.8498, "step": 7485 }, { "epoch": 1.0598145395342253, "grad_norm": 8.946457009206389, "learning_rate": 2.3791020452562647e-06, "loss": 0.8855, "step": 7486 }, { "epoch": 1.0599561124088626, "grad_norm": 9.486296155945205, "learning_rate": 2.378529519424547e-06, "loss": 0.8839, "step": 7487 }, { "epoch": 1.0600976852834996, "grad_norm": 9.434174817119962, "learning_rate": 2.377956999978442e-06, "loss": 0.9427, "step": 7488 }, { "epoch": 1.0602392581581368, "grad_norm": 10.897471745404735, "learning_rate": 2.3773844869480473e-06, "loss": 0.8994, "step": 7489 }, { "epoch": 1.060380831032774, "grad_norm": 7.320443037206582, "learning_rate": 2.376811980363458e-06, "loss": 0.8122, "step": 7490 }, { "epoch": 1.0605224039074113, "grad_norm": 8.24680012000038, "learning_rate": 2.3762394802547717e-06, "loss": 0.8411, "step": 7491 }, { "epoch": 1.0606639767820485, "grad_norm": 9.07732615017336, "learning_rate": 2.375666986652083e-06, "loss": 0.8989, "step": 7492 }, { "epoch": 1.0608055496566857, "grad_norm": 7.967920836151165, "learning_rate": 2.375094499585489e-06, "loss": 0.8259, "step": 7493 }, { "epoch": 1.060947122531323, "grad_norm": 8.413962757649298, "learning_rate": 2.3745220190850834e-06, "loss": 0.7403, "step": 7494 }, { "epoch": 1.0610886954059602, "grad_norm": 9.259601867553782, "learning_rate": 2.3739495451809617e-06, "loss": 0.8319, "step": 7495 }, { "epoch": 1.0612302682805974, "grad_norm": 9.673105879496877, "learning_rate": 2.3733770779032185e-06, "loss": 0.8201, "step": 7496 }, { "epoch": 1.0613718411552346, "grad_norm": 8.951568414942844, "learning_rate": 2.372804617281948e-06, "loss": 0.9269, "step": 7497 }, { "epoch": 1.0615134140298719, "grad_norm": 8.69128467260473, "learning_rate": 2.3722321633472435e-06, "loss": 0.9544, "step": 7498 }, { "epoch": 1.061654986904509, "grad_norm": 7.638365241704133, "learning_rate": 2.3716597161291993e-06, "loss": 0.8361, "step": 7499 }, { "epoch": 1.0617965597791463, "grad_norm": 8.772925289756458, "learning_rate": 2.371087275657908e-06, "loss": 0.8514, "step": 7500 }, { "epoch": 1.0619381326537836, "grad_norm": 8.244004353958454, "learning_rate": 2.3705148419634627e-06, "loss": 0.9759, "step": 7501 }, { "epoch": 1.0620797055284208, "grad_norm": 10.003284677829734, "learning_rate": 2.3699424150759553e-06, "loss": 0.8654, "step": 7502 }, { "epoch": 1.062221278403058, "grad_norm": 8.34969777913726, "learning_rate": 2.369369995025479e-06, "loss": 0.8673, "step": 7503 }, { "epoch": 1.0623628512776953, "grad_norm": 9.536138762878968, "learning_rate": 2.3687975818421236e-06, "loss": 0.8088, "step": 7504 }, { "epoch": 1.0625044241523325, "grad_norm": 7.135387945929501, "learning_rate": 2.3682251755559823e-06, "loss": 0.7919, "step": 7505 }, { "epoch": 1.0626459970269697, "grad_norm": 8.14372560463777, "learning_rate": 2.367652776197145e-06, "loss": 0.871, "step": 7506 }, { "epoch": 1.062787569901607, "grad_norm": 11.352475848278303, "learning_rate": 2.3670803837957017e-06, "loss": 0.8688, "step": 7507 }, { "epoch": 1.0629291427762442, "grad_norm": 8.310804610060423, "learning_rate": 2.3665079983817443e-06, "loss": 0.8079, "step": 7508 }, { "epoch": 1.0630707156508814, "grad_norm": 8.596015326428141, "learning_rate": 2.3659356199853617e-06, "loss": 0.7778, "step": 7509 }, { "epoch": 1.0632122885255184, "grad_norm": 8.901742537426967, "learning_rate": 2.365363248636643e-06, "loss": 0.9189, "step": 7510 }, { "epoch": 1.0633538614001556, "grad_norm": 10.15707027937467, "learning_rate": 2.3647908843656787e-06, "loss": 0.8541, "step": 7511 }, { "epoch": 1.0634954342747929, "grad_norm": 9.424505926078226, "learning_rate": 2.364218527202557e-06, "loss": 0.8816, "step": 7512 }, { "epoch": 1.06363700714943, "grad_norm": 12.498134015997586, "learning_rate": 2.3636461771773655e-06, "loss": 1.0298, "step": 7513 }, { "epoch": 1.0637785800240673, "grad_norm": 10.728704606757635, "learning_rate": 2.363073834320194e-06, "loss": 0.8776, "step": 7514 }, { "epoch": 1.0639201528987046, "grad_norm": 9.782863691019635, "learning_rate": 2.3625014986611282e-06, "loss": 0.9429, "step": 7515 }, { "epoch": 1.0640617257733418, "grad_norm": 7.802968795367849, "learning_rate": 2.3619291702302557e-06, "loss": 0.8141, "step": 7516 }, { "epoch": 1.064203298647979, "grad_norm": 8.03115500308603, "learning_rate": 2.3613568490576635e-06, "loss": 0.884, "step": 7517 }, { "epoch": 1.0643448715226163, "grad_norm": 7.9586114751600245, "learning_rate": 2.360784535173439e-06, "loss": 0.8761, "step": 7518 }, { "epoch": 1.0644864443972535, "grad_norm": 8.951446961802374, "learning_rate": 2.3602122286076675e-06, "loss": 0.9232, "step": 7519 }, { "epoch": 1.0646280172718907, "grad_norm": 8.800499069627342, "learning_rate": 2.359639929390435e-06, "loss": 0.858, "step": 7520 }, { "epoch": 1.064769590146528, "grad_norm": 8.659794161070423, "learning_rate": 2.359067637551827e-06, "loss": 0.7633, "step": 7521 }, { "epoch": 1.0649111630211652, "grad_norm": 8.596147126612205, "learning_rate": 2.3584953531219278e-06, "loss": 0.9009, "step": 7522 }, { "epoch": 1.0650527358958024, "grad_norm": 9.91964095788746, "learning_rate": 2.3579230761308223e-06, "loss": 0.9374, "step": 7523 }, { "epoch": 1.0651943087704396, "grad_norm": 8.90680422480608, "learning_rate": 2.3573508066085954e-06, "loss": 0.8902, "step": 7524 }, { "epoch": 1.0653358816450769, "grad_norm": 8.135256353370716, "learning_rate": 2.3567785445853295e-06, "loss": 0.9262, "step": 7525 }, { "epoch": 1.065477454519714, "grad_norm": 6.496380018027197, "learning_rate": 2.356206290091109e-06, "loss": 0.7815, "step": 7526 }, { "epoch": 1.0656190273943513, "grad_norm": 9.909268181164059, "learning_rate": 2.355634043156017e-06, "loss": 0.8965, "step": 7527 }, { "epoch": 1.0657606002689886, "grad_norm": 6.910808180984006, "learning_rate": 2.355061803810135e-06, "loss": 0.82, "step": 7528 }, { "epoch": 1.0659021731436256, "grad_norm": 10.108069687875199, "learning_rate": 2.354489572083546e-06, "loss": 0.9377, "step": 7529 }, { "epoch": 1.0660437460182628, "grad_norm": 7.716543523191747, "learning_rate": 2.3539173480063323e-06, "loss": 0.8904, "step": 7530 }, { "epoch": 1.0661853188929, "grad_norm": 7.95846528215188, "learning_rate": 2.3533451316085744e-06, "loss": 0.9423, "step": 7531 }, { "epoch": 1.0663268917675373, "grad_norm": 7.44062188287419, "learning_rate": 2.352772922920353e-06, "loss": 0.8414, "step": 7532 }, { "epoch": 1.0664684646421745, "grad_norm": 8.521625161783943, "learning_rate": 2.3522007219717493e-06, "loss": 0.8516, "step": 7533 }, { "epoch": 1.0666100375168117, "grad_norm": 8.942084143836336, "learning_rate": 2.351628528792844e-06, "loss": 0.7939, "step": 7534 }, { "epoch": 1.066751610391449, "grad_norm": 7.716721982599754, "learning_rate": 2.3510563434137175e-06, "loss": 0.9186, "step": 7535 }, { "epoch": 1.0668931832660862, "grad_norm": 9.12390966302618, "learning_rate": 2.3504841658644465e-06, "loss": 0.9138, "step": 7536 }, { "epoch": 1.0670347561407234, "grad_norm": 8.314988760421597, "learning_rate": 2.3499119961751114e-06, "loss": 0.8701, "step": 7537 }, { "epoch": 1.0671763290153606, "grad_norm": 9.542255588886352, "learning_rate": 2.3493398343757904e-06, "loss": 0.9568, "step": 7538 }, { "epoch": 1.0673179018899979, "grad_norm": 9.809375046665936, "learning_rate": 2.3487676804965624e-06, "loss": 0.9563, "step": 7539 }, { "epoch": 1.067459474764635, "grad_norm": 8.229047582867276, "learning_rate": 2.3481955345675052e-06, "loss": 0.9423, "step": 7540 }, { "epoch": 1.0676010476392723, "grad_norm": 8.823082596180512, "learning_rate": 2.347623396618695e-06, "loss": 0.915, "step": 7541 }, { "epoch": 1.0677426205139096, "grad_norm": 8.737061007363215, "learning_rate": 2.3470512666802094e-06, "loss": 0.9266, "step": 7542 }, { "epoch": 1.0678841933885468, "grad_norm": 7.991196318287526, "learning_rate": 2.3464791447821244e-06, "loss": 0.7717, "step": 7543 }, { "epoch": 1.068025766263184, "grad_norm": 8.77534824125921, "learning_rate": 2.3459070309545165e-06, "loss": 0.9026, "step": 7544 }, { "epoch": 1.0681673391378212, "grad_norm": 10.330841215138404, "learning_rate": 2.345334925227461e-06, "loss": 0.9714, "step": 7545 }, { "epoch": 1.0683089120124585, "grad_norm": 9.994641203325067, "learning_rate": 2.344762827631034e-06, "loss": 0.8874, "step": 7546 }, { "epoch": 1.0684504848870957, "grad_norm": 8.264890699454275, "learning_rate": 2.3441907381953084e-06, "loss": 0.895, "step": 7547 }, { "epoch": 1.068592057761733, "grad_norm": 9.032430921385968, "learning_rate": 2.3436186569503598e-06, "loss": 0.8678, "step": 7548 }, { "epoch": 1.0687336306363702, "grad_norm": 9.697479817309697, "learning_rate": 2.343046583926262e-06, "loss": 0.849, "step": 7549 }, { "epoch": 1.0688752035110074, "grad_norm": 10.363740993500903, "learning_rate": 2.3424745191530877e-06, "loss": 0.9209, "step": 7550 }, { "epoch": 1.0690167763856444, "grad_norm": 8.936889200746856, "learning_rate": 2.3419024626609112e-06, "loss": 0.9324, "step": 7551 }, { "epoch": 1.0691583492602816, "grad_norm": 7.568703873207915, "learning_rate": 2.341330414479804e-06, "loss": 0.9164, "step": 7552 }, { "epoch": 1.0692999221349189, "grad_norm": 8.833354110213502, "learning_rate": 2.340758374639838e-06, "loss": 0.9165, "step": 7553 }, { "epoch": 1.069441495009556, "grad_norm": 9.036720810008843, "learning_rate": 2.3401863431710864e-06, "loss": 0.8592, "step": 7554 }, { "epoch": 1.0695830678841933, "grad_norm": 9.185787534389913, "learning_rate": 2.3396143201036187e-06, "loss": 0.969, "step": 7555 }, { "epoch": 1.0697246407588306, "grad_norm": 8.156187795350677, "learning_rate": 2.3390423054675084e-06, "loss": 0.9089, "step": 7556 }, { "epoch": 1.0698662136334678, "grad_norm": 9.737623135102789, "learning_rate": 2.3384702992928228e-06, "loss": 0.92, "step": 7557 }, { "epoch": 1.070007786508105, "grad_norm": 10.525265042916155, "learning_rate": 2.337898301609633e-06, "loss": 0.9087, "step": 7558 }, { "epoch": 1.0701493593827422, "grad_norm": 10.543630143203986, "learning_rate": 2.3373263124480086e-06, "loss": 0.8476, "step": 7559 }, { "epoch": 1.0702909322573795, "grad_norm": 8.566246382599694, "learning_rate": 2.336754331838019e-06, "loss": 0.797, "step": 7560 }, { "epoch": 1.0704325051320167, "grad_norm": 7.76696716622271, "learning_rate": 2.3361823598097316e-06, "loss": 0.8311, "step": 7561 }, { "epoch": 1.070574078006654, "grad_norm": 8.618370272409033, "learning_rate": 2.335610396393216e-06, "loss": 0.9264, "step": 7562 }, { "epoch": 1.0707156508812912, "grad_norm": 8.895454154468888, "learning_rate": 2.3350384416185395e-06, "loss": 0.8601, "step": 7563 }, { "epoch": 1.0708572237559284, "grad_norm": 9.589387081794758, "learning_rate": 2.334466495515769e-06, "loss": 0.8665, "step": 7564 }, { "epoch": 1.0709987966305656, "grad_norm": 8.20280830861902, "learning_rate": 2.3338945581149713e-06, "loss": 0.8701, "step": 7565 }, { "epoch": 1.0711403695052029, "grad_norm": 8.914627792795503, "learning_rate": 2.333322629446213e-06, "loss": 0.8784, "step": 7566 }, { "epoch": 1.07128194237984, "grad_norm": 8.344496561433141, "learning_rate": 2.33275070953956e-06, "loss": 0.9042, "step": 7567 }, { "epoch": 1.0714235152544773, "grad_norm": 8.152341410367416, "learning_rate": 2.3321787984250774e-06, "loss": 0.8626, "step": 7568 }, { "epoch": 1.0715650881291146, "grad_norm": 8.736367641524199, "learning_rate": 2.33160689613283e-06, "loss": 0.7888, "step": 7569 }, { "epoch": 1.0717066610037518, "grad_norm": 6.630664922590632, "learning_rate": 2.3310350026928826e-06, "loss": 0.8406, "step": 7570 }, { "epoch": 1.0718482338783888, "grad_norm": 9.183001811925774, "learning_rate": 2.330463118135299e-06, "loss": 0.9405, "step": 7571 }, { "epoch": 1.071989806753026, "grad_norm": 7.815429626487723, "learning_rate": 2.3298912424901434e-06, "loss": 0.8655, "step": 7572 }, { "epoch": 1.0721313796276633, "grad_norm": 7.527930830196869, "learning_rate": 2.3293193757874776e-06, "loss": 0.8668, "step": 7573 }, { "epoch": 1.0722729525023005, "grad_norm": 9.629679941659981, "learning_rate": 2.3287475180573653e-06, "loss": 0.8968, "step": 7574 }, { "epoch": 1.0724145253769377, "grad_norm": 9.475652715500164, "learning_rate": 2.328175669329868e-06, "loss": 0.874, "step": 7575 }, { "epoch": 1.072556098251575, "grad_norm": 10.872596211975862, "learning_rate": 2.327603829635048e-06, "loss": 0.9183, "step": 7576 }, { "epoch": 1.0726976711262122, "grad_norm": 9.80421632645256, "learning_rate": 2.3270319990029668e-06, "loss": 0.9149, "step": 7577 }, { "epoch": 1.0728392440008494, "grad_norm": 9.391417103396108, "learning_rate": 2.326460177463683e-06, "loss": 0.823, "step": 7578 }, { "epoch": 1.0729808168754866, "grad_norm": 9.682539131130785, "learning_rate": 2.325888365047259e-06, "loss": 0.9445, "step": 7579 }, { "epoch": 1.0731223897501239, "grad_norm": 9.241801572273099, "learning_rate": 2.325316561783754e-06, "loss": 0.7645, "step": 7580 }, { "epoch": 1.073263962624761, "grad_norm": 11.416564792447694, "learning_rate": 2.324744767703227e-06, "loss": 1.0292, "step": 7581 }, { "epoch": 1.0734055354993983, "grad_norm": 7.392248124652323, "learning_rate": 2.3241729828357367e-06, "loss": 0.7763, "step": 7582 }, { "epoch": 1.0735471083740356, "grad_norm": 7.6118728955672585, "learning_rate": 2.3236012072113414e-06, "loss": 0.7645, "step": 7583 }, { "epoch": 1.0736886812486728, "grad_norm": 9.339021130387708, "learning_rate": 2.323029440860099e-06, "loss": 0.8928, "step": 7584 }, { "epoch": 1.07383025412331, "grad_norm": 8.701488183434217, "learning_rate": 2.322457683812067e-06, "loss": 0.9091, "step": 7585 }, { "epoch": 1.0739718269979472, "grad_norm": 10.814044467231389, "learning_rate": 2.3218859360973025e-06, "loss": 0.8317, "step": 7586 }, { "epoch": 1.0741133998725845, "grad_norm": 8.699058253739489, "learning_rate": 2.3213141977458615e-06, "loss": 0.8389, "step": 7587 }, { "epoch": 1.0742549727472217, "grad_norm": 9.697650145007342, "learning_rate": 2.320742468787801e-06, "loss": 0.8199, "step": 7588 }, { "epoch": 1.074396545621859, "grad_norm": 8.749541788365129, "learning_rate": 2.3201707492531743e-06, "loss": 0.8494, "step": 7589 }, { "epoch": 1.0745381184964962, "grad_norm": 8.64249006840734, "learning_rate": 2.3195990391720364e-06, "loss": 0.8387, "step": 7590 }, { "epoch": 1.0746796913711334, "grad_norm": 8.22020357571343, "learning_rate": 2.319027338574443e-06, "loss": 0.9132, "step": 7591 }, { "epoch": 1.0748212642457706, "grad_norm": 7.372509293315899, "learning_rate": 2.318455647490448e-06, "loss": 0.8574, "step": 7592 }, { "epoch": 1.0749628371204076, "grad_norm": 10.077781683226977, "learning_rate": 2.3178839659501033e-06, "loss": 0.9927, "step": 7593 }, { "epoch": 1.0751044099950449, "grad_norm": 9.17247846915379, "learning_rate": 2.3173122939834635e-06, "loss": 0.9204, "step": 7594 }, { "epoch": 1.075245982869682, "grad_norm": 7.556934925775933, "learning_rate": 2.31674063162058e-06, "loss": 0.8681, "step": 7595 }, { "epoch": 1.0753875557443193, "grad_norm": 8.648922680044643, "learning_rate": 2.316168978891505e-06, "loss": 0.8719, "step": 7596 }, { "epoch": 1.0755291286189566, "grad_norm": 8.902448733063517, "learning_rate": 2.315597335826289e-06, "loss": 0.913, "step": 7597 }, { "epoch": 1.0756707014935938, "grad_norm": 8.345209686954444, "learning_rate": 2.3150257024549847e-06, "loss": 0.9158, "step": 7598 }, { "epoch": 1.075812274368231, "grad_norm": 9.393207011008549, "learning_rate": 2.314454078807641e-06, "loss": 0.8808, "step": 7599 }, { "epoch": 1.0759538472428682, "grad_norm": 9.054633574041318, "learning_rate": 2.3138824649143076e-06, "loss": 1.0144, "step": 7600 }, { "epoch": 1.0760954201175055, "grad_norm": 9.276682542226926, "learning_rate": 2.313310860805034e-06, "loss": 1.0462, "step": 7601 }, { "epoch": 1.0762369929921427, "grad_norm": 8.052197401678965, "learning_rate": 2.31273926650987e-06, "loss": 0.8373, "step": 7602 }, { "epoch": 1.07637856586678, "grad_norm": 9.118399963563114, "learning_rate": 2.312167682058863e-06, "loss": 0.9703, "step": 7603 }, { "epoch": 1.0765201387414172, "grad_norm": 8.960340218896764, "learning_rate": 2.3115961074820604e-06, "loss": 0.8812, "step": 7604 }, { "epoch": 1.0766617116160544, "grad_norm": 9.40948572616269, "learning_rate": 2.31102454280951e-06, "loss": 0.8938, "step": 7605 }, { "epoch": 1.0768032844906916, "grad_norm": 8.478318330533902, "learning_rate": 2.3104529880712586e-06, "loss": 0.964, "step": 7606 }, { "epoch": 1.0769448573653289, "grad_norm": 9.33114784629386, "learning_rate": 2.309881443297352e-06, "loss": 0.8143, "step": 7607 }, { "epoch": 1.077086430239966, "grad_norm": 8.6043112483281, "learning_rate": 2.3093099085178366e-06, "loss": 0.8659, "step": 7608 }, { "epoch": 1.0772280031146033, "grad_norm": 8.834746289758101, "learning_rate": 2.308738383762758e-06, "loss": 0.8783, "step": 7609 }, { "epoch": 1.0773695759892405, "grad_norm": 9.830844327071071, "learning_rate": 2.308166869062159e-06, "loss": 0.9331, "step": 7610 }, { "epoch": 1.0775111488638778, "grad_norm": 8.287628366158009, "learning_rate": 2.3075953644460847e-06, "loss": 0.9056, "step": 7611 }, { "epoch": 1.0776527217385148, "grad_norm": 8.207499298737073, "learning_rate": 2.3070238699445783e-06, "loss": 0.9385, "step": 7612 }, { "epoch": 1.077794294613152, "grad_norm": 7.543709353611629, "learning_rate": 2.306452385587683e-06, "loss": 0.8422, "step": 7613 }, { "epoch": 1.0779358674877892, "grad_norm": 9.470427814556356, "learning_rate": 2.305880911405442e-06, "loss": 0.8791, "step": 7614 }, { "epoch": 1.0780774403624265, "grad_norm": 9.001512188498525, "learning_rate": 2.3053094474278967e-06, "loss": 0.838, "step": 7615 }, { "epoch": 1.0782190132370637, "grad_norm": 8.579397194056172, "learning_rate": 2.3047379936850885e-06, "loss": 0.8761, "step": 7616 }, { "epoch": 1.078360586111701, "grad_norm": 9.429285377455292, "learning_rate": 2.3041665502070584e-06, "loss": 0.8982, "step": 7617 }, { "epoch": 1.0785021589863382, "grad_norm": 9.870426712474396, "learning_rate": 2.3035951170238468e-06, "loss": 0.9768, "step": 7618 }, { "epoch": 1.0786437318609754, "grad_norm": 8.375629458329815, "learning_rate": 2.3030236941654933e-06, "loss": 0.8535, "step": 7619 }, { "epoch": 1.0787853047356126, "grad_norm": 10.960587031660909, "learning_rate": 2.302452281662038e-06, "loss": 0.9726, "step": 7620 }, { "epoch": 1.0789268776102499, "grad_norm": 8.218413023331834, "learning_rate": 2.3018808795435187e-06, "loss": 0.8183, "step": 7621 }, { "epoch": 1.079068450484887, "grad_norm": 8.797769558617853, "learning_rate": 2.3013094878399735e-06, "loss": 0.8883, "step": 7622 }, { "epoch": 1.0792100233595243, "grad_norm": 7.49921998100378, "learning_rate": 2.3007381065814405e-06, "loss": 0.7979, "step": 7623 }, { "epoch": 1.0793515962341615, "grad_norm": 13.283405156023154, "learning_rate": 2.3001667357979564e-06, "loss": 0.8942, "step": 7624 }, { "epoch": 1.0794931691087988, "grad_norm": 8.451897438593301, "learning_rate": 2.2995953755195584e-06, "loss": 0.9453, "step": 7625 }, { "epoch": 1.079634741983436, "grad_norm": 8.169224500702077, "learning_rate": 2.2990240257762817e-06, "loss": 0.8569, "step": 7626 }, { "epoch": 1.0797763148580732, "grad_norm": 8.835227715192001, "learning_rate": 2.298452686598162e-06, "loss": 0.8631, "step": 7627 }, { "epoch": 1.0799178877327105, "grad_norm": 8.422053007888028, "learning_rate": 2.2978813580152347e-06, "loss": 0.8937, "step": 7628 }, { "epoch": 1.0800594606073477, "grad_norm": 10.50959775145847, "learning_rate": 2.297310040057533e-06, "loss": 0.9134, "step": 7629 }, { "epoch": 1.080201033481985, "grad_norm": 9.792110568326127, "learning_rate": 2.296738732755093e-06, "loss": 0.8837, "step": 7630 }, { "epoch": 1.0803426063566222, "grad_norm": 9.07319504624297, "learning_rate": 2.296167436137945e-06, "loss": 0.8553, "step": 7631 }, { "epoch": 1.0804841792312594, "grad_norm": 8.12367355082965, "learning_rate": 2.2955961502361235e-06, "loss": 0.8597, "step": 7632 }, { "epoch": 1.0806257521058966, "grad_norm": 8.91684069122005, "learning_rate": 2.2950248750796594e-06, "loss": 0.9511, "step": 7633 }, { "epoch": 1.0807673249805336, "grad_norm": 9.469072695228373, "learning_rate": 2.2944536106985848e-06, "loss": 0.8806, "step": 7634 }, { "epoch": 1.0809088978551709, "grad_norm": 8.509547089158016, "learning_rate": 2.2938823571229303e-06, "loss": 0.8161, "step": 7635 }, { "epoch": 1.081050470729808, "grad_norm": 8.788828121874916, "learning_rate": 2.2933111143827268e-06, "loss": 0.8946, "step": 7636 }, { "epoch": 1.0811920436044453, "grad_norm": 8.9682349080364, "learning_rate": 2.2927398825080043e-06, "loss": 0.8508, "step": 7637 }, { "epoch": 1.0813336164790825, "grad_norm": 10.070775011804152, "learning_rate": 2.2921686615287916e-06, "loss": 0.826, "step": 7638 }, { "epoch": 1.0814751893537198, "grad_norm": 9.694772267256454, "learning_rate": 2.2915974514751173e-06, "loss": 0.9544, "step": 7639 }, { "epoch": 1.081616762228357, "grad_norm": 10.14917271441911, "learning_rate": 2.29102625237701e-06, "loss": 0.8949, "step": 7640 }, { "epoch": 1.0817583351029942, "grad_norm": 10.054238665802885, "learning_rate": 2.290455064264497e-06, "loss": 0.9055, "step": 7641 }, { "epoch": 1.0818999079776315, "grad_norm": 8.820725518313877, "learning_rate": 2.2898838871676037e-06, "loss": 0.9211, "step": 7642 }, { "epoch": 1.0820414808522687, "grad_norm": 8.293333055469635, "learning_rate": 2.2893127211163583e-06, "loss": 0.847, "step": 7643 }, { "epoch": 1.082183053726906, "grad_norm": 9.86131129770549, "learning_rate": 2.2887415661407866e-06, "loss": 0.9185, "step": 7644 }, { "epoch": 1.0823246266015432, "grad_norm": 10.166272473640513, "learning_rate": 2.288170422270913e-06, "loss": 0.8703, "step": 7645 }, { "epoch": 1.0824661994761804, "grad_norm": 8.131572705842958, "learning_rate": 2.287599289536762e-06, "loss": 0.9215, "step": 7646 }, { "epoch": 1.0826077723508176, "grad_norm": 9.434657192906176, "learning_rate": 2.2870281679683582e-06, "loss": 0.8733, "step": 7647 }, { "epoch": 1.0827493452254549, "grad_norm": 8.842648710553966, "learning_rate": 2.2864570575957246e-06, "loss": 0.7996, "step": 7648 }, { "epoch": 1.082890918100092, "grad_norm": 7.58535088609522, "learning_rate": 2.2858859584488848e-06, "loss": 0.7909, "step": 7649 }, { "epoch": 1.0830324909747293, "grad_norm": 7.856283804760827, "learning_rate": 2.28531487055786e-06, "loss": 0.7904, "step": 7650 }, { "epoch": 1.0831740638493665, "grad_norm": 8.033841082512305, "learning_rate": 2.2847437939526735e-06, "loss": 0.9187, "step": 7651 }, { "epoch": 1.0833156367240038, "grad_norm": 10.234916567851391, "learning_rate": 2.2841727286633444e-06, "loss": 0.974, "step": 7652 }, { "epoch": 1.083457209598641, "grad_norm": 8.996149298976349, "learning_rate": 2.2836016747198937e-06, "loss": 0.8261, "step": 7653 }, { "epoch": 1.083598782473278, "grad_norm": 8.988574086657357, "learning_rate": 2.283030632152342e-06, "loss": 0.9297, "step": 7654 }, { "epoch": 1.0837403553479152, "grad_norm": 8.741765534527023, "learning_rate": 2.282459600990708e-06, "loss": 0.8294, "step": 7655 }, { "epoch": 1.0838819282225525, "grad_norm": 9.01773570193435, "learning_rate": 2.2818885812650105e-06, "loss": 0.9135, "step": 7656 }, { "epoch": 1.0840235010971897, "grad_norm": 9.168473452900596, "learning_rate": 2.281317573005268e-06, "loss": 0.736, "step": 7657 }, { "epoch": 1.084165073971827, "grad_norm": 9.457917415756055, "learning_rate": 2.2807465762414967e-06, "loss": 0.9233, "step": 7658 }, { "epoch": 1.0843066468464642, "grad_norm": 9.67988415041871, "learning_rate": 2.280175591003715e-06, "loss": 1.0025, "step": 7659 }, { "epoch": 1.0844482197211014, "grad_norm": 8.118406466762446, "learning_rate": 2.279604617321939e-06, "loss": 0.895, "step": 7660 }, { "epoch": 1.0845897925957386, "grad_norm": 9.143286788586467, "learning_rate": 2.279033655226183e-06, "loss": 0.8672, "step": 7661 }, { "epoch": 1.0847313654703759, "grad_norm": 9.430576235174678, "learning_rate": 2.278462704746465e-06, "loss": 0.9122, "step": 7662 }, { "epoch": 1.084872938345013, "grad_norm": 9.305975145081728, "learning_rate": 2.277891765912796e-06, "loss": 0.9921, "step": 7663 }, { "epoch": 1.0850145112196503, "grad_norm": 9.753880828836516, "learning_rate": 2.2773208387551906e-06, "loss": 0.8685, "step": 7664 }, { "epoch": 1.0851560840942875, "grad_norm": 6.68562019524139, "learning_rate": 2.2767499233036635e-06, "loss": 0.8089, "step": 7665 }, { "epoch": 1.0852976569689248, "grad_norm": 8.800854936165114, "learning_rate": 2.2761790195882264e-06, "loss": 0.8695, "step": 7666 }, { "epoch": 1.085439229843562, "grad_norm": 9.024567028871454, "learning_rate": 2.275608127638891e-06, "loss": 0.894, "step": 7667 }, { "epoch": 1.0855808027181992, "grad_norm": 8.870815526377907, "learning_rate": 2.2750372474856696e-06, "loss": 0.8627, "step": 7668 }, { "epoch": 1.0857223755928365, "grad_norm": 9.763673633163975, "learning_rate": 2.274466379158572e-06, "loss": 0.8974, "step": 7669 }, { "epoch": 1.0858639484674737, "grad_norm": 8.094390490312904, "learning_rate": 2.2738955226876086e-06, "loss": 0.8197, "step": 7670 }, { "epoch": 1.086005521342111, "grad_norm": 9.82334038692653, "learning_rate": 2.273324678102789e-06, "loss": 0.8938, "step": 7671 }, { "epoch": 1.0861470942167482, "grad_norm": 9.894452985897539, "learning_rate": 2.272753845434122e-06, "loss": 0.8253, "step": 7672 }, { "epoch": 1.0862886670913854, "grad_norm": 9.08293006305088, "learning_rate": 2.272183024711617e-06, "loss": 0.9298, "step": 7673 }, { "epoch": 1.0864302399660226, "grad_norm": 9.271452488029746, "learning_rate": 2.2716122159652795e-06, "loss": 0.8707, "step": 7674 }, { "epoch": 1.0865718128406598, "grad_norm": 9.74269612933978, "learning_rate": 2.2710414192251176e-06, "loss": 0.7711, "step": 7675 }, { "epoch": 1.0867133857152969, "grad_norm": 9.085885438909319, "learning_rate": 2.2704706345211375e-06, "loss": 0.9161, "step": 7676 }, { "epoch": 1.086854958589934, "grad_norm": 9.204331116583699, "learning_rate": 2.269899861883345e-06, "loss": 0.9143, "step": 7677 }, { "epoch": 1.0869965314645713, "grad_norm": 9.655150514493263, "learning_rate": 2.269329101341745e-06, "loss": 0.842, "step": 7678 }, { "epoch": 1.0871381043392085, "grad_norm": 8.950293287874137, "learning_rate": 2.268758352926343e-06, "loss": 0.8782, "step": 7679 }, { "epoch": 1.0872796772138458, "grad_norm": 9.030115392883364, "learning_rate": 2.268187616667141e-06, "loss": 0.891, "step": 7680 }, { "epoch": 1.087421250088483, "grad_norm": 8.060025567328934, "learning_rate": 2.267616892594143e-06, "loss": 0.9261, "step": 7681 }, { "epoch": 1.0875628229631202, "grad_norm": 8.349061339836213, "learning_rate": 2.2670461807373526e-06, "loss": 0.8594, "step": 7682 }, { "epoch": 1.0877043958377575, "grad_norm": 11.401422351646525, "learning_rate": 2.2664754811267713e-06, "loss": 0.8514, "step": 7683 }, { "epoch": 1.0878459687123947, "grad_norm": 10.00034140957725, "learning_rate": 2.265904793792399e-06, "loss": 0.9744, "step": 7684 }, { "epoch": 1.087987541587032, "grad_norm": 7.457205433157548, "learning_rate": 2.2653341187642368e-06, "loss": 0.8061, "step": 7685 }, { "epoch": 1.0881291144616692, "grad_norm": 10.333751259832882, "learning_rate": 2.2647634560722857e-06, "loss": 0.994, "step": 7686 }, { "epoch": 1.0882706873363064, "grad_norm": 9.590572464675677, "learning_rate": 2.264192805746543e-06, "loss": 0.9183, "step": 7687 }, { "epoch": 1.0884122602109436, "grad_norm": 8.29946404990256, "learning_rate": 2.2636221678170097e-06, "loss": 0.8932, "step": 7688 }, { "epoch": 1.0885538330855808, "grad_norm": 8.926617165452694, "learning_rate": 2.2630515423136827e-06, "loss": 0.8025, "step": 7689 }, { "epoch": 1.088695405960218, "grad_norm": 10.049500212886638, "learning_rate": 2.2624809292665593e-06, "loss": 0.8123, "step": 7690 }, { "epoch": 1.0888369788348553, "grad_norm": 6.996370737118705, "learning_rate": 2.2619103287056366e-06, "loss": 0.7814, "step": 7691 }, { "epoch": 1.0889785517094925, "grad_norm": 7.736457806284975, "learning_rate": 2.26133974066091e-06, "loss": 0.6984, "step": 7692 }, { "epoch": 1.0891201245841298, "grad_norm": 8.020083014993224, "learning_rate": 2.2607691651623757e-06, "loss": 0.8128, "step": 7693 }, { "epoch": 1.089261697458767, "grad_norm": 8.146286531477376, "learning_rate": 2.260198602240028e-06, "loss": 0.7913, "step": 7694 }, { "epoch": 1.089403270333404, "grad_norm": 7.362903826119526, "learning_rate": 2.259628051923861e-06, "loss": 0.8334, "step": 7695 }, { "epoch": 1.0895448432080412, "grad_norm": 8.062038674574318, "learning_rate": 2.259057514243868e-06, "loss": 0.7853, "step": 7696 }, { "epoch": 1.0896864160826785, "grad_norm": 9.152742202167389, "learning_rate": 2.2584869892300416e-06, "loss": 0.9244, "step": 7697 }, { "epoch": 1.0898279889573157, "grad_norm": 8.57053683047334, "learning_rate": 2.2579164769123744e-06, "loss": 0.7925, "step": 7698 }, { "epoch": 1.089969561831953, "grad_norm": 8.884203799352957, "learning_rate": 2.257345977320857e-06, "loss": 0.8818, "step": 7699 }, { "epoch": 1.0901111347065902, "grad_norm": 9.759706018741836, "learning_rate": 2.256775490485481e-06, "loss": 1.0297, "step": 7700 }, { "epoch": 1.0902527075812274, "grad_norm": 9.527294100936645, "learning_rate": 2.256205016436236e-06, "loss": 0.8639, "step": 7701 }, { "epoch": 1.0903942804558646, "grad_norm": 8.801493812697986, "learning_rate": 2.255634555203112e-06, "loss": 0.7964, "step": 7702 }, { "epoch": 1.0905358533305018, "grad_norm": 9.033204180743182, "learning_rate": 2.2550641068160966e-06, "loss": 0.8868, "step": 7703 }, { "epoch": 1.090677426205139, "grad_norm": 9.047906319400552, "learning_rate": 2.254493671305179e-06, "loss": 0.8334, "step": 7704 }, { "epoch": 1.0908189990797763, "grad_norm": 8.548034477739147, "learning_rate": 2.253923248700346e-06, "loss": 0.816, "step": 7705 }, { "epoch": 1.0909605719544135, "grad_norm": 8.155096096686501, "learning_rate": 2.2533528390315838e-06, "loss": 0.88, "step": 7706 }, { "epoch": 1.0911021448290508, "grad_norm": 8.05565547954219, "learning_rate": 2.252782442328879e-06, "loss": 0.8314, "step": 7707 }, { "epoch": 1.091243717703688, "grad_norm": 7.903640497364001, "learning_rate": 2.2522120586222174e-06, "loss": 0.8335, "step": 7708 }, { "epoch": 1.0913852905783252, "grad_norm": 8.474800454100167, "learning_rate": 2.2516416879415825e-06, "loss": 0.7688, "step": 7709 }, { "epoch": 1.0915268634529625, "grad_norm": 8.251281638729866, "learning_rate": 2.2510713303169588e-06, "loss": 0.7184, "step": 7710 }, { "epoch": 1.0916684363275997, "grad_norm": 9.10745684939939, "learning_rate": 2.25050098577833e-06, "loss": 0.8592, "step": 7711 }, { "epoch": 1.091810009202237, "grad_norm": 7.1628395625872585, "learning_rate": 2.2499306543556783e-06, "loss": 0.7951, "step": 7712 }, { "epoch": 1.0919515820768741, "grad_norm": 7.431285922875616, "learning_rate": 2.2493603360789855e-06, "loss": 0.7957, "step": 7713 }, { "epoch": 1.0920931549515114, "grad_norm": 8.392710165574636, "learning_rate": 2.2487900309782333e-06, "loss": 0.9238, "step": 7714 }, { "epoch": 1.0922347278261486, "grad_norm": 8.673800637833613, "learning_rate": 2.2482197390834027e-06, "loss": 0.8605, "step": 7715 }, { "epoch": 1.0923763007007858, "grad_norm": 10.382071417659711, "learning_rate": 2.2476494604244712e-06, "loss": 0.9176, "step": 7716 }, { "epoch": 1.0925178735754228, "grad_norm": 9.050066818074942, "learning_rate": 2.24707919503142e-06, "loss": 0.9261, "step": 7717 }, { "epoch": 1.09265944645006, "grad_norm": 8.926259902373813, "learning_rate": 2.246508942934227e-06, "loss": 0.8073, "step": 7718 }, { "epoch": 1.0928010193246973, "grad_norm": 8.501678749808264, "learning_rate": 2.2459387041628694e-06, "loss": 0.9354, "step": 7719 }, { "epoch": 1.0929425921993345, "grad_norm": 9.685577792537364, "learning_rate": 2.2453684787473252e-06, "loss": 0.9419, "step": 7720 }, { "epoch": 1.0930841650739718, "grad_norm": 9.608474217815937, "learning_rate": 2.24479826671757e-06, "loss": 1.0289, "step": 7721 }, { "epoch": 1.093225737948609, "grad_norm": 11.393115698839736, "learning_rate": 2.2442280681035792e-06, "loss": 1.0252, "step": 7722 }, { "epoch": 1.0933673108232462, "grad_norm": 8.55738176393975, "learning_rate": 2.2436578829353286e-06, "loss": 0.8374, "step": 7723 }, { "epoch": 1.0935088836978835, "grad_norm": 7.330591498392786, "learning_rate": 2.243087711242792e-06, "loss": 0.803, "step": 7724 }, { "epoch": 1.0936504565725207, "grad_norm": 10.18071947114718, "learning_rate": 2.242517553055943e-06, "loss": 0.8063, "step": 7725 }, { "epoch": 1.093792029447158, "grad_norm": 7.779727894160421, "learning_rate": 2.2419474084047544e-06, "loss": 0.8907, "step": 7726 }, { "epoch": 1.0939336023217952, "grad_norm": 8.574014537004338, "learning_rate": 2.241377277319198e-06, "loss": 0.8428, "step": 7727 }, { "epoch": 1.0940751751964324, "grad_norm": 7.589813342305917, "learning_rate": 2.240807159829245e-06, "loss": 0.8177, "step": 7728 }, { "epoch": 1.0942167480710696, "grad_norm": 8.343385609874362, "learning_rate": 2.2402370559648663e-06, "loss": 0.861, "step": 7729 }, { "epoch": 1.0943583209457068, "grad_norm": 7.486341628200058, "learning_rate": 2.239666965756032e-06, "loss": 0.7979, "step": 7730 }, { "epoch": 1.094499893820344, "grad_norm": 6.937974793279213, "learning_rate": 2.2390968892327108e-06, "loss": 0.7545, "step": 7731 }, { "epoch": 1.0946414666949813, "grad_norm": 9.59445177922364, "learning_rate": 2.2385268264248717e-06, "loss": 0.8951, "step": 7732 }, { "epoch": 1.0947830395696185, "grad_norm": 7.823734597945027, "learning_rate": 2.2379567773624825e-06, "loss": 0.7054, "step": 7733 }, { "epoch": 1.0949246124442558, "grad_norm": 8.047596960179858, "learning_rate": 2.2373867420755104e-06, "loss": 0.8312, "step": 7734 }, { "epoch": 1.095066185318893, "grad_norm": 8.10416546374576, "learning_rate": 2.2368167205939213e-06, "loss": 0.8416, "step": 7735 }, { "epoch": 1.0952077581935302, "grad_norm": 7.19119724879618, "learning_rate": 2.236246712947682e-06, "loss": 0.8657, "step": 7736 }, { "epoch": 1.0953493310681672, "grad_norm": 7.96192837614085, "learning_rate": 2.2356767191667554e-06, "loss": 0.9018, "step": 7737 }, { "epoch": 1.0954909039428045, "grad_norm": 7.782417179185328, "learning_rate": 2.235106739281106e-06, "loss": 0.8175, "step": 7738 }, { "epoch": 1.0956324768174417, "grad_norm": 8.763265526781161, "learning_rate": 2.2345367733206984e-06, "loss": 0.8505, "step": 7739 }, { "epoch": 1.095774049692079, "grad_norm": 8.501135806323681, "learning_rate": 2.2339668213154943e-06, "loss": 0.8909, "step": 7740 }, { "epoch": 1.0959156225667162, "grad_norm": 8.120439863646647, "learning_rate": 2.2333968832954564e-06, "loss": 0.9439, "step": 7741 }, { "epoch": 1.0960571954413534, "grad_norm": 8.393189675836613, "learning_rate": 2.2328269592905455e-06, "loss": 0.8558, "step": 7742 }, { "epoch": 1.0961987683159906, "grad_norm": 9.717695663519674, "learning_rate": 2.232257049330722e-06, "loss": 0.7949, "step": 7743 }, { "epoch": 1.0963403411906278, "grad_norm": 10.138553919259417, "learning_rate": 2.231687153445946e-06, "loss": 0.8833, "step": 7744 }, { "epoch": 1.096481914065265, "grad_norm": 9.437125697513203, "learning_rate": 2.231117271666176e-06, "loss": 0.9028, "step": 7745 }, { "epoch": 1.0966234869399023, "grad_norm": 8.713253353173087, "learning_rate": 2.2305474040213707e-06, "loss": 0.8866, "step": 7746 }, { "epoch": 1.0967650598145395, "grad_norm": 9.361366633227181, "learning_rate": 2.229977550541488e-06, "loss": 0.9393, "step": 7747 }, { "epoch": 1.0969066326891768, "grad_norm": 9.076724760234105, "learning_rate": 2.2294077112564836e-06, "loss": 0.9219, "step": 7748 }, { "epoch": 1.097048205563814, "grad_norm": 10.003836468055136, "learning_rate": 2.2288378861963144e-06, "loss": 0.8954, "step": 7749 }, { "epoch": 1.0971897784384512, "grad_norm": 8.344381815673477, "learning_rate": 2.228268075390935e-06, "loss": 0.8484, "step": 7750 }, { "epoch": 1.0973313513130885, "grad_norm": 8.714588115367691, "learning_rate": 2.2276982788703003e-06, "loss": 0.8989, "step": 7751 }, { "epoch": 1.0974729241877257, "grad_norm": 6.948931923447587, "learning_rate": 2.227128496664364e-06, "loss": 0.8209, "step": 7752 }, { "epoch": 1.097614497062363, "grad_norm": 9.546071819477223, "learning_rate": 2.226558728803079e-06, "loss": 0.8239, "step": 7753 }, { "epoch": 1.0977560699370001, "grad_norm": 6.481460835661123, "learning_rate": 2.225988975316398e-06, "loss": 0.7245, "step": 7754 }, { "epoch": 1.0978976428116374, "grad_norm": 9.163362728371022, "learning_rate": 2.2254192362342718e-06, "loss": 0.9408, "step": 7755 }, { "epoch": 1.0980392156862746, "grad_norm": 9.685931072549891, "learning_rate": 2.224849511586652e-06, "loss": 0.8597, "step": 7756 }, { "epoch": 1.0981807885609118, "grad_norm": 8.248332461978016, "learning_rate": 2.224279801403489e-06, "loss": 0.8996, "step": 7757 }, { "epoch": 1.098322361435549, "grad_norm": 8.373945027420804, "learning_rate": 2.2237101057147308e-06, "loss": 0.8369, "step": 7758 }, { "epoch": 1.098463934310186, "grad_norm": 10.938499884541805, "learning_rate": 2.223140424550326e-06, "loss": 0.8131, "step": 7759 }, { "epoch": 1.0986055071848233, "grad_norm": 8.872659293964654, "learning_rate": 2.2225707579402225e-06, "loss": 0.8601, "step": 7760 }, { "epoch": 1.0987470800594605, "grad_norm": 9.060676759618188, "learning_rate": 2.222001105914367e-06, "loss": 0.8286, "step": 7761 }, { "epoch": 1.0988886529340978, "grad_norm": 8.94601824112859, "learning_rate": 2.2214314685027067e-06, "loss": 0.8984, "step": 7762 }, { "epoch": 1.099030225808735, "grad_norm": 8.445910070064746, "learning_rate": 2.2208618457351862e-06, "loss": 0.9013, "step": 7763 }, { "epoch": 1.0991717986833722, "grad_norm": 8.596935666645722, "learning_rate": 2.2202922376417505e-06, "loss": 0.8532, "step": 7764 }, { "epoch": 1.0993133715580095, "grad_norm": 9.61672975022524, "learning_rate": 2.219722644252343e-06, "loss": 0.922, "step": 7765 }, { "epoch": 1.0994549444326467, "grad_norm": 8.665240635070203, "learning_rate": 2.2191530655969077e-06, "loss": 0.8459, "step": 7766 }, { "epoch": 1.099596517307284, "grad_norm": 8.385762601236507, "learning_rate": 2.2185835017053857e-06, "loss": 0.8237, "step": 7767 }, { "epoch": 1.0997380901819211, "grad_norm": 7.060514998199134, "learning_rate": 2.2180139526077203e-06, "loss": 0.8374, "step": 7768 }, { "epoch": 1.0998796630565584, "grad_norm": 7.756307926622556, "learning_rate": 2.21744441833385e-06, "loss": 0.768, "step": 7769 }, { "epoch": 1.1000212359311956, "grad_norm": 9.229663247095484, "learning_rate": 2.2168748989137166e-06, "loss": 0.8784, "step": 7770 }, { "epoch": 1.1001628088058328, "grad_norm": 9.016472047784353, "learning_rate": 2.2163053943772585e-06, "loss": 0.8408, "step": 7771 }, { "epoch": 1.10030438168047, "grad_norm": 8.373658030019552, "learning_rate": 2.2157359047544137e-06, "loss": 0.877, "step": 7772 }, { "epoch": 1.1004459545551073, "grad_norm": 8.727537259514332, "learning_rate": 2.215166430075121e-06, "loss": 0.8958, "step": 7773 }, { "epoch": 1.1005875274297445, "grad_norm": 7.572828317692673, "learning_rate": 2.2145969703693167e-06, "loss": 0.7859, "step": 7774 }, { "epoch": 1.1007291003043818, "grad_norm": 8.426992245351995, "learning_rate": 2.2140275256669365e-06, "loss": 0.927, "step": 7775 }, { "epoch": 1.100870673179019, "grad_norm": 9.920847633818267, "learning_rate": 2.2134580959979164e-06, "loss": 0.9505, "step": 7776 }, { "epoch": 1.1010122460536562, "grad_norm": 8.8400720287858, "learning_rate": 2.2128886813921906e-06, "loss": 0.874, "step": 7777 }, { "epoch": 1.1011538189282932, "grad_norm": 9.45437386830557, "learning_rate": 2.2123192818796928e-06, "loss": 0.9105, "step": 7778 }, { "epoch": 1.1012953918029305, "grad_norm": 8.864831391273222, "learning_rate": 2.211749897490356e-06, "loss": 0.9622, "step": 7779 }, { "epoch": 1.1014369646775677, "grad_norm": 7.939980907657452, "learning_rate": 2.2111805282541114e-06, "loss": 0.8187, "step": 7780 }, { "epoch": 1.101578537552205, "grad_norm": 8.09414966436021, "learning_rate": 2.2106111742008914e-06, "loss": 0.8337, "step": 7781 }, { "epoch": 1.1017201104268421, "grad_norm": 8.897478462229614, "learning_rate": 2.2100418353606262e-06, "loss": 0.884, "step": 7782 }, { "epoch": 1.1018616833014794, "grad_norm": 8.220911242242677, "learning_rate": 2.2094725117632454e-06, "loss": 0.8173, "step": 7783 }, { "epoch": 1.1020032561761166, "grad_norm": 9.002645315720489, "learning_rate": 2.2089032034386775e-06, "loss": 0.9122, "step": 7784 }, { "epoch": 1.1021448290507538, "grad_norm": 8.355797916757178, "learning_rate": 2.208333910416852e-06, "loss": 0.8187, "step": 7785 }, { "epoch": 1.102286401925391, "grad_norm": 8.148372426783547, "learning_rate": 2.2077646327276948e-06, "loss": 0.832, "step": 7786 }, { "epoch": 1.1024279748000283, "grad_norm": 7.74646432272, "learning_rate": 2.207195370401134e-06, "loss": 0.917, "step": 7787 }, { "epoch": 1.1025695476746655, "grad_norm": 9.12416428174833, "learning_rate": 2.206626123467093e-06, "loss": 0.8775, "step": 7788 }, { "epoch": 1.1027111205493028, "grad_norm": 10.017224545634198, "learning_rate": 2.2060568919554997e-06, "loss": 0.8771, "step": 7789 }, { "epoch": 1.10285269342394, "grad_norm": 9.887755835077117, "learning_rate": 2.205487675896275e-06, "loss": 0.8567, "step": 7790 }, { "epoch": 1.1029942662985772, "grad_norm": 9.56415099807426, "learning_rate": 2.2049184753193438e-06, "loss": 0.9994, "step": 7791 }, { "epoch": 1.1031358391732144, "grad_norm": 8.77723162061222, "learning_rate": 2.2043492902546284e-06, "loss": 0.9245, "step": 7792 }, { "epoch": 1.1032774120478517, "grad_norm": 9.676423071163665, "learning_rate": 2.20378012073205e-06, "loss": 0.902, "step": 7793 }, { "epoch": 1.103418984922489, "grad_norm": 7.121236777739448, "learning_rate": 2.20321096678153e-06, "loss": 0.8052, "step": 7794 }, { "epoch": 1.1035605577971261, "grad_norm": 9.219125306846623, "learning_rate": 2.202641828432988e-06, "loss": 0.9474, "step": 7795 }, { "epoch": 1.1037021306717634, "grad_norm": 11.048542074971786, "learning_rate": 2.202072705716344e-06, "loss": 0.8932, "step": 7796 }, { "epoch": 1.1038437035464006, "grad_norm": 9.105515671801024, "learning_rate": 2.201503598661515e-06, "loss": 0.9098, "step": 7797 }, { "epoch": 1.1039852764210378, "grad_norm": 8.893916213090202, "learning_rate": 2.2009345072984198e-06, "loss": 0.877, "step": 7798 }, { "epoch": 1.104126849295675, "grad_norm": 10.052058708411115, "learning_rate": 2.2003654316569746e-06, "loss": 0.9879, "step": 7799 }, { "epoch": 1.104268422170312, "grad_norm": 8.74392751059539, "learning_rate": 2.1997963717670952e-06, "loss": 0.8519, "step": 7800 }, { "epoch": 1.1044099950449493, "grad_norm": 7.641094544381822, "learning_rate": 2.1992273276586966e-06, "loss": 0.8222, "step": 7801 }, { "epoch": 1.1045515679195865, "grad_norm": 9.718351987680077, "learning_rate": 2.1986582993616926e-06, "loss": 0.7662, "step": 7802 }, { "epoch": 1.1046931407942238, "grad_norm": 8.128881847632762, "learning_rate": 2.198089286905998e-06, "loss": 0.8902, "step": 7803 }, { "epoch": 1.104834713668861, "grad_norm": 10.000156782828574, "learning_rate": 2.197520290321524e-06, "loss": 0.9057, "step": 7804 }, { "epoch": 1.1049762865434982, "grad_norm": 9.0882602127056, "learning_rate": 2.1969513096381823e-06, "loss": 0.8461, "step": 7805 }, { "epoch": 1.1051178594181355, "grad_norm": 10.43074081759806, "learning_rate": 2.1963823448858852e-06, "loss": 0.8808, "step": 7806 }, { "epoch": 1.1052594322927727, "grad_norm": 9.58212998063689, "learning_rate": 2.195813396094541e-06, "loss": 0.9716, "step": 7807 }, { "epoch": 1.10540100516741, "grad_norm": 7.929926585835349, "learning_rate": 2.19524446329406e-06, "loss": 0.7052, "step": 7808 }, { "epoch": 1.1055425780420471, "grad_norm": 8.733193087240396, "learning_rate": 2.1946755465143505e-06, "loss": 0.8783, "step": 7809 }, { "epoch": 1.1056841509166844, "grad_norm": 10.10986628805271, "learning_rate": 2.1941066457853213e-06, "loss": 0.9122, "step": 7810 }, { "epoch": 1.1058257237913216, "grad_norm": 9.1254753093514, "learning_rate": 2.1935377611368758e-06, "loss": 0.8311, "step": 7811 }, { "epoch": 1.1059672966659588, "grad_norm": 11.311083625938267, "learning_rate": 2.192968892598922e-06, "loss": 0.8545, "step": 7812 }, { "epoch": 1.106108869540596, "grad_norm": 8.420321234512542, "learning_rate": 2.1924000402013644e-06, "loss": 0.7735, "step": 7813 }, { "epoch": 1.1062504424152333, "grad_norm": 9.538724576196094, "learning_rate": 2.1918312039741075e-06, "loss": 0.9087, "step": 7814 }, { "epoch": 1.1063920152898705, "grad_norm": 8.538439282066435, "learning_rate": 2.1912623839470545e-06, "loss": 0.9302, "step": 7815 }, { "epoch": 1.1065335881645078, "grad_norm": 9.146893717150734, "learning_rate": 2.190693580150108e-06, "loss": 0.8444, "step": 7816 }, { "epoch": 1.106675161039145, "grad_norm": 8.11928932800738, "learning_rate": 2.190124792613169e-06, "loss": 0.78, "step": 7817 }, { "epoch": 1.1068167339137822, "grad_norm": 8.606465202749199, "learning_rate": 2.1895560213661387e-06, "loss": 0.8784, "step": 7818 }, { "epoch": 1.1069583067884192, "grad_norm": 10.987677521296833, "learning_rate": 2.188987266438917e-06, "loss": 0.9744, "step": 7819 }, { "epoch": 1.1070998796630565, "grad_norm": 9.11978878512828, "learning_rate": 2.188418527861403e-06, "loss": 0.7573, "step": 7820 }, { "epoch": 1.1072414525376937, "grad_norm": 7.394488689705174, "learning_rate": 2.1878498056634946e-06, "loss": 0.7889, "step": 7821 }, { "epoch": 1.107383025412331, "grad_norm": 9.787264696120852, "learning_rate": 2.187281099875089e-06, "loss": 0.8598, "step": 7822 }, { "epoch": 1.1075245982869681, "grad_norm": 8.880949363479091, "learning_rate": 2.186712410526083e-06, "loss": 0.8054, "step": 7823 }, { "epoch": 1.1076661711616054, "grad_norm": 8.252076321195766, "learning_rate": 2.186143737646372e-06, "loss": 0.8848, "step": 7824 }, { "epoch": 1.1078077440362426, "grad_norm": 10.840038005016186, "learning_rate": 2.18557508126585e-06, "loss": 0.9736, "step": 7825 }, { "epoch": 1.1079493169108798, "grad_norm": 8.943069536619761, "learning_rate": 2.1850064414144124e-06, "loss": 0.8646, "step": 7826 }, { "epoch": 1.108090889785517, "grad_norm": 10.524327388483336, "learning_rate": 2.1844378181219507e-06, "loss": 0.9013, "step": 7827 }, { "epoch": 1.1082324626601543, "grad_norm": 8.242648924435876, "learning_rate": 2.183869211418358e-06, "loss": 0.7445, "step": 7828 }, { "epoch": 1.1083740355347915, "grad_norm": 8.704193542740889, "learning_rate": 2.1833006213335243e-06, "loss": 0.8755, "step": 7829 }, { "epoch": 1.1085156084094288, "grad_norm": 9.35303946474107, "learning_rate": 2.1827320478973414e-06, "loss": 0.8575, "step": 7830 }, { "epoch": 1.108657181284066, "grad_norm": 8.535179043876886, "learning_rate": 2.1821634911396993e-06, "loss": 0.7944, "step": 7831 }, { "epoch": 1.1087987541587032, "grad_norm": 7.8744436400536175, "learning_rate": 2.1815949510904843e-06, "loss": 0.8376, "step": 7832 }, { "epoch": 1.1089403270333404, "grad_norm": 8.74571122921316, "learning_rate": 2.1810264277795856e-06, "loss": 0.9166, "step": 7833 }, { "epoch": 1.1090818999079777, "grad_norm": 9.735127378628643, "learning_rate": 2.180457921236889e-06, "loss": 0.8322, "step": 7834 }, { "epoch": 1.109223472782615, "grad_norm": 9.717935509605303, "learning_rate": 2.1798894314922824e-06, "loss": 0.9343, "step": 7835 }, { "epoch": 1.1093650456572521, "grad_norm": 8.292852831267096, "learning_rate": 2.1793209585756483e-06, "loss": 0.7977, "step": 7836 }, { "epoch": 1.1095066185318894, "grad_norm": 8.776781350195561, "learning_rate": 2.178752502516873e-06, "loss": 0.8991, "step": 7837 }, { "epoch": 1.1096481914065266, "grad_norm": 6.820285932526291, "learning_rate": 2.1781840633458394e-06, "loss": 0.7455, "step": 7838 }, { "epoch": 1.1097897642811638, "grad_norm": 7.195129060840287, "learning_rate": 2.177615641092429e-06, "loss": 0.7967, "step": 7839 }, { "epoch": 1.109931337155801, "grad_norm": 9.059278454879664, "learning_rate": 2.1770472357865247e-06, "loss": 0.8568, "step": 7840 }, { "epoch": 1.110072910030438, "grad_norm": 8.618828818643408, "learning_rate": 2.1764788474580062e-06, "loss": 0.8666, "step": 7841 }, { "epoch": 1.1102144829050753, "grad_norm": 8.93217426960632, "learning_rate": 2.175910476136754e-06, "loss": 0.9355, "step": 7842 }, { "epoch": 1.1103560557797125, "grad_norm": 7.547262177891791, "learning_rate": 2.1753421218526458e-06, "loss": 0.8504, "step": 7843 }, { "epoch": 1.1104976286543498, "grad_norm": 9.152765125143066, "learning_rate": 2.1747737846355603e-06, "loss": 0.8417, "step": 7844 }, { "epoch": 1.110639201528987, "grad_norm": 8.800926021057386, "learning_rate": 2.1742054645153744e-06, "loss": 0.8863, "step": 7845 }, { "epoch": 1.1107807744036242, "grad_norm": 9.48870418983269, "learning_rate": 2.173637161521964e-06, "loss": 0.94, "step": 7846 }, { "epoch": 1.1109223472782614, "grad_norm": 11.43367449044775, "learning_rate": 2.1730688756852046e-06, "loss": 0.9695, "step": 7847 }, { "epoch": 1.1110639201528987, "grad_norm": 9.969514536464322, "learning_rate": 2.172500607034971e-06, "loss": 0.9525, "step": 7848 }, { "epoch": 1.111205493027536, "grad_norm": 7.831344460224407, "learning_rate": 2.1719323556011364e-06, "loss": 0.7888, "step": 7849 }, { "epoch": 1.1113470659021731, "grad_norm": 9.451315765010785, "learning_rate": 2.171364121413573e-06, "loss": 0.9007, "step": 7850 }, { "epoch": 1.1114886387768104, "grad_norm": 9.476227580643906, "learning_rate": 2.170795904502153e-06, "loss": 0.8806, "step": 7851 }, { "epoch": 1.1116302116514476, "grad_norm": 9.761270904342771, "learning_rate": 2.170227704896746e-06, "loss": 0.9582, "step": 7852 }, { "epoch": 1.1117717845260848, "grad_norm": 8.835472519678621, "learning_rate": 2.169659522627224e-06, "loss": 0.9451, "step": 7853 }, { "epoch": 1.111913357400722, "grad_norm": 9.757361347406746, "learning_rate": 2.1690913577234542e-06, "loss": 0.8953, "step": 7854 }, { "epoch": 1.1120549302753593, "grad_norm": 9.47638779594734, "learning_rate": 2.1685232102153045e-06, "loss": 0.9168, "step": 7855 }, { "epoch": 1.1121965031499965, "grad_norm": 9.31673913516918, "learning_rate": 2.1679550801326428e-06, "loss": 0.9204, "step": 7856 }, { "epoch": 1.1123380760246337, "grad_norm": 9.227528262326334, "learning_rate": 2.167386967505335e-06, "loss": 0.8881, "step": 7857 }, { "epoch": 1.112479648899271, "grad_norm": 8.058822857867778, "learning_rate": 2.1668188723632454e-06, "loss": 0.6951, "step": 7858 }, { "epoch": 1.1126212217739082, "grad_norm": 8.930482043063765, "learning_rate": 2.1662507947362397e-06, "loss": 0.9877, "step": 7859 }, { "epoch": 1.1127627946485454, "grad_norm": 9.867746514004525, "learning_rate": 2.165682734654181e-06, "loss": 0.9533, "step": 7860 }, { "epoch": 1.1129043675231824, "grad_norm": 8.704827243201578, "learning_rate": 2.165114692146932e-06, "loss": 0.9873, "step": 7861 }, { "epoch": 1.1130459403978197, "grad_norm": 8.19392760060313, "learning_rate": 2.1645466672443535e-06, "loss": 0.8551, "step": 7862 }, { "epoch": 1.113187513272457, "grad_norm": 9.454952447974396, "learning_rate": 2.163978659976308e-06, "loss": 0.8751, "step": 7863 }, { "epoch": 1.1133290861470941, "grad_norm": 9.620560848518087, "learning_rate": 2.163410670372652e-06, "loss": 0.8622, "step": 7864 }, { "epoch": 1.1134706590217314, "grad_norm": 8.103548341778376, "learning_rate": 2.1628426984632465e-06, "loss": 0.8429, "step": 7865 }, { "epoch": 1.1136122318963686, "grad_norm": 7.516575362615928, "learning_rate": 2.1622747442779495e-06, "loss": 0.8601, "step": 7866 }, { "epoch": 1.1137538047710058, "grad_norm": 8.47389115813174, "learning_rate": 2.161706807846617e-06, "loss": 0.9155, "step": 7867 }, { "epoch": 1.113895377645643, "grad_norm": 7.4101992350987045, "learning_rate": 2.161138889199105e-06, "loss": 0.9396, "step": 7868 }, { "epoch": 1.1140369505202803, "grad_norm": 8.57547284037651, "learning_rate": 2.1605709883652693e-06, "loss": 0.8887, "step": 7869 }, { "epoch": 1.1141785233949175, "grad_norm": 8.379744367319393, "learning_rate": 2.160003105374964e-06, "loss": 0.8624, "step": 7870 }, { "epoch": 1.1143200962695547, "grad_norm": 8.650535046561968, "learning_rate": 2.1594352402580413e-06, "loss": 0.8734, "step": 7871 }, { "epoch": 1.114461669144192, "grad_norm": 8.627098823268765, "learning_rate": 2.1588673930443544e-06, "loss": 0.8967, "step": 7872 }, { "epoch": 1.1146032420188292, "grad_norm": 11.143736759162948, "learning_rate": 2.1582995637637543e-06, "loss": 0.875, "step": 7873 }, { "epoch": 1.1147448148934664, "grad_norm": 7.335648633573185, "learning_rate": 2.1577317524460917e-06, "loss": 0.9041, "step": 7874 }, { "epoch": 1.1148863877681037, "grad_norm": 8.954472997888924, "learning_rate": 2.157163959121215e-06, "loss": 0.7982, "step": 7875 }, { "epoch": 1.115027960642741, "grad_norm": 8.936906701515998, "learning_rate": 2.1565961838189738e-06, "loss": 0.8175, "step": 7876 }, { "epoch": 1.1151695335173781, "grad_norm": 9.789149790113605, "learning_rate": 2.156028426569215e-06, "loss": 0.9061, "step": 7877 }, { "epoch": 1.1153111063920154, "grad_norm": 8.943396270602536, "learning_rate": 2.155460687401785e-06, "loss": 0.8322, "step": 7878 }, { "epoch": 1.1154526792666526, "grad_norm": 9.409612618559153, "learning_rate": 2.1548929663465305e-06, "loss": 1.0111, "step": 7879 }, { "epoch": 1.1155942521412898, "grad_norm": 9.748199296606897, "learning_rate": 2.154325263433295e-06, "loss": 0.7797, "step": 7880 }, { "epoch": 1.115735825015927, "grad_norm": 8.623466023575766, "learning_rate": 2.1537575786919222e-06, "loss": 0.8418, "step": 7881 }, { "epoch": 1.1158773978905643, "grad_norm": 10.429128979076202, "learning_rate": 2.1531899121522557e-06, "loss": 0.9701, "step": 7882 }, { "epoch": 1.1160189707652013, "grad_norm": 8.393827313871643, "learning_rate": 2.152622263844137e-06, "loss": 0.8718, "step": 7883 }, { "epoch": 1.1161605436398385, "grad_norm": 8.5796537444132, "learning_rate": 2.152054633797408e-06, "loss": 0.8737, "step": 7884 }, { "epoch": 1.1163021165144758, "grad_norm": 11.089138222207822, "learning_rate": 2.1514870220419063e-06, "loss": 0.8734, "step": 7885 }, { "epoch": 1.116443689389113, "grad_norm": 8.242354115614512, "learning_rate": 2.150919428607472e-06, "loss": 0.8698, "step": 7886 }, { "epoch": 1.1165852622637502, "grad_norm": 11.286445752629602, "learning_rate": 2.1503518535239427e-06, "loss": 0.9309, "step": 7887 }, { "epoch": 1.1167268351383874, "grad_norm": 10.967726439622504, "learning_rate": 2.149784296821156e-06, "loss": 0.949, "step": 7888 }, { "epoch": 1.1168684080130247, "grad_norm": 8.884187482907219, "learning_rate": 2.1492167585289476e-06, "loss": 0.8926, "step": 7889 }, { "epoch": 1.117009980887662, "grad_norm": 9.327141842556694, "learning_rate": 2.148649238677153e-06, "loss": 0.878, "step": 7890 }, { "epoch": 1.1171515537622991, "grad_norm": 8.921880985932756, "learning_rate": 2.148081737295606e-06, "loss": 0.8768, "step": 7891 }, { "epoch": 1.1172931266369364, "grad_norm": 8.631526081329755, "learning_rate": 2.147514254414139e-06, "loss": 0.8857, "step": 7892 }, { "epoch": 1.1174346995115736, "grad_norm": 7.669658768087175, "learning_rate": 2.146946790062586e-06, "loss": 0.788, "step": 7893 }, { "epoch": 1.1175762723862108, "grad_norm": 7.168589252774914, "learning_rate": 2.146379344270776e-06, "loss": 0.824, "step": 7894 }, { "epoch": 1.117717845260848, "grad_norm": 7.409591243038715, "learning_rate": 2.145811917068541e-06, "loss": 0.8192, "step": 7895 }, { "epoch": 1.1178594181354853, "grad_norm": 8.463332128816418, "learning_rate": 2.145244508485709e-06, "loss": 0.8149, "step": 7896 }, { "epoch": 1.1180009910101225, "grad_norm": 9.678361680287274, "learning_rate": 2.1446771185521086e-06, "loss": 0.8837, "step": 7897 }, { "epoch": 1.1181425638847597, "grad_norm": 8.491052293748742, "learning_rate": 2.1441097472975667e-06, "loss": 0.9312, "step": 7898 }, { "epoch": 1.118284136759397, "grad_norm": 10.289065095268276, "learning_rate": 2.143542394751911e-06, "loss": 1.0034, "step": 7899 }, { "epoch": 1.1184257096340342, "grad_norm": 8.602423268834858, "learning_rate": 2.142975060944965e-06, "loss": 0.9064, "step": 7900 }, { "epoch": 1.1185672825086714, "grad_norm": 9.155799203653233, "learning_rate": 2.1424077459065544e-06, "loss": 0.8195, "step": 7901 }, { "epoch": 1.1187088553833084, "grad_norm": 8.7741140238942, "learning_rate": 2.1418404496665015e-06, "loss": 0.7632, "step": 7902 }, { "epoch": 1.1188504282579457, "grad_norm": 10.730134573427641, "learning_rate": 2.1412731722546294e-06, "loss": 0.9848, "step": 7903 }, { "epoch": 1.118992001132583, "grad_norm": 9.255690602689507, "learning_rate": 2.1407059137007587e-06, "loss": 0.8104, "step": 7904 }, { "epoch": 1.1191335740072201, "grad_norm": 7.5633395651125745, "learning_rate": 2.14013867403471e-06, "loss": 0.8344, "step": 7905 }, { "epoch": 1.1192751468818574, "grad_norm": 7.976146182741813, "learning_rate": 2.139571453286305e-06, "loss": 0.7647, "step": 7906 }, { "epoch": 1.1194167197564946, "grad_norm": 8.651762201816306, "learning_rate": 2.139004251485358e-06, "loss": 0.8267, "step": 7907 }, { "epoch": 1.1195582926311318, "grad_norm": 9.116609242855265, "learning_rate": 2.138437068661689e-06, "loss": 0.756, "step": 7908 }, { "epoch": 1.119699865505769, "grad_norm": 9.484475953823297, "learning_rate": 2.1378699048451136e-06, "loss": 0.9381, "step": 7909 }, { "epoch": 1.1198414383804063, "grad_norm": 8.8067850831604, "learning_rate": 2.1373027600654465e-06, "loss": 0.7881, "step": 7910 }, { "epoch": 1.1199830112550435, "grad_norm": 9.024230974988473, "learning_rate": 2.1367356343525035e-06, "loss": 0.8913, "step": 7911 }, { "epoch": 1.1201245841296807, "grad_norm": 7.240892148347737, "learning_rate": 2.1361685277360973e-06, "loss": 0.7989, "step": 7912 }, { "epoch": 1.120266157004318, "grad_norm": 7.521319288226582, "learning_rate": 2.1356014402460403e-06, "loss": 0.8311, "step": 7913 }, { "epoch": 1.1204077298789552, "grad_norm": 8.597812871273344, "learning_rate": 2.1350343719121437e-06, "loss": 0.9244, "step": 7914 }, { "epoch": 1.1205493027535924, "grad_norm": 7.5342667105513765, "learning_rate": 2.134467322764218e-06, "loss": 0.8198, "step": 7915 }, { "epoch": 1.1206908756282297, "grad_norm": 9.823454166942478, "learning_rate": 2.1339002928320737e-06, "loss": 0.8976, "step": 7916 }, { "epoch": 1.120832448502867, "grad_norm": 8.486058638917394, "learning_rate": 2.133333282145517e-06, "loss": 0.8105, "step": 7917 }, { "epoch": 1.1209740213775041, "grad_norm": 9.952080161259394, "learning_rate": 2.1327662907343564e-06, "loss": 0.8724, "step": 7918 }, { "epoch": 1.1211155942521414, "grad_norm": 9.293946902992534, "learning_rate": 2.1321993186283985e-06, "loss": 0.8279, "step": 7919 }, { "epoch": 1.1212571671267786, "grad_norm": 11.081338306124634, "learning_rate": 2.1316323658574477e-06, "loss": 0.9149, "step": 7920 }, { "epoch": 1.1213987400014158, "grad_norm": 10.039199293815095, "learning_rate": 2.1310654324513087e-06, "loss": 0.8563, "step": 7921 }, { "epoch": 1.121540312876053, "grad_norm": 11.63518709498698, "learning_rate": 2.130498518439785e-06, "loss": 0.9177, "step": 7922 }, { "epoch": 1.1216818857506903, "grad_norm": 10.25509665709149, "learning_rate": 2.1299316238526786e-06, "loss": 0.9881, "step": 7923 }, { "epoch": 1.1218234586253273, "grad_norm": 9.226717127990916, "learning_rate": 2.129364748719791e-06, "loss": 0.9967, "step": 7924 }, { "epoch": 1.1219650314999645, "grad_norm": 7.970250844801106, "learning_rate": 2.128797893070922e-06, "loss": 0.7837, "step": 7925 }, { "epoch": 1.1221066043746017, "grad_norm": 8.809481922379014, "learning_rate": 2.1282310569358704e-06, "loss": 0.9045, "step": 7926 }, { "epoch": 1.122248177249239, "grad_norm": 10.132222554747097, "learning_rate": 2.127664240344437e-06, "loss": 0.9203, "step": 7927 }, { "epoch": 1.1223897501238762, "grad_norm": 8.50255052784255, "learning_rate": 2.1270974433264152e-06, "loss": 0.7987, "step": 7928 }, { "epoch": 1.1225313229985134, "grad_norm": 9.458020265302205, "learning_rate": 2.126530665911603e-06, "loss": 0.8865, "step": 7929 }, { "epoch": 1.1226728958731507, "grad_norm": 7.407442536094162, "learning_rate": 2.125963908129795e-06, "loss": 0.7801, "step": 7930 }, { "epoch": 1.122814468747788, "grad_norm": 8.691666516468294, "learning_rate": 2.125397170010786e-06, "loss": 0.8775, "step": 7931 }, { "epoch": 1.1229560416224251, "grad_norm": 9.018870174588068, "learning_rate": 2.124830451584368e-06, "loss": 0.8435, "step": 7932 }, { "epoch": 1.1230976144970624, "grad_norm": 9.656640696106043, "learning_rate": 2.124263752880333e-06, "loss": 0.9238, "step": 7933 }, { "epoch": 1.1232391873716996, "grad_norm": 10.10874066728482, "learning_rate": 2.123697073928473e-06, "loss": 0.8447, "step": 7934 }, { "epoch": 1.1233807602463368, "grad_norm": 9.105096718590243, "learning_rate": 2.123130414758577e-06, "loss": 0.8327, "step": 7935 }, { "epoch": 1.123522333120974, "grad_norm": 10.708886237410535, "learning_rate": 2.122563775400434e-06, "loss": 0.7364, "step": 7936 }, { "epoch": 1.1236639059956113, "grad_norm": 9.516593495458997, "learning_rate": 2.1219971558838333e-06, "loss": 0.943, "step": 7937 }, { "epoch": 1.1238054788702485, "grad_norm": 7.756449323209404, "learning_rate": 2.1214305562385592e-06, "loss": 0.7211, "step": 7938 }, { "epoch": 1.1239470517448857, "grad_norm": 9.824380279487178, "learning_rate": 2.120863976494398e-06, "loss": 0.846, "step": 7939 }, { "epoch": 1.124088624619523, "grad_norm": 10.254233997738256, "learning_rate": 2.1202974166811354e-06, "loss": 0.9736, "step": 7940 }, { "epoch": 1.1242301974941602, "grad_norm": 9.319550782866822, "learning_rate": 2.119730876828554e-06, "loss": 0.9137, "step": 7941 }, { "epoch": 1.1243717703687974, "grad_norm": 8.078663319224946, "learning_rate": 2.1191643569664373e-06, "loss": 0.8223, "step": 7942 }, { "epoch": 1.1245133432434347, "grad_norm": 10.777731324016004, "learning_rate": 2.1185978571245665e-06, "loss": 0.9167, "step": 7943 }, { "epoch": 1.1246549161180717, "grad_norm": 8.365413816149784, "learning_rate": 2.1180313773327214e-06, "loss": 0.8418, "step": 7944 }, { "epoch": 1.124796488992709, "grad_norm": 8.967177409358976, "learning_rate": 2.1174649176206826e-06, "loss": 0.8432, "step": 7945 }, { "epoch": 1.1249380618673461, "grad_norm": 8.829888253806331, "learning_rate": 2.116898478018227e-06, "loss": 0.8734, "step": 7946 }, { "epoch": 1.1250796347419834, "grad_norm": 9.948667764852244, "learning_rate": 2.1163320585551335e-06, "loss": 0.8454, "step": 7947 }, { "epoch": 1.1252212076166206, "grad_norm": 8.558085619329445, "learning_rate": 2.115765659261178e-06, "loss": 1.0003, "step": 7948 }, { "epoch": 1.1253627804912578, "grad_norm": 7.767670206661269, "learning_rate": 2.115199280166135e-06, "loss": 0.7709, "step": 7949 }, { "epoch": 1.125504353365895, "grad_norm": 8.454953826110474, "learning_rate": 2.1146329212997784e-06, "loss": 0.8411, "step": 7950 }, { "epoch": 1.1256459262405323, "grad_norm": 8.699493253134237, "learning_rate": 2.1140665826918823e-06, "loss": 0.7353, "step": 7951 }, { "epoch": 1.1257874991151695, "grad_norm": 8.210816241819908, "learning_rate": 2.113500264372218e-06, "loss": 0.9403, "step": 7952 }, { "epoch": 1.1259290719898067, "grad_norm": 9.773131880338477, "learning_rate": 2.1129339663705565e-06, "loss": 0.9472, "step": 7953 }, { "epoch": 1.126070644864444, "grad_norm": 9.431550632789245, "learning_rate": 2.1123676887166678e-06, "loss": 0.9519, "step": 7954 }, { "epoch": 1.1262122177390812, "grad_norm": 7.919291118147182, "learning_rate": 2.11180143144032e-06, "loss": 0.8554, "step": 7955 }, { "epoch": 1.1263537906137184, "grad_norm": 9.374832355271913, "learning_rate": 2.1112351945712824e-06, "loss": 0.9529, "step": 7956 }, { "epoch": 1.1264953634883557, "grad_norm": 8.857653361432082, "learning_rate": 2.1106689781393203e-06, "loss": 0.9773, "step": 7957 }, { "epoch": 1.126636936362993, "grad_norm": 7.997143712360829, "learning_rate": 2.1101027821742013e-06, "loss": 0.9153, "step": 7958 }, { "epoch": 1.1267785092376301, "grad_norm": 8.618415862557368, "learning_rate": 2.1095366067056868e-06, "loss": 0.8238, "step": 7959 }, { "epoch": 1.1269200821122674, "grad_norm": 7.69671079063186, "learning_rate": 2.108970451763542e-06, "loss": 0.7965, "step": 7960 }, { "epoch": 1.1270616549869046, "grad_norm": 10.493971047450367, "learning_rate": 2.1084043173775284e-06, "loss": 0.9861, "step": 7961 }, { "epoch": 1.1272032278615418, "grad_norm": 7.5503535813153055, "learning_rate": 2.1078382035774085e-06, "loss": 0.7353, "step": 7962 }, { "epoch": 1.127344800736179, "grad_norm": 8.28816826659136, "learning_rate": 2.1072721103929415e-06, "loss": 0.9655, "step": 7963 }, { "epoch": 1.1274863736108163, "grad_norm": 8.481934591619055, "learning_rate": 2.106706037853887e-06, "loss": 0.8359, "step": 7964 }, { "epoch": 1.1276279464854535, "grad_norm": 12.404785684796904, "learning_rate": 2.106139985990003e-06, "loss": 1.0528, "step": 7965 }, { "epoch": 1.1277695193600905, "grad_norm": 10.05958695035817, "learning_rate": 2.105573954831046e-06, "loss": 0.8847, "step": 7966 }, { "epoch": 1.1279110922347277, "grad_norm": 8.74303922347837, "learning_rate": 2.105007944406772e-06, "loss": 0.8652, "step": 7967 }, { "epoch": 1.128052665109365, "grad_norm": 7.524168111753338, "learning_rate": 2.104441954746936e-06, "loss": 0.6989, "step": 7968 }, { "epoch": 1.1281942379840022, "grad_norm": 9.423108173600003, "learning_rate": 2.1038759858812924e-06, "loss": 0.8917, "step": 7969 }, { "epoch": 1.1283358108586394, "grad_norm": 8.323697228207847, "learning_rate": 2.103310037839592e-06, "loss": 0.8438, "step": 7970 }, { "epoch": 1.1284773837332767, "grad_norm": 7.68916364826912, "learning_rate": 2.1027441106515872e-06, "loss": 0.818, "step": 7971 }, { "epoch": 1.128618956607914, "grad_norm": 8.615336436357033, "learning_rate": 2.1021782043470282e-06, "loss": 0.9294, "step": 7972 }, { "epoch": 1.1287605294825511, "grad_norm": 9.476418792062674, "learning_rate": 2.1016123189556644e-06, "loss": 0.884, "step": 7973 }, { "epoch": 1.1289021023571884, "grad_norm": 9.25696487087455, "learning_rate": 2.101046454507244e-06, "loss": 0.8369, "step": 7974 }, { "epoch": 1.1290436752318256, "grad_norm": 8.517114351553666, "learning_rate": 2.100480611031514e-06, "loss": 0.993, "step": 7975 }, { "epoch": 1.1291852481064628, "grad_norm": 9.512500769957262, "learning_rate": 2.0999147885582204e-06, "loss": 0.9108, "step": 7976 }, { "epoch": 1.1293268209811, "grad_norm": 8.873847564234001, "learning_rate": 2.099348987117108e-06, "loss": 0.8646, "step": 7977 }, { "epoch": 1.1294683938557373, "grad_norm": 8.17209346326054, "learning_rate": 2.09878320673792e-06, "loss": 0.8748, "step": 7978 }, { "epoch": 1.1296099667303745, "grad_norm": 9.741796710536207, "learning_rate": 2.0982174474504004e-06, "loss": 0.8406, "step": 7979 }, { "epoch": 1.1297515396050117, "grad_norm": 9.182795766984286, "learning_rate": 2.097651709284291e-06, "loss": 0.8714, "step": 7980 }, { "epoch": 1.129893112479649, "grad_norm": 8.429291978383235, "learning_rate": 2.09708599226933e-06, "loss": 0.854, "step": 7981 }, { "epoch": 1.1300346853542862, "grad_norm": 8.4085154830918, "learning_rate": 2.096520296435258e-06, "loss": 1.0418, "step": 7982 }, { "epoch": 1.1301762582289234, "grad_norm": 9.964151595424548, "learning_rate": 2.0959546218118133e-06, "loss": 0.8684, "step": 7983 }, { "epoch": 1.1303178311035604, "grad_norm": 7.951819771794722, "learning_rate": 2.095388968428732e-06, "loss": 0.8777, "step": 7984 }, { "epoch": 1.1304594039781977, "grad_norm": 11.281034599635957, "learning_rate": 2.094823336315752e-06, "loss": 0.9342, "step": 7985 }, { "epoch": 1.130600976852835, "grad_norm": 8.454115042819478, "learning_rate": 2.0942577255026068e-06, "loss": 0.8177, "step": 7986 }, { "epoch": 1.1307425497274721, "grad_norm": 9.622754119823801, "learning_rate": 2.0936921360190305e-06, "loss": 0.896, "step": 7987 }, { "epoch": 1.1308841226021094, "grad_norm": 9.614976692349993, "learning_rate": 2.0931265678947555e-06, "loss": 0.9804, "step": 7988 }, { "epoch": 1.1310256954767466, "grad_norm": 8.340286050185915, "learning_rate": 2.0925610211595137e-06, "loss": 0.849, "step": 7989 }, { "epoch": 1.1311672683513838, "grad_norm": 10.114426731792216, "learning_rate": 2.0919954958430357e-06, "loss": 0.9461, "step": 7990 }, { "epoch": 1.131308841226021, "grad_norm": 6.773093385326062, "learning_rate": 2.0914299919750497e-06, "loss": 0.8306, "step": 7991 }, { "epoch": 1.1314504141006583, "grad_norm": 7.828759447530923, "learning_rate": 2.090864509585284e-06, "loss": 0.8051, "step": 7992 }, { "epoch": 1.1315919869752955, "grad_norm": 7.8236904718219105, "learning_rate": 2.0902990487034664e-06, "loss": 0.8764, "step": 7993 }, { "epoch": 1.1317335598499327, "grad_norm": 9.967825054439809, "learning_rate": 2.0897336093593223e-06, "loss": 0.9397, "step": 7994 }, { "epoch": 1.13187513272457, "grad_norm": 7.6964067174938195, "learning_rate": 2.0891681915825763e-06, "loss": 0.8846, "step": 7995 }, { "epoch": 1.1320167055992072, "grad_norm": 9.362339677443874, "learning_rate": 2.0886027954029525e-06, "loss": 0.8639, "step": 7996 }, { "epoch": 1.1321582784738444, "grad_norm": 8.87764346016226, "learning_rate": 2.0880374208501724e-06, "loss": 0.8642, "step": 7997 }, { "epoch": 1.1322998513484817, "grad_norm": 8.001935247951106, "learning_rate": 2.0874720679539585e-06, "loss": 0.8451, "step": 7998 }, { "epoch": 1.1324414242231189, "grad_norm": 9.091071189822351, "learning_rate": 2.0869067367440297e-06, "loss": 0.9501, "step": 7999 }, { "epoch": 1.1325829970977561, "grad_norm": 11.050911733720634, "learning_rate": 2.0863414272501067e-06, "loss": 0.8132, "step": 8000 }, { "epoch": 1.1327245699723933, "grad_norm": 8.491975923796065, "learning_rate": 2.0857761395019064e-06, "loss": 0.9962, "step": 8001 }, { "epoch": 1.1328661428470306, "grad_norm": 8.262539206453642, "learning_rate": 2.0852108735291448e-06, "loss": 0.9254, "step": 8002 }, { "epoch": 1.1330077157216678, "grad_norm": 9.851225227796263, "learning_rate": 2.0846456293615384e-06, "loss": 0.9076, "step": 8003 }, { "epoch": 1.133149288596305, "grad_norm": 8.684016345711132, "learning_rate": 2.084080407028802e-06, "loss": 0.8356, "step": 8004 }, { "epoch": 1.1332908614709423, "grad_norm": 8.624189698123455, "learning_rate": 2.083515206560648e-06, "loss": 0.828, "step": 8005 }, { "epoch": 1.1334324343455795, "grad_norm": 7.790986247281038, "learning_rate": 2.0829500279867895e-06, "loss": 0.8483, "step": 8006 }, { "epoch": 1.1335740072202167, "grad_norm": 6.6064235870162, "learning_rate": 2.082384871336936e-06, "loss": 0.7532, "step": 8007 }, { "epoch": 1.1337155800948537, "grad_norm": 8.274652350461384, "learning_rate": 2.081819736640799e-06, "loss": 0.8876, "step": 8008 }, { "epoch": 1.133857152969491, "grad_norm": 8.144213216962081, "learning_rate": 2.0812546239280873e-06, "loss": 0.9286, "step": 8009 }, { "epoch": 1.1339987258441282, "grad_norm": 8.002279910416059, "learning_rate": 2.0806895332285078e-06, "loss": 0.8215, "step": 8010 }, { "epoch": 1.1341402987187654, "grad_norm": 8.304748592349373, "learning_rate": 2.080124464571767e-06, "loss": 0.9071, "step": 8011 }, { "epoch": 1.1342818715934027, "grad_norm": 8.132948992460536, "learning_rate": 2.0795594179875697e-06, "loss": 0.9169, "step": 8012 }, { "epoch": 1.1344234444680399, "grad_norm": 7.714693184844054, "learning_rate": 2.0789943935056196e-06, "loss": 0.7967, "step": 8013 }, { "epoch": 1.1345650173426771, "grad_norm": 8.466718704584355, "learning_rate": 2.078429391155621e-06, "loss": 0.9282, "step": 8014 }, { "epoch": 1.1347065902173143, "grad_norm": 9.682156177959254, "learning_rate": 2.0778644109672747e-06, "loss": 0.9127, "step": 8015 }, { "epoch": 1.1348481630919516, "grad_norm": 8.182508038809114, "learning_rate": 2.077299452970282e-06, "loss": 0.8181, "step": 8016 }, { "epoch": 1.1349897359665888, "grad_norm": 10.484537274032933, "learning_rate": 2.0767345171943412e-06, "loss": 0.9111, "step": 8017 }, { "epoch": 1.135131308841226, "grad_norm": 7.203525358315234, "learning_rate": 2.0761696036691515e-06, "loss": 0.8852, "step": 8018 }, { "epoch": 1.1352728817158633, "grad_norm": 7.653670144905104, "learning_rate": 2.07560471242441e-06, "loss": 0.8321, "step": 8019 }, { "epoch": 1.1354144545905005, "grad_norm": 7.853334202315709, "learning_rate": 2.075039843489812e-06, "loss": 0.9694, "step": 8020 }, { "epoch": 1.1355560274651377, "grad_norm": 9.317170680771545, "learning_rate": 2.0744749968950527e-06, "loss": 0.8948, "step": 8021 }, { "epoch": 1.135697600339775, "grad_norm": 9.118463971134124, "learning_rate": 2.073910172669826e-06, "loss": 0.9414, "step": 8022 }, { "epoch": 1.1358391732144122, "grad_norm": 7.085122933300485, "learning_rate": 2.0733453708438233e-06, "loss": 0.8173, "step": 8023 }, { "epoch": 1.1359807460890494, "grad_norm": 7.73287153947961, "learning_rate": 2.072780591446736e-06, "loss": 0.8074, "step": 8024 }, { "epoch": 1.1361223189636867, "grad_norm": 9.649750037241898, "learning_rate": 2.072215834508255e-06, "loss": 0.9969, "step": 8025 }, { "epoch": 1.1362638918383237, "grad_norm": 8.868623833971993, "learning_rate": 2.0716511000580684e-06, "loss": 0.8132, "step": 8026 }, { "epoch": 1.1364054647129609, "grad_norm": 8.582338392745747, "learning_rate": 2.071086388125864e-06, "loss": 0.8598, "step": 8027 }, { "epoch": 1.1365470375875981, "grad_norm": 9.984196955327626, "learning_rate": 2.0705216987413284e-06, "loss": 0.9671, "step": 8028 }, { "epoch": 1.1366886104622353, "grad_norm": 7.672705580119014, "learning_rate": 2.069957031934147e-06, "loss": 0.8515, "step": 8029 }, { "epoch": 1.1368301833368726, "grad_norm": 9.930363617815374, "learning_rate": 2.0693923877340032e-06, "loss": 0.8363, "step": 8030 }, { "epoch": 1.1369717562115098, "grad_norm": 8.030784504384323, "learning_rate": 2.0688277661705807e-06, "loss": 0.8196, "step": 8031 }, { "epoch": 1.137113329086147, "grad_norm": 8.614950323686715, "learning_rate": 2.0682631672735616e-06, "loss": 0.8615, "step": 8032 }, { "epoch": 1.1372549019607843, "grad_norm": 8.978926676812664, "learning_rate": 2.0676985910726265e-06, "loss": 0.8403, "step": 8033 }, { "epoch": 1.1373964748354215, "grad_norm": 10.201490285871687, "learning_rate": 2.0671340375974536e-06, "loss": 0.9571, "step": 8034 }, { "epoch": 1.1375380477100587, "grad_norm": 8.626056108914469, "learning_rate": 2.066569506877721e-06, "loss": 0.7246, "step": 8035 }, { "epoch": 1.137679620584696, "grad_norm": 7.3521597251767945, "learning_rate": 2.066004998943106e-06, "loss": 0.849, "step": 8036 }, { "epoch": 1.1378211934593332, "grad_norm": 7.929643482405539, "learning_rate": 2.065440513823285e-06, "loss": 0.8479, "step": 8037 }, { "epoch": 1.1379627663339704, "grad_norm": 6.809653711239081, "learning_rate": 2.064876051547932e-06, "loss": 0.8185, "step": 8038 }, { "epoch": 1.1381043392086077, "grad_norm": 8.087259289609973, "learning_rate": 2.064311612146721e-06, "loss": 0.8213, "step": 8039 }, { "epoch": 1.1382459120832449, "grad_norm": 7.102157920325634, "learning_rate": 2.0637471956493236e-06, "loss": 0.7724, "step": 8040 }, { "epoch": 1.1383874849578821, "grad_norm": 7.751409094902873, "learning_rate": 2.0631828020854106e-06, "loss": 0.8683, "step": 8041 }, { "epoch": 1.1385290578325193, "grad_norm": 9.287970065598463, "learning_rate": 2.062618431484652e-06, "loss": 0.9541, "step": 8042 }, { "epoch": 1.1386706307071566, "grad_norm": 10.611151073262945, "learning_rate": 2.062054083876717e-06, "loss": 0.8394, "step": 8043 }, { "epoch": 1.1388122035817938, "grad_norm": 7.640393087849946, "learning_rate": 2.0614897592912716e-06, "loss": 0.8129, "step": 8044 }, { "epoch": 1.138953776456431, "grad_norm": 9.921790031009202, "learning_rate": 2.060925457757983e-06, "loss": 0.8862, "step": 8045 }, { "epoch": 1.1390953493310683, "grad_norm": 8.719915059227166, "learning_rate": 2.060361179306515e-06, "loss": 0.7641, "step": 8046 }, { "epoch": 1.1392369222057055, "grad_norm": 8.156819641566134, "learning_rate": 2.0597969239665325e-06, "loss": 0.8531, "step": 8047 }, { "epoch": 1.1393784950803427, "grad_norm": 8.502177240111243, "learning_rate": 2.0592326917676975e-06, "loss": 0.855, "step": 8048 }, { "epoch": 1.1395200679549797, "grad_norm": 8.385813095167924, "learning_rate": 2.0586684827396708e-06, "loss": 0.8162, "step": 8049 }, { "epoch": 1.139661640829617, "grad_norm": 7.556723161622784, "learning_rate": 2.0581042969121136e-06, "loss": 0.832, "step": 8050 }, { "epoch": 1.1398032137042542, "grad_norm": 10.93061516010514, "learning_rate": 2.0575401343146832e-06, "loss": 0.9206, "step": 8051 }, { "epoch": 1.1399447865788914, "grad_norm": 7.849103946934031, "learning_rate": 2.056975994977038e-06, "loss": 0.8285, "step": 8052 }, { "epoch": 1.1400863594535287, "grad_norm": 8.986772353383776, "learning_rate": 2.0564118789288347e-06, "loss": 0.7732, "step": 8053 }, { "epoch": 1.1402279323281659, "grad_norm": 8.140135766849001, "learning_rate": 2.0558477861997293e-06, "loss": 0.8827, "step": 8054 }, { "epoch": 1.1403695052028031, "grad_norm": 7.7146355786787755, "learning_rate": 2.0552837168193738e-06, "loss": 0.908, "step": 8055 }, { "epoch": 1.1405110780774403, "grad_norm": 7.329503822342839, "learning_rate": 2.0547196708174215e-06, "loss": 0.8084, "step": 8056 }, { "epoch": 1.1406526509520776, "grad_norm": 9.623818461239923, "learning_rate": 2.054155648223524e-06, "loss": 0.9345, "step": 8057 }, { "epoch": 1.1407942238267148, "grad_norm": 9.150645549077057, "learning_rate": 2.0535916490673313e-06, "loss": 0.8154, "step": 8058 }, { "epoch": 1.140935796701352, "grad_norm": 9.92258048365393, "learning_rate": 2.0530276733784933e-06, "loss": 0.9362, "step": 8059 }, { "epoch": 1.1410773695759893, "grad_norm": 8.821601658585243, "learning_rate": 2.052463721186657e-06, "loss": 0.8547, "step": 8060 }, { "epoch": 1.1412189424506265, "grad_norm": 10.00398594572414, "learning_rate": 2.0518997925214694e-06, "loss": 0.8274, "step": 8061 }, { "epoch": 1.1413605153252637, "grad_norm": 8.165421631036432, "learning_rate": 2.0513358874125754e-06, "loss": 0.7726, "step": 8062 }, { "epoch": 1.141502088199901, "grad_norm": 7.410471811447751, "learning_rate": 2.0507720058896195e-06, "loss": 0.7687, "step": 8063 }, { "epoch": 1.1416436610745382, "grad_norm": 9.063661698171785, "learning_rate": 2.0502081479822447e-06, "loss": 0.9192, "step": 8064 }, { "epoch": 1.1417852339491754, "grad_norm": 8.838915902200442, "learning_rate": 2.0496443137200915e-06, "loss": 0.9316, "step": 8065 }, { "epoch": 1.1419268068238126, "grad_norm": 7.847637778412806, "learning_rate": 2.0490805031328013e-06, "loss": 0.8063, "step": 8066 }, { "epoch": 1.1420683796984497, "grad_norm": 7.713794430221663, "learning_rate": 2.0485167162500124e-06, "loss": 0.8398, "step": 8067 }, { "epoch": 1.1422099525730869, "grad_norm": 10.1163632325906, "learning_rate": 2.047952953101363e-06, "loss": 0.9701, "step": 8068 }, { "epoch": 1.1423515254477241, "grad_norm": 10.07908524071261, "learning_rate": 2.0473892137164906e-06, "loss": 0.8781, "step": 8069 }, { "epoch": 1.1424930983223613, "grad_norm": 10.243768798364005, "learning_rate": 2.0468254981250293e-06, "loss": 0.8568, "step": 8070 }, { "epoch": 1.1426346711969986, "grad_norm": 9.680030748933238, "learning_rate": 2.0462618063566135e-06, "loss": 0.8278, "step": 8071 }, { "epoch": 1.1427762440716358, "grad_norm": 7.646873244023093, "learning_rate": 2.045698138440876e-06, "loss": 0.9223, "step": 8072 }, { "epoch": 1.142917816946273, "grad_norm": 8.857199426988936, "learning_rate": 2.045134494407449e-06, "loss": 0.8189, "step": 8073 }, { "epoch": 1.1430593898209103, "grad_norm": 9.500986148446316, "learning_rate": 2.044570874285963e-06, "loss": 0.99, "step": 8074 }, { "epoch": 1.1432009626955475, "grad_norm": 8.498645001287567, "learning_rate": 2.044007278106046e-06, "loss": 0.8778, "step": 8075 }, { "epoch": 1.1433425355701847, "grad_norm": 9.83992748823043, "learning_rate": 2.043443705897326e-06, "loss": 0.835, "step": 8076 }, { "epoch": 1.143484108444822, "grad_norm": 9.265781857994575, "learning_rate": 2.042880157689431e-06, "loss": 0.8772, "step": 8077 }, { "epoch": 1.1436256813194592, "grad_norm": 8.77235651872412, "learning_rate": 2.0423166335119844e-06, "loss": 0.9096, "step": 8078 }, { "epoch": 1.1437672541940964, "grad_norm": 9.944411843055217, "learning_rate": 2.0417531333946113e-06, "loss": 0.9546, "step": 8079 }, { "epoch": 1.1439088270687336, "grad_norm": 9.074524367297483, "learning_rate": 2.041189657366934e-06, "loss": 0.9462, "step": 8080 }, { "epoch": 1.1440503999433709, "grad_norm": 8.963186206378557, "learning_rate": 2.040626205458574e-06, "loss": 0.8703, "step": 8081 }, { "epoch": 1.144191972818008, "grad_norm": 9.216011494073058, "learning_rate": 2.0400627776991526e-06, "loss": 0.8739, "step": 8082 }, { "epoch": 1.1443335456926453, "grad_norm": 10.240198054186493, "learning_rate": 2.039499374118288e-06, "loss": 0.8442, "step": 8083 }, { "epoch": 1.1444751185672826, "grad_norm": 8.288711813682115, "learning_rate": 2.0389359947455978e-06, "loss": 0.8105, "step": 8084 }, { "epoch": 1.1446166914419198, "grad_norm": 8.500962146832263, "learning_rate": 2.0383726396106983e-06, "loss": 0.8794, "step": 8085 }, { "epoch": 1.144758264316557, "grad_norm": 12.092315352163617, "learning_rate": 2.0378093087432067e-06, "loss": 0.9623, "step": 8086 }, { "epoch": 1.1448998371911943, "grad_norm": 8.990365699738753, "learning_rate": 2.037246002172733e-06, "loss": 1.0559, "step": 8087 }, { "epoch": 1.1450414100658315, "grad_norm": 8.062540216863116, "learning_rate": 2.0366827199288923e-06, "loss": 0.7681, "step": 8088 }, { "epoch": 1.1451829829404687, "grad_norm": 7.772176454324631, "learning_rate": 2.036119462041296e-06, "loss": 0.8716, "step": 8089 }, { "epoch": 1.145324555815106, "grad_norm": 8.420443099950958, "learning_rate": 2.0355562285395537e-06, "loss": 0.8502, "step": 8090 }, { "epoch": 1.145466128689743, "grad_norm": 10.981625903431157, "learning_rate": 2.0349930194532734e-06, "loss": 0.9853, "step": 8091 }, { "epoch": 1.1456077015643802, "grad_norm": 7.9091598523681705, "learning_rate": 2.034429834812064e-06, "loss": 0.8291, "step": 8092 }, { "epoch": 1.1457492744390174, "grad_norm": 8.44841281046998, "learning_rate": 2.033866674645531e-06, "loss": 0.8244, "step": 8093 }, { "epoch": 1.1458908473136546, "grad_norm": 8.958423371564843, "learning_rate": 2.0333035389832795e-06, "loss": 0.8818, "step": 8094 }, { "epoch": 1.1460324201882919, "grad_norm": 10.605107613556648, "learning_rate": 2.0327404278549127e-06, "loss": 0.829, "step": 8095 }, { "epoch": 1.146173993062929, "grad_norm": 7.718498689212875, "learning_rate": 2.032177341290034e-06, "loss": 0.8715, "step": 8096 }, { "epoch": 1.1463155659375663, "grad_norm": 10.491568677069415, "learning_rate": 2.031614279318243e-06, "loss": 0.8676, "step": 8097 }, { "epoch": 1.1464571388122036, "grad_norm": 9.15279930110929, "learning_rate": 2.03105124196914e-06, "loss": 0.8847, "step": 8098 }, { "epoch": 1.1465987116868408, "grad_norm": 11.151114342262783, "learning_rate": 2.030488229272323e-06, "loss": 0.9247, "step": 8099 }, { "epoch": 1.146740284561478, "grad_norm": 7.136544228076157, "learning_rate": 2.0299252412573907e-06, "loss": 0.8588, "step": 8100 }, { "epoch": 1.1468818574361153, "grad_norm": 7.52195400296393, "learning_rate": 2.0293622779539372e-06, "loss": 0.9547, "step": 8101 }, { "epoch": 1.1470234303107525, "grad_norm": 11.066618244417569, "learning_rate": 2.0287993393915585e-06, "loss": 0.9031, "step": 8102 }, { "epoch": 1.1471650031853897, "grad_norm": 7.5819932640889, "learning_rate": 2.0282364255998465e-06, "loss": 0.7822, "step": 8103 }, { "epoch": 1.147306576060027, "grad_norm": 8.660804184010255, "learning_rate": 2.027673536608394e-06, "loss": 1.0392, "step": 8104 }, { "epoch": 1.1474481489346642, "grad_norm": 9.627237035210513, "learning_rate": 2.0271106724467915e-06, "loss": 0.8704, "step": 8105 }, { "epoch": 1.1475897218093014, "grad_norm": 9.348416493191548, "learning_rate": 2.0265478331446285e-06, "loss": 0.9261, "step": 8106 }, { "epoch": 1.1477312946839386, "grad_norm": 9.654671252919652, "learning_rate": 2.025985018731494e-06, "loss": 0.836, "step": 8107 }, { "epoch": 1.1478728675585759, "grad_norm": 7.887089486782165, "learning_rate": 2.0254222292369725e-06, "loss": 0.8455, "step": 8108 }, { "epoch": 1.1480144404332129, "grad_norm": 8.22504233491432, "learning_rate": 2.024859464690651e-06, "loss": 0.8866, "step": 8109 }, { "epoch": 1.14815601330785, "grad_norm": 10.172192265357475, "learning_rate": 2.0242967251221118e-06, "loss": 0.8229, "step": 8110 }, { "epoch": 1.1482975861824873, "grad_norm": 8.53860279740975, "learning_rate": 2.02373401056094e-06, "loss": 0.7968, "step": 8111 }, { "epoch": 1.1484391590571246, "grad_norm": 7.111767443859304, "learning_rate": 2.0231713210367163e-06, "loss": 0.8814, "step": 8112 }, { "epoch": 1.1485807319317618, "grad_norm": 9.008784351709473, "learning_rate": 2.0226086565790207e-06, "loss": 0.872, "step": 8113 }, { "epoch": 1.148722304806399, "grad_norm": 8.427030722725402, "learning_rate": 2.022046017217432e-06, "loss": 0.9024, "step": 8114 }, { "epoch": 1.1488638776810363, "grad_norm": 9.082851105468016, "learning_rate": 2.0214834029815276e-06, "loss": 0.9152, "step": 8115 }, { "epoch": 1.1490054505556735, "grad_norm": 9.90291541742372, "learning_rate": 2.020920813900884e-06, "loss": 0.9314, "step": 8116 }, { "epoch": 1.1491470234303107, "grad_norm": 8.22585531964586, "learning_rate": 2.020358250005077e-06, "loss": 0.9333, "step": 8117 }, { "epoch": 1.149288596304948, "grad_norm": 9.583407017521303, "learning_rate": 2.019795711323678e-06, "loss": 0.8286, "step": 8118 }, { "epoch": 1.1494301691795852, "grad_norm": 9.128401031029519, "learning_rate": 2.0192331978862604e-06, "loss": 0.8947, "step": 8119 }, { "epoch": 1.1495717420542224, "grad_norm": 7.924869610890098, "learning_rate": 2.0186707097223952e-06, "loss": 0.8082, "step": 8120 }, { "epoch": 1.1497133149288596, "grad_norm": 8.424787877522496, "learning_rate": 2.018108246861652e-06, "loss": 0.8806, "step": 8121 }, { "epoch": 1.1498548878034969, "grad_norm": 9.30382323074683, "learning_rate": 2.017545809333599e-06, "loss": 0.9398, "step": 8122 }, { "epoch": 1.149996460678134, "grad_norm": 9.331399671883409, "learning_rate": 2.0169833971678033e-06, "loss": 0.8883, "step": 8123 }, { "epoch": 1.1501380335527713, "grad_norm": 10.196980377223277, "learning_rate": 2.0164210103938297e-06, "loss": 0.949, "step": 8124 }, { "epoch": 1.1502796064274086, "grad_norm": 9.810672662753717, "learning_rate": 2.0158586490412436e-06, "loss": 0.9215, "step": 8125 }, { "epoch": 1.1504211793020458, "grad_norm": 8.950626577256381, "learning_rate": 2.0152963131396068e-06, "loss": 0.9466, "step": 8126 }, { "epoch": 1.150562752176683, "grad_norm": 8.816859256056732, "learning_rate": 2.0147340027184816e-06, "loss": 0.7799, "step": 8127 }, { "epoch": 1.1507043250513203, "grad_norm": 8.979456024346021, "learning_rate": 2.014171717807429e-06, "loss": 0.9838, "step": 8128 }, { "epoch": 1.1508458979259575, "grad_norm": 8.68686064418916, "learning_rate": 2.013609458436006e-06, "loss": 0.9554, "step": 8129 }, { "epoch": 1.1509874708005947, "grad_norm": 9.254894714251211, "learning_rate": 2.013047224633771e-06, "loss": 0.9318, "step": 8130 }, { "epoch": 1.151129043675232, "grad_norm": 7.352514872141937, "learning_rate": 2.0124850164302805e-06, "loss": 0.8009, "step": 8131 }, { "epoch": 1.151270616549869, "grad_norm": 8.869571798798574, "learning_rate": 2.0119228338550894e-06, "loss": 0.8754, "step": 8132 }, { "epoch": 1.1514121894245062, "grad_norm": 9.398555612178653, "learning_rate": 2.0113606769377497e-06, "loss": 0.8582, "step": 8133 }, { "epoch": 1.1515537622991434, "grad_norm": 8.903520346500414, "learning_rate": 2.010798545707816e-06, "loss": 0.9933, "step": 8134 }, { "epoch": 1.1516953351737806, "grad_norm": 9.3206898564116, "learning_rate": 2.0102364401948378e-06, "loss": 0.8826, "step": 8135 }, { "epoch": 1.1518369080484179, "grad_norm": 10.69657040116915, "learning_rate": 2.009674360428365e-06, "loss": 0.9436, "step": 8136 }, { "epoch": 1.151978480923055, "grad_norm": 8.929346166513302, "learning_rate": 2.009112306437945e-06, "loss": 0.8654, "step": 8137 }, { "epoch": 1.1521200537976923, "grad_norm": 9.692579943288125, "learning_rate": 2.008550278253127e-06, "loss": 0.9132, "step": 8138 }, { "epoch": 1.1522616266723296, "grad_norm": 10.348195575672277, "learning_rate": 2.0079882759034517e-06, "loss": 0.9864, "step": 8139 }, { "epoch": 1.1524031995469668, "grad_norm": 9.596879976619721, "learning_rate": 2.007426299418467e-06, "loss": 0.9603, "step": 8140 }, { "epoch": 1.152544772421604, "grad_norm": 10.909693308294907, "learning_rate": 2.0068643488277147e-06, "loss": 0.9581, "step": 8141 }, { "epoch": 1.1526863452962413, "grad_norm": 9.85147460123852, "learning_rate": 2.0063024241607356e-06, "loss": 0.8348, "step": 8142 }, { "epoch": 1.1528279181708785, "grad_norm": 9.908736148620987, "learning_rate": 2.00574052544707e-06, "loss": 0.856, "step": 8143 }, { "epoch": 1.1529694910455157, "grad_norm": 10.140453836792181, "learning_rate": 2.005178652716256e-06, "loss": 0.919, "step": 8144 }, { "epoch": 1.153111063920153, "grad_norm": 8.099445239249029, "learning_rate": 2.004616805997832e-06, "loss": 0.7363, "step": 8145 }, { "epoch": 1.1532526367947902, "grad_norm": 8.870562236388878, "learning_rate": 2.0040549853213326e-06, "loss": 0.858, "step": 8146 }, { "epoch": 1.1533942096694274, "grad_norm": 9.412614969106121, "learning_rate": 2.003493190716293e-06, "loss": 0.9699, "step": 8147 }, { "epoch": 1.1535357825440646, "grad_norm": 6.931509920136392, "learning_rate": 2.0029314222122463e-06, "loss": 0.8636, "step": 8148 }, { "epoch": 1.1536773554187019, "grad_norm": 9.181474227712052, "learning_rate": 2.0023696798387247e-06, "loss": 0.793, "step": 8149 }, { "epoch": 1.1538189282933389, "grad_norm": 8.574742829940114, "learning_rate": 2.001807963625257e-06, "loss": 0.849, "step": 8150 }, { "epoch": 1.153960501167976, "grad_norm": 8.707746227374312, "learning_rate": 2.0012462736013735e-06, "loss": 0.9178, "step": 8151 }, { "epoch": 1.1541020740426133, "grad_norm": 7.9543153962394255, "learning_rate": 2.0006846097966016e-06, "loss": 0.7875, "step": 8152 }, { "epoch": 1.1542436469172506, "grad_norm": 9.073454451508187, "learning_rate": 2.000122972240467e-06, "loss": 0.8834, "step": 8153 }, { "epoch": 1.1543852197918878, "grad_norm": 8.612843225115046, "learning_rate": 1.9995613609624957e-06, "loss": 0.9055, "step": 8154 }, { "epoch": 1.154526792666525, "grad_norm": 8.572154595644031, "learning_rate": 1.9989997759922093e-06, "loss": 0.8545, "step": 8155 }, { "epoch": 1.1546683655411623, "grad_norm": 8.68525853503006, "learning_rate": 1.998438217359132e-06, "loss": 0.9182, "step": 8156 }, { "epoch": 1.1548099384157995, "grad_norm": 8.797980718419408, "learning_rate": 1.997876685092784e-06, "loss": 0.7854, "step": 8157 }, { "epoch": 1.1549515112904367, "grad_norm": 8.869533950965193, "learning_rate": 1.9973151792226837e-06, "loss": 0.9138, "step": 8158 }, { "epoch": 1.155093084165074, "grad_norm": 6.592475659495825, "learning_rate": 1.9967536997783495e-06, "loss": 0.7462, "step": 8159 }, { "epoch": 1.1552346570397112, "grad_norm": 8.750024849992615, "learning_rate": 1.9961922467892997e-06, "loss": 0.8099, "step": 8160 }, { "epoch": 1.1553762299143484, "grad_norm": 8.622441893769176, "learning_rate": 1.9956308202850456e-06, "loss": 0.8889, "step": 8161 }, { "epoch": 1.1555178027889856, "grad_norm": 9.204504767776458, "learning_rate": 1.9950694202951044e-06, "loss": 0.8516, "step": 8162 }, { "epoch": 1.1556593756636229, "grad_norm": 8.920912922898065, "learning_rate": 1.994508046848987e-06, "loss": 0.7763, "step": 8163 }, { "epoch": 1.15580094853826, "grad_norm": 9.731477015204883, "learning_rate": 1.9939466999762044e-06, "loss": 1.0184, "step": 8164 }, { "epoch": 1.1559425214128973, "grad_norm": 7.538337127102084, "learning_rate": 1.993385379706267e-06, "loss": 0.8153, "step": 8165 }, { "epoch": 1.1560840942875346, "grad_norm": 9.247977783202838, "learning_rate": 1.9928240860686822e-06, "loss": 0.8279, "step": 8166 }, { "epoch": 1.1562256671621718, "grad_norm": 7.568923870077264, "learning_rate": 1.9922628190929567e-06, "loss": 0.9035, "step": 8167 }, { "epoch": 1.156367240036809, "grad_norm": 9.25393180795331, "learning_rate": 1.9917015788085962e-06, "loss": 0.8366, "step": 8168 }, { "epoch": 1.1565088129114462, "grad_norm": 9.92676814013543, "learning_rate": 1.991140365245105e-06, "loss": 0.75, "step": 8169 }, { "epoch": 1.1566503857860835, "grad_norm": 8.626867562047389, "learning_rate": 1.990579178431986e-06, "loss": 0.8193, "step": 8170 }, { "epoch": 1.1567919586607207, "grad_norm": 8.035244078427384, "learning_rate": 1.990018018398739e-06, "loss": 0.9079, "step": 8171 }, { "epoch": 1.156933531535358, "grad_norm": 10.888464298750135, "learning_rate": 1.989456885174865e-06, "loss": 0.9955, "step": 8172 }, { "epoch": 1.157075104409995, "grad_norm": 8.887505974747464, "learning_rate": 1.988895778789861e-06, "loss": 0.9075, "step": 8173 }, { "epoch": 1.1572166772846322, "grad_norm": 10.011244364858454, "learning_rate": 1.9883346992732256e-06, "loss": 0.9761, "step": 8174 }, { "epoch": 1.1573582501592694, "grad_norm": 8.729007060944534, "learning_rate": 1.987773646654453e-06, "loss": 0.842, "step": 8175 }, { "epoch": 1.1574998230339066, "grad_norm": 7.700333962072126, "learning_rate": 1.987212620963038e-06, "loss": 0.8277, "step": 8176 }, { "epoch": 1.1576413959085439, "grad_norm": 8.180777782220362, "learning_rate": 1.9866516222284736e-06, "loss": 0.7785, "step": 8177 }, { "epoch": 1.157782968783181, "grad_norm": 10.907543728671047, "learning_rate": 1.9860906504802496e-06, "loss": 0.9003, "step": 8178 }, { "epoch": 1.1579245416578183, "grad_norm": 8.521186454438642, "learning_rate": 1.985529705747858e-06, "loss": 0.9061, "step": 8179 }, { "epoch": 1.1580661145324556, "grad_norm": 7.953087347574796, "learning_rate": 1.9849687880607855e-06, "loss": 0.8553, "step": 8180 }, { "epoch": 1.1582076874070928, "grad_norm": 7.931585075448793, "learning_rate": 1.984407897448521e-06, "loss": 0.8906, "step": 8181 }, { "epoch": 1.15834926028173, "grad_norm": 9.46139512660259, "learning_rate": 1.983847033940548e-06, "loss": 0.8713, "step": 8182 }, { "epoch": 1.1584908331563673, "grad_norm": 8.965071398934215, "learning_rate": 1.9832861975663516e-06, "loss": 1.0851, "step": 8183 }, { "epoch": 1.1586324060310045, "grad_norm": 9.251180212661973, "learning_rate": 1.982725388355414e-06, "loss": 0.8881, "step": 8184 }, { "epoch": 1.1587739789056417, "grad_norm": 7.515306239991779, "learning_rate": 1.9821646063372174e-06, "loss": 0.8093, "step": 8185 }, { "epoch": 1.158915551780279, "grad_norm": 8.690193472267993, "learning_rate": 1.9816038515412412e-06, "loss": 0.8615, "step": 8186 }, { "epoch": 1.1590571246549162, "grad_norm": 8.223397105134257, "learning_rate": 1.9810431239969646e-06, "loss": 0.9449, "step": 8187 }, { "epoch": 1.1591986975295534, "grad_norm": 9.043945259775537, "learning_rate": 1.9804824237338636e-06, "loss": 0.805, "step": 8188 }, { "epoch": 1.1593402704041906, "grad_norm": 7.190385852800822, "learning_rate": 1.9799217507814144e-06, "loss": 0.8551, "step": 8189 }, { "epoch": 1.1594818432788279, "grad_norm": 6.977754941661315, "learning_rate": 1.9793611051690905e-06, "loss": 0.8517, "step": 8190 }, { "epoch": 1.159623416153465, "grad_norm": 8.944776014207171, "learning_rate": 1.978800486926366e-06, "loss": 0.7678, "step": 8191 }, { "epoch": 1.159764989028102, "grad_norm": 8.346346447604335, "learning_rate": 1.9782398960827105e-06, "loss": 0.8425, "step": 8192 }, { "epoch": 1.1599065619027393, "grad_norm": 8.22656620963829, "learning_rate": 1.977679332667595e-06, "loss": 0.8757, "step": 8193 }, { "epoch": 1.1600481347773766, "grad_norm": 8.979795028284332, "learning_rate": 1.9771187967104875e-06, "loss": 0.9108, "step": 8194 }, { "epoch": 1.1601897076520138, "grad_norm": 9.629679149380811, "learning_rate": 1.9765582882408544e-06, "loss": 0.8702, "step": 8195 }, { "epoch": 1.160331280526651, "grad_norm": 8.241882029291952, "learning_rate": 1.9759978072881623e-06, "loss": 0.76, "step": 8196 }, { "epoch": 1.1604728534012883, "grad_norm": 9.04025801366384, "learning_rate": 1.975437353881875e-06, "loss": 0.8376, "step": 8197 }, { "epoch": 1.1606144262759255, "grad_norm": 8.428893722763886, "learning_rate": 1.9748769280514544e-06, "loss": 0.8959, "step": 8198 }, { "epoch": 1.1607559991505627, "grad_norm": 9.205044350338525, "learning_rate": 1.9743165298263624e-06, "loss": 0.9365, "step": 8199 }, { "epoch": 1.1608975720252, "grad_norm": 8.336105445092404, "learning_rate": 1.9737561592360583e-06, "loss": 0.9131, "step": 8200 }, { "epoch": 1.1610391448998372, "grad_norm": 8.038770190263868, "learning_rate": 1.97319581631e-06, "loss": 0.8802, "step": 8201 }, { "epoch": 1.1611807177744744, "grad_norm": 9.618721239271288, "learning_rate": 1.9726355010776466e-06, "loss": 0.8155, "step": 8202 }, { "epoch": 1.1613222906491116, "grad_norm": 7.993204807184638, "learning_rate": 1.9720752135684505e-06, "loss": 0.8174, "step": 8203 }, { "epoch": 1.1614638635237489, "grad_norm": 8.428518530654248, "learning_rate": 1.9715149538118667e-06, "loss": 0.8322, "step": 8204 }, { "epoch": 1.161605436398386, "grad_norm": 7.600101781464709, "learning_rate": 1.970954721837348e-06, "loss": 0.8745, "step": 8205 }, { "epoch": 1.1617470092730233, "grad_norm": 8.841890281081064, "learning_rate": 1.970394517674345e-06, "loss": 0.9481, "step": 8206 }, { "epoch": 1.1618885821476606, "grad_norm": 8.282471534318537, "learning_rate": 1.9698343413523065e-06, "loss": 0.7966, "step": 8207 }, { "epoch": 1.1620301550222978, "grad_norm": 8.625968547011022, "learning_rate": 1.969274192900682e-06, "loss": 0.8589, "step": 8208 }, { "epoch": 1.162171727896935, "grad_norm": 11.51390363024811, "learning_rate": 1.9687140723489175e-06, "loss": 0.9628, "step": 8209 }, { "epoch": 1.1623133007715722, "grad_norm": 9.665406462859446, "learning_rate": 1.9681539797264583e-06, "loss": 0.8479, "step": 8210 }, { "epoch": 1.1624548736462095, "grad_norm": 8.365911762280177, "learning_rate": 1.967593915062748e-06, "loss": 0.9139, "step": 8211 }, { "epoch": 1.1625964465208467, "grad_norm": 8.256634905244152, "learning_rate": 1.9670338783872277e-06, "loss": 0.7826, "step": 8212 }, { "epoch": 1.162738019395484, "grad_norm": 9.476481186532844, "learning_rate": 1.9664738697293404e-06, "loss": 0.7759, "step": 8213 }, { "epoch": 1.1628795922701212, "grad_norm": 9.985385515299855, "learning_rate": 1.965913889118523e-06, "loss": 0.8841, "step": 8214 }, { "epoch": 1.1630211651447582, "grad_norm": 9.974570557186558, "learning_rate": 1.9653539365842143e-06, "loss": 0.9665, "step": 8215 }, { "epoch": 1.1631627380193954, "grad_norm": 9.321649549816268, "learning_rate": 1.9647940121558508e-06, "loss": 0.8397, "step": 8216 }, { "epoch": 1.1633043108940326, "grad_norm": 9.454702702795164, "learning_rate": 1.9642341158628665e-06, "loss": 0.9894, "step": 8217 }, { "epoch": 1.1634458837686699, "grad_norm": 9.9647568504884, "learning_rate": 1.963674247734696e-06, "loss": 0.8812, "step": 8218 }, { "epoch": 1.163587456643307, "grad_norm": 8.37796195873622, "learning_rate": 1.96311440780077e-06, "loss": 0.9849, "step": 8219 }, { "epoch": 1.1637290295179443, "grad_norm": 8.882741640960779, "learning_rate": 1.9625545960905187e-06, "loss": 0.9196, "step": 8220 }, { "epoch": 1.1638706023925816, "grad_norm": 9.682333472977142, "learning_rate": 1.961994812633372e-06, "loss": 0.8728, "step": 8221 }, { "epoch": 1.1640121752672188, "grad_norm": 8.561482835483883, "learning_rate": 1.961435057458757e-06, "loss": 0.8566, "step": 8222 }, { "epoch": 1.164153748141856, "grad_norm": 8.010500692084772, "learning_rate": 1.9608753305960997e-06, "loss": 0.9085, "step": 8223 }, { "epoch": 1.1642953210164932, "grad_norm": 9.003501211063238, "learning_rate": 1.960315632074824e-06, "loss": 0.8387, "step": 8224 }, { "epoch": 1.1644368938911305, "grad_norm": 8.378374473218331, "learning_rate": 1.9597559619243527e-06, "loss": 0.9182, "step": 8225 }, { "epoch": 1.1645784667657677, "grad_norm": 7.793227449105173, "learning_rate": 1.959196320174108e-06, "loss": 0.8408, "step": 8226 }, { "epoch": 1.164720039640405, "grad_norm": 8.202560434441436, "learning_rate": 1.95863670685351e-06, "loss": 0.8922, "step": 8227 }, { "epoch": 1.1648616125150422, "grad_norm": 9.227032991020614, "learning_rate": 1.958077121991976e-06, "loss": 0.8785, "step": 8228 }, { "epoch": 1.1650031853896794, "grad_norm": 9.999076419141913, "learning_rate": 1.9575175656189236e-06, "loss": 0.9284, "step": 8229 }, { "epoch": 1.1651447582643166, "grad_norm": 8.16171934825785, "learning_rate": 1.9569580377637677e-06, "loss": 0.8382, "step": 8230 }, { "epoch": 1.1652863311389539, "grad_norm": 10.100937118112482, "learning_rate": 1.956398538455924e-06, "loss": 0.8745, "step": 8231 }, { "epoch": 1.165427904013591, "grad_norm": 8.968808270308312, "learning_rate": 1.955839067724803e-06, "loss": 0.9118, "step": 8232 }, { "epoch": 1.165569476888228, "grad_norm": 8.363759710887493, "learning_rate": 1.9552796255998173e-06, "loss": 0.8107, "step": 8233 }, { "epoch": 1.1657110497628653, "grad_norm": 7.862157856057359, "learning_rate": 1.9547202121103757e-06, "loss": 0.8581, "step": 8234 }, { "epoch": 1.1658526226375026, "grad_norm": 8.932499266826472, "learning_rate": 1.9541608272858856e-06, "loss": 0.9487, "step": 8235 }, { "epoch": 1.1659941955121398, "grad_norm": 8.928188354993756, "learning_rate": 1.953601471155753e-06, "loss": 0.9378, "step": 8236 }, { "epoch": 1.166135768386777, "grad_norm": 8.432577774317998, "learning_rate": 1.9530421437493843e-06, "loss": 0.8193, "step": 8237 }, { "epoch": 1.1662773412614142, "grad_norm": 8.652828270152709, "learning_rate": 1.952482845096182e-06, "loss": 0.8621, "step": 8238 }, { "epoch": 1.1664189141360515, "grad_norm": 9.587049697984313, "learning_rate": 1.9519235752255487e-06, "loss": 0.9288, "step": 8239 }, { "epoch": 1.1665604870106887, "grad_norm": 8.176519354224022, "learning_rate": 1.951364334166884e-06, "loss": 0.7628, "step": 8240 }, { "epoch": 1.166702059885326, "grad_norm": 9.964864039204159, "learning_rate": 1.9508051219495877e-06, "loss": 0.7713, "step": 8241 }, { "epoch": 1.1668436327599632, "grad_norm": 9.432750997552937, "learning_rate": 1.950245938603056e-06, "loss": 0.9941, "step": 8242 }, { "epoch": 1.1669852056346004, "grad_norm": 9.41196529090535, "learning_rate": 1.949686784156686e-06, "loss": 0.8798, "step": 8243 }, { "epoch": 1.1671267785092376, "grad_norm": 8.010073518451382, "learning_rate": 1.949127658639872e-06, "loss": 0.9082, "step": 8244 }, { "epoch": 1.1672683513838749, "grad_norm": 7.190452698952211, "learning_rate": 1.948568562082005e-06, "loss": 0.874, "step": 8245 }, { "epoch": 1.167409924258512, "grad_norm": 8.358907697218878, "learning_rate": 1.948009494512478e-06, "loss": 0.7774, "step": 8246 }, { "epoch": 1.1675514971331493, "grad_norm": 9.525065232101104, "learning_rate": 1.94745045596068e-06, "loss": 0.9046, "step": 8247 }, { "epoch": 1.1676930700077865, "grad_norm": 8.484383992283448, "learning_rate": 1.9468914464559994e-06, "loss": 0.9431, "step": 8248 }, { "epoch": 1.1678346428824238, "grad_norm": 9.161235194319843, "learning_rate": 1.9463324660278235e-06, "loss": 0.8654, "step": 8249 }, { "epoch": 1.167976215757061, "grad_norm": 8.548486087014636, "learning_rate": 1.945773514705537e-06, "loss": 0.9161, "step": 8250 }, { "epoch": 1.1681177886316982, "grad_norm": 9.292213001925028, "learning_rate": 1.9452145925185235e-06, "loss": 1.0035, "step": 8251 }, { "epoch": 1.1682593615063355, "grad_norm": 10.907657040661812, "learning_rate": 1.9446556994961645e-06, "loss": 0.8505, "step": 8252 }, { "epoch": 1.1684009343809727, "grad_norm": 8.100373021708341, "learning_rate": 1.944096835667842e-06, "loss": 0.8666, "step": 8253 }, { "epoch": 1.16854250725561, "grad_norm": 10.659337466265272, "learning_rate": 1.9435380010629343e-06, "loss": 0.8003, "step": 8254 }, { "epoch": 1.1686840801302472, "grad_norm": 9.27302325383223, "learning_rate": 1.94297919571082e-06, "loss": 0.8013, "step": 8255 }, { "epoch": 1.1688256530048842, "grad_norm": 10.288071801203506, "learning_rate": 1.942420419640873e-06, "loss": 0.9319, "step": 8256 }, { "epoch": 1.1689672258795214, "grad_norm": 7.892663431780382, "learning_rate": 1.941861672882469e-06, "loss": 0.828, "step": 8257 }, { "epoch": 1.1691087987541586, "grad_norm": 8.030697577213337, "learning_rate": 1.9413029554649798e-06, "loss": 0.9318, "step": 8258 }, { "epoch": 1.1692503716287959, "grad_norm": 9.163366475059808, "learning_rate": 1.9407442674177783e-06, "loss": 0.778, "step": 8259 }, { "epoch": 1.169391944503433, "grad_norm": 9.105838671566593, "learning_rate": 1.9401856087702337e-06, "loss": 0.8435, "step": 8260 }, { "epoch": 1.1695335173780703, "grad_norm": 9.699085813879574, "learning_rate": 1.9396269795517147e-06, "loss": 0.9377, "step": 8261 }, { "epoch": 1.1696750902527075, "grad_norm": 8.924279738996491, "learning_rate": 1.939068379791587e-06, "loss": 0.8142, "step": 8262 }, { "epoch": 1.1698166631273448, "grad_norm": 8.449194369763715, "learning_rate": 1.938509809519216e-06, "loss": 0.8638, "step": 8263 }, { "epoch": 1.169958236001982, "grad_norm": 11.102914211120078, "learning_rate": 1.9379512687639663e-06, "loss": 0.8995, "step": 8264 }, { "epoch": 1.1700998088766192, "grad_norm": 8.72601737400435, "learning_rate": 1.937392757555199e-06, "loss": 0.7853, "step": 8265 }, { "epoch": 1.1702413817512565, "grad_norm": 8.759325997538415, "learning_rate": 1.936834275922276e-06, "loss": 0.832, "step": 8266 }, { "epoch": 1.1703829546258937, "grad_norm": 8.014873982543623, "learning_rate": 1.936275823894554e-06, "loss": 0.8962, "step": 8267 }, { "epoch": 1.170524527500531, "grad_norm": 8.851474152113157, "learning_rate": 1.9357174015013917e-06, "loss": 0.7419, "step": 8268 }, { "epoch": 1.1706661003751682, "grad_norm": 9.224227881231188, "learning_rate": 1.935159008772145e-06, "loss": 0.837, "step": 8269 }, { "epoch": 1.1708076732498054, "grad_norm": 9.295710027456256, "learning_rate": 1.9346006457361684e-06, "loss": 0.9091, "step": 8270 }, { "epoch": 1.1709492461244426, "grad_norm": 9.222180571966325, "learning_rate": 1.9340423124228136e-06, "loss": 0.8735, "step": 8271 }, { "epoch": 1.1710908189990799, "grad_norm": 9.4874385304024, "learning_rate": 1.9334840088614327e-06, "loss": 0.9048, "step": 8272 }, { "epoch": 1.171232391873717, "grad_norm": 10.27674539782317, "learning_rate": 1.9329257350813753e-06, "loss": 0.8641, "step": 8273 }, { "epoch": 1.171373964748354, "grad_norm": 9.633404517873236, "learning_rate": 1.932367491111989e-06, "loss": 0.8951, "step": 8274 }, { "epoch": 1.1715155376229913, "grad_norm": 8.211745842108767, "learning_rate": 1.9318092769826197e-06, "loss": 0.8373, "step": 8275 }, { "epoch": 1.1716571104976286, "grad_norm": 9.137236451237339, "learning_rate": 1.931251092722615e-06, "loss": 0.8565, "step": 8276 }, { "epoch": 1.1717986833722658, "grad_norm": 9.484341616626791, "learning_rate": 1.930692938361315e-06, "loss": 0.7892, "step": 8277 }, { "epoch": 1.171940256246903, "grad_norm": 9.129407262540763, "learning_rate": 1.930134813928063e-06, "loss": 0.8874, "step": 8278 }, { "epoch": 1.1720818291215402, "grad_norm": 8.947199326798136, "learning_rate": 1.9295767194521988e-06, "loss": 0.9112, "step": 8279 }, { "epoch": 1.1722234019961775, "grad_norm": 9.886106787905693, "learning_rate": 1.9290186549630606e-06, "loss": 0.8407, "step": 8280 }, { "epoch": 1.1723649748708147, "grad_norm": 7.8788266043167035, "learning_rate": 1.9284606204899862e-06, "loss": 0.9642, "step": 8281 }, { "epoch": 1.172506547745452, "grad_norm": 9.134970456531276, "learning_rate": 1.927902616062311e-06, "loss": 0.8466, "step": 8282 }, { "epoch": 1.1726481206200892, "grad_norm": 9.478045744522122, "learning_rate": 1.9273446417093687e-06, "loss": 0.7361, "step": 8283 }, { "epoch": 1.1727896934947264, "grad_norm": 9.591412883972822, "learning_rate": 1.9267866974604914e-06, "loss": 0.8408, "step": 8284 }, { "epoch": 1.1729312663693636, "grad_norm": 9.540042211411217, "learning_rate": 1.9262287833450107e-06, "loss": 0.9458, "step": 8285 }, { "epoch": 1.1730728392440009, "grad_norm": 8.595093112372405, "learning_rate": 1.9256708993922542e-06, "loss": 0.7638, "step": 8286 }, { "epoch": 1.173214412118638, "grad_norm": 8.389435127893696, "learning_rate": 1.9251130456315514e-06, "loss": 0.7834, "step": 8287 }, { "epoch": 1.1733559849932753, "grad_norm": 8.433431817285113, "learning_rate": 1.9245552220922264e-06, "loss": 0.8531, "step": 8288 }, { "epoch": 1.1734975578679125, "grad_norm": 9.14185813997706, "learning_rate": 1.9239974288036044e-06, "loss": 0.9968, "step": 8289 }, { "epoch": 1.1736391307425498, "grad_norm": 8.22715370249235, "learning_rate": 1.9234396657950076e-06, "loss": 0.8491, "step": 8290 }, { "epoch": 1.173780703617187, "grad_norm": 9.575594444846125, "learning_rate": 1.922881933095758e-06, "loss": 0.9029, "step": 8291 }, { "epoch": 1.1739222764918242, "grad_norm": 8.532708906136167, "learning_rate": 1.9223242307351753e-06, "loss": 0.8736, "step": 8292 }, { "epoch": 1.1740638493664615, "grad_norm": 8.860963709238703, "learning_rate": 1.9217665587425764e-06, "loss": 0.9266, "step": 8293 }, { "epoch": 1.1742054222410987, "grad_norm": 9.740494618477937, "learning_rate": 1.9212089171472787e-06, "loss": 0.9443, "step": 8294 }, { "epoch": 1.174346995115736, "grad_norm": 9.410457848529227, "learning_rate": 1.9206513059785966e-06, "loss": 0.8243, "step": 8295 }, { "epoch": 1.1744885679903732, "grad_norm": 10.513647746640064, "learning_rate": 1.9200937252658435e-06, "loss": 0.9026, "step": 8296 }, { "epoch": 1.1746301408650104, "grad_norm": 9.43933047296428, "learning_rate": 1.9195361750383312e-06, "loss": 0.8379, "step": 8297 }, { "epoch": 1.1747717137396474, "grad_norm": 8.463908595812086, "learning_rate": 1.918978655325369e-06, "loss": 0.848, "step": 8298 }, { "epoch": 1.1749132866142846, "grad_norm": 9.427931223020037, "learning_rate": 1.9184211661562653e-06, "loss": 0.8158, "step": 8299 }, { "epoch": 1.1750548594889219, "grad_norm": 10.189080800234677, "learning_rate": 1.9178637075603276e-06, "loss": 0.9922, "step": 8300 }, { "epoch": 1.175196432363559, "grad_norm": 10.180427577064505, "learning_rate": 1.9173062795668606e-06, "loss": 0.8899, "step": 8301 }, { "epoch": 1.1753380052381963, "grad_norm": 9.997966941160737, "learning_rate": 1.916748882205168e-06, "loss": 0.8848, "step": 8302 }, { "epoch": 1.1754795781128335, "grad_norm": 8.584385433961396, "learning_rate": 1.916191515504552e-06, "loss": 0.8256, "step": 8303 }, { "epoch": 1.1756211509874708, "grad_norm": 8.955813125848326, "learning_rate": 1.915634179494312e-06, "loss": 0.8249, "step": 8304 }, { "epoch": 1.175762723862108, "grad_norm": 10.080397711885437, "learning_rate": 1.9150768742037477e-06, "loss": 0.8862, "step": 8305 }, { "epoch": 1.1759042967367452, "grad_norm": 7.317211491742778, "learning_rate": 1.9145195996621567e-06, "loss": 0.9124, "step": 8306 }, { "epoch": 1.1760458696113825, "grad_norm": 8.042328907738971, "learning_rate": 1.9139623558988334e-06, "loss": 0.8012, "step": 8307 }, { "epoch": 1.1761874424860197, "grad_norm": 8.904966670150111, "learning_rate": 1.913405142943073e-06, "loss": 0.8239, "step": 8308 }, { "epoch": 1.176329015360657, "grad_norm": 8.778734383051127, "learning_rate": 1.9128479608241656e-06, "loss": 0.8928, "step": 8309 }, { "epoch": 1.1764705882352942, "grad_norm": 10.282885389500057, "learning_rate": 1.9122908095714032e-06, "loss": 0.8797, "step": 8310 }, { "epoch": 1.1766121611099314, "grad_norm": 9.247884560019617, "learning_rate": 1.911733689214075e-06, "loss": 0.9015, "step": 8311 }, { "epoch": 1.1767537339845686, "grad_norm": 8.314237255316648, "learning_rate": 1.911176599781468e-06, "loss": 0.93, "step": 8312 }, { "epoch": 1.1768953068592058, "grad_norm": 9.67982897835974, "learning_rate": 1.910619541302868e-06, "loss": 0.9239, "step": 8313 }, { "epoch": 1.177036879733843, "grad_norm": 8.600156525362907, "learning_rate": 1.9100625138075595e-06, "loss": 0.9906, "step": 8314 }, { "epoch": 1.1771784526084803, "grad_norm": 8.588864022970565, "learning_rate": 1.909505517324825e-06, "loss": 0.8932, "step": 8315 }, { "epoch": 1.1773200254831173, "grad_norm": 6.636114909165338, "learning_rate": 1.9089485518839446e-06, "loss": 0.8989, "step": 8316 }, { "epoch": 1.1774615983577545, "grad_norm": 8.048860116185514, "learning_rate": 1.9083916175141983e-06, "loss": 0.8039, "step": 8317 }, { "epoch": 1.1776031712323918, "grad_norm": 8.11376175906173, "learning_rate": 1.9078347142448638e-06, "loss": 0.8908, "step": 8318 }, { "epoch": 1.177744744107029, "grad_norm": 9.388387657896397, "learning_rate": 1.9072778421052172e-06, "loss": 0.8289, "step": 8319 }, { "epoch": 1.1778863169816662, "grad_norm": 9.16314208732996, "learning_rate": 1.9067210011245318e-06, "loss": 0.952, "step": 8320 }, { "epoch": 1.1780278898563035, "grad_norm": 8.519207961828084, "learning_rate": 1.906164191332081e-06, "loss": 0.8446, "step": 8321 }, { "epoch": 1.1781694627309407, "grad_norm": 8.471950250092304, "learning_rate": 1.905607412757136e-06, "loss": 0.8495, "step": 8322 }, { "epoch": 1.178311035605578, "grad_norm": 8.741144112919775, "learning_rate": 1.9050506654289663e-06, "loss": 0.8757, "step": 8323 }, { "epoch": 1.1784526084802152, "grad_norm": 9.653809470487827, "learning_rate": 1.9044939493768394e-06, "loss": 1.0148, "step": 8324 }, { "epoch": 1.1785941813548524, "grad_norm": 8.308288884275168, "learning_rate": 1.9039372646300216e-06, "loss": 0.8863, "step": 8325 }, { "epoch": 1.1787357542294896, "grad_norm": 8.906683874437228, "learning_rate": 1.9033806112177772e-06, "loss": 0.906, "step": 8326 }, { "epoch": 1.1788773271041268, "grad_norm": 8.937848277742104, "learning_rate": 1.902823989169369e-06, "loss": 0.9276, "step": 8327 }, { "epoch": 1.179018899978764, "grad_norm": 7.838676118595303, "learning_rate": 1.9022673985140585e-06, "loss": 0.8659, "step": 8328 }, { "epoch": 1.1791604728534013, "grad_norm": 8.121750871532461, "learning_rate": 1.9017108392811065e-06, "loss": 0.8901, "step": 8329 }, { "epoch": 1.1793020457280385, "grad_norm": 7.661103372413956, "learning_rate": 1.9011543114997684e-06, "loss": 0.9079, "step": 8330 }, { "epoch": 1.1794436186026758, "grad_norm": 9.090701506585503, "learning_rate": 1.9005978151993014e-06, "loss": 0.8442, "step": 8331 }, { "epoch": 1.179585191477313, "grad_norm": 8.707250305678722, "learning_rate": 1.9000413504089607e-06, "loss": 0.9602, "step": 8332 }, { "epoch": 1.1797267643519502, "grad_norm": 8.874287751321889, "learning_rate": 1.8994849171579981e-06, "loss": 0.9402, "step": 8333 }, { "epoch": 1.1798683372265875, "grad_norm": 10.613727648644907, "learning_rate": 1.8989285154756665e-06, "loss": 0.9699, "step": 8334 }, { "epoch": 1.1800099101012247, "grad_norm": 9.551651928615787, "learning_rate": 1.8983721453912146e-06, "loss": 0.9351, "step": 8335 }, { "epoch": 1.180151482975862, "grad_norm": 9.27971474636617, "learning_rate": 1.89781580693389e-06, "loss": 0.716, "step": 8336 }, { "epoch": 1.1802930558504992, "grad_norm": 6.830899371112249, "learning_rate": 1.8972595001329398e-06, "loss": 0.8327, "step": 8337 }, { "epoch": 1.1804346287251364, "grad_norm": 7.35966086186782, "learning_rate": 1.8967032250176083e-06, "loss": 0.8195, "step": 8338 }, { "epoch": 1.1805762015997734, "grad_norm": 10.250793426322698, "learning_rate": 1.8961469816171383e-06, "loss": 0.9398, "step": 8339 }, { "epoch": 1.1807177744744106, "grad_norm": 7.3681254270234575, "learning_rate": 1.8955907699607717e-06, "loss": 0.8205, "step": 8340 }, { "epoch": 1.1808593473490478, "grad_norm": 8.336094462410468, "learning_rate": 1.895034590077747e-06, "loss": 0.9302, "step": 8341 }, { "epoch": 1.181000920223685, "grad_norm": 7.095500444394073, "learning_rate": 1.894478441997303e-06, "loss": 0.8082, "step": 8342 }, { "epoch": 1.1811424930983223, "grad_norm": 8.089826774487259, "learning_rate": 1.8939223257486759e-06, "loss": 0.7219, "step": 8343 }, { "epoch": 1.1812840659729595, "grad_norm": 8.410561289160817, "learning_rate": 1.8933662413611e-06, "loss": 0.7884, "step": 8344 }, { "epoch": 1.1814256388475968, "grad_norm": 9.324579584293767, "learning_rate": 1.8928101888638087e-06, "loss": 0.8043, "step": 8345 }, { "epoch": 1.181567211722234, "grad_norm": 9.829889330158506, "learning_rate": 1.892254168286033e-06, "loss": 0.8186, "step": 8346 }, { "epoch": 1.1817087845968712, "grad_norm": 9.002846585556505, "learning_rate": 1.8916981796570023e-06, "loss": 0.8261, "step": 8347 }, { "epoch": 1.1818503574715085, "grad_norm": 8.582804197525794, "learning_rate": 1.8911422230059448e-06, "loss": 0.8014, "step": 8348 }, { "epoch": 1.1819919303461457, "grad_norm": 8.785628235292542, "learning_rate": 1.8905862983620863e-06, "loss": 0.8739, "step": 8349 }, { "epoch": 1.182133503220783, "grad_norm": 9.644012696797743, "learning_rate": 1.8900304057546532e-06, "loss": 0.8768, "step": 8350 }, { "epoch": 1.1822750760954202, "grad_norm": 8.185534991267323, "learning_rate": 1.8894745452128657e-06, "loss": 0.8013, "step": 8351 }, { "epoch": 1.1824166489700574, "grad_norm": 9.072653088133437, "learning_rate": 1.8889187167659462e-06, "loss": 0.9632, "step": 8352 }, { "epoch": 1.1825582218446946, "grad_norm": 9.54548037967241, "learning_rate": 1.888362920443114e-06, "loss": 0.8686, "step": 8353 }, { "epoch": 1.1826997947193318, "grad_norm": 10.377464277536347, "learning_rate": 1.8878071562735873e-06, "loss": 0.8042, "step": 8354 }, { "epoch": 1.182841367593969, "grad_norm": 8.688242050424114, "learning_rate": 1.887251424286581e-06, "loss": 0.8747, "step": 8355 }, { "epoch": 1.1829829404686063, "grad_norm": 8.076885785027233, "learning_rate": 1.8866957245113113e-06, "loss": 0.9378, "step": 8356 }, { "epoch": 1.1831245133432433, "grad_norm": 7.8379782304162156, "learning_rate": 1.88614005697699e-06, "loss": 0.8039, "step": 8357 }, { "epoch": 1.1832660862178805, "grad_norm": 8.50592731926078, "learning_rate": 1.8855844217128281e-06, "loss": 0.8414, "step": 8358 }, { "epoch": 1.1834076590925178, "grad_norm": 8.766173755244012, "learning_rate": 1.885028818748035e-06, "loss": 0.8454, "step": 8359 }, { "epoch": 1.183549231967155, "grad_norm": 9.373911069571461, "learning_rate": 1.8844732481118184e-06, "loss": 0.9214, "step": 8360 }, { "epoch": 1.1836908048417922, "grad_norm": 7.717076170281069, "learning_rate": 1.8839177098333856e-06, "loss": 0.8602, "step": 8361 }, { "epoch": 1.1838323777164295, "grad_norm": 8.809459838242159, "learning_rate": 1.8833622039419371e-06, "loss": 0.9328, "step": 8362 }, { "epoch": 1.1839739505910667, "grad_norm": 10.044267710628457, "learning_rate": 1.8828067304666788e-06, "loss": 0.9591, "step": 8363 }, { "epoch": 1.184115523465704, "grad_norm": 7.7333204378919405, "learning_rate": 1.8822512894368106e-06, "loss": 0.7672, "step": 8364 }, { "epoch": 1.1842570963403412, "grad_norm": 9.610132798498945, "learning_rate": 1.8816958808815311e-06, "loss": 0.838, "step": 8365 }, { "epoch": 1.1843986692149784, "grad_norm": 8.534151471177895, "learning_rate": 1.8811405048300383e-06, "loss": 0.8698, "step": 8366 }, { "epoch": 1.1845402420896156, "grad_norm": 7.9913660188460875, "learning_rate": 1.8805851613115278e-06, "loss": 0.864, "step": 8367 }, { "epoch": 1.1846818149642528, "grad_norm": 7.818039539970053, "learning_rate": 1.8800298503551934e-06, "loss": 0.8798, "step": 8368 }, { "epoch": 1.18482338783889, "grad_norm": 10.08869169586807, "learning_rate": 1.8794745719902274e-06, "loss": 0.8755, "step": 8369 }, { "epoch": 1.1849649607135273, "grad_norm": 7.884394295117343, "learning_rate": 1.8789193262458205e-06, "loss": 0.8251, "step": 8370 }, { "epoch": 1.1851065335881645, "grad_norm": 8.11362447366215, "learning_rate": 1.8783641131511624e-06, "loss": 0.8903, "step": 8371 }, { "epoch": 1.1852481064628018, "grad_norm": 10.178753989358368, "learning_rate": 1.8778089327354385e-06, "loss": 0.9799, "step": 8372 }, { "epoch": 1.185389679337439, "grad_norm": 9.454578836422122, "learning_rate": 1.8772537850278352e-06, "loss": 0.8928, "step": 8373 }, { "epoch": 1.1855312522120762, "grad_norm": 8.701378583753984, "learning_rate": 1.876698670057536e-06, "loss": 0.8406, "step": 8374 }, { "epoch": 1.1856728250867135, "grad_norm": 8.94987388820346, "learning_rate": 1.876143587853723e-06, "loss": 0.9372, "step": 8375 }, { "epoch": 1.1858143979613507, "grad_norm": 8.76147580330809, "learning_rate": 1.8755885384455764e-06, "loss": 0.8236, "step": 8376 }, { "epoch": 1.185955970835988, "grad_norm": 8.406054866752392, "learning_rate": 1.8750335218622749e-06, "loss": 0.8711, "step": 8377 }, { "epoch": 1.1860975437106251, "grad_norm": 8.11123994466576, "learning_rate": 1.8744785381329944e-06, "loss": 0.94, "step": 8378 }, { "epoch": 1.1862391165852624, "grad_norm": 7.173659291761586, "learning_rate": 1.8739235872869113e-06, "loss": 0.7029, "step": 8379 }, { "epoch": 1.1863806894598996, "grad_norm": 8.630037909609033, "learning_rate": 1.8733686693531986e-06, "loss": 0.7919, "step": 8380 }, { "epoch": 1.1865222623345366, "grad_norm": 6.477076676852917, "learning_rate": 1.8728137843610276e-06, "loss": 0.8156, "step": 8381 }, { "epoch": 1.1866638352091738, "grad_norm": 9.144901260001433, "learning_rate": 1.8722589323395693e-06, "loss": 0.868, "step": 8382 }, { "epoch": 1.186805408083811, "grad_norm": 9.565053561469954, "learning_rate": 1.8717041133179897e-06, "loss": 0.7949, "step": 8383 }, { "epoch": 1.1869469809584483, "grad_norm": 10.226539372825263, "learning_rate": 1.871149327325456e-06, "loss": 0.8381, "step": 8384 }, { "epoch": 1.1870885538330855, "grad_norm": 8.083786889424585, "learning_rate": 1.8705945743911341e-06, "loss": 0.8226, "step": 8385 }, { "epoch": 1.1872301267077228, "grad_norm": 9.080519027327902, "learning_rate": 1.8700398545441857e-06, "loss": 0.9321, "step": 8386 }, { "epoch": 1.18737169958236, "grad_norm": 8.760554895811083, "learning_rate": 1.8694851678137726e-06, "loss": 0.8809, "step": 8387 }, { "epoch": 1.1875132724569972, "grad_norm": 8.131507028538792, "learning_rate": 1.868930514229054e-06, "loss": 0.8597, "step": 8388 }, { "epoch": 1.1876548453316345, "grad_norm": 7.788647915167019, "learning_rate": 1.8683758938191877e-06, "loss": 0.868, "step": 8389 }, { "epoch": 1.1877964182062717, "grad_norm": 9.213899429014205, "learning_rate": 1.86782130661333e-06, "loss": 0.85, "step": 8390 }, { "epoch": 1.187937991080909, "grad_norm": 10.707591306950397, "learning_rate": 1.8672667526406345e-06, "loss": 0.7698, "step": 8391 }, { "epoch": 1.1880795639555461, "grad_norm": 7.835562395605049, "learning_rate": 1.8667122319302542e-06, "loss": 0.7356, "step": 8392 }, { "epoch": 1.1882211368301834, "grad_norm": 9.321780911673175, "learning_rate": 1.8661577445113399e-06, "loss": 0.9586, "step": 8393 }, { "epoch": 1.1883627097048206, "grad_norm": 8.316718615699033, "learning_rate": 1.8656032904130402e-06, "loss": 0.7548, "step": 8394 }, { "epoch": 1.1885042825794578, "grad_norm": 8.667925620949484, "learning_rate": 1.8650488696645025e-06, "loss": 0.9107, "step": 8395 }, { "epoch": 1.188645855454095, "grad_norm": 7.996238062403768, "learning_rate": 1.864494482294872e-06, "loss": 0.9239, "step": 8396 }, { "epoch": 1.1887874283287323, "grad_norm": 7.521648697406524, "learning_rate": 1.863940128333293e-06, "loss": 0.7828, "step": 8397 }, { "epoch": 1.1889290012033695, "grad_norm": 7.893202558972048, "learning_rate": 1.863385807808907e-06, "loss": 0.9364, "step": 8398 }, { "epoch": 1.1890705740780065, "grad_norm": 9.520228082184607, "learning_rate": 1.8628315207508547e-06, "loss": 0.9487, "step": 8399 }, { "epoch": 1.1892121469526438, "grad_norm": 8.755264006707533, "learning_rate": 1.8622772671882738e-06, "loss": 0.893, "step": 8400 }, { "epoch": 1.189353719827281, "grad_norm": 10.319439766633069, "learning_rate": 1.861723047150301e-06, "loss": 0.8815, "step": 8401 }, { "epoch": 1.1894952927019182, "grad_norm": 9.015702853866621, "learning_rate": 1.8611688606660728e-06, "loss": 0.9168, "step": 8402 }, { "epoch": 1.1896368655765555, "grad_norm": 8.871617491970003, "learning_rate": 1.8606147077647216e-06, "loss": 0.8769, "step": 8403 }, { "epoch": 1.1897784384511927, "grad_norm": 9.686505666586305, "learning_rate": 1.8600605884753775e-06, "loss": 1.012, "step": 8404 }, { "epoch": 1.18992001132583, "grad_norm": 10.308450909629924, "learning_rate": 1.8595065028271713e-06, "loss": 0.8912, "step": 8405 }, { "epoch": 1.1900615842004671, "grad_norm": 10.49876033186106, "learning_rate": 1.8589524508492308e-06, "loss": 0.9798, "step": 8406 }, { "epoch": 1.1902031570751044, "grad_norm": 9.291871848356985, "learning_rate": 1.8583984325706813e-06, "loss": 0.8741, "step": 8407 }, { "epoch": 1.1903447299497416, "grad_norm": 8.265575156395084, "learning_rate": 1.8578444480206487e-06, "loss": 0.9471, "step": 8408 }, { "epoch": 1.1904863028243788, "grad_norm": 7.567058860036689, "learning_rate": 1.8572904972282541e-06, "loss": 0.798, "step": 8409 }, { "epoch": 1.190627875699016, "grad_norm": 9.562218755438273, "learning_rate": 1.856736580222619e-06, "loss": 0.7723, "step": 8410 }, { "epoch": 1.1907694485736533, "grad_norm": 9.310737186997173, "learning_rate": 1.8561826970328623e-06, "loss": 0.8045, "step": 8411 }, { "epoch": 1.1909110214482905, "grad_norm": 10.182334666962166, "learning_rate": 1.8556288476881012e-06, "loss": 0.9732, "step": 8412 }, { "epoch": 1.1910525943229278, "grad_norm": 8.933506640371109, "learning_rate": 1.855075032217451e-06, "loss": 0.7873, "step": 8413 }, { "epoch": 1.191194167197565, "grad_norm": 9.077932542871736, "learning_rate": 1.854521250650026e-06, "loss": 0.8979, "step": 8414 }, { "epoch": 1.1913357400722022, "grad_norm": 8.94539224514127, "learning_rate": 1.8539675030149373e-06, "loss": 0.8723, "step": 8415 }, { "epoch": 1.1914773129468395, "grad_norm": 10.795150685391683, "learning_rate": 1.853413789341295e-06, "loss": 0.9723, "step": 8416 }, { "epoch": 1.1916188858214767, "grad_norm": 9.392090947999188, "learning_rate": 1.8528601096582078e-06, "loss": 0.8953, "step": 8417 }, { "epoch": 1.191760458696114, "grad_norm": 8.469198630298685, "learning_rate": 1.8523064639947818e-06, "loss": 0.844, "step": 8418 }, { "epoch": 1.1919020315707511, "grad_norm": 7.106347816619412, "learning_rate": 1.8517528523801226e-06, "loss": 0.8172, "step": 8419 }, { "epoch": 1.1920436044453884, "grad_norm": 8.964433541813221, "learning_rate": 1.8511992748433321e-06, "loss": 0.8689, "step": 8420 }, { "epoch": 1.1921851773200256, "grad_norm": 10.770507768227514, "learning_rate": 1.8506457314135123e-06, "loss": 0.8563, "step": 8421 }, { "epoch": 1.1923267501946626, "grad_norm": 7.7515112111064, "learning_rate": 1.850092222119762e-06, "loss": 0.8268, "step": 8422 }, { "epoch": 1.1924683230692998, "grad_norm": 8.48816215617673, "learning_rate": 1.849538746991179e-06, "loss": 0.7979, "step": 8423 }, { "epoch": 1.192609895943937, "grad_norm": 7.767947181718523, "learning_rate": 1.8489853060568597e-06, "loss": 0.844, "step": 8424 }, { "epoch": 1.1927514688185743, "grad_norm": 8.60357171342387, "learning_rate": 1.848431899345897e-06, "loss": 0.7654, "step": 8425 }, { "epoch": 1.1928930416932115, "grad_norm": 7.851514157578166, "learning_rate": 1.8478785268873834e-06, "loss": 0.868, "step": 8426 }, { "epoch": 1.1930346145678488, "grad_norm": 7.580456310216012, "learning_rate": 1.8473251887104093e-06, "loss": 0.8053, "step": 8427 }, { "epoch": 1.193176187442486, "grad_norm": 7.374511961225622, "learning_rate": 1.8467718848440636e-06, "loss": 0.8141, "step": 8428 }, { "epoch": 1.1933177603171232, "grad_norm": 9.32899151584118, "learning_rate": 1.8462186153174327e-06, "loss": 0.9424, "step": 8429 }, { "epoch": 1.1934593331917605, "grad_norm": 8.1818371473921, "learning_rate": 1.8456653801596013e-06, "loss": 0.7531, "step": 8430 }, { "epoch": 1.1936009060663977, "grad_norm": 7.620694304947097, "learning_rate": 1.8451121793996534e-06, "loss": 0.858, "step": 8431 }, { "epoch": 1.193742478941035, "grad_norm": 8.626755687759394, "learning_rate": 1.84455901306667e-06, "loss": 0.8944, "step": 8432 }, { "epoch": 1.1938840518156721, "grad_norm": 9.859698838902112, "learning_rate": 1.8440058811897304e-06, "loss": 0.845, "step": 8433 }, { "epoch": 1.1940256246903094, "grad_norm": 8.982962270043192, "learning_rate": 1.8434527837979128e-06, "loss": 0.788, "step": 8434 }, { "epoch": 1.1941671975649466, "grad_norm": 8.271097661045642, "learning_rate": 1.8428997209202935e-06, "loss": 0.9736, "step": 8435 }, { "epoch": 1.1943087704395838, "grad_norm": 7.028256198579495, "learning_rate": 1.8423466925859445e-06, "loss": 0.797, "step": 8436 }, { "epoch": 1.194450343314221, "grad_norm": 8.745994304928168, "learning_rate": 1.84179369882394e-06, "loss": 0.8659, "step": 8437 }, { "epoch": 1.1945919161888583, "grad_norm": 10.403020317488293, "learning_rate": 1.84124073966335e-06, "loss": 0.8011, "step": 8438 }, { "epoch": 1.1947334890634955, "grad_norm": 8.848375384681766, "learning_rate": 1.8406878151332431e-06, "loss": 0.9244, "step": 8439 }, { "epoch": 1.1948750619381325, "grad_norm": 8.512732898366176, "learning_rate": 1.840134925262686e-06, "loss": 0.8776, "step": 8440 }, { "epoch": 1.1950166348127698, "grad_norm": 10.569222923449312, "learning_rate": 1.8395820700807444e-06, "loss": 1.0142, "step": 8441 }, { "epoch": 1.195158207687407, "grad_norm": 8.049525504018467, "learning_rate": 1.8390292496164805e-06, "loss": 0.9128, "step": 8442 }, { "epoch": 1.1952997805620442, "grad_norm": 8.502116220365926, "learning_rate": 1.838476463898956e-06, "loss": 0.9278, "step": 8443 }, { "epoch": 1.1954413534366815, "grad_norm": 8.163163919597759, "learning_rate": 1.8379237129572307e-06, "loss": 0.8117, "step": 8444 }, { "epoch": 1.1955829263113187, "grad_norm": 7.697519115735236, "learning_rate": 1.8373709968203624e-06, "loss": 0.9145, "step": 8445 }, { "epoch": 1.195724499185956, "grad_norm": 9.434349898525733, "learning_rate": 1.8368183155174069e-06, "loss": 0.7474, "step": 8446 }, { "epoch": 1.1958660720605931, "grad_norm": 8.13647823921425, "learning_rate": 1.8362656690774177e-06, "loss": 0.9169, "step": 8447 }, { "epoch": 1.1960076449352304, "grad_norm": 9.043472414429475, "learning_rate": 1.8357130575294474e-06, "loss": 0.8948, "step": 8448 }, { "epoch": 1.1961492178098676, "grad_norm": 7.949312326691231, "learning_rate": 1.8351604809025465e-06, "loss": 0.78, "step": 8449 }, { "epoch": 1.1962907906845048, "grad_norm": 9.462998862577578, "learning_rate": 1.8346079392257632e-06, "loss": 0.8423, "step": 8450 }, { "epoch": 1.196432363559142, "grad_norm": 9.569814240856099, "learning_rate": 1.834055432528144e-06, "loss": 0.8291, "step": 8451 }, { "epoch": 1.1965739364337793, "grad_norm": 9.20936401490798, "learning_rate": 1.8335029608387342e-06, "loss": 0.9037, "step": 8452 }, { "epoch": 1.1967155093084165, "grad_norm": 8.623235107302845, "learning_rate": 1.8329505241865772e-06, "loss": 0.8206, "step": 8453 }, { "epoch": 1.1968570821830538, "grad_norm": 8.628762571659184, "learning_rate": 1.8323981226007136e-06, "loss": 0.7432, "step": 8454 }, { "epoch": 1.196998655057691, "grad_norm": 9.134344045956576, "learning_rate": 1.8318457561101833e-06, "loss": 0.9084, "step": 8455 }, { "epoch": 1.1971402279323282, "grad_norm": 8.59771171111712, "learning_rate": 1.831293424744024e-06, "loss": 0.8166, "step": 8456 }, { "epoch": 1.1972818008069654, "grad_norm": 9.259865937879447, "learning_rate": 1.8307411285312699e-06, "loss": 0.8411, "step": 8457 }, { "epoch": 1.1974233736816027, "grad_norm": 7.8118849855580885, "learning_rate": 1.8301888675009554e-06, "loss": 0.7933, "step": 8458 }, { "epoch": 1.19756494655624, "grad_norm": 9.805259025299772, "learning_rate": 1.8296366416821127e-06, "loss": 0.8753, "step": 8459 }, { "epoch": 1.1977065194308771, "grad_norm": 10.017546328120233, "learning_rate": 1.829084451103772e-06, "loss": 0.8676, "step": 8460 }, { "epoch": 1.1978480923055144, "grad_norm": 7.363745426350748, "learning_rate": 1.8285322957949615e-06, "loss": 0.7824, "step": 8461 }, { "epoch": 1.1979896651801516, "grad_norm": 9.461267315937798, "learning_rate": 1.8279801757847077e-06, "loss": 0.7923, "step": 8462 }, { "epoch": 1.1981312380547886, "grad_norm": 9.575186896429882, "learning_rate": 1.8274280911020349e-06, "loss": 0.85, "step": 8463 }, { "epoch": 1.1982728109294258, "grad_norm": 8.951112424456186, "learning_rate": 1.8268760417759659e-06, "loss": 0.9341, "step": 8464 }, { "epoch": 1.198414383804063, "grad_norm": 9.12784187272205, "learning_rate": 1.8263240278355216e-06, "loss": 0.9062, "step": 8465 }, { "epoch": 1.1985559566787003, "grad_norm": 9.800772687072783, "learning_rate": 1.8257720493097207e-06, "loss": 0.7927, "step": 8466 }, { "epoch": 1.1986975295533375, "grad_norm": 8.972740853840357, "learning_rate": 1.825220106227581e-06, "loss": 0.8865, "step": 8467 }, { "epoch": 1.1988391024279748, "grad_norm": 10.068715326212798, "learning_rate": 1.8246681986181165e-06, "loss": 0.7499, "step": 8468 }, { "epoch": 1.198980675302612, "grad_norm": 10.078677991732604, "learning_rate": 1.8241163265103411e-06, "loss": 0.7875, "step": 8469 }, { "epoch": 1.1991222481772492, "grad_norm": 9.311902737506092, "learning_rate": 1.8235644899332669e-06, "loss": 0.8928, "step": 8470 }, { "epoch": 1.1992638210518864, "grad_norm": 9.983524101188113, "learning_rate": 1.8230126889159027e-06, "loss": 0.9464, "step": 8471 }, { "epoch": 1.1994053939265237, "grad_norm": 7.688624105996505, "learning_rate": 1.822460923487257e-06, "loss": 0.7541, "step": 8472 }, { "epoch": 1.199546966801161, "grad_norm": 9.93969433627288, "learning_rate": 1.8219091936763353e-06, "loss": 0.8, "step": 8473 }, { "epoch": 1.1996885396757981, "grad_norm": 9.051070800153678, "learning_rate": 1.8213574995121417e-06, "loss": 0.8049, "step": 8474 }, { "epoch": 1.1998301125504354, "grad_norm": 7.190882674619071, "learning_rate": 1.8208058410236777e-06, "loss": 0.7259, "step": 8475 }, { "epoch": 1.1999716854250726, "grad_norm": 10.652070151418558, "learning_rate": 1.8202542182399446e-06, "loss": 0.8389, "step": 8476 }, { "epoch": 1.2001132582997098, "grad_norm": 8.089245341671491, "learning_rate": 1.8197026311899419e-06, "loss": 0.8798, "step": 8477 }, { "epoch": 1.200254831174347, "grad_norm": 9.56445731283092, "learning_rate": 1.8191510799026629e-06, "loss": 0.8664, "step": 8478 }, { "epoch": 1.2003964040489843, "grad_norm": 10.349614719462197, "learning_rate": 1.8185995644071047e-06, "loss": 0.9164, "step": 8479 }, { "epoch": 1.2005379769236215, "grad_norm": 9.726850758153263, "learning_rate": 1.818048084732259e-06, "loss": 0.8047, "step": 8480 }, { "epoch": 1.2006795497982587, "grad_norm": 8.148441245205275, "learning_rate": 1.8174966409071162e-06, "loss": 0.7862, "step": 8481 }, { "epoch": 1.2008211226728958, "grad_norm": 8.847642884233833, "learning_rate": 1.8169452329606667e-06, "loss": 0.952, "step": 8482 }, { "epoch": 1.200962695547533, "grad_norm": 9.060757594485969, "learning_rate": 1.8163938609218974e-06, "loss": 0.9078, "step": 8483 }, { "epoch": 1.2011042684221702, "grad_norm": 10.2439736119012, "learning_rate": 1.8158425248197931e-06, "loss": 0.9464, "step": 8484 }, { "epoch": 1.2012458412968074, "grad_norm": 8.301092365493183, "learning_rate": 1.8152912246833368e-06, "loss": 0.9033, "step": 8485 }, { "epoch": 1.2013874141714447, "grad_norm": 8.297559577013113, "learning_rate": 1.8147399605415104e-06, "loss": 0.8716, "step": 8486 }, { "epoch": 1.201528987046082, "grad_norm": 9.455196538261298, "learning_rate": 1.8141887324232932e-06, "loss": 0.773, "step": 8487 }, { "epoch": 1.2016705599207191, "grad_norm": 9.211227399487981, "learning_rate": 1.8136375403576636e-06, "loss": 0.8796, "step": 8488 }, { "epoch": 1.2018121327953564, "grad_norm": 7.8583789596230895, "learning_rate": 1.8130863843735964e-06, "loss": 0.8383, "step": 8489 }, { "epoch": 1.2019537056699936, "grad_norm": 8.853533075139984, "learning_rate": 1.8125352645000654e-06, "loss": 0.8777, "step": 8490 }, { "epoch": 1.2020952785446308, "grad_norm": 8.0578169122358, "learning_rate": 1.8119841807660432e-06, "loss": 0.8398, "step": 8491 }, { "epoch": 1.202236851419268, "grad_norm": 9.975688755665061, "learning_rate": 1.8114331332004998e-06, "loss": 0.8752, "step": 8492 }, { "epoch": 1.2023784242939053, "grad_norm": 8.79452477832999, "learning_rate": 1.810882121832403e-06, "loss": 0.9038, "step": 8493 }, { "epoch": 1.2025199971685425, "grad_norm": 8.49616962968989, "learning_rate": 1.8103311466907191e-06, "loss": 0.802, "step": 8494 }, { "epoch": 1.2026615700431798, "grad_norm": 8.328101639419629, "learning_rate": 1.8097802078044125e-06, "loss": 0.9551, "step": 8495 }, { "epoch": 1.202803142917817, "grad_norm": 9.830582012902509, "learning_rate": 1.809229305202446e-06, "loss": 0.9504, "step": 8496 }, { "epoch": 1.2029447157924542, "grad_norm": 8.133980820110999, "learning_rate": 1.8086784389137796e-06, "loss": 0.9114, "step": 8497 }, { "epoch": 1.2030862886670914, "grad_norm": 9.795920975029526, "learning_rate": 1.8081276089673719e-06, "loss": 0.9011, "step": 8498 }, { "epoch": 1.2032278615417287, "grad_norm": 9.385840899962776, "learning_rate": 1.8075768153921813e-06, "loss": 0.9266, "step": 8499 }, { "epoch": 1.203369434416366, "grad_norm": 7.7494371271308164, "learning_rate": 1.8070260582171605e-06, "loss": 0.854, "step": 8500 }, { "epoch": 1.2035110072910031, "grad_norm": 10.064181169610675, "learning_rate": 1.8064753374712629e-06, "loss": 0.9248, "step": 8501 }, { "epoch": 1.2036525801656404, "grad_norm": 9.105506873981751, "learning_rate": 1.8059246531834395e-06, "loss": 0.7581, "step": 8502 }, { "epoch": 1.2037941530402776, "grad_norm": 8.783289336565442, "learning_rate": 1.8053740053826399e-06, "loss": 0.809, "step": 8503 }, { "epoch": 1.2039357259149148, "grad_norm": 7.928086593709324, "learning_rate": 1.8048233940978103e-06, "loss": 0.8792, "step": 8504 }, { "epoch": 1.2040772987895518, "grad_norm": 10.377048887603964, "learning_rate": 1.8042728193578968e-06, "loss": 0.9562, "step": 8505 }, { "epoch": 1.204218871664189, "grad_norm": 9.961239655343677, "learning_rate": 1.8037222811918426e-06, "loss": 0.8977, "step": 8506 }, { "epoch": 1.2043604445388263, "grad_norm": 8.943251673093028, "learning_rate": 1.803171779628589e-06, "loss": 0.8439, "step": 8507 }, { "epoch": 1.2045020174134635, "grad_norm": 9.228296748396469, "learning_rate": 1.8026213146970752e-06, "loss": 0.8899, "step": 8508 }, { "epoch": 1.2046435902881008, "grad_norm": 9.388653387944665, "learning_rate": 1.80207088642624e-06, "loss": 0.8785, "step": 8509 }, { "epoch": 1.204785163162738, "grad_norm": 9.654617122128787, "learning_rate": 1.8015204948450166e-06, "loss": 0.9168, "step": 8510 }, { "epoch": 1.2049267360373752, "grad_norm": 8.695126486105593, "learning_rate": 1.80097013998234e-06, "loss": 0.9201, "step": 8511 }, { "epoch": 1.2050683089120124, "grad_norm": 7.823551021885204, "learning_rate": 1.8004198218671423e-06, "loss": 0.8654, "step": 8512 }, { "epoch": 1.2052098817866497, "grad_norm": 8.29236475882827, "learning_rate": 1.7998695405283528e-06, "loss": 0.773, "step": 8513 }, { "epoch": 1.205351454661287, "grad_norm": 8.603624032730744, "learning_rate": 1.7993192959948996e-06, "loss": 0.9211, "step": 8514 }, { "epoch": 1.2054930275359241, "grad_norm": 10.606515041045196, "learning_rate": 1.7987690882957084e-06, "loss": 0.9341, "step": 8515 }, { "epoch": 1.2056346004105614, "grad_norm": 8.608957159353393, "learning_rate": 1.7982189174597037e-06, "loss": 0.8189, "step": 8516 }, { "epoch": 1.2057761732851986, "grad_norm": 8.448548267866656, "learning_rate": 1.797668783515807e-06, "loss": 0.873, "step": 8517 }, { "epoch": 1.2059177461598358, "grad_norm": 9.420772051425608, "learning_rate": 1.7971186864929386e-06, "loss": 0.9588, "step": 8518 }, { "epoch": 1.206059319034473, "grad_norm": 9.870972017081789, "learning_rate": 1.7965686264200165e-06, "loss": 0.9056, "step": 8519 }, { "epoch": 1.2062008919091103, "grad_norm": 8.28470317891325, "learning_rate": 1.7960186033259585e-06, "loss": 0.8758, "step": 8520 }, { "epoch": 1.2063424647837475, "grad_norm": 9.056978214068817, "learning_rate": 1.7954686172396764e-06, "loss": 0.8611, "step": 8521 }, { "epoch": 1.2064840376583847, "grad_norm": 8.664124140335284, "learning_rate": 1.7949186681900843e-06, "loss": 0.8437, "step": 8522 }, { "epoch": 1.2066256105330218, "grad_norm": 9.296952139911062, "learning_rate": 1.7943687562060919e-06, "loss": 0.8116, "step": 8523 }, { "epoch": 1.206767183407659, "grad_norm": 8.878310754612544, "learning_rate": 1.7938188813166074e-06, "loss": 0.8702, "step": 8524 }, { "epoch": 1.2069087562822962, "grad_norm": 10.02228618619271, "learning_rate": 1.7932690435505385e-06, "loss": 0.8732, "step": 8525 }, { "epoch": 1.2070503291569334, "grad_norm": 11.147777428230054, "learning_rate": 1.7927192429367885e-06, "loss": 0.9158, "step": 8526 }, { "epoch": 1.2071919020315707, "grad_norm": 9.54924658554603, "learning_rate": 1.79216947950426e-06, "loss": 0.9685, "step": 8527 }, { "epoch": 1.207333474906208, "grad_norm": 9.304907654432661, "learning_rate": 1.7916197532818548e-06, "loss": 0.974, "step": 8528 }, { "epoch": 1.2074750477808451, "grad_norm": 8.941617004123891, "learning_rate": 1.791070064298471e-06, "loss": 0.8251, "step": 8529 }, { "epoch": 1.2076166206554824, "grad_norm": 8.709723500046694, "learning_rate": 1.7905204125830066e-06, "loss": 0.9124, "step": 8530 }, { "epoch": 1.2077581935301196, "grad_norm": 9.901716187584487, "learning_rate": 1.7899707981643538e-06, "loss": 0.8634, "step": 8531 }, { "epoch": 1.2078997664047568, "grad_norm": 9.731384111777576, "learning_rate": 1.7894212210714068e-06, "loss": 0.9026, "step": 8532 }, { "epoch": 1.208041339279394, "grad_norm": 8.87021045663077, "learning_rate": 1.788871681333056e-06, "loss": 0.9667, "step": 8533 }, { "epoch": 1.2081829121540313, "grad_norm": 8.277151108246725, "learning_rate": 1.7883221789781908e-06, "loss": 0.7318, "step": 8534 }, { "epoch": 1.2083244850286685, "grad_norm": 9.422279867959572, "learning_rate": 1.7877727140356982e-06, "loss": 0.9103, "step": 8535 }, { "epoch": 1.2084660579033057, "grad_norm": 8.431090682711083, "learning_rate": 1.787223286534463e-06, "loss": 0.9388, "step": 8536 }, { "epoch": 1.208607630777943, "grad_norm": 9.013089727612185, "learning_rate": 1.7866738965033681e-06, "loss": 0.8929, "step": 8537 }, { "epoch": 1.2087492036525802, "grad_norm": 10.479983915360316, "learning_rate": 1.7861245439712945e-06, "loss": 0.8427, "step": 8538 }, { "epoch": 1.2088907765272174, "grad_norm": 10.060171675383259, "learning_rate": 1.7855752289671215e-06, "loss": 0.8764, "step": 8539 }, { "epoch": 1.2090323494018547, "grad_norm": 8.791457572969582, "learning_rate": 1.785025951519726e-06, "loss": 0.8391, "step": 8540 }, { "epoch": 1.209173922276492, "grad_norm": 8.899844787615812, "learning_rate": 1.7844767116579836e-06, "loss": 0.8366, "step": 8541 }, { "epoch": 1.2093154951511291, "grad_norm": 8.485578532415168, "learning_rate": 1.7839275094107666e-06, "loss": 0.7428, "step": 8542 }, { "epoch": 1.2094570680257664, "grad_norm": 8.228222396048135, "learning_rate": 1.7833783448069464e-06, "loss": 0.8623, "step": 8543 }, { "epoch": 1.2095986409004036, "grad_norm": 9.122166860053387, "learning_rate": 1.782829217875392e-06, "loss": 0.8447, "step": 8544 }, { "epoch": 1.2097402137750408, "grad_norm": 8.339212962825417, "learning_rate": 1.782280128644971e-06, "loss": 0.93, "step": 8545 }, { "epoch": 1.2098817866496778, "grad_norm": 10.058794749205326, "learning_rate": 1.781731077144549e-06, "loss": 0.908, "step": 8546 }, { "epoch": 1.210023359524315, "grad_norm": 9.05674192410191, "learning_rate": 1.781182063402988e-06, "loss": 0.9597, "step": 8547 }, { "epoch": 1.2101649323989523, "grad_norm": 8.501463820128915, "learning_rate": 1.7806330874491504e-06, "loss": 0.8061, "step": 8548 }, { "epoch": 1.2103065052735895, "grad_norm": 9.635021194727678, "learning_rate": 1.7800841493118942e-06, "loss": 0.8748, "step": 8549 }, { "epoch": 1.2104480781482267, "grad_norm": 7.984745024525796, "learning_rate": 1.7795352490200782e-06, "loss": 0.8843, "step": 8550 }, { "epoch": 1.210589651022864, "grad_norm": 8.204772435019098, "learning_rate": 1.778986386602558e-06, "loss": 0.7754, "step": 8551 }, { "epoch": 1.2107312238975012, "grad_norm": 9.233305841765349, "learning_rate": 1.7784375620881847e-06, "loss": 0.9385, "step": 8552 }, { "epoch": 1.2108727967721384, "grad_norm": 8.838416118700493, "learning_rate": 1.7778887755058108e-06, "loss": 0.8388, "step": 8553 }, { "epoch": 1.2110143696467757, "grad_norm": 8.947970998922527, "learning_rate": 1.7773400268842855e-06, "loss": 0.758, "step": 8554 }, { "epoch": 1.211155942521413, "grad_norm": 7.456612272966556, "learning_rate": 1.7767913162524562e-06, "loss": 0.7837, "step": 8555 }, { "epoch": 1.2112975153960501, "grad_norm": 8.05693678432937, "learning_rate": 1.7762426436391675e-06, "loss": 0.8274, "step": 8556 }, { "epoch": 1.2114390882706874, "grad_norm": 7.9627562477164195, "learning_rate": 1.775694009073264e-06, "loss": 0.905, "step": 8557 }, { "epoch": 1.2115806611453246, "grad_norm": 8.560135786459067, "learning_rate": 1.7751454125835866e-06, "loss": 0.9434, "step": 8558 }, { "epoch": 1.2117222340199618, "grad_norm": 9.417923384896962, "learning_rate": 1.774596854198974e-06, "loss": 0.8825, "step": 8559 }, { "epoch": 1.211863806894599, "grad_norm": 10.757441229636443, "learning_rate": 1.774048333948264e-06, "loss": 0.8357, "step": 8560 }, { "epoch": 1.2120053797692363, "grad_norm": 8.410460598156591, "learning_rate": 1.773499851860292e-06, "loss": 0.8338, "step": 8561 }, { "epoch": 1.2121469526438735, "grad_norm": 9.293566408377037, "learning_rate": 1.7729514079638915e-06, "loss": 0.9159, "step": 8562 }, { "epoch": 1.2122885255185107, "grad_norm": 9.946427851342065, "learning_rate": 1.7724030022878928e-06, "loss": 0.879, "step": 8563 }, { "epoch": 1.2124300983931477, "grad_norm": 8.651780279338647, "learning_rate": 1.7718546348611254e-06, "loss": 0.8423, "step": 8564 }, { "epoch": 1.212571671267785, "grad_norm": 8.95482785739438, "learning_rate": 1.7713063057124174e-06, "loss": 0.8648, "step": 8565 }, { "epoch": 1.2127132441424222, "grad_norm": 7.778894029473428, "learning_rate": 1.7707580148705936e-06, "loss": 0.7765, "step": 8566 }, { "epoch": 1.2128548170170594, "grad_norm": 9.895343540853137, "learning_rate": 1.770209762364477e-06, "loss": 0.9743, "step": 8567 }, { "epoch": 1.2129963898916967, "grad_norm": 9.068257364343856, "learning_rate": 1.7696615482228891e-06, "loss": 0.856, "step": 8568 }, { "epoch": 1.213137962766334, "grad_norm": 7.223477223571961, "learning_rate": 1.769113372474649e-06, "loss": 0.9799, "step": 8569 }, { "epoch": 1.2132795356409711, "grad_norm": 7.774750350427634, "learning_rate": 1.768565235148574e-06, "loss": 0.8328, "step": 8570 }, { "epoch": 1.2134211085156084, "grad_norm": 8.903586327278727, "learning_rate": 1.7680171362734794e-06, "loss": 0.7878, "step": 8571 }, { "epoch": 1.2135626813902456, "grad_norm": 10.714123497597802, "learning_rate": 1.767469075878177e-06, "loss": 0.9391, "step": 8572 }, { "epoch": 1.2137042542648828, "grad_norm": 12.001757810910776, "learning_rate": 1.7669210539914813e-06, "loss": 0.9726, "step": 8573 }, { "epoch": 1.21384582713952, "grad_norm": 7.416260983233771, "learning_rate": 1.7663730706421978e-06, "loss": 0.8664, "step": 8574 }, { "epoch": 1.2139874000141573, "grad_norm": 8.473176707159022, "learning_rate": 1.7658251258591352e-06, "loss": 0.8005, "step": 8575 }, { "epoch": 1.2141289728887945, "grad_norm": 7.745910057548365, "learning_rate": 1.7652772196710982e-06, "loss": 0.8902, "step": 8576 }, { "epoch": 1.2142705457634317, "grad_norm": 9.098378684685608, "learning_rate": 1.7647293521068898e-06, "loss": 0.8589, "step": 8577 }, { "epoch": 1.214412118638069, "grad_norm": 9.493694421280287, "learning_rate": 1.7641815231953107e-06, "loss": 0.8594, "step": 8578 }, { "epoch": 1.2145536915127062, "grad_norm": 10.137905608504802, "learning_rate": 1.763633732965161e-06, "loss": 0.9431, "step": 8579 }, { "epoch": 1.2146952643873434, "grad_norm": 10.642313415741297, "learning_rate": 1.7630859814452367e-06, "loss": 0.8398, "step": 8580 }, { "epoch": 1.2148368372619807, "grad_norm": 9.945277595308937, "learning_rate": 1.7625382686643328e-06, "loss": 0.94, "step": 8581 }, { "epoch": 1.214978410136618, "grad_norm": 8.909944323570215, "learning_rate": 1.7619905946512421e-06, "loss": 0.9126, "step": 8582 }, { "epoch": 1.2151199830112551, "grad_norm": 9.692616151552073, "learning_rate": 1.761442959434757e-06, "loss": 0.9044, "step": 8583 }, { "epoch": 1.2152615558858924, "grad_norm": 9.339804540381937, "learning_rate": 1.7608953630436632e-06, "loss": 0.9168, "step": 8584 }, { "epoch": 1.2154031287605296, "grad_norm": 7.4464511380926925, "learning_rate": 1.7603478055067493e-06, "loss": 0.7501, "step": 8585 }, { "epoch": 1.2155447016351668, "grad_norm": 8.870996996252424, "learning_rate": 1.7598002868528002e-06, "loss": 0.7808, "step": 8586 }, { "epoch": 1.215686274509804, "grad_norm": 9.289672728852167, "learning_rate": 1.7592528071105978e-06, "loss": 0.9009, "step": 8587 }, { "epoch": 1.215827847384441, "grad_norm": 8.766552337171728, "learning_rate": 1.7587053663089233e-06, "loss": 0.9163, "step": 8588 }, { "epoch": 1.2159694202590783, "grad_norm": 8.977588724384695, "learning_rate": 1.7581579644765544e-06, "loss": 0.8691, "step": 8589 }, { "epoch": 1.2161109931337155, "grad_norm": 8.54341839451689, "learning_rate": 1.7576106016422684e-06, "loss": 0.8197, "step": 8590 }, { "epoch": 1.2162525660083527, "grad_norm": 9.002393192419333, "learning_rate": 1.7570632778348394e-06, "loss": 0.8268, "step": 8591 }, { "epoch": 1.21639413888299, "grad_norm": 9.431972476495215, "learning_rate": 1.7565159930830405e-06, "loss": 0.8579, "step": 8592 }, { "epoch": 1.2165357117576272, "grad_norm": 9.093280163979607, "learning_rate": 1.7559687474156412e-06, "loss": 0.8518, "step": 8593 }, { "epoch": 1.2166772846322644, "grad_norm": 9.117915081160062, "learning_rate": 1.7554215408614102e-06, "loss": 0.8512, "step": 8594 }, { "epoch": 1.2168188575069017, "grad_norm": 8.813950006760855, "learning_rate": 1.7548743734491136e-06, "loss": 0.8892, "step": 8595 }, { "epoch": 1.216960430381539, "grad_norm": 7.410618519779785, "learning_rate": 1.7543272452075156e-06, "loss": 0.7885, "step": 8596 }, { "epoch": 1.2171020032561761, "grad_norm": 8.832996529929915, "learning_rate": 1.7537801561653777e-06, "loss": 0.8915, "step": 8597 }, { "epoch": 1.2172435761308134, "grad_norm": 11.614652889482494, "learning_rate": 1.7532331063514613e-06, "loss": 0.9321, "step": 8598 }, { "epoch": 1.2173851490054506, "grad_norm": 7.75834329715776, "learning_rate": 1.7526860957945233e-06, "loss": 0.8114, "step": 8599 }, { "epoch": 1.2175267218800878, "grad_norm": 8.553895511284244, "learning_rate": 1.7521391245233202e-06, "loss": 0.9465, "step": 8600 }, { "epoch": 1.217668294754725, "grad_norm": 9.680866160457917, "learning_rate": 1.7515921925666053e-06, "loss": 0.8621, "step": 8601 }, { "epoch": 1.2178098676293623, "grad_norm": 8.538312399150565, "learning_rate": 1.7510452999531308e-06, "loss": 0.8968, "step": 8602 }, { "epoch": 1.2179514405039995, "grad_norm": 7.578130788899206, "learning_rate": 1.7504984467116467e-06, "loss": 0.926, "step": 8603 }, { "epoch": 1.2180930133786367, "grad_norm": 9.64630068696593, "learning_rate": 1.7499516328709016e-06, "loss": 0.7972, "step": 8604 }, { "epoch": 1.218234586253274, "grad_norm": 10.271629400656625, "learning_rate": 1.7494048584596388e-06, "loss": 0.968, "step": 8605 }, { "epoch": 1.218376159127911, "grad_norm": 8.601294185046507, "learning_rate": 1.7488581235066027e-06, "loss": 0.948, "step": 8606 }, { "epoch": 1.2185177320025482, "grad_norm": 8.575639208221732, "learning_rate": 1.7483114280405348e-06, "loss": 0.9988, "step": 8607 }, { "epoch": 1.2186593048771854, "grad_norm": 8.920475036562218, "learning_rate": 1.747764772090175e-06, "loss": 0.8277, "step": 8608 }, { "epoch": 1.2188008777518227, "grad_norm": 8.006080224219629, "learning_rate": 1.7472181556842602e-06, "loss": 0.9087, "step": 8609 }, { "epoch": 1.21894245062646, "grad_norm": 9.255359643412364, "learning_rate": 1.7466715788515256e-06, "loss": 0.9698, "step": 8610 }, { "epoch": 1.2190840235010971, "grad_norm": 7.973214368708819, "learning_rate": 1.7461250416207045e-06, "loss": 0.972, "step": 8611 }, { "epoch": 1.2192255963757344, "grad_norm": 8.366009797632444, "learning_rate": 1.745578544020528e-06, "loss": 0.8816, "step": 8612 }, { "epoch": 1.2193671692503716, "grad_norm": 7.658135131440699, "learning_rate": 1.7450320860797248e-06, "loss": 0.859, "step": 8613 }, { "epoch": 1.2195087421250088, "grad_norm": 7.617146684586162, "learning_rate": 1.7444856678270218e-06, "loss": 0.8853, "step": 8614 }, { "epoch": 1.219650314999646, "grad_norm": 8.17076812308313, "learning_rate": 1.7439392892911443e-06, "loss": 0.8701, "step": 8615 }, { "epoch": 1.2197918878742833, "grad_norm": 7.561765020295723, "learning_rate": 1.7433929505008145e-06, "loss": 0.7366, "step": 8616 }, { "epoch": 1.2199334607489205, "grad_norm": 9.534555331074865, "learning_rate": 1.7428466514847531e-06, "loss": 0.9506, "step": 8617 }, { "epoch": 1.2200750336235577, "grad_norm": 7.665025065381697, "learning_rate": 1.7423003922716784e-06, "loss": 0.7995, "step": 8618 }, { "epoch": 1.220216606498195, "grad_norm": 10.199723662578744, "learning_rate": 1.741754172890307e-06, "loss": 0.912, "step": 8619 }, { "epoch": 1.2203581793728322, "grad_norm": 9.67506565594143, "learning_rate": 1.7412079933693538e-06, "loss": 0.9008, "step": 8620 }, { "epoch": 1.2204997522474694, "grad_norm": 7.585722019580576, "learning_rate": 1.7406618537375303e-06, "loss": 0.8619, "step": 8621 }, { "epoch": 1.2206413251221067, "grad_norm": 9.074774906950386, "learning_rate": 1.740115754023547e-06, "loss": 0.8518, "step": 8622 }, { "epoch": 1.2207828979967439, "grad_norm": 7.9411994543137725, "learning_rate": 1.7395696942561119e-06, "loss": 0.8532, "step": 8623 }, { "epoch": 1.2209244708713811, "grad_norm": 7.391882759155319, "learning_rate": 1.7390236744639304e-06, "loss": 0.9087, "step": 8624 }, { "epoch": 1.2210660437460183, "grad_norm": 8.654730818433972, "learning_rate": 1.7384776946757075e-06, "loss": 0.8101, "step": 8625 }, { "epoch": 1.2212076166206556, "grad_norm": 10.662123507329433, "learning_rate": 1.7379317549201458e-06, "loss": 0.9924, "step": 8626 }, { "epoch": 1.2213491894952928, "grad_norm": 9.386263985160502, "learning_rate": 1.7373858552259421e-06, "loss": 0.8829, "step": 8627 }, { "epoch": 1.22149076236993, "grad_norm": 9.631650930447806, "learning_rate": 1.7368399956217954e-06, "loss": 0.8314, "step": 8628 }, { "epoch": 1.221632335244567, "grad_norm": 10.46263267223322, "learning_rate": 1.7362941761364012e-06, "loss": 0.9555, "step": 8629 }, { "epoch": 1.2217739081192043, "grad_norm": 8.767945116859801, "learning_rate": 1.7357483967984524e-06, "loss": 0.8768, "step": 8630 }, { "epoch": 1.2219154809938415, "grad_norm": 8.338038171788327, "learning_rate": 1.7352026576366405e-06, "loss": 0.9048, "step": 8631 }, { "epoch": 1.2220570538684787, "grad_norm": 8.372759619008258, "learning_rate": 1.734656958679655e-06, "loss": 0.9294, "step": 8632 }, { "epoch": 1.222198626743116, "grad_norm": 9.605768584558845, "learning_rate": 1.7341112999561823e-06, "loss": 0.7481, "step": 8633 }, { "epoch": 1.2223401996177532, "grad_norm": 8.951199362752362, "learning_rate": 1.7335656814949075e-06, "loss": 0.7682, "step": 8634 }, { "epoch": 1.2224817724923904, "grad_norm": 8.947858449785755, "learning_rate": 1.7330201033245137e-06, "loss": 0.8196, "step": 8635 }, { "epoch": 1.2226233453670277, "grad_norm": 9.418409023175412, "learning_rate": 1.7324745654736812e-06, "loss": 0.9806, "step": 8636 }, { "epoch": 1.2227649182416649, "grad_norm": 9.848262467085524, "learning_rate": 1.7319290679710885e-06, "loss": 0.9383, "step": 8637 }, { "epoch": 1.2229064911163021, "grad_norm": 8.983506666597883, "learning_rate": 1.7313836108454118e-06, "loss": 0.8232, "step": 8638 }, { "epoch": 1.2230480639909393, "grad_norm": 8.287202128354958, "learning_rate": 1.7308381941253256e-06, "loss": 0.7824, "step": 8639 }, { "epoch": 1.2231896368655766, "grad_norm": 10.899231321896197, "learning_rate": 1.7302928178395018e-06, "loss": 0.8935, "step": 8640 }, { "epoch": 1.2233312097402138, "grad_norm": 7.115797826304088, "learning_rate": 1.7297474820166108e-06, "loss": 0.7946, "step": 8641 }, { "epoch": 1.223472782614851, "grad_norm": 8.67692218339315, "learning_rate": 1.7292021866853204e-06, "loss": 0.8522, "step": 8642 }, { "epoch": 1.2236143554894883, "grad_norm": 9.456803745714561, "learning_rate": 1.7286569318742962e-06, "loss": 0.808, "step": 8643 }, { "epoch": 1.2237559283641255, "grad_norm": 10.425611475294343, "learning_rate": 1.728111717612202e-06, "loss": 0.8615, "step": 8644 }, { "epoch": 1.2238975012387627, "grad_norm": 10.921895606986103, "learning_rate": 1.727566543927699e-06, "loss": 0.8672, "step": 8645 }, { "epoch": 1.2240390741134, "grad_norm": 9.388590816102651, "learning_rate": 1.7270214108494469e-06, "loss": 0.8004, "step": 8646 }, { "epoch": 1.224180646988037, "grad_norm": 8.721387457401999, "learning_rate": 1.726476318406104e-06, "loss": 0.8569, "step": 8647 }, { "epoch": 1.2243222198626742, "grad_norm": 8.843592558089506, "learning_rate": 1.7259312666263235e-06, "loss": 0.951, "step": 8648 }, { "epoch": 1.2244637927373114, "grad_norm": 10.322188201873784, "learning_rate": 1.7253862555387587e-06, "loss": 1.0204, "step": 8649 }, { "epoch": 1.2246053656119487, "grad_norm": 8.357427580551336, "learning_rate": 1.7248412851720613e-06, "loss": 0.7858, "step": 8650 }, { "epoch": 1.224746938486586, "grad_norm": 9.530025616158065, "learning_rate": 1.7242963555548794e-06, "loss": 0.9613, "step": 8651 }, { "epoch": 1.2248885113612231, "grad_norm": 9.493833849638975, "learning_rate": 1.7237514667158598e-06, "loss": 0.7541, "step": 8652 }, { "epoch": 1.2250300842358604, "grad_norm": 9.032591406652426, "learning_rate": 1.723206618683646e-06, "loss": 0.8584, "step": 8653 }, { "epoch": 1.2251716571104976, "grad_norm": 8.10388868258845, "learning_rate": 1.722661811486882e-06, "loss": 0.8288, "step": 8654 }, { "epoch": 1.2253132299851348, "grad_norm": 8.617219373241145, "learning_rate": 1.7221170451542067e-06, "loss": 0.8487, "step": 8655 }, { "epoch": 1.225454802859772, "grad_norm": 9.100049331028291, "learning_rate": 1.721572319714258e-06, "loss": 0.9077, "step": 8656 }, { "epoch": 1.2255963757344093, "grad_norm": 9.163159988593591, "learning_rate": 1.7210276351956736e-06, "loss": 0.8673, "step": 8657 }, { "epoch": 1.2257379486090465, "grad_norm": 7.77126618544237, "learning_rate": 1.7204829916270842e-06, "loss": 0.8457, "step": 8658 }, { "epoch": 1.2258795214836837, "grad_norm": 8.934662481848477, "learning_rate": 1.7199383890371228e-06, "loss": 0.8818, "step": 8659 }, { "epoch": 1.226021094358321, "grad_norm": 7.720375291468238, "learning_rate": 1.7193938274544187e-06, "loss": 0.8616, "step": 8660 }, { "epoch": 1.2261626672329582, "grad_norm": 9.934224290595585, "learning_rate": 1.718849306907599e-06, "loss": 0.9098, "step": 8661 }, { "epoch": 1.2263042401075954, "grad_norm": 9.843003211380065, "learning_rate": 1.7183048274252889e-06, "loss": 0.9013, "step": 8662 }, { "epoch": 1.2264458129822327, "grad_norm": 9.5586987089405, "learning_rate": 1.717760389036111e-06, "loss": 0.8729, "step": 8663 }, { "epoch": 1.2265873858568699, "grad_norm": 9.438153174830472, "learning_rate": 1.7172159917686866e-06, "loss": 0.9341, "step": 8664 }, { "epoch": 1.2267289587315071, "grad_norm": 7.921419450350956, "learning_rate": 1.7166716356516334e-06, "loss": 0.8272, "step": 8665 }, { "epoch": 1.2268705316061443, "grad_norm": 8.075967113108604, "learning_rate": 1.716127320713568e-06, "loss": 0.8361, "step": 8666 }, { "epoch": 1.2270121044807816, "grad_norm": 12.484552833331364, "learning_rate": 1.7155830469831057e-06, "loss": 0.9286, "step": 8667 }, { "epoch": 1.2271536773554188, "grad_norm": 7.98603412388352, "learning_rate": 1.7150388144888577e-06, "loss": 0.8504, "step": 8668 }, { "epoch": 1.227295250230056, "grad_norm": 8.05257686337889, "learning_rate": 1.7144946232594334e-06, "loss": 0.7733, "step": 8669 }, { "epoch": 1.2274368231046933, "grad_norm": 8.623547417876205, "learning_rate": 1.7139504733234413e-06, "loss": 0.8908, "step": 8670 }, { "epoch": 1.2275783959793303, "grad_norm": 7.347069562236617, "learning_rate": 1.7134063647094866e-06, "loss": 0.8484, "step": 8671 }, { "epoch": 1.2277199688539675, "grad_norm": 9.59428319785102, "learning_rate": 1.7128622974461728e-06, "loss": 0.8872, "step": 8672 }, { "epoch": 1.2278615417286047, "grad_norm": 8.208314950473811, "learning_rate": 1.7123182715621012e-06, "loss": 0.8288, "step": 8673 }, { "epoch": 1.228003114603242, "grad_norm": 8.449645844210165, "learning_rate": 1.7117742870858706e-06, "loss": 0.8086, "step": 8674 }, { "epoch": 1.2281446874778792, "grad_norm": 8.653713474397719, "learning_rate": 1.7112303440460775e-06, "loss": 0.9399, "step": 8675 }, { "epoch": 1.2282862603525164, "grad_norm": 7.877708574764366, "learning_rate": 1.7106864424713177e-06, "loss": 0.8393, "step": 8676 }, { "epoch": 1.2284278332271537, "grad_norm": 8.58796191798643, "learning_rate": 1.710142582390183e-06, "loss": 0.8803, "step": 8677 }, { "epoch": 1.2285694061017909, "grad_norm": 9.499422557748659, "learning_rate": 1.709598763831264e-06, "loss": 0.9041, "step": 8678 }, { "epoch": 1.2287109789764281, "grad_norm": 7.754940058207832, "learning_rate": 1.7090549868231492e-06, "loss": 0.8681, "step": 8679 }, { "epoch": 1.2288525518510653, "grad_norm": 8.493331088191065, "learning_rate": 1.7085112513944235e-06, "loss": 0.8893, "step": 8680 }, { "epoch": 1.2289941247257026, "grad_norm": 9.064411672025987, "learning_rate": 1.7079675575736704e-06, "loss": 0.801, "step": 8681 }, { "epoch": 1.2291356976003398, "grad_norm": 8.677880539436936, "learning_rate": 1.7074239053894725e-06, "loss": 0.8347, "step": 8682 }, { "epoch": 1.229277270474977, "grad_norm": 7.575401209916933, "learning_rate": 1.7068802948704094e-06, "loss": 0.8357, "step": 8683 }, { "epoch": 1.2294188433496143, "grad_norm": 7.60472136960816, "learning_rate": 1.7063367260450576e-06, "loss": 0.8846, "step": 8684 }, { "epoch": 1.2295604162242515, "grad_norm": 9.790712307352901, "learning_rate": 1.7057931989419923e-06, "loss": 0.8205, "step": 8685 }, { "epoch": 1.2297019890988887, "grad_norm": 8.904294090329133, "learning_rate": 1.705249713589786e-06, "loss": 0.8417, "step": 8686 }, { "epoch": 1.229843561973526, "grad_norm": 10.441527177914585, "learning_rate": 1.7047062700170104e-06, "loss": 0.9373, "step": 8687 }, { "epoch": 1.2299851348481632, "grad_norm": 10.393939644006126, "learning_rate": 1.7041628682522326e-06, "loss": 0.8729, "step": 8688 }, { "epoch": 1.2301267077228002, "grad_norm": 8.85668904596597, "learning_rate": 1.7036195083240203e-06, "loss": 0.8377, "step": 8689 }, { "epoch": 1.2302682805974374, "grad_norm": 9.894843143648227, "learning_rate": 1.703076190260936e-06, "loss": 0.814, "step": 8690 }, { "epoch": 1.2304098534720747, "grad_norm": 10.043593183621384, "learning_rate": 1.702532914091542e-06, "loss": 0.8963, "step": 8691 }, { "epoch": 1.2305514263467119, "grad_norm": 7.0280420742002185, "learning_rate": 1.7019896798443984e-06, "loss": 0.7015, "step": 8692 }, { "epoch": 1.2306929992213491, "grad_norm": 9.368511345020474, "learning_rate": 1.7014464875480618e-06, "loss": 0.8811, "step": 8693 }, { "epoch": 1.2308345720959863, "grad_norm": 8.969920437565207, "learning_rate": 1.7009033372310884e-06, "loss": 0.9412, "step": 8694 }, { "epoch": 1.2309761449706236, "grad_norm": 9.617675372221173, "learning_rate": 1.7003602289220305e-06, "loss": 0.8406, "step": 8695 }, { "epoch": 1.2311177178452608, "grad_norm": 8.91857314599422, "learning_rate": 1.6998171626494392e-06, "loss": 0.8175, "step": 8696 }, { "epoch": 1.231259290719898, "grad_norm": 9.058107350702766, "learning_rate": 1.6992741384418632e-06, "loss": 0.9252, "step": 8697 }, { "epoch": 1.2314008635945353, "grad_norm": 9.63616256438657, "learning_rate": 1.698731156327848e-06, "loss": 0.8791, "step": 8698 }, { "epoch": 1.2315424364691725, "grad_norm": 9.818485451187254, "learning_rate": 1.6981882163359391e-06, "loss": 0.9743, "step": 8699 }, { "epoch": 1.2316840093438097, "grad_norm": 9.486323098426196, "learning_rate": 1.6976453184946786e-06, "loss": 0.8454, "step": 8700 }, { "epoch": 1.231825582218447, "grad_norm": 8.613345468156295, "learning_rate": 1.6971024628326046e-06, "loss": 0.7993, "step": 8701 }, { "epoch": 1.2319671550930842, "grad_norm": 7.344483164449413, "learning_rate": 1.6965596493782555e-06, "loss": 0.8689, "step": 8702 }, { "epoch": 1.2321087279677214, "grad_norm": 8.410155796417778, "learning_rate": 1.6960168781601665e-06, "loss": 0.7949, "step": 8703 }, { "epoch": 1.2322503008423586, "grad_norm": 9.975679960475283, "learning_rate": 1.6954741492068698e-06, "loss": 0.8025, "step": 8704 }, { "epoch": 1.2323918737169959, "grad_norm": 8.352141200571106, "learning_rate": 1.6949314625468985e-06, "loss": 0.9052, "step": 8705 }, { "epoch": 1.232533446591633, "grad_norm": 9.779404731218186, "learning_rate": 1.6943888182087796e-06, "loss": 0.8851, "step": 8706 }, { "epoch": 1.2326750194662703, "grad_norm": 9.297727196367207, "learning_rate": 1.6938462162210395e-06, "loss": 0.8543, "step": 8707 }, { "epoch": 1.2328165923409076, "grad_norm": 9.26649489026296, "learning_rate": 1.6933036566122029e-06, "loss": 0.8088, "step": 8708 }, { "epoch": 1.2329581652155448, "grad_norm": 10.093079535830556, "learning_rate": 1.6927611394107918e-06, "loss": 0.9045, "step": 8709 }, { "epoch": 1.233099738090182, "grad_norm": 8.032953104014057, "learning_rate": 1.6922186646453263e-06, "loss": 0.763, "step": 8710 }, { "epoch": 1.2332413109648193, "grad_norm": 11.70742682118652, "learning_rate": 1.6916762323443225e-06, "loss": 0.933, "step": 8711 }, { "epoch": 1.2333828838394563, "grad_norm": 7.560278700278112, "learning_rate": 1.6911338425362967e-06, "loss": 0.8282, "step": 8712 }, { "epoch": 1.2335244567140935, "grad_norm": 8.438879733833714, "learning_rate": 1.6905914952497616e-06, "loss": 0.6918, "step": 8713 }, { "epoch": 1.2336660295887307, "grad_norm": 7.443676859947139, "learning_rate": 1.6900491905132277e-06, "loss": 0.8037, "step": 8714 }, { "epoch": 1.233807602463368, "grad_norm": 10.075570851853639, "learning_rate": 1.689506928355204e-06, "loss": 0.8914, "step": 8715 }, { "epoch": 1.2339491753380052, "grad_norm": 9.006766848323464, "learning_rate": 1.6889647088041972e-06, "loss": 0.9562, "step": 8716 }, { "epoch": 1.2340907482126424, "grad_norm": 7.871805663369457, "learning_rate": 1.6884225318887107e-06, "loss": 0.8819, "step": 8717 }, { "epoch": 1.2342323210872796, "grad_norm": 9.48151245156281, "learning_rate": 1.6878803976372465e-06, "loss": 0.8919, "step": 8718 }, { "epoch": 1.2343738939619169, "grad_norm": 8.262582143124206, "learning_rate": 1.6873383060783043e-06, "loss": 0.7759, "step": 8719 }, { "epoch": 1.234515466836554, "grad_norm": 9.109602805899387, "learning_rate": 1.6867962572403811e-06, "loss": 0.8282, "step": 8720 }, { "epoch": 1.2346570397111913, "grad_norm": 8.272734326674954, "learning_rate": 1.6862542511519734e-06, "loss": 0.7871, "step": 8721 }, { "epoch": 1.2347986125858286, "grad_norm": 10.703526666162967, "learning_rate": 1.6857122878415721e-06, "loss": 0.7805, "step": 8722 }, { "epoch": 1.2349401854604658, "grad_norm": 8.790893472476707, "learning_rate": 1.6851703673376688e-06, "loss": 0.8781, "step": 8723 }, { "epoch": 1.235081758335103, "grad_norm": 7.947653696518562, "learning_rate": 1.6846284896687514e-06, "loss": 0.8883, "step": 8724 }, { "epoch": 1.2352233312097403, "grad_norm": 7.734563651818582, "learning_rate": 1.6840866548633068e-06, "loss": 0.8116, "step": 8725 }, { "epoch": 1.2353649040843775, "grad_norm": 9.36330406494926, "learning_rate": 1.6835448629498182e-06, "loss": 0.8552, "step": 8726 }, { "epoch": 1.2355064769590147, "grad_norm": 8.2871432082299, "learning_rate": 1.683003113956767e-06, "loss": 0.8775, "step": 8727 }, { "epoch": 1.235648049833652, "grad_norm": 10.171830372045918, "learning_rate": 1.6824614079126334e-06, "loss": 0.8978, "step": 8728 }, { "epoch": 1.2357896227082892, "grad_norm": 8.968528399238597, "learning_rate": 1.6819197448458935e-06, "loss": 0.87, "step": 8729 }, { "epoch": 1.2359311955829262, "grad_norm": 10.564997451139215, "learning_rate": 1.681378124785023e-06, "loss": 0.8574, "step": 8730 }, { "epoch": 1.2360727684575634, "grad_norm": 7.345875793553578, "learning_rate": 1.6808365477584953e-06, "loss": 0.9628, "step": 8731 }, { "epoch": 1.2362143413322007, "grad_norm": 8.347097346901636, "learning_rate": 1.6802950137947783e-06, "loss": 0.8535, "step": 8732 }, { "epoch": 1.2363559142068379, "grad_norm": 8.553263562285332, "learning_rate": 1.6797535229223405e-06, "loss": 0.8459, "step": 8733 }, { "epoch": 1.236497487081475, "grad_norm": 10.776918641375746, "learning_rate": 1.6792120751696495e-06, "loss": 0.8638, "step": 8734 }, { "epoch": 1.2366390599561123, "grad_norm": 7.659039421309812, "learning_rate": 1.678670670565167e-06, "loss": 0.7879, "step": 8735 }, { "epoch": 1.2367806328307496, "grad_norm": 8.753042945354641, "learning_rate": 1.678129309137355e-06, "loss": 0.7826, "step": 8736 }, { "epoch": 1.2369222057053868, "grad_norm": 7.396403660521443, "learning_rate": 1.677587990914673e-06, "loss": 0.8305, "step": 8737 }, { "epoch": 1.237063778580024, "grad_norm": 9.327448988145946, "learning_rate": 1.6770467159255768e-06, "loss": 0.8207, "step": 8738 }, { "epoch": 1.2372053514546613, "grad_norm": 10.516837289658353, "learning_rate": 1.6765054841985212e-06, "loss": 0.8859, "step": 8739 }, { "epoch": 1.2373469243292985, "grad_norm": 8.356825509293193, "learning_rate": 1.6759642957619581e-06, "loss": 0.8668, "step": 8740 }, { "epoch": 1.2374884972039357, "grad_norm": 7.661707089394741, "learning_rate": 1.6754231506443375e-06, "loss": 0.7443, "step": 8741 }, { "epoch": 1.237630070078573, "grad_norm": 8.751956829878148, "learning_rate": 1.6748820488741077e-06, "loss": 0.8033, "step": 8742 }, { "epoch": 1.2377716429532102, "grad_norm": 8.25702114156941, "learning_rate": 1.674340990479713e-06, "loss": 0.7672, "step": 8743 }, { "epoch": 1.2379132158278474, "grad_norm": 7.885377132126854, "learning_rate": 1.6737999754895965e-06, "loss": 0.9367, "step": 8744 }, { "epoch": 1.2380547887024846, "grad_norm": 7.543439316060539, "learning_rate": 1.6732590039321993e-06, "loss": 0.7478, "step": 8745 }, { "epoch": 1.2381963615771219, "grad_norm": 8.125246953879213, "learning_rate": 1.6727180758359598e-06, "loss": 0.8805, "step": 8746 }, { "epoch": 1.238337934451759, "grad_norm": 10.086375468560222, "learning_rate": 1.6721771912293145e-06, "loss": 0.8341, "step": 8747 }, { "epoch": 1.2384795073263963, "grad_norm": 7.79055291305836, "learning_rate": 1.6716363501406966e-06, "loss": 0.7665, "step": 8748 }, { "epoch": 1.2386210802010336, "grad_norm": 10.626355253065627, "learning_rate": 1.6710955525985384e-06, "loss": 0.9276, "step": 8749 }, { "epoch": 1.2387626530756708, "grad_norm": 9.683940799675568, "learning_rate": 1.6705547986312681e-06, "loss": 0.8546, "step": 8750 }, { "epoch": 1.238904225950308, "grad_norm": 9.528430359058195, "learning_rate": 1.6700140882673145e-06, "loss": 0.9864, "step": 8751 }, { "epoch": 1.2390457988249453, "grad_norm": 7.452495172467695, "learning_rate": 1.669473421535101e-06, "loss": 0.9168, "step": 8752 }, { "epoch": 1.2391873716995823, "grad_norm": 9.367894442672938, "learning_rate": 1.668932798463052e-06, "loss": 0.9569, "step": 8753 }, { "epoch": 1.2393289445742195, "grad_norm": 9.929268744858163, "learning_rate": 1.668392219079585e-06, "loss": 0.7493, "step": 8754 }, { "epoch": 1.2394705174488567, "grad_norm": 9.429461763298466, "learning_rate": 1.6678516834131184e-06, "loss": 0.8324, "step": 8755 }, { "epoch": 1.239612090323494, "grad_norm": 7.772843933446799, "learning_rate": 1.667311191492068e-06, "loss": 0.8552, "step": 8756 }, { "epoch": 1.2397536631981312, "grad_norm": 7.884230033518526, "learning_rate": 1.6667707433448482e-06, "loss": 0.8295, "step": 8757 }, { "epoch": 1.2398952360727684, "grad_norm": 9.247221658338114, "learning_rate": 1.666230338999869e-06, "loss": 0.9343, "step": 8758 }, { "epoch": 1.2400368089474056, "grad_norm": 8.155368801176506, "learning_rate": 1.6656899784855393e-06, "loss": 0.8712, "step": 8759 }, { "epoch": 1.2401783818220429, "grad_norm": 10.503228554049167, "learning_rate": 1.6651496618302653e-06, "loss": 0.8901, "step": 8760 }, { "epoch": 1.24031995469668, "grad_norm": 8.454762072878477, "learning_rate": 1.6646093890624509e-06, "loss": 0.8622, "step": 8761 }, { "epoch": 1.2404615275713173, "grad_norm": 8.920561845815516, "learning_rate": 1.6640691602104983e-06, "loss": 0.8753, "step": 8762 }, { "epoch": 1.2406031004459546, "grad_norm": 6.647748936141372, "learning_rate": 1.6635289753028073e-06, "loss": 0.8884, "step": 8763 }, { "epoch": 1.2407446733205918, "grad_norm": 9.161298069827154, "learning_rate": 1.6629888343677734e-06, "loss": 0.8339, "step": 8764 }, { "epoch": 1.240886246195229, "grad_norm": 8.429290620725586, "learning_rate": 1.6624487374337925e-06, "loss": 0.7566, "step": 8765 }, { "epoch": 1.2410278190698663, "grad_norm": 10.267852123567291, "learning_rate": 1.661908684529257e-06, "loss": 0.9454, "step": 8766 }, { "epoch": 1.2411693919445035, "grad_norm": 8.288407597390966, "learning_rate": 1.661368675682557e-06, "loss": 0.8916, "step": 8767 }, { "epoch": 1.2413109648191407, "grad_norm": 8.369066556337932, "learning_rate": 1.6608287109220805e-06, "loss": 0.8679, "step": 8768 }, { "epoch": 1.241452537693778, "grad_norm": 9.344918372551385, "learning_rate": 1.6602887902762132e-06, "loss": 0.9094, "step": 8769 }, { "epoch": 1.2415941105684152, "grad_norm": 8.963799043839607, "learning_rate": 1.6597489137733377e-06, "loss": 0.7278, "step": 8770 }, { "epoch": 1.2417356834430524, "grad_norm": 7.720942753364541, "learning_rate": 1.6592090814418354e-06, "loss": 0.7326, "step": 8771 }, { "epoch": 1.2418772563176894, "grad_norm": 8.316214971819784, "learning_rate": 1.6586692933100846e-06, "loss": 0.8904, "step": 8772 }, { "epoch": 1.2420188291923266, "grad_norm": 8.656370747409191, "learning_rate": 1.6581295494064615e-06, "loss": 0.8748, "step": 8773 }, { "epoch": 1.2421604020669639, "grad_norm": 9.067429458095555, "learning_rate": 1.6575898497593417e-06, "loss": 0.7844, "step": 8774 }, { "epoch": 1.242301974941601, "grad_norm": 9.179845410875835, "learning_rate": 1.6570501943970945e-06, "loss": 0.826, "step": 8775 }, { "epoch": 1.2424435478162383, "grad_norm": 7.61310612813278, "learning_rate": 1.65651058334809e-06, "loss": 0.8526, "step": 8776 }, { "epoch": 1.2425851206908756, "grad_norm": 9.856004819076153, "learning_rate": 1.655971016640695e-06, "loss": 0.8252, "step": 8777 }, { "epoch": 1.2427266935655128, "grad_norm": 10.744349569353048, "learning_rate": 1.655431494303274e-06, "loss": 0.7716, "step": 8778 }, { "epoch": 1.24286826644015, "grad_norm": 7.749522532859988, "learning_rate": 1.65489201636419e-06, "loss": 0.7611, "step": 8779 }, { "epoch": 1.2430098393147873, "grad_norm": 8.942041910290436, "learning_rate": 1.6543525828518025e-06, "loss": 0.8424, "step": 8780 }, { "epoch": 1.2431514121894245, "grad_norm": 8.234544553223538, "learning_rate": 1.6538131937944693e-06, "loss": 0.8334, "step": 8781 }, { "epoch": 1.2432929850640617, "grad_norm": 9.74796136915264, "learning_rate": 1.6532738492205456e-06, "loss": 0.8354, "step": 8782 }, { "epoch": 1.243434557938699, "grad_norm": 9.736158278044616, "learning_rate": 1.652734549158384e-06, "loss": 0.8651, "step": 8783 }, { "epoch": 1.2435761308133362, "grad_norm": 8.80105041823733, "learning_rate": 1.652195293636336e-06, "loss": 0.8632, "step": 8784 }, { "epoch": 1.2437177036879734, "grad_norm": 9.789023141256669, "learning_rate": 1.6516560826827494e-06, "loss": 0.9351, "step": 8785 }, { "epoch": 1.2438592765626106, "grad_norm": 9.558175499124836, "learning_rate": 1.6511169163259693e-06, "loss": 0.8744, "step": 8786 }, { "epoch": 1.2440008494372479, "grad_norm": 10.599496548770718, "learning_rate": 1.6505777945943402e-06, "loss": 0.8918, "step": 8787 }, { "epoch": 1.244142422311885, "grad_norm": 9.02647152049745, "learning_rate": 1.650038717516203e-06, "loss": 0.8564, "step": 8788 }, { "epoch": 1.2442839951865223, "grad_norm": 8.656451832315323, "learning_rate": 1.6494996851198965e-06, "loss": 0.8129, "step": 8789 }, { "epoch": 1.2444255680611596, "grad_norm": 8.391824249434592, "learning_rate": 1.6489606974337574e-06, "loss": 0.851, "step": 8790 }, { "epoch": 1.2445671409357968, "grad_norm": 9.261133453010633, "learning_rate": 1.6484217544861204e-06, "loss": 0.8994, "step": 8791 }, { "epoch": 1.244708713810434, "grad_norm": 8.430646811725806, "learning_rate": 1.6478828563053162e-06, "loss": 0.926, "step": 8792 }, { "epoch": 1.2448502866850713, "grad_norm": 9.689747014478234, "learning_rate": 1.6473440029196752e-06, "loss": 0.8969, "step": 8793 }, { "epoch": 1.2449918595597085, "grad_norm": 8.24832506227827, "learning_rate": 1.6468051943575242e-06, "loss": 0.8992, "step": 8794 }, { "epoch": 1.2451334324343455, "grad_norm": 8.551557914276817, "learning_rate": 1.6462664306471882e-06, "loss": 0.9028, "step": 8795 }, { "epoch": 1.2452750053089827, "grad_norm": 10.596465086370085, "learning_rate": 1.6457277118169893e-06, "loss": 1.0169, "step": 8796 }, { "epoch": 1.24541657818362, "grad_norm": 8.015776813103049, "learning_rate": 1.6451890378952472e-06, "loss": 0.8047, "step": 8797 }, { "epoch": 1.2455581510582572, "grad_norm": 7.044111230044585, "learning_rate": 1.6446504089102803e-06, "loss": 0.8212, "step": 8798 }, { "epoch": 1.2456997239328944, "grad_norm": 9.808150767202141, "learning_rate": 1.6441118248904038e-06, "loss": 0.7596, "step": 8799 }, { "epoch": 1.2458412968075316, "grad_norm": 8.458957542600597, "learning_rate": 1.6435732858639298e-06, "loss": 0.8826, "step": 8800 }, { "epoch": 1.2459828696821689, "grad_norm": 9.349773595996147, "learning_rate": 1.6430347918591693e-06, "loss": 0.9085, "step": 8801 }, { "epoch": 1.246124442556806, "grad_norm": 9.0766516325349, "learning_rate": 1.6424963429044315e-06, "loss": 0.8644, "step": 8802 }, { "epoch": 1.2462660154314433, "grad_norm": 8.023652873356928, "learning_rate": 1.6419579390280217e-06, "loss": 0.8002, "step": 8803 }, { "epoch": 1.2464075883060806, "grad_norm": 8.843923829867379, "learning_rate": 1.6414195802582434e-06, "loss": 0.8407, "step": 8804 }, { "epoch": 1.2465491611807178, "grad_norm": 7.8604099813217685, "learning_rate": 1.640881266623397e-06, "loss": 0.8381, "step": 8805 }, { "epoch": 1.246690734055355, "grad_norm": 7.983629165721672, "learning_rate": 1.6403429981517831e-06, "loss": 0.7343, "step": 8806 }, { "epoch": 1.2468323069299923, "grad_norm": 8.288873812355236, "learning_rate": 1.6398047748716955e-06, "loss": 0.8885, "step": 8807 }, { "epoch": 1.2469738798046295, "grad_norm": 9.268866192429583, "learning_rate": 1.6392665968114297e-06, "loss": 0.8343, "step": 8808 }, { "epoch": 1.2471154526792667, "grad_norm": 8.430228257533784, "learning_rate": 1.6387284639992773e-06, "loss": 0.7778, "step": 8809 }, { "epoch": 1.247257025553904, "grad_norm": 9.095853896444728, "learning_rate": 1.6381903764635274e-06, "loss": 0.9076, "step": 8810 }, { "epoch": 1.2473985984285412, "grad_norm": 11.706688454273893, "learning_rate": 1.6376523342324668e-06, "loss": 0.8892, "step": 8811 }, { "epoch": 1.2475401713031784, "grad_norm": 9.278087141268752, "learning_rate": 1.6371143373343798e-06, "loss": 0.9039, "step": 8812 }, { "epoch": 1.2476817441778154, "grad_norm": 9.001652459925197, "learning_rate": 1.6365763857975486e-06, "loss": 1.016, "step": 8813 }, { "epoch": 1.2478233170524526, "grad_norm": 8.54433100635687, "learning_rate": 1.6360384796502532e-06, "loss": 0.874, "step": 8814 }, { "epoch": 1.2479648899270899, "grad_norm": 9.940789979670724, "learning_rate": 1.635500618920771e-06, "loss": 0.7952, "step": 8815 }, { "epoch": 1.248106462801727, "grad_norm": 8.437044710661846, "learning_rate": 1.634962803637377e-06, "loss": 0.8766, "step": 8816 }, { "epoch": 1.2482480356763643, "grad_norm": 8.376964281803598, "learning_rate": 1.6344250338283426e-06, "loss": 0.8231, "step": 8817 }, { "epoch": 1.2483896085510016, "grad_norm": 9.437598625829859, "learning_rate": 1.6338873095219391e-06, "loss": 0.9104, "step": 8818 }, { "epoch": 1.2485311814256388, "grad_norm": 13.227586322416958, "learning_rate": 1.6333496307464335e-06, "loss": 0.9548, "step": 8819 }, { "epoch": 1.248672754300276, "grad_norm": 8.205192726749656, "learning_rate": 1.6328119975300921e-06, "loss": 0.7689, "step": 8820 }, { "epoch": 1.2488143271749133, "grad_norm": 8.467351255999745, "learning_rate": 1.6322744099011772e-06, "loss": 0.8396, "step": 8821 }, { "epoch": 1.2489559000495505, "grad_norm": 9.52421094775027, "learning_rate": 1.6317368678879497e-06, "loss": 0.9156, "step": 8822 }, { "epoch": 1.2490974729241877, "grad_norm": 8.931629306861952, "learning_rate": 1.6311993715186674e-06, "loss": 0.9074, "step": 8823 }, { "epoch": 1.249239045798825, "grad_norm": 9.390464540148434, "learning_rate": 1.6306619208215862e-06, "loss": 0.9443, "step": 8824 }, { "epoch": 1.2493806186734622, "grad_norm": 10.050375511024335, "learning_rate": 1.6301245158249599e-06, "loss": 0.9202, "step": 8825 }, { "epoch": 1.2495221915480994, "grad_norm": 9.60989184966744, "learning_rate": 1.6295871565570392e-06, "loss": 0.8662, "step": 8826 }, { "epoch": 1.2496637644227366, "grad_norm": 7.393906749827623, "learning_rate": 1.6290498430460736e-06, "loss": 0.8657, "step": 8827 }, { "epoch": 1.2498053372973739, "grad_norm": 9.703125393141095, "learning_rate": 1.6285125753203073e-06, "loss": 0.8967, "step": 8828 }, { "epoch": 1.249946910172011, "grad_norm": 9.114243111209749, "learning_rate": 1.6279753534079853e-06, "loss": 0.8648, "step": 8829 }, { "epoch": 1.2500884830466483, "grad_norm": 8.191269800216656, "learning_rate": 1.6274381773373482e-06, "loss": 0.7436, "step": 8830 }, { "epoch": 1.2502300559212856, "grad_norm": 8.594611107426015, "learning_rate": 1.6269010471366359e-06, "loss": 0.9707, "step": 8831 }, { "epoch": 1.2503716287959228, "grad_norm": 9.567403894743665, "learning_rate": 1.6263639628340847e-06, "loss": 1.0321, "step": 8832 }, { "epoch": 1.25051320167056, "grad_norm": 9.623952040656654, "learning_rate": 1.6258269244579283e-06, "loss": 0.8524, "step": 8833 }, { "epoch": 1.2506547745451972, "grad_norm": 10.07793763452285, "learning_rate": 1.6252899320363992e-06, "loss": 0.8649, "step": 8834 }, { "epoch": 1.2507963474198345, "grad_norm": 8.672040838082076, "learning_rate": 1.6247529855977256e-06, "loss": 0.8756, "step": 8835 }, { "epoch": 1.2509379202944717, "grad_norm": 9.059408989296795, "learning_rate": 1.6242160851701353e-06, "loss": 0.8646, "step": 8836 }, { "epoch": 1.251079493169109, "grad_norm": 9.123864926326323, "learning_rate": 1.6236792307818528e-06, "loss": 0.8672, "step": 8837 }, { "epoch": 1.251221066043746, "grad_norm": 10.0303357619796, "learning_rate": 1.6231424224610992e-06, "loss": 0.8062, "step": 8838 }, { "epoch": 1.2513626389183832, "grad_norm": 6.834387100969511, "learning_rate": 1.6226056602360945e-06, "loss": 0.8554, "step": 8839 }, { "epoch": 1.2515042117930204, "grad_norm": 9.684535206604846, "learning_rate": 1.6220689441350561e-06, "loss": 0.9459, "step": 8840 }, { "epoch": 1.2516457846676576, "grad_norm": 8.602423712279345, "learning_rate": 1.6215322741861988e-06, "loss": 0.9746, "step": 8841 }, { "epoch": 1.2517873575422949, "grad_norm": 8.094874476966988, "learning_rate": 1.6209956504177345e-06, "loss": 0.8858, "step": 8842 }, { "epoch": 1.251928930416932, "grad_norm": 10.481964243492508, "learning_rate": 1.6204590728578739e-06, "loss": 0.8971, "step": 8843 }, { "epoch": 1.2520705032915693, "grad_norm": 9.989507511130936, "learning_rate": 1.6199225415348239e-06, "loss": 0.9211, "step": 8844 }, { "epoch": 1.2522120761662066, "grad_norm": 10.907088020722895, "learning_rate": 1.6193860564767893e-06, "loss": 0.8394, "step": 8845 }, { "epoch": 1.2523536490408438, "grad_norm": 8.082515504764938, "learning_rate": 1.6188496177119737e-06, "loss": 0.9279, "step": 8846 }, { "epoch": 1.252495221915481, "grad_norm": 8.7919661002369, "learning_rate": 1.6183132252685758e-06, "loss": 0.9795, "step": 8847 }, { "epoch": 1.2526367947901182, "grad_norm": 9.964744983095402, "learning_rate": 1.6177768791747957e-06, "loss": 0.9455, "step": 8848 }, { "epoch": 1.2527783676647555, "grad_norm": 9.577375821409104, "learning_rate": 1.6172405794588264e-06, "loss": 0.8231, "step": 8849 }, { "epoch": 1.2529199405393927, "grad_norm": 9.761194307390905, "learning_rate": 1.616704326148862e-06, "loss": 0.8687, "step": 8850 }, { "epoch": 1.25306151341403, "grad_norm": 9.097055365380646, "learning_rate": 1.6161681192730918e-06, "loss": 0.7487, "step": 8851 }, { "epoch": 1.2532030862886672, "grad_norm": 8.87294390864481, "learning_rate": 1.615631958859705e-06, "loss": 0.863, "step": 8852 }, { "epoch": 1.2533446591633042, "grad_norm": 10.43311478098478, "learning_rate": 1.6150958449368862e-06, "loss": 0.8201, "step": 8853 }, { "epoch": 1.2534862320379414, "grad_norm": 9.123997045293226, "learning_rate": 1.6145597775328192e-06, "loss": 0.8166, "step": 8854 }, { "epoch": 1.2536278049125786, "grad_norm": 9.21912489306579, "learning_rate": 1.614023756675685e-06, "loss": 0.9953, "step": 8855 }, { "epoch": 1.2537693777872159, "grad_norm": 10.777194733814843, "learning_rate": 1.613487782393661e-06, "loss": 0.8941, "step": 8856 }, { "epoch": 1.253910950661853, "grad_norm": 8.81626043507575, "learning_rate": 1.612951854714923e-06, "loss": 0.9516, "step": 8857 }, { "epoch": 1.2540525235364903, "grad_norm": 7.849830975504768, "learning_rate": 1.6124159736676452e-06, "loss": 0.7724, "step": 8858 }, { "epoch": 1.2541940964111276, "grad_norm": 8.123869832005143, "learning_rate": 1.611880139279998e-06, "loss": 0.8086, "step": 8859 }, { "epoch": 1.2543356692857648, "grad_norm": 8.088605387171203, "learning_rate": 1.6113443515801492e-06, "loss": 0.9746, "step": 8860 }, { "epoch": 1.254477242160402, "grad_norm": 7.603731102858012, "learning_rate": 1.610808610596265e-06, "loss": 0.8424, "step": 8861 }, { "epoch": 1.2546188150350392, "grad_norm": 9.198417975364661, "learning_rate": 1.6102729163565095e-06, "loss": 0.823, "step": 8862 }, { "epoch": 1.2547603879096765, "grad_norm": 8.851684892688686, "learning_rate": 1.6097372688890433e-06, "loss": 0.8561, "step": 8863 }, { "epoch": 1.2549019607843137, "grad_norm": 10.051078047745674, "learning_rate": 1.6092016682220252e-06, "loss": 0.8352, "step": 8864 }, { "epoch": 1.255043533658951, "grad_norm": 8.131483572230175, "learning_rate": 1.6086661143836107e-06, "loss": 0.8278, "step": 8865 }, { "epoch": 1.2551851065335882, "grad_norm": 7.284721627451053, "learning_rate": 1.6081306074019543e-06, "loss": 0.8188, "step": 8866 }, { "epoch": 1.2553266794082254, "grad_norm": 9.748838306569493, "learning_rate": 1.607595147305207e-06, "loss": 0.8889, "step": 8867 }, { "epoch": 1.2554682522828626, "grad_norm": 6.972922953145119, "learning_rate": 1.6070597341215171e-06, "loss": 0.7803, "step": 8868 }, { "epoch": 1.2556098251574999, "grad_norm": 7.795187849775094, "learning_rate": 1.6065243678790321e-06, "loss": 0.7359, "step": 8869 }, { "epoch": 1.255751398032137, "grad_norm": 8.948321853244764, "learning_rate": 1.6059890486058937e-06, "loss": 1.009, "step": 8870 }, { "epoch": 1.2558929709067743, "grad_norm": 9.236974669760912, "learning_rate": 1.605453776330245e-06, "loss": 0.8995, "step": 8871 }, { "epoch": 1.2560345437814115, "grad_norm": 7.234219708022113, "learning_rate": 1.604918551080224e-06, "loss": 0.8273, "step": 8872 }, { "epoch": 1.2561761166560488, "grad_norm": 8.913877199405107, "learning_rate": 1.6043833728839675e-06, "loss": 0.9363, "step": 8873 }, { "epoch": 1.256317689530686, "grad_norm": 9.513621553128564, "learning_rate": 1.6038482417696095e-06, "loss": 0.9039, "step": 8874 }, { "epoch": 1.2564592624053232, "grad_norm": 8.530123122691796, "learning_rate": 1.60331315776528e-06, "loss": 0.854, "step": 8875 }, { "epoch": 1.2566008352799605, "grad_norm": 9.42883468854897, "learning_rate": 1.6027781208991102e-06, "loss": 0.9049, "step": 8876 }, { "epoch": 1.2567424081545977, "grad_norm": 8.583897050537056, "learning_rate": 1.6022431311992257e-06, "loss": 0.816, "step": 8877 }, { "epoch": 1.256883981029235, "grad_norm": 8.675227656386703, "learning_rate": 1.6017081886937502e-06, "loss": 0.8235, "step": 8878 }, { "epoch": 1.257025553903872, "grad_norm": 9.475711089251964, "learning_rate": 1.6011732934108055e-06, "loss": 0.8669, "step": 8879 }, { "epoch": 1.2571671267785092, "grad_norm": 9.960581335544235, "learning_rate": 1.6006384453785115e-06, "loss": 0.8671, "step": 8880 }, { "epoch": 1.2573086996531464, "grad_norm": 7.162889091215684, "learning_rate": 1.6001036446249824e-06, "loss": 0.7975, "step": 8881 }, { "epoch": 1.2574502725277836, "grad_norm": 9.021630467059692, "learning_rate": 1.5995688911783341e-06, "loss": 0.8269, "step": 8882 }, { "epoch": 1.2575918454024209, "grad_norm": 7.591120260641603, "learning_rate": 1.5990341850666779e-06, "loss": 0.7688, "step": 8883 }, { "epoch": 1.257733418277058, "grad_norm": 8.926225713769737, "learning_rate": 1.598499526318123e-06, "loss": 0.8648, "step": 8884 }, { "epoch": 1.2578749911516953, "grad_norm": 8.87474940846153, "learning_rate": 1.5979649149607755e-06, "loss": 0.8968, "step": 8885 }, { "epoch": 1.2580165640263326, "grad_norm": 8.226247638353682, "learning_rate": 1.59743035102274e-06, "loss": 0.8659, "step": 8886 }, { "epoch": 1.2581581369009698, "grad_norm": 10.306596765647157, "learning_rate": 1.5968958345321178e-06, "loss": 0.8897, "step": 8887 }, { "epoch": 1.258299709775607, "grad_norm": 7.505727996589608, "learning_rate": 1.5963613655170082e-06, "loss": 0.8338, "step": 8888 }, { "epoch": 1.2584412826502442, "grad_norm": 8.467299446228967, "learning_rate": 1.595826944005508e-06, "loss": 0.9184, "step": 8889 }, { "epoch": 1.2585828555248815, "grad_norm": 9.475821797103867, "learning_rate": 1.5952925700257116e-06, "loss": 0.9105, "step": 8890 }, { "epoch": 1.2587244283995187, "grad_norm": 10.374471627994295, "learning_rate": 1.5947582436057097e-06, "loss": 0.8148, "step": 8891 }, { "epoch": 1.258866001274156, "grad_norm": 11.887588414004805, "learning_rate": 1.5942239647735918e-06, "loss": 0.8994, "step": 8892 }, { "epoch": 1.2590075741487932, "grad_norm": 8.88456017740318, "learning_rate": 1.5936897335574453e-06, "loss": 0.7929, "step": 8893 }, { "epoch": 1.2591491470234304, "grad_norm": 9.970802789352527, "learning_rate": 1.5931555499853529e-06, "loss": 0.9418, "step": 8894 }, { "epoch": 1.2592907198980674, "grad_norm": 7.890514289910217, "learning_rate": 1.5926214140853976e-06, "loss": 0.821, "step": 8895 }, { "epoch": 1.2594322927727046, "grad_norm": 9.167151513426468, "learning_rate": 1.592087325885658e-06, "loss": 0.8578, "step": 8896 }, { "epoch": 1.2595738656473419, "grad_norm": 8.482553416476604, "learning_rate": 1.5915532854142105e-06, "loss": 0.9031, "step": 8897 }, { "epoch": 1.259715438521979, "grad_norm": 8.73600914805845, "learning_rate": 1.5910192926991291e-06, "loss": 0.7895, "step": 8898 }, { "epoch": 1.2598570113966163, "grad_norm": 7.5865346278727355, "learning_rate": 1.5904853477684863e-06, "loss": 0.7492, "step": 8899 }, { "epoch": 1.2599985842712536, "grad_norm": 10.185161673265029, "learning_rate": 1.5899514506503499e-06, "loss": 0.8642, "step": 8900 }, { "epoch": 1.2601401571458908, "grad_norm": 8.265021779884954, "learning_rate": 1.5894176013727891e-06, "loss": 0.8886, "step": 8901 }, { "epoch": 1.260281730020528, "grad_norm": 9.596907006138627, "learning_rate": 1.5888837999638646e-06, "loss": 0.8708, "step": 8902 }, { "epoch": 1.2604233028951652, "grad_norm": 8.693937482533176, "learning_rate": 1.5883500464516394e-06, "loss": 0.7936, "step": 8903 }, { "epoch": 1.2605648757698025, "grad_norm": 9.219968967344146, "learning_rate": 1.5878163408641717e-06, "loss": 0.8647, "step": 8904 }, { "epoch": 1.2607064486444397, "grad_norm": 9.106790427860556, "learning_rate": 1.5872826832295197e-06, "loss": 0.7943, "step": 8905 }, { "epoch": 1.260848021519077, "grad_norm": 9.42921012927962, "learning_rate": 1.5867490735757366e-06, "loss": 0.793, "step": 8906 }, { "epoch": 1.2609895943937142, "grad_norm": 7.369506082741533, "learning_rate": 1.5862155119308737e-06, "loss": 0.7974, "step": 8907 }, { "epoch": 1.2611311672683514, "grad_norm": 8.734446189194458, "learning_rate": 1.5856819983229796e-06, "loss": 0.8369, "step": 8908 }, { "epoch": 1.2612727401429886, "grad_norm": 9.429396225824522, "learning_rate": 1.5851485327801014e-06, "loss": 0.8891, "step": 8909 }, { "epoch": 1.2614143130176259, "grad_norm": 8.587862418577172, "learning_rate": 1.5846151153302824e-06, "loss": 0.901, "step": 8910 }, { "epoch": 1.261555885892263, "grad_norm": 6.407526847315761, "learning_rate": 1.584081746001565e-06, "loss": 0.6949, "step": 8911 }, { "epoch": 1.2616974587669003, "grad_norm": 8.620025084563043, "learning_rate": 1.583548424821987e-06, "loss": 0.7862, "step": 8912 }, { "epoch": 1.2618390316415375, "grad_norm": 8.160329215196567, "learning_rate": 1.5830151518195846e-06, "loss": 0.7798, "step": 8913 }, { "epoch": 1.2619806045161748, "grad_norm": 7.734272410935789, "learning_rate": 1.5824819270223922e-06, "loss": 0.8699, "step": 8914 }, { "epoch": 1.262122177390812, "grad_norm": 8.832965003437584, "learning_rate": 1.5819487504584408e-06, "loss": 0.8028, "step": 8915 }, { "epoch": 1.2622637502654492, "grad_norm": 8.354987531432386, "learning_rate": 1.5814156221557587e-06, "loss": 0.8421, "step": 8916 }, { "epoch": 1.2624053231400865, "grad_norm": 11.091460343602682, "learning_rate": 1.5808825421423729e-06, "loss": 0.8674, "step": 8917 }, { "epoch": 1.2625468960147237, "grad_norm": 8.855264992004095, "learning_rate": 1.5803495104463063e-06, "loss": 0.8973, "step": 8918 }, { "epoch": 1.262688468889361, "grad_norm": 9.755285151081246, "learning_rate": 1.57981652709558e-06, "loss": 0.9399, "step": 8919 }, { "epoch": 1.262830041763998, "grad_norm": 8.962073203423168, "learning_rate": 1.5792835921182128e-06, "loss": 0.8534, "step": 8920 }, { "epoch": 1.2629716146386352, "grad_norm": 8.783539497325336, "learning_rate": 1.5787507055422201e-06, "loss": 0.8584, "step": 8921 }, { "epoch": 1.2631131875132724, "grad_norm": 8.870686947134505, "learning_rate": 1.5782178673956179e-06, "loss": 0.8805, "step": 8922 }, { "epoch": 1.2632547603879096, "grad_norm": 10.16872881211299, "learning_rate": 1.5776850777064137e-06, "loss": 0.9058, "step": 8923 }, { "epoch": 1.2633963332625469, "grad_norm": 10.169362028773763, "learning_rate": 1.5771523365026175e-06, "loss": 0.8892, "step": 8924 }, { "epoch": 1.263537906137184, "grad_norm": 9.053972533652658, "learning_rate": 1.5766196438122344e-06, "loss": 0.8851, "step": 8925 }, { "epoch": 1.2636794790118213, "grad_norm": 8.747191060016334, "learning_rate": 1.5760869996632685e-06, "loss": 0.8004, "step": 8926 }, { "epoch": 1.2638210518864585, "grad_norm": 8.036407595804137, "learning_rate": 1.5755544040837195e-06, "loss": 0.8863, "step": 8927 }, { "epoch": 1.2639626247610958, "grad_norm": 8.7325946446927, "learning_rate": 1.575021857101587e-06, "loss": 0.8734, "step": 8928 }, { "epoch": 1.264104197635733, "grad_norm": 8.346269205804575, "learning_rate": 1.5744893587448654e-06, "loss": 0.7876, "step": 8929 }, { "epoch": 1.2642457705103702, "grad_norm": 9.847416851760858, "learning_rate": 1.5739569090415482e-06, "loss": 0.8638, "step": 8930 }, { "epoch": 1.2643873433850075, "grad_norm": 9.594173856933713, "learning_rate": 1.573424508019626e-06, "loss": 0.8496, "step": 8931 }, { "epoch": 1.2645289162596447, "grad_norm": 9.674307029017594, "learning_rate": 1.5728921557070864e-06, "loss": 0.8123, "step": 8932 }, { "epoch": 1.264670489134282, "grad_norm": 10.094341059997273, "learning_rate": 1.5723598521319152e-06, "loss": 0.9648, "step": 8933 }, { "epoch": 1.2648120620089192, "grad_norm": 8.262936246727339, "learning_rate": 1.5718275973220944e-06, "loss": 0.7289, "step": 8934 }, { "epoch": 1.2649536348835564, "grad_norm": 8.051428001223554, "learning_rate": 1.571295391305605e-06, "loss": 0.7915, "step": 8935 }, { "epoch": 1.2650952077581934, "grad_norm": 11.320406179520942, "learning_rate": 1.5707632341104246e-06, "loss": 0.9033, "step": 8936 }, { "epoch": 1.2652367806328306, "grad_norm": 8.584055256035569, "learning_rate": 1.5702311257645274e-06, "loss": 0.8495, "step": 8937 }, { "epoch": 1.2653783535074679, "grad_norm": 10.230409462528668, "learning_rate": 1.5696990662958872e-06, "loss": 0.852, "step": 8938 }, { "epoch": 1.265519926382105, "grad_norm": 8.207366834821514, "learning_rate": 1.5691670557324734e-06, "loss": 0.9112, "step": 8939 }, { "epoch": 1.2656614992567423, "grad_norm": 9.24571582455347, "learning_rate": 1.5686350941022533e-06, "loss": 0.8538, "step": 8940 }, { "epoch": 1.2658030721313795, "grad_norm": 9.235829401779222, "learning_rate": 1.5681031814331918e-06, "loss": 0.9958, "step": 8941 }, { "epoch": 1.2659446450060168, "grad_norm": 9.63623184192494, "learning_rate": 1.5675713177532514e-06, "loss": 0.8853, "step": 8942 }, { "epoch": 1.266086217880654, "grad_norm": 8.362186479607265, "learning_rate": 1.5670395030903918e-06, "loss": 0.7587, "step": 8943 }, { "epoch": 1.2662277907552912, "grad_norm": 8.586496406115044, "learning_rate": 1.5665077374725696e-06, "loss": 0.8164, "step": 8944 }, { "epoch": 1.2663693636299285, "grad_norm": 10.643657145876357, "learning_rate": 1.5659760209277395e-06, "loss": 0.8953, "step": 8945 }, { "epoch": 1.2665109365045657, "grad_norm": 8.903034044512799, "learning_rate": 1.5654443534838537e-06, "loss": 0.8797, "step": 8946 }, { "epoch": 1.266652509379203, "grad_norm": 7.946697763258407, "learning_rate": 1.564912735168861e-06, "loss": 0.7817, "step": 8947 }, { "epoch": 1.2667940822538402, "grad_norm": 8.40726461987916, "learning_rate": 1.564381166010709e-06, "loss": 0.8289, "step": 8948 }, { "epoch": 1.2669356551284774, "grad_norm": 8.326178976436685, "learning_rate": 1.5638496460373415e-06, "loss": 0.9086, "step": 8949 }, { "epoch": 1.2670772280031146, "grad_norm": 7.713524659777912, "learning_rate": 1.563318175276699e-06, "loss": 0.7405, "step": 8950 }, { "epoch": 1.2672188008777518, "grad_norm": 9.53387795162004, "learning_rate": 1.5627867537567225e-06, "loss": 1.0165, "step": 8951 }, { "epoch": 1.267360373752389, "grad_norm": 10.32655477201727, "learning_rate": 1.5622553815053476e-06, "loss": 0.8864, "step": 8952 }, { "epoch": 1.2675019466270263, "grad_norm": 8.98765183109647, "learning_rate": 1.5617240585505084e-06, "loss": 0.8433, "step": 8953 }, { "epoch": 1.2676435195016635, "grad_norm": 9.343612822989698, "learning_rate": 1.5611927849201364e-06, "loss": 0.7868, "step": 8954 }, { "epoch": 1.2677850923763008, "grad_norm": 8.69245736270483, "learning_rate": 1.5606615606421588e-06, "loss": 0.9102, "step": 8955 }, { "epoch": 1.267926665250938, "grad_norm": 8.139238763029475, "learning_rate": 1.5601303857445018e-06, "loss": 0.7164, "step": 8956 }, { "epoch": 1.2680682381255752, "grad_norm": 9.3876976998105, "learning_rate": 1.5595992602550903e-06, "loss": 0.9025, "step": 8957 }, { "epoch": 1.2682098110002125, "grad_norm": 8.266560434868897, "learning_rate": 1.5590681842018446e-06, "loss": 0.7779, "step": 8958 }, { "epoch": 1.2683513838748497, "grad_norm": 9.57313733820286, "learning_rate": 1.5585371576126828e-06, "loss": 0.9373, "step": 8959 }, { "epoch": 1.268492956749487, "grad_norm": 9.176855031759288, "learning_rate": 1.5580061805155205e-06, "loss": 0.9125, "step": 8960 }, { "epoch": 1.2686345296241242, "grad_norm": 8.76989446282908, "learning_rate": 1.5574752529382714e-06, "loss": 0.8687, "step": 8961 }, { "epoch": 1.2687761024987612, "grad_norm": 7.58382366677202, "learning_rate": 1.5569443749088449e-06, "loss": 0.8915, "step": 8962 }, { "epoch": 1.2689176753733984, "grad_norm": 9.375535059601058, "learning_rate": 1.5564135464551496e-06, "loss": 0.9112, "step": 8963 }, { "epoch": 1.2690592482480356, "grad_norm": 7.98019126376061, "learning_rate": 1.5558827676050914e-06, "loss": 0.8348, "step": 8964 }, { "epoch": 1.2692008211226729, "grad_norm": 9.431443854400605, "learning_rate": 1.555352038386571e-06, "loss": 0.9128, "step": 8965 }, { "epoch": 1.26934239399731, "grad_norm": 8.764583967816513, "learning_rate": 1.55482135882749e-06, "loss": 0.8634, "step": 8966 }, { "epoch": 1.2694839668719473, "grad_norm": 8.76628950731575, "learning_rate": 1.5542907289557457e-06, "loss": 0.8076, "step": 8967 }, { "epoch": 1.2696255397465845, "grad_norm": 9.514819582473002, "learning_rate": 1.5537601487992325e-06, "loss": 0.9034, "step": 8968 }, { "epoch": 1.2697671126212218, "grad_norm": 7.477171933837942, "learning_rate": 1.5532296183858424e-06, "loss": 0.6835, "step": 8969 }, { "epoch": 1.269908685495859, "grad_norm": 9.172525464051429, "learning_rate": 1.5526991377434655e-06, "loss": 0.8668, "step": 8970 }, { "epoch": 1.2700502583704962, "grad_norm": 9.976064647180598, "learning_rate": 1.5521687068999885e-06, "loss": 0.8602, "step": 8971 }, { "epoch": 1.2701918312451335, "grad_norm": 8.919875901389627, "learning_rate": 1.5516383258832956e-06, "loss": 0.8454, "step": 8972 }, { "epoch": 1.2703334041197707, "grad_norm": 11.392776851140498, "learning_rate": 1.551107994721269e-06, "loss": 0.8343, "step": 8973 }, { "epoch": 1.270474976994408, "grad_norm": 10.675843781261252, "learning_rate": 1.5505777134417876e-06, "loss": 0.8876, "step": 8974 }, { "epoch": 1.2706165498690452, "grad_norm": 8.26771308469754, "learning_rate": 1.550047482072729e-06, "loss": 0.8323, "step": 8975 }, { "epoch": 1.2707581227436824, "grad_norm": 9.526729524839736, "learning_rate": 1.549517300641965e-06, "loss": 0.8987, "step": 8976 }, { "epoch": 1.2708996956183194, "grad_norm": 8.447341265918563, "learning_rate": 1.5489871691773677e-06, "loss": 0.8816, "step": 8977 }, { "epoch": 1.2710412684929566, "grad_norm": 7.972821560556501, "learning_rate": 1.5484570877068055e-06, "loss": 0.7578, "step": 8978 }, { "epoch": 1.2711828413675939, "grad_norm": 10.14949181698603, "learning_rate": 1.547927056258145e-06, "loss": 0.9281, "step": 8979 }, { "epoch": 1.271324414242231, "grad_norm": 10.886728911571325, "learning_rate": 1.5473970748592493e-06, "loss": 0.9037, "step": 8980 }, { "epoch": 1.2714659871168683, "grad_norm": 7.754587599944619, "learning_rate": 1.5468671435379789e-06, "loss": 0.7326, "step": 8981 }, { "epoch": 1.2716075599915055, "grad_norm": 11.102528024735586, "learning_rate": 1.5463372623221923e-06, "loss": 0.7908, "step": 8982 }, { "epoch": 1.2717491328661428, "grad_norm": 8.719390079971145, "learning_rate": 1.5458074312397447e-06, "loss": 0.8434, "step": 8983 }, { "epoch": 1.27189070574078, "grad_norm": 7.757639657329961, "learning_rate": 1.5452776503184891e-06, "loss": 0.8024, "step": 8984 }, { "epoch": 1.2720322786154172, "grad_norm": 8.013538825287773, "learning_rate": 1.5447479195862752e-06, "loss": 0.9467, "step": 8985 }, { "epoch": 1.2721738514900545, "grad_norm": 9.379829891915392, "learning_rate": 1.5442182390709517e-06, "loss": 0.8759, "step": 8986 }, { "epoch": 1.2723154243646917, "grad_norm": 9.553085579633256, "learning_rate": 1.5436886088003622e-06, "loss": 0.8453, "step": 8987 }, { "epoch": 1.272456997239329, "grad_norm": 7.912428541297708, "learning_rate": 1.5431590288023496e-06, "loss": 0.833, "step": 8988 }, { "epoch": 1.2725985701139662, "grad_norm": 9.114075692708607, "learning_rate": 1.542629499104753e-06, "loss": 0.8899, "step": 8989 }, { "epoch": 1.2727401429886034, "grad_norm": 8.752229243091888, "learning_rate": 1.5421000197354099e-06, "loss": 0.818, "step": 8990 }, { "epoch": 1.2728817158632406, "grad_norm": 10.622980980617012, "learning_rate": 1.5415705907221545e-06, "loss": 0.83, "step": 8991 }, { "epoch": 1.2730232887378778, "grad_norm": 8.810942383677046, "learning_rate": 1.5410412120928189e-06, "loss": 0.8275, "step": 8992 }, { "epoch": 1.273164861612515, "grad_norm": 8.443707478583487, "learning_rate": 1.5405118838752314e-06, "loss": 0.8426, "step": 8993 }, { "epoch": 1.2733064344871523, "grad_norm": 7.301333256034396, "learning_rate": 1.539982606097219e-06, "loss": 0.7788, "step": 8994 }, { "epoch": 1.2734480073617895, "grad_norm": 9.210281051008932, "learning_rate": 1.5394533787866045e-06, "loss": 0.9061, "step": 8995 }, { "epoch": 1.2735895802364268, "grad_norm": 9.809792309039187, "learning_rate": 1.5389242019712107e-06, "loss": 0.7895, "step": 8996 }, { "epoch": 1.273731153111064, "grad_norm": 7.667265523105786, "learning_rate": 1.5383950756788545e-06, "loss": 0.8184, "step": 8997 }, { "epoch": 1.2738727259857012, "grad_norm": 8.880280119455692, "learning_rate": 1.5378659999373524e-06, "loss": 0.8551, "step": 8998 }, { "epoch": 1.2740142988603385, "grad_norm": 9.107705226599558, "learning_rate": 1.5373369747745171e-06, "loss": 0.9344, "step": 8999 }, { "epoch": 1.2741558717349757, "grad_norm": 7.907674909810598, "learning_rate": 1.5368080002181591e-06, "loss": 0.7797, "step": 9000 }, { "epoch": 1.274297444609613, "grad_norm": 6.915511467609795, "learning_rate": 1.536279076296086e-06, "loss": 0.7443, "step": 9001 }, { "epoch": 1.2744390174842501, "grad_norm": 8.30799870048296, "learning_rate": 1.5357502030361036e-06, "loss": 0.9447, "step": 9002 }, { "epoch": 1.2745805903588872, "grad_norm": 9.91861028350518, "learning_rate": 1.535221380466014e-06, "loss": 0.8483, "step": 9003 }, { "epoch": 1.2747221632335244, "grad_norm": 9.01496109922863, "learning_rate": 1.5346926086136171e-06, "loss": 0.7958, "step": 9004 }, { "epoch": 1.2748637361081616, "grad_norm": 8.902692974583985, "learning_rate": 1.5341638875067102e-06, "loss": 0.964, "step": 9005 }, { "epoch": 1.2750053089827988, "grad_norm": 10.542169089023497, "learning_rate": 1.5336352171730876e-06, "loss": 0.9276, "step": 9006 }, { "epoch": 1.275146881857436, "grad_norm": 10.942946816190448, "learning_rate": 1.5331065976405412e-06, "loss": 0.9045, "step": 9007 }, { "epoch": 1.2752884547320733, "grad_norm": 9.199409913751559, "learning_rate": 1.53257802893686e-06, "loss": 0.8359, "step": 9008 }, { "epoch": 1.2754300276067105, "grad_norm": 9.936758337595094, "learning_rate": 1.5320495110898304e-06, "loss": 0.9647, "step": 9009 }, { "epoch": 1.2755716004813478, "grad_norm": 8.239510080531943, "learning_rate": 1.531521044127236e-06, "loss": 0.7937, "step": 9010 }, { "epoch": 1.275713173355985, "grad_norm": 8.388055447010881, "learning_rate": 1.5309926280768583e-06, "loss": 0.8525, "step": 9011 }, { "epoch": 1.2758547462306222, "grad_norm": 9.368625355528211, "learning_rate": 1.5304642629664756e-06, "loss": 0.8287, "step": 9012 }, { "epoch": 1.2759963191052595, "grad_norm": 10.053009675715977, "learning_rate": 1.5299359488238635e-06, "loss": 0.8927, "step": 9013 }, { "epoch": 1.2761378919798967, "grad_norm": 8.741086943404031, "learning_rate": 1.5294076856767956e-06, "loss": 0.8592, "step": 9014 }, { "epoch": 1.276279464854534, "grad_norm": 9.28818527712424, "learning_rate": 1.5288794735530416e-06, "loss": 0.8903, "step": 9015 }, { "epoch": 1.2764210377291711, "grad_norm": 9.435353824549114, "learning_rate": 1.52835131248037e-06, "loss": 0.8174, "step": 9016 }, { "epoch": 1.2765626106038084, "grad_norm": 8.394657579610156, "learning_rate": 1.5278232024865458e-06, "loss": 0.877, "step": 9017 }, { "epoch": 1.2767041834784456, "grad_norm": 8.562325803001091, "learning_rate": 1.5272951435993303e-06, "loss": 0.8511, "step": 9018 }, { "epoch": 1.2768457563530826, "grad_norm": 9.679916071252084, "learning_rate": 1.5267671358464837e-06, "loss": 0.8757, "step": 9019 }, { "epoch": 1.2769873292277198, "grad_norm": 8.179809219635192, "learning_rate": 1.5262391792557635e-06, "loss": 0.9031, "step": 9020 }, { "epoch": 1.277128902102357, "grad_norm": 8.014812584468844, "learning_rate": 1.5257112738549233e-06, "loss": 0.9559, "step": 9021 }, { "epoch": 1.2772704749769943, "grad_norm": 9.580498804155543, "learning_rate": 1.525183419671715e-06, "loss": 0.8388, "step": 9022 }, { "epoch": 1.2774120478516315, "grad_norm": 9.917873747206471, "learning_rate": 1.5246556167338875e-06, "loss": 0.7107, "step": 9023 }, { "epoch": 1.2775536207262688, "grad_norm": 8.721896133752612, "learning_rate": 1.5241278650691866e-06, "loss": 0.7432, "step": 9024 }, { "epoch": 1.277695193600906, "grad_norm": 8.218607041888813, "learning_rate": 1.5236001647053564e-06, "loss": 0.8603, "step": 9025 }, { "epoch": 1.2778367664755432, "grad_norm": 8.523469723816264, "learning_rate": 1.5230725156701375e-06, "loss": 0.8729, "step": 9026 }, { "epoch": 1.2779783393501805, "grad_norm": 8.922806619349576, "learning_rate": 1.5225449179912683e-06, "loss": 0.8315, "step": 9027 }, { "epoch": 1.2781199122248177, "grad_norm": 8.957308925046123, "learning_rate": 1.5220173716964847e-06, "loss": 0.8071, "step": 9028 }, { "epoch": 1.278261485099455, "grad_norm": 9.819736023751956, "learning_rate": 1.521489876813518e-06, "loss": 0.7662, "step": 9029 }, { "epoch": 1.2784030579740921, "grad_norm": 7.8050455158282315, "learning_rate": 1.5209624333700985e-06, "loss": 0.9158, "step": 9030 }, { "epoch": 1.2785446308487294, "grad_norm": 10.177336133604273, "learning_rate": 1.520435041393954e-06, "loss": 0.8166, "step": 9031 }, { "epoch": 1.2786862037233666, "grad_norm": 9.390022956730707, "learning_rate": 1.519907700912809e-06, "loss": 0.8611, "step": 9032 }, { "epoch": 1.2788277765980038, "grad_norm": 8.145441720692336, "learning_rate": 1.5193804119543853e-06, "loss": 0.8169, "step": 9033 }, { "epoch": 1.278969349472641, "grad_norm": 8.567868519747469, "learning_rate": 1.5188531745464023e-06, "loss": 0.8329, "step": 9034 }, { "epoch": 1.2791109223472783, "grad_norm": 10.222365553399282, "learning_rate": 1.5183259887165763e-06, "loss": 0.9011, "step": 9035 }, { "epoch": 1.2792524952219155, "grad_norm": 8.318318331299894, "learning_rate": 1.5177988544926208e-06, "loss": 0.84, "step": 9036 }, { "epoch": 1.2793940680965528, "grad_norm": 8.952431750129271, "learning_rate": 1.5172717719022475e-06, "loss": 0.8313, "step": 9037 }, { "epoch": 1.27953564097119, "grad_norm": 9.430927337324498, "learning_rate": 1.5167447409731645e-06, "loss": 0.8244, "step": 9038 }, { "epoch": 1.2796772138458272, "grad_norm": 10.722288031587485, "learning_rate": 1.5162177617330775e-06, "loss": 0.8625, "step": 9039 }, { "epoch": 1.2798187867204645, "grad_norm": 8.775218697890143, "learning_rate": 1.515690834209689e-06, "loss": 0.8384, "step": 9040 }, { "epoch": 1.2799603595951017, "grad_norm": 9.74339344480723, "learning_rate": 1.5151639584306993e-06, "loss": 0.8722, "step": 9041 }, { "epoch": 1.280101932469739, "grad_norm": 9.393929863157686, "learning_rate": 1.5146371344238063e-06, "loss": 0.8253, "step": 9042 }, { "epoch": 1.2802435053443761, "grad_norm": 9.331543569169078, "learning_rate": 1.5141103622167042e-06, "loss": 0.9317, "step": 9043 }, { "epoch": 1.2803850782190134, "grad_norm": 8.418542885728364, "learning_rate": 1.513583641837085e-06, "loss": 0.9223, "step": 9044 }, { "epoch": 1.2805266510936504, "grad_norm": 9.668643836910093, "learning_rate": 1.5130569733126382e-06, "loss": 0.91, "step": 9045 }, { "epoch": 1.2806682239682876, "grad_norm": 8.908369488073289, "learning_rate": 1.5125303566710508e-06, "loss": 0.8713, "step": 9046 }, { "epoch": 1.2808097968429248, "grad_norm": 10.107667003368114, "learning_rate": 1.5120037919400054e-06, "loss": 0.8573, "step": 9047 }, { "epoch": 1.280951369717562, "grad_norm": 9.90503191754563, "learning_rate": 1.5114772791471848e-06, "loss": 0.9899, "step": 9048 }, { "epoch": 1.2810929425921993, "grad_norm": 8.905437445800976, "learning_rate": 1.5109508183202675e-06, "loss": 0.9755, "step": 9049 }, { "epoch": 1.2812345154668365, "grad_norm": 9.110315500065965, "learning_rate": 1.5104244094869272e-06, "loss": 0.7897, "step": 9050 }, { "epoch": 1.2813760883414738, "grad_norm": 9.147272389247297, "learning_rate": 1.509898052674838e-06, "loss": 0.8558, "step": 9051 }, { "epoch": 1.281517661216111, "grad_norm": 9.091540300823834, "learning_rate": 1.5093717479116696e-06, "loss": 0.98, "step": 9052 }, { "epoch": 1.2816592340907482, "grad_norm": 8.770428162652609, "learning_rate": 1.508845495225089e-06, "loss": 0.9186, "step": 9053 }, { "epoch": 1.2818008069653855, "grad_norm": 7.5399661456494, "learning_rate": 1.5083192946427626e-06, "loss": 0.8657, "step": 9054 }, { "epoch": 1.2819423798400227, "grad_norm": 10.075473927598773, "learning_rate": 1.5077931461923518e-06, "loss": 0.8563, "step": 9055 }, { "epoch": 1.28208395271466, "grad_norm": 9.093639674615345, "learning_rate": 1.5072670499015151e-06, "loss": 0.8128, "step": 9056 }, { "epoch": 1.2822255255892971, "grad_norm": 10.165268306765698, "learning_rate": 1.5067410057979094e-06, "loss": 0.8517, "step": 9057 }, { "epoch": 1.2823670984639344, "grad_norm": 8.057930057800592, "learning_rate": 1.5062150139091882e-06, "loss": 0.781, "step": 9058 }, { "epoch": 1.2825086713385716, "grad_norm": 8.856947039711699, "learning_rate": 1.505689074263003e-06, "loss": 0.9558, "step": 9059 }, { "epoch": 1.2826502442132086, "grad_norm": 9.452564468789467, "learning_rate": 1.505163186887002e-06, "loss": 0.8148, "step": 9060 }, { "epoch": 1.2827918170878458, "grad_norm": 8.084922188671639, "learning_rate": 1.5046373518088303e-06, "loss": 0.9258, "step": 9061 }, { "epoch": 1.282933389962483, "grad_norm": 8.810865751198603, "learning_rate": 1.5041115690561308e-06, "loss": 0.8292, "step": 9062 }, { "epoch": 1.2830749628371203, "grad_norm": 7.781704196159939, "learning_rate": 1.5035858386565433e-06, "loss": 0.9053, "step": 9063 }, { "epoch": 1.2832165357117575, "grad_norm": 8.801420565299965, "learning_rate": 1.5030601606377054e-06, "loss": 0.8683, "step": 9064 }, { "epoch": 1.2833581085863948, "grad_norm": 9.488319041968788, "learning_rate": 1.5025345350272518e-06, "loss": 0.8095, "step": 9065 }, { "epoch": 1.283499681461032, "grad_norm": 10.595864234086218, "learning_rate": 1.502008961852814e-06, "loss": 0.8782, "step": 9066 }, { "epoch": 1.2836412543356692, "grad_norm": 7.715036835004646, "learning_rate": 1.5014834411420204e-06, "loss": 0.8153, "step": 9067 }, { "epoch": 1.2837828272103065, "grad_norm": 8.281475711841479, "learning_rate": 1.5009579729224982e-06, "loss": 0.8929, "step": 9068 }, { "epoch": 1.2839244000849437, "grad_norm": 10.978989736638205, "learning_rate": 1.5004325572218698e-06, "loss": 0.8111, "step": 9069 }, { "epoch": 1.284065972959581, "grad_norm": 9.019538439933253, "learning_rate": 1.4999071940677578e-06, "loss": 0.8194, "step": 9070 }, { "epoch": 1.2842075458342181, "grad_norm": 7.951898926244167, "learning_rate": 1.4993818834877783e-06, "loss": 0.8251, "step": 9071 }, { "epoch": 1.2843491187088554, "grad_norm": 9.706670091998715, "learning_rate": 1.498856625509547e-06, "loss": 0.8582, "step": 9072 }, { "epoch": 1.2844906915834926, "grad_norm": 8.350313155646516, "learning_rate": 1.4983314201606764e-06, "loss": 0.8682, "step": 9073 }, { "epoch": 1.2846322644581298, "grad_norm": 10.062947707552762, "learning_rate": 1.497806267468776e-06, "loss": 0.9343, "step": 9074 }, { "epoch": 1.284773837332767, "grad_norm": 9.868641025166218, "learning_rate": 1.4972811674614523e-06, "loss": 0.7458, "step": 9075 }, { "epoch": 1.2849154102074043, "grad_norm": 9.70959080314166, "learning_rate": 1.4967561201663108e-06, "loss": 0.8191, "step": 9076 }, { "epoch": 1.2850569830820415, "grad_norm": 10.254154758893389, "learning_rate": 1.4962311256109518e-06, "loss": 0.7805, "step": 9077 }, { "epoch": 1.2851985559566788, "grad_norm": 9.166265952859685, "learning_rate": 1.4957061838229743e-06, "loss": 0.843, "step": 9078 }, { "epoch": 1.285340128831316, "grad_norm": 7.512598772716683, "learning_rate": 1.4951812948299737e-06, "loss": 0.8235, "step": 9079 }, { "epoch": 1.2854817017059532, "grad_norm": 8.634700903469836, "learning_rate": 1.494656458659543e-06, "loss": 0.9649, "step": 9080 }, { "epoch": 1.2856232745805904, "grad_norm": 9.144563370965585, "learning_rate": 1.4941316753392738e-06, "loss": 0.8698, "step": 9081 }, { "epoch": 1.2857648474552277, "grad_norm": 8.838181322912519, "learning_rate": 1.493606944896751e-06, "loss": 0.8892, "step": 9082 }, { "epoch": 1.285906420329865, "grad_norm": 8.436959708540462, "learning_rate": 1.4930822673595613e-06, "loss": 0.8729, "step": 9083 }, { "epoch": 1.2860479932045021, "grad_norm": 8.603383716000705, "learning_rate": 1.4925576427552864e-06, "loss": 0.894, "step": 9084 }, { "epoch": 1.2861895660791394, "grad_norm": 8.643653931983767, "learning_rate": 1.4920330711115043e-06, "loss": 0.7869, "step": 9085 }, { "epoch": 1.2863311389537764, "grad_norm": 9.687384229398967, "learning_rate": 1.4915085524557924e-06, "loss": 0.8961, "step": 9086 }, { "epoch": 1.2864727118284136, "grad_norm": 8.027645504516963, "learning_rate": 1.4909840868157237e-06, "loss": 0.8091, "step": 9087 }, { "epoch": 1.2866142847030508, "grad_norm": 9.64067287835052, "learning_rate": 1.4904596742188695e-06, "loss": 0.8219, "step": 9088 }, { "epoch": 1.286755857577688, "grad_norm": 8.777734453225824, "learning_rate": 1.4899353146927975e-06, "loss": 0.8037, "step": 9089 }, { "epoch": 1.2868974304523253, "grad_norm": 8.052853513322544, "learning_rate": 1.4894110082650726e-06, "loss": 0.7807, "step": 9090 }, { "epoch": 1.2870390033269625, "grad_norm": 9.173408756981031, "learning_rate": 1.488886754963258e-06, "loss": 0.8505, "step": 9091 }, { "epoch": 1.2871805762015998, "grad_norm": 7.003827002566865, "learning_rate": 1.4883625548149125e-06, "loss": 0.7663, "step": 9092 }, { "epoch": 1.287322149076237, "grad_norm": 7.513290074293586, "learning_rate": 1.4878384078475933e-06, "loss": 0.7986, "step": 9093 }, { "epoch": 1.2874637219508742, "grad_norm": 9.434792641856218, "learning_rate": 1.4873143140888537e-06, "loss": 0.9491, "step": 9094 }, { "epoch": 1.2876052948255114, "grad_norm": 9.016211849265735, "learning_rate": 1.486790273566246e-06, "loss": 0.7965, "step": 9095 }, { "epoch": 1.2877468677001487, "grad_norm": 10.384605281640162, "learning_rate": 1.486266286307318e-06, "loss": 0.8868, "step": 9096 }, { "epoch": 1.287888440574786, "grad_norm": 8.546964264844638, "learning_rate": 1.4857423523396157e-06, "loss": 0.9107, "step": 9097 }, { "epoch": 1.2880300134494231, "grad_norm": 9.110668057883434, "learning_rate": 1.4852184716906808e-06, "loss": 0.7843, "step": 9098 }, { "epoch": 1.2881715863240604, "grad_norm": 8.177998161571331, "learning_rate": 1.484694644388055e-06, "loss": 0.8393, "step": 9099 }, { "epoch": 1.2883131591986976, "grad_norm": 6.935485547373885, "learning_rate": 1.4841708704592745e-06, "loss": 0.8711, "step": 9100 }, { "epoch": 1.2884547320733348, "grad_norm": 9.024710746208678, "learning_rate": 1.4836471499318738e-06, "loss": 0.9528, "step": 9101 }, { "epoch": 1.2885963049479718, "grad_norm": 9.20844481617918, "learning_rate": 1.4831234828333856e-06, "loss": 0.891, "step": 9102 }, { "epoch": 1.288737877822609, "grad_norm": 8.535203625440602, "learning_rate": 1.4825998691913372e-06, "loss": 0.8263, "step": 9103 }, { "epoch": 1.2888794506972463, "grad_norm": 8.465944170224569, "learning_rate": 1.482076309033254e-06, "loss": 0.7323, "step": 9104 }, { "epoch": 1.2890210235718835, "grad_norm": 9.149327287802963, "learning_rate": 1.481552802386661e-06, "loss": 0.872, "step": 9105 }, { "epoch": 1.2891625964465208, "grad_norm": 10.218749626696285, "learning_rate": 1.4810293492790778e-06, "loss": 1.023, "step": 9106 }, { "epoch": 1.289304169321158, "grad_norm": 9.292444946355866, "learning_rate": 1.480505949738022e-06, "loss": 0.9783, "step": 9107 }, { "epoch": 1.2894457421957952, "grad_norm": 7.47844803810214, "learning_rate": 1.4799826037910082e-06, "loss": 0.8547, "step": 9108 }, { "epoch": 1.2895873150704324, "grad_norm": 9.914279126745795, "learning_rate": 1.479459311465548e-06, "loss": 0.9525, "step": 9109 }, { "epoch": 1.2897288879450697, "grad_norm": 9.050121192752925, "learning_rate": 1.478936072789151e-06, "loss": 0.866, "step": 9110 }, { "epoch": 1.289870460819707, "grad_norm": 8.638151011244618, "learning_rate": 1.4784128877893237e-06, "loss": 0.8714, "step": 9111 }, { "epoch": 1.2900120336943441, "grad_norm": 7.5972452792930305, "learning_rate": 1.477889756493569e-06, "loss": 0.845, "step": 9112 }, { "epoch": 1.2901536065689814, "grad_norm": 8.999948713368411, "learning_rate": 1.4773666789293881e-06, "loss": 0.9162, "step": 9113 }, { "epoch": 1.2902951794436186, "grad_norm": 7.906020572032455, "learning_rate": 1.4768436551242776e-06, "loss": 0.871, "step": 9114 }, { "epoch": 1.2904367523182558, "grad_norm": 8.32969733706158, "learning_rate": 1.4763206851057338e-06, "loss": 0.8579, "step": 9115 }, { "epoch": 1.290578325192893, "grad_norm": 8.591554673430355, "learning_rate": 1.4757977689012482e-06, "loss": 0.8709, "step": 9116 }, { "epoch": 1.2907198980675303, "grad_norm": 9.14069886789063, "learning_rate": 1.4752749065383099e-06, "loss": 0.8185, "step": 9117 }, { "epoch": 1.2908614709421675, "grad_norm": 7.1974922051342825, "learning_rate": 1.4747520980444058e-06, "loss": 0.8382, "step": 9118 }, { "epoch": 1.2910030438168048, "grad_norm": 8.803443529719324, "learning_rate": 1.4742293434470196e-06, "loss": 1.0196, "step": 9119 }, { "epoch": 1.291144616691442, "grad_norm": 8.830605811552424, "learning_rate": 1.4737066427736317e-06, "loss": 0.8309, "step": 9120 }, { "epoch": 1.2912861895660792, "grad_norm": 8.972242997666731, "learning_rate": 1.4731839960517202e-06, "loss": 0.874, "step": 9121 }, { "epoch": 1.2914277624407164, "grad_norm": 9.887819106092339, "learning_rate": 1.4726614033087604e-06, "loss": 0.9006, "step": 9122 }, { "epoch": 1.2915693353153537, "grad_norm": 8.213150960127766, "learning_rate": 1.4721388645722262e-06, "loss": 0.8977, "step": 9123 }, { "epoch": 1.291710908189991, "grad_norm": 8.229390150166193, "learning_rate": 1.4716163798695842e-06, "loss": 0.8338, "step": 9124 }, { "epoch": 1.2918524810646281, "grad_norm": 9.007848496377557, "learning_rate": 1.4710939492283022e-06, "loss": 0.8533, "step": 9125 }, { "epoch": 1.2919940539392654, "grad_norm": 9.45640358305344, "learning_rate": 1.4705715726758444e-06, "loss": 0.8917, "step": 9126 }, { "epoch": 1.2921356268139026, "grad_norm": 8.937161099070677, "learning_rate": 1.4700492502396708e-06, "loss": 0.852, "step": 9127 }, { "epoch": 1.2922771996885396, "grad_norm": 8.759328610545419, "learning_rate": 1.4695269819472406e-06, "loss": 0.8677, "step": 9128 }, { "epoch": 1.2924187725631768, "grad_norm": 8.548046080647612, "learning_rate": 1.4690047678260086e-06, "loss": 0.9638, "step": 9129 }, { "epoch": 1.292560345437814, "grad_norm": 8.528977758728576, "learning_rate": 1.468482607903427e-06, "loss": 0.7126, "step": 9130 }, { "epoch": 1.2927019183124513, "grad_norm": 7.58124281465421, "learning_rate": 1.467960502206946e-06, "loss": 0.8637, "step": 9131 }, { "epoch": 1.2928434911870885, "grad_norm": 9.632897245926895, "learning_rate": 1.4674384507640115e-06, "loss": 0.8581, "step": 9132 }, { "epoch": 1.2929850640617258, "grad_norm": 8.74427128315662, "learning_rate": 1.466916453602068e-06, "loss": 0.8759, "step": 9133 }, { "epoch": 1.293126636936363, "grad_norm": 9.585050036598743, "learning_rate": 1.4663945107485567e-06, "loss": 0.8962, "step": 9134 }, { "epoch": 1.2932682098110002, "grad_norm": 9.183513996357226, "learning_rate": 1.465872622230915e-06, "loss": 0.8587, "step": 9135 }, { "epoch": 1.2934097826856374, "grad_norm": 8.766788616226671, "learning_rate": 1.4653507880765783e-06, "loss": 0.7773, "step": 9136 }, { "epoch": 1.2935513555602747, "grad_norm": 8.07260704841413, "learning_rate": 1.464829008312979e-06, "loss": 0.8704, "step": 9137 }, { "epoch": 1.293692928434912, "grad_norm": 8.942029965412985, "learning_rate": 1.464307282967547e-06, "loss": 0.871, "step": 9138 }, { "epoch": 1.2938345013095491, "grad_norm": 9.320142234404539, "learning_rate": 1.4637856120677088e-06, "loss": 0.8561, "step": 9139 }, { "epoch": 1.2939760741841864, "grad_norm": 9.958600369202319, "learning_rate": 1.4632639956408884e-06, "loss": 0.8388, "step": 9140 }, { "epoch": 1.2941176470588236, "grad_norm": 7.806026614973323, "learning_rate": 1.4627424337145069e-06, "loss": 0.8372, "step": 9141 }, { "epoch": 1.2942592199334608, "grad_norm": 10.32998488123213, "learning_rate": 1.462220926315982e-06, "loss": 0.8874, "step": 9142 }, { "epoch": 1.2944007928080978, "grad_norm": 8.541500877694292, "learning_rate": 1.4616994734727293e-06, "loss": 0.9374, "step": 9143 }, { "epoch": 1.294542365682735, "grad_norm": 9.124181841398293, "learning_rate": 1.461178075212162e-06, "loss": 0.9057, "step": 9144 }, { "epoch": 1.2946839385573723, "grad_norm": 9.170387160386781, "learning_rate": 1.4606567315616876e-06, "loss": 0.8638, "step": 9145 }, { "epoch": 1.2948255114320095, "grad_norm": 10.187776713913522, "learning_rate": 1.4601354425487141e-06, "loss": 0.8991, "step": 9146 }, { "epoch": 1.2949670843066468, "grad_norm": 9.996990895531116, "learning_rate": 1.4596142082006448e-06, "loss": 0.751, "step": 9147 }, { "epoch": 1.295108657181284, "grad_norm": 8.862465617082238, "learning_rate": 1.4590930285448807e-06, "loss": 0.9016, "step": 9148 }, { "epoch": 1.2952502300559212, "grad_norm": 7.252863877381688, "learning_rate": 1.4585719036088205e-06, "loss": 0.9228, "step": 9149 }, { "epoch": 1.2953918029305584, "grad_norm": 9.726496611222615, "learning_rate": 1.458050833419858e-06, "loss": 0.9051, "step": 9150 }, { "epoch": 1.2955333758051957, "grad_norm": 8.721967424800287, "learning_rate": 1.4575298180053875e-06, "loss": 0.78, "step": 9151 }, { "epoch": 1.295674948679833, "grad_norm": 9.154035801763118, "learning_rate": 1.4570088573927959e-06, "loss": 0.8422, "step": 9152 }, { "epoch": 1.2958165215544701, "grad_norm": 9.14231880340143, "learning_rate": 1.4564879516094721e-06, "loss": 0.8046, "step": 9153 }, { "epoch": 1.2959580944291074, "grad_norm": 7.66981170940427, "learning_rate": 1.4559671006827977e-06, "loss": 0.7103, "step": 9154 }, { "epoch": 1.2960996673037446, "grad_norm": 12.323924694392078, "learning_rate": 1.4554463046401554e-06, "loss": 0.8194, "step": 9155 }, { "epoch": 1.2962412401783818, "grad_norm": 9.713262744543218, "learning_rate": 1.4549255635089219e-06, "loss": 0.9304, "step": 9156 }, { "epoch": 1.296382813053019, "grad_norm": 9.696702880511998, "learning_rate": 1.4544048773164712e-06, "loss": 0.7857, "step": 9157 }, { "epoch": 1.2965243859276563, "grad_norm": 8.47023273170796, "learning_rate": 1.4538842460901774e-06, "loss": 0.8168, "step": 9158 }, { "epoch": 1.2966659588022935, "grad_norm": 8.295367260161363, "learning_rate": 1.453363669857408e-06, "loss": 0.7896, "step": 9159 }, { "epoch": 1.2968075316769307, "grad_norm": 10.999264432415433, "learning_rate": 1.4528431486455311e-06, "loss": 0.9441, "step": 9160 }, { "epoch": 1.296949104551568, "grad_norm": 10.675285989472659, "learning_rate": 1.4523226824819081e-06, "loss": 0.9147, "step": 9161 }, { "epoch": 1.2970906774262052, "grad_norm": 7.9188048302875576, "learning_rate": 1.4518022713939e-06, "loss": 0.799, "step": 9162 }, { "epoch": 1.2972322503008424, "grad_norm": 9.553108739917059, "learning_rate": 1.4512819154088665e-06, "loss": 0.8667, "step": 9163 }, { "epoch": 1.2973738231754797, "grad_norm": 9.094098585860804, "learning_rate": 1.4507616145541595e-06, "loss": 0.9427, "step": 9164 }, { "epoch": 1.297515396050117, "grad_norm": 9.438668892327158, "learning_rate": 1.4502413688571332e-06, "loss": 0.8455, "step": 9165 }, { "epoch": 1.2976569689247541, "grad_norm": 8.050521587272991, "learning_rate": 1.4497211783451355e-06, "loss": 0.8695, "step": 9166 }, { "epoch": 1.2977985417993914, "grad_norm": 8.642860826944153, "learning_rate": 1.4492010430455108e-06, "loss": 0.9231, "step": 9167 }, { "epoch": 1.2979401146740286, "grad_norm": 8.762312155980728, "learning_rate": 1.4486809629856052e-06, "loss": 0.9157, "step": 9168 }, { "epoch": 1.2980816875486656, "grad_norm": 7.98821487212642, "learning_rate": 1.4481609381927565e-06, "loss": 0.8001, "step": 9169 }, { "epoch": 1.2982232604233028, "grad_norm": 9.099621322633817, "learning_rate": 1.4476409686943039e-06, "loss": 0.8914, "step": 9170 }, { "epoch": 1.29836483329794, "grad_norm": 9.741290383839564, "learning_rate": 1.4471210545175795e-06, "loss": 0.7967, "step": 9171 }, { "epoch": 1.2985064061725773, "grad_norm": 8.663752970829142, "learning_rate": 1.446601195689918e-06, "loss": 0.8675, "step": 9172 }, { "epoch": 1.2986479790472145, "grad_norm": 8.272038472069532, "learning_rate": 1.4460813922386446e-06, "loss": 0.8577, "step": 9173 }, { "epoch": 1.2987895519218517, "grad_norm": 8.980868456334303, "learning_rate": 1.4455616441910878e-06, "loss": 0.8943, "step": 9174 }, { "epoch": 1.298931124796489, "grad_norm": 8.230530393249223, "learning_rate": 1.445041951574568e-06, "loss": 0.7445, "step": 9175 }, { "epoch": 1.2990726976711262, "grad_norm": 9.28406951925603, "learning_rate": 1.4445223144164073e-06, "loss": 0.9853, "step": 9176 }, { "epoch": 1.2992142705457634, "grad_norm": 8.437102131823018, "learning_rate": 1.4440027327439215e-06, "loss": 0.8029, "step": 9177 }, { "epoch": 1.2993558434204007, "grad_norm": 8.39712834999961, "learning_rate": 1.443483206584424e-06, "loss": 0.9716, "step": 9178 }, { "epoch": 1.299497416295038, "grad_norm": 8.584325887333344, "learning_rate": 1.4429637359652271e-06, "loss": 0.8442, "step": 9179 }, { "epoch": 1.2996389891696751, "grad_norm": 9.763195010171652, "learning_rate": 1.4424443209136375e-06, "loss": 0.8617, "step": 9180 }, { "epoch": 1.2997805620443124, "grad_norm": 9.893980689380586, "learning_rate": 1.4419249614569626e-06, "loss": 0.841, "step": 9181 }, { "epoch": 1.2999221349189496, "grad_norm": 9.053293326527776, "learning_rate": 1.4414056576225025e-06, "loss": 0.8005, "step": 9182 }, { "epoch": 1.3000637077935868, "grad_norm": 7.936897074711284, "learning_rate": 1.4408864094375586e-06, "loss": 0.8516, "step": 9183 }, { "epoch": 1.300205280668224, "grad_norm": 9.084455741573375, "learning_rate": 1.4403672169294252e-06, "loss": 0.9411, "step": 9184 }, { "epoch": 1.300346853542861, "grad_norm": 10.02694581285769, "learning_rate": 1.4398480801253976e-06, "loss": 0.9667, "step": 9185 }, { "epoch": 1.3004884264174983, "grad_norm": 7.880003535468378, "learning_rate": 1.4393289990527665e-06, "loss": 0.7749, "step": 9186 }, { "epoch": 1.3006299992921355, "grad_norm": 8.582867310350885, "learning_rate": 1.4388099737388196e-06, "loss": 0.8513, "step": 9187 }, { "epoch": 1.3007715721667727, "grad_norm": 8.560367067969628, "learning_rate": 1.4382910042108405e-06, "loss": 0.727, "step": 9188 }, { "epoch": 1.30091314504141, "grad_norm": 9.908706889852173, "learning_rate": 1.437772090496111e-06, "loss": 0.8771, "step": 9189 }, { "epoch": 1.3010547179160472, "grad_norm": 8.448040291430019, "learning_rate": 1.4372532326219104e-06, "loss": 0.8951, "step": 9190 }, { "epoch": 1.3011962907906844, "grad_norm": 7.941363738434542, "learning_rate": 1.4367344306155163e-06, "loss": 0.8589, "step": 9191 }, { "epoch": 1.3013378636653217, "grad_norm": 7.968387610479289, "learning_rate": 1.4362156845041992e-06, "loss": 0.7499, "step": 9192 }, { "epoch": 1.301479436539959, "grad_norm": 9.429638550385585, "learning_rate": 1.4356969943152315e-06, "loss": 0.8794, "step": 9193 }, { "epoch": 1.3016210094145961, "grad_norm": 7.682770553576766, "learning_rate": 1.435178360075878e-06, "loss": 0.7438, "step": 9194 }, { "epoch": 1.3017625822892334, "grad_norm": 7.577068829428597, "learning_rate": 1.4346597818134052e-06, "loss": 0.9429, "step": 9195 }, { "epoch": 1.3019041551638706, "grad_norm": 7.537029913418912, "learning_rate": 1.4341412595550724e-06, "loss": 0.8498, "step": 9196 }, { "epoch": 1.3020457280385078, "grad_norm": 9.116939799548627, "learning_rate": 1.4336227933281398e-06, "loss": 0.9333, "step": 9197 }, { "epoch": 1.302187300913145, "grad_norm": 9.856699923718407, "learning_rate": 1.433104383159862e-06, "loss": 0.7905, "step": 9198 }, { "epoch": 1.3023288737877823, "grad_norm": 8.814909477093652, "learning_rate": 1.43258602907749e-06, "loss": 0.8626, "step": 9199 }, { "epoch": 1.3024704466624195, "grad_norm": 8.55541654641753, "learning_rate": 1.432067731108276e-06, "loss": 0.8627, "step": 9200 }, { "epoch": 1.3026120195370567, "grad_norm": 6.481264843827174, "learning_rate": 1.4315494892794635e-06, "loss": 0.7698, "step": 9201 }, { "epoch": 1.302753592411694, "grad_norm": 9.286000895006973, "learning_rate": 1.4310313036182994e-06, "loss": 0.896, "step": 9202 }, { "epoch": 1.3028951652863312, "grad_norm": 8.415017246493857, "learning_rate": 1.4305131741520209e-06, "loss": 0.8574, "step": 9203 }, { "epoch": 1.3030367381609684, "grad_norm": 10.998881196432762, "learning_rate": 1.4299951009078688e-06, "loss": 0.9087, "step": 9204 }, { "epoch": 1.3031783110356057, "grad_norm": 9.07906957757959, "learning_rate": 1.429477083913075e-06, "loss": 0.7955, "step": 9205 }, { "epoch": 1.303319883910243, "grad_norm": 9.61816044339776, "learning_rate": 1.4289591231948742e-06, "loss": 0.8725, "step": 9206 }, { "epoch": 1.3034614567848801, "grad_norm": 6.503435840721042, "learning_rate": 1.4284412187804925e-06, "loss": 0.7229, "step": 9207 }, { "epoch": 1.3036030296595174, "grad_norm": 10.337709628174188, "learning_rate": 1.4279233706971579e-06, "loss": 0.8243, "step": 9208 }, { "epoch": 1.3037446025341546, "grad_norm": 7.776545929618045, "learning_rate": 1.4274055789720923e-06, "loss": 0.8248, "step": 9209 }, { "epoch": 1.3038861754087916, "grad_norm": 9.216027636963414, "learning_rate": 1.4268878436325145e-06, "loss": 0.8935, "step": 9210 }, { "epoch": 1.3040277482834288, "grad_norm": 7.858376289754579, "learning_rate": 1.4263701647056439e-06, "loss": 0.8016, "step": 9211 }, { "epoch": 1.304169321158066, "grad_norm": 10.48703364902866, "learning_rate": 1.425852542218692e-06, "loss": 0.9038, "step": 9212 }, { "epoch": 1.3043108940327033, "grad_norm": 8.375464127618681, "learning_rate": 1.4253349761988714e-06, "loss": 0.8083, "step": 9213 }, { "epoch": 1.3044524669073405, "grad_norm": 7.254599066184428, "learning_rate": 1.4248174666733905e-06, "loss": 0.8717, "step": 9214 }, { "epoch": 1.3045940397819777, "grad_norm": 7.643467536254943, "learning_rate": 1.4243000136694527e-06, "loss": 0.9187, "step": 9215 }, { "epoch": 1.304735612656615, "grad_norm": 8.187929098518335, "learning_rate": 1.423782617214262e-06, "loss": 0.8878, "step": 9216 }, { "epoch": 1.3048771855312522, "grad_norm": 9.137158797921677, "learning_rate": 1.4232652773350159e-06, "loss": 0.8388, "step": 9217 }, { "epoch": 1.3050187584058894, "grad_norm": 10.730867615220617, "learning_rate": 1.4227479940589122e-06, "loss": 1.0152, "step": 9218 }, { "epoch": 1.3051603312805267, "grad_norm": 7.9298436041438825, "learning_rate": 1.422230767413143e-06, "loss": 0.8374, "step": 9219 }, { "epoch": 1.305301904155164, "grad_norm": 8.576937567233806, "learning_rate": 1.421713597424898e-06, "loss": 0.883, "step": 9220 }, { "epoch": 1.3054434770298011, "grad_norm": 9.259456852252754, "learning_rate": 1.4211964841213663e-06, "loss": 0.9079, "step": 9221 }, { "epoch": 1.3055850499044384, "grad_norm": 10.967840173270499, "learning_rate": 1.4206794275297298e-06, "loss": 0.8223, "step": 9222 }, { "epoch": 1.3057266227790756, "grad_norm": 8.869923173814698, "learning_rate": 1.4201624276771723e-06, "loss": 0.798, "step": 9223 }, { "epoch": 1.3058681956537128, "grad_norm": 10.673367978130523, "learning_rate": 1.4196454845908696e-06, "loss": 0.8114, "step": 9224 }, { "epoch": 1.30600976852835, "grad_norm": 8.730086855461764, "learning_rate": 1.4191285982979992e-06, "loss": 0.9137, "step": 9225 }, { "epoch": 1.306151341402987, "grad_norm": 10.452801663767964, "learning_rate": 1.4186117688257317e-06, "loss": 0.9018, "step": 9226 }, { "epoch": 1.3062929142776243, "grad_norm": 9.410283133354827, "learning_rate": 1.4180949962012377e-06, "loss": 0.8118, "step": 9227 }, { "epoch": 1.3064344871522615, "grad_norm": 10.870525272596005, "learning_rate": 1.4175782804516824e-06, "loss": 0.7972, "step": 9228 }, { "epoch": 1.3065760600268987, "grad_norm": 9.144424040246166, "learning_rate": 1.417061621604231e-06, "loss": 0.9146, "step": 9229 }, { "epoch": 1.306717632901536, "grad_norm": 9.965722655771522, "learning_rate": 1.4165450196860423e-06, "loss": 0.8164, "step": 9230 }, { "epoch": 1.3068592057761732, "grad_norm": 9.740807136445808, "learning_rate": 1.4160284747242731e-06, "loss": 0.8541, "step": 9231 }, { "epoch": 1.3070007786508104, "grad_norm": 8.387062609392348, "learning_rate": 1.4155119867460799e-06, "loss": 0.8041, "step": 9232 }, { "epoch": 1.3071423515254477, "grad_norm": 7.45867572474461, "learning_rate": 1.4149955557786118e-06, "loss": 0.8382, "step": 9233 }, { "epoch": 1.307283924400085, "grad_norm": 8.68427463732939, "learning_rate": 1.4144791818490194e-06, "loss": 0.9613, "step": 9234 }, { "epoch": 1.3074254972747221, "grad_norm": 8.093160364773542, "learning_rate": 1.4139628649844462e-06, "loss": 0.7594, "step": 9235 }, { "epoch": 1.3075670701493594, "grad_norm": 8.163362522324881, "learning_rate": 1.4134466052120349e-06, "loss": 0.7411, "step": 9236 }, { "epoch": 1.3077086430239966, "grad_norm": 8.710556938856355, "learning_rate": 1.412930402558927e-06, "loss": 0.7797, "step": 9237 }, { "epoch": 1.3078502158986338, "grad_norm": 8.716781339261868, "learning_rate": 1.412414257052256e-06, "loss": 0.796, "step": 9238 }, { "epoch": 1.307991788773271, "grad_norm": 7.82656960634597, "learning_rate": 1.4118981687191573e-06, "loss": 0.718, "step": 9239 }, { "epoch": 1.3081333616479083, "grad_norm": 8.476274476673478, "learning_rate": 1.411382137586761e-06, "loss": 0.8428, "step": 9240 }, { "epoch": 1.3082749345225455, "grad_norm": 9.50505292930896, "learning_rate": 1.4108661636821928e-06, "loss": 0.8994, "step": 9241 }, { "epoch": 1.3084165073971827, "grad_norm": 9.784745344416386, "learning_rate": 1.4103502470325791e-06, "loss": 0.9548, "step": 9242 }, { "epoch": 1.30855808027182, "grad_norm": 8.200673461798779, "learning_rate": 1.4098343876650398e-06, "loss": 0.8582, "step": 9243 }, { "epoch": 1.3086996531464572, "grad_norm": 10.391808188626232, "learning_rate": 1.4093185856066945e-06, "loss": 0.8346, "step": 9244 }, { "epoch": 1.3088412260210944, "grad_norm": 9.172660209148708, "learning_rate": 1.4088028408846572e-06, "loss": 0.9069, "step": 9245 }, { "epoch": 1.3089827988957317, "grad_norm": 7.993328889278717, "learning_rate": 1.4082871535260418e-06, "loss": 0.814, "step": 9246 }, { "epoch": 1.3091243717703689, "grad_norm": 9.442057134551183, "learning_rate": 1.4077715235579559e-06, "loss": 0.8325, "step": 9247 }, { "epoch": 1.3092659446450061, "grad_norm": 8.445053675352801, "learning_rate": 1.4072559510075073e-06, "loss": 0.9037, "step": 9248 }, { "epoch": 1.3094075175196433, "grad_norm": 8.81832065848367, "learning_rate": 1.4067404359017977e-06, "loss": 0.943, "step": 9249 }, { "epoch": 1.3095490903942806, "grad_norm": 10.510738149867407, "learning_rate": 1.4062249782679294e-06, "loss": 0.8538, "step": 9250 }, { "epoch": 1.3096906632689178, "grad_norm": 8.752503827030726, "learning_rate": 1.4057095781329983e-06, "loss": 0.7909, "step": 9251 }, { "epoch": 1.3098322361435548, "grad_norm": 8.534186783471275, "learning_rate": 1.4051942355240977e-06, "loss": 0.7751, "step": 9252 }, { "epoch": 1.309973809018192, "grad_norm": 8.108437426984574, "learning_rate": 1.404678950468321e-06, "loss": 0.987, "step": 9253 }, { "epoch": 1.3101153818928293, "grad_norm": 8.634111981522631, "learning_rate": 1.4041637229927541e-06, "loss": 0.8892, "step": 9254 }, { "epoch": 1.3102569547674665, "grad_norm": 8.727993566914538, "learning_rate": 1.403648553124484e-06, "loss": 0.8401, "step": 9255 }, { "epoch": 1.3103985276421037, "grad_norm": 8.44678263611983, "learning_rate": 1.4031334408905911e-06, "loss": 0.8879, "step": 9256 }, { "epoch": 1.310540100516741, "grad_norm": 9.283142513411955, "learning_rate": 1.4026183863181563e-06, "loss": 0.7893, "step": 9257 }, { "epoch": 1.3106816733913782, "grad_norm": 9.587373981866154, "learning_rate": 1.4021033894342539e-06, "loss": 0.8784, "step": 9258 }, { "epoch": 1.3108232462660154, "grad_norm": 9.432749784323358, "learning_rate": 1.4015884502659574e-06, "loss": 0.8999, "step": 9259 }, { "epoch": 1.3109648191406527, "grad_norm": 8.30842157570196, "learning_rate": 1.4010735688403383e-06, "loss": 0.8738, "step": 9260 }, { "epoch": 1.31110639201529, "grad_norm": 8.397211938194781, "learning_rate": 1.4005587451844621e-06, "loss": 0.8936, "step": 9261 }, { "epoch": 1.3112479648899271, "grad_norm": 7.7306197480831464, "learning_rate": 1.4000439793253931e-06, "loss": 0.8172, "step": 9262 }, { "epoch": 1.3113895377645644, "grad_norm": 9.15933665043362, "learning_rate": 1.3995292712901908e-06, "loss": 0.9141, "step": 9263 }, { "epoch": 1.3115311106392016, "grad_norm": 8.8885081474566, "learning_rate": 1.3990146211059141e-06, "loss": 0.7951, "step": 9264 }, { "epoch": 1.3116726835138388, "grad_norm": 9.31486163820307, "learning_rate": 1.398500028799619e-06, "loss": 0.8557, "step": 9265 }, { "epoch": 1.311814256388476, "grad_norm": 8.377948754294803, "learning_rate": 1.397985494398355e-06, "loss": 0.7867, "step": 9266 }, { "epoch": 1.311955829263113, "grad_norm": 10.819570357558934, "learning_rate": 1.3974710179291729e-06, "loss": 0.8781, "step": 9267 }, { "epoch": 1.3120974021377503, "grad_norm": 8.796110889049826, "learning_rate": 1.3969565994191165e-06, "loss": 0.9116, "step": 9268 }, { "epoch": 1.3122389750123875, "grad_norm": 8.703035145266627, "learning_rate": 1.3964422388952298e-06, "loss": 0.8406, "step": 9269 }, { "epoch": 1.3123805478870247, "grad_norm": 8.755685757326761, "learning_rate": 1.3959279363845508e-06, "loss": 0.8009, "step": 9270 }, { "epoch": 1.312522120761662, "grad_norm": 6.90016202874608, "learning_rate": 1.3954136919141182e-06, "loss": 0.7934, "step": 9271 }, { "epoch": 1.3126636936362992, "grad_norm": 8.402349652049299, "learning_rate": 1.3948995055109641e-06, "loss": 0.8835, "step": 9272 }, { "epoch": 1.3128052665109364, "grad_norm": 7.534910966312984, "learning_rate": 1.3943853772021179e-06, "loss": 0.8382, "step": 9273 }, { "epoch": 1.3129468393855737, "grad_norm": 7.688200879905219, "learning_rate": 1.3938713070146093e-06, "loss": 0.7999, "step": 9274 }, { "epoch": 1.313088412260211, "grad_norm": 8.661543236501966, "learning_rate": 1.3933572949754598e-06, "loss": 0.7855, "step": 9275 }, { "epoch": 1.3132299851348481, "grad_norm": 8.975352551859775, "learning_rate": 1.3928433411116938e-06, "loss": 0.8989, "step": 9276 }, { "epoch": 1.3133715580094854, "grad_norm": 10.012081096520383, "learning_rate": 1.3923294454503263e-06, "loss": 0.8849, "step": 9277 }, { "epoch": 1.3135131308841226, "grad_norm": 7.591702157644926, "learning_rate": 1.3918156080183754e-06, "loss": 0.7301, "step": 9278 }, { "epoch": 1.3136547037587598, "grad_norm": 9.043198650560326, "learning_rate": 1.3913018288428503e-06, "loss": 0.8735, "step": 9279 }, { "epoch": 1.313796276633397, "grad_norm": 8.769004890940804, "learning_rate": 1.3907881079507623e-06, "loss": 0.9101, "step": 9280 }, { "epoch": 1.3139378495080343, "grad_norm": 9.23483889684512, "learning_rate": 1.3902744453691158e-06, "loss": 0.8311, "step": 9281 }, { "epoch": 1.3140794223826715, "grad_norm": 8.626885691736112, "learning_rate": 1.3897608411249153e-06, "loss": 0.8987, "step": 9282 }, { "epoch": 1.3142209952573087, "grad_norm": 9.029340179095271, "learning_rate": 1.3892472952451592e-06, "loss": 0.8304, "step": 9283 }, { "epoch": 1.314362568131946, "grad_norm": 8.961853991520506, "learning_rate": 1.3887338077568437e-06, "loss": 0.8492, "step": 9284 }, { "epoch": 1.3145041410065832, "grad_norm": 8.056085444886524, "learning_rate": 1.3882203786869644e-06, "loss": 0.8333, "step": 9285 }, { "epoch": 1.3146457138812204, "grad_norm": 8.868057759469824, "learning_rate": 1.3877070080625098e-06, "loss": 0.775, "step": 9286 }, { "epoch": 1.3147872867558577, "grad_norm": 9.305247920124453, "learning_rate": 1.3871936959104684e-06, "loss": 0.8863, "step": 9287 }, { "epoch": 1.3149288596304949, "grad_norm": 9.051850052883267, "learning_rate": 1.3866804422578256e-06, "loss": 0.8545, "step": 9288 }, { "epoch": 1.3150704325051321, "grad_norm": 9.551124338801893, "learning_rate": 1.386167247131561e-06, "loss": 0.9034, "step": 9289 }, { "epoch": 1.3152120053797693, "grad_norm": 9.050104332422602, "learning_rate": 1.3856541105586545e-06, "loss": 0.8904, "step": 9290 }, { "epoch": 1.3153535782544066, "grad_norm": 8.765972708539278, "learning_rate": 1.3851410325660796e-06, "loss": 0.7846, "step": 9291 }, { "epoch": 1.3154951511290438, "grad_norm": 8.277699987172568, "learning_rate": 1.3846280131808103e-06, "loss": 0.7554, "step": 9292 }, { "epoch": 1.3156367240036808, "grad_norm": 7.9453833172698225, "learning_rate": 1.3841150524298148e-06, "loss": 0.9372, "step": 9293 }, { "epoch": 1.315778296878318, "grad_norm": 8.36087074089167, "learning_rate": 1.3836021503400583e-06, "loss": 0.9744, "step": 9294 }, { "epoch": 1.3159198697529553, "grad_norm": 9.06703693344421, "learning_rate": 1.3830893069385046e-06, "loss": 0.8198, "step": 9295 }, { "epoch": 1.3160614426275925, "grad_norm": 8.645264192408638, "learning_rate": 1.3825765222521127e-06, "loss": 0.8414, "step": 9296 }, { "epoch": 1.3162030155022297, "grad_norm": 7.744042352387183, "learning_rate": 1.3820637963078406e-06, "loss": 0.873, "step": 9297 }, { "epoch": 1.316344588376867, "grad_norm": 6.813003101495428, "learning_rate": 1.3815511291326404e-06, "loss": 0.8152, "step": 9298 }, { "epoch": 1.3164861612515042, "grad_norm": 8.131118114202337, "learning_rate": 1.3810385207534641e-06, "loss": 0.8815, "step": 9299 }, { "epoch": 1.3166277341261414, "grad_norm": 9.566548207399165, "learning_rate": 1.3805259711972577e-06, "loss": 1.005, "step": 9300 }, { "epoch": 1.3167693070007787, "grad_norm": 8.443544384466806, "learning_rate": 1.380013480490967e-06, "loss": 0.805, "step": 9301 }, { "epoch": 1.3169108798754159, "grad_norm": 8.115352595784502, "learning_rate": 1.3795010486615318e-06, "loss": 0.7639, "step": 9302 }, { "epoch": 1.3170524527500531, "grad_norm": 10.272890536176355, "learning_rate": 1.3789886757358916e-06, "loss": 0.8829, "step": 9303 }, { "epoch": 1.3171940256246903, "grad_norm": 8.506388787700661, "learning_rate": 1.3784763617409814e-06, "loss": 0.8485, "step": 9304 }, { "epoch": 1.3173355984993276, "grad_norm": 7.919671408647411, "learning_rate": 1.3779641067037313e-06, "loss": 0.8867, "step": 9305 }, { "epoch": 1.3174771713739648, "grad_norm": 8.67736312824207, "learning_rate": 1.3774519106510725e-06, "loss": 0.7923, "step": 9306 }, { "epoch": 1.317618744248602, "grad_norm": 7.788319757951622, "learning_rate": 1.3769397736099288e-06, "loss": 0.7466, "step": 9307 }, { "epoch": 1.3177603171232393, "grad_norm": 8.391681966859215, "learning_rate": 1.3764276956072248e-06, "loss": 0.8844, "step": 9308 }, { "epoch": 1.3179018899978763, "grad_norm": 10.520590438808668, "learning_rate": 1.3759156766698783e-06, "loss": 0.9495, "step": 9309 }, { "epoch": 1.3180434628725135, "grad_norm": 8.418848743157003, "learning_rate": 1.3754037168248063e-06, "loss": 0.8368, "step": 9310 }, { "epoch": 1.3181850357471507, "grad_norm": 8.590169708742662, "learning_rate": 1.3748918160989232e-06, "loss": 0.8008, "step": 9311 }, { "epoch": 1.318326608621788, "grad_norm": 8.88860256475669, "learning_rate": 1.3743799745191377e-06, "loss": 0.941, "step": 9312 }, { "epoch": 1.3184681814964252, "grad_norm": 9.58796920324543, "learning_rate": 1.3738681921123586e-06, "loss": 0.8058, "step": 9313 }, { "epoch": 1.3186097543710624, "grad_norm": 7.091581172704204, "learning_rate": 1.373356468905489e-06, "loss": 0.9454, "step": 9314 }, { "epoch": 1.3187513272456997, "grad_norm": 10.159802563768926, "learning_rate": 1.3728448049254296e-06, "loss": 0.867, "step": 9315 }, { "epoch": 1.3188929001203369, "grad_norm": 7.629622262226212, "learning_rate": 1.3723332001990774e-06, "loss": 0.836, "step": 9316 }, { "epoch": 1.3190344729949741, "grad_norm": 7.764955672157837, "learning_rate": 1.3718216547533282e-06, "loss": 0.7896, "step": 9317 }, { "epoch": 1.3191760458696113, "grad_norm": 9.45814287663306, "learning_rate": 1.3713101686150742e-06, "loss": 0.7509, "step": 9318 }, { "epoch": 1.3193176187442486, "grad_norm": 8.319039203480349, "learning_rate": 1.370798741811202e-06, "loss": 0.8812, "step": 9319 }, { "epoch": 1.3194591916188858, "grad_norm": 10.209242384521879, "learning_rate": 1.370287374368599e-06, "loss": 0.875, "step": 9320 }, { "epoch": 1.319600764493523, "grad_norm": 10.295823178092697, "learning_rate": 1.3697760663141457e-06, "loss": 0.8216, "step": 9321 }, { "epoch": 1.3197423373681603, "grad_norm": 9.586081157873222, "learning_rate": 1.3692648176747224e-06, "loss": 0.8398, "step": 9322 }, { "epoch": 1.3198839102427975, "grad_norm": 8.83675299135241, "learning_rate": 1.368753628477204e-06, "loss": 0.8599, "step": 9323 }, { "epoch": 1.3200254831174347, "grad_norm": 8.757140815288347, "learning_rate": 1.3682424987484647e-06, "loss": 0.8198, "step": 9324 }, { "epoch": 1.320167055992072, "grad_norm": 10.14861604623057, "learning_rate": 1.367731428515373e-06, "loss": 0.7715, "step": 9325 }, { "epoch": 1.3203086288667092, "grad_norm": 8.831554833595758, "learning_rate": 1.3672204178047955e-06, "loss": 0.8206, "step": 9326 }, { "epoch": 1.3204502017413464, "grad_norm": 8.689837902738974, "learning_rate": 1.3667094666435964e-06, "loss": 0.9098, "step": 9327 }, { "epoch": 1.3205917746159836, "grad_norm": 8.565846033038344, "learning_rate": 1.3661985750586348e-06, "loss": 0.8205, "step": 9328 }, { "epoch": 1.3207333474906209, "grad_norm": 7.430500744135199, "learning_rate": 1.36568774307677e-06, "loss": 0.8441, "step": 9329 }, { "epoch": 1.320874920365258, "grad_norm": 7.5802799267778065, "learning_rate": 1.3651769707248535e-06, "loss": 0.7944, "step": 9330 }, { "epoch": 1.3210164932398953, "grad_norm": 8.428636204283594, "learning_rate": 1.3646662580297385e-06, "loss": 0.8435, "step": 9331 }, { "epoch": 1.3211580661145326, "grad_norm": 8.760816591427583, "learning_rate": 1.364155605018271e-06, "loss": 0.8393, "step": 9332 }, { "epoch": 1.3212996389891698, "grad_norm": 9.417919739477412, "learning_rate": 1.3636450117172962e-06, "loss": 0.871, "step": 9333 }, { "epoch": 1.321441211863807, "grad_norm": 7.725669323217194, "learning_rate": 1.3631344781536565e-06, "loss": 0.8422, "step": 9334 }, { "epoch": 1.321582784738444, "grad_norm": 7.7982210820595315, "learning_rate": 1.3626240043541901e-06, "loss": 0.7895, "step": 9335 }, { "epoch": 1.3217243576130813, "grad_norm": 8.32737697083274, "learning_rate": 1.3621135903457318e-06, "loss": 0.7094, "step": 9336 }, { "epoch": 1.3218659304877185, "grad_norm": 8.455085569340104, "learning_rate": 1.3616032361551124e-06, "loss": 0.9153, "step": 9337 }, { "epoch": 1.3220075033623557, "grad_norm": 7.769230272419457, "learning_rate": 1.3610929418091618e-06, "loss": 0.8159, "step": 9338 }, { "epoch": 1.322149076236993, "grad_norm": 8.80415414381122, "learning_rate": 1.3605827073347074e-06, "loss": 0.8228, "step": 9339 }, { "epoch": 1.3222906491116302, "grad_norm": 9.596571914224517, "learning_rate": 1.3600725327585695e-06, "loss": 0.8173, "step": 9340 }, { "epoch": 1.3224322219862674, "grad_norm": 7.124042379708367, "learning_rate": 1.3595624181075695e-06, "loss": 0.8146, "step": 9341 }, { "epoch": 1.3225737948609047, "grad_norm": 8.858739436528026, "learning_rate": 1.3590523634085218e-06, "loss": 0.7369, "step": 9342 }, { "epoch": 1.3227153677355419, "grad_norm": 9.64804251785146, "learning_rate": 1.3585423686882415e-06, "loss": 1.0431, "step": 9343 }, { "epoch": 1.322856940610179, "grad_norm": 9.271034038528533, "learning_rate": 1.3580324339735369e-06, "loss": 0.8496, "step": 9344 }, { "epoch": 1.3229985134848163, "grad_norm": 8.843501542446296, "learning_rate": 1.3575225592912166e-06, "loss": 0.9133, "step": 9345 }, { "epoch": 1.3231400863594536, "grad_norm": 8.93344728577467, "learning_rate": 1.3570127446680838e-06, "loss": 0.7715, "step": 9346 }, { "epoch": 1.3232816592340908, "grad_norm": 8.224734836002837, "learning_rate": 1.3565029901309378e-06, "loss": 0.8619, "step": 9347 }, { "epoch": 1.323423232108728, "grad_norm": 12.422497159144184, "learning_rate": 1.3559932957065777e-06, "loss": 0.8912, "step": 9348 }, { "epoch": 1.3235648049833653, "grad_norm": 8.37648202109959, "learning_rate": 1.3554836614217963e-06, "loss": 0.764, "step": 9349 }, { "epoch": 1.3237063778580023, "grad_norm": 7.982956373942033, "learning_rate": 1.354974087303386e-06, "loss": 0.8157, "step": 9350 }, { "epoch": 1.3238479507326395, "grad_norm": 9.39503718102342, "learning_rate": 1.3544645733781335e-06, "loss": 0.9476, "step": 9351 }, { "epoch": 1.3239895236072767, "grad_norm": 7.497863719598137, "learning_rate": 1.3539551196728252e-06, "loss": 0.8711, "step": 9352 }, { "epoch": 1.324131096481914, "grad_norm": 7.578795476138713, "learning_rate": 1.3534457262142408e-06, "loss": 0.7747, "step": 9353 }, { "epoch": 1.3242726693565512, "grad_norm": 10.88045145837195, "learning_rate": 1.3529363930291606e-06, "loss": 0.9319, "step": 9354 }, { "epoch": 1.3244142422311884, "grad_norm": 8.80681280499515, "learning_rate": 1.3524271201443578e-06, "loss": 0.8882, "step": 9355 }, { "epoch": 1.3245558151058257, "grad_norm": 7.296362652649906, "learning_rate": 1.3519179075866067e-06, "loss": 0.8109, "step": 9356 }, { "epoch": 1.3246973879804629, "grad_norm": 9.040349158236756, "learning_rate": 1.3514087553826753e-06, "loss": 0.8832, "step": 9357 }, { "epoch": 1.3248389608551001, "grad_norm": 9.59799766321426, "learning_rate": 1.350899663559328e-06, "loss": 0.8522, "step": 9358 }, { "epoch": 1.3249805337297373, "grad_norm": 8.348208718410936, "learning_rate": 1.3503906321433298e-06, "loss": 0.8785, "step": 9359 }, { "epoch": 1.3251221066043746, "grad_norm": 10.863961065118655, "learning_rate": 1.3498816611614373e-06, "loss": 1.0423, "step": 9360 }, { "epoch": 1.3252636794790118, "grad_norm": 8.402837237803642, "learning_rate": 1.3493727506404092e-06, "loss": 0.8219, "step": 9361 }, { "epoch": 1.325405252353649, "grad_norm": 9.099968845188341, "learning_rate": 1.348863900606998e-06, "loss": 0.9283, "step": 9362 }, { "epoch": 1.3255468252282863, "grad_norm": 10.242932371846392, "learning_rate": 1.3483551110879525e-06, "loss": 0.8589, "step": 9363 }, { "epoch": 1.3256883981029235, "grad_norm": 8.870715759367792, "learning_rate": 1.347846382110021e-06, "loss": 0.9346, "step": 9364 }, { "epoch": 1.3258299709775607, "grad_norm": 9.076042212127934, "learning_rate": 1.3473377136999452e-06, "loss": 0.7983, "step": 9365 }, { "epoch": 1.325971543852198, "grad_norm": 8.64662312649853, "learning_rate": 1.3468291058844673e-06, "loss": 0.8602, "step": 9366 }, { "epoch": 1.3261131167268352, "grad_norm": 9.942822452541025, "learning_rate": 1.3463205586903233e-06, "loss": 0.8775, "step": 9367 }, { "epoch": 1.3262546896014724, "grad_norm": 11.004255338689758, "learning_rate": 1.3458120721442464e-06, "loss": 0.8915, "step": 9368 }, { "epoch": 1.3263962624761096, "grad_norm": 9.81172245127926, "learning_rate": 1.3453036462729697e-06, "loss": 0.9112, "step": 9369 }, { "epoch": 1.3265378353507469, "grad_norm": 8.107871914078173, "learning_rate": 1.3447952811032177e-06, "loss": 0.7975, "step": 9370 }, { "epoch": 1.326679408225384, "grad_norm": 8.039419805743961, "learning_rate": 1.3442869766617178e-06, "loss": 0.8063, "step": 9371 }, { "epoch": 1.3268209811000213, "grad_norm": 9.494152879736486, "learning_rate": 1.3437787329751887e-06, "loss": 0.9347, "step": 9372 }, { "epoch": 1.3269625539746586, "grad_norm": 7.682518810642699, "learning_rate": 1.3432705500703501e-06, "loss": 0.8578, "step": 9373 }, { "epoch": 1.3271041268492958, "grad_norm": 9.308894950933732, "learning_rate": 1.342762427973916e-06, "loss": 0.8317, "step": 9374 }, { "epoch": 1.327245699723933, "grad_norm": 8.662099466242923, "learning_rate": 1.3422543667125988e-06, "loss": 0.8742, "step": 9375 }, { "epoch": 1.32738727259857, "grad_norm": 7.345173929023561, "learning_rate": 1.341746366313105e-06, "loss": 0.8414, "step": 9376 }, { "epoch": 1.3275288454732073, "grad_norm": 8.484245959682836, "learning_rate": 1.3412384268021421e-06, "loss": 0.8196, "step": 9377 }, { "epoch": 1.3276704183478445, "grad_norm": 9.440932300242064, "learning_rate": 1.3407305482064115e-06, "loss": 0.8245, "step": 9378 }, { "epoch": 1.3278119912224817, "grad_norm": 10.709284481648936, "learning_rate": 1.3402227305526106e-06, "loss": 0.9767, "step": 9379 }, { "epoch": 1.327953564097119, "grad_norm": 10.106378078651735, "learning_rate": 1.3397149738674363e-06, "loss": 0.9002, "step": 9380 }, { "epoch": 1.3280951369717562, "grad_norm": 9.633110295477975, "learning_rate": 1.3392072781775806e-06, "loss": 0.917, "step": 9381 }, { "epoch": 1.3282367098463934, "grad_norm": 9.454168088757903, "learning_rate": 1.3386996435097333e-06, "loss": 0.9671, "step": 9382 }, { "epoch": 1.3283782827210306, "grad_norm": 9.358127313408641, "learning_rate": 1.3381920698905788e-06, "loss": 0.7945, "step": 9383 }, { "epoch": 1.3285198555956679, "grad_norm": 8.944485582003034, "learning_rate": 1.3376845573468012e-06, "loss": 0.7975, "step": 9384 }, { "epoch": 1.328661428470305, "grad_norm": 7.861721894253073, "learning_rate": 1.3371771059050803e-06, "loss": 0.8018, "step": 9385 }, { "epoch": 1.3288030013449423, "grad_norm": 9.010383550035481, "learning_rate": 1.3366697155920913e-06, "loss": 0.9149, "step": 9386 }, { "epoch": 1.3289445742195796, "grad_norm": 8.224431036195794, "learning_rate": 1.3361623864345086e-06, "loss": 0.8137, "step": 9387 }, { "epoch": 1.3290861470942168, "grad_norm": 7.696230265307221, "learning_rate": 1.3356551184590017e-06, "loss": 0.7919, "step": 9388 }, { "epoch": 1.329227719968854, "grad_norm": 8.627277460580245, "learning_rate": 1.3351479116922372e-06, "loss": 0.8519, "step": 9389 }, { "epoch": 1.3293692928434913, "grad_norm": 8.525577881319789, "learning_rate": 1.3346407661608771e-06, "loss": 0.816, "step": 9390 }, { "epoch": 1.3295108657181285, "grad_norm": 7.854712618227443, "learning_rate": 1.3341336818915832e-06, "loss": 0.8765, "step": 9391 }, { "epoch": 1.3296524385927655, "grad_norm": 9.659141724804709, "learning_rate": 1.3336266589110131e-06, "loss": 0.9358, "step": 9392 }, { "epoch": 1.3297940114674027, "grad_norm": 10.880116278644103, "learning_rate": 1.333119697245819e-06, "loss": 1.0242, "step": 9393 }, { "epoch": 1.32993558434204, "grad_norm": 8.405873343385508, "learning_rate": 1.3326127969226535e-06, "loss": 0.7772, "step": 9394 }, { "epoch": 1.3300771572166772, "grad_norm": 7.837485923936971, "learning_rate": 1.3321059579681617e-06, "loss": 0.7177, "step": 9395 }, { "epoch": 1.3302187300913144, "grad_norm": 11.258239102669258, "learning_rate": 1.3315991804089897e-06, "loss": 0.973, "step": 9396 }, { "epoch": 1.3303603029659516, "grad_norm": 7.962379931348577, "learning_rate": 1.3310924642717767e-06, "loss": 0.8829, "step": 9397 }, { "epoch": 1.3305018758405889, "grad_norm": 8.47260897505829, "learning_rate": 1.3305858095831626e-06, "loss": 0.8523, "step": 9398 }, { "epoch": 1.330643448715226, "grad_norm": 8.460669684992713, "learning_rate": 1.33007921636978e-06, "loss": 0.8254, "step": 9399 }, { "epoch": 1.3307850215898633, "grad_norm": 8.692267776823487, "learning_rate": 1.3295726846582602e-06, "loss": 0.935, "step": 9400 }, { "epoch": 1.3309265944645006, "grad_norm": 8.272188807444836, "learning_rate": 1.3290662144752322e-06, "loss": 0.7689, "step": 9401 }, { "epoch": 1.3310681673391378, "grad_norm": 9.127100454553984, "learning_rate": 1.3285598058473195e-06, "loss": 0.8884, "step": 9402 }, { "epoch": 1.331209740213775, "grad_norm": 8.279005905405448, "learning_rate": 1.3280534588011451e-06, "loss": 0.7953, "step": 9403 }, { "epoch": 1.3313513130884123, "grad_norm": 10.618274310292293, "learning_rate": 1.3275471733633258e-06, "loss": 0.9412, "step": 9404 }, { "epoch": 1.3314928859630495, "grad_norm": 9.357243927354657, "learning_rate": 1.3270409495604783e-06, "loss": 0.8286, "step": 9405 }, { "epoch": 1.3316344588376867, "grad_norm": 10.429394161371128, "learning_rate": 1.3265347874192125e-06, "loss": 0.845, "step": 9406 }, { "epoch": 1.331776031712324, "grad_norm": 8.507825054513235, "learning_rate": 1.3260286869661378e-06, "loss": 0.8428, "step": 9407 }, { "epoch": 1.3319176045869612, "grad_norm": 7.707046098883313, "learning_rate": 1.325522648227861e-06, "loss": 0.7705, "step": 9408 }, { "epoch": 1.3320591774615984, "grad_norm": 9.0572435590897, "learning_rate": 1.3250166712309825e-06, "loss": 0.8714, "step": 9409 }, { "epoch": 1.3322007503362356, "grad_norm": 10.669948867632433, "learning_rate": 1.3245107560021015e-06, "loss": 0.9061, "step": 9410 }, { "epoch": 1.3323423232108729, "grad_norm": 7.920973747195465, "learning_rate": 1.324004902567813e-06, "loss": 0.7323, "step": 9411 }, { "epoch": 1.33248389608551, "grad_norm": 10.065923073022748, "learning_rate": 1.3234991109547104e-06, "loss": 0.9816, "step": 9412 }, { "epoch": 1.3326254689601473, "grad_norm": 8.108149029740176, "learning_rate": 1.3229933811893814e-06, "loss": 0.9112, "step": 9413 }, { "epoch": 1.3327670418347846, "grad_norm": 8.233356680688948, "learning_rate": 1.3224877132984131e-06, "loss": 0.8386, "step": 9414 }, { "epoch": 1.3329086147094218, "grad_norm": 8.6655650783686, "learning_rate": 1.3219821073083882e-06, "loss": 0.9431, "step": 9415 }, { "epoch": 1.333050187584059, "grad_norm": 9.05251714781175, "learning_rate": 1.3214765632458852e-06, "loss": 0.8836, "step": 9416 }, { "epoch": 1.3331917604586963, "grad_norm": 7.345793744040223, "learning_rate": 1.320971081137481e-06, "loss": 0.7617, "step": 9417 }, { "epoch": 1.3333333333333333, "grad_norm": 7.874774505777655, "learning_rate": 1.3204656610097472e-06, "loss": 0.912, "step": 9418 }, { "epoch": 1.3334749062079705, "grad_norm": 9.23851702592894, "learning_rate": 1.3199603028892548e-06, "loss": 0.8405, "step": 9419 }, { "epoch": 1.3336164790826077, "grad_norm": 7.644610342670172, "learning_rate": 1.3194550068025697e-06, "loss": 0.9222, "step": 9420 }, { "epoch": 1.333758051957245, "grad_norm": 8.232099591482237, "learning_rate": 1.3189497727762535e-06, "loss": 0.8796, "step": 9421 }, { "epoch": 1.3338996248318822, "grad_norm": 8.506891486247433, "learning_rate": 1.318444600836868e-06, "loss": 0.8684, "step": 9422 }, { "epoch": 1.3340411977065194, "grad_norm": 8.32843371994633, "learning_rate": 1.3179394910109683e-06, "loss": 0.7851, "step": 9423 }, { "epoch": 1.3341827705811566, "grad_norm": 8.985380378530172, "learning_rate": 1.3174344433251086e-06, "loss": 0.9431, "step": 9424 }, { "epoch": 1.3343243434557939, "grad_norm": 8.716787466035141, "learning_rate": 1.3169294578058378e-06, "loss": 0.8383, "step": 9425 }, { "epoch": 1.334465916330431, "grad_norm": 9.310619599634686, "learning_rate": 1.3164245344797045e-06, "loss": 0.9142, "step": 9426 }, { "epoch": 1.3346074892050683, "grad_norm": 8.790938167913042, "learning_rate": 1.3159196733732494e-06, "loss": 0.8203, "step": 9427 }, { "epoch": 1.3347490620797056, "grad_norm": 10.439722977978564, "learning_rate": 1.3154148745130151e-06, "loss": 0.8839, "step": 9428 }, { "epoch": 1.3348906349543428, "grad_norm": 9.047165096816931, "learning_rate": 1.314910137925537e-06, "loss": 0.9524, "step": 9429 }, { "epoch": 1.33503220782898, "grad_norm": 10.214344066411277, "learning_rate": 1.3144054636373505e-06, "loss": 0.8286, "step": 9430 }, { "epoch": 1.3351737807036173, "grad_norm": 8.541847437984865, "learning_rate": 1.313900851674984e-06, "loss": 0.7413, "step": 9431 }, { "epoch": 1.3353153535782545, "grad_norm": 7.799998298057957, "learning_rate": 1.3133963020649648e-06, "loss": 0.8241, "step": 9432 }, { "epoch": 1.3354569264528915, "grad_norm": 7.894929161406266, "learning_rate": 1.3128918148338183e-06, "loss": 0.8074, "step": 9433 }, { "epoch": 1.3355984993275287, "grad_norm": 8.14076651598444, "learning_rate": 1.312387390008063e-06, "loss": 0.793, "step": 9434 }, { "epoch": 1.335740072202166, "grad_norm": 9.199609781365432, "learning_rate": 1.3118830276142169e-06, "loss": 0.8474, "step": 9435 }, { "epoch": 1.3358816450768032, "grad_norm": 6.746831362125119, "learning_rate": 1.3113787276787951e-06, "loss": 0.7793, "step": 9436 }, { "epoch": 1.3360232179514404, "grad_norm": 9.14252659506681, "learning_rate": 1.3108744902283065e-06, "loss": 0.865, "step": 9437 }, { "epoch": 1.3361647908260776, "grad_norm": 8.399753258123912, "learning_rate": 1.31037031528926e-06, "loss": 0.8704, "step": 9438 }, { "epoch": 1.3363063637007149, "grad_norm": 9.06227185192345, "learning_rate": 1.309866202888158e-06, "loss": 0.8574, "step": 9439 }, { "epoch": 1.336447936575352, "grad_norm": 9.17080396283265, "learning_rate": 1.3093621530515038e-06, "loss": 0.9615, "step": 9440 }, { "epoch": 1.3365895094499893, "grad_norm": 8.125682626072004, "learning_rate": 1.308858165805793e-06, "loss": 0.8154, "step": 9441 }, { "epoch": 1.3367310823246266, "grad_norm": 9.230832009224278, "learning_rate": 1.3083542411775196e-06, "loss": 0.9343, "step": 9442 }, { "epoch": 1.3368726551992638, "grad_norm": 9.426809154478569, "learning_rate": 1.307850379193176e-06, "loss": 0.8125, "step": 9443 }, { "epoch": 1.337014228073901, "grad_norm": 8.174594638118675, "learning_rate": 1.3073465798792482e-06, "loss": 0.8787, "step": 9444 }, { "epoch": 1.3371558009485383, "grad_norm": 9.209075713690723, "learning_rate": 1.3068428432622221e-06, "loss": 0.9482, "step": 9445 }, { "epoch": 1.3372973738231755, "grad_norm": 8.788368896242654, "learning_rate": 1.3063391693685773e-06, "loss": 0.9382, "step": 9446 }, { "epoch": 1.3374389466978127, "grad_norm": 7.515179087522086, "learning_rate": 1.3058355582247933e-06, "loss": 0.8796, "step": 9447 }, { "epoch": 1.33758051957245, "grad_norm": 8.174453707766052, "learning_rate": 1.3053320098573428e-06, "loss": 0.8512, "step": 9448 }, { "epoch": 1.3377220924470872, "grad_norm": 8.07873839774015, "learning_rate": 1.3048285242926983e-06, "loss": 0.8022, "step": 9449 }, { "epoch": 1.3378636653217244, "grad_norm": 8.30705140024477, "learning_rate": 1.3043251015573266e-06, "loss": 0.8678, "step": 9450 }, { "epoch": 1.3380052381963616, "grad_norm": 9.801396593501636, "learning_rate": 1.3038217416776936e-06, "loss": 0.9305, "step": 9451 }, { "epoch": 1.3381468110709989, "grad_norm": 8.633065253652848, "learning_rate": 1.3033184446802596e-06, "loss": 0.7564, "step": 9452 }, { "epoch": 1.338288383945636, "grad_norm": 9.254560428940435, "learning_rate": 1.3028152105914818e-06, "loss": 0.8722, "step": 9453 }, { "epoch": 1.3384299568202733, "grad_norm": 9.532944825621296, "learning_rate": 1.3023120394378167e-06, "loss": 0.7961, "step": 9454 }, { "epoch": 1.3385715296949106, "grad_norm": 9.826288533830953, "learning_rate": 1.3018089312457137e-06, "loss": 0.8361, "step": 9455 }, { "epoch": 1.3387131025695478, "grad_norm": 7.558510224425455, "learning_rate": 1.3013058860416229e-06, "loss": 0.7195, "step": 9456 }, { "epoch": 1.338854675444185, "grad_norm": 9.612177246389894, "learning_rate": 1.3008029038519866e-06, "loss": 0.7902, "step": 9457 }, { "epoch": 1.3389962483188222, "grad_norm": 7.367975542754794, "learning_rate": 1.3002999847032476e-06, "loss": 0.7907, "step": 9458 }, { "epoch": 1.3391378211934593, "grad_norm": 6.806755513736284, "learning_rate": 1.2997971286218448e-06, "loss": 0.7539, "step": 9459 }, { "epoch": 1.3392793940680965, "grad_norm": 9.251920346086646, "learning_rate": 1.2992943356342111e-06, "loss": 0.8367, "step": 9460 }, { "epoch": 1.3394209669427337, "grad_norm": 8.244446243372764, "learning_rate": 1.2987916057667799e-06, "loss": 0.8616, "step": 9461 }, { "epoch": 1.339562539817371, "grad_norm": 8.231775209996332, "learning_rate": 1.2982889390459781e-06, "loss": 0.8451, "step": 9462 }, { "epoch": 1.3397041126920082, "grad_norm": 10.133411822098715, "learning_rate": 1.297786335498231e-06, "loss": 0.8732, "step": 9463 }, { "epoch": 1.3398456855666454, "grad_norm": 6.397580118456533, "learning_rate": 1.297283795149959e-06, "loss": 0.7391, "step": 9464 }, { "epoch": 1.3399872584412826, "grad_norm": 8.252579719183837, "learning_rate": 1.2967813180275809e-06, "loss": 0.7478, "step": 9465 }, { "epoch": 1.3401288313159199, "grad_norm": 8.128053062205687, "learning_rate": 1.2962789041575127e-06, "loss": 0.8832, "step": 9466 }, { "epoch": 1.340270404190557, "grad_norm": 8.872428843969875, "learning_rate": 1.2957765535661644e-06, "loss": 0.8028, "step": 9467 }, { "epoch": 1.3404119770651943, "grad_norm": 9.885213857217742, "learning_rate": 1.295274266279945e-06, "loss": 0.863, "step": 9468 }, { "epoch": 1.3405535499398316, "grad_norm": 10.03028213727899, "learning_rate": 1.2947720423252586e-06, "loss": 0.8582, "step": 9469 }, { "epoch": 1.3406951228144688, "grad_norm": 8.941234315324868, "learning_rate": 1.2942698817285082e-06, "loss": 0.8566, "step": 9470 }, { "epoch": 1.340836695689106, "grad_norm": 9.633114651460533, "learning_rate": 1.29376778451609e-06, "loss": 0.8141, "step": 9471 }, { "epoch": 1.3409782685637432, "grad_norm": 8.60444868486123, "learning_rate": 1.2932657507144014e-06, "loss": 0.8453, "step": 9472 }, { "epoch": 1.3411198414383805, "grad_norm": 8.05675781164774, "learning_rate": 1.2927637803498323e-06, "loss": 0.8196, "step": 9473 }, { "epoch": 1.3412614143130177, "grad_norm": 8.68534725604036, "learning_rate": 1.2922618734487697e-06, "loss": 0.7611, "step": 9474 }, { "epoch": 1.3414029871876547, "grad_norm": 10.06402159375905, "learning_rate": 1.2917600300376012e-06, "loss": 0.8308, "step": 9475 }, { "epoch": 1.341544560062292, "grad_norm": 10.612417871811187, "learning_rate": 1.2912582501427062e-06, "loss": 0.9095, "step": 9476 }, { "epoch": 1.3416861329369292, "grad_norm": 9.048920658724477, "learning_rate": 1.2907565337904642e-06, "loss": 0.8388, "step": 9477 }, { "epoch": 1.3418277058115664, "grad_norm": 7.8902232454962595, "learning_rate": 1.290254881007249e-06, "loss": 0.8007, "step": 9478 }, { "epoch": 1.3419692786862036, "grad_norm": 9.912447846745684, "learning_rate": 1.2897532918194336e-06, "loss": 0.8548, "step": 9479 }, { "epoch": 1.3421108515608409, "grad_norm": 8.289555368682281, "learning_rate": 1.2892517662533844e-06, "loss": 0.8235, "step": 9480 }, { "epoch": 1.342252424435478, "grad_norm": 10.155425522424155, "learning_rate": 1.2887503043354668e-06, "loss": 0.8864, "step": 9481 }, { "epoch": 1.3423939973101153, "grad_norm": 8.508662578054643, "learning_rate": 1.2882489060920436e-06, "loss": 0.8152, "step": 9482 }, { "epoch": 1.3425355701847526, "grad_norm": 9.875285470433683, "learning_rate": 1.287747571549472e-06, "loss": 0.8882, "step": 9483 }, { "epoch": 1.3426771430593898, "grad_norm": 8.791444121763714, "learning_rate": 1.2872463007341065e-06, "loss": 0.7847, "step": 9484 }, { "epoch": 1.342818715934027, "grad_norm": 8.879028695399652, "learning_rate": 1.286745093672298e-06, "loss": 0.9423, "step": 9485 }, { "epoch": 1.3429602888086642, "grad_norm": 8.453226536618697, "learning_rate": 1.2862439503903958e-06, "loss": 0.8392, "step": 9486 }, { "epoch": 1.3431018616833015, "grad_norm": 9.06136551694822, "learning_rate": 1.2857428709147434e-06, "loss": 0.7427, "step": 9487 }, { "epoch": 1.3432434345579387, "grad_norm": 7.725131836787768, "learning_rate": 1.285241855271683e-06, "loss": 0.8009, "step": 9488 }, { "epoch": 1.343385007432576, "grad_norm": 9.576134230562865, "learning_rate": 1.2847409034875536e-06, "loss": 0.9076, "step": 9489 }, { "epoch": 1.3435265803072132, "grad_norm": 7.811389569521926, "learning_rate": 1.2842400155886876e-06, "loss": 0.8454, "step": 9490 }, { "epoch": 1.3436681531818504, "grad_norm": 7.7971352818529205, "learning_rate": 1.2837391916014182e-06, "loss": 0.8519, "step": 9491 }, { "epoch": 1.3438097260564876, "grad_norm": 7.159046676080743, "learning_rate": 1.2832384315520717e-06, "loss": 0.8941, "step": 9492 }, { "epoch": 1.3439512989311249, "grad_norm": 9.039591702043209, "learning_rate": 1.2827377354669752e-06, "loss": 0.9143, "step": 9493 }, { "epoch": 1.344092871805762, "grad_norm": 8.707767255231909, "learning_rate": 1.2822371033724478e-06, "loss": 0.8834, "step": 9494 }, { "epoch": 1.3442344446803993, "grad_norm": 8.858392785471821, "learning_rate": 1.2817365352948069e-06, "loss": 0.8315, "step": 9495 }, { "epoch": 1.3443760175550366, "grad_norm": 9.13259070317181, "learning_rate": 1.2812360312603689e-06, "loss": 0.7819, "step": 9496 }, { "epoch": 1.3445175904296738, "grad_norm": 8.947778300295022, "learning_rate": 1.2807355912954433e-06, "loss": 0.8554, "step": 9497 }, { "epoch": 1.344659163304311, "grad_norm": 8.288256175438082, "learning_rate": 1.2802352154263392e-06, "loss": 0.8331, "step": 9498 }, { "epoch": 1.3448007361789482, "grad_norm": 9.50674118990087, "learning_rate": 1.2797349036793595e-06, "loss": 0.8264, "step": 9499 }, { "epoch": 1.3449423090535852, "grad_norm": 8.392363355772467, "learning_rate": 1.2792346560808068e-06, "loss": 0.8303, "step": 9500 }, { "epoch": 1.3450838819282225, "grad_norm": 9.250107223301692, "learning_rate": 1.2787344726569772e-06, "loss": 0.7269, "step": 9501 }, { "epoch": 1.3452254548028597, "grad_norm": 9.146320257643021, "learning_rate": 1.2782343534341667e-06, "loss": 0.8124, "step": 9502 }, { "epoch": 1.345367027677497, "grad_norm": 9.501562291229853, "learning_rate": 1.2777342984386648e-06, "loss": 0.837, "step": 9503 }, { "epoch": 1.3455086005521342, "grad_norm": 9.65520385210904, "learning_rate": 1.2772343076967596e-06, "loss": 0.865, "step": 9504 }, { "epoch": 1.3456501734267714, "grad_norm": 7.70092519180744, "learning_rate": 1.2767343812347356e-06, "loss": 0.9128, "step": 9505 }, { "epoch": 1.3457917463014086, "grad_norm": 9.641366494225627, "learning_rate": 1.2762345190788722e-06, "loss": 0.9294, "step": 9506 }, { "epoch": 1.3459333191760459, "grad_norm": 8.952134322235109, "learning_rate": 1.2757347212554484e-06, "loss": 0.8286, "step": 9507 }, { "epoch": 1.346074892050683, "grad_norm": 7.5586048530280205, "learning_rate": 1.2752349877907364e-06, "loss": 0.8157, "step": 9508 }, { "epoch": 1.3462164649253203, "grad_norm": 7.784108077487858, "learning_rate": 1.274735318711009e-06, "loss": 0.8313, "step": 9509 }, { "epoch": 1.3463580377999576, "grad_norm": 7.176726119661447, "learning_rate": 1.274235714042531e-06, "loss": 0.773, "step": 9510 }, { "epoch": 1.3464996106745948, "grad_norm": 7.872411150995752, "learning_rate": 1.2737361738115681e-06, "loss": 0.7981, "step": 9511 }, { "epoch": 1.346641183549232, "grad_norm": 10.537024479790075, "learning_rate": 1.2732366980443808e-06, "loss": 0.9576, "step": 9512 }, { "epoch": 1.3467827564238692, "grad_norm": 6.936120248317578, "learning_rate": 1.2727372867672247e-06, "loss": 0.7735, "step": 9513 }, { "epoch": 1.3469243292985065, "grad_norm": 10.340538794168848, "learning_rate": 1.2722379400063553e-06, "loss": 0.8698, "step": 9514 }, { "epoch": 1.3470659021731437, "grad_norm": 8.486649294483577, "learning_rate": 1.271738657788022e-06, "loss": 0.8917, "step": 9515 }, { "epoch": 1.3472074750477807, "grad_norm": 9.309245725043372, "learning_rate": 1.2712394401384703e-06, "loss": 0.9239, "step": 9516 }, { "epoch": 1.347349047922418, "grad_norm": 9.717693700761329, "learning_rate": 1.2707402870839464e-06, "loss": 0.9006, "step": 9517 }, { "epoch": 1.3474906207970552, "grad_norm": 9.319148001304349, "learning_rate": 1.270241198650688e-06, "loss": 0.9996, "step": 9518 }, { "epoch": 1.3476321936716924, "grad_norm": 10.117684064189488, "learning_rate": 1.269742174864934e-06, "loss": 0.8739, "step": 9519 }, { "epoch": 1.3477737665463296, "grad_norm": 8.215457619419894, "learning_rate": 1.2692432157529153e-06, "loss": 0.8471, "step": 9520 }, { "epoch": 1.3479153394209669, "grad_norm": 7.5327215379509775, "learning_rate": 1.268744321340864e-06, "loss": 0.7793, "step": 9521 }, { "epoch": 1.348056912295604, "grad_norm": 8.060571720869753, "learning_rate": 1.2682454916550046e-06, "loss": 0.8317, "step": 9522 }, { "epoch": 1.3481984851702413, "grad_norm": 9.505533714821766, "learning_rate": 1.2677467267215626e-06, "loss": 0.8374, "step": 9523 }, { "epoch": 1.3483400580448786, "grad_norm": 8.327851997753346, "learning_rate": 1.2672480265667553e-06, "loss": 0.7851, "step": 9524 }, { "epoch": 1.3484816309195158, "grad_norm": 9.46088427679084, "learning_rate": 1.2667493912168008e-06, "loss": 0.9667, "step": 9525 }, { "epoch": 1.348623203794153, "grad_norm": 8.345629305415978, "learning_rate": 1.2662508206979113e-06, "loss": 0.7775, "step": 9526 }, { "epoch": 1.3487647766687902, "grad_norm": 10.366060013105871, "learning_rate": 1.2657523150362955e-06, "loss": 0.9099, "step": 9527 }, { "epoch": 1.3489063495434275, "grad_norm": 8.106133255893631, "learning_rate": 1.265253874258161e-06, "loss": 0.8626, "step": 9528 }, { "epoch": 1.3490479224180647, "grad_norm": 8.980969548210199, "learning_rate": 1.2647554983897087e-06, "loss": 0.8305, "step": 9529 }, { "epoch": 1.349189495292702, "grad_norm": 9.472785517803546, "learning_rate": 1.2642571874571396e-06, "loss": 0.9074, "step": 9530 }, { "epoch": 1.3493310681673392, "grad_norm": 8.848803905975442, "learning_rate": 1.2637589414866483e-06, "loss": 0.8587, "step": 9531 }, { "epoch": 1.3494726410419764, "grad_norm": 8.327228089894788, "learning_rate": 1.2632607605044272e-06, "loss": 0.8163, "step": 9532 }, { "epoch": 1.3496142139166136, "grad_norm": 6.7509615177984035, "learning_rate": 1.262762644536667e-06, "loss": 0.739, "step": 9533 }, { "epoch": 1.3497557867912509, "grad_norm": 9.94509156274623, "learning_rate": 1.262264593609551e-06, "loss": 0.7557, "step": 9534 }, { "epoch": 1.349897359665888, "grad_norm": 13.26189651482479, "learning_rate": 1.2617666077492636e-06, "loss": 0.7054, "step": 9535 }, { "epoch": 1.3500389325405253, "grad_norm": 8.61635875434593, "learning_rate": 1.2612686869819818e-06, "loss": 0.8862, "step": 9536 }, { "epoch": 1.3501805054151625, "grad_norm": 7.622468872206936, "learning_rate": 1.2607708313338818e-06, "loss": 0.7671, "step": 9537 }, { "epoch": 1.3503220782897998, "grad_norm": 6.955601351949113, "learning_rate": 1.2602730408311342e-06, "loss": 0.9185, "step": 9538 }, { "epoch": 1.350463651164437, "grad_norm": 7.521058844428584, "learning_rate": 1.2597753154999088e-06, "loss": 0.8429, "step": 9539 }, { "epoch": 1.3506052240390742, "grad_norm": 8.746120791885394, "learning_rate": 1.259277655366371e-06, "loss": 0.8581, "step": 9540 }, { "epoch": 1.3507467969137115, "grad_norm": 9.689854095903733, "learning_rate": 1.2587800604566808e-06, "loss": 0.879, "step": 9541 }, { "epoch": 1.3508883697883485, "grad_norm": 9.100021244904587, "learning_rate": 1.2582825307969981e-06, "loss": 0.9235, "step": 9542 }, { "epoch": 1.3510299426629857, "grad_norm": 9.742050059801365, "learning_rate": 1.257785066413476e-06, "loss": 0.8249, "step": 9543 }, { "epoch": 1.351171515537623, "grad_norm": 8.008638962686979, "learning_rate": 1.2572876673322676e-06, "loss": 0.8552, "step": 9544 }, { "epoch": 1.3513130884122602, "grad_norm": 7.860404157658086, "learning_rate": 1.2567903335795191e-06, "loss": 0.8702, "step": 9545 }, { "epoch": 1.3514546612868974, "grad_norm": 7.900414767422939, "learning_rate": 1.2562930651813772e-06, "loss": 0.863, "step": 9546 }, { "epoch": 1.3515962341615346, "grad_norm": 6.8262967732672415, "learning_rate": 1.255795862163981e-06, "loss": 0.776, "step": 9547 }, { "epoch": 1.3517378070361719, "grad_norm": 9.083357178076382, "learning_rate": 1.2552987245534675e-06, "loss": 0.828, "step": 9548 }, { "epoch": 1.351879379910809, "grad_norm": 9.625160909521444, "learning_rate": 1.2548016523759733e-06, "loss": 0.9784, "step": 9549 }, { "epoch": 1.3520209527854463, "grad_norm": 7.023330500957761, "learning_rate": 1.2543046456576267e-06, "loss": 0.8532, "step": 9550 }, { "epoch": 1.3521625256600835, "grad_norm": 10.43313745024305, "learning_rate": 1.253807704424557e-06, "loss": 0.8858, "step": 9551 }, { "epoch": 1.3523040985347208, "grad_norm": 7.927115067273523, "learning_rate": 1.2533108287028862e-06, "loss": 0.8284, "step": 9552 }, { "epoch": 1.352445671409358, "grad_norm": 9.38936602742553, "learning_rate": 1.2528140185187362e-06, "loss": 0.8194, "step": 9553 }, { "epoch": 1.3525872442839952, "grad_norm": 7.866609735875567, "learning_rate": 1.2523172738982225e-06, "loss": 0.7644, "step": 9554 }, { "epoch": 1.3527288171586325, "grad_norm": 9.420604816304548, "learning_rate": 1.2518205948674593e-06, "loss": 0.8981, "step": 9555 }, { "epoch": 1.3528703900332697, "grad_norm": 7.88300195471974, "learning_rate": 1.2513239814525583e-06, "loss": 0.8532, "step": 9556 }, { "epoch": 1.3530119629079067, "grad_norm": 7.606606232349841, "learning_rate": 1.250827433679624e-06, "loss": 0.8689, "step": 9557 }, { "epoch": 1.353153535782544, "grad_norm": 9.280353952432089, "learning_rate": 1.2503309515747602e-06, "loss": 0.7946, "step": 9558 }, { "epoch": 1.3532951086571812, "grad_norm": 8.409738944012787, "learning_rate": 1.2498345351640655e-06, "loss": 0.7963, "step": 9559 }, { "epoch": 1.3534366815318184, "grad_norm": 8.45989369499206, "learning_rate": 1.2493381844736382e-06, "loss": 0.8268, "step": 9560 }, { "epoch": 1.3535782544064556, "grad_norm": 8.686465853606915, "learning_rate": 1.2488418995295689e-06, "loss": 0.8048, "step": 9561 }, { "epoch": 1.3537198272810929, "grad_norm": 9.392006872379316, "learning_rate": 1.2483456803579484e-06, "loss": 0.879, "step": 9562 }, { "epoch": 1.35386140015573, "grad_norm": 7.701429942734128, "learning_rate": 1.2478495269848626e-06, "loss": 0.8235, "step": 9563 }, { "epoch": 1.3540029730303673, "grad_norm": 9.027470943094636, "learning_rate": 1.247353439436393e-06, "loss": 0.9553, "step": 9564 }, { "epoch": 1.3541445459050045, "grad_norm": 7.565511560988019, "learning_rate": 1.2468574177386198e-06, "loss": 0.7335, "step": 9565 }, { "epoch": 1.3542861187796418, "grad_norm": 8.86797043624995, "learning_rate": 1.2463614619176167e-06, "loss": 0.8246, "step": 9566 }, { "epoch": 1.354427691654279, "grad_norm": 10.430099696325676, "learning_rate": 1.2458655719994582e-06, "loss": 0.8433, "step": 9567 }, { "epoch": 1.3545692645289162, "grad_norm": 8.441440594562248, "learning_rate": 1.2453697480102111e-06, "loss": 0.905, "step": 9568 }, { "epoch": 1.3547108374035535, "grad_norm": 8.815475935353511, "learning_rate": 1.2448739899759398e-06, "loss": 0.9304, "step": 9569 }, { "epoch": 1.3548524102781907, "grad_norm": 7.4355081407416375, "learning_rate": 1.2443782979227084e-06, "loss": 0.8798, "step": 9570 }, { "epoch": 1.354993983152828, "grad_norm": 8.465900913162425, "learning_rate": 1.2438826718765724e-06, "loss": 0.8347, "step": 9571 }, { "epoch": 1.3551355560274652, "grad_norm": 8.591391721877777, "learning_rate": 1.2433871118635888e-06, "loss": 0.7758, "step": 9572 }, { "epoch": 1.3552771289021024, "grad_norm": 9.360389412498462, "learning_rate": 1.2428916179098065e-06, "loss": 0.8734, "step": 9573 }, { "epoch": 1.3554187017767396, "grad_norm": 8.81157013877475, "learning_rate": 1.2423961900412756e-06, "loss": 0.9009, "step": 9574 }, { "epoch": 1.3555602746513769, "grad_norm": 8.29517043827899, "learning_rate": 1.2419008282840387e-06, "loss": 0.8223, "step": 9575 }, { "epoch": 1.355701847526014, "grad_norm": 9.766612840662546, "learning_rate": 1.2414055326641378e-06, "loss": 0.8347, "step": 9576 }, { "epoch": 1.3558434204006513, "grad_norm": 7.142414428070153, "learning_rate": 1.2409103032076087e-06, "loss": 0.8634, "step": 9577 }, { "epoch": 1.3559849932752885, "grad_norm": 9.615912569076988, "learning_rate": 1.2404151399404859e-06, "loss": 0.8478, "step": 9578 }, { "epoch": 1.3561265661499258, "grad_norm": 7.194859195309665, "learning_rate": 1.2399200428888023e-06, "loss": 0.7984, "step": 9579 }, { "epoch": 1.356268139024563, "grad_norm": 8.615996153055818, "learning_rate": 1.2394250120785806e-06, "loss": 0.8295, "step": 9580 }, { "epoch": 1.3564097118992002, "grad_norm": 8.668275488178997, "learning_rate": 1.2389300475358468e-06, "loss": 0.8872, "step": 9581 }, { "epoch": 1.3565512847738375, "grad_norm": 8.768676879316924, "learning_rate": 1.2384351492866192e-06, "loss": 0.7581, "step": 9582 }, { "epoch": 1.3566928576484745, "grad_norm": 8.453698101817908, "learning_rate": 1.237940317356916e-06, "loss": 0.8219, "step": 9583 }, { "epoch": 1.3568344305231117, "grad_norm": 7.80698780149648, "learning_rate": 1.2374455517727485e-06, "loss": 0.7795, "step": 9584 }, { "epoch": 1.356976003397749, "grad_norm": 7.619918380660628, "learning_rate": 1.236950852560127e-06, "loss": 0.7617, "step": 9585 }, { "epoch": 1.3571175762723862, "grad_norm": 10.32688206105925, "learning_rate": 1.2364562197450583e-06, "loss": 0.9245, "step": 9586 }, { "epoch": 1.3572591491470234, "grad_norm": 10.152814548168982, "learning_rate": 1.235961653353543e-06, "loss": 0.9579, "step": 9587 }, { "epoch": 1.3574007220216606, "grad_norm": 7.947840645669431, "learning_rate": 1.235467153411582e-06, "loss": 0.7664, "step": 9588 }, { "epoch": 1.3575422948962979, "grad_norm": 8.312271000281987, "learning_rate": 1.2349727199451696e-06, "loss": 0.8639, "step": 9589 }, { "epoch": 1.357683867770935, "grad_norm": 7.757162414542185, "learning_rate": 1.2344783529802975e-06, "loss": 0.8445, "step": 9590 }, { "epoch": 1.3578254406455723, "grad_norm": 8.443552968446223, "learning_rate": 1.2339840525429559e-06, "loss": 0.9886, "step": 9591 }, { "epoch": 1.3579670135202095, "grad_norm": 8.585102182978808, "learning_rate": 1.2334898186591274e-06, "loss": 0.7893, "step": 9592 }, { "epoch": 1.3581085863948468, "grad_norm": 10.969866312719828, "learning_rate": 1.2329956513547957e-06, "loss": 0.7583, "step": 9593 }, { "epoch": 1.358250159269484, "grad_norm": 8.375761993815892, "learning_rate": 1.232501550655937e-06, "loss": 0.7058, "step": 9594 }, { "epoch": 1.3583917321441212, "grad_norm": 7.662378466390644, "learning_rate": 1.2320075165885278e-06, "loss": 0.787, "step": 9595 }, { "epoch": 1.3585333050187585, "grad_norm": 11.489032989260686, "learning_rate": 1.2315135491785369e-06, "loss": 0.9545, "step": 9596 }, { "epoch": 1.3586748778933957, "grad_norm": 10.032934504932912, "learning_rate": 1.2310196484519339e-06, "loss": 0.9037, "step": 9597 }, { "epoch": 1.358816450768033, "grad_norm": 8.479742257141934, "learning_rate": 1.2305258144346807e-06, "loss": 0.7943, "step": 9598 }, { "epoch": 1.35895802364267, "grad_norm": 8.425611470386132, "learning_rate": 1.23003204715274e-06, "loss": 0.8624, "step": 9599 }, { "epoch": 1.3590995965173072, "grad_norm": 8.93145035024473, "learning_rate": 1.2295383466320677e-06, "loss": 0.8983, "step": 9600 }, { "epoch": 1.3592411693919444, "grad_norm": 8.986853004055641, "learning_rate": 1.229044712898616e-06, "loss": 0.8537, "step": 9601 }, { "epoch": 1.3593827422665816, "grad_norm": 9.415418243923517, "learning_rate": 1.2285511459783373e-06, "loss": 0.8675, "step": 9602 }, { "epoch": 1.3595243151412189, "grad_norm": 8.988295255448671, "learning_rate": 1.2280576458971757e-06, "loss": 0.8068, "step": 9603 }, { "epoch": 1.359665888015856, "grad_norm": 10.989677093732633, "learning_rate": 1.2275642126810764e-06, "loss": 0.8601, "step": 9604 }, { "epoch": 1.3598074608904933, "grad_norm": 7.690575705459019, "learning_rate": 1.2270708463559766e-06, "loss": 0.9046, "step": 9605 }, { "epoch": 1.3599490337651305, "grad_norm": 9.873511093500456, "learning_rate": 1.226577546947814e-06, "loss": 0.8527, "step": 9606 }, { "epoch": 1.3600906066397678, "grad_norm": 9.993356405192948, "learning_rate": 1.2260843144825196e-06, "loss": 0.8813, "step": 9607 }, { "epoch": 1.360232179514405, "grad_norm": 8.871999313918586, "learning_rate": 1.2255911489860228e-06, "loss": 0.8106, "step": 9608 }, { "epoch": 1.3603737523890422, "grad_norm": 8.735416575446305, "learning_rate": 1.2250980504842503e-06, "loss": 0.798, "step": 9609 }, { "epoch": 1.3605153252636795, "grad_norm": 8.386556367052803, "learning_rate": 1.2246050190031222e-06, "loss": 0.9163, "step": 9610 }, { "epoch": 1.3606568981383167, "grad_norm": 8.145880059175715, "learning_rate": 1.2241120545685575e-06, "loss": 0.8674, "step": 9611 }, { "epoch": 1.360798471012954, "grad_norm": 8.923602202559863, "learning_rate": 1.2236191572064697e-06, "loss": 0.7729, "step": 9612 }, { "epoch": 1.3609400438875912, "grad_norm": 9.744226164374494, "learning_rate": 1.2231263269427716e-06, "loss": 0.8722, "step": 9613 }, { "epoch": 1.3610816167622284, "grad_norm": 7.6638054138678955, "learning_rate": 1.2226335638033708e-06, "loss": 0.822, "step": 9614 }, { "epoch": 1.3612231896368656, "grad_norm": 10.045673209493408, "learning_rate": 1.2221408678141702e-06, "loss": 0.8439, "step": 9615 }, { "epoch": 1.3613647625115028, "grad_norm": 7.562640984852546, "learning_rate": 1.2216482390010726e-06, "loss": 0.8356, "step": 9616 }, { "epoch": 1.36150633538614, "grad_norm": 8.638250814616955, "learning_rate": 1.2211556773899728e-06, "loss": 0.8196, "step": 9617 }, { "epoch": 1.3616479082607773, "grad_norm": 7.817513040080965, "learning_rate": 1.2206631830067663e-06, "loss": 0.7726, "step": 9618 }, { "epoch": 1.3617894811354145, "grad_norm": 9.092882042851697, "learning_rate": 1.2201707558773416e-06, "loss": 0.9599, "step": 9619 }, { "epoch": 1.3619310540100518, "grad_norm": 9.058437936618128, "learning_rate": 1.2196783960275867e-06, "loss": 0.9231, "step": 9620 }, { "epoch": 1.362072626884689, "grad_norm": 9.148263618517271, "learning_rate": 1.2191861034833841e-06, "loss": 0.7956, "step": 9621 }, { "epoch": 1.3622141997593262, "grad_norm": 8.762740968011148, "learning_rate": 1.218693878270612e-06, "loss": 0.8466, "step": 9622 }, { "epoch": 1.3623557726339635, "grad_norm": 8.167671712676428, "learning_rate": 1.2182017204151484e-06, "loss": 0.8373, "step": 9623 }, { "epoch": 1.3624973455086007, "grad_norm": 8.015211901514013, "learning_rate": 1.2177096299428634e-06, "loss": 0.8292, "step": 9624 }, { "epoch": 1.3626389183832377, "grad_norm": 8.763739126315953, "learning_rate": 1.2172176068796281e-06, "loss": 0.8911, "step": 9625 }, { "epoch": 1.362780491257875, "grad_norm": 8.610801620117353, "learning_rate": 1.216725651251306e-06, "loss": 0.8913, "step": 9626 }, { "epoch": 1.3629220641325122, "grad_norm": 8.135581301017694, "learning_rate": 1.2162337630837604e-06, "loss": 0.8915, "step": 9627 }, { "epoch": 1.3630636370071494, "grad_norm": 8.319481234149825, "learning_rate": 1.2157419424028473e-06, "loss": 0.8484, "step": 9628 }, { "epoch": 1.3632052098817866, "grad_norm": 8.184414114609186, "learning_rate": 1.2152501892344232e-06, "loss": 0.7941, "step": 9629 }, { "epoch": 1.3633467827564238, "grad_norm": 8.895756908031878, "learning_rate": 1.2147585036043397e-06, "loss": 0.8194, "step": 9630 }, { "epoch": 1.363488355631061, "grad_norm": 9.513633181313738, "learning_rate": 1.2142668855384421e-06, "loss": 0.8984, "step": 9631 }, { "epoch": 1.3636299285056983, "grad_norm": 8.961630517556676, "learning_rate": 1.2137753350625774e-06, "loss": 0.8195, "step": 9632 }, { "epoch": 1.3637715013803355, "grad_norm": 8.607359163814552, "learning_rate": 1.2132838522025827e-06, "loss": 0.8329, "step": 9633 }, { "epoch": 1.3639130742549728, "grad_norm": 8.944282145900182, "learning_rate": 1.2127924369842975e-06, "loss": 0.901, "step": 9634 }, { "epoch": 1.36405464712961, "grad_norm": 8.601474688754775, "learning_rate": 1.212301089433553e-06, "loss": 0.852, "step": 9635 }, { "epoch": 1.3641962200042472, "grad_norm": 8.948704238947242, "learning_rate": 1.21180980957618e-06, "loss": 0.8386, "step": 9636 }, { "epoch": 1.3643377928788845, "grad_norm": 8.27856355674028, "learning_rate": 1.211318597438006e-06, "loss": 0.9374, "step": 9637 }, { "epoch": 1.3644793657535217, "grad_norm": 7.426289548539044, "learning_rate": 1.2108274530448513e-06, "loss": 0.8013, "step": 9638 }, { "epoch": 1.364620938628159, "grad_norm": 8.790475146890836, "learning_rate": 1.210336376422537e-06, "loss": 0.8453, "step": 9639 }, { "epoch": 1.364762511502796, "grad_norm": 8.88487832934397, "learning_rate": 1.2098453675968772e-06, "loss": 0.7955, "step": 9640 }, { "epoch": 1.3649040843774332, "grad_norm": 9.284002955431717, "learning_rate": 1.2093544265936848e-06, "loss": 0.8134, "step": 9641 }, { "epoch": 1.3650456572520704, "grad_norm": 10.215123457571032, "learning_rate": 1.2088635534387684e-06, "loss": 0.9477, "step": 9642 }, { "epoch": 1.3651872301267076, "grad_norm": 8.784552662402088, "learning_rate": 1.208372748157931e-06, "loss": 0.8816, "step": 9643 }, { "epoch": 1.3653288030013448, "grad_norm": 9.929224563239876, "learning_rate": 1.2078820107769762e-06, "loss": 0.9084, "step": 9644 }, { "epoch": 1.365470375875982, "grad_norm": 8.737036993723187, "learning_rate": 1.2073913413216998e-06, "loss": 0.8447, "step": 9645 }, { "epoch": 1.3656119487506193, "grad_norm": 7.644523515341642, "learning_rate": 1.2069007398178978e-06, "loss": 0.7647, "step": 9646 }, { "epoch": 1.3657535216252565, "grad_norm": 8.881827293924463, "learning_rate": 1.2064102062913585e-06, "loss": 0.7829, "step": 9647 }, { "epoch": 1.3658950944998938, "grad_norm": 8.321884479581767, "learning_rate": 1.2059197407678714e-06, "loss": 0.8237, "step": 9648 }, { "epoch": 1.366036667374531, "grad_norm": 8.892491257057042, "learning_rate": 1.2054293432732172e-06, "loss": 0.8465, "step": 9649 }, { "epoch": 1.3661782402491682, "grad_norm": 8.60653080146254, "learning_rate": 1.2049390138331785e-06, "loss": 0.8277, "step": 9650 }, { "epoch": 1.3663198131238055, "grad_norm": 8.079658643102597, "learning_rate": 1.204448752473529e-06, "loss": 0.8233, "step": 9651 }, { "epoch": 1.3664613859984427, "grad_norm": 8.270720384212497, "learning_rate": 1.2039585592200428e-06, "loss": 0.7072, "step": 9652 }, { "epoch": 1.36660295887308, "grad_norm": 9.064832084771215, "learning_rate": 1.2034684340984907e-06, "loss": 0.8726, "step": 9653 }, { "epoch": 1.3667445317477172, "grad_norm": 8.076119681285395, "learning_rate": 1.2029783771346344e-06, "loss": 0.8352, "step": 9654 }, { "epoch": 1.3668861046223544, "grad_norm": 9.493891307992142, "learning_rate": 1.2024883883542384e-06, "loss": 0.9458, "step": 9655 }, { "epoch": 1.3670276774969916, "grad_norm": 9.04208579315208, "learning_rate": 1.2019984677830597e-06, "loss": 0.8185, "step": 9656 }, { "epoch": 1.3671692503716288, "grad_norm": 8.53158445715738, "learning_rate": 1.2015086154468544e-06, "loss": 0.9043, "step": 9657 }, { "epoch": 1.367310823246266, "grad_norm": 8.974221929544793, "learning_rate": 1.201018831371372e-06, "loss": 0.7794, "step": 9658 }, { "epoch": 1.3674523961209033, "grad_norm": 9.602378518771973, "learning_rate": 1.2005291155823612e-06, "loss": 0.8178, "step": 9659 }, { "epoch": 1.3675939689955405, "grad_norm": 7.918747986369491, "learning_rate": 1.200039468105567e-06, "loss": 0.8742, "step": 9660 }, { "epoch": 1.3677355418701778, "grad_norm": 10.368729132504242, "learning_rate": 1.1995498889667276e-06, "loss": 0.9105, "step": 9661 }, { "epoch": 1.367877114744815, "grad_norm": 9.4769485281794, "learning_rate": 1.1990603781915816e-06, "loss": 0.7832, "step": 9662 }, { "epoch": 1.3680186876194522, "grad_norm": 10.182735522548347, "learning_rate": 1.1985709358058616e-06, "loss": 0.8158, "step": 9663 }, { "epoch": 1.3681602604940895, "grad_norm": 8.328499676433418, "learning_rate": 1.1980815618352964e-06, "loss": 0.9318, "step": 9664 }, { "epoch": 1.3683018333687267, "grad_norm": 8.530069010948445, "learning_rate": 1.1975922563056136e-06, "loss": 0.8984, "step": 9665 }, { "epoch": 1.3684434062433637, "grad_norm": 9.241967915290857, "learning_rate": 1.1971030192425337e-06, "loss": 0.8826, "step": 9666 }, { "epoch": 1.368584979118001, "grad_norm": 7.141658115567311, "learning_rate": 1.1966138506717776e-06, "loss": 0.6696, "step": 9667 }, { "epoch": 1.3687265519926382, "grad_norm": 9.039231729860303, "learning_rate": 1.1961247506190588e-06, "loss": 0.8547, "step": 9668 }, { "epoch": 1.3688681248672754, "grad_norm": 8.245123606924096, "learning_rate": 1.1956357191100903e-06, "loss": 0.7973, "step": 9669 }, { "epoch": 1.3690096977419126, "grad_norm": 9.105519442292398, "learning_rate": 1.1951467561705784e-06, "loss": 0.9088, "step": 9670 }, { "epoch": 1.3691512706165498, "grad_norm": 9.492701889354592, "learning_rate": 1.19465786182623e-06, "loss": 0.8407, "step": 9671 }, { "epoch": 1.369292843491187, "grad_norm": 10.336054494920809, "learning_rate": 1.1941690361027432e-06, "loss": 0.8765, "step": 9672 }, { "epoch": 1.3694344163658243, "grad_norm": 7.790475057236987, "learning_rate": 1.1936802790258176e-06, "loss": 0.8634, "step": 9673 }, { "epoch": 1.3695759892404615, "grad_norm": 9.014305613217058, "learning_rate": 1.1931915906211456e-06, "loss": 0.838, "step": 9674 }, { "epoch": 1.3697175621150988, "grad_norm": 9.297356293345445, "learning_rate": 1.1927029709144163e-06, "loss": 0.8127, "step": 9675 }, { "epoch": 1.369859134989736, "grad_norm": 7.998885553936902, "learning_rate": 1.1922144199313181e-06, "loss": 0.8454, "step": 9676 }, { "epoch": 1.3700007078643732, "grad_norm": 8.47545940763155, "learning_rate": 1.1917259376975318e-06, "loss": 0.7946, "step": 9677 }, { "epoch": 1.3701422807390105, "grad_norm": 8.66863370238432, "learning_rate": 1.1912375242387384e-06, "loss": 0.8845, "step": 9678 }, { "epoch": 1.3702838536136477, "grad_norm": 11.132940994324253, "learning_rate": 1.1907491795806117e-06, "loss": 0.9097, "step": 9679 }, { "epoch": 1.370425426488285, "grad_norm": 8.118600995793933, "learning_rate": 1.190260903748825e-06, "loss": 0.8548, "step": 9680 }, { "epoch": 1.3705669993629221, "grad_norm": 8.907154131054176, "learning_rate": 1.1897726967690454e-06, "loss": 0.8661, "step": 9681 }, { "epoch": 1.3707085722375592, "grad_norm": 8.890012696539241, "learning_rate": 1.189284558666938e-06, "loss": 0.8466, "step": 9682 }, { "epoch": 1.3708501451121964, "grad_norm": 9.049461508900638, "learning_rate": 1.188796489468165e-06, "loss": 0.8444, "step": 9683 }, { "epoch": 1.3709917179868336, "grad_norm": 8.765242025947572, "learning_rate": 1.1883084891983828e-06, "loss": 0.8714, "step": 9684 }, { "epoch": 1.3711332908614708, "grad_norm": 9.282074040780712, "learning_rate": 1.1878205578832455e-06, "loss": 0.8039, "step": 9685 }, { "epoch": 1.371274863736108, "grad_norm": 8.563219917271919, "learning_rate": 1.187332695548402e-06, "loss": 0.7756, "step": 9686 }, { "epoch": 1.3714164366107453, "grad_norm": 7.8576684998891615, "learning_rate": 1.1868449022194997e-06, "loss": 0.8005, "step": 9687 }, { "epoch": 1.3715580094853825, "grad_norm": 7.928442405328386, "learning_rate": 1.186357177922183e-06, "loss": 0.9073, "step": 9688 }, { "epoch": 1.3716995823600198, "grad_norm": 8.687633925029992, "learning_rate": 1.185869522682089e-06, "loss": 0.9176, "step": 9689 }, { "epoch": 1.371841155234657, "grad_norm": 8.366171667443874, "learning_rate": 1.1853819365248553e-06, "loss": 0.7835, "step": 9690 }, { "epoch": 1.3719827281092942, "grad_norm": 9.650854485980519, "learning_rate": 1.184894419476112e-06, "loss": 0.9824, "step": 9691 }, { "epoch": 1.3721243009839315, "grad_norm": 7.205442109689991, "learning_rate": 1.1844069715614893e-06, "loss": 0.8542, "step": 9692 }, { "epoch": 1.3722658738585687, "grad_norm": 7.975522981434671, "learning_rate": 1.1839195928066101e-06, "loss": 0.837, "step": 9693 }, { "epoch": 1.372407446733206, "grad_norm": 6.827447294564682, "learning_rate": 1.183432283237098e-06, "loss": 0.8031, "step": 9694 }, { "epoch": 1.3725490196078431, "grad_norm": 8.196550099445183, "learning_rate": 1.1829450428785689e-06, "loss": 0.8491, "step": 9695 }, { "epoch": 1.3726905924824804, "grad_norm": 9.262722030223065, "learning_rate": 1.1824578717566358e-06, "loss": 0.9622, "step": 9696 }, { "epoch": 1.3728321653571176, "grad_norm": 8.644884270356172, "learning_rate": 1.181970769896911e-06, "loss": 0.805, "step": 9697 }, { "epoch": 1.3729737382317548, "grad_norm": 8.803216034429015, "learning_rate": 1.1814837373249991e-06, "loss": 0.8052, "step": 9698 }, { "epoch": 1.373115311106392, "grad_norm": 8.370954305009823, "learning_rate": 1.180996774066505e-06, "loss": 0.7866, "step": 9699 }, { "epoch": 1.3732568839810293, "grad_norm": 8.686191377945622, "learning_rate": 1.1805098801470259e-06, "loss": 0.7254, "step": 9700 }, { "epoch": 1.3733984568556665, "grad_norm": 10.790428611013327, "learning_rate": 1.1800230555921597e-06, "loss": 0.8835, "step": 9701 }, { "epoch": 1.3735400297303038, "grad_norm": 9.280995580609074, "learning_rate": 1.179536300427496e-06, "loss": 0.9346, "step": 9702 }, { "epoch": 1.373681602604941, "grad_norm": 8.46085679569659, "learning_rate": 1.1790496146786257e-06, "loss": 0.8626, "step": 9703 }, { "epoch": 1.3738231754795782, "grad_norm": 9.165238378976131, "learning_rate": 1.1785629983711311e-06, "loss": 0.8483, "step": 9704 }, { "epoch": 1.3739647483542154, "grad_norm": 9.527737730168836, "learning_rate": 1.1780764515305942e-06, "loss": 0.8341, "step": 9705 }, { "epoch": 1.3741063212288527, "grad_norm": 8.364017858587435, "learning_rate": 1.1775899741825947e-06, "loss": 0.8248, "step": 9706 }, { "epoch": 1.37424789410349, "grad_norm": 8.373948671769005, "learning_rate": 1.1771035663527021e-06, "loss": 0.8395, "step": 9707 }, { "epoch": 1.374389466978127, "grad_norm": 9.954544517102164, "learning_rate": 1.17661722806649e-06, "loss": 0.8141, "step": 9708 }, { "epoch": 1.3745310398527641, "grad_norm": 8.17351006512306, "learning_rate": 1.1761309593495224e-06, "loss": 0.909, "step": 9709 }, { "epoch": 1.3746726127274014, "grad_norm": 10.021509307669726, "learning_rate": 1.1756447602273629e-06, "loss": 0.901, "step": 9710 }, { "epoch": 1.3748141856020386, "grad_norm": 9.753998547821219, "learning_rate": 1.1751586307255719e-06, "loss": 0.8507, "step": 9711 }, { "epoch": 1.3749557584766758, "grad_norm": 8.990129356259851, "learning_rate": 1.174672570869703e-06, "loss": 0.8848, "step": 9712 }, { "epoch": 1.375097331351313, "grad_norm": 7.469734925005941, "learning_rate": 1.1741865806853097e-06, "loss": 0.8411, "step": 9713 }, { "epoch": 1.3752389042259503, "grad_norm": 9.483897968594503, "learning_rate": 1.1737006601979384e-06, "loss": 0.9481, "step": 9714 }, { "epoch": 1.3753804771005875, "grad_norm": 8.412860072508598, "learning_rate": 1.1732148094331353e-06, "loss": 0.8557, "step": 9715 }, { "epoch": 1.3755220499752248, "grad_norm": 8.507329585554217, "learning_rate": 1.1727290284164406e-06, "loss": 0.8627, "step": 9716 }, { "epoch": 1.375663622849862, "grad_norm": 8.928578011646568, "learning_rate": 1.1722433171733903e-06, "loss": 0.8697, "step": 9717 }, { "epoch": 1.3758051957244992, "grad_norm": 9.08828371805928, "learning_rate": 1.1717576757295192e-06, "loss": 0.7951, "step": 9718 }, { "epoch": 1.3759467685991364, "grad_norm": 8.834797671839675, "learning_rate": 1.171272104110356e-06, "loss": 0.768, "step": 9719 }, { "epoch": 1.3760883414737737, "grad_norm": 8.785548342600807, "learning_rate": 1.1707866023414288e-06, "loss": 0.9242, "step": 9720 }, { "epoch": 1.376229914348411, "grad_norm": 7.220861349108351, "learning_rate": 1.1703011704482577e-06, "loss": 0.7428, "step": 9721 }, { "epoch": 1.3763714872230481, "grad_norm": 8.845630459309062, "learning_rate": 1.1698158084563635e-06, "loss": 0.8926, "step": 9722 }, { "epoch": 1.3765130600976851, "grad_norm": 9.290841332517504, "learning_rate": 1.1693305163912597e-06, "loss": 0.8257, "step": 9723 }, { "epoch": 1.3766546329723224, "grad_norm": 10.359737706343644, "learning_rate": 1.1688452942784592e-06, "loss": 0.8889, "step": 9724 }, { "epoch": 1.3767962058469596, "grad_norm": 8.254550950182582, "learning_rate": 1.168360142143468e-06, "loss": 0.8429, "step": 9725 }, { "epoch": 1.3769377787215968, "grad_norm": 9.018183247714335, "learning_rate": 1.1678750600117914e-06, "loss": 0.8032, "step": 9726 }, { "epoch": 1.377079351596234, "grad_norm": 8.069000226077423, "learning_rate": 1.1673900479089314e-06, "loss": 0.8694, "step": 9727 }, { "epoch": 1.3772209244708713, "grad_norm": 8.444060311308514, "learning_rate": 1.1669051058603811e-06, "loss": 0.8317, "step": 9728 }, { "epoch": 1.3773624973455085, "grad_norm": 8.500405694592244, "learning_rate": 1.1664202338916364e-06, "loss": 0.7984, "step": 9729 }, { "epoch": 1.3775040702201458, "grad_norm": 9.811282331394207, "learning_rate": 1.1659354320281845e-06, "loss": 0.815, "step": 9730 }, { "epoch": 1.377645643094783, "grad_norm": 9.896009669824192, "learning_rate": 1.1654507002955135e-06, "loss": 0.802, "step": 9731 }, { "epoch": 1.3777872159694202, "grad_norm": 7.654658436296631, "learning_rate": 1.1649660387191027e-06, "loss": 0.8009, "step": 9732 }, { "epoch": 1.3779287888440575, "grad_norm": 9.716566226847974, "learning_rate": 1.1644814473244322e-06, "loss": 0.8297, "step": 9733 }, { "epoch": 1.3780703617186947, "grad_norm": 9.96728351772218, "learning_rate": 1.163996926136977e-06, "loss": 0.7936, "step": 9734 }, { "epoch": 1.378211934593332, "grad_norm": 7.953681851361405, "learning_rate": 1.1635124751822063e-06, "loss": 0.7384, "step": 9735 }, { "epoch": 1.3783535074679691, "grad_norm": 8.906498420329289, "learning_rate": 1.163028094485589e-06, "loss": 0.8787, "step": 9736 }, { "epoch": 1.3784950803426064, "grad_norm": 8.06675998013753, "learning_rate": 1.162543784072588e-06, "loss": 0.8698, "step": 9737 }, { "epoch": 1.3786366532172436, "grad_norm": 8.576713404176614, "learning_rate": 1.1620595439686632e-06, "loss": 0.7629, "step": 9738 }, { "epoch": 1.3787782260918808, "grad_norm": 8.006615764234834, "learning_rate": 1.1615753741992696e-06, "loss": 0.8169, "step": 9739 }, { "epoch": 1.378919798966518, "grad_norm": 9.192080646874915, "learning_rate": 1.1610912747898607e-06, "loss": 0.8155, "step": 9740 }, { "epoch": 1.3790613718411553, "grad_norm": 8.477142567278023, "learning_rate": 1.1606072457658856e-06, "loss": 0.8251, "step": 9741 }, { "epoch": 1.3792029447157925, "grad_norm": 8.732186631882271, "learning_rate": 1.1601232871527884e-06, "loss": 0.9039, "step": 9742 }, { "epoch": 1.3793445175904298, "grad_norm": 9.55121859619825, "learning_rate": 1.1596393989760118e-06, "loss": 0.8759, "step": 9743 }, { "epoch": 1.379486090465067, "grad_norm": 8.818677536134777, "learning_rate": 1.1591555812609914e-06, "loss": 0.7009, "step": 9744 }, { "epoch": 1.3796276633397042, "grad_norm": 9.205049323302596, "learning_rate": 1.1586718340331634e-06, "loss": 0.8761, "step": 9745 }, { "epoch": 1.3797692362143414, "grad_norm": 9.259645536489277, "learning_rate": 1.1581881573179562e-06, "loss": 0.884, "step": 9746 }, { "epoch": 1.3799108090889787, "grad_norm": 9.31349944825554, "learning_rate": 1.1577045511407977e-06, "loss": 0.9514, "step": 9747 }, { "epoch": 1.380052381963616, "grad_norm": 8.993194550201471, "learning_rate": 1.1572210155271105e-06, "loss": 1.0123, "step": 9748 }, { "epoch": 1.380193954838253, "grad_norm": 7.017736036613933, "learning_rate": 1.156737550502312e-06, "loss": 0.6483, "step": 9749 }, { "epoch": 1.3803355277128901, "grad_norm": 7.775449252871783, "learning_rate": 1.15625415609182e-06, "loss": 0.9069, "step": 9750 }, { "epoch": 1.3804771005875274, "grad_norm": 9.361917955873144, "learning_rate": 1.155770832321044e-06, "loss": 0.8426, "step": 9751 }, { "epoch": 1.3806186734621646, "grad_norm": 9.992640648320386, "learning_rate": 1.1552875792153943e-06, "loss": 0.8544, "step": 9752 }, { "epoch": 1.3807602463368018, "grad_norm": 9.469116203856034, "learning_rate": 1.1548043968002725e-06, "loss": 0.8401, "step": 9753 }, { "epoch": 1.380901819211439, "grad_norm": 7.835428025483311, "learning_rate": 1.1543212851010819e-06, "loss": 0.8921, "step": 9754 }, { "epoch": 1.3810433920860763, "grad_norm": 8.230631895037073, "learning_rate": 1.1538382441432166e-06, "loss": 0.8406, "step": 9755 }, { "epoch": 1.3811849649607135, "grad_norm": 8.007464264531807, "learning_rate": 1.1533552739520715e-06, "loss": 0.7953, "step": 9756 }, { "epoch": 1.3813265378353508, "grad_norm": 8.646184143547986, "learning_rate": 1.1528723745530362e-06, "loss": 0.917, "step": 9757 }, { "epoch": 1.381468110709988, "grad_norm": 8.57054306179156, "learning_rate": 1.1523895459714948e-06, "loss": 0.897, "step": 9758 }, { "epoch": 1.3816096835846252, "grad_norm": 8.059646929634765, "learning_rate": 1.151906788232832e-06, "loss": 0.8193, "step": 9759 }, { "epoch": 1.3817512564592624, "grad_norm": 10.224802074055678, "learning_rate": 1.1514241013624225e-06, "loss": 0.8739, "step": 9760 }, { "epoch": 1.3818928293338997, "grad_norm": 8.280321292264269, "learning_rate": 1.1509414853856421e-06, "loss": 0.7137, "step": 9761 }, { "epoch": 1.382034402208537, "grad_norm": 9.193074513923767, "learning_rate": 1.1504589403278631e-06, "loss": 0.7711, "step": 9762 }, { "epoch": 1.3821759750831741, "grad_norm": 8.593339390048468, "learning_rate": 1.1499764662144505e-06, "loss": 0.7503, "step": 9763 }, { "epoch": 1.3823175479578114, "grad_norm": 7.796925148965101, "learning_rate": 1.1494940630707693e-06, "loss": 0.7545, "step": 9764 }, { "epoch": 1.3824591208324484, "grad_norm": 7.147568680125388, "learning_rate": 1.1490117309221772e-06, "loss": 0.7928, "step": 9765 }, { "epoch": 1.3826006937070856, "grad_norm": 7.926111416545947, "learning_rate": 1.148529469794032e-06, "loss": 0.8144, "step": 9766 }, { "epoch": 1.3827422665817228, "grad_norm": 8.875047710451645, "learning_rate": 1.148047279711684e-06, "loss": 0.8496, "step": 9767 }, { "epoch": 1.38288383945636, "grad_norm": 7.482725467362936, "learning_rate": 1.1475651607004834e-06, "loss": 0.8036, "step": 9768 }, { "epoch": 1.3830254123309973, "grad_norm": 10.455848145273137, "learning_rate": 1.1470831127857738e-06, "loss": 0.8806, "step": 9769 }, { "epoch": 1.3831669852056345, "grad_norm": 8.30141541695178, "learning_rate": 1.1466011359928951e-06, "loss": 0.7425, "step": 9770 }, { "epoch": 1.3833085580802718, "grad_norm": 8.746493263541662, "learning_rate": 1.146119230347187e-06, "loss": 0.7811, "step": 9771 }, { "epoch": 1.383450130954909, "grad_norm": 8.642280847641537, "learning_rate": 1.14563739587398e-06, "loss": 0.8745, "step": 9772 }, { "epoch": 1.3835917038295462, "grad_norm": 9.274593342300193, "learning_rate": 1.1451556325986065e-06, "loss": 1.0036, "step": 9773 }, { "epoch": 1.3837332767041834, "grad_norm": 6.943212408342646, "learning_rate": 1.14467394054639e-06, "loss": 0.7539, "step": 9774 }, { "epoch": 1.3838748495788207, "grad_norm": 8.828517776798884, "learning_rate": 1.144192319742655e-06, "loss": 0.7745, "step": 9775 }, { "epoch": 1.384016422453458, "grad_norm": 8.208125336104652, "learning_rate": 1.1437107702127178e-06, "loss": 0.7765, "step": 9776 }, { "epoch": 1.3841579953280951, "grad_norm": 10.211629363993687, "learning_rate": 1.1432292919818952e-06, "loss": 0.7524, "step": 9777 }, { "epoch": 1.3842995682027324, "grad_norm": 8.387171768156097, "learning_rate": 1.1427478850754959e-06, "loss": 0.8619, "step": 9778 }, { "epoch": 1.3844411410773696, "grad_norm": 8.988336847258516, "learning_rate": 1.1422665495188284e-06, "loss": 0.8033, "step": 9779 }, { "epoch": 1.3845827139520068, "grad_norm": 8.455317468531407, "learning_rate": 1.1417852853371978e-06, "loss": 0.8864, "step": 9780 }, { "epoch": 1.384724286826644, "grad_norm": 7.606472832731221, "learning_rate": 1.1413040925559e-06, "loss": 0.7408, "step": 9781 }, { "epoch": 1.3848658597012813, "grad_norm": 7.862197156935298, "learning_rate": 1.1408229712002345e-06, "loss": 0.7327, "step": 9782 }, { "epoch": 1.3850074325759185, "grad_norm": 10.151753812457727, "learning_rate": 1.1403419212954904e-06, "loss": 0.8642, "step": 9783 }, { "epoch": 1.3851490054505557, "grad_norm": 8.801958421050308, "learning_rate": 1.1398609428669582e-06, "loss": 0.7963, "step": 9784 }, { "epoch": 1.385290578325193, "grad_norm": 8.67375797757888, "learning_rate": 1.1393800359399225e-06, "loss": 0.8177, "step": 9785 }, { "epoch": 1.3854321511998302, "grad_norm": 9.115285222230689, "learning_rate": 1.1388992005396632e-06, "loss": 0.8526, "step": 9786 }, { "epoch": 1.3855737240744674, "grad_norm": 9.536821581708848, "learning_rate": 1.1384184366914588e-06, "loss": 0.8469, "step": 9787 }, { "epoch": 1.3857152969491047, "grad_norm": 10.376255556910895, "learning_rate": 1.1379377444205814e-06, "loss": 0.9038, "step": 9788 }, { "epoch": 1.385856869823742, "grad_norm": 9.538657389918463, "learning_rate": 1.1374571237523015e-06, "loss": 0.82, "step": 9789 }, { "epoch": 1.385998442698379, "grad_norm": 9.939114805292922, "learning_rate": 1.1369765747118853e-06, "loss": 0.8483, "step": 9790 }, { "epoch": 1.3861400155730161, "grad_norm": 8.63225615076623, "learning_rate": 1.1364960973245927e-06, "loss": 0.8317, "step": 9791 }, { "epoch": 1.3862815884476534, "grad_norm": 8.597192581062664, "learning_rate": 1.136015691615685e-06, "loss": 0.7667, "step": 9792 }, { "epoch": 1.3864231613222906, "grad_norm": 8.684183269743908, "learning_rate": 1.135535357610414e-06, "loss": 0.7721, "step": 9793 }, { "epoch": 1.3865647341969278, "grad_norm": 7.737876269138331, "learning_rate": 1.1350550953340334e-06, "loss": 0.7823, "step": 9794 }, { "epoch": 1.386706307071565, "grad_norm": 9.551156290565892, "learning_rate": 1.1345749048117872e-06, "loss": 0.9441, "step": 9795 }, { "epoch": 1.3868478799462023, "grad_norm": 8.454190848065107, "learning_rate": 1.1340947860689214e-06, "loss": 0.7868, "step": 9796 }, { "epoch": 1.3869894528208395, "grad_norm": 8.35138024897611, "learning_rate": 1.133614739130673e-06, "loss": 0.878, "step": 9797 }, { "epoch": 1.3871310256954767, "grad_norm": 7.902577387796793, "learning_rate": 1.13313476402228e-06, "loss": 0.7842, "step": 9798 }, { "epoch": 1.387272598570114, "grad_norm": 8.041040056815568, "learning_rate": 1.1326548607689724e-06, "loss": 0.7582, "step": 9799 }, { "epoch": 1.3874141714447512, "grad_norm": 8.810174729934285, "learning_rate": 1.1321750293959802e-06, "loss": 0.8959, "step": 9800 }, { "epoch": 1.3875557443193884, "grad_norm": 7.368819152145171, "learning_rate": 1.1316952699285268e-06, "loss": 0.676, "step": 9801 }, { "epoch": 1.3876973171940257, "grad_norm": 9.26049580377521, "learning_rate": 1.131215582391832e-06, "loss": 0.8548, "step": 9802 }, { "epoch": 1.387838890068663, "grad_norm": 9.682348444407781, "learning_rate": 1.1307359668111141e-06, "loss": 0.8715, "step": 9803 }, { "epoch": 1.3879804629433001, "grad_norm": 8.140939893141946, "learning_rate": 1.1302564232115848e-06, "loss": 0.8557, "step": 9804 }, { "epoch": 1.3881220358179374, "grad_norm": 8.963402832185512, "learning_rate": 1.1297769516184544e-06, "loss": 0.8406, "step": 9805 }, { "epoch": 1.3882636086925744, "grad_norm": 7.214649439468547, "learning_rate": 1.1292975520569278e-06, "loss": 0.8558, "step": 9806 }, { "epoch": 1.3884051815672116, "grad_norm": 9.10191581596952, "learning_rate": 1.1288182245522063e-06, "loss": 0.7217, "step": 9807 }, { "epoch": 1.3885467544418488, "grad_norm": 9.353530918824937, "learning_rate": 1.1283389691294894e-06, "loss": 0.8546, "step": 9808 }, { "epoch": 1.388688327316486, "grad_norm": 8.809941346474465, "learning_rate": 1.1278597858139692e-06, "loss": 0.8339, "step": 9809 }, { "epoch": 1.3888299001911233, "grad_norm": 9.219080618409325, "learning_rate": 1.127380674630838e-06, "loss": 0.8861, "step": 9810 }, { "epoch": 1.3889714730657605, "grad_norm": 9.140902523164126, "learning_rate": 1.1269016356052803e-06, "loss": 0.8483, "step": 9811 }, { "epoch": 1.3891130459403978, "grad_norm": 9.720162537397098, "learning_rate": 1.1264226687624815e-06, "loss": 0.8266, "step": 9812 }, { "epoch": 1.389254618815035, "grad_norm": 9.609568325672164, "learning_rate": 1.1259437741276172e-06, "loss": 0.8594, "step": 9813 }, { "epoch": 1.3893961916896722, "grad_norm": 6.514895757395994, "learning_rate": 1.125464951725864e-06, "loss": 0.8409, "step": 9814 }, { "epoch": 1.3895377645643094, "grad_norm": 9.165734908403843, "learning_rate": 1.1249862015823943e-06, "loss": 0.9325, "step": 9815 }, { "epoch": 1.3896793374389467, "grad_norm": 9.986152980140723, "learning_rate": 1.1245075237223741e-06, "loss": 0.7843, "step": 9816 }, { "epoch": 1.389820910313584, "grad_norm": 9.63829212619452, "learning_rate": 1.1240289181709681e-06, "loss": 0.8521, "step": 9817 }, { "epoch": 1.3899624831882211, "grad_norm": 11.226778265911088, "learning_rate": 1.1235503849533355e-06, "loss": 0.9811, "step": 9818 }, { "epoch": 1.3901040560628584, "grad_norm": 7.856093705715855, "learning_rate": 1.1230719240946336e-06, "loss": 0.8572, "step": 9819 }, { "epoch": 1.3902456289374956, "grad_norm": 9.280625653058474, "learning_rate": 1.1225935356200129e-06, "loss": 0.8821, "step": 9820 }, { "epoch": 1.3903872018121328, "grad_norm": 10.831751624816679, "learning_rate": 1.1221152195546241e-06, "loss": 0.8941, "step": 9821 }, { "epoch": 1.39052877468677, "grad_norm": 8.064326589510445, "learning_rate": 1.1216369759236108e-06, "loss": 0.8269, "step": 9822 }, { "epoch": 1.3906703475614073, "grad_norm": 8.806131862175551, "learning_rate": 1.121158804752113e-06, "loss": 0.7651, "step": 9823 }, { "epoch": 1.3908119204360445, "grad_norm": 8.231211682623655, "learning_rate": 1.1206807060652696e-06, "loss": 0.8137, "step": 9824 }, { "epoch": 1.3909534933106817, "grad_norm": 7.829320271927197, "learning_rate": 1.120202679888212e-06, "loss": 0.781, "step": 9825 }, { "epoch": 1.391095066185319, "grad_norm": 9.212529763907355, "learning_rate": 1.119724726246072e-06, "loss": 0.85, "step": 9826 }, { "epoch": 1.3912366390599562, "grad_norm": 8.617034772416957, "learning_rate": 1.1192468451639727e-06, "loss": 0.7812, "step": 9827 }, { "epoch": 1.3913782119345934, "grad_norm": 9.029804470441984, "learning_rate": 1.1187690366670381e-06, "loss": 0.8712, "step": 9828 }, { "epoch": 1.3915197848092307, "grad_norm": 9.18062329006809, "learning_rate": 1.1182913007803847e-06, "loss": 0.8024, "step": 9829 }, { "epoch": 1.391661357683868, "grad_norm": 9.963159602022397, "learning_rate": 1.117813637529127e-06, "loss": 0.931, "step": 9830 }, { "epoch": 1.3918029305585051, "grad_norm": 9.588030871797146, "learning_rate": 1.117336046938377e-06, "loss": 0.8929, "step": 9831 }, { "epoch": 1.3919445034331421, "grad_norm": 8.49947669718416, "learning_rate": 1.116858529033239e-06, "loss": 0.7863, "step": 9832 }, { "epoch": 1.3920860763077794, "grad_norm": 8.978233719674359, "learning_rate": 1.1163810838388187e-06, "loss": 0.8595, "step": 9833 }, { "epoch": 1.3922276491824166, "grad_norm": 9.585539146546765, "learning_rate": 1.1159037113802113e-06, "loss": 0.8025, "step": 9834 }, { "epoch": 1.3923692220570538, "grad_norm": 8.601302611593836, "learning_rate": 1.1154264116825147e-06, "loss": 0.886, "step": 9835 }, { "epoch": 1.392510794931691, "grad_norm": 10.344401635794926, "learning_rate": 1.1149491847708186e-06, "loss": 1.0152, "step": 9836 }, { "epoch": 1.3926523678063283, "grad_norm": 9.706814321016722, "learning_rate": 1.1144720306702106e-06, "loss": 0.869, "step": 9837 }, { "epoch": 1.3927939406809655, "grad_norm": 8.559124134923575, "learning_rate": 1.113994949405776e-06, "loss": 0.922, "step": 9838 }, { "epoch": 1.3929355135556027, "grad_norm": 9.69671074853841, "learning_rate": 1.1135179410025925e-06, "loss": 0.8682, "step": 9839 }, { "epoch": 1.39307708643024, "grad_norm": 10.097611294024684, "learning_rate": 1.1130410054857382e-06, "loss": 0.9207, "step": 9840 }, { "epoch": 1.3932186593048772, "grad_norm": 10.029844761994426, "learning_rate": 1.1125641428802831e-06, "loss": 0.7538, "step": 9841 }, { "epoch": 1.3933602321795144, "grad_norm": 7.652865413003225, "learning_rate": 1.1120873532112971e-06, "loss": 0.7699, "step": 9842 }, { "epoch": 1.3935018050541517, "grad_norm": 8.271219419088755, "learning_rate": 1.1116106365038443e-06, "loss": 0.8823, "step": 9843 }, { "epoch": 1.393643377928789, "grad_norm": 9.452729927823967, "learning_rate": 1.1111339927829842e-06, "loss": 0.8047, "step": 9844 }, { "epoch": 1.3937849508034261, "grad_norm": 9.875627328768473, "learning_rate": 1.1106574220737754e-06, "loss": 0.9125, "step": 9845 }, { "epoch": 1.3939265236780634, "grad_norm": 8.410143096105598, "learning_rate": 1.110180924401269e-06, "loss": 0.822, "step": 9846 }, { "epoch": 1.3940680965527004, "grad_norm": 7.921011311476155, "learning_rate": 1.1097044997905162e-06, "loss": 0.8037, "step": 9847 }, { "epoch": 1.3942096694273376, "grad_norm": 9.516549803043569, "learning_rate": 1.1092281482665601e-06, "loss": 0.9226, "step": 9848 }, { "epoch": 1.3943512423019748, "grad_norm": 8.117195489198426, "learning_rate": 1.1087518698544444e-06, "loss": 0.9419, "step": 9849 }, { "epoch": 1.394492815176612, "grad_norm": 8.67184244785557, "learning_rate": 1.1082756645792046e-06, "loss": 0.8783, "step": 9850 }, { "epoch": 1.3946343880512493, "grad_norm": 8.017889524631729, "learning_rate": 1.1077995324658762e-06, "loss": 0.7382, "step": 9851 }, { "epoch": 1.3947759609258865, "grad_norm": 9.499809263221955, "learning_rate": 1.1073234735394872e-06, "loss": 0.837, "step": 9852 }, { "epoch": 1.3949175338005237, "grad_norm": 9.167548489781769, "learning_rate": 1.1068474878250649e-06, "loss": 0.9133, "step": 9853 }, { "epoch": 1.395059106675161, "grad_norm": 7.6657406897354425, "learning_rate": 1.1063715753476334e-06, "loss": 0.8328, "step": 9854 }, { "epoch": 1.3952006795497982, "grad_norm": 7.702886798263068, "learning_rate": 1.105895736132207e-06, "loss": 0.8445, "step": 9855 }, { "epoch": 1.3953422524244354, "grad_norm": 7.156123397573495, "learning_rate": 1.1054199702038032e-06, "loss": 0.72, "step": 9856 }, { "epoch": 1.3954838252990727, "grad_norm": 10.573584885300734, "learning_rate": 1.104944277587431e-06, "loss": 0.762, "step": 9857 }, { "epoch": 1.39562539817371, "grad_norm": 6.936382856720768, "learning_rate": 1.1044686583080976e-06, "loss": 0.7464, "step": 9858 }, { "epoch": 1.3957669710483471, "grad_norm": 7.725508599683214, "learning_rate": 1.1039931123908074e-06, "loss": 0.8459, "step": 9859 }, { "epoch": 1.3959085439229844, "grad_norm": 9.003058867392047, "learning_rate": 1.1035176398605576e-06, "loss": 0.8712, "step": 9860 }, { "epoch": 1.3960501167976216, "grad_norm": 9.184017014251792, "learning_rate": 1.103042240742345e-06, "loss": 0.8202, "step": 9861 }, { "epoch": 1.3961916896722588, "grad_norm": 9.858889415166816, "learning_rate": 1.1025669150611594e-06, "loss": 0.8567, "step": 9862 }, { "epoch": 1.396333262546896, "grad_norm": 9.169553914675491, "learning_rate": 1.1020916628419898e-06, "loss": 0.8273, "step": 9863 }, { "epoch": 1.3964748354215333, "grad_norm": 7.812877188160287, "learning_rate": 1.1016164841098193e-06, "loss": 0.8388, "step": 9864 }, { "epoch": 1.3966164082961705, "grad_norm": 9.179639295086197, "learning_rate": 1.1011413788896263e-06, "loss": 0.8439, "step": 9865 }, { "epoch": 1.3967579811708077, "grad_norm": 9.416965806198787, "learning_rate": 1.1006663472063892e-06, "loss": 0.858, "step": 9866 }, { "epoch": 1.396899554045445, "grad_norm": 9.047190395516962, "learning_rate": 1.100191389085078e-06, "loss": 0.9711, "step": 9867 }, { "epoch": 1.3970411269200822, "grad_norm": 11.969181541053478, "learning_rate": 1.0997165045506624e-06, "loss": 0.8552, "step": 9868 }, { "epoch": 1.3971826997947194, "grad_norm": 8.43605221230351, "learning_rate": 1.0992416936281054e-06, "loss": 0.8645, "step": 9869 }, { "epoch": 1.3973242726693567, "grad_norm": 8.605547654824365, "learning_rate": 1.098766956342369e-06, "loss": 0.8454, "step": 9870 }, { "epoch": 1.397465845543994, "grad_norm": 8.62637273949051, "learning_rate": 1.0982922927184077e-06, "loss": 0.8818, "step": 9871 }, { "epoch": 1.3976074184186311, "grad_norm": 9.220211003799742, "learning_rate": 1.0978177027811767e-06, "loss": 0.8384, "step": 9872 }, { "epoch": 1.3977489912932681, "grad_norm": 8.62620160122295, "learning_rate": 1.0973431865556225e-06, "loss": 0.8127, "step": 9873 }, { "epoch": 1.3978905641679054, "grad_norm": 9.251818091623305, "learning_rate": 1.096868744066692e-06, "loss": 0.8075, "step": 9874 }, { "epoch": 1.3980321370425426, "grad_norm": 8.924199805197436, "learning_rate": 1.0963943753393252e-06, "loss": 0.8333, "step": 9875 }, { "epoch": 1.3981737099171798, "grad_norm": 8.858580969156932, "learning_rate": 1.095920080398459e-06, "loss": 0.8142, "step": 9876 }, { "epoch": 1.398315282791817, "grad_norm": 9.109591080734806, "learning_rate": 1.0954458592690278e-06, "loss": 0.7505, "step": 9877 }, { "epoch": 1.3984568556664543, "grad_norm": 9.098962712118531, "learning_rate": 1.0949717119759597e-06, "loss": 0.9449, "step": 9878 }, { "epoch": 1.3985984285410915, "grad_norm": 8.152905710297164, "learning_rate": 1.0944976385441822e-06, "loss": 0.8312, "step": 9879 }, { "epoch": 1.3987400014157287, "grad_norm": 8.647631599900498, "learning_rate": 1.0940236389986148e-06, "loss": 0.8303, "step": 9880 }, { "epoch": 1.398881574290366, "grad_norm": 8.280536894414857, "learning_rate": 1.0935497133641765e-06, "loss": 0.8293, "step": 9881 }, { "epoch": 1.3990231471650032, "grad_norm": 7.55832424422632, "learning_rate": 1.0930758616657816e-06, "loss": 0.7787, "step": 9882 }, { "epoch": 1.3991647200396404, "grad_norm": 9.480142418801615, "learning_rate": 1.0926020839283392e-06, "loss": 0.7847, "step": 9883 }, { "epoch": 1.3993062929142777, "grad_norm": 10.403558973093805, "learning_rate": 1.0921283801767562e-06, "loss": 0.9338, "step": 9884 }, { "epoch": 1.399447865788915, "grad_norm": 8.331643454553765, "learning_rate": 1.091654750435934e-06, "loss": 0.7137, "step": 9885 }, { "epoch": 1.3995894386635521, "grad_norm": 11.139179922494435, "learning_rate": 1.0911811947307732e-06, "loss": 1.0044, "step": 9886 }, { "epoch": 1.3997310115381894, "grad_norm": 9.801492725271732, "learning_rate": 1.0907077130861646e-06, "loss": 0.8965, "step": 9887 }, { "epoch": 1.3998725844128266, "grad_norm": 9.032082067260381, "learning_rate": 1.0902343055270006e-06, "loss": 0.7977, "step": 9888 }, { "epoch": 1.4000141572874636, "grad_norm": 9.654822975562686, "learning_rate": 1.0897609720781693e-06, "loss": 0.9001, "step": 9889 }, { "epoch": 1.4001557301621008, "grad_norm": 8.63233304310543, "learning_rate": 1.089287712764551e-06, "loss": 0.8907, "step": 9890 }, { "epoch": 1.400297303036738, "grad_norm": 9.713305552104039, "learning_rate": 1.0888145276110268e-06, "loss": 0.8362, "step": 9891 }, { "epoch": 1.4004388759113753, "grad_norm": 7.960119503025153, "learning_rate": 1.0883414166424697e-06, "loss": 0.8676, "step": 9892 }, { "epoch": 1.4005804487860125, "grad_norm": 8.209047341315138, "learning_rate": 1.0878683798837524e-06, "loss": 0.8522, "step": 9893 }, { "epoch": 1.4007220216606497, "grad_norm": 9.550206079306696, "learning_rate": 1.087395417359741e-06, "loss": 0.9205, "step": 9894 }, { "epoch": 1.400863594535287, "grad_norm": 9.839678209827252, "learning_rate": 1.0869225290952997e-06, "loss": 0.9336, "step": 9895 }, { "epoch": 1.4010051674099242, "grad_norm": 7.834678480298246, "learning_rate": 1.0864497151152879e-06, "loss": 0.9238, "step": 9896 }, { "epoch": 1.4011467402845614, "grad_norm": 8.169556969124907, "learning_rate": 1.0859769754445592e-06, "loss": 0.8193, "step": 9897 }, { "epoch": 1.4012883131591987, "grad_norm": 9.433106467099208, "learning_rate": 1.0855043101079677e-06, "loss": 0.7997, "step": 9898 }, { "epoch": 1.401429886033836, "grad_norm": 9.511847085889867, "learning_rate": 1.085031719130359e-06, "loss": 0.8171, "step": 9899 }, { "epoch": 1.4015714589084731, "grad_norm": 8.562941936587857, "learning_rate": 1.0845592025365786e-06, "loss": 0.8869, "step": 9900 }, { "epoch": 1.4017130317831104, "grad_norm": 9.340473123530023, "learning_rate": 1.0840867603514648e-06, "loss": 0.887, "step": 9901 }, { "epoch": 1.4018546046577476, "grad_norm": 9.332329241919718, "learning_rate": 1.083614392599855e-06, "loss": 0.8506, "step": 9902 }, { "epoch": 1.4019961775323848, "grad_norm": 9.198495940950542, "learning_rate": 1.0831420993065798e-06, "loss": 0.8502, "step": 9903 }, { "epoch": 1.402137750407022, "grad_norm": 9.209947631554511, "learning_rate": 1.0826698804964679e-06, "loss": 0.8125, "step": 9904 }, { "epoch": 1.4022793232816593, "grad_norm": 8.740985259295583, "learning_rate": 1.0821977361943441e-06, "loss": 0.8488, "step": 9905 }, { "epoch": 1.4024208961562965, "grad_norm": 8.47082223911813, "learning_rate": 1.0817256664250275e-06, "loss": 0.781, "step": 9906 }, { "epoch": 1.4025624690309337, "grad_norm": 8.818652014454974, "learning_rate": 1.081253671213337e-06, "loss": 0.8815, "step": 9907 }, { "epoch": 1.402704041905571, "grad_norm": 7.087879053707144, "learning_rate": 1.0807817505840815e-06, "loss": 0.7696, "step": 9908 }, { "epoch": 1.4028456147802082, "grad_norm": 8.719073327198172, "learning_rate": 1.0803099045620716e-06, "loss": 0.8029, "step": 9909 }, { "epoch": 1.4029871876548454, "grad_norm": 7.766569329608236, "learning_rate": 1.079838133172111e-06, "loss": 0.7612, "step": 9910 }, { "epoch": 1.4031287605294827, "grad_norm": 9.810383367951115, "learning_rate": 1.0793664364390004e-06, "loss": 0.784, "step": 9911 }, { "epoch": 1.4032703334041199, "grad_norm": 7.6448091934972755, "learning_rate": 1.0788948143875383e-06, "loss": 0.8947, "step": 9912 }, { "epoch": 1.4034119062787571, "grad_norm": 8.900883714328375, "learning_rate": 1.0784232670425148e-06, "loss": 0.8358, "step": 9913 }, { "epoch": 1.4035534791533943, "grad_norm": 8.696952233363943, "learning_rate": 1.0779517944287216e-06, "loss": 0.9328, "step": 9914 }, { "epoch": 1.4036950520280314, "grad_norm": 8.178120839253097, "learning_rate": 1.077480396570941e-06, "loss": 0.8366, "step": 9915 }, { "epoch": 1.4038366249026686, "grad_norm": 10.539966270204605, "learning_rate": 1.0770090734939564e-06, "loss": 0.8043, "step": 9916 }, { "epoch": 1.4039781977773058, "grad_norm": 8.275567404664203, "learning_rate": 1.0765378252225436e-06, "loss": 0.8805, "step": 9917 }, { "epoch": 1.404119770651943, "grad_norm": 8.870638353155902, "learning_rate": 1.076066651781475e-06, "loss": 0.8022, "step": 9918 }, { "epoch": 1.4042613435265803, "grad_norm": 9.778568375539319, "learning_rate": 1.075595553195522e-06, "loss": 0.9031, "step": 9919 }, { "epoch": 1.4044029164012175, "grad_norm": 8.49986446496952, "learning_rate": 1.0751245294894474e-06, "loss": 0.8218, "step": 9920 }, { "epoch": 1.4045444892758547, "grad_norm": 7.804611984666577, "learning_rate": 1.074653580688015e-06, "loss": 0.8081, "step": 9921 }, { "epoch": 1.404686062150492, "grad_norm": 9.337168949902505, "learning_rate": 1.0741827068159803e-06, "loss": 0.8499, "step": 9922 }, { "epoch": 1.4048276350251292, "grad_norm": 8.70476019409042, "learning_rate": 1.0737119078980981e-06, "loss": 0.9963, "step": 9923 }, { "epoch": 1.4049692078997664, "grad_norm": 10.20255071124986, "learning_rate": 1.0732411839591167e-06, "loss": 0.8538, "step": 9924 }, { "epoch": 1.4051107807744037, "grad_norm": 9.985634212457414, "learning_rate": 1.0727705350237833e-06, "loss": 0.8579, "step": 9925 }, { "epoch": 1.4052523536490409, "grad_norm": 8.819503274022198, "learning_rate": 1.0722999611168377e-06, "loss": 0.7624, "step": 9926 }, { "epoch": 1.4053939265236781, "grad_norm": 7.834648049050464, "learning_rate": 1.0718294622630188e-06, "loss": 0.8153, "step": 9927 }, { "epoch": 1.4055354993983153, "grad_norm": 8.46187117929483, "learning_rate": 1.071359038487062e-06, "loss": 0.8293, "step": 9928 }, { "epoch": 1.4056770722729526, "grad_norm": 12.729095075856543, "learning_rate": 1.0708886898136932e-06, "loss": 0.9914, "step": 9929 }, { "epoch": 1.4058186451475896, "grad_norm": 8.914074054412977, "learning_rate": 1.0704184162676417e-06, "loss": 0.8136, "step": 9930 }, { "epoch": 1.4059602180222268, "grad_norm": 9.069014108862014, "learning_rate": 1.069948217873627e-06, "loss": 0.9195, "step": 9931 }, { "epoch": 1.406101790896864, "grad_norm": 10.699894015063595, "learning_rate": 1.069478094656369e-06, "loss": 0.9055, "step": 9932 }, { "epoch": 1.4062433637715013, "grad_norm": 9.201351182648436, "learning_rate": 1.0690080466405803e-06, "loss": 0.878, "step": 9933 }, { "epoch": 1.4063849366461385, "grad_norm": 8.68913111022521, "learning_rate": 1.0685380738509712e-06, "loss": 0.8163, "step": 9934 }, { "epoch": 1.4065265095207757, "grad_norm": 8.176018738338849, "learning_rate": 1.0680681763122493e-06, "loss": 0.8514, "step": 9935 }, { "epoch": 1.406668082395413, "grad_norm": 9.0937093098048, "learning_rate": 1.067598354049115e-06, "loss": 0.832, "step": 9936 }, { "epoch": 1.4068096552700502, "grad_norm": 9.517799166643647, "learning_rate": 1.0671286070862678e-06, "loss": 0.8112, "step": 9937 }, { "epoch": 1.4069512281446874, "grad_norm": 7.941875063018907, "learning_rate": 1.0666589354484005e-06, "loss": 0.7704, "step": 9938 }, { "epoch": 1.4070928010193247, "grad_norm": 10.361002844505336, "learning_rate": 1.066189339160205e-06, "loss": 0.9222, "step": 9939 }, { "epoch": 1.4072343738939619, "grad_norm": 9.932726595310449, "learning_rate": 1.065719818246367e-06, "loss": 0.9069, "step": 9940 }, { "epoch": 1.4073759467685991, "grad_norm": 8.459532503959476, "learning_rate": 1.065250372731568e-06, "loss": 0.7735, "step": 9941 }, { "epoch": 1.4075175196432363, "grad_norm": 7.901205873805207, "learning_rate": 1.0647810026404878e-06, "loss": 0.8202, "step": 9942 }, { "epoch": 1.4076590925178736, "grad_norm": 9.674274695363351, "learning_rate": 1.064311707997799e-06, "loss": 0.8935, "step": 9943 }, { "epoch": 1.4078006653925108, "grad_norm": 10.467041605006521, "learning_rate": 1.0638424888281744e-06, "loss": 0.9387, "step": 9944 }, { "epoch": 1.407942238267148, "grad_norm": 8.285833508239804, "learning_rate": 1.0633733451562787e-06, "loss": 0.8715, "step": 9945 }, { "epoch": 1.4080838111417853, "grad_norm": 9.836399009973032, "learning_rate": 1.0629042770067754e-06, "loss": 0.8861, "step": 9946 }, { "epoch": 1.4082253840164225, "grad_norm": 9.732939833259607, "learning_rate": 1.0624352844043224e-06, "loss": 0.8863, "step": 9947 }, { "epoch": 1.4083669568910597, "grad_norm": 9.6155698083503, "learning_rate": 1.061966367373575e-06, "loss": 0.9301, "step": 9948 }, { "epoch": 1.408508529765697, "grad_norm": 8.36161851100223, "learning_rate": 1.0614975259391835e-06, "loss": 0.8521, "step": 9949 }, { "epoch": 1.4086501026403342, "grad_norm": 9.8966383650073, "learning_rate": 1.0610287601257937e-06, "loss": 0.9797, "step": 9950 }, { "epoch": 1.4087916755149714, "grad_norm": 8.979264851067986, "learning_rate": 1.06056006995805e-06, "loss": 0.9052, "step": 9951 }, { "epoch": 1.4089332483896087, "grad_norm": 9.747535981089618, "learning_rate": 1.060091455460589e-06, "loss": 0.9163, "step": 9952 }, { "epoch": 1.4090748212642459, "grad_norm": 9.589575241276616, "learning_rate": 1.0596229166580477e-06, "loss": 0.8149, "step": 9953 }, { "epoch": 1.409216394138883, "grad_norm": 11.625933209770597, "learning_rate": 1.0591544535750545e-06, "loss": 0.8741, "step": 9954 }, { "epoch": 1.4093579670135203, "grad_norm": 7.120661167349916, "learning_rate": 1.0586860662362375e-06, "loss": 0.8009, "step": 9955 }, { "epoch": 1.4094995398881573, "grad_norm": 8.1544753442296, "learning_rate": 1.0582177546662203e-06, "loss": 0.8037, "step": 9956 }, { "epoch": 1.4096411127627946, "grad_norm": 9.926259335200166, "learning_rate": 1.0577495188896198e-06, "loss": 0.9137, "step": 9957 }, { "epoch": 1.4097826856374318, "grad_norm": 7.734160942429272, "learning_rate": 1.0572813589310524e-06, "loss": 0.8815, "step": 9958 }, { "epoch": 1.409924258512069, "grad_norm": 10.485716777963072, "learning_rate": 1.0568132748151274e-06, "loss": 0.8739, "step": 9959 }, { "epoch": 1.4100658313867063, "grad_norm": 10.102592634056245, "learning_rate": 1.0563452665664542e-06, "loss": 0.8105, "step": 9960 }, { "epoch": 1.4102074042613435, "grad_norm": 8.736384670689462, "learning_rate": 1.055877334209632e-06, "loss": 0.8821, "step": 9961 }, { "epoch": 1.4103489771359807, "grad_norm": 9.767655648250145, "learning_rate": 1.055409477769262e-06, "loss": 0.9457, "step": 9962 }, { "epoch": 1.410490550010618, "grad_norm": 10.020088617538285, "learning_rate": 1.0549416972699392e-06, "loss": 0.8565, "step": 9963 }, { "epoch": 1.4106321228852552, "grad_norm": 9.578635969114213, "learning_rate": 1.054473992736253e-06, "loss": 0.9073, "step": 9964 }, { "epoch": 1.4107736957598924, "grad_norm": 8.78122306629544, "learning_rate": 1.0540063641927923e-06, "loss": 0.8208, "step": 9965 }, { "epoch": 1.4109152686345297, "grad_norm": 10.009056948998781, "learning_rate": 1.0535388116641376e-06, "loss": 0.8654, "step": 9966 }, { "epoch": 1.4110568415091669, "grad_norm": 9.460241139344745, "learning_rate": 1.0530713351748704e-06, "loss": 0.8524, "step": 9967 }, { "epoch": 1.4111984143838041, "grad_norm": 8.203792755559581, "learning_rate": 1.052603934749563e-06, "loss": 0.777, "step": 9968 }, { "epoch": 1.4113399872584413, "grad_norm": 9.507689325542518, "learning_rate": 1.0521366104127885e-06, "loss": 0.8394, "step": 9969 }, { "epoch": 1.4114815601330786, "grad_norm": 9.352156413888924, "learning_rate": 1.0516693621891127e-06, "loss": 0.8419, "step": 9970 }, { "epoch": 1.4116231330077158, "grad_norm": 7.527214773723137, "learning_rate": 1.0512021901030978e-06, "loss": 0.6645, "step": 9971 }, { "epoch": 1.4117647058823528, "grad_norm": 7.869951597886494, "learning_rate": 1.0507350941793044e-06, "loss": 0.8569, "step": 9972 }, { "epoch": 1.41190627875699, "grad_norm": 9.573354108499212, "learning_rate": 1.0502680744422856e-06, "loss": 0.8073, "step": 9973 }, { "epoch": 1.4120478516316273, "grad_norm": 8.197083433688329, "learning_rate": 1.049801130916594e-06, "loss": 0.6623, "step": 9974 }, { "epoch": 1.4121894245062645, "grad_norm": 7.257332612465079, "learning_rate": 1.0493342636267747e-06, "loss": 0.8704, "step": 9975 }, { "epoch": 1.4123309973809017, "grad_norm": 9.234495623683413, "learning_rate": 1.0488674725973727e-06, "loss": 0.7398, "step": 9976 }, { "epoch": 1.412472570255539, "grad_norm": 8.952985246509611, "learning_rate": 1.0484007578529246e-06, "loss": 0.8903, "step": 9977 }, { "epoch": 1.4126141431301762, "grad_norm": 7.972027990771516, "learning_rate": 1.047934119417966e-06, "loss": 0.8269, "step": 9978 }, { "epoch": 1.4127557160048134, "grad_norm": 8.466600659130641, "learning_rate": 1.0474675573170293e-06, "loss": 0.9543, "step": 9979 }, { "epoch": 1.4128972888794507, "grad_norm": 8.278795331957271, "learning_rate": 1.047001071574639e-06, "loss": 0.8608, "step": 9980 }, { "epoch": 1.4130388617540879, "grad_norm": 7.618099404549514, "learning_rate": 1.0465346622153209e-06, "loss": 0.8302, "step": 9981 }, { "epoch": 1.4131804346287251, "grad_norm": 8.143163010820844, "learning_rate": 1.04606832926359e-06, "loss": 0.8864, "step": 9982 }, { "epoch": 1.4133220075033623, "grad_norm": 8.759141343069599, "learning_rate": 1.0456020727439635e-06, "loss": 0.8847, "step": 9983 }, { "epoch": 1.4134635803779996, "grad_norm": 9.3040298754802, "learning_rate": 1.0451358926809513e-06, "loss": 0.8128, "step": 9984 }, { "epoch": 1.4136051532526368, "grad_norm": 8.628454428811578, "learning_rate": 1.04466978909906e-06, "loss": 0.7237, "step": 9985 }, { "epoch": 1.413746726127274, "grad_norm": 7.968062307566116, "learning_rate": 1.0442037620227938e-06, "loss": 0.8156, "step": 9986 }, { "epoch": 1.4138882990019113, "grad_norm": 8.452921020528754, "learning_rate": 1.0437378114766495e-06, "loss": 0.8164, "step": 9987 }, { "epoch": 1.4140298718765485, "grad_norm": 9.858456818255195, "learning_rate": 1.0432719374851233e-06, "loss": 0.8262, "step": 9988 }, { "epoch": 1.4141714447511857, "grad_norm": 7.497789184244059, "learning_rate": 1.0428061400727045e-06, "loss": 0.7783, "step": 9989 }, { "epoch": 1.414313017625823, "grad_norm": 9.755195211689209, "learning_rate": 1.0423404192638812e-06, "loss": 0.7844, "step": 9990 }, { "epoch": 1.4144545905004602, "grad_norm": 9.896866936495176, "learning_rate": 1.041874775083134e-06, "loss": 0.7959, "step": 9991 }, { "epoch": 1.4145961633750974, "grad_norm": 8.375195401317745, "learning_rate": 1.041409207554944e-06, "loss": 0.7404, "step": 9992 }, { "epoch": 1.4147377362497346, "grad_norm": 7.918712820012485, "learning_rate": 1.0409437167037843e-06, "loss": 0.8226, "step": 9993 }, { "epoch": 1.4148793091243719, "grad_norm": 8.618581401456689, "learning_rate": 1.0404783025541244e-06, "loss": 0.7749, "step": 9994 }, { "epoch": 1.415020881999009, "grad_norm": 9.195453963573055, "learning_rate": 1.0400129651304328e-06, "loss": 0.9549, "step": 9995 }, { "epoch": 1.4151624548736463, "grad_norm": 8.610724092489125, "learning_rate": 1.03954770445717e-06, "loss": 0.7244, "step": 9996 }, { "epoch": 1.4153040277482836, "grad_norm": 10.644776014961113, "learning_rate": 1.0390825205587966e-06, "loss": 0.8435, "step": 9997 }, { "epoch": 1.4154456006229206, "grad_norm": 8.290841935386212, "learning_rate": 1.0386174134597649e-06, "loss": 0.8096, "step": 9998 }, { "epoch": 1.4155871734975578, "grad_norm": 9.09155960183138, "learning_rate": 1.0381523831845266e-06, "loss": 0.8952, "step": 9999 }, { "epoch": 1.415728746372195, "grad_norm": 9.357487305070244, "learning_rate": 1.037687429757527e-06, "loss": 0.829, "step": 10000 }, { "epoch": 1.4159411056841509, "grad_norm": 8.336110936427945, "learning_rate": 1.0372225532032087e-06, "loss": 0.7403, "step": 10001 }, { "epoch": 1.416082678558788, "grad_norm": 7.010041527727135, "learning_rate": 1.0367577535460122e-06, "loss": 0.6943, "step": 10002 }, { "epoch": 1.4162242514334253, "grad_norm": 8.885554954701892, "learning_rate": 1.0362930308103675e-06, "loss": 0.6757, "step": 10003 }, { "epoch": 1.4163658243080626, "grad_norm": 9.95202803143543, "learning_rate": 1.0358283850207077e-06, "loss": 0.8437, "step": 10004 }, { "epoch": 1.4165073971826998, "grad_norm": 8.435420479415995, "learning_rate": 1.035363816201457e-06, "loss": 0.6966, "step": 10005 }, { "epoch": 1.416648970057337, "grad_norm": 7.662360543853565, "learning_rate": 1.0348993243770395e-06, "loss": 0.7292, "step": 10006 }, { "epoch": 1.4167905429319743, "grad_norm": 8.43810580869142, "learning_rate": 1.0344349095718712e-06, "loss": 0.8496, "step": 10007 }, { "epoch": 1.4169321158066115, "grad_norm": 8.58622406610606, "learning_rate": 1.0339705718103666e-06, "loss": 0.7578, "step": 10008 }, { "epoch": 1.4170736886812487, "grad_norm": 8.12908410350246, "learning_rate": 1.0335063111169372e-06, "loss": 0.7728, "step": 10009 }, { "epoch": 1.417215261555886, "grad_norm": 9.327597445038014, "learning_rate": 1.0330421275159863e-06, "loss": 0.6548, "step": 10010 }, { "epoch": 1.4173568344305232, "grad_norm": 9.23585253156758, "learning_rate": 1.032578021031918e-06, "loss": 0.7968, "step": 10011 }, { "epoch": 1.4174984073051604, "grad_norm": 7.574135900381718, "learning_rate": 1.032113991689128e-06, "loss": 0.6546, "step": 10012 }, { "epoch": 1.4176399801797976, "grad_norm": 9.313650584987515, "learning_rate": 1.031650039512012e-06, "loss": 0.8909, "step": 10013 }, { "epoch": 1.4177815530544349, "grad_norm": 8.160999070135587, "learning_rate": 1.0311861645249588e-06, "loss": 0.7451, "step": 10014 }, { "epoch": 1.417923125929072, "grad_norm": 8.643792949578083, "learning_rate": 1.0307223667523524e-06, "loss": 0.7123, "step": 10015 }, { "epoch": 1.4180646988037093, "grad_norm": 8.121813809618235, "learning_rate": 1.0302586462185769e-06, "loss": 0.7167, "step": 10016 }, { "epoch": 1.4182062716783466, "grad_norm": 8.601454288034125, "learning_rate": 1.0297950029480073e-06, "loss": 0.7731, "step": 10017 }, { "epoch": 1.4183478445529836, "grad_norm": 10.073180033815158, "learning_rate": 1.0293314369650193e-06, "loss": 0.8383, "step": 10018 }, { "epoch": 1.4184894174276208, "grad_norm": 7.745904886518187, "learning_rate": 1.0288679482939801e-06, "loss": 0.7153, "step": 10019 }, { "epoch": 1.418630990302258, "grad_norm": 10.556460849243988, "learning_rate": 1.0284045369592567e-06, "loss": 0.6577, "step": 10020 }, { "epoch": 1.4187725631768953, "grad_norm": 7.802025204396553, "learning_rate": 1.0279412029852087e-06, "loss": 0.8063, "step": 10021 }, { "epoch": 1.4189141360515325, "grad_norm": 10.207443842120272, "learning_rate": 1.0274779463961947e-06, "loss": 0.8187, "step": 10022 }, { "epoch": 1.4190557089261697, "grad_norm": 8.481149302638903, "learning_rate": 1.0270147672165677e-06, "loss": 0.8272, "step": 10023 }, { "epoch": 1.419197281800807, "grad_norm": 11.671127846944637, "learning_rate": 1.0265516654706748e-06, "loss": 0.8077, "step": 10024 }, { "epoch": 1.4193388546754442, "grad_norm": 8.544702006099854, "learning_rate": 1.026088641182863e-06, "loss": 0.7447, "step": 10025 }, { "epoch": 1.4194804275500814, "grad_norm": 8.003452033079276, "learning_rate": 1.0256256943774718e-06, "loss": 0.6872, "step": 10026 }, { "epoch": 1.4196220004247186, "grad_norm": 8.354643721686077, "learning_rate": 1.025162825078839e-06, "loss": 0.8287, "step": 10027 }, { "epoch": 1.4197635732993559, "grad_norm": 8.438679026419804, "learning_rate": 1.0247000333112962e-06, "loss": 0.7338, "step": 10028 }, { "epoch": 1.419905146173993, "grad_norm": 9.052049385829493, "learning_rate": 1.0242373190991734e-06, "loss": 0.7386, "step": 10029 }, { "epoch": 1.4200467190486303, "grad_norm": 7.256435826277175, "learning_rate": 1.0237746824667932e-06, "loss": 0.7254, "step": 10030 }, { "epoch": 1.4201882919232676, "grad_norm": 7.86324219710558, "learning_rate": 1.0233121234384777e-06, "loss": 0.7405, "step": 10031 }, { "epoch": 1.4203298647979048, "grad_norm": 8.84203092763721, "learning_rate": 1.0228496420385434e-06, "loss": 0.7672, "step": 10032 }, { "epoch": 1.4204714376725418, "grad_norm": 8.238168729228024, "learning_rate": 1.022387238291301e-06, "loss": 0.7539, "step": 10033 }, { "epoch": 1.420613010547179, "grad_norm": 8.102845019627944, "learning_rate": 1.021924912221062e-06, "loss": 0.7028, "step": 10034 }, { "epoch": 1.4207545834218163, "grad_norm": 8.358481443188877, "learning_rate": 1.021462663852126e-06, "loss": 0.7283, "step": 10035 }, { "epoch": 1.4208961562964535, "grad_norm": 9.143930525441538, "learning_rate": 1.0210004932087956e-06, "loss": 0.729, "step": 10036 }, { "epoch": 1.4210377291710907, "grad_norm": 8.811724256636722, "learning_rate": 1.0205384003153673e-06, "loss": 0.7282, "step": 10037 }, { "epoch": 1.421179302045728, "grad_norm": 7.634193756163163, "learning_rate": 1.0200763851961313e-06, "loss": 0.7612, "step": 10038 }, { "epoch": 1.4213208749203652, "grad_norm": 8.289350585533393, "learning_rate": 1.019614447875377e-06, "loss": 0.7321, "step": 10039 }, { "epoch": 1.4214624477950024, "grad_norm": 7.4815622670869235, "learning_rate": 1.0191525883773867e-06, "loss": 0.815, "step": 10040 }, { "epoch": 1.4216040206696396, "grad_norm": 8.565958702826856, "learning_rate": 1.0186908067264415e-06, "loss": 0.7204, "step": 10041 }, { "epoch": 1.4217455935442769, "grad_norm": 8.139035822121041, "learning_rate": 1.018229102946815e-06, "loss": 0.664, "step": 10042 }, { "epoch": 1.421887166418914, "grad_norm": 8.089725863759615, "learning_rate": 1.0177674770627807e-06, "loss": 0.7383, "step": 10043 }, { "epoch": 1.4220287392935513, "grad_norm": 8.798687438178812, "learning_rate": 1.0173059290986048e-06, "loss": 0.6614, "step": 10044 }, { "epoch": 1.4221703121681886, "grad_norm": 8.45927907482785, "learning_rate": 1.01684445907855e-06, "loss": 0.6846, "step": 10045 }, { "epoch": 1.4223118850428258, "grad_norm": 8.39267153085385, "learning_rate": 1.0163830670268768e-06, "loss": 0.8033, "step": 10046 }, { "epoch": 1.422453457917463, "grad_norm": 10.258971219304382, "learning_rate": 1.015921752967839e-06, "loss": 0.7502, "step": 10047 }, { "epoch": 1.4225950307921003, "grad_norm": 9.934428958080305, "learning_rate": 1.0154605169256884e-06, "loss": 0.7557, "step": 10048 }, { "epoch": 1.4227366036667375, "grad_norm": 10.4689256340211, "learning_rate": 1.014999358924671e-06, "loss": 0.7495, "step": 10049 }, { "epoch": 1.4228781765413747, "grad_norm": 8.634267057889344, "learning_rate": 1.014538278989031e-06, "loss": 0.7303, "step": 10050 }, { "epoch": 1.423019749416012, "grad_norm": 9.269575695215824, "learning_rate": 1.014077277143005e-06, "loss": 0.7056, "step": 10051 }, { "epoch": 1.4231613222906492, "grad_norm": 9.07850023764616, "learning_rate": 1.0136163534108284e-06, "loss": 0.7246, "step": 10052 }, { "epoch": 1.4233028951652864, "grad_norm": 10.627488865780343, "learning_rate": 1.0131555078167328e-06, "loss": 0.7668, "step": 10053 }, { "epoch": 1.4234444680399236, "grad_norm": 9.189561262315392, "learning_rate": 1.012694740384943e-06, "loss": 0.7285, "step": 10054 }, { "epoch": 1.4235860409145609, "grad_norm": 7.403333129791206, "learning_rate": 1.0122340511396833e-06, "loss": 0.7305, "step": 10055 }, { "epoch": 1.423727613789198, "grad_norm": 9.892840916117148, "learning_rate": 1.0117734401051682e-06, "loss": 0.7461, "step": 10056 }, { "epoch": 1.4238691866638353, "grad_norm": 11.455172533977963, "learning_rate": 1.0113129073056149e-06, "loss": 0.7253, "step": 10057 }, { "epoch": 1.4240107595384726, "grad_norm": 10.016380245876276, "learning_rate": 1.0108524527652308e-06, "loss": 0.7348, "step": 10058 }, { "epoch": 1.4241523324131096, "grad_norm": 8.780172152906113, "learning_rate": 1.010392076508223e-06, "loss": 0.776, "step": 10059 }, { "epoch": 1.4242939052877468, "grad_norm": 10.110932927314103, "learning_rate": 1.0099317785587941e-06, "loss": 0.7626, "step": 10060 }, { "epoch": 1.424435478162384, "grad_norm": 8.515898701880126, "learning_rate": 1.0094715589411398e-06, "loss": 0.7257, "step": 10061 }, { "epoch": 1.4245770510370213, "grad_norm": 7.961902264207652, "learning_rate": 1.009011417679455e-06, "loss": 0.7591, "step": 10062 }, { "epoch": 1.4247186239116585, "grad_norm": 7.777611797317696, "learning_rate": 1.0085513547979272e-06, "loss": 0.6997, "step": 10063 }, { "epoch": 1.4248601967862957, "grad_norm": 7.7401831194681945, "learning_rate": 1.0080913703207434e-06, "loss": 0.836, "step": 10064 }, { "epoch": 1.425001769660933, "grad_norm": 9.55249257738094, "learning_rate": 1.0076314642720834e-06, "loss": 0.7765, "step": 10065 }, { "epoch": 1.4251433425355702, "grad_norm": 10.36345499053382, "learning_rate": 1.007171636676125e-06, "loss": 0.7741, "step": 10066 }, { "epoch": 1.4252849154102074, "grad_norm": 9.82523991337966, "learning_rate": 1.006711887557041e-06, "loss": 0.8645, "step": 10067 }, { "epoch": 1.4254264882848446, "grad_norm": 8.337248480118502, "learning_rate": 1.0062522169389986e-06, "loss": 0.7137, "step": 10068 }, { "epoch": 1.4255680611594819, "grad_norm": 8.070645734437907, "learning_rate": 1.0057926248461638e-06, "loss": 0.7321, "step": 10069 }, { "epoch": 1.425709634034119, "grad_norm": 8.415914319345605, "learning_rate": 1.0053331113026962e-06, "loss": 0.6877, "step": 10070 }, { "epoch": 1.4258512069087563, "grad_norm": 8.728028969077647, "learning_rate": 1.0048736763327532e-06, "loss": 0.7273, "step": 10071 }, { "epoch": 1.4259927797833936, "grad_norm": 8.854381842897007, "learning_rate": 1.0044143199604856e-06, "loss": 0.8135, "step": 10072 }, { "epoch": 1.4261343526580308, "grad_norm": 8.994109451289624, "learning_rate": 1.0039550422100424e-06, "loss": 0.7681, "step": 10073 }, { "epoch": 1.426275925532668, "grad_norm": 9.109931104380674, "learning_rate": 1.0034958431055666e-06, "loss": 0.8775, "step": 10074 }, { "epoch": 1.426417498407305, "grad_norm": 9.231880793524137, "learning_rate": 1.0030367226711984e-06, "loss": 0.8369, "step": 10075 }, { "epoch": 1.4265590712819423, "grad_norm": 11.037384661615134, "learning_rate": 1.0025776809310752e-06, "loss": 0.7887, "step": 10076 }, { "epoch": 1.4267006441565795, "grad_norm": 7.3498992497811555, "learning_rate": 1.0021187179093254e-06, "loss": 0.7034, "step": 10077 }, { "epoch": 1.4268422170312167, "grad_norm": 8.594302627828009, "learning_rate": 1.0016598336300781e-06, "loss": 0.7055, "step": 10078 }, { "epoch": 1.426983789905854, "grad_norm": 8.532696388235586, "learning_rate": 1.0012010281174555e-06, "loss": 0.6774, "step": 10079 }, { "epoch": 1.4271253627804912, "grad_norm": 9.222798121393977, "learning_rate": 1.0007423013955784e-06, "loss": 0.7723, "step": 10080 }, { "epoch": 1.4272669356551284, "grad_norm": 9.864364111349959, "learning_rate": 1.0002836534885594e-06, "loss": 0.7378, "step": 10081 }, { "epoch": 1.4274085085297656, "grad_norm": 8.911749604082207, "learning_rate": 9.998250844205107e-07, "loss": 0.7493, "step": 10082 }, { "epoch": 1.4275500814044029, "grad_norm": 8.691089794098438, "learning_rate": 9.993665942155395e-07, "loss": 0.8144, "step": 10083 }, { "epoch": 1.42769165427904, "grad_norm": 7.766022639388714, "learning_rate": 9.989081828977464e-07, "loss": 0.7231, "step": 10084 }, { "epoch": 1.4278332271536773, "grad_norm": 9.053797680609295, "learning_rate": 9.984498504912321e-07, "loss": 0.8559, "step": 10085 }, { "epoch": 1.4279748000283146, "grad_norm": 7.294342974018114, "learning_rate": 9.979915970200888e-07, "loss": 0.7241, "step": 10086 }, { "epoch": 1.4281163729029518, "grad_norm": 8.23557847437672, "learning_rate": 9.97533422508408e-07, "loss": 0.8239, "step": 10087 }, { "epoch": 1.428257945777589, "grad_norm": 10.104930437198714, "learning_rate": 9.970753269802746e-07, "loss": 0.896, "step": 10088 }, { "epoch": 1.4283995186522263, "grad_norm": 8.39594712349869, "learning_rate": 9.966173104597701e-07, "loss": 0.6431, "step": 10089 }, { "epoch": 1.4285410915268635, "grad_norm": 9.938857651584362, "learning_rate": 9.961593729709734e-07, "loss": 0.8019, "step": 10090 }, { "epoch": 1.4286826644015007, "grad_norm": 7.865724944736499, "learning_rate": 9.957015145379564e-07, "loss": 0.6856, "step": 10091 }, { "epoch": 1.428824237276138, "grad_norm": 8.679808360854077, "learning_rate": 9.9524373518479e-07, "loss": 0.7306, "step": 10092 }, { "epoch": 1.4289658101507752, "grad_norm": 10.002922012667295, "learning_rate": 9.947860349355372e-07, "loss": 0.8024, "step": 10093 }, { "epoch": 1.4291073830254124, "grad_norm": 10.21638036661763, "learning_rate": 9.943284138142615e-07, "loss": 0.77, "step": 10094 }, { "epoch": 1.4292489559000496, "grad_norm": 9.504573724891324, "learning_rate": 9.938708718450175e-07, "loss": 0.751, "step": 10095 }, { "epoch": 1.4293905287746869, "grad_norm": 7.991147149801172, "learning_rate": 9.934134090518593e-07, "loss": 0.7482, "step": 10096 }, { "epoch": 1.429532101649324, "grad_norm": 9.350775587583923, "learning_rate": 9.929560254588353e-07, "loss": 0.694, "step": 10097 }, { "epoch": 1.4296736745239613, "grad_norm": 8.219709333120976, "learning_rate": 9.92498721089988e-07, "loss": 0.7375, "step": 10098 }, { "epoch": 1.4298152473985986, "grad_norm": 10.623784971887668, "learning_rate": 9.9204149596936e-07, "loss": 0.7713, "step": 10099 }, { "epoch": 1.4299568202732358, "grad_norm": 8.55362926887959, "learning_rate": 9.91584350120985e-07, "loss": 0.7341, "step": 10100 }, { "epoch": 1.4300983931478728, "grad_norm": 9.72577258966198, "learning_rate": 9.911272835688973e-07, "loss": 0.7287, "step": 10101 }, { "epoch": 1.43023996602251, "grad_norm": 10.331252185892842, "learning_rate": 9.906702963371222e-07, "loss": 0.7488, "step": 10102 }, { "epoch": 1.4303815388971473, "grad_norm": 9.182903359576867, "learning_rate": 9.902133884496853e-07, "loss": 0.7304, "step": 10103 }, { "epoch": 1.4305231117717845, "grad_norm": 8.825072066145196, "learning_rate": 9.897565599306037e-07, "loss": 0.737, "step": 10104 }, { "epoch": 1.4306646846464217, "grad_norm": 7.0940925414612686, "learning_rate": 9.892998108038937e-07, "loss": 0.6715, "step": 10105 }, { "epoch": 1.430806257521059, "grad_norm": 9.178210486227268, "learning_rate": 9.88843141093567e-07, "loss": 0.6683, "step": 10106 }, { "epoch": 1.4309478303956962, "grad_norm": 9.57404383573902, "learning_rate": 9.88386550823629e-07, "loss": 0.8101, "step": 10107 }, { "epoch": 1.4310894032703334, "grad_norm": 7.879113228178103, "learning_rate": 9.879300400180844e-07, "loss": 0.7118, "step": 10108 }, { "epoch": 1.4312309761449706, "grad_norm": 9.469591965711633, "learning_rate": 9.874736087009285e-07, "loss": 0.8701, "step": 10109 }, { "epoch": 1.4313725490196079, "grad_norm": 10.833127846358451, "learning_rate": 9.870172568961572e-07, "loss": 0.766, "step": 10110 }, { "epoch": 1.431514121894245, "grad_norm": 8.550357869115269, "learning_rate": 9.865609846277615e-07, "loss": 0.7384, "step": 10111 }, { "epoch": 1.4316556947688823, "grad_norm": 7.726338598326269, "learning_rate": 9.861047919197254e-07, "loss": 0.5823, "step": 10112 }, { "epoch": 1.4317972676435196, "grad_norm": 8.907213660859119, "learning_rate": 9.856486787960326e-07, "loss": 0.6821, "step": 10113 }, { "epoch": 1.4319388405181568, "grad_norm": 8.02959451794165, "learning_rate": 9.851926452806584e-07, "loss": 0.7394, "step": 10114 }, { "epoch": 1.432080413392794, "grad_norm": 8.378488753439006, "learning_rate": 9.847366913975787e-07, "loss": 0.8158, "step": 10115 }, { "epoch": 1.432221986267431, "grad_norm": 8.2675040692555, "learning_rate": 9.842808171707602e-07, "loss": 0.6804, "step": 10116 }, { "epoch": 1.4323635591420683, "grad_norm": 9.705120772131595, "learning_rate": 9.838250226241696e-07, "loss": 0.8248, "step": 10117 }, { "epoch": 1.4325051320167055, "grad_norm": 8.46740576854675, "learning_rate": 9.833693077817666e-07, "loss": 0.6392, "step": 10118 }, { "epoch": 1.4326467048913427, "grad_norm": 7.722581167439764, "learning_rate": 9.82913672667509e-07, "loss": 0.7094, "step": 10119 }, { "epoch": 1.43278827776598, "grad_norm": 10.118966647980871, "learning_rate": 9.824581173053483e-07, "loss": 0.7093, "step": 10120 }, { "epoch": 1.4329298506406172, "grad_norm": 5.95020324616347, "learning_rate": 9.820026417192322e-07, "loss": 0.684, "step": 10121 }, { "epoch": 1.4330714235152544, "grad_norm": 8.88112160618729, "learning_rate": 9.815472459331061e-07, "loss": 0.7013, "step": 10122 }, { "epoch": 1.4332129963898916, "grad_norm": 10.172414645223398, "learning_rate": 9.81091929970908e-07, "loss": 0.7621, "step": 10123 }, { "epoch": 1.4333545692645289, "grad_norm": 9.494811397887023, "learning_rate": 9.806366938565756e-07, "loss": 0.7712, "step": 10124 }, { "epoch": 1.433496142139166, "grad_norm": 10.01194850436237, "learning_rate": 9.801815376140385e-07, "loss": 0.6583, "step": 10125 }, { "epoch": 1.4336377150138033, "grad_norm": 9.962183209483571, "learning_rate": 9.797264612672256e-07, "loss": 0.7529, "step": 10126 }, { "epoch": 1.4337792878884406, "grad_norm": 8.557178040715748, "learning_rate": 9.792714648400584e-07, "loss": 0.7121, "step": 10127 }, { "epoch": 1.4339208607630778, "grad_norm": 8.56333885817339, "learning_rate": 9.78816548356456e-07, "loss": 0.7069, "step": 10128 }, { "epoch": 1.434062433637715, "grad_norm": 8.857848020688614, "learning_rate": 9.783617118403354e-07, "loss": 0.7281, "step": 10129 }, { "epoch": 1.4342040065123522, "grad_norm": 8.211947450673147, "learning_rate": 9.779069553156031e-07, "loss": 0.6776, "step": 10130 }, { "epoch": 1.4343455793869895, "grad_norm": 10.641433035352826, "learning_rate": 9.774522788061685e-07, "loss": 0.8165, "step": 10131 }, { "epoch": 1.4344871522616267, "grad_norm": 8.688105061269924, "learning_rate": 9.769976823359311e-07, "loss": 0.6081, "step": 10132 }, { "epoch": 1.434628725136264, "grad_norm": 8.344896102084785, "learning_rate": 9.765431659287901e-07, "loss": 0.7017, "step": 10133 }, { "epoch": 1.4347702980109012, "grad_norm": 8.732888192439006, "learning_rate": 9.760887296086397e-07, "loss": 0.7258, "step": 10134 }, { "epoch": 1.4349118708855384, "grad_norm": 10.076219766010922, "learning_rate": 9.756343733993679e-07, "loss": 0.7968, "step": 10135 }, { "epoch": 1.4350534437601756, "grad_norm": 8.357779491428555, "learning_rate": 9.75180097324861e-07, "loss": 0.8054, "step": 10136 }, { "epoch": 1.4351950166348129, "grad_norm": 8.956305932391425, "learning_rate": 9.747259014089988e-07, "loss": 0.7219, "step": 10137 }, { "epoch": 1.43533658950945, "grad_norm": 9.856546277302286, "learning_rate": 9.742717856756595e-07, "loss": 0.711, "step": 10138 }, { "epoch": 1.4354781623840873, "grad_norm": 8.18776976162823, "learning_rate": 9.738177501487137e-07, "loss": 0.7583, "step": 10139 }, { "epoch": 1.4356197352587245, "grad_norm": 8.223772842090227, "learning_rate": 9.73363794852032e-07, "loss": 0.7632, "step": 10140 }, { "epoch": 1.4357613081333618, "grad_norm": 8.97359109940041, "learning_rate": 9.729099198094771e-07, "loss": 0.7341, "step": 10141 }, { "epoch": 1.4359028810079988, "grad_norm": 10.938476867648532, "learning_rate": 9.724561250449082e-07, "loss": 0.7496, "step": 10142 }, { "epoch": 1.436044453882636, "grad_norm": 9.050974284465008, "learning_rate": 9.720024105821827e-07, "loss": 0.731, "step": 10143 }, { "epoch": 1.4361860267572732, "grad_norm": 10.008914407404337, "learning_rate": 9.715487764451504e-07, "loss": 0.7101, "step": 10144 }, { "epoch": 1.4363275996319105, "grad_norm": 9.921032582962674, "learning_rate": 9.7109522265766e-07, "loss": 0.7366, "step": 10145 }, { "epoch": 1.4364691725065477, "grad_norm": 8.12412057665585, "learning_rate": 9.70641749243553e-07, "loss": 0.7463, "step": 10146 }, { "epoch": 1.436610745381185, "grad_norm": 9.625318645800935, "learning_rate": 9.701883562266696e-07, "loss": 0.7496, "step": 10147 }, { "epoch": 1.4367523182558222, "grad_norm": 8.736224420564346, "learning_rate": 9.697350436308428e-07, "loss": 0.7576, "step": 10148 }, { "epoch": 1.4368938911304594, "grad_norm": 11.378696386023861, "learning_rate": 9.692818114799038e-07, "loss": 0.7248, "step": 10149 }, { "epoch": 1.4370354640050966, "grad_norm": 9.43102360498546, "learning_rate": 9.688286597976804e-07, "loss": 0.7734, "step": 10150 }, { "epoch": 1.4371770368797339, "grad_norm": 8.594359442168532, "learning_rate": 9.68375588607991e-07, "loss": 0.7077, "step": 10151 }, { "epoch": 1.437318609754371, "grad_norm": 8.84910610016473, "learning_rate": 9.679225979346558e-07, "loss": 0.6353, "step": 10152 }, { "epoch": 1.4374601826290083, "grad_norm": 8.696592992390352, "learning_rate": 9.674696878014862e-07, "loss": 0.77, "step": 10153 }, { "epoch": 1.4376017555036456, "grad_norm": 9.780100209477501, "learning_rate": 9.67016858232293e-07, "loss": 0.8119, "step": 10154 }, { "epoch": 1.4377433283782828, "grad_norm": 8.009756814263016, "learning_rate": 9.6656410925088e-07, "loss": 0.7184, "step": 10155 }, { "epoch": 1.43788490125292, "grad_norm": 9.266405146725548, "learning_rate": 9.661114408810485e-07, "loss": 0.7702, "step": 10156 }, { "epoch": 1.4380264741275572, "grad_norm": 7.480368798287851, "learning_rate": 9.656588531465954e-07, "loss": 0.7716, "step": 10157 }, { "epoch": 1.4381680470021942, "grad_norm": 7.2264028170702295, "learning_rate": 9.652063460713117e-07, "loss": 0.6738, "step": 10158 }, { "epoch": 1.4383096198768315, "grad_norm": 8.415864459291628, "learning_rate": 9.647539196789868e-07, "loss": 0.8109, "step": 10159 }, { "epoch": 1.4384511927514687, "grad_norm": 8.768301434507524, "learning_rate": 9.643015739934027e-07, "loss": 0.785, "step": 10160 }, { "epoch": 1.438592765626106, "grad_norm": 8.15110365058967, "learning_rate": 9.638493090383408e-07, "loss": 0.7325, "step": 10161 }, { "epoch": 1.4387343385007432, "grad_norm": 9.70005958774787, "learning_rate": 9.633971248375753e-07, "loss": 0.8104, "step": 10162 }, { "epoch": 1.4388759113753804, "grad_norm": 9.178374241168068, "learning_rate": 9.629450214148764e-07, "loss": 0.7879, "step": 10163 }, { "epoch": 1.4390174842500176, "grad_norm": 8.972915160745341, "learning_rate": 9.624929987940124e-07, "loss": 0.7758, "step": 10164 }, { "epoch": 1.4391590571246549, "grad_norm": 8.561659277413435, "learning_rate": 9.62041056998744e-07, "loss": 0.7849, "step": 10165 }, { "epoch": 1.439300629999292, "grad_norm": 8.51043419775836, "learning_rate": 9.615891960528314e-07, "loss": 0.7342, "step": 10166 }, { "epoch": 1.4394422028739293, "grad_norm": 9.302767384927263, "learning_rate": 9.611374159800272e-07, "loss": 0.741, "step": 10167 }, { "epoch": 1.4395837757485666, "grad_norm": 10.321185529169556, "learning_rate": 9.60685716804082e-07, "loss": 0.8896, "step": 10168 }, { "epoch": 1.4397253486232038, "grad_norm": 8.338257771475659, "learning_rate": 9.6023409854874e-07, "loss": 0.9251, "step": 10169 }, { "epoch": 1.439866921497841, "grad_norm": 8.293357433925989, "learning_rate": 9.597825612377448e-07, "loss": 0.7608, "step": 10170 }, { "epoch": 1.4400084943724782, "grad_norm": 8.943756261326206, "learning_rate": 9.593311048948306e-07, "loss": 0.6649, "step": 10171 }, { "epoch": 1.4401500672471155, "grad_norm": 7.780229984208548, "learning_rate": 9.588797295437324e-07, "loss": 0.8134, "step": 10172 }, { "epoch": 1.4402916401217527, "grad_norm": 8.655559856569884, "learning_rate": 9.584284352081777e-07, "loss": 0.7804, "step": 10173 }, { "epoch": 1.44043321299639, "grad_norm": 7.443850843129238, "learning_rate": 9.579772219118899e-07, "loss": 0.7528, "step": 10174 }, { "epoch": 1.4405747858710272, "grad_norm": 9.783299240581457, "learning_rate": 9.575260896785907e-07, "loss": 0.7932, "step": 10175 }, { "epoch": 1.4407163587456644, "grad_norm": 8.947285876764354, "learning_rate": 9.570750385319939e-07, "loss": 0.749, "step": 10176 }, { "epoch": 1.4408579316203016, "grad_norm": 7.744993253819462, "learning_rate": 9.566240684958128e-07, "loss": 0.8143, "step": 10177 }, { "epoch": 1.4409995044949389, "grad_norm": 7.990049133913603, "learning_rate": 9.561731795937526e-07, "loss": 0.7686, "step": 10178 }, { "epoch": 1.441141077369576, "grad_norm": 9.913002384342125, "learning_rate": 9.557223718495173e-07, "loss": 0.7764, "step": 10179 }, { "epoch": 1.4412826502442133, "grad_norm": 8.699886575595261, "learning_rate": 9.552716452868064e-07, "loss": 0.689, "step": 10180 }, { "epoch": 1.4414242231188505, "grad_norm": 8.761989118237107, "learning_rate": 9.548209999293122e-07, "loss": 0.6265, "step": 10181 }, { "epoch": 1.4415657959934878, "grad_norm": 9.384650794367714, "learning_rate": 9.543704358007281e-07, "loss": 0.7698, "step": 10182 }, { "epoch": 1.441707368868125, "grad_norm": 8.484331836906781, "learning_rate": 9.539199529247356e-07, "loss": 0.7002, "step": 10183 }, { "epoch": 1.441848941742762, "grad_norm": 8.57509471963334, "learning_rate": 9.534695513250183e-07, "loss": 0.8006, "step": 10184 }, { "epoch": 1.4419905146173992, "grad_norm": 8.683818229076685, "learning_rate": 9.530192310252548e-07, "loss": 0.7367, "step": 10185 }, { "epoch": 1.4421320874920365, "grad_norm": 11.158194407214781, "learning_rate": 9.525689920491157e-07, "loss": 0.8474, "step": 10186 }, { "epoch": 1.4422736603666737, "grad_norm": 7.161129816091866, "learning_rate": 9.521188344202717e-07, "loss": 0.7068, "step": 10187 }, { "epoch": 1.442415233241311, "grad_norm": 9.830925425745152, "learning_rate": 9.516687581623857e-07, "loss": 0.7967, "step": 10188 }, { "epoch": 1.4425568061159482, "grad_norm": 9.19067660060744, "learning_rate": 9.512187632991193e-07, "loss": 0.7542, "step": 10189 }, { "epoch": 1.4426983789905854, "grad_norm": 8.993659435078676, "learning_rate": 9.50768849854127e-07, "loss": 0.7738, "step": 10190 }, { "epoch": 1.4428399518652226, "grad_norm": 10.183037465027818, "learning_rate": 9.503190178510618e-07, "loss": 0.7751, "step": 10191 }, { "epoch": 1.4429815247398599, "grad_norm": 8.463553435488613, "learning_rate": 9.498692673135698e-07, "loss": 0.6609, "step": 10192 }, { "epoch": 1.443123097614497, "grad_norm": 9.406944274626989, "learning_rate": 9.494195982652951e-07, "loss": 0.8621, "step": 10193 }, { "epoch": 1.4432646704891343, "grad_norm": 7.337467444378507, "learning_rate": 9.489700107298763e-07, "loss": 0.7023, "step": 10194 }, { "epoch": 1.4434062433637715, "grad_norm": 8.0884851247789, "learning_rate": 9.485205047309465e-07, "loss": 0.8038, "step": 10195 }, { "epoch": 1.4435478162384088, "grad_norm": 9.38282632310597, "learning_rate": 9.480710802921377e-07, "loss": 0.6988, "step": 10196 }, { "epoch": 1.443689389113046, "grad_norm": 9.220463790647868, "learning_rate": 9.476217374370741e-07, "loss": 0.6568, "step": 10197 }, { "epoch": 1.4438309619876832, "grad_norm": 9.307371634787497, "learning_rate": 9.471724761893794e-07, "loss": 0.7653, "step": 10198 }, { "epoch": 1.4439725348623202, "grad_norm": 9.894753701728563, "learning_rate": 9.467232965726689e-07, "loss": 0.7096, "step": 10199 }, { "epoch": 1.4441141077369575, "grad_norm": 9.010331052399136, "learning_rate": 9.462741986105573e-07, "loss": 0.7177, "step": 10200 }, { "epoch": 1.4442556806115947, "grad_norm": 9.266586691239121, "learning_rate": 9.458251823266518e-07, "loss": 0.7346, "step": 10201 }, { "epoch": 1.444397253486232, "grad_norm": 8.243355126861792, "learning_rate": 9.453762477445574e-07, "loss": 0.6832, "step": 10202 }, { "epoch": 1.4445388263608692, "grad_norm": 7.961304541035796, "learning_rate": 9.449273948878762e-07, "loss": 0.7086, "step": 10203 }, { "epoch": 1.4446803992355064, "grad_norm": 8.588092509211656, "learning_rate": 9.444786237802009e-07, "loss": 0.6591, "step": 10204 }, { "epoch": 1.4448219721101436, "grad_norm": 8.952550633299813, "learning_rate": 9.440299344451251e-07, "loss": 0.7918, "step": 10205 }, { "epoch": 1.4449635449847809, "grad_norm": 8.517735994796434, "learning_rate": 9.435813269062349e-07, "loss": 0.6962, "step": 10206 }, { "epoch": 1.445105117859418, "grad_norm": 7.879921964462315, "learning_rate": 9.431328011871135e-07, "loss": 0.6345, "step": 10207 }, { "epoch": 1.4452466907340553, "grad_norm": 8.368577458808756, "learning_rate": 9.426843573113409e-07, "loss": 0.6911, "step": 10208 }, { "epoch": 1.4453882636086925, "grad_norm": 8.913091447848768, "learning_rate": 9.422359953024895e-07, "loss": 0.7136, "step": 10209 }, { "epoch": 1.4455298364833298, "grad_norm": 8.405429956080892, "learning_rate": 9.417877151841315e-07, "loss": 0.8061, "step": 10210 }, { "epoch": 1.445671409357967, "grad_norm": 10.519870303554757, "learning_rate": 9.413395169798303e-07, "loss": 0.8081, "step": 10211 }, { "epoch": 1.4458129822326042, "grad_norm": 9.409910180598622, "learning_rate": 9.408914007131495e-07, "loss": 0.75, "step": 10212 }, { "epoch": 1.4459545551072415, "grad_norm": 9.69943152266116, "learning_rate": 9.404433664076442e-07, "loss": 0.7665, "step": 10213 }, { "epoch": 1.4460961279818787, "grad_norm": 8.678933729787039, "learning_rate": 9.399954140868695e-07, "loss": 0.7635, "step": 10214 }, { "epoch": 1.446237700856516, "grad_norm": 8.59178644115927, "learning_rate": 9.395475437743723e-07, "loss": 0.7, "step": 10215 }, { "epoch": 1.4463792737311532, "grad_norm": 8.653422530801443, "learning_rate": 9.390997554936964e-07, "loss": 0.7166, "step": 10216 }, { "epoch": 1.4465208466057904, "grad_norm": 9.660501772404418, "learning_rate": 9.386520492683835e-07, "loss": 0.8205, "step": 10217 }, { "epoch": 1.4466624194804276, "grad_norm": 8.622100342657099, "learning_rate": 9.382044251219672e-07, "loss": 0.7645, "step": 10218 }, { "epoch": 1.4468039923550648, "grad_norm": 8.888612864765118, "learning_rate": 9.377568830779807e-07, "loss": 0.7881, "step": 10219 }, { "epoch": 1.446945565229702, "grad_norm": 8.757105530815078, "learning_rate": 9.373094231599491e-07, "loss": 0.8354, "step": 10220 }, { "epoch": 1.4470871381043393, "grad_norm": 8.876436788820047, "learning_rate": 9.368620453913968e-07, "loss": 0.6479, "step": 10221 }, { "epoch": 1.4472287109789765, "grad_norm": 8.593362917418812, "learning_rate": 9.364147497958404e-07, "loss": 0.7054, "step": 10222 }, { "epoch": 1.4473702838536138, "grad_norm": 10.321506335898466, "learning_rate": 9.359675363967958e-07, "loss": 0.7435, "step": 10223 }, { "epoch": 1.447511856728251, "grad_norm": 9.125206095837173, "learning_rate": 9.355204052177705e-07, "loss": 0.7533, "step": 10224 }, { "epoch": 1.447653429602888, "grad_norm": 7.85099087432537, "learning_rate": 9.350733562822717e-07, "loss": 0.8348, "step": 10225 }, { "epoch": 1.4477950024775252, "grad_norm": 10.910044012916241, "learning_rate": 9.346263896138e-07, "loss": 0.7648, "step": 10226 }, { "epoch": 1.4479365753521625, "grad_norm": 10.069656765040339, "learning_rate": 9.341795052358507e-07, "loss": 0.7789, "step": 10227 }, { "epoch": 1.4480781482267997, "grad_norm": 8.329620856807594, "learning_rate": 9.337327031719185e-07, "loss": 0.7387, "step": 10228 }, { "epoch": 1.448219721101437, "grad_norm": 9.964710912114397, "learning_rate": 9.332859834454891e-07, "loss": 0.727, "step": 10229 }, { "epoch": 1.4483612939760742, "grad_norm": 9.869780501038099, "learning_rate": 9.328393460800475e-07, "loss": 0.6897, "step": 10230 }, { "epoch": 1.4485028668507114, "grad_norm": 8.947245373189807, "learning_rate": 9.323927910990735e-07, "loss": 0.6766, "step": 10231 }, { "epoch": 1.4486444397253486, "grad_norm": 8.723098818821766, "learning_rate": 9.31946318526041e-07, "loss": 0.8314, "step": 10232 }, { "epoch": 1.4487860125999859, "grad_norm": 7.0263380737973975, "learning_rate": 9.314999283844223e-07, "loss": 0.7489, "step": 10233 }, { "epoch": 1.448927585474623, "grad_norm": 8.123451437464, "learning_rate": 9.310536206976819e-07, "loss": 0.7143, "step": 10234 }, { "epoch": 1.4490691583492603, "grad_norm": 10.731806418602027, "learning_rate": 9.306073954892844e-07, "loss": 0.7515, "step": 10235 }, { "epoch": 1.4492107312238975, "grad_norm": 9.386767110648414, "learning_rate": 9.301612527826844e-07, "loss": 0.7888, "step": 10236 }, { "epoch": 1.4493523040985348, "grad_norm": 7.911554178172589, "learning_rate": 9.297151926013368e-07, "loss": 0.8504, "step": 10237 }, { "epoch": 1.449493876973172, "grad_norm": 9.542874411117815, "learning_rate": 9.292692149686913e-07, "loss": 0.7223, "step": 10238 }, { "epoch": 1.4496354498478092, "grad_norm": 8.566430741956552, "learning_rate": 9.288233199081914e-07, "loss": 0.7882, "step": 10239 }, { "epoch": 1.4497770227224465, "grad_norm": 9.173138039232617, "learning_rate": 9.283775074432788e-07, "loss": 0.767, "step": 10240 }, { "epoch": 1.4499185955970835, "grad_norm": 8.54902691574045, "learning_rate": 9.279317775973879e-07, "loss": 0.7591, "step": 10241 }, { "epoch": 1.4500601684717207, "grad_norm": 10.773792994044188, "learning_rate": 9.274861303939523e-07, "loss": 0.6278, "step": 10242 }, { "epoch": 1.450201741346358, "grad_norm": 19.97387591315568, "learning_rate": 9.270405658563972e-07, "loss": 0.7659, "step": 10243 }, { "epoch": 1.4503433142209952, "grad_norm": 9.189765910258627, "learning_rate": 9.265950840081475e-07, "loss": 0.8106, "step": 10244 }, { "epoch": 1.4504848870956324, "grad_norm": 9.951168233254977, "learning_rate": 9.261496848726204e-07, "loss": 0.6959, "step": 10245 }, { "epoch": 1.4506264599702696, "grad_norm": 8.831288322910922, "learning_rate": 9.257043684732316e-07, "loss": 0.6895, "step": 10246 }, { "epoch": 1.4507680328449069, "grad_norm": 8.500044878672885, "learning_rate": 9.252591348333906e-07, "loss": 0.7105, "step": 10247 }, { "epoch": 1.450909605719544, "grad_norm": 9.361310398937421, "learning_rate": 9.248139839765013e-07, "loss": 0.8183, "step": 10248 }, { "epoch": 1.4510511785941813, "grad_norm": 9.46566147997159, "learning_rate": 9.243689159259677e-07, "loss": 0.7212, "step": 10249 }, { "epoch": 1.4511927514688185, "grad_norm": 7.94689937562313, "learning_rate": 9.239239307051842e-07, "loss": 0.701, "step": 10250 }, { "epoch": 1.4513343243434558, "grad_norm": 8.880395243328664, "learning_rate": 9.234790283375456e-07, "loss": 0.7859, "step": 10251 }, { "epoch": 1.451475897218093, "grad_norm": 10.57999681959483, "learning_rate": 9.230342088464381e-07, "loss": 0.7072, "step": 10252 }, { "epoch": 1.4516174700927302, "grad_norm": 10.749649396989797, "learning_rate": 9.225894722552462e-07, "loss": 0.8205, "step": 10253 }, { "epoch": 1.4517590429673675, "grad_norm": 8.887379783107038, "learning_rate": 9.221448185873505e-07, "loss": 0.6743, "step": 10254 }, { "epoch": 1.4519006158420047, "grad_norm": 9.908654916826107, "learning_rate": 9.217002478661244e-07, "loss": 0.7728, "step": 10255 }, { "epoch": 1.452042188716642, "grad_norm": 8.598537817565127, "learning_rate": 9.212557601149411e-07, "loss": 0.6862, "step": 10256 }, { "epoch": 1.4521837615912792, "grad_norm": 9.263879621260578, "learning_rate": 9.208113553571638e-07, "loss": 0.786, "step": 10257 }, { "epoch": 1.4523253344659164, "grad_norm": 9.414566571048459, "learning_rate": 9.203670336161558e-07, "loss": 0.7336, "step": 10258 }, { "epoch": 1.4524669073405536, "grad_norm": 9.558471629021687, "learning_rate": 9.199227949152758e-07, "loss": 0.6713, "step": 10259 }, { "epoch": 1.4526084802151908, "grad_norm": 9.605882161828427, "learning_rate": 9.194786392778757e-07, "loss": 0.8019, "step": 10260 }, { "epoch": 1.452750053089828, "grad_norm": 8.601554517196568, "learning_rate": 9.190345667273059e-07, "loss": 0.6735, "step": 10261 }, { "epoch": 1.4528916259644653, "grad_norm": 11.462467146132715, "learning_rate": 9.185905772869091e-07, "loss": 0.7995, "step": 10262 }, { "epoch": 1.4530331988391025, "grad_norm": 7.713484601400649, "learning_rate": 9.181466709800274e-07, "loss": 0.7487, "step": 10263 }, { "epoch": 1.4531747717137398, "grad_norm": 7.929603794201058, "learning_rate": 9.177028478299948e-07, "loss": 0.7315, "step": 10264 }, { "epoch": 1.453316344588377, "grad_norm": 11.713436295287135, "learning_rate": 9.172591078601448e-07, "loss": 0.8537, "step": 10265 }, { "epoch": 1.453457917463014, "grad_norm": 9.695312893457888, "learning_rate": 9.168154510938024e-07, "loss": 0.6863, "step": 10266 }, { "epoch": 1.4535994903376512, "grad_norm": 7.8249307842666855, "learning_rate": 9.163718775542921e-07, "loss": 0.638, "step": 10267 }, { "epoch": 1.4537410632122885, "grad_norm": 7.705147437109033, "learning_rate": 9.159283872649313e-07, "loss": 0.7474, "step": 10268 }, { "epoch": 1.4538826360869257, "grad_norm": 11.824991070540365, "learning_rate": 9.154849802490332e-07, "loss": 0.9526, "step": 10269 }, { "epoch": 1.454024208961563, "grad_norm": 9.792225879906237, "learning_rate": 9.150416565299092e-07, "loss": 0.7611, "step": 10270 }, { "epoch": 1.4541657818362002, "grad_norm": 8.368886965598756, "learning_rate": 9.145984161308627e-07, "loss": 0.8283, "step": 10271 }, { "epoch": 1.4543073547108374, "grad_norm": 11.154952970274817, "learning_rate": 9.14155259075196e-07, "loss": 0.6626, "step": 10272 }, { "epoch": 1.4544489275854746, "grad_norm": 10.613164076230253, "learning_rate": 9.137121853862041e-07, "loss": 0.79, "step": 10273 }, { "epoch": 1.4545905004601118, "grad_norm": 9.426155193576989, "learning_rate": 9.132691950871808e-07, "loss": 0.7553, "step": 10274 }, { "epoch": 1.454732073334749, "grad_norm": 8.462995875831202, "learning_rate": 9.128262882014117e-07, "loss": 0.724, "step": 10275 }, { "epoch": 1.4548736462093863, "grad_norm": 9.681468636572527, "learning_rate": 9.123834647521812e-07, "loss": 0.7363, "step": 10276 }, { "epoch": 1.4550152190840235, "grad_norm": 11.305572580568917, "learning_rate": 9.119407247627701e-07, "loss": 0.7976, "step": 10277 }, { "epoch": 1.4551567919586608, "grad_norm": 9.97328852835534, "learning_rate": 9.114980682564492e-07, "loss": 0.7782, "step": 10278 }, { "epoch": 1.455298364833298, "grad_norm": 8.69198602349974, "learning_rate": 9.110554952564912e-07, "loss": 0.7359, "step": 10279 }, { "epoch": 1.4554399377079352, "grad_norm": 8.296095644451945, "learning_rate": 9.106130057861604e-07, "loss": 0.7587, "step": 10280 }, { "epoch": 1.4555815105825725, "grad_norm": 10.100888777803787, "learning_rate": 9.101705998687185e-07, "loss": 0.7765, "step": 10281 }, { "epoch": 1.4557230834572095, "grad_norm": 9.915440673269, "learning_rate": 9.097282775274238e-07, "loss": 0.7646, "step": 10282 }, { "epoch": 1.4558646563318467, "grad_norm": 9.241762359463106, "learning_rate": 9.092860387855271e-07, "loss": 0.7781, "step": 10283 }, { "epoch": 1.456006229206484, "grad_norm": 9.514792720683836, "learning_rate": 9.088438836662777e-07, "loss": 0.7057, "step": 10284 }, { "epoch": 1.4561478020811212, "grad_norm": 9.846837700882428, "learning_rate": 9.084018121929184e-07, "loss": 0.7564, "step": 10285 }, { "epoch": 1.4562893749557584, "grad_norm": 7.887609649718229, "learning_rate": 9.079598243886897e-07, "loss": 0.729, "step": 10286 }, { "epoch": 1.4564309478303956, "grad_norm": 8.713535294394587, "learning_rate": 9.075179202768253e-07, "loss": 0.6939, "step": 10287 }, { "epoch": 1.4565725207050328, "grad_norm": 9.076451999321987, "learning_rate": 9.070760998805569e-07, "loss": 0.7604, "step": 10288 }, { "epoch": 1.45671409357967, "grad_norm": 9.226913096641193, "learning_rate": 9.066343632231106e-07, "loss": 0.5999, "step": 10289 }, { "epoch": 1.4568556664543073, "grad_norm": 9.816461043047436, "learning_rate": 9.061927103277068e-07, "loss": 0.7374, "step": 10290 }, { "epoch": 1.4569972393289445, "grad_norm": 9.800106312700612, "learning_rate": 9.057511412175646e-07, "loss": 0.7584, "step": 10291 }, { "epoch": 1.4571388122035818, "grad_norm": 9.150352062685553, "learning_rate": 9.053096559158956e-07, "loss": 0.7265, "step": 10292 }, { "epoch": 1.457280385078219, "grad_norm": 10.083571838416574, "learning_rate": 9.048682544459094e-07, "loss": 0.7995, "step": 10293 }, { "epoch": 1.4574219579528562, "grad_norm": 9.52743624071034, "learning_rate": 9.044269368308089e-07, "loss": 0.7647, "step": 10294 }, { "epoch": 1.4575635308274935, "grad_norm": 8.444220684946256, "learning_rate": 9.039857030937957e-07, "loss": 0.6712, "step": 10295 }, { "epoch": 1.4577051037021307, "grad_norm": 9.140703458530798, "learning_rate": 9.03544553258063e-07, "loss": 0.7171, "step": 10296 }, { "epoch": 1.457846676576768, "grad_norm": 8.64276813880103, "learning_rate": 9.031034873468039e-07, "loss": 0.729, "step": 10297 }, { "epoch": 1.4579882494514051, "grad_norm": 7.619534143740479, "learning_rate": 9.026625053832028e-07, "loss": 0.7153, "step": 10298 }, { "epoch": 1.4581298223260424, "grad_norm": 6.675614392297419, "learning_rate": 9.022216073904433e-07, "loss": 0.7233, "step": 10299 }, { "epoch": 1.4582713952006796, "grad_norm": 8.712957392635344, "learning_rate": 9.017807933917027e-07, "loss": 0.6917, "step": 10300 }, { "epoch": 1.4584129680753168, "grad_norm": 8.536360666594154, "learning_rate": 9.013400634101535e-07, "loss": 0.7487, "step": 10301 }, { "epoch": 1.458554540949954, "grad_norm": 9.378263185341684, "learning_rate": 9.008994174689659e-07, "loss": 0.6859, "step": 10302 }, { "epoch": 1.4586961138245913, "grad_norm": 11.970494554034268, "learning_rate": 9.004588555913027e-07, "loss": 0.7616, "step": 10303 }, { "epoch": 1.4588376866992285, "grad_norm": 10.13913785260083, "learning_rate": 9.000183778003246e-07, "loss": 0.6735, "step": 10304 }, { "epoch": 1.4589792595738658, "grad_norm": 9.302737040402201, "learning_rate": 8.995779841191884e-07, "loss": 0.6735, "step": 10305 }, { "epoch": 1.459120832448503, "grad_norm": 9.930339800810962, "learning_rate": 8.991376745710436e-07, "loss": 0.7096, "step": 10306 }, { "epoch": 1.4592624053231402, "grad_norm": 7.758451959814157, "learning_rate": 8.986974491790381e-07, "loss": 0.6813, "step": 10307 }, { "epoch": 1.4594039781977772, "grad_norm": 10.186413291589455, "learning_rate": 8.982573079663132e-07, "loss": 0.8104, "step": 10308 }, { "epoch": 1.4595455510724145, "grad_norm": 10.032944390583019, "learning_rate": 8.978172509560087e-07, "loss": 0.8188, "step": 10309 }, { "epoch": 1.4596871239470517, "grad_norm": 8.37455862220208, "learning_rate": 8.973772781712553e-07, "loss": 0.6721, "step": 10310 }, { "epoch": 1.459828696821689, "grad_norm": 9.428188960257028, "learning_rate": 8.969373896351833e-07, "loss": 0.7679, "step": 10311 }, { "epoch": 1.4599702696963262, "grad_norm": 9.293757273319068, "learning_rate": 8.964975853709179e-07, "loss": 0.7019, "step": 10312 }, { "epoch": 1.4601118425709634, "grad_norm": 9.238028950703876, "learning_rate": 8.960578654015783e-07, "loss": 0.7089, "step": 10313 }, { "epoch": 1.4602534154456006, "grad_norm": 8.166905717529954, "learning_rate": 8.956182297502817e-07, "loss": 0.7952, "step": 10314 }, { "epoch": 1.4603949883202378, "grad_norm": 11.283652939958477, "learning_rate": 8.951786784401376e-07, "loss": 0.7656, "step": 10315 }, { "epoch": 1.460536561194875, "grad_norm": 8.634139815947396, "learning_rate": 8.947392114942547e-07, "loss": 0.6713, "step": 10316 }, { "epoch": 1.4606781340695123, "grad_norm": 9.761852787069971, "learning_rate": 8.942998289357333e-07, "loss": 0.6949, "step": 10317 }, { "epoch": 1.4608197069441495, "grad_norm": 9.808647419954719, "learning_rate": 8.938605307876738e-07, "loss": 0.7496, "step": 10318 }, { "epoch": 1.4609612798187868, "grad_norm": 8.695710398437617, "learning_rate": 8.934213170731676e-07, "loss": 0.732, "step": 10319 }, { "epoch": 1.461102852693424, "grad_norm": 8.718199134843454, "learning_rate": 8.929821878153058e-07, "loss": 0.7114, "step": 10320 }, { "epoch": 1.4612444255680612, "grad_norm": 8.766895657680445, "learning_rate": 8.92543143037172e-07, "loss": 0.7843, "step": 10321 }, { "epoch": 1.4613859984426985, "grad_norm": 9.611791886736093, "learning_rate": 8.921041827618459e-07, "loss": 0.7974, "step": 10322 }, { "epoch": 1.4615275713173355, "grad_norm": 9.748431348546504, "learning_rate": 8.916653070124048e-07, "loss": 0.7075, "step": 10323 }, { "epoch": 1.4616691441919727, "grad_norm": 9.505815833897096, "learning_rate": 8.912265158119185e-07, "loss": 0.7297, "step": 10324 }, { "epoch": 1.46181071706661, "grad_norm": 8.699947085164826, "learning_rate": 8.907878091834554e-07, "loss": 0.6894, "step": 10325 }, { "epoch": 1.4619522899412472, "grad_norm": 9.033147170434807, "learning_rate": 8.903491871500767e-07, "loss": 0.7477, "step": 10326 }, { "epoch": 1.4620938628158844, "grad_norm": 8.354873386339126, "learning_rate": 8.899106497348409e-07, "loss": 0.7158, "step": 10327 }, { "epoch": 1.4622354356905216, "grad_norm": 8.952208467076058, "learning_rate": 8.894721969608025e-07, "loss": 0.7018, "step": 10328 }, { "epoch": 1.4623770085651588, "grad_norm": 9.446500616702842, "learning_rate": 8.890338288510089e-07, "loss": 0.7235, "step": 10329 }, { "epoch": 1.462518581439796, "grad_norm": 9.227845751150767, "learning_rate": 8.885955454285078e-07, "loss": 0.7921, "step": 10330 }, { "epoch": 1.4626601543144333, "grad_norm": 11.031759363793453, "learning_rate": 8.881573467163354e-07, "loss": 0.7595, "step": 10331 }, { "epoch": 1.4628017271890705, "grad_norm": 9.92306602718549, "learning_rate": 8.877192327375303e-07, "loss": 0.7554, "step": 10332 }, { "epoch": 1.4629433000637078, "grad_norm": 8.771536345106005, "learning_rate": 8.872812035151221e-07, "loss": 0.647, "step": 10333 }, { "epoch": 1.463084872938345, "grad_norm": 10.169536831641704, "learning_rate": 8.868432590721384e-07, "loss": 0.8402, "step": 10334 }, { "epoch": 1.4632264458129822, "grad_norm": 9.077753528867117, "learning_rate": 8.86405399431603e-07, "loss": 0.681, "step": 10335 }, { "epoch": 1.4633680186876195, "grad_norm": 9.156306243827895, "learning_rate": 8.859676246165314e-07, "loss": 0.6319, "step": 10336 }, { "epoch": 1.4635095915622567, "grad_norm": 10.687775190056328, "learning_rate": 8.855299346499394e-07, "loss": 0.771, "step": 10337 }, { "epoch": 1.463651164436894, "grad_norm": 9.563153013982765, "learning_rate": 8.850923295548339e-07, "loss": 0.784, "step": 10338 }, { "epoch": 1.4637927373115311, "grad_norm": 8.938658312451473, "learning_rate": 8.846548093542215e-07, "loss": 0.726, "step": 10339 }, { "epoch": 1.4639343101861684, "grad_norm": 8.64811594295503, "learning_rate": 8.842173740711002e-07, "loss": 0.7486, "step": 10340 }, { "epoch": 1.4640758830608056, "grad_norm": 7.972845962113887, "learning_rate": 8.83780023728468e-07, "loss": 0.7358, "step": 10341 }, { "epoch": 1.4642174559354428, "grad_norm": 9.159786023301843, "learning_rate": 8.833427583493146e-07, "loss": 0.7482, "step": 10342 }, { "epoch": 1.46435902881008, "grad_norm": 10.322195223565501, "learning_rate": 8.829055779566262e-07, "loss": 0.6844, "step": 10343 }, { "epoch": 1.4645006016847173, "grad_norm": 8.775557332394373, "learning_rate": 8.824684825733865e-07, "loss": 0.7778, "step": 10344 }, { "epoch": 1.4646421745593545, "grad_norm": 8.622051232511593, "learning_rate": 8.82031472222572e-07, "loss": 0.7102, "step": 10345 }, { "epoch": 1.4647837474339918, "grad_norm": 8.883706134276155, "learning_rate": 8.815945469271573e-07, "loss": 0.6655, "step": 10346 }, { "epoch": 1.464925320308629, "grad_norm": 7.652388864163212, "learning_rate": 8.811577067101096e-07, "loss": 0.7124, "step": 10347 }, { "epoch": 1.4650668931832662, "grad_norm": 10.132763935287436, "learning_rate": 8.807209515943952e-07, "loss": 0.6768, "step": 10348 }, { "epoch": 1.4652084660579032, "grad_norm": 10.211952865306527, "learning_rate": 8.802842816029722e-07, "loss": 0.7775, "step": 10349 }, { "epoch": 1.4653500389325405, "grad_norm": 9.7281664402928, "learning_rate": 8.798476967587965e-07, "loss": 0.7578, "step": 10350 }, { "epoch": 1.4654916118071777, "grad_norm": 10.795042553208669, "learning_rate": 8.794111970848205e-07, "loss": 0.7224, "step": 10351 }, { "epoch": 1.465633184681815, "grad_norm": 8.563670280284827, "learning_rate": 8.789747826039893e-07, "loss": 0.71, "step": 10352 }, { "epoch": 1.4657747575564521, "grad_norm": 9.122337057258434, "learning_rate": 8.785384533392452e-07, "loss": 0.7543, "step": 10353 }, { "epoch": 1.4659163304310894, "grad_norm": 9.095520476411254, "learning_rate": 8.78102209313525e-07, "loss": 0.8308, "step": 10354 }, { "epoch": 1.4660579033057266, "grad_norm": 9.671223804940816, "learning_rate": 8.776660505497619e-07, "loss": 0.7324, "step": 10355 }, { "epoch": 1.4661994761803638, "grad_norm": 9.131126124111615, "learning_rate": 8.772299770708859e-07, "loss": 0.8071, "step": 10356 }, { "epoch": 1.466341049055001, "grad_norm": 9.666322460842705, "learning_rate": 8.767939888998192e-07, "loss": 0.7631, "step": 10357 }, { "epoch": 1.4664826219296383, "grad_norm": 9.14682240159864, "learning_rate": 8.763580860594828e-07, "loss": 0.7194, "step": 10358 }, { "epoch": 1.4666241948042755, "grad_norm": 9.606215340546209, "learning_rate": 8.759222685727905e-07, "loss": 0.8491, "step": 10359 }, { "epoch": 1.4667657676789128, "grad_norm": 8.12748985287547, "learning_rate": 8.754865364626544e-07, "loss": 0.6886, "step": 10360 }, { "epoch": 1.46690734055355, "grad_norm": 7.954142747073396, "learning_rate": 8.750508897519791e-07, "loss": 0.76, "step": 10361 }, { "epoch": 1.4670489134281872, "grad_norm": 9.646346955325603, "learning_rate": 8.746153284636677e-07, "loss": 0.7508, "step": 10362 }, { "epoch": 1.4671904863028244, "grad_norm": 8.501598432254978, "learning_rate": 8.741798526206164e-07, "loss": 0.7594, "step": 10363 }, { "epoch": 1.4673320591774617, "grad_norm": 8.24394281145941, "learning_rate": 8.737444622457172e-07, "loss": 0.6985, "step": 10364 }, { "epoch": 1.4674736320520987, "grad_norm": 10.113510205862356, "learning_rate": 8.733091573618599e-07, "loss": 0.7993, "step": 10365 }, { "epoch": 1.467615204926736, "grad_norm": 9.268949738792088, "learning_rate": 8.728739379919266e-07, "loss": 0.7981, "step": 10366 }, { "epoch": 1.4677567778013731, "grad_norm": 9.845338723481184, "learning_rate": 8.724388041587976e-07, "loss": 0.7457, "step": 10367 }, { "epoch": 1.4678983506760104, "grad_norm": 9.388107698685044, "learning_rate": 8.720037558853464e-07, "loss": 0.6981, "step": 10368 }, { "epoch": 1.4680399235506476, "grad_norm": 7.85465652462748, "learning_rate": 8.71568793194445e-07, "loss": 0.8011, "step": 10369 }, { "epoch": 1.4681814964252848, "grad_norm": 10.010076786322191, "learning_rate": 8.711339161089568e-07, "loss": 0.7849, "step": 10370 }, { "epoch": 1.468323069299922, "grad_norm": 9.030558523350166, "learning_rate": 8.706991246517449e-07, "loss": 0.7106, "step": 10371 }, { "epoch": 1.4684646421745593, "grad_norm": 9.113845487247943, "learning_rate": 8.702644188456646e-07, "loss": 0.6303, "step": 10372 }, { "epoch": 1.4686062150491965, "grad_norm": 8.807801637749423, "learning_rate": 8.698297987135693e-07, "loss": 0.8579, "step": 10373 }, { "epoch": 1.4687477879238338, "grad_norm": 8.769214133325361, "learning_rate": 8.69395264278306e-07, "loss": 0.8286, "step": 10374 }, { "epoch": 1.468889360798471, "grad_norm": 7.522602355808346, "learning_rate": 8.689608155627169e-07, "loss": 0.7383, "step": 10375 }, { "epoch": 1.4690309336731082, "grad_norm": 8.439469623262628, "learning_rate": 8.685264525896426e-07, "loss": 0.7888, "step": 10376 }, { "epoch": 1.4691725065477454, "grad_norm": 10.190491406811779, "learning_rate": 8.680921753819152e-07, "loss": 0.7261, "step": 10377 }, { "epoch": 1.4693140794223827, "grad_norm": 9.53831345275197, "learning_rate": 8.676579839623653e-07, "loss": 0.7585, "step": 10378 }, { "epoch": 1.46945565229702, "grad_norm": 7.983334109545877, "learning_rate": 8.672238783538189e-07, "loss": 0.7517, "step": 10379 }, { "epoch": 1.4695972251716571, "grad_norm": 8.918736535471835, "learning_rate": 8.667898585790951e-07, "loss": 0.7513, "step": 10380 }, { "epoch": 1.4697387980462944, "grad_norm": 10.281470392062197, "learning_rate": 8.663559246610115e-07, "loss": 0.7369, "step": 10381 }, { "epoch": 1.4698803709209316, "grad_norm": 8.94971319836607, "learning_rate": 8.659220766223778e-07, "loss": 0.753, "step": 10382 }, { "epoch": 1.4700219437955688, "grad_norm": 8.336871451458546, "learning_rate": 8.654883144860038e-07, "loss": 0.7782, "step": 10383 }, { "epoch": 1.470163516670206, "grad_norm": 9.579578980052233, "learning_rate": 8.650546382746888e-07, "loss": 0.697, "step": 10384 }, { "epoch": 1.4703050895448433, "grad_norm": 8.652731280119418, "learning_rate": 8.646210480112325e-07, "loss": 0.6945, "step": 10385 }, { "epoch": 1.4704466624194805, "grad_norm": 9.437015748389507, "learning_rate": 8.641875437184288e-07, "loss": 0.7231, "step": 10386 }, { "epoch": 1.4705882352941178, "grad_norm": 7.4591127411054385, "learning_rate": 8.637541254190657e-07, "loss": 0.6368, "step": 10387 }, { "epoch": 1.470729808168755, "grad_norm": 7.358121015942447, "learning_rate": 8.633207931359292e-07, "loss": 0.7398, "step": 10388 }, { "epoch": 1.4708713810433922, "grad_norm": 8.500442942129682, "learning_rate": 8.628875468917969e-07, "loss": 0.7495, "step": 10389 }, { "epoch": 1.4710129539180294, "grad_norm": 8.48195078237943, "learning_rate": 8.624543867094468e-07, "loss": 0.5658, "step": 10390 }, { "epoch": 1.4711545267926665, "grad_norm": 8.503356719583643, "learning_rate": 8.620213126116476e-07, "loss": 0.6547, "step": 10391 }, { "epoch": 1.4712960996673037, "grad_norm": 10.24804668888174, "learning_rate": 8.615883246211676e-07, "loss": 0.755, "step": 10392 }, { "epoch": 1.471437672541941, "grad_norm": 9.237782425750664, "learning_rate": 8.611554227607665e-07, "loss": 0.7586, "step": 10393 }, { "epoch": 1.4715792454165781, "grad_norm": 10.506319959635642, "learning_rate": 8.607226070532041e-07, "loss": 0.7421, "step": 10394 }, { "epoch": 1.4717208182912154, "grad_norm": 9.936898075323858, "learning_rate": 8.602898775212317e-07, "loss": 0.7535, "step": 10395 }, { "epoch": 1.4718623911658526, "grad_norm": 9.751701793338993, "learning_rate": 8.598572341875975e-07, "loss": 0.7112, "step": 10396 }, { "epoch": 1.4720039640404898, "grad_norm": 8.171020229704384, "learning_rate": 8.594246770750459e-07, "loss": 0.8036, "step": 10397 }, { "epoch": 1.472145536915127, "grad_norm": 9.397452362256427, "learning_rate": 8.589922062063149e-07, "loss": 0.7054, "step": 10398 }, { "epoch": 1.4722871097897643, "grad_norm": 7.7778398632417485, "learning_rate": 8.58559821604141e-07, "loss": 0.7621, "step": 10399 }, { "epoch": 1.4724286826644015, "grad_norm": 10.153688078318975, "learning_rate": 8.581275232912526e-07, "loss": 0.7326, "step": 10400 }, { "epoch": 1.4725702555390388, "grad_norm": 10.011056128701492, "learning_rate": 8.57695311290376e-07, "loss": 0.676, "step": 10401 }, { "epoch": 1.472711828413676, "grad_norm": 9.845986539079483, "learning_rate": 8.572631856242333e-07, "loss": 0.7405, "step": 10402 }, { "epoch": 1.4728534012883132, "grad_norm": 8.777566266093585, "learning_rate": 8.56831146315539e-07, "loss": 0.8542, "step": 10403 }, { "epoch": 1.4729949741629504, "grad_norm": 9.087127266524213, "learning_rate": 8.563991933870072e-07, "loss": 0.6874, "step": 10404 }, { "epoch": 1.4731365470375877, "grad_norm": 8.616694777816084, "learning_rate": 8.559673268613442e-07, "loss": 0.7868, "step": 10405 }, { "epoch": 1.4732781199122247, "grad_norm": 11.061468906087995, "learning_rate": 8.555355467612527e-07, "loss": 0.776, "step": 10406 }, { "epoch": 1.473419692786862, "grad_norm": 8.58573889741301, "learning_rate": 8.551038531094308e-07, "loss": 0.7134, "step": 10407 }, { "epoch": 1.4735612656614991, "grad_norm": 10.39760581114729, "learning_rate": 8.546722459285727e-07, "loss": 0.7733, "step": 10408 }, { "epoch": 1.4737028385361364, "grad_norm": 7.311911534815554, "learning_rate": 8.54240725241369e-07, "loss": 0.7223, "step": 10409 }, { "epoch": 1.4738444114107736, "grad_norm": 11.698440082601223, "learning_rate": 8.538092910705021e-07, "loss": 0.7734, "step": 10410 }, { "epoch": 1.4739859842854108, "grad_norm": 9.038965434061803, "learning_rate": 8.533779434386544e-07, "loss": 0.7513, "step": 10411 }, { "epoch": 1.474127557160048, "grad_norm": 8.95836545953239, "learning_rate": 8.529466823684993e-07, "loss": 0.8117, "step": 10412 }, { "epoch": 1.4742691300346853, "grad_norm": 9.655427471794605, "learning_rate": 8.525155078827099e-07, "loss": 0.7466, "step": 10413 }, { "epoch": 1.4744107029093225, "grad_norm": 11.006969671424386, "learning_rate": 8.520844200039511e-07, "loss": 0.754, "step": 10414 }, { "epoch": 1.4745522757839598, "grad_norm": 8.162427879120031, "learning_rate": 8.516534187548864e-07, "loss": 0.6975, "step": 10415 }, { "epoch": 1.474693848658597, "grad_norm": 9.894927958514868, "learning_rate": 8.512225041581726e-07, "loss": 0.7187, "step": 10416 }, { "epoch": 1.4748354215332342, "grad_norm": 8.344734733849862, "learning_rate": 8.507916762364613e-07, "loss": 0.8072, "step": 10417 }, { "epoch": 1.4749769944078714, "grad_norm": 8.146666293146819, "learning_rate": 8.503609350124029e-07, "loss": 0.6569, "step": 10418 }, { "epoch": 1.4751185672825087, "grad_norm": 8.641749386466614, "learning_rate": 8.49930280508639e-07, "loss": 0.7313, "step": 10419 }, { "epoch": 1.475260140157146, "grad_norm": 8.052416743144473, "learning_rate": 8.494997127478111e-07, "loss": 0.6802, "step": 10420 }, { "epoch": 1.4754017130317831, "grad_norm": 8.985919109837077, "learning_rate": 8.490692317525514e-07, "loss": 0.7044, "step": 10421 }, { "epoch": 1.4755432859064204, "grad_norm": 9.31556108587811, "learning_rate": 8.486388375454924e-07, "loss": 0.704, "step": 10422 }, { "epoch": 1.4756848587810576, "grad_norm": 10.028552301657994, "learning_rate": 8.482085301492574e-07, "loss": 0.7817, "step": 10423 }, { "epoch": 1.4758264316556948, "grad_norm": 8.913031957301342, "learning_rate": 8.477783095864683e-07, "loss": 0.7766, "step": 10424 }, { "epoch": 1.475968004530332, "grad_norm": 9.747126229128384, "learning_rate": 8.473481758797425e-07, "loss": 0.6518, "step": 10425 }, { "epoch": 1.4761095774049693, "grad_norm": 8.113443930422479, "learning_rate": 8.469181290516906e-07, "loss": 0.8257, "step": 10426 }, { "epoch": 1.4762511502796065, "grad_norm": 7.657632508309877, "learning_rate": 8.464881691249202e-07, "loss": 0.7623, "step": 10427 }, { "epoch": 1.4763927231542437, "grad_norm": 10.040706562241672, "learning_rate": 8.460582961220332e-07, "loss": 0.869, "step": 10428 }, { "epoch": 1.476534296028881, "grad_norm": 8.98721931878734, "learning_rate": 8.456285100656289e-07, "loss": 0.6619, "step": 10429 }, { "epoch": 1.4766758689035182, "grad_norm": 9.340856608087481, "learning_rate": 8.451988109782997e-07, "loss": 0.7959, "step": 10430 }, { "epoch": 1.4768174417781554, "grad_norm": 9.54470385947813, "learning_rate": 8.447691988826348e-07, "loss": 0.7533, "step": 10431 }, { "epoch": 1.4769590146527924, "grad_norm": 10.519574040249028, "learning_rate": 8.443396738012199e-07, "loss": 0.7505, "step": 10432 }, { "epoch": 1.4771005875274297, "grad_norm": 8.514341933155345, "learning_rate": 8.439102357566331e-07, "loss": 0.665, "step": 10433 }, { "epoch": 1.477242160402067, "grad_norm": 8.961463653363042, "learning_rate": 8.434808847714512e-07, "loss": 0.7523, "step": 10434 }, { "epoch": 1.4773837332767041, "grad_norm": 10.028970714937511, "learning_rate": 8.430516208682429e-07, "loss": 0.8315, "step": 10435 }, { "epoch": 1.4775253061513414, "grad_norm": 9.663213792815725, "learning_rate": 8.426224440695765e-07, "loss": 0.6872, "step": 10436 }, { "epoch": 1.4776668790259786, "grad_norm": 9.453833586562748, "learning_rate": 8.421933543980126e-07, "loss": 0.7333, "step": 10437 }, { "epoch": 1.4778084519006158, "grad_norm": 9.834104820203317, "learning_rate": 8.417643518761068e-07, "loss": 0.71, "step": 10438 }, { "epoch": 1.477950024775253, "grad_norm": 9.284251950994802, "learning_rate": 8.413354365264137e-07, "loss": 0.7573, "step": 10439 }, { "epoch": 1.4780915976498903, "grad_norm": 9.259087300151993, "learning_rate": 8.40906608371479e-07, "loss": 0.767, "step": 10440 }, { "epoch": 1.4782331705245275, "grad_norm": 9.84962389852439, "learning_rate": 8.404778674338476e-07, "loss": 0.7957, "step": 10441 }, { "epoch": 1.4783747433991647, "grad_norm": 8.771868163711934, "learning_rate": 8.400492137360564e-07, "loss": 0.7483, "step": 10442 }, { "epoch": 1.478516316273802, "grad_norm": 10.332748232447951, "learning_rate": 8.396206473006413e-07, "loss": 0.8899, "step": 10443 }, { "epoch": 1.4786578891484392, "grad_norm": 7.650457191518544, "learning_rate": 8.391921681501297e-07, "loss": 0.6025, "step": 10444 }, { "epoch": 1.4787994620230764, "grad_norm": 8.505231705816657, "learning_rate": 8.387637763070486e-07, "loss": 0.778, "step": 10445 }, { "epoch": 1.4789410348977137, "grad_norm": 8.577642931012758, "learning_rate": 8.383354717939163e-07, "loss": 0.707, "step": 10446 }, { "epoch": 1.479082607772351, "grad_norm": 10.400052628017194, "learning_rate": 8.379072546332498e-07, "loss": 0.8486, "step": 10447 }, { "epoch": 1.479224180646988, "grad_norm": 7.850616732139659, "learning_rate": 8.374791248475597e-07, "loss": 0.7861, "step": 10448 }, { "epoch": 1.4793657535216251, "grad_norm": 9.151708941519773, "learning_rate": 8.370510824593517e-07, "loss": 0.7398, "step": 10449 }, { "epoch": 1.4795073263962624, "grad_norm": 9.507365533608585, "learning_rate": 8.366231274911291e-07, "loss": 0.6588, "step": 10450 }, { "epoch": 1.4796488992708996, "grad_norm": 8.630390638790633, "learning_rate": 8.361952599653875e-07, "loss": 0.7187, "step": 10451 }, { "epoch": 1.4797904721455368, "grad_norm": 9.433662493279552, "learning_rate": 8.357674799046206e-07, "loss": 0.7919, "step": 10452 }, { "epoch": 1.479932045020174, "grad_norm": 8.28166917423797, "learning_rate": 8.353397873313171e-07, "loss": 0.6991, "step": 10453 }, { "epoch": 1.4800736178948113, "grad_norm": 10.18907181485351, "learning_rate": 8.34912182267959e-07, "loss": 0.7714, "step": 10454 }, { "epoch": 1.4802151907694485, "grad_norm": 8.210587193704827, "learning_rate": 8.34484664737027e-07, "loss": 0.6915, "step": 10455 }, { "epoch": 1.4803567636440857, "grad_norm": 8.387319585050838, "learning_rate": 8.340572347609932e-07, "loss": 0.7076, "step": 10456 }, { "epoch": 1.480498336518723, "grad_norm": 8.080825678873648, "learning_rate": 8.336298923623301e-07, "loss": 0.8135, "step": 10457 }, { "epoch": 1.4806399093933602, "grad_norm": 8.572085173752896, "learning_rate": 8.332026375634994e-07, "loss": 0.7095, "step": 10458 }, { "epoch": 1.4807814822679974, "grad_norm": 9.242602302742457, "learning_rate": 8.327754703869631e-07, "loss": 0.8021, "step": 10459 }, { "epoch": 1.4809230551426347, "grad_norm": 8.966690730898117, "learning_rate": 8.323483908551783e-07, "loss": 0.7092, "step": 10460 }, { "epoch": 1.481064628017272, "grad_norm": 8.736970628411129, "learning_rate": 8.319213989905942e-07, "loss": 0.6062, "step": 10461 }, { "epoch": 1.4812062008919091, "grad_norm": 8.733092621647915, "learning_rate": 8.314944948156589e-07, "loss": 0.6408, "step": 10462 }, { "epoch": 1.4813477737665464, "grad_norm": 7.689101881900392, "learning_rate": 8.310676783528135e-07, "loss": 0.6902, "step": 10463 }, { "epoch": 1.4814893466411836, "grad_norm": 8.146623682053331, "learning_rate": 8.306409496244966e-07, "loss": 0.7508, "step": 10464 }, { "epoch": 1.4816309195158208, "grad_norm": 7.163050456306978, "learning_rate": 8.302143086531392e-07, "loss": 0.7513, "step": 10465 }, { "epoch": 1.481772492390458, "grad_norm": 8.583827723613275, "learning_rate": 8.297877554611717e-07, "loss": 0.6737, "step": 10466 }, { "epoch": 1.4819140652650953, "grad_norm": 9.819631912573785, "learning_rate": 8.293612900710155e-07, "loss": 0.7282, "step": 10467 }, { "epoch": 1.4820556381397325, "grad_norm": 9.0623042644541, "learning_rate": 8.289349125050914e-07, "loss": 0.7858, "step": 10468 }, { "epoch": 1.4821972110143697, "grad_norm": 7.962941405129967, "learning_rate": 8.28508622785813e-07, "loss": 0.7116, "step": 10469 }, { "epoch": 1.482338783889007, "grad_norm": 9.490492630965896, "learning_rate": 8.280824209355892e-07, "loss": 0.7344, "step": 10470 }, { "epoch": 1.4824803567636442, "grad_norm": 10.704987791251321, "learning_rate": 8.276563069768267e-07, "loss": 0.7615, "step": 10471 }, { "epoch": 1.4826219296382814, "grad_norm": 9.787826715816319, "learning_rate": 8.272302809319243e-07, "loss": 0.726, "step": 10472 }, { "epoch": 1.4827635025129187, "grad_norm": 8.510223971690747, "learning_rate": 8.268043428232798e-07, "loss": 0.6561, "step": 10473 }, { "epoch": 1.4829050753875557, "grad_norm": 9.551411102058552, "learning_rate": 8.263784926732824e-07, "loss": 0.7001, "step": 10474 }, { "epoch": 1.483046648262193, "grad_norm": 9.333792084818324, "learning_rate": 8.259527305043197e-07, "loss": 0.7324, "step": 10475 }, { "epoch": 1.4831882211368301, "grad_norm": 10.23538021363524, "learning_rate": 8.255270563387746e-07, "loss": 0.7682, "step": 10476 }, { "epoch": 1.4833297940114674, "grad_norm": 10.598381177979494, "learning_rate": 8.251014701990229e-07, "loss": 0.7643, "step": 10477 }, { "epoch": 1.4834713668861046, "grad_norm": 9.370321912253486, "learning_rate": 8.246759721074388e-07, "loss": 0.7575, "step": 10478 }, { "epoch": 1.4836129397607418, "grad_norm": 8.764648383095404, "learning_rate": 8.242505620863894e-07, "loss": 0.6845, "step": 10479 }, { "epoch": 1.483754512635379, "grad_norm": 10.012517342808405, "learning_rate": 8.238252401582389e-07, "loss": 0.8338, "step": 10480 }, { "epoch": 1.4838960855100163, "grad_norm": 8.500000897575779, "learning_rate": 8.234000063453446e-07, "loss": 0.6777, "step": 10481 }, { "epoch": 1.4840376583846535, "grad_norm": 11.694789958892686, "learning_rate": 8.229748606700619e-07, "loss": 0.8207, "step": 10482 }, { "epoch": 1.4841792312592907, "grad_norm": 9.431635569280587, "learning_rate": 8.225498031547413e-07, "loss": 0.7099, "step": 10483 }, { "epoch": 1.484320804133928, "grad_norm": 8.65287632390281, "learning_rate": 8.221248338217258e-07, "loss": 0.6964, "step": 10484 }, { "epoch": 1.4844623770085652, "grad_norm": 9.779337638159001, "learning_rate": 8.216999526933578e-07, "loss": 0.7559, "step": 10485 }, { "epoch": 1.4846039498832024, "grad_norm": 9.084746737857673, "learning_rate": 8.212751597919708e-07, "loss": 0.7358, "step": 10486 }, { "epoch": 1.4847455227578397, "grad_norm": 9.819255083036706, "learning_rate": 8.208504551398977e-07, "loss": 0.7111, "step": 10487 }, { "epoch": 1.484887095632477, "grad_norm": 9.9777347649674, "learning_rate": 8.204258387594635e-07, "loss": 0.8758, "step": 10488 }, { "epoch": 1.485028668507114, "grad_norm": 7.77943196946783, "learning_rate": 8.200013106729915e-07, "loss": 0.7386, "step": 10489 }, { "epoch": 1.4851702413817511, "grad_norm": 8.337156054767295, "learning_rate": 8.195768709027979e-07, "loss": 0.7518, "step": 10490 }, { "epoch": 1.4853118142563884, "grad_norm": 10.603058902659257, "learning_rate": 8.191525194711941e-07, "loss": 0.7198, "step": 10491 }, { "epoch": 1.4854533871310256, "grad_norm": 8.906308679220071, "learning_rate": 8.187282564004903e-07, "loss": 0.6682, "step": 10492 }, { "epoch": 1.4855949600056628, "grad_norm": 10.409723108953385, "learning_rate": 8.183040817129873e-07, "loss": 0.69, "step": 10493 }, { "epoch": 1.4857365328803, "grad_norm": 8.122992164774281, "learning_rate": 8.178799954309857e-07, "loss": 0.6663, "step": 10494 }, { "epoch": 1.4858781057549373, "grad_norm": 8.130886820981182, "learning_rate": 8.174559975767779e-07, "loss": 0.6671, "step": 10495 }, { "epoch": 1.4860196786295745, "grad_norm": 8.65490756315501, "learning_rate": 8.170320881726542e-07, "loss": 0.7471, "step": 10496 }, { "epoch": 1.4861612515042117, "grad_norm": 9.09515642649225, "learning_rate": 8.166082672408985e-07, "loss": 0.7309, "step": 10497 }, { "epoch": 1.486302824378849, "grad_norm": 10.633643012086502, "learning_rate": 8.161845348037906e-07, "loss": 0.7148, "step": 10498 }, { "epoch": 1.4864443972534862, "grad_norm": 7.6127458504500805, "learning_rate": 8.157608908836071e-07, "loss": 0.7247, "step": 10499 }, { "epoch": 1.4865859701281234, "grad_norm": 10.04874328043628, "learning_rate": 8.153373355026176e-07, "loss": 0.7352, "step": 10500 }, { "epoch": 1.4867275430027607, "grad_norm": 7.737300188630681, "learning_rate": 8.149138686830882e-07, "loss": 0.7015, "step": 10501 }, { "epoch": 1.486869115877398, "grad_norm": 7.646868754307574, "learning_rate": 8.144904904472798e-07, "loss": 0.6797, "step": 10502 }, { "epoch": 1.4870106887520351, "grad_norm": 9.517444455440879, "learning_rate": 8.1406720081745e-07, "loss": 0.7471, "step": 10503 }, { "epoch": 1.4871522616266724, "grad_norm": 9.880654563070534, "learning_rate": 8.136439998158499e-07, "loss": 0.7388, "step": 10504 }, { "epoch": 1.4872938345013096, "grad_norm": 9.601980847400585, "learning_rate": 8.132208874647271e-07, "loss": 0.7591, "step": 10505 }, { "epoch": 1.4874354073759468, "grad_norm": 9.356372689237125, "learning_rate": 8.127978637863254e-07, "loss": 0.7453, "step": 10506 }, { "epoch": 1.487576980250584, "grad_norm": 8.7317440867998, "learning_rate": 8.12374928802881e-07, "loss": 0.6458, "step": 10507 }, { "epoch": 1.4877185531252213, "grad_norm": 8.100036771714374, "learning_rate": 8.119520825366292e-07, "loss": 0.7354, "step": 10508 }, { "epoch": 1.4878601259998585, "grad_norm": 8.871011616886408, "learning_rate": 8.115293250097969e-07, "loss": 0.7284, "step": 10509 }, { "epoch": 1.4880016988744957, "grad_norm": 9.660047259797617, "learning_rate": 8.111066562446098e-07, "loss": 0.8455, "step": 10510 }, { "epoch": 1.488143271749133, "grad_norm": 9.20401944799246, "learning_rate": 8.106840762632867e-07, "loss": 0.7435, "step": 10511 }, { "epoch": 1.4882848446237702, "grad_norm": 8.89089359353724, "learning_rate": 8.102615850880413e-07, "loss": 0.6754, "step": 10512 }, { "epoch": 1.4884264174984074, "grad_norm": 9.218154939746027, "learning_rate": 8.098391827410851e-07, "loss": 0.7301, "step": 10513 }, { "epoch": 1.4885679903730447, "grad_norm": 8.952610287342527, "learning_rate": 8.09416869244622e-07, "loss": 0.7192, "step": 10514 }, { "epoch": 1.4887095632476817, "grad_norm": 7.919558455364143, "learning_rate": 8.089946446208543e-07, "loss": 0.6876, "step": 10515 }, { "epoch": 1.488851136122319, "grad_norm": 8.087055987708991, "learning_rate": 8.085725088919766e-07, "loss": 0.7454, "step": 10516 }, { "epoch": 1.4889927089969561, "grad_norm": 8.608430287891306, "learning_rate": 8.081504620801816e-07, "loss": 0.7687, "step": 10517 }, { "epoch": 1.4891342818715934, "grad_norm": 7.933848584964338, "learning_rate": 8.077285042076546e-07, "loss": 0.6707, "step": 10518 }, { "epoch": 1.4892758547462306, "grad_norm": 9.446706563272627, "learning_rate": 8.073066352965792e-07, "loss": 0.7994, "step": 10519 }, { "epoch": 1.4894174276208678, "grad_norm": 9.713880097712467, "learning_rate": 8.068848553691308e-07, "loss": 0.7763, "step": 10520 }, { "epoch": 1.489559000495505, "grad_norm": 8.853056954049297, "learning_rate": 8.06463164447484e-07, "loss": 0.6508, "step": 10521 }, { "epoch": 1.4897005733701423, "grad_norm": 8.084665038012117, "learning_rate": 8.060415625538059e-07, "loss": 0.6705, "step": 10522 }, { "epoch": 1.4898421462447795, "grad_norm": 8.233678683349535, "learning_rate": 8.056200497102592e-07, "loss": 0.6507, "step": 10523 }, { "epoch": 1.4899837191194167, "grad_norm": 8.053142943415933, "learning_rate": 8.051986259390038e-07, "loss": 0.7109, "step": 10524 }, { "epoch": 1.490125291994054, "grad_norm": 8.774234018829649, "learning_rate": 8.047772912621921e-07, "loss": 0.763, "step": 10525 }, { "epoch": 1.4902668648686912, "grad_norm": 9.551975417420211, "learning_rate": 8.04356045701975e-07, "loss": 0.6747, "step": 10526 }, { "epoch": 1.4904084377433284, "grad_norm": 11.208835822166424, "learning_rate": 8.039348892804955e-07, "loss": 0.8121, "step": 10527 }, { "epoch": 1.4905500106179657, "grad_norm": 9.70896021138363, "learning_rate": 8.03513822019894e-07, "loss": 0.7373, "step": 10528 }, { "epoch": 1.490691583492603, "grad_norm": 7.5310438452399, "learning_rate": 8.030928439423069e-07, "loss": 0.6702, "step": 10529 }, { "epoch": 1.4908331563672401, "grad_norm": 9.191838699718389, "learning_rate": 8.026719550698628e-07, "loss": 0.7006, "step": 10530 }, { "epoch": 1.4909747292418771, "grad_norm": 9.139031895758938, "learning_rate": 8.022511554246895e-07, "loss": 0.7261, "step": 10531 }, { "epoch": 1.4911163021165144, "grad_norm": 8.670164527255956, "learning_rate": 8.018304450289069e-07, "loss": 0.6977, "step": 10532 }, { "epoch": 1.4912578749911516, "grad_norm": 10.147744334968763, "learning_rate": 8.014098239046309e-07, "loss": 0.6471, "step": 10533 }, { "epoch": 1.4913994478657888, "grad_norm": 8.946202023185506, "learning_rate": 8.009892920739745e-07, "loss": 0.7409, "step": 10534 }, { "epoch": 1.491541020740426, "grad_norm": 8.406613934094635, "learning_rate": 8.005688495590435e-07, "loss": 0.7326, "step": 10535 }, { "epoch": 1.4916825936150633, "grad_norm": 9.334412875639805, "learning_rate": 8.001484963819417e-07, "loss": 0.8319, "step": 10536 }, { "epoch": 1.4918241664897005, "grad_norm": 11.838014849671996, "learning_rate": 7.997282325647654e-07, "loss": 0.8702, "step": 10537 }, { "epoch": 1.4919657393643377, "grad_norm": 8.230801988834049, "learning_rate": 7.993080581296087e-07, "loss": 0.7502, "step": 10538 }, { "epoch": 1.492107312238975, "grad_norm": 9.32879401128989, "learning_rate": 7.988879730985585e-07, "loss": 0.7289, "step": 10539 }, { "epoch": 1.4922488851136122, "grad_norm": 8.369330009890465, "learning_rate": 7.984679774937002e-07, "loss": 0.6678, "step": 10540 }, { "epoch": 1.4923904579882494, "grad_norm": 9.498540665821958, "learning_rate": 7.980480713371106e-07, "loss": 0.6878, "step": 10541 }, { "epoch": 1.4925320308628867, "grad_norm": 9.509593336072815, "learning_rate": 7.976282546508654e-07, "loss": 0.7507, "step": 10542 }, { "epoch": 1.492673603737524, "grad_norm": 9.15528880207044, "learning_rate": 7.972085274570341e-07, "loss": 0.654, "step": 10543 }, { "epoch": 1.4928151766121611, "grad_norm": 9.217756004409964, "learning_rate": 7.967888897776796e-07, "loss": 0.8321, "step": 10544 }, { "epoch": 1.4929567494867984, "grad_norm": 8.843297508714535, "learning_rate": 7.963693416348642e-07, "loss": 0.7506, "step": 10545 }, { "epoch": 1.4930983223614356, "grad_norm": 8.244634097038084, "learning_rate": 7.959498830506412e-07, "loss": 0.6904, "step": 10546 }, { "epoch": 1.4932398952360728, "grad_norm": 8.963851813995015, "learning_rate": 7.955305140470635e-07, "loss": 0.7154, "step": 10547 }, { "epoch": 1.49338146811071, "grad_norm": 9.32852575824635, "learning_rate": 7.951112346461745e-07, "loss": 0.6799, "step": 10548 }, { "epoch": 1.4935230409853473, "grad_norm": 7.908423083659979, "learning_rate": 7.946920448700168e-07, "loss": 0.652, "step": 10549 }, { "epoch": 1.4936646138599845, "grad_norm": 8.161899758390545, "learning_rate": 7.942729447406278e-07, "loss": 0.6087, "step": 10550 }, { "epoch": 1.4938061867346217, "grad_norm": 8.80289883118233, "learning_rate": 7.938539342800373e-07, "loss": 0.7591, "step": 10551 }, { "epoch": 1.493947759609259, "grad_norm": 8.549694872313747, "learning_rate": 7.934350135102742e-07, "loss": 0.7454, "step": 10552 }, { "epoch": 1.4940893324838962, "grad_norm": 9.70607939875426, "learning_rate": 7.930161824533597e-07, "loss": 0.7348, "step": 10553 }, { "epoch": 1.4942309053585334, "grad_norm": 7.800434286303016, "learning_rate": 7.92597441131312e-07, "loss": 0.7167, "step": 10554 }, { "epoch": 1.4943724782331707, "grad_norm": 9.403974188064595, "learning_rate": 7.921787895661429e-07, "loss": 0.8732, "step": 10555 }, { "epoch": 1.4945140511078077, "grad_norm": 9.518493320262962, "learning_rate": 7.917602277798613e-07, "loss": 0.7838, "step": 10556 }, { "epoch": 1.494655623982445, "grad_norm": 8.38611969143083, "learning_rate": 7.913417557944716e-07, "loss": 0.663, "step": 10557 }, { "epoch": 1.4947971968570821, "grad_norm": 8.589464485431229, "learning_rate": 7.909233736319711e-07, "loss": 0.7285, "step": 10558 }, { "epoch": 1.4949387697317194, "grad_norm": 8.80769119544594, "learning_rate": 7.905050813143553e-07, "loss": 0.7466, "step": 10559 }, { "epoch": 1.4950803426063566, "grad_norm": 8.93905604761828, "learning_rate": 7.900868788636118e-07, "loss": 0.694, "step": 10560 }, { "epoch": 1.4952219154809938, "grad_norm": 8.959114880439806, "learning_rate": 7.896687663017269e-07, "loss": 0.6617, "step": 10561 }, { "epoch": 1.495363488355631, "grad_norm": 8.172835634562237, "learning_rate": 7.892507436506791e-07, "loss": 0.7389, "step": 10562 }, { "epoch": 1.4955050612302683, "grad_norm": 7.310176643293399, "learning_rate": 7.888328109324448e-07, "loss": 0.7092, "step": 10563 }, { "epoch": 1.4956466341049055, "grad_norm": 9.337526832552227, "learning_rate": 7.884149681689937e-07, "loss": 0.8015, "step": 10564 }, { "epoch": 1.4957882069795427, "grad_norm": 10.23999540030853, "learning_rate": 7.879972153822907e-07, "loss": 0.7254, "step": 10565 }, { "epoch": 1.49592977985418, "grad_norm": 8.07582446161773, "learning_rate": 7.875795525942984e-07, "loss": 0.6426, "step": 10566 }, { "epoch": 1.4960713527288172, "grad_norm": 8.061252704512842, "learning_rate": 7.871619798269711e-07, "loss": 0.7739, "step": 10567 }, { "epoch": 1.4962129256034544, "grad_norm": 15.135458112827516, "learning_rate": 7.867444971022626e-07, "loss": 0.6962, "step": 10568 }, { "epoch": 1.4963544984780917, "grad_norm": 9.124013350982464, "learning_rate": 7.863271044421175e-07, "loss": 0.7182, "step": 10569 }, { "epoch": 1.4964960713527289, "grad_norm": 10.269649744347761, "learning_rate": 7.859098018684793e-07, "loss": 0.7654, "step": 10570 }, { "epoch": 1.4966376442273661, "grad_norm": 8.2110271648589, "learning_rate": 7.854925894032842e-07, "loss": 0.726, "step": 10571 }, { "epoch": 1.4967792171020031, "grad_norm": 8.977425131255881, "learning_rate": 7.850754670684654e-07, "loss": 0.7178, "step": 10572 }, { "epoch": 1.4969207899766404, "grad_norm": 10.307416553071636, "learning_rate": 7.846584348859512e-07, "loss": 0.6873, "step": 10573 }, { "epoch": 1.4970623628512776, "grad_norm": 9.29689346439868, "learning_rate": 7.84241492877664e-07, "loss": 0.7668, "step": 10574 }, { "epoch": 1.4972039357259148, "grad_norm": 8.833364906494527, "learning_rate": 7.838246410655223e-07, "loss": 0.6311, "step": 10575 }, { "epoch": 1.497345508600552, "grad_norm": 9.098819748351337, "learning_rate": 7.834078794714389e-07, "loss": 0.6098, "step": 10576 }, { "epoch": 1.4974870814751893, "grad_norm": 8.349345984515232, "learning_rate": 7.829912081173238e-07, "loss": 0.7727, "step": 10577 }, { "epoch": 1.4976286543498265, "grad_norm": 7.289589228104829, "learning_rate": 7.825746270250803e-07, "loss": 0.705, "step": 10578 }, { "epoch": 1.4977702272244637, "grad_norm": 8.813622802197406, "learning_rate": 7.821581362166078e-07, "loss": 0.6764, "step": 10579 }, { "epoch": 1.497911800099101, "grad_norm": 9.612562987439926, "learning_rate": 7.817417357138018e-07, "loss": 0.7583, "step": 10580 }, { "epoch": 1.4980533729737382, "grad_norm": 10.522386210944056, "learning_rate": 7.813254255385511e-07, "loss": 0.7772, "step": 10581 }, { "epoch": 1.4981949458483754, "grad_norm": 9.208934459368962, "learning_rate": 7.809092057127421e-07, "loss": 0.6907, "step": 10582 }, { "epoch": 1.4983365187230127, "grad_norm": 9.787560130747002, "learning_rate": 7.804930762582533e-07, "loss": 0.7251, "step": 10583 }, { "epoch": 1.4984780915976499, "grad_norm": 10.722366657063898, "learning_rate": 7.800770371969624e-07, "loss": 0.7092, "step": 10584 }, { "epoch": 1.4986196644722871, "grad_norm": 8.066139996362446, "learning_rate": 7.796610885507391e-07, "loss": 0.776, "step": 10585 }, { "epoch": 1.4987612373469243, "grad_norm": 8.164597021975426, "learning_rate": 7.792452303414489e-07, "loss": 0.723, "step": 10586 }, { "epoch": 1.4989028102215616, "grad_norm": 8.99328065751666, "learning_rate": 7.788294625909546e-07, "loss": 0.5856, "step": 10587 }, { "epoch": 1.4990443830961988, "grad_norm": 8.814965302328195, "learning_rate": 7.784137853211113e-07, "loss": 0.6814, "step": 10588 }, { "epoch": 1.499185955970836, "grad_norm": 9.068404595851604, "learning_rate": 7.779981985537724e-07, "loss": 0.6289, "step": 10589 }, { "epoch": 1.4993275288454733, "grad_norm": 9.278888440482403, "learning_rate": 7.775827023107835e-07, "loss": 0.7446, "step": 10590 }, { "epoch": 1.4994691017201105, "grad_norm": 10.013784444766625, "learning_rate": 7.771672966139885e-07, "loss": 0.7152, "step": 10591 }, { "epoch": 1.4996106745947477, "grad_norm": 8.736452787018578, "learning_rate": 7.767519814852234e-07, "loss": 0.6547, "step": 10592 }, { "epoch": 1.499752247469385, "grad_norm": 9.657117884870473, "learning_rate": 7.763367569463224e-07, "loss": 0.7174, "step": 10593 }, { "epoch": 1.4998938203440222, "grad_norm": 11.763144425384777, "learning_rate": 7.759216230191119e-07, "loss": 0.7206, "step": 10594 }, { "epoch": 1.5000353932186594, "grad_norm": 9.416941095815073, "learning_rate": 7.755065797254172e-07, "loss": 0.6697, "step": 10595 }, { "epoch": 1.5001769660932966, "grad_norm": 10.790624462650218, "learning_rate": 7.750916270870556e-07, "loss": 0.7256, "step": 10596 }, { "epoch": 1.5003185389679339, "grad_norm": 9.179273594392049, "learning_rate": 7.746767651258405e-07, "loss": 0.7203, "step": 10597 }, { "epoch": 1.500460111842571, "grad_norm": 9.425560681085875, "learning_rate": 7.74261993863582e-07, "loss": 0.7005, "step": 10598 }, { "epoch": 1.5006016847172083, "grad_norm": 8.894915519257424, "learning_rate": 7.738473133220828e-07, "loss": 0.6626, "step": 10599 }, { "epoch": 1.5007432575918453, "grad_norm": 10.078492150531265, "learning_rate": 7.734327235231443e-07, "loss": 0.7283, "step": 10600 }, { "epoch": 1.5008848304664826, "grad_norm": 9.453206110834873, "learning_rate": 7.730182244885593e-07, "loss": 0.7822, "step": 10601 }, { "epoch": 1.5010264033411198, "grad_norm": 7.029985509127663, "learning_rate": 7.726038162401184e-07, "loss": 0.6945, "step": 10602 }, { "epoch": 1.501167976215757, "grad_norm": 8.31351413133544, "learning_rate": 7.721894987996076e-07, "loss": 0.7201, "step": 10603 }, { "epoch": 1.5013095490903943, "grad_norm": 8.72387457204814, "learning_rate": 7.717752721888058e-07, "loss": 0.8142, "step": 10604 }, { "epoch": 1.5014511219650315, "grad_norm": 10.349524047522857, "learning_rate": 7.713611364294904e-07, "loss": 0.7115, "step": 10605 }, { "epoch": 1.5015926948396687, "grad_norm": 7.822325852218204, "learning_rate": 7.709470915434309e-07, "loss": 0.5806, "step": 10606 }, { "epoch": 1.501734267714306, "grad_norm": 8.310256081501743, "learning_rate": 7.705331375523928e-07, "loss": 0.7447, "step": 10607 }, { "epoch": 1.5018758405889432, "grad_norm": 9.888435205727502, "learning_rate": 7.701192744781389e-07, "loss": 0.8044, "step": 10608 }, { "epoch": 1.5020174134635804, "grad_norm": 9.82094215674807, "learning_rate": 7.69705502342424e-07, "loss": 0.7421, "step": 10609 }, { "epoch": 1.5021589863382176, "grad_norm": 9.302489359766783, "learning_rate": 7.692918211670017e-07, "loss": 0.7114, "step": 10610 }, { "epoch": 1.5023005592128547, "grad_norm": 7.926438441133823, "learning_rate": 7.68878230973617e-07, "loss": 0.6618, "step": 10611 }, { "epoch": 1.5024421320874919, "grad_norm": 7.537086093349634, "learning_rate": 7.684647317840138e-07, "loss": 0.677, "step": 10612 }, { "epoch": 1.5025837049621291, "grad_norm": 9.284811961170613, "learning_rate": 7.680513236199275e-07, "loss": 0.7741, "step": 10613 }, { "epoch": 1.5027252778367663, "grad_norm": 7.1474835535789545, "learning_rate": 7.676380065030928e-07, "loss": 0.7172, "step": 10614 }, { "epoch": 1.5028668507114036, "grad_norm": 8.700957708152735, "learning_rate": 7.672247804552355e-07, "loss": 0.6755, "step": 10615 }, { "epoch": 1.5030084235860408, "grad_norm": 10.619161639842641, "learning_rate": 7.668116454980804e-07, "loss": 0.6858, "step": 10616 }, { "epoch": 1.503149996460678, "grad_norm": 8.354772024189042, "learning_rate": 7.663986016533446e-07, "loss": 0.7773, "step": 10617 }, { "epoch": 1.5032915693353153, "grad_norm": 8.380090788938947, "learning_rate": 7.659856489427409e-07, "loss": 0.7382, "step": 10618 }, { "epoch": 1.5034331422099525, "grad_norm": 9.037066108133404, "learning_rate": 7.655727873879792e-07, "loss": 0.7616, "step": 10619 }, { "epoch": 1.5035747150845897, "grad_norm": 9.704314178966076, "learning_rate": 7.651600170107626e-07, "loss": 0.856, "step": 10620 }, { "epoch": 1.503716287959227, "grad_norm": 9.562252664795604, "learning_rate": 7.647473378327908e-07, "loss": 0.777, "step": 10621 }, { "epoch": 1.5038578608338642, "grad_norm": 8.11520123526627, "learning_rate": 7.64334749875757e-07, "loss": 0.7493, "step": 10622 }, { "epoch": 1.5039994337085014, "grad_norm": 9.746638183065073, "learning_rate": 7.639222531613519e-07, "loss": 0.6603, "step": 10623 }, { "epoch": 1.5041410065831387, "grad_norm": 7.590188026964021, "learning_rate": 7.635098477112588e-07, "loss": 0.696, "step": 10624 }, { "epoch": 1.5042825794577759, "grad_norm": 8.770242872017837, "learning_rate": 7.63097533547158e-07, "loss": 0.6827, "step": 10625 }, { "epoch": 1.5044241523324131, "grad_norm": 7.34863767908892, "learning_rate": 7.626853106907256e-07, "loss": 0.7922, "step": 10626 }, { "epoch": 1.5045657252070503, "grad_norm": 8.291138240774709, "learning_rate": 7.622731791636312e-07, "loss": 0.6968, "step": 10627 }, { "epoch": 1.5047072980816876, "grad_norm": 7.654317308278417, "learning_rate": 7.6186113898754e-07, "loss": 0.7374, "step": 10628 }, { "epoch": 1.5048488709563248, "grad_norm": 8.307432078228205, "learning_rate": 7.614491901841118e-07, "loss": 0.6774, "step": 10629 }, { "epoch": 1.504990443830962, "grad_norm": 8.9380603961385, "learning_rate": 7.610373327750034e-07, "loss": 0.6123, "step": 10630 }, { "epoch": 1.5051320167055993, "grad_norm": 8.564614584780792, "learning_rate": 7.606255667818668e-07, "loss": 0.7179, "step": 10631 }, { "epoch": 1.5052735895802365, "grad_norm": 10.723413281564158, "learning_rate": 7.602138922263461e-07, "loss": 0.7222, "step": 10632 }, { "epoch": 1.5054151624548737, "grad_norm": 9.770256714117886, "learning_rate": 7.598023091300849e-07, "loss": 0.7802, "step": 10633 }, { "epoch": 1.505556735329511, "grad_norm": 8.108799202823976, "learning_rate": 7.593908175147177e-07, "loss": 0.7, "step": 10634 }, { "epoch": 1.5056983082041482, "grad_norm": 9.234629051470433, "learning_rate": 7.589794174018786e-07, "loss": 0.842, "step": 10635 }, { "epoch": 1.5058398810787854, "grad_norm": 7.609410342657271, "learning_rate": 7.585681088131921e-07, "loss": 0.6648, "step": 10636 }, { "epoch": 1.5059814539534226, "grad_norm": 10.015583103705273, "learning_rate": 7.581568917702828e-07, "loss": 0.7753, "step": 10637 }, { "epoch": 1.5061230268280599, "grad_norm": 9.458865202571086, "learning_rate": 7.577457662947668e-07, "loss": 0.7132, "step": 10638 }, { "epoch": 1.506264599702697, "grad_norm": 9.508963923449333, "learning_rate": 7.573347324082558e-07, "loss": 0.798, "step": 10639 }, { "epoch": 1.5064061725773343, "grad_norm": 10.62914312029336, "learning_rate": 7.569237901323595e-07, "loss": 0.7617, "step": 10640 }, { "epoch": 1.5065477454519716, "grad_norm": 8.563459133809525, "learning_rate": 7.565129394886792e-07, "loss": 0.6005, "step": 10641 }, { "epoch": 1.5066893183266086, "grad_norm": 9.414748500031621, "learning_rate": 7.561021804988141e-07, "loss": 0.736, "step": 10642 }, { "epoch": 1.5068308912012458, "grad_norm": 8.653720086638499, "learning_rate": 7.556915131843568e-07, "loss": 0.747, "step": 10643 }, { "epoch": 1.506972464075883, "grad_norm": 7.657992666787447, "learning_rate": 7.552809375668965e-07, "loss": 0.7889, "step": 10644 }, { "epoch": 1.5071140369505203, "grad_norm": 10.894464371545354, "learning_rate": 7.548704536680157e-07, "loss": 0.7614, "step": 10645 }, { "epoch": 1.5072556098251575, "grad_norm": 8.725849501562916, "learning_rate": 7.544600615092937e-07, "loss": 0.7299, "step": 10646 }, { "epoch": 1.5073971826997947, "grad_norm": 9.072815805101566, "learning_rate": 7.540497611123058e-07, "loss": 0.7486, "step": 10647 }, { "epoch": 1.507538755574432, "grad_norm": 9.36263466752177, "learning_rate": 7.536395524986201e-07, "loss": 0.7218, "step": 10648 }, { "epoch": 1.5076803284490692, "grad_norm": 10.209556620476658, "learning_rate": 7.532294356898006e-07, "loss": 0.7246, "step": 10649 }, { "epoch": 1.5078219013237064, "grad_norm": 9.616780127567266, "learning_rate": 7.528194107074069e-07, "loss": 0.8043, "step": 10650 }, { "epoch": 1.5079634741983436, "grad_norm": 8.677127491509287, "learning_rate": 7.524094775729948e-07, "loss": 0.6756, "step": 10651 }, { "epoch": 1.5081050470729807, "grad_norm": 7.715530032766604, "learning_rate": 7.519996363081123e-07, "loss": 0.6634, "step": 10652 }, { "epoch": 1.5082466199476179, "grad_norm": 11.052979591455015, "learning_rate": 7.515898869343058e-07, "loss": 0.7592, "step": 10653 }, { "epoch": 1.5083881928222551, "grad_norm": 8.724570679690885, "learning_rate": 7.51180229473116e-07, "loss": 0.7867, "step": 10654 }, { "epoch": 1.5085297656968923, "grad_norm": 8.660382217535702, "learning_rate": 7.507706639460768e-07, "loss": 0.6863, "step": 10655 }, { "epoch": 1.5086713385715296, "grad_norm": 8.214162496270495, "learning_rate": 7.503611903747202e-07, "loss": 0.7273, "step": 10656 }, { "epoch": 1.5088129114461668, "grad_norm": 7.938540893549311, "learning_rate": 7.499518087805704e-07, "loss": 0.6197, "step": 10657 }, { "epoch": 1.508954484320804, "grad_norm": 9.526806004837539, "learning_rate": 7.495425191851502e-07, "loss": 0.779, "step": 10658 }, { "epoch": 1.5090960571954413, "grad_norm": 7.628863747161428, "learning_rate": 7.491333216099744e-07, "loss": 0.5942, "step": 10659 }, { "epoch": 1.5092376300700785, "grad_norm": 8.696392091700883, "learning_rate": 7.487242160765535e-07, "loss": 0.7576, "step": 10660 }, { "epoch": 1.5093792029447157, "grad_norm": 8.103873619397094, "learning_rate": 7.483152026063955e-07, "loss": 0.6927, "step": 10661 }, { "epoch": 1.509520775819353, "grad_norm": 8.53282290759986, "learning_rate": 7.479062812210005e-07, "loss": 0.7497, "step": 10662 }, { "epoch": 1.5096623486939902, "grad_norm": 8.282848736450399, "learning_rate": 7.474974519418668e-07, "loss": 0.6988, "step": 10663 }, { "epoch": 1.5098039215686274, "grad_norm": 9.091097205556594, "learning_rate": 7.470887147904845e-07, "loss": 0.728, "step": 10664 }, { "epoch": 1.5099454944432646, "grad_norm": 8.502839007014023, "learning_rate": 7.466800697883422e-07, "loss": 0.6974, "step": 10665 }, { "epoch": 1.5100870673179019, "grad_norm": 8.7020313390765, "learning_rate": 7.462715169569204e-07, "loss": 0.6554, "step": 10666 }, { "epoch": 1.510228640192539, "grad_norm": 8.886945395461483, "learning_rate": 7.458630563176983e-07, "loss": 0.6474, "step": 10667 }, { "epoch": 1.5103702130671763, "grad_norm": 7.097278406460366, "learning_rate": 7.454546878921465e-07, "loss": 0.6152, "step": 10668 }, { "epoch": 1.5105117859418136, "grad_norm": 9.015098622523668, "learning_rate": 7.450464117017342e-07, "loss": 0.7992, "step": 10669 }, { "epoch": 1.5106533588164508, "grad_norm": 8.950152637735824, "learning_rate": 7.446382277679235e-07, "loss": 0.7802, "step": 10670 }, { "epoch": 1.510794931691088, "grad_norm": 10.080518429457912, "learning_rate": 7.442301361121718e-07, "loss": 0.7915, "step": 10671 }, { "epoch": 1.5109365045657253, "grad_norm": 8.930354750059712, "learning_rate": 7.438221367559331e-07, "loss": 0.6741, "step": 10672 }, { "epoch": 1.5110780774403625, "grad_norm": 8.59971339613161, "learning_rate": 7.434142297206546e-07, "loss": 0.7013, "step": 10673 }, { "epoch": 1.5112196503149997, "grad_norm": 9.567212906859332, "learning_rate": 7.43006415027781e-07, "loss": 0.7245, "step": 10674 }, { "epoch": 1.511361223189637, "grad_norm": 10.255050531390237, "learning_rate": 7.425986926987494e-07, "loss": 0.7398, "step": 10675 }, { "epoch": 1.5115027960642742, "grad_norm": 8.72723522683959, "learning_rate": 7.421910627549942e-07, "loss": 0.7262, "step": 10676 }, { "epoch": 1.5116443689389114, "grad_norm": 9.006633856784442, "learning_rate": 7.417835252179447e-07, "loss": 0.678, "step": 10677 }, { "epoch": 1.5117859418135486, "grad_norm": 8.521778704019098, "learning_rate": 7.413760801090239e-07, "loss": 0.7003, "step": 10678 }, { "epoch": 1.5119275146881859, "grad_norm": 9.786945866163101, "learning_rate": 7.409687274496516e-07, "loss": 0.7316, "step": 10679 }, { "epoch": 1.512069087562823, "grad_norm": 9.999708552881835, "learning_rate": 7.405614672612421e-07, "loss": 0.7714, "step": 10680 }, { "epoch": 1.5122106604374603, "grad_norm": 8.773672289267084, "learning_rate": 7.401542995652033e-07, "loss": 0.7082, "step": 10681 }, { "epoch": 1.5123522333120976, "grad_norm": 9.293636187412567, "learning_rate": 7.397472243829418e-07, "loss": 0.6988, "step": 10682 }, { "epoch": 1.5124938061867346, "grad_norm": 8.859592441803589, "learning_rate": 7.393402417358552e-07, "loss": 0.7526, "step": 10683 }, { "epoch": 1.5126353790613718, "grad_norm": 8.348292796419283, "learning_rate": 7.389333516453403e-07, "loss": 0.7511, "step": 10684 }, { "epoch": 1.512776951936009, "grad_norm": 9.588498744146756, "learning_rate": 7.385265541327852e-07, "loss": 0.7805, "step": 10685 }, { "epoch": 1.5129185248106463, "grad_norm": 8.876364589607432, "learning_rate": 7.381198492195762e-07, "loss": 0.6983, "step": 10686 }, { "epoch": 1.5130600976852835, "grad_norm": 9.439447265219059, "learning_rate": 7.377132369270926e-07, "loss": 0.7304, "step": 10687 }, { "epoch": 1.5132016705599207, "grad_norm": 8.133365022357847, "learning_rate": 7.373067172767107e-07, "loss": 0.6992, "step": 10688 }, { "epoch": 1.513343243434558, "grad_norm": 9.53023255968088, "learning_rate": 7.369002902897998e-07, "loss": 0.6446, "step": 10689 }, { "epoch": 1.5134848163091952, "grad_norm": 8.974351150732858, "learning_rate": 7.364939559877265e-07, "loss": 0.7944, "step": 10690 }, { "epoch": 1.5136263891838324, "grad_norm": 7.873815780488413, "learning_rate": 7.360877143918512e-07, "loss": 0.666, "step": 10691 }, { "epoch": 1.5137679620584696, "grad_norm": 9.683987676033583, "learning_rate": 7.356815655235286e-07, "loss": 0.7528, "step": 10692 }, { "epoch": 1.5139095349331069, "grad_norm": 8.984780901157071, "learning_rate": 7.352755094041114e-07, "loss": 0.6731, "step": 10693 }, { "epoch": 1.5140511078077439, "grad_norm": 7.844948240149517, "learning_rate": 7.348695460549443e-07, "loss": 0.6742, "step": 10694 }, { "epoch": 1.514192680682381, "grad_norm": 10.422058012354883, "learning_rate": 7.344636754973695e-07, "loss": 0.7678, "step": 10695 }, { "epoch": 1.5143342535570183, "grad_norm": 9.148342845502002, "learning_rate": 7.340578977527221e-07, "loss": 0.6815, "step": 10696 }, { "epoch": 1.5144758264316556, "grad_norm": 8.395976656186608, "learning_rate": 7.336522128423351e-07, "loss": 0.745, "step": 10697 }, { "epoch": 1.5146173993062928, "grad_norm": 8.26054956265916, "learning_rate": 7.332466207875336e-07, "loss": 0.7395, "step": 10698 }, { "epoch": 1.51475897218093, "grad_norm": 8.791850252328858, "learning_rate": 7.328411216096399e-07, "loss": 0.6823, "step": 10699 }, { "epoch": 1.5149005450555673, "grad_norm": 8.770259835407751, "learning_rate": 7.324357153299714e-07, "loss": 0.7049, "step": 10700 }, { "epoch": 1.5150421179302045, "grad_norm": 10.014750854623184, "learning_rate": 7.320304019698393e-07, "loss": 0.6519, "step": 10701 }, { "epoch": 1.5151836908048417, "grad_norm": 8.35690310999498, "learning_rate": 7.31625181550551e-07, "loss": 0.8133, "step": 10702 }, { "epoch": 1.515325263679479, "grad_norm": 9.270954214158088, "learning_rate": 7.312200540934073e-07, "loss": 0.6823, "step": 10703 }, { "epoch": 1.5154668365541162, "grad_norm": 9.612877283517602, "learning_rate": 7.308150196197064e-07, "loss": 0.7947, "step": 10704 }, { "epoch": 1.5156084094287534, "grad_norm": 9.699039797105682, "learning_rate": 7.30410078150742e-07, "loss": 0.7764, "step": 10705 }, { "epoch": 1.5157499823033906, "grad_norm": 11.076872201949444, "learning_rate": 7.300052297077992e-07, "loss": 0.783, "step": 10706 }, { "epoch": 1.5158915551780279, "grad_norm": 8.282949597113356, "learning_rate": 7.296004743121627e-07, "loss": 0.7193, "step": 10707 }, { "epoch": 1.516033128052665, "grad_norm": 10.12976790829763, "learning_rate": 7.291958119851081e-07, "loss": 0.7134, "step": 10708 }, { "epoch": 1.5161747009273023, "grad_norm": 8.138184165027617, "learning_rate": 7.287912427479105e-07, "loss": 0.6825, "step": 10709 }, { "epoch": 1.5163162738019396, "grad_norm": 8.3549765735711, "learning_rate": 7.283867666218355e-07, "loss": 0.7421, "step": 10710 }, { "epoch": 1.5164578466765768, "grad_norm": 10.484240739988898, "learning_rate": 7.27982383628148e-07, "loss": 0.7589, "step": 10711 }, { "epoch": 1.516599419551214, "grad_norm": 8.158286121869535, "learning_rate": 7.275780937881055e-07, "loss": 0.7766, "step": 10712 }, { "epoch": 1.5167409924258513, "grad_norm": 9.250922904497742, "learning_rate": 7.2717389712296e-07, "loss": 0.7181, "step": 10713 }, { "epoch": 1.5168825653004885, "grad_norm": 10.511921245959275, "learning_rate": 7.267697936539619e-07, "loss": 0.6703, "step": 10714 }, { "epoch": 1.5170241381751257, "grad_norm": 8.21673350381137, "learning_rate": 7.263657834023527e-07, "loss": 0.7036, "step": 10715 }, { "epoch": 1.517165711049763, "grad_norm": 8.844959836774848, "learning_rate": 7.259618663893725e-07, "loss": 0.6983, "step": 10716 }, { "epoch": 1.5173072839244002, "grad_norm": 7.057420102085071, "learning_rate": 7.255580426362535e-07, "loss": 0.6215, "step": 10717 }, { "epoch": 1.5174488567990374, "grad_norm": 7.609821909601408, "learning_rate": 7.25154312164226e-07, "loss": 0.6705, "step": 10718 }, { "epoch": 1.5175904296736746, "grad_norm": 8.681247574412458, "learning_rate": 7.247506749945118e-07, "loss": 0.7245, "step": 10719 }, { "epoch": 1.5177320025483119, "grad_norm": 8.630163885971719, "learning_rate": 7.243471311483322e-07, "loss": 0.6549, "step": 10720 }, { "epoch": 1.517873575422949, "grad_norm": 7.942987662651453, "learning_rate": 7.239436806468989e-07, "loss": 0.6974, "step": 10721 }, { "epoch": 1.5180151482975863, "grad_norm": 9.504762760675236, "learning_rate": 7.235403235114227e-07, "loss": 0.7654, "step": 10722 }, { "epoch": 1.5181567211722236, "grad_norm": 7.4975632841740865, "learning_rate": 7.231370597631071e-07, "loss": 0.6497, "step": 10723 }, { "epoch": 1.5182982940468608, "grad_norm": 11.477646041004146, "learning_rate": 7.227338894231509e-07, "loss": 0.6356, "step": 10724 }, { "epoch": 1.5184398669214978, "grad_norm": 7.210010869921401, "learning_rate": 7.223308125127495e-07, "loss": 0.6691, "step": 10725 }, { "epoch": 1.518581439796135, "grad_norm": 8.976171101535607, "learning_rate": 7.219278290530909e-07, "loss": 0.7139, "step": 10726 }, { "epoch": 1.5187230126707723, "grad_norm": 9.76080584216675, "learning_rate": 7.215249390653609e-07, "loss": 0.5775, "step": 10727 }, { "epoch": 1.5188645855454095, "grad_norm": 10.379676959646726, "learning_rate": 7.211221425707393e-07, "loss": 0.7566, "step": 10728 }, { "epoch": 1.5190061584200467, "grad_norm": 8.387976315386236, "learning_rate": 7.207194395903997e-07, "loss": 0.6539, "step": 10729 }, { "epoch": 1.519147731294684, "grad_norm": 8.174836360681832, "learning_rate": 7.203168301455129e-07, "loss": 0.8039, "step": 10730 }, { "epoch": 1.5192893041693212, "grad_norm": 11.339138638515562, "learning_rate": 7.199143142572429e-07, "loss": 0.7215, "step": 10731 }, { "epoch": 1.5194308770439584, "grad_norm": 9.491878451116076, "learning_rate": 7.195118919467506e-07, "loss": 0.682, "step": 10732 }, { "epoch": 1.5195724499185956, "grad_norm": 8.442042957390917, "learning_rate": 7.191095632351908e-07, "loss": 0.6128, "step": 10733 }, { "epoch": 1.5197140227932329, "grad_norm": 8.166920197372686, "learning_rate": 7.187073281437124e-07, "loss": 0.7231, "step": 10734 }, { "epoch": 1.5198555956678699, "grad_norm": 8.596510121385878, "learning_rate": 7.183051866934626e-07, "loss": 0.6545, "step": 10735 }, { "epoch": 1.519997168542507, "grad_norm": 10.173008259403824, "learning_rate": 7.179031389055796e-07, "loss": 0.7454, "step": 10736 }, { "epoch": 1.5201387414171443, "grad_norm": 9.25994297413744, "learning_rate": 7.175011848012009e-07, "loss": 0.7268, "step": 10737 }, { "epoch": 1.5202803142917816, "grad_norm": 8.447444678376904, "learning_rate": 7.170993244014548e-07, "loss": 0.7297, "step": 10738 }, { "epoch": 1.5204218871664188, "grad_norm": 8.477086317340447, "learning_rate": 7.166975577274687e-07, "loss": 0.7168, "step": 10739 }, { "epoch": 1.520563460041056, "grad_norm": 8.64259467689093, "learning_rate": 7.162958848003615e-07, "loss": 0.7186, "step": 10740 }, { "epoch": 1.5207050329156933, "grad_norm": 9.185084849439674, "learning_rate": 7.158943056412504e-07, "loss": 0.7282, "step": 10741 }, { "epoch": 1.5208466057903305, "grad_norm": 8.37541402675885, "learning_rate": 7.154928202712447e-07, "loss": 0.6671, "step": 10742 }, { "epoch": 1.5209881786649677, "grad_norm": 8.931082602360998, "learning_rate": 7.150914287114513e-07, "loss": 0.5436, "step": 10743 }, { "epoch": 1.521129751539605, "grad_norm": 8.610453404839612, "learning_rate": 7.146901309829709e-07, "loss": 0.7327, "step": 10744 }, { "epoch": 1.5212713244142422, "grad_norm": 9.435834927229587, "learning_rate": 7.142889271068984e-07, "loss": 0.8148, "step": 10745 }, { "epoch": 1.5214128972888794, "grad_norm": 8.37818552043019, "learning_rate": 7.138878171043262e-07, "loss": 0.7353, "step": 10746 }, { "epoch": 1.5215544701635166, "grad_norm": 10.034397855368514, "learning_rate": 7.134868009963389e-07, "loss": 0.7134, "step": 10747 }, { "epoch": 1.5216960430381539, "grad_norm": 9.068853426868655, "learning_rate": 7.13085878804019e-07, "loss": 0.7238, "step": 10748 }, { "epoch": 1.521837615912791, "grad_norm": 9.009075251636453, "learning_rate": 7.126850505484415e-07, "loss": 0.701, "step": 10749 }, { "epoch": 1.5219791887874283, "grad_norm": 9.218752482785682, "learning_rate": 7.122843162506781e-07, "loss": 0.6219, "step": 10750 }, { "epoch": 1.5221207616620656, "grad_norm": 8.771283232566283, "learning_rate": 7.118836759317963e-07, "loss": 0.7592, "step": 10751 }, { "epoch": 1.5222623345367028, "grad_norm": 9.86137164373196, "learning_rate": 7.114831296128552e-07, "loss": 0.7376, "step": 10752 }, { "epoch": 1.52240390741134, "grad_norm": 8.764049476835572, "learning_rate": 7.110826773149135e-07, "loss": 0.6818, "step": 10753 }, { "epoch": 1.5225454802859772, "grad_norm": 8.06430293779683, "learning_rate": 7.106823190590214e-07, "loss": 0.6191, "step": 10754 }, { "epoch": 1.5226870531606145, "grad_norm": 7.422378683566719, "learning_rate": 7.102820548662257e-07, "loss": 0.6712, "step": 10755 }, { "epoch": 1.5228286260352517, "grad_norm": 10.571624967599181, "learning_rate": 7.09881884757567e-07, "loss": 0.687, "step": 10756 }, { "epoch": 1.522970198909889, "grad_norm": 9.54672475843319, "learning_rate": 7.094818087540827e-07, "loss": 0.8271, "step": 10757 }, { "epoch": 1.5231117717845262, "grad_norm": 9.157999646185363, "learning_rate": 7.090818268768057e-07, "loss": 0.7321, "step": 10758 }, { "epoch": 1.5232533446591634, "grad_norm": 8.903574330809947, "learning_rate": 7.086819391467612e-07, "loss": 0.7802, "step": 10759 }, { "epoch": 1.5233949175338006, "grad_norm": 9.13336383601012, "learning_rate": 7.082821455849717e-07, "loss": 0.7412, "step": 10760 }, { "epoch": 1.5235364904084379, "grad_norm": 9.742974513369315, "learning_rate": 7.078824462124534e-07, "loss": 0.7135, "step": 10761 }, { "epoch": 1.523678063283075, "grad_norm": 9.780011278391695, "learning_rate": 7.074828410502193e-07, "loss": 0.7427, "step": 10762 }, { "epoch": 1.5238196361577123, "grad_norm": 9.94249172917272, "learning_rate": 7.07083330119275e-07, "loss": 0.7144, "step": 10763 }, { "epoch": 1.5239612090323496, "grad_norm": 10.007298285847863, "learning_rate": 7.066839134406239e-07, "loss": 0.6346, "step": 10764 }, { "epoch": 1.5241027819069868, "grad_norm": 9.5311571460641, "learning_rate": 7.062845910352622e-07, "loss": 0.7725, "step": 10765 }, { "epoch": 1.5242443547816238, "grad_norm": 7.190412909651246, "learning_rate": 7.058853629241816e-07, "loss": 0.7578, "step": 10766 }, { "epoch": 1.524385927656261, "grad_norm": 9.532948026895777, "learning_rate": 7.054862291283702e-07, "loss": 0.6453, "step": 10767 }, { "epoch": 1.5245275005308982, "grad_norm": 7.753445751439021, "learning_rate": 7.050871896688091e-07, "loss": 0.7934, "step": 10768 }, { "epoch": 1.5246690734055355, "grad_norm": 9.243223027496153, "learning_rate": 7.046882445664768e-07, "loss": 0.7677, "step": 10769 }, { "epoch": 1.5248106462801727, "grad_norm": 9.198157117616091, "learning_rate": 7.042893938423442e-07, "loss": 0.7701, "step": 10770 }, { "epoch": 1.52495221915481, "grad_norm": 8.57798714170873, "learning_rate": 7.038906375173799e-07, "loss": 0.7128, "step": 10771 }, { "epoch": 1.5250937920294472, "grad_norm": 9.758266368172526, "learning_rate": 7.034919756125447e-07, "loss": 0.7251, "step": 10772 }, { "epoch": 1.5252353649040844, "grad_norm": 10.074929089923769, "learning_rate": 7.030934081487969e-07, "loss": 0.7321, "step": 10773 }, { "epoch": 1.5253769377787216, "grad_norm": 10.241182209555516, "learning_rate": 7.026949351470894e-07, "loss": 0.7282, "step": 10774 }, { "epoch": 1.5255185106533589, "grad_norm": 10.616700644765604, "learning_rate": 7.022965566283693e-07, "loss": 0.7342, "step": 10775 }, { "epoch": 1.5256600835279959, "grad_norm": 8.067407814636388, "learning_rate": 7.018982726135787e-07, "loss": 0.6836, "step": 10776 }, { "epoch": 1.525801656402633, "grad_norm": 10.487188606958954, "learning_rate": 7.015000831236543e-07, "loss": 0.8104, "step": 10777 }, { "epoch": 1.5259432292772703, "grad_norm": 8.903006193815063, "learning_rate": 7.011019881795298e-07, "loss": 0.692, "step": 10778 }, { "epoch": 1.5260848021519076, "grad_norm": 8.737305942721632, "learning_rate": 7.00703987802133e-07, "loss": 0.694, "step": 10779 }, { "epoch": 1.5262263750265448, "grad_norm": 9.607158026782036, "learning_rate": 7.003060820123852e-07, "loss": 0.686, "step": 10780 }, { "epoch": 1.526367947901182, "grad_norm": 8.678674400117895, "learning_rate": 6.999082708312055e-07, "loss": 0.686, "step": 10781 }, { "epoch": 1.5265095207758193, "grad_norm": 8.158528327624795, "learning_rate": 6.99510554279505e-07, "loss": 0.704, "step": 10782 }, { "epoch": 1.5266510936504565, "grad_norm": 7.49662374160484, "learning_rate": 6.991129323781931e-07, "loss": 0.657, "step": 10783 }, { "epoch": 1.5267926665250937, "grad_norm": 8.36770813477089, "learning_rate": 6.987154051481707e-07, "loss": 0.6569, "step": 10784 }, { "epoch": 1.526934239399731, "grad_norm": 8.760163861836745, "learning_rate": 6.98317972610337e-07, "loss": 0.6586, "step": 10785 }, { "epoch": 1.5270758122743682, "grad_norm": 7.391141651636151, "learning_rate": 6.979206347855843e-07, "loss": 0.7087, "step": 10786 }, { "epoch": 1.5272173851490054, "grad_norm": 10.254081843734612, "learning_rate": 6.975233916947993e-07, "loss": 0.8164, "step": 10787 }, { "epoch": 1.5273589580236426, "grad_norm": 10.230064917215786, "learning_rate": 6.971262433588663e-07, "loss": 0.739, "step": 10788 }, { "epoch": 1.5275005308982799, "grad_norm": 9.06048477391684, "learning_rate": 6.967291897986617e-07, "loss": 0.7651, "step": 10789 }, { "epoch": 1.527642103772917, "grad_norm": 7.609073954116163, "learning_rate": 6.963322310350598e-07, "loss": 0.7317, "step": 10790 }, { "epoch": 1.5277836766475543, "grad_norm": 7.560971949026451, "learning_rate": 6.959353670889269e-07, "loss": 0.6517, "step": 10791 }, { "epoch": 1.5279252495221916, "grad_norm": 8.262221098793686, "learning_rate": 6.955385979811275e-07, "loss": 0.6982, "step": 10792 }, { "epoch": 1.5280668223968288, "grad_norm": 9.762493638587276, "learning_rate": 6.951419237325174e-07, "loss": 0.7342, "step": 10793 }, { "epoch": 1.528208395271466, "grad_norm": 8.963269197237604, "learning_rate": 6.947453443639515e-07, "loss": 0.7247, "step": 10794 }, { "epoch": 1.5283499681461032, "grad_norm": 8.85219642409508, "learning_rate": 6.943488598962761e-07, "loss": 0.8381, "step": 10795 }, { "epoch": 1.5284915410207405, "grad_norm": 7.594855836325042, "learning_rate": 6.939524703503356e-07, "loss": 0.7483, "step": 10796 }, { "epoch": 1.5286331138953777, "grad_norm": 8.009292450895076, "learning_rate": 6.93556175746967e-07, "loss": 0.7417, "step": 10797 }, { "epoch": 1.528774686770015, "grad_norm": 9.908265302941608, "learning_rate": 6.931599761070027e-07, "loss": 0.7003, "step": 10798 }, { "epoch": 1.5289162596446522, "grad_norm": 10.110651846528924, "learning_rate": 6.927638714512716e-07, "loss": 0.6864, "step": 10799 }, { "epoch": 1.5290578325192894, "grad_norm": 10.364806533531587, "learning_rate": 6.923678618005958e-07, "loss": 0.7908, "step": 10800 }, { "epoch": 1.5291994053939266, "grad_norm": 7.966023775387509, "learning_rate": 6.919719471757938e-07, "loss": 0.7528, "step": 10801 }, { "epoch": 1.5293409782685639, "grad_norm": 9.668421312008693, "learning_rate": 6.915761275976787e-07, "loss": 0.6933, "step": 10802 }, { "epoch": 1.529482551143201, "grad_norm": 8.221077825046345, "learning_rate": 6.911804030870578e-07, "loss": 0.7071, "step": 10803 }, { "epoch": 1.5296241240178383, "grad_norm": 8.15046995715345, "learning_rate": 6.90784773664735e-07, "loss": 0.7561, "step": 10804 }, { "epoch": 1.5297656968924755, "grad_norm": 9.303915073417178, "learning_rate": 6.903892393515068e-07, "loss": 0.6563, "step": 10805 }, { "epoch": 1.5299072697671128, "grad_norm": 10.23534443464414, "learning_rate": 6.89993800168168e-07, "loss": 0.827, "step": 10806 }, { "epoch": 1.53004884264175, "grad_norm": 9.02041683747902, "learning_rate": 6.895984561355054e-07, "loss": 0.7408, "step": 10807 }, { "epoch": 1.530190415516387, "grad_norm": 9.015947835288996, "learning_rate": 6.892032072743013e-07, "loss": 0.6615, "step": 10808 }, { "epoch": 1.5303319883910242, "grad_norm": 9.406616609342397, "learning_rate": 6.888080536053351e-07, "loss": 0.8207, "step": 10809 }, { "epoch": 1.5304735612656615, "grad_norm": 8.311737713961119, "learning_rate": 6.884129951493785e-07, "loss": 0.7763, "step": 10810 }, { "epoch": 1.5306151341402987, "grad_norm": 9.920182788149267, "learning_rate": 6.880180319272006e-07, "loss": 0.6957, "step": 10811 }, { "epoch": 1.530756707014936, "grad_norm": 8.110569742794546, "learning_rate": 6.876231639595629e-07, "loss": 0.7191, "step": 10812 }, { "epoch": 1.5308982798895732, "grad_norm": 8.26228666044546, "learning_rate": 6.872283912672247e-07, "loss": 0.7284, "step": 10813 }, { "epoch": 1.5310398527642104, "grad_norm": 8.936987375350151, "learning_rate": 6.868337138709377e-07, "loss": 0.7437, "step": 10814 }, { "epoch": 1.5311814256388476, "grad_norm": 7.919740046849263, "learning_rate": 6.864391317914512e-07, "loss": 0.6236, "step": 10815 }, { "epoch": 1.5313229985134849, "grad_norm": 8.367614222311968, "learning_rate": 6.860446450495068e-07, "loss": 0.7682, "step": 10816 }, { "epoch": 1.531464571388122, "grad_norm": 8.153082572385054, "learning_rate": 6.856502536658433e-07, "loss": 0.7382, "step": 10817 }, { "epoch": 1.531606144262759, "grad_norm": 9.568094848290276, "learning_rate": 6.852559576611931e-07, "loss": 0.7064, "step": 10818 }, { "epoch": 1.5317477171373963, "grad_norm": 7.885418252344278, "learning_rate": 6.848617570562832e-07, "loss": 0.701, "step": 10819 }, { "epoch": 1.5318892900120336, "grad_norm": 9.185011338510924, "learning_rate": 6.844676518718385e-07, "loss": 0.7177, "step": 10820 }, { "epoch": 1.5320308628866708, "grad_norm": 8.584342329354012, "learning_rate": 6.840736421285746e-07, "loss": 0.7565, "step": 10821 }, { "epoch": 1.532172435761308, "grad_norm": 9.332669325323803, "learning_rate": 6.83679727847206e-07, "loss": 0.6928, "step": 10822 }, { "epoch": 1.5323140086359452, "grad_norm": 9.265646408596684, "learning_rate": 6.832859090484392e-07, "loss": 0.7135, "step": 10823 }, { "epoch": 1.5324555815105825, "grad_norm": 8.584833353565816, "learning_rate": 6.828921857529774e-07, "loss": 0.7283, "step": 10824 }, { "epoch": 1.5325971543852197, "grad_norm": 7.879706535490342, "learning_rate": 6.824985579815194e-07, "loss": 0.7638, "step": 10825 }, { "epoch": 1.532738727259857, "grad_norm": 10.232737076708982, "learning_rate": 6.821050257547562e-07, "loss": 0.8547, "step": 10826 }, { "epoch": 1.5328803001344942, "grad_norm": 7.381685491856422, "learning_rate": 6.817115890933773e-07, "loss": 0.7061, "step": 10827 }, { "epoch": 1.5330218730091314, "grad_norm": 8.98727110251589, "learning_rate": 6.813182480180641e-07, "loss": 0.6865, "step": 10828 }, { "epoch": 1.5331634458837686, "grad_norm": 8.052034431364953, "learning_rate": 6.809250025494946e-07, "loss": 0.78, "step": 10829 }, { "epoch": 1.5333050187584059, "grad_norm": 9.855315084389623, "learning_rate": 6.805318527083407e-07, "loss": 0.793, "step": 10830 }, { "epoch": 1.533446591633043, "grad_norm": 10.185479273804908, "learning_rate": 6.801387985152705e-07, "loss": 0.7282, "step": 10831 }, { "epoch": 1.5335881645076803, "grad_norm": 8.94462333589815, "learning_rate": 6.797458399909476e-07, "loss": 0.6892, "step": 10832 }, { "epoch": 1.5337297373823175, "grad_norm": 8.281824401512052, "learning_rate": 6.793529771560278e-07, "loss": 0.752, "step": 10833 }, { "epoch": 1.5338713102569548, "grad_norm": 8.61902179026679, "learning_rate": 6.789602100311654e-07, "loss": 0.6422, "step": 10834 }, { "epoch": 1.534012883131592, "grad_norm": 7.951808977945114, "learning_rate": 6.785675386370061e-07, "loss": 0.6906, "step": 10835 }, { "epoch": 1.5341544560062292, "grad_norm": 9.149765478578578, "learning_rate": 6.781749629941938e-07, "loss": 0.7106, "step": 10836 }, { "epoch": 1.5342960288808665, "grad_norm": 8.146928978592275, "learning_rate": 6.777824831233645e-07, "loss": 0.7157, "step": 10837 }, { "epoch": 1.5344376017555037, "grad_norm": 8.374670221468985, "learning_rate": 6.773900990451523e-07, "loss": 0.6536, "step": 10838 }, { "epoch": 1.534579174630141, "grad_norm": 9.419368885061838, "learning_rate": 6.769978107801837e-07, "loss": 0.7323, "step": 10839 }, { "epoch": 1.5347207475047782, "grad_norm": 9.420816188044737, "learning_rate": 6.766056183490799e-07, "loss": 0.6788, "step": 10840 }, { "epoch": 1.5348623203794154, "grad_norm": 8.422412182833316, "learning_rate": 6.7621352177246e-07, "loss": 0.6611, "step": 10841 }, { "epoch": 1.5350038932540526, "grad_norm": 7.971357569693273, "learning_rate": 6.758215210709345e-07, "loss": 0.6564, "step": 10842 }, { "epoch": 1.5351454661286899, "grad_norm": 10.711404045956916, "learning_rate": 6.754296162651122e-07, "loss": 0.7312, "step": 10843 }, { "epoch": 1.535287039003327, "grad_norm": 7.780542303174897, "learning_rate": 6.750378073755939e-07, "loss": 0.7154, "step": 10844 }, { "epoch": 1.5354286118779643, "grad_norm": 8.279473571259679, "learning_rate": 6.746460944229783e-07, "loss": 0.6656, "step": 10845 }, { "epoch": 1.5355701847526015, "grad_norm": 9.706621360184009, "learning_rate": 6.742544774278553e-07, "loss": 0.6986, "step": 10846 }, { "epoch": 1.5357117576272388, "grad_norm": 9.128899146204981, "learning_rate": 6.738629564108134e-07, "loss": 0.6115, "step": 10847 }, { "epoch": 1.535853330501876, "grad_norm": 8.365022095937784, "learning_rate": 6.734715313924348e-07, "loss": 0.7273, "step": 10848 }, { "epoch": 1.535994903376513, "grad_norm": 9.016269389747407, "learning_rate": 6.730802023932962e-07, "loss": 0.7307, "step": 10849 }, { "epoch": 1.5361364762511502, "grad_norm": 8.263086286049731, "learning_rate": 6.726889694339689e-07, "loss": 0.6209, "step": 10850 }, { "epoch": 1.5362780491257875, "grad_norm": 10.604814091212523, "learning_rate": 6.72297832535019e-07, "loss": 0.7751, "step": 10851 }, { "epoch": 1.5364196220004247, "grad_norm": 8.922773700085063, "learning_rate": 6.719067917170105e-07, "loss": 0.6738, "step": 10852 }, { "epoch": 1.536561194875062, "grad_norm": 9.923927875716442, "learning_rate": 6.715158470004979e-07, "loss": 0.801, "step": 10853 }, { "epoch": 1.5367027677496992, "grad_norm": 10.349995090649312, "learning_rate": 6.711249984060337e-07, "loss": 0.75, "step": 10854 }, { "epoch": 1.5368443406243364, "grad_norm": 10.021706102290347, "learning_rate": 6.707342459541655e-07, "loss": 0.7752, "step": 10855 }, { "epoch": 1.5369859134989736, "grad_norm": 9.71981678103224, "learning_rate": 6.703435896654334e-07, "loss": 0.7544, "step": 10856 }, { "epoch": 1.5371274863736109, "grad_norm": 8.315831026078925, "learning_rate": 6.699530295603751e-07, "loss": 0.6882, "step": 10857 }, { "epoch": 1.537269059248248, "grad_norm": 8.914789542956964, "learning_rate": 6.695625656595209e-07, "loss": 0.6796, "step": 10858 }, { "epoch": 1.537410632122885, "grad_norm": 9.695959323206013, "learning_rate": 6.691721979833984e-07, "loss": 0.723, "step": 10859 }, { "epoch": 1.5375522049975223, "grad_norm": 9.334549370520442, "learning_rate": 6.687819265525286e-07, "loss": 0.775, "step": 10860 }, { "epoch": 1.5376937778721596, "grad_norm": 10.525913052132564, "learning_rate": 6.683917513874266e-07, "loss": 0.767, "step": 10861 }, { "epoch": 1.5378353507467968, "grad_norm": 8.849933309357654, "learning_rate": 6.680016725086053e-07, "loss": 0.7852, "step": 10862 }, { "epoch": 1.537976923621434, "grad_norm": 7.7712335423833245, "learning_rate": 6.676116899365692e-07, "loss": 0.6096, "step": 10863 }, { "epoch": 1.5381184964960712, "grad_norm": 9.44407455118912, "learning_rate": 6.67221803691821e-07, "loss": 0.7063, "step": 10864 }, { "epoch": 1.5382600693707085, "grad_norm": 9.191722496418823, "learning_rate": 6.668320137948556e-07, "loss": 0.7031, "step": 10865 }, { "epoch": 1.5384016422453457, "grad_norm": 8.058200369852042, "learning_rate": 6.664423202661649e-07, "loss": 0.7687, "step": 10866 }, { "epoch": 1.538543215119983, "grad_norm": 9.042534664932388, "learning_rate": 6.660527231262334e-07, "loss": 0.6885, "step": 10867 }, { "epoch": 1.5386847879946202, "grad_norm": 8.654713628605169, "learning_rate": 6.656632223955437e-07, "loss": 0.6928, "step": 10868 }, { "epoch": 1.5388263608692574, "grad_norm": 9.614931066526816, "learning_rate": 6.652738180945698e-07, "loss": 0.7514, "step": 10869 }, { "epoch": 1.5389679337438946, "grad_norm": 9.823215343887297, "learning_rate": 6.648845102437839e-07, "loss": 0.6007, "step": 10870 }, { "epoch": 1.5391095066185319, "grad_norm": 8.313382897778625, "learning_rate": 6.644952988636514e-07, "loss": 0.6674, "step": 10871 }, { "epoch": 1.539251079493169, "grad_norm": 9.747369655961599, "learning_rate": 6.641061839746313e-07, "loss": 0.7623, "step": 10872 }, { "epoch": 1.5393926523678063, "grad_norm": 9.034505184875753, "learning_rate": 6.637171655971811e-07, "loss": 0.7082, "step": 10873 }, { "epoch": 1.5395342252424435, "grad_norm": 8.895143242422948, "learning_rate": 6.633282437517496e-07, "loss": 0.7324, "step": 10874 }, { "epoch": 1.5396757981170808, "grad_norm": 9.911551516013517, "learning_rate": 6.629394184587826e-07, "loss": 0.6346, "step": 10875 }, { "epoch": 1.539817370991718, "grad_norm": 9.227339748168212, "learning_rate": 6.625506897387215e-07, "loss": 0.7519, "step": 10876 }, { "epoch": 1.5399589438663552, "grad_norm": 10.380249166272757, "learning_rate": 6.621620576119999e-07, "loss": 0.7396, "step": 10877 }, { "epoch": 1.5401005167409925, "grad_norm": 10.070385609339931, "learning_rate": 6.617735220990495e-07, "loss": 0.7566, "step": 10878 }, { "epoch": 1.5402420896156297, "grad_norm": 11.289068244483833, "learning_rate": 6.613850832202934e-07, "loss": 0.7312, "step": 10879 }, { "epoch": 1.540383662490267, "grad_norm": 8.889337697884542, "learning_rate": 6.609967409961531e-07, "loss": 0.745, "step": 10880 }, { "epoch": 1.5405252353649042, "grad_norm": 8.41020795821308, "learning_rate": 6.606084954470434e-07, "loss": 0.7856, "step": 10881 }, { "epoch": 1.5406668082395414, "grad_norm": 11.172112650444353, "learning_rate": 6.602203465933727e-07, "loss": 0.7436, "step": 10882 }, { "epoch": 1.5408083811141786, "grad_norm": 9.39140816721759, "learning_rate": 6.598322944555471e-07, "loss": 0.8074, "step": 10883 }, { "epoch": 1.5409499539888158, "grad_norm": 11.232173104367844, "learning_rate": 6.594443390539651e-07, "loss": 0.6867, "step": 10884 }, { "epoch": 1.541091526863453, "grad_norm": 10.49519129040381, "learning_rate": 6.590564804090224e-07, "loss": 0.7045, "step": 10885 }, { "epoch": 1.5412330997380903, "grad_norm": 8.977492268536945, "learning_rate": 6.586687185411073e-07, "loss": 0.8053, "step": 10886 }, { "epoch": 1.5413746726127275, "grad_norm": 10.113347636417878, "learning_rate": 6.582810534706055e-07, "loss": 0.7289, "step": 10887 }, { "epoch": 1.5415162454873648, "grad_norm": 9.564137437037946, "learning_rate": 6.578934852178945e-07, "loss": 0.7205, "step": 10888 }, { "epoch": 1.541657818362002, "grad_norm": 9.05769715471775, "learning_rate": 6.575060138033504e-07, "loss": 0.768, "step": 10889 }, { "epoch": 1.541799391236639, "grad_norm": 8.531957352486499, "learning_rate": 6.571186392473406e-07, "loss": 0.634, "step": 10890 }, { "epoch": 1.5419409641112762, "grad_norm": 8.682506855336698, "learning_rate": 6.567313615702304e-07, "loss": 0.6491, "step": 10891 }, { "epoch": 1.5420825369859135, "grad_norm": 8.177186483392664, "learning_rate": 6.563441807923782e-07, "loss": 0.6337, "step": 10892 }, { "epoch": 1.5422241098605507, "grad_norm": 8.755992036623272, "learning_rate": 6.559570969341369e-07, "loss": 0.5828, "step": 10893 }, { "epoch": 1.542365682735188, "grad_norm": 10.953555459176972, "learning_rate": 6.555701100158571e-07, "loss": 0.7217, "step": 10894 }, { "epoch": 1.5425072556098252, "grad_norm": 8.32605664761635, "learning_rate": 6.551832200578803e-07, "loss": 0.639, "step": 10895 }, { "epoch": 1.5426488284844624, "grad_norm": 7.84031839269167, "learning_rate": 6.547964270805468e-07, "loss": 0.7403, "step": 10896 }, { "epoch": 1.5427904013590996, "grad_norm": 7.995896240538213, "learning_rate": 6.544097311041888e-07, "loss": 0.5445, "step": 10897 }, { "epoch": 1.5429319742337368, "grad_norm": 9.304967509299594, "learning_rate": 6.54023132149135e-07, "loss": 0.6817, "step": 10898 }, { "epoch": 1.543073547108374, "grad_norm": 9.026037064668715, "learning_rate": 6.536366302357094e-07, "loss": 0.5817, "step": 10899 }, { "epoch": 1.5432151199830113, "grad_norm": 10.062197473373502, "learning_rate": 6.532502253842288e-07, "loss": 0.6506, "step": 10900 }, { "epoch": 1.5433566928576483, "grad_norm": 8.430217849975579, "learning_rate": 6.528639176150072e-07, "loss": 0.6344, "step": 10901 }, { "epoch": 1.5434982657322855, "grad_norm": 7.235575038832652, "learning_rate": 6.524777069483526e-07, "loss": 0.6211, "step": 10902 }, { "epoch": 1.5436398386069228, "grad_norm": 9.432391065933555, "learning_rate": 6.520915934045674e-07, "loss": 0.6314, "step": 10903 }, { "epoch": 1.54378141148156, "grad_norm": 8.359354921120746, "learning_rate": 6.517055770039482e-07, "loss": 0.6734, "step": 10904 }, { "epoch": 1.5439229843561972, "grad_norm": 8.72482514739058, "learning_rate": 6.51319657766789e-07, "loss": 0.6472, "step": 10905 }, { "epoch": 1.5440645572308345, "grad_norm": 9.592776966764214, "learning_rate": 6.509338357133776e-07, "loss": 0.6923, "step": 10906 }, { "epoch": 1.5442061301054717, "grad_norm": 10.204868979615235, "learning_rate": 6.50548110863995e-07, "loss": 0.693, "step": 10907 }, { "epoch": 1.544347702980109, "grad_norm": 8.9877613353271, "learning_rate": 6.501624832389197e-07, "loss": 0.7423, "step": 10908 }, { "epoch": 1.5444892758547462, "grad_norm": 9.74702995252953, "learning_rate": 6.497769528584227e-07, "loss": 0.7202, "step": 10909 }, { "epoch": 1.5446308487293834, "grad_norm": 10.52003964492812, "learning_rate": 6.493915197427727e-07, "loss": 0.7157, "step": 10910 }, { "epoch": 1.5447724216040206, "grad_norm": 9.395980754860194, "learning_rate": 6.490061839122297e-07, "loss": 0.6795, "step": 10911 }, { "epoch": 1.5449139944786578, "grad_norm": 10.79767734842107, "learning_rate": 6.486209453870523e-07, "loss": 0.7039, "step": 10912 }, { "epoch": 1.545055567353295, "grad_norm": 10.004698221897074, "learning_rate": 6.482358041874914e-07, "loss": 0.717, "step": 10913 }, { "epoch": 1.5451971402279323, "grad_norm": 9.154382925440986, "learning_rate": 6.478507603337928e-07, "loss": 0.7594, "step": 10914 }, { "epoch": 1.5453387131025695, "grad_norm": 9.394795182103326, "learning_rate": 6.474658138461992e-07, "loss": 0.7098, "step": 10915 }, { "epoch": 1.5454802859772068, "grad_norm": 9.123183396203418, "learning_rate": 6.470809647449458e-07, "loss": 0.6262, "step": 10916 }, { "epoch": 1.545621858851844, "grad_norm": 8.703558042974256, "learning_rate": 6.466962130502655e-07, "loss": 0.7223, "step": 10917 }, { "epoch": 1.5457634317264812, "grad_norm": 9.760303237456784, "learning_rate": 6.463115587823824e-07, "loss": 0.7524, "step": 10918 }, { "epoch": 1.5459050046011185, "grad_norm": 9.193529291570291, "learning_rate": 6.459270019615191e-07, "loss": 0.7609, "step": 10919 }, { "epoch": 1.5460465774757557, "grad_norm": 9.32808246954291, "learning_rate": 6.455425426078904e-07, "loss": 0.6896, "step": 10920 }, { "epoch": 1.546188150350393, "grad_norm": 12.453204644114829, "learning_rate": 6.451581807417074e-07, "loss": 0.7306, "step": 10921 }, { "epoch": 1.5463297232250302, "grad_norm": 8.484559789510435, "learning_rate": 6.447739163831765e-07, "loss": 0.7279, "step": 10922 }, { "epoch": 1.5464712960996674, "grad_norm": 8.955817385312326, "learning_rate": 6.443897495524976e-07, "loss": 0.7659, "step": 10923 }, { "epoch": 1.5466128689743046, "grad_norm": 8.941651560500137, "learning_rate": 6.440056802698658e-07, "loss": 0.5905, "step": 10924 }, { "epoch": 1.5467544418489418, "grad_norm": 7.6145445635252615, "learning_rate": 6.436217085554708e-07, "loss": 0.6589, "step": 10925 }, { "epoch": 1.546896014723579, "grad_norm": 7.323154349740756, "learning_rate": 6.432378344294992e-07, "loss": 0.694, "step": 10926 }, { "epoch": 1.5470375875982163, "grad_norm": 9.465989922346258, "learning_rate": 6.428540579121296e-07, "loss": 0.6527, "step": 10927 }, { "epoch": 1.5471791604728535, "grad_norm": 8.87488136749497, "learning_rate": 6.424703790235374e-07, "loss": 0.6615, "step": 10928 }, { "epoch": 1.5473207333474908, "grad_norm": 8.495350632012775, "learning_rate": 6.420867977838929e-07, "loss": 0.7695, "step": 10929 }, { "epoch": 1.547462306222128, "grad_norm": 10.146806757691293, "learning_rate": 6.417033142133594e-07, "loss": 0.848, "step": 10930 }, { "epoch": 1.5476038790967652, "grad_norm": 8.789874094472554, "learning_rate": 6.413199283320979e-07, "loss": 0.6888, "step": 10931 }, { "epoch": 1.5477454519714022, "grad_norm": 8.26747269341291, "learning_rate": 6.40936640160261e-07, "loss": 0.6922, "step": 10932 }, { "epoch": 1.5478870248460395, "grad_norm": 7.224179294528354, "learning_rate": 6.405534497179996e-07, "loss": 0.7762, "step": 10933 }, { "epoch": 1.5480285977206767, "grad_norm": 8.69433281135867, "learning_rate": 6.401703570254569e-07, "loss": 0.599, "step": 10934 }, { "epoch": 1.548170170595314, "grad_norm": 8.682907097935962, "learning_rate": 6.397873621027711e-07, "loss": 0.6132, "step": 10935 }, { "epoch": 1.5483117434699512, "grad_norm": 7.93607443676156, "learning_rate": 6.394044649700773e-07, "loss": 0.662, "step": 10936 }, { "epoch": 1.5484533163445884, "grad_norm": 9.79367689776084, "learning_rate": 6.390216656475027e-07, "loss": 0.7276, "step": 10937 }, { "epoch": 1.5485948892192256, "grad_norm": 9.668234292439136, "learning_rate": 6.386389641551721e-07, "loss": 0.7434, "step": 10938 }, { "epoch": 1.5487364620938628, "grad_norm": 8.208092339039041, "learning_rate": 6.382563605132027e-07, "loss": 0.6746, "step": 10939 }, { "epoch": 1.5488780349685, "grad_norm": 9.41195353711256, "learning_rate": 6.37873854741709e-07, "loss": 0.888, "step": 10940 }, { "epoch": 1.5490196078431373, "grad_norm": 8.172631660864214, "learning_rate": 6.374914468607976e-07, "loss": 0.6389, "step": 10941 }, { "epoch": 1.5491611807177743, "grad_norm": 9.207464623073838, "learning_rate": 6.37109136890573e-07, "loss": 0.703, "step": 10942 }, { "epoch": 1.5493027535924115, "grad_norm": 9.74644718484899, "learning_rate": 6.367269248511309e-07, "loss": 0.6917, "step": 10943 }, { "epoch": 1.5494443264670488, "grad_norm": 8.89905908304224, "learning_rate": 6.363448107625653e-07, "loss": 0.6849, "step": 10944 }, { "epoch": 1.549585899341686, "grad_norm": 10.316106362192773, "learning_rate": 6.359627946449648e-07, "loss": 0.707, "step": 10945 }, { "epoch": 1.5497274722163232, "grad_norm": 9.412569172922906, "learning_rate": 6.355808765184088e-07, "loss": 0.7174, "step": 10946 }, { "epoch": 1.5498690450909605, "grad_norm": 9.003738050896425, "learning_rate": 6.351990564029767e-07, "loss": 0.6888, "step": 10947 }, { "epoch": 1.5500106179655977, "grad_norm": 8.522052655887933, "learning_rate": 6.348173343187392e-07, "loss": 0.701, "step": 10948 }, { "epoch": 1.550152190840235, "grad_norm": 11.000677954849355, "learning_rate": 6.344357102857643e-07, "loss": 0.7268, "step": 10949 }, { "epoch": 1.5502937637148722, "grad_norm": 8.58847361102572, "learning_rate": 6.340541843241124e-07, "loss": 0.6953, "step": 10950 }, { "epoch": 1.5504353365895094, "grad_norm": 7.919395404819793, "learning_rate": 6.336727564538406e-07, "loss": 0.7235, "step": 10951 }, { "epoch": 1.5505769094641466, "grad_norm": 7.510971436841498, "learning_rate": 6.332914266950011e-07, "loss": 0.6513, "step": 10952 }, { "epoch": 1.5507184823387838, "grad_norm": 8.360374775932176, "learning_rate": 6.329101950676389e-07, "loss": 0.7555, "step": 10953 }, { "epoch": 1.550860055213421, "grad_norm": 8.986047738303983, "learning_rate": 6.325290615917961e-07, "loss": 0.775, "step": 10954 }, { "epoch": 1.5510016280880583, "grad_norm": 9.475257777429091, "learning_rate": 6.321480262875082e-07, "loss": 0.7027, "step": 10955 }, { "epoch": 1.5511432009626955, "grad_norm": 8.441216900496661, "learning_rate": 6.317670891748051e-07, "loss": 0.659, "step": 10956 }, { "epoch": 1.5512847738373328, "grad_norm": 7.943865528962354, "learning_rate": 6.313862502737139e-07, "loss": 0.707, "step": 10957 }, { "epoch": 1.55142634671197, "grad_norm": 7.582059927978995, "learning_rate": 6.310055096042533e-07, "loss": 0.6781, "step": 10958 }, { "epoch": 1.5515679195866072, "grad_norm": 7.976799463531504, "learning_rate": 6.306248671864404e-07, "loss": 0.7261, "step": 10959 }, { "epoch": 1.5517094924612445, "grad_norm": 9.36245620778414, "learning_rate": 6.302443230402836e-07, "loss": 0.8197, "step": 10960 }, { "epoch": 1.5518510653358817, "grad_norm": 9.658873167796648, "learning_rate": 6.298638771857893e-07, "loss": 0.8025, "step": 10961 }, { "epoch": 1.551992638210519, "grad_norm": 8.23430041400637, "learning_rate": 6.294835296429558e-07, "loss": 0.7191, "step": 10962 }, { "epoch": 1.5521342110851561, "grad_norm": 8.951971542326802, "learning_rate": 6.291032804317789e-07, "loss": 0.6905, "step": 10963 }, { "epoch": 1.5522757839597934, "grad_norm": 8.211857331327556, "learning_rate": 6.28723129572247e-07, "loss": 0.6707, "step": 10964 }, { "epoch": 1.5524173568344306, "grad_norm": 9.70513374311141, "learning_rate": 6.28343077084346e-07, "loss": 0.7439, "step": 10965 }, { "epoch": 1.5525589297090678, "grad_norm": 7.775459800935685, "learning_rate": 6.279631229880534e-07, "loss": 0.7157, "step": 10966 }, { "epoch": 1.552700502583705, "grad_norm": 8.714302268448055, "learning_rate": 6.27583267303343e-07, "loss": 0.6182, "step": 10967 }, { "epoch": 1.5528420754583423, "grad_norm": 9.56041101314344, "learning_rate": 6.272035100501849e-07, "loss": 0.6956, "step": 10968 }, { "epoch": 1.5529836483329795, "grad_norm": 8.32953521640834, "learning_rate": 6.268238512485412e-07, "loss": 0.7244, "step": 10969 }, { "epoch": 1.5531252212076168, "grad_norm": 10.470165555532247, "learning_rate": 6.264442909183715e-07, "loss": 0.71, "step": 10970 }, { "epoch": 1.553266794082254, "grad_norm": 9.6022835717594, "learning_rate": 6.260648290796278e-07, "loss": 0.7326, "step": 10971 }, { "epoch": 1.5534083669568912, "grad_norm": 8.557706730335054, "learning_rate": 6.256854657522587e-07, "loss": 0.6673, "step": 10972 }, { "epoch": 1.5535499398315282, "grad_norm": 8.88264200790099, "learning_rate": 6.253062009562078e-07, "loss": 0.6626, "step": 10973 }, { "epoch": 1.5536915127061655, "grad_norm": 9.34985437942773, "learning_rate": 6.249270347114114e-07, "loss": 0.688, "step": 10974 }, { "epoch": 1.5538330855808027, "grad_norm": 9.86867890673609, "learning_rate": 6.245479670378036e-07, "loss": 0.7137, "step": 10975 }, { "epoch": 1.55397465845544, "grad_norm": 8.242579504155493, "learning_rate": 6.241689979553106e-07, "loss": 0.668, "step": 10976 }, { "epoch": 1.5541162313300771, "grad_norm": 8.992493784151748, "learning_rate": 6.237901274838546e-07, "loss": 0.6513, "step": 10977 }, { "epoch": 1.5542578042047144, "grad_norm": 9.525064431120253, "learning_rate": 6.234113556433522e-07, "loss": 0.8314, "step": 10978 }, { "epoch": 1.5543993770793516, "grad_norm": 9.94933952554786, "learning_rate": 6.230326824537153e-07, "loss": 0.81, "step": 10979 }, { "epoch": 1.5545409499539888, "grad_norm": 7.1296829093201035, "learning_rate": 6.226541079348517e-07, "loss": 0.6552, "step": 10980 }, { "epoch": 1.554682522828626, "grad_norm": 9.337155467753758, "learning_rate": 6.222756321066609e-07, "loss": 0.6674, "step": 10981 }, { "epoch": 1.5548240957032633, "grad_norm": 9.943986419226533, "learning_rate": 6.218972549890409e-07, "loss": 0.6688, "step": 10982 }, { "epoch": 1.5549656685779005, "grad_norm": 7.832652718485509, "learning_rate": 6.215189766018812e-07, "loss": 0.6265, "step": 10983 }, { "epoch": 1.5551072414525375, "grad_norm": 8.357054657523076, "learning_rate": 6.211407969650687e-07, "loss": 0.7514, "step": 10984 }, { "epoch": 1.5552488143271748, "grad_norm": 9.862351828115997, "learning_rate": 6.20762716098483e-07, "loss": 0.7024, "step": 10985 }, { "epoch": 1.555390387201812, "grad_norm": 7.991008473945204, "learning_rate": 6.203847340220006e-07, "loss": 0.5906, "step": 10986 }, { "epoch": 1.5555319600764492, "grad_norm": 8.688594172889644, "learning_rate": 6.200068507554915e-07, "loss": 0.6781, "step": 10987 }, { "epoch": 1.5556735329510865, "grad_norm": 10.042012273346757, "learning_rate": 6.196290663188198e-07, "loss": 0.7169, "step": 10988 }, { "epoch": 1.5558151058257237, "grad_norm": 10.087705520748504, "learning_rate": 6.192513807318468e-07, "loss": 0.712, "step": 10989 }, { "epoch": 1.555956678700361, "grad_norm": 8.942736818730719, "learning_rate": 6.188737940144254e-07, "loss": 0.7138, "step": 10990 }, { "epoch": 1.5560982515749981, "grad_norm": 10.72029480770749, "learning_rate": 6.184963061864069e-07, "loss": 0.6848, "step": 10991 }, { "epoch": 1.5562398244496354, "grad_norm": 11.142117479249029, "learning_rate": 6.181189172676338e-07, "loss": 0.6774, "step": 10992 }, { "epoch": 1.5563813973242726, "grad_norm": 9.110126654103587, "learning_rate": 6.177416272779468e-07, "loss": 0.6749, "step": 10993 }, { "epoch": 1.5565229701989098, "grad_norm": 10.985448749528425, "learning_rate": 6.173644362371783e-07, "loss": 0.7481, "step": 10994 }, { "epoch": 1.556664543073547, "grad_norm": 9.063983664246237, "learning_rate": 6.169873441651575e-07, "loss": 0.8008, "step": 10995 }, { "epoch": 1.5568061159481843, "grad_norm": 8.371506147243746, "learning_rate": 6.166103510817089e-07, "loss": 0.6375, "step": 10996 }, { "epoch": 1.5569476888228215, "grad_norm": 9.258156144729647, "learning_rate": 6.162334570066497e-07, "loss": 0.8275, "step": 10997 }, { "epoch": 1.5570892616974588, "grad_norm": 10.109297267137393, "learning_rate": 6.158566619597933e-07, "loss": 0.7258, "step": 10998 }, { "epoch": 1.557230834572096, "grad_norm": 8.637521693510203, "learning_rate": 6.154799659609464e-07, "loss": 0.6417, "step": 10999 }, { "epoch": 1.5573724074467332, "grad_norm": 9.110516066103097, "learning_rate": 6.151033690299133e-07, "loss": 0.6835, "step": 11000 }, { "epoch": 1.5575139803213705, "grad_norm": 8.80587714484134, "learning_rate": 6.147268711864898e-07, "loss": 0.691, "step": 11001 }, { "epoch": 1.5576555531960077, "grad_norm": 9.665062694553432, "learning_rate": 6.14350472450469e-07, "loss": 0.6944, "step": 11002 }, { "epoch": 1.557797126070645, "grad_norm": 8.241741786674869, "learning_rate": 6.139741728416387e-07, "loss": 0.6954, "step": 11003 }, { "epoch": 1.5579386989452821, "grad_norm": 9.08301699959553, "learning_rate": 6.135979723797792e-07, "loss": 0.737, "step": 11004 }, { "epoch": 1.5580802718199194, "grad_norm": 8.78863540628962, "learning_rate": 6.132218710846683e-07, "loss": 0.6696, "step": 11005 }, { "epoch": 1.5582218446945566, "grad_norm": 9.696403890777546, "learning_rate": 6.12845868976076e-07, "loss": 0.7916, "step": 11006 }, { "epoch": 1.5583634175691938, "grad_norm": 8.68171246630849, "learning_rate": 6.124699660737702e-07, "loss": 0.6176, "step": 11007 }, { "epoch": 1.558504990443831, "grad_norm": 8.566277109434704, "learning_rate": 6.120941623975107e-07, "loss": 0.7774, "step": 11008 }, { "epoch": 1.5586465633184683, "grad_norm": 11.112816128777247, "learning_rate": 6.117184579670527e-07, "loss": 0.7729, "step": 11009 }, { "epoch": 1.5587881361931055, "grad_norm": 8.548828794336849, "learning_rate": 6.113428528021481e-07, "loss": 0.6967, "step": 11010 }, { "epoch": 1.5589297090677428, "grad_norm": 9.63775146789611, "learning_rate": 6.109673469225408e-07, "loss": 0.5926, "step": 11011 }, { "epoch": 1.55907128194238, "grad_norm": 8.360999404087782, "learning_rate": 6.105919403479724e-07, "loss": 0.7869, "step": 11012 }, { "epoch": 1.5592128548170172, "grad_norm": 9.109880017894291, "learning_rate": 6.10216633098176e-07, "loss": 0.697, "step": 11013 }, { "epoch": 1.5593544276916544, "grad_norm": 9.894392841634591, "learning_rate": 6.098414251928831e-07, "loss": 0.7054, "step": 11014 }, { "epoch": 1.5594960005662915, "grad_norm": 7.5422571558765545, "learning_rate": 6.094663166518161e-07, "loss": 0.6533, "step": 11015 }, { "epoch": 1.5596375734409287, "grad_norm": 10.232136861665767, "learning_rate": 6.090913074946958e-07, "loss": 0.7041, "step": 11016 }, { "epoch": 1.559779146315566, "grad_norm": 10.15478730452762, "learning_rate": 6.087163977412352e-07, "loss": 0.8062, "step": 11017 }, { "epoch": 1.5599207191902031, "grad_norm": 10.296518600385566, "learning_rate": 6.083415874111432e-07, "loss": 0.7274, "step": 11018 }, { "epoch": 1.5600622920648404, "grad_norm": 8.752230114800563, "learning_rate": 6.079668765241248e-07, "loss": 0.6322, "step": 11019 }, { "epoch": 1.5602038649394776, "grad_norm": 8.032480583464269, "learning_rate": 6.075922650998756e-07, "loss": 0.6857, "step": 11020 }, { "epoch": 1.5603454378141148, "grad_norm": 9.298216240567202, "learning_rate": 6.072177531580909e-07, "loss": 0.6693, "step": 11021 }, { "epoch": 1.560487010688752, "grad_norm": 9.1553321352285, "learning_rate": 6.068433407184566e-07, "loss": 0.6346, "step": 11022 }, { "epoch": 1.5606285835633893, "grad_norm": 8.874761014057823, "learning_rate": 6.064690278006572e-07, "loss": 0.7756, "step": 11023 }, { "epoch": 1.5607701564380265, "grad_norm": 8.155428673325709, "learning_rate": 6.060948144243683e-07, "loss": 0.6922, "step": 11024 }, { "epoch": 1.5609117293126635, "grad_norm": 9.23699655774077, "learning_rate": 6.057207006092628e-07, "loss": 0.6388, "step": 11025 }, { "epoch": 1.5610533021873008, "grad_norm": 8.560116178490675, "learning_rate": 6.053466863750085e-07, "loss": 0.6768, "step": 11026 }, { "epoch": 1.561194875061938, "grad_norm": 7.585138657974115, "learning_rate": 6.049727717412654e-07, "loss": 0.8687, "step": 11027 }, { "epoch": 1.5613364479365752, "grad_norm": 9.74564714344968, "learning_rate": 6.045989567276913e-07, "loss": 0.6677, "step": 11028 }, { "epoch": 1.5614780208112125, "grad_norm": 7.855483803907753, "learning_rate": 6.042252413539368e-07, "loss": 0.6819, "step": 11029 }, { "epoch": 1.5616195936858497, "grad_norm": 9.811890407020199, "learning_rate": 6.038516256396473e-07, "loss": 0.6759, "step": 11030 }, { "epoch": 1.561761166560487, "grad_norm": 9.366916273961161, "learning_rate": 6.034781096044645e-07, "loss": 0.7171, "step": 11031 }, { "epoch": 1.5619027394351241, "grad_norm": 9.204169481264659, "learning_rate": 6.031046932680229e-07, "loss": 0.7083, "step": 11032 }, { "epoch": 1.5620443123097614, "grad_norm": 10.175277398754373, "learning_rate": 6.027313766499538e-07, "loss": 0.7548, "step": 11033 }, { "epoch": 1.5621858851843986, "grad_norm": 8.535833783176862, "learning_rate": 6.023581597698807e-07, "loss": 0.6813, "step": 11034 }, { "epoch": 1.5623274580590358, "grad_norm": 8.291067386101677, "learning_rate": 6.019850426474249e-07, "loss": 0.6568, "step": 11035 }, { "epoch": 1.562469030933673, "grad_norm": 7.2895525964825, "learning_rate": 6.016120253021998e-07, "loss": 0.673, "step": 11036 }, { "epoch": 1.5626106038083103, "grad_norm": 8.917476391495024, "learning_rate": 6.012391077538154e-07, "loss": 0.6791, "step": 11037 }, { "epoch": 1.5627521766829475, "grad_norm": 9.16757262405828, "learning_rate": 6.008662900218748e-07, "loss": 0.6323, "step": 11038 }, { "epoch": 1.5628937495575848, "grad_norm": 8.904830444709402, "learning_rate": 6.00493572125978e-07, "loss": 0.5909, "step": 11039 }, { "epoch": 1.563035322432222, "grad_norm": 9.667237977432059, "learning_rate": 6.001209540857178e-07, "loss": 0.694, "step": 11040 }, { "epoch": 1.5631768953068592, "grad_norm": 7.702375209135471, "learning_rate": 5.997484359206815e-07, "loss": 0.6958, "step": 11041 }, { "epoch": 1.5633184681814964, "grad_norm": 9.578170004609808, "learning_rate": 5.99376017650454e-07, "loss": 0.7547, "step": 11042 }, { "epoch": 1.5634600410561337, "grad_norm": 10.497805048086802, "learning_rate": 5.990036992946114e-07, "loss": 0.7235, "step": 11043 }, { "epoch": 1.563601613930771, "grad_norm": 9.66671590682895, "learning_rate": 5.986314808727273e-07, "loss": 0.8614, "step": 11044 }, { "epoch": 1.5637431868054081, "grad_norm": 8.243445827376238, "learning_rate": 5.982593624043682e-07, "loss": 0.7087, "step": 11045 }, { "epoch": 1.5638847596800454, "grad_norm": 10.035780029391828, "learning_rate": 5.978873439090968e-07, "loss": 0.7295, "step": 11046 }, { "epoch": 1.5640263325546826, "grad_norm": 9.944108409164828, "learning_rate": 5.975154254064688e-07, "loss": 0.7884, "step": 11047 }, { "epoch": 1.5641679054293198, "grad_norm": 9.579157663158307, "learning_rate": 5.971436069160363e-07, "loss": 0.6057, "step": 11048 }, { "epoch": 1.564309478303957, "grad_norm": 9.028594474694085, "learning_rate": 5.967718884573465e-07, "loss": 0.7527, "step": 11049 }, { "epoch": 1.5644510511785943, "grad_norm": 8.135503933707554, "learning_rate": 5.964002700499394e-07, "loss": 0.6913, "step": 11050 }, { "epoch": 1.5645926240532315, "grad_norm": 9.43137185919866, "learning_rate": 5.960287517133506e-07, "loss": 0.6989, "step": 11051 }, { "epoch": 1.5647341969278687, "grad_norm": 9.97732261444594, "learning_rate": 5.956573334671098e-07, "loss": 0.6904, "step": 11052 }, { "epoch": 1.564875769802506, "grad_norm": 11.512545169903243, "learning_rate": 5.952860153307433e-07, "loss": 0.8445, "step": 11053 }, { "epoch": 1.5650173426771432, "grad_norm": 9.508189235423384, "learning_rate": 5.949147973237713e-07, "loss": 0.6404, "step": 11054 }, { "epoch": 1.5651589155517804, "grad_norm": 8.395537744308776, "learning_rate": 5.945436794657072e-07, "loss": 0.7249, "step": 11055 }, { "epoch": 1.5653004884264174, "grad_norm": 9.374296034767674, "learning_rate": 5.941726617760621e-07, "loss": 0.6668, "step": 11056 }, { "epoch": 1.5654420613010547, "grad_norm": 7.115968032361982, "learning_rate": 5.938017442743382e-07, "loss": 0.7224, "step": 11057 }, { "epoch": 1.565583634175692, "grad_norm": 8.461585360843891, "learning_rate": 5.934309269800359e-07, "loss": 0.7366, "step": 11058 }, { "epoch": 1.5657252070503291, "grad_norm": 10.245445914517566, "learning_rate": 5.930602099126476e-07, "loss": 0.7031, "step": 11059 }, { "epoch": 1.5658667799249664, "grad_norm": 8.970430754968655, "learning_rate": 5.926895930916629e-07, "loss": 0.7441, "step": 11060 }, { "epoch": 1.5660083527996036, "grad_norm": 8.410437466269126, "learning_rate": 5.923190765365641e-07, "loss": 0.6514, "step": 11061 }, { "epoch": 1.5661499256742408, "grad_norm": 10.083440942951965, "learning_rate": 5.919486602668281e-07, "loss": 0.6571, "step": 11062 }, { "epoch": 1.566291498548878, "grad_norm": 9.747602118998934, "learning_rate": 5.915783443019293e-07, "loss": 0.6931, "step": 11063 }, { "epoch": 1.5664330714235153, "grad_norm": 11.247211704158683, "learning_rate": 5.912081286613334e-07, "loss": 0.7116, "step": 11064 }, { "epoch": 1.5665746442981525, "grad_norm": 9.961121704791875, "learning_rate": 5.908380133645033e-07, "loss": 0.733, "step": 11065 }, { "epoch": 1.5667162171727895, "grad_norm": 10.267218293402308, "learning_rate": 5.904679984308947e-07, "loss": 0.7335, "step": 11066 }, { "epoch": 1.5668577900474268, "grad_norm": 10.192324005246777, "learning_rate": 5.900980838799603e-07, "loss": 0.746, "step": 11067 }, { "epoch": 1.566999362922064, "grad_norm": 8.91417290822291, "learning_rate": 5.897282697311449e-07, "loss": 0.7128, "step": 11068 }, { "epoch": 1.5671409357967012, "grad_norm": 9.463845773841346, "learning_rate": 5.8935855600389e-07, "loss": 0.6552, "step": 11069 }, { "epoch": 1.5672825086713384, "grad_norm": 10.085658370217692, "learning_rate": 5.889889427176318e-07, "loss": 0.7214, "step": 11070 }, { "epoch": 1.5674240815459757, "grad_norm": 10.603674816623679, "learning_rate": 5.886194298917994e-07, "loss": 0.7843, "step": 11071 }, { "epoch": 1.567565654420613, "grad_norm": 10.437273765442065, "learning_rate": 5.882500175458198e-07, "loss": 0.6486, "step": 11072 }, { "epoch": 1.5677072272952501, "grad_norm": 8.589320147326081, "learning_rate": 5.878807056991098e-07, "loss": 0.6984, "step": 11073 }, { "epoch": 1.5678488001698874, "grad_norm": 8.754967287744293, "learning_rate": 5.87511494371086e-07, "loss": 0.6216, "step": 11074 }, { "epoch": 1.5679903730445246, "grad_norm": 9.413938098892787, "learning_rate": 5.871423835811566e-07, "loss": 0.8196, "step": 11075 }, { "epoch": 1.5681319459191618, "grad_norm": 8.482479663558044, "learning_rate": 5.867733733487255e-07, "loss": 0.7186, "step": 11076 }, { "epoch": 1.568273518793799, "grad_norm": 11.51334701222143, "learning_rate": 5.864044636931923e-07, "loss": 0.8443, "step": 11077 }, { "epoch": 1.5684150916684363, "grad_norm": 7.420632348684693, "learning_rate": 5.86035654633949e-07, "loss": 0.704, "step": 11078 }, { "epoch": 1.5685566645430735, "grad_norm": 10.55797123176426, "learning_rate": 5.85666946190385e-07, "loss": 0.6368, "step": 11079 }, { "epoch": 1.5686982374177108, "grad_norm": 8.558525555268021, "learning_rate": 5.852983383818813e-07, "loss": 0.629, "step": 11080 }, { "epoch": 1.568839810292348, "grad_norm": 8.919940905867042, "learning_rate": 5.84929831227817e-07, "loss": 0.6536, "step": 11081 }, { "epoch": 1.5689813831669852, "grad_norm": 9.331924559221635, "learning_rate": 5.845614247475637e-07, "loss": 0.7541, "step": 11082 }, { "epoch": 1.5691229560416224, "grad_norm": 9.69999784489244, "learning_rate": 5.841931189604874e-07, "loss": 0.722, "step": 11083 }, { "epoch": 1.5692645289162597, "grad_norm": 9.48129519085308, "learning_rate": 5.838249138859509e-07, "loss": 0.8046, "step": 11084 }, { "epoch": 1.569406101790897, "grad_norm": 8.877818774830361, "learning_rate": 5.834568095433093e-07, "loss": 0.6809, "step": 11085 }, { "epoch": 1.5695476746655341, "grad_norm": 9.056564597279008, "learning_rate": 5.830888059519149e-07, "loss": 0.7088, "step": 11086 }, { "epoch": 1.5696892475401714, "grad_norm": 8.376692871263256, "learning_rate": 5.827209031311121e-07, "loss": 0.7157, "step": 11087 }, { "epoch": 1.5698308204148086, "grad_norm": 9.505928598923518, "learning_rate": 5.823531011002423e-07, "loss": 0.6833, "step": 11088 }, { "epoch": 1.5699723932894458, "grad_norm": 9.520040555595116, "learning_rate": 5.819853998786395e-07, "loss": 0.7957, "step": 11089 }, { "epoch": 1.570113966164083, "grad_norm": 8.428543875882463, "learning_rate": 5.816177994856347e-07, "loss": 0.7402, "step": 11090 }, { "epoch": 1.5702555390387203, "grad_norm": 9.603010150732848, "learning_rate": 5.812502999405514e-07, "loss": 0.7255, "step": 11091 }, { "epoch": 1.5703971119133575, "grad_norm": 9.547354878104041, "learning_rate": 5.80882901262709e-07, "loss": 0.7788, "step": 11092 }, { "epoch": 1.5705386847879947, "grad_norm": 7.527931590305554, "learning_rate": 5.805156034714227e-07, "loss": 0.6612, "step": 11093 }, { "epoch": 1.570680257662632, "grad_norm": 9.174816277350981, "learning_rate": 5.801484065859989e-07, "loss": 0.6656, "step": 11094 }, { "epoch": 1.5708218305372692, "grad_norm": 10.057699444467127, "learning_rate": 5.797813106257422e-07, "loss": 0.8073, "step": 11095 }, { "epoch": 1.5709634034119064, "grad_norm": 9.347815404255224, "learning_rate": 5.794143156099497e-07, "loss": 0.7287, "step": 11096 }, { "epoch": 1.5711049762865437, "grad_norm": 9.012476432441446, "learning_rate": 5.79047421557915e-07, "loss": 0.5682, "step": 11097 }, { "epoch": 1.5712465491611807, "grad_norm": 9.914324914031038, "learning_rate": 5.786806284889246e-07, "loss": 0.7309, "step": 11098 }, { "epoch": 1.571388122035818, "grad_norm": 11.33195315166125, "learning_rate": 5.783139364222609e-07, "loss": 0.7921, "step": 11099 }, { "epoch": 1.5715296949104551, "grad_norm": 7.621275195254836, "learning_rate": 5.779473453772017e-07, "loss": 0.669, "step": 11100 }, { "epoch": 1.5716712677850924, "grad_norm": 9.530434295269304, "learning_rate": 5.775808553730164e-07, "loss": 0.6772, "step": 11101 }, { "epoch": 1.5718128406597296, "grad_norm": 8.976044881453479, "learning_rate": 5.772144664289728e-07, "loss": 0.7165, "step": 11102 }, { "epoch": 1.5719544135343668, "grad_norm": 9.168985616806852, "learning_rate": 5.768481785643309e-07, "loss": 0.5944, "step": 11103 }, { "epoch": 1.572095986409004, "grad_norm": 8.752510800478913, "learning_rate": 5.764819917983458e-07, "loss": 0.6748, "step": 11104 }, { "epoch": 1.5722375592836413, "grad_norm": 9.8584293450017, "learning_rate": 5.761159061502688e-07, "loss": 0.7154, "step": 11105 }, { "epoch": 1.5723791321582785, "grad_norm": 9.30695930888002, "learning_rate": 5.757499216393433e-07, "loss": 0.6573, "step": 11106 }, { "epoch": 1.5725207050329157, "grad_norm": 7.00336021426654, "learning_rate": 5.753840382848105e-07, "loss": 0.7076, "step": 11107 }, { "epoch": 1.5726622779075528, "grad_norm": 9.87601784397891, "learning_rate": 5.750182561059031e-07, "loss": 0.6828, "step": 11108 }, { "epoch": 1.57280385078219, "grad_norm": 8.345820366526711, "learning_rate": 5.746525751218512e-07, "loss": 0.7031, "step": 11109 }, { "epoch": 1.5729454236568272, "grad_norm": 8.771135362754231, "learning_rate": 5.742869953518773e-07, "loss": 0.7353, "step": 11110 }, { "epoch": 1.5730869965314644, "grad_norm": 10.149237362628226, "learning_rate": 5.739215168152007e-07, "loss": 0.7, "step": 11111 }, { "epoch": 1.5732285694061017, "grad_norm": 8.847485511597613, "learning_rate": 5.735561395310333e-07, "loss": 0.6977, "step": 11112 }, { "epoch": 1.573370142280739, "grad_norm": 11.220944670757415, "learning_rate": 5.731908635185837e-07, "loss": 0.6201, "step": 11113 }, { "epoch": 1.5735117151553761, "grad_norm": 9.913035478595965, "learning_rate": 5.728256887970537e-07, "loss": 0.683, "step": 11114 }, { "epoch": 1.5736532880300134, "grad_norm": 8.70899247834059, "learning_rate": 5.724606153856396e-07, "loss": 0.6592, "step": 11115 }, { "epoch": 1.5737948609046506, "grad_norm": 8.539961730013646, "learning_rate": 5.720956433035346e-07, "loss": 0.6018, "step": 11116 }, { "epoch": 1.5739364337792878, "grad_norm": 9.094537340405786, "learning_rate": 5.717307725699234e-07, "loss": 0.7414, "step": 11117 }, { "epoch": 1.574078006653925, "grad_norm": 8.376540768222837, "learning_rate": 5.713660032039884e-07, "loss": 0.7129, "step": 11118 }, { "epoch": 1.5742195795285623, "grad_norm": 10.743086965183885, "learning_rate": 5.710013352249039e-07, "loss": 0.7026, "step": 11119 }, { "epoch": 1.5743611524031995, "grad_norm": 9.14479363744129, "learning_rate": 5.706367686518414e-07, "loss": 0.7097, "step": 11120 }, { "epoch": 1.5745027252778367, "grad_norm": 8.164328830883104, "learning_rate": 5.702723035039648e-07, "loss": 0.571, "step": 11121 }, { "epoch": 1.574644298152474, "grad_norm": 9.57929943159194, "learning_rate": 5.699079398004342e-07, "loss": 0.7444, "step": 11122 }, { "epoch": 1.5747858710271112, "grad_norm": 9.921412852179795, "learning_rate": 5.695436775604049e-07, "loss": 0.7421, "step": 11123 }, { "epoch": 1.5749274439017484, "grad_norm": 9.048921080288254, "learning_rate": 5.691795168030242e-07, "loss": 0.6277, "step": 11124 }, { "epoch": 1.5750690167763857, "grad_norm": 8.324577105437548, "learning_rate": 5.688154575474384e-07, "loss": 0.6672, "step": 11125 }, { "epoch": 1.575210589651023, "grad_norm": 11.03459242198975, "learning_rate": 5.684514998127822e-07, "loss": 0.7533, "step": 11126 }, { "epoch": 1.5753521625256601, "grad_norm": 8.44382719940653, "learning_rate": 5.680876436181907e-07, "loss": 0.792, "step": 11127 }, { "epoch": 1.5754937354002974, "grad_norm": 8.63986473118892, "learning_rate": 5.677238889827918e-07, "loss": 0.6414, "step": 11128 }, { "epoch": 1.5756353082749346, "grad_norm": 7.633614848358291, "learning_rate": 5.673602359257069e-07, "loss": 0.6962, "step": 11129 }, { "epoch": 1.5757768811495718, "grad_norm": 9.791268285127707, "learning_rate": 5.669966844660538e-07, "loss": 0.7733, "step": 11130 }, { "epoch": 1.575918454024209, "grad_norm": 8.66084778897702, "learning_rate": 5.66633234622943e-07, "loss": 0.6941, "step": 11131 }, { "epoch": 1.5760600268988463, "grad_norm": 9.56034876741887, "learning_rate": 5.662698864154823e-07, "loss": 0.7405, "step": 11132 }, { "epoch": 1.5762015997734835, "grad_norm": 7.937455545135724, "learning_rate": 5.65906639862771e-07, "loss": 0.6939, "step": 11133 }, { "epoch": 1.5763431726481207, "grad_norm": 12.255613947990835, "learning_rate": 5.655434949839061e-07, "loss": 0.754, "step": 11134 }, { "epoch": 1.576484745522758, "grad_norm": 8.876020346000633, "learning_rate": 5.651804517979775e-07, "loss": 0.7091, "step": 11135 }, { "epoch": 1.5766263183973952, "grad_norm": 8.223239847163914, "learning_rate": 5.648175103240694e-07, "loss": 0.6328, "step": 11136 }, { "epoch": 1.5767678912720324, "grad_norm": 10.248772152186886, "learning_rate": 5.64454670581262e-07, "loss": 0.7757, "step": 11137 }, { "epoch": 1.5769094641466697, "grad_norm": 9.523484765828155, "learning_rate": 5.64091932588629e-07, "loss": 0.6847, "step": 11138 }, { "epoch": 1.5770510370213067, "grad_norm": 8.19640210002254, "learning_rate": 5.637292963652405e-07, "loss": 0.7128, "step": 11139 }, { "epoch": 1.577192609895944, "grad_norm": 10.456975071057535, "learning_rate": 5.63366761930158e-07, "loss": 0.7664, "step": 11140 }, { "epoch": 1.5773341827705811, "grad_norm": 9.066093626799931, "learning_rate": 5.630043293024418e-07, "loss": 0.6801, "step": 11141 }, { "epoch": 1.5774757556452184, "grad_norm": 8.125944112064907, "learning_rate": 5.62641998501143e-07, "loss": 0.7156, "step": 11142 }, { "epoch": 1.5776173285198556, "grad_norm": 8.754575132717438, "learning_rate": 5.622797695453106e-07, "loss": 0.7292, "step": 11143 }, { "epoch": 1.5777589013944928, "grad_norm": 7.9169470754119, "learning_rate": 5.619176424539849e-07, "loss": 0.7, "step": 11144 }, { "epoch": 1.57790047426913, "grad_norm": 8.20446324649492, "learning_rate": 5.615556172462039e-07, "loss": 0.7173, "step": 11145 }, { "epoch": 1.5780420471437673, "grad_norm": 9.721722016560912, "learning_rate": 5.611936939409998e-07, "loss": 0.7242, "step": 11146 }, { "epoch": 1.5781836200184045, "grad_norm": 9.766185140185641, "learning_rate": 5.608318725573964e-07, "loss": 0.7146, "step": 11147 }, { "epoch": 1.5783251928930417, "grad_norm": 8.883430882169414, "learning_rate": 5.604701531144164e-07, "loss": 0.807, "step": 11148 }, { "epoch": 1.5784667657676787, "grad_norm": 9.763084044429494, "learning_rate": 5.601085356310734e-07, "loss": 0.698, "step": 11149 }, { "epoch": 1.578608338642316, "grad_norm": 8.987568216502675, "learning_rate": 5.597470201263783e-07, "loss": 0.6513, "step": 11150 }, { "epoch": 1.5787499115169532, "grad_norm": 8.057365734415002, "learning_rate": 5.593856066193362e-07, "loss": 0.6697, "step": 11151 }, { "epoch": 1.5788914843915904, "grad_norm": 8.594922682773333, "learning_rate": 5.590242951289451e-07, "loss": 0.7467, "step": 11152 }, { "epoch": 1.5790330572662277, "grad_norm": 8.336225796033894, "learning_rate": 5.586630856742004e-07, "loss": 0.6287, "step": 11153 }, { "epoch": 1.579174630140865, "grad_norm": 9.582994228389563, "learning_rate": 5.58301978274089e-07, "loss": 0.7595, "step": 11154 }, { "epoch": 1.5793162030155021, "grad_norm": 11.757451717886726, "learning_rate": 5.579409729475954e-07, "loss": 0.7847, "step": 11155 }, { "epoch": 1.5794577758901394, "grad_norm": 10.1348595356629, "learning_rate": 5.575800697136968e-07, "loss": 0.7043, "step": 11156 }, { "epoch": 1.5795993487647766, "grad_norm": 9.673566088024364, "learning_rate": 5.572192685913652e-07, "loss": 0.813, "step": 11157 }, { "epoch": 1.5797409216394138, "grad_norm": 8.856477132292738, "learning_rate": 5.568585695995684e-07, "loss": 0.6088, "step": 11158 }, { "epoch": 1.579882494514051, "grad_norm": 7.4540669547744045, "learning_rate": 5.564979727572673e-07, "loss": 0.655, "step": 11159 }, { "epoch": 1.5800240673886883, "grad_norm": 7.814503161158854, "learning_rate": 5.561374780834192e-07, "loss": 0.7531, "step": 11160 }, { "epoch": 1.5801656402633255, "grad_norm": 9.227671299008167, "learning_rate": 5.557770855969738e-07, "loss": 0.7092, "step": 11161 }, { "epoch": 1.5803072131379627, "grad_norm": 8.781009331388374, "learning_rate": 5.554167953168779e-07, "loss": 0.6383, "step": 11162 }, { "epoch": 1.5804487860126, "grad_norm": 10.213190742207782, "learning_rate": 5.550566072620705e-07, "loss": 0.7521, "step": 11163 }, { "epoch": 1.5805903588872372, "grad_norm": 11.25269471849896, "learning_rate": 5.54696521451488e-07, "loss": 0.7663, "step": 11164 }, { "epoch": 1.5807319317618744, "grad_norm": 7.814231009402688, "learning_rate": 5.54336537904058e-07, "loss": 0.6592, "step": 11165 }, { "epoch": 1.5808735046365117, "grad_norm": 9.710414243243726, "learning_rate": 5.539766566387053e-07, "loss": 0.8934, "step": 11166 }, { "epoch": 1.581015077511149, "grad_norm": 9.69898984702708, "learning_rate": 5.536168776743503e-07, "loss": 0.6598, "step": 11167 }, { "epoch": 1.5811566503857861, "grad_norm": 8.739137282751503, "learning_rate": 5.532572010299034e-07, "loss": 0.7291, "step": 11168 }, { "epoch": 1.5812982232604234, "grad_norm": 10.793765734123774, "learning_rate": 5.528976267242745e-07, "loss": 0.7642, "step": 11169 }, { "epoch": 1.5814397961350606, "grad_norm": 7.356570995396071, "learning_rate": 5.525381547763647e-07, "loss": 0.7087, "step": 11170 }, { "epoch": 1.5815813690096978, "grad_norm": 10.847677349637383, "learning_rate": 5.52178785205073e-07, "loss": 0.5956, "step": 11171 }, { "epoch": 1.581722941884335, "grad_norm": 9.582874806809713, "learning_rate": 5.518195180292893e-07, "loss": 0.712, "step": 11172 }, { "epoch": 1.5818645147589723, "grad_norm": 9.796670573898874, "learning_rate": 5.514603532679011e-07, "loss": 0.6907, "step": 11173 }, { "epoch": 1.5820060876336095, "grad_norm": 7.295787264600218, "learning_rate": 5.511012909397898e-07, "loss": 0.7289, "step": 11174 }, { "epoch": 1.5821476605082467, "grad_norm": 8.64478983882326, "learning_rate": 5.507423310638299e-07, "loss": 0.6908, "step": 11175 }, { "epoch": 1.582289233382884, "grad_norm": 10.303572055696684, "learning_rate": 5.503834736588929e-07, "loss": 0.6751, "step": 11176 }, { "epoch": 1.5824308062575212, "grad_norm": 10.099367072792042, "learning_rate": 5.500247187438429e-07, "loss": 0.6184, "step": 11177 }, { "epoch": 1.5825723791321584, "grad_norm": 10.091255375749528, "learning_rate": 5.496660663375389e-07, "loss": 0.7257, "step": 11178 }, { "epoch": 1.5827139520067957, "grad_norm": 7.914838506192123, "learning_rate": 5.49307516458836e-07, "loss": 0.6875, "step": 11179 }, { "epoch": 1.5828555248814327, "grad_norm": 8.467037238874775, "learning_rate": 5.489490691265819e-07, "loss": 0.6689, "step": 11180 }, { "epoch": 1.58299709775607, "grad_norm": 8.077936579796045, "learning_rate": 5.485907243596214e-07, "loss": 0.6053, "step": 11181 }, { "epoch": 1.5831386706307071, "grad_norm": 9.549228209620612, "learning_rate": 5.482324821767904e-07, "loss": 0.6816, "step": 11182 }, { "epoch": 1.5832802435053444, "grad_norm": 10.544872133321745, "learning_rate": 5.478743425969235e-07, "loss": 0.6262, "step": 11183 }, { "epoch": 1.5834218163799816, "grad_norm": 8.625142414188828, "learning_rate": 5.47516305638846e-07, "loss": 0.7786, "step": 11184 }, { "epoch": 1.5835633892546188, "grad_norm": 9.289690386290845, "learning_rate": 5.471583713213812e-07, "loss": 0.6939, "step": 11185 }, { "epoch": 1.583704962129256, "grad_norm": 9.560770115179661, "learning_rate": 5.468005396633442e-07, "loss": 0.7149, "step": 11186 }, { "epoch": 1.5838465350038933, "grad_norm": 10.00470241609266, "learning_rate": 5.464428106835467e-07, "loss": 0.6433, "step": 11187 }, { "epoch": 1.5839881078785305, "grad_norm": 8.939634695078167, "learning_rate": 5.460851844007945e-07, "loss": 0.668, "step": 11188 }, { "epoch": 1.5841296807531677, "grad_norm": 11.063760976530576, "learning_rate": 5.457276608338862e-07, "loss": 0.6427, "step": 11189 }, { "epoch": 1.584271253627805, "grad_norm": 8.665401317462209, "learning_rate": 5.453702400016186e-07, "loss": 0.6641, "step": 11190 }, { "epoch": 1.584412826502442, "grad_norm": 9.925388080687128, "learning_rate": 5.450129219227792e-07, "loss": 0.7682, "step": 11191 }, { "epoch": 1.5845543993770792, "grad_norm": 9.1118478968169, "learning_rate": 5.446557066161537e-07, "loss": 0.7808, "step": 11192 }, { "epoch": 1.5846959722517164, "grad_norm": 10.023026467605106, "learning_rate": 5.442985941005188e-07, "loss": 0.6823, "step": 11193 }, { "epoch": 1.5848375451263537, "grad_norm": 8.274802177346523, "learning_rate": 5.439415843946493e-07, "loss": 0.7322, "step": 11194 }, { "epoch": 1.584979118000991, "grad_norm": 10.39390661290231, "learning_rate": 5.435846775173115e-07, "loss": 0.7602, "step": 11195 }, { "epoch": 1.5851206908756281, "grad_norm": 10.175682281188584, "learning_rate": 5.432278734872687e-07, "loss": 0.6943, "step": 11196 }, { "epoch": 1.5852622637502654, "grad_norm": 8.659757158429372, "learning_rate": 5.428711723232779e-07, "loss": 0.6781, "step": 11197 }, { "epoch": 1.5854038366249026, "grad_norm": 8.123086087681862, "learning_rate": 5.425145740440896e-07, "loss": 0.77, "step": 11198 }, { "epoch": 1.5855454094995398, "grad_norm": 10.558003388277303, "learning_rate": 5.421580786684522e-07, "loss": 0.7228, "step": 11199 }, { "epoch": 1.585686982374177, "grad_norm": 8.145024434258499, "learning_rate": 5.418016862151032e-07, "loss": 0.6776, "step": 11200 }, { "epoch": 1.5858285552488143, "grad_norm": 8.490930543188881, "learning_rate": 5.414453967027797e-07, "loss": 0.7138, "step": 11201 }, { "epoch": 1.5859701281234515, "grad_norm": 9.969859285729468, "learning_rate": 5.410892101502119e-07, "loss": 0.7712, "step": 11202 }, { "epoch": 1.5861117009980887, "grad_norm": 9.079146047087024, "learning_rate": 5.407331265761229e-07, "loss": 0.7248, "step": 11203 }, { "epoch": 1.586253273872726, "grad_norm": 10.504146665387378, "learning_rate": 5.403771459992333e-07, "loss": 0.721, "step": 11204 }, { "epoch": 1.5863948467473632, "grad_norm": 8.236146797787606, "learning_rate": 5.400212684382553e-07, "loss": 0.6624, "step": 11205 }, { "epoch": 1.5865364196220004, "grad_norm": 8.410358091661761, "learning_rate": 5.396654939118984e-07, "loss": 0.7447, "step": 11206 }, { "epoch": 1.5866779924966377, "grad_norm": 8.04254424951278, "learning_rate": 5.393098224388643e-07, "loss": 0.6258, "step": 11207 }, { "epoch": 1.5868195653712749, "grad_norm": 8.184340005503412, "learning_rate": 5.389542540378515e-07, "loss": 0.7019, "step": 11208 }, { "epoch": 1.5869611382459121, "grad_norm": 9.942561174319126, "learning_rate": 5.385987887275512e-07, "loss": 0.7133, "step": 11209 }, { "epoch": 1.5871027111205493, "grad_norm": 9.534410896846346, "learning_rate": 5.382434265266495e-07, "loss": 0.7872, "step": 11210 }, { "epoch": 1.5872442839951866, "grad_norm": 9.758463780557127, "learning_rate": 5.378881674538288e-07, "loss": 0.7295, "step": 11211 }, { "epoch": 1.5873858568698238, "grad_norm": 8.64099716295607, "learning_rate": 5.375330115277635e-07, "loss": 0.6043, "step": 11212 }, { "epoch": 1.587527429744461, "grad_norm": 9.504997143772584, "learning_rate": 5.371779587671252e-07, "loss": 0.6683, "step": 11213 }, { "epoch": 1.5876690026190983, "grad_norm": 8.312315056830071, "learning_rate": 5.368230091905774e-07, "loss": 0.7424, "step": 11214 }, { "epoch": 1.5878105754937355, "grad_norm": 8.07146718128099, "learning_rate": 5.364681628167806e-07, "loss": 0.706, "step": 11215 }, { "epoch": 1.5879521483683727, "grad_norm": 9.782083006661656, "learning_rate": 5.36113419664388e-07, "loss": 0.7354, "step": 11216 }, { "epoch": 1.58809372124301, "grad_norm": 8.840746042522595, "learning_rate": 5.357587797520491e-07, "loss": 0.6056, "step": 11217 }, { "epoch": 1.5882352941176472, "grad_norm": 8.461830155473349, "learning_rate": 5.354042430984061e-07, "loss": 0.5781, "step": 11218 }, { "epoch": 1.5883768669922844, "grad_norm": 10.132337007564606, "learning_rate": 5.350498097220972e-07, "loss": 0.7055, "step": 11219 }, { "epoch": 1.5885184398669216, "grad_norm": 6.9599288055461805, "learning_rate": 5.346954796417558e-07, "loss": 0.7849, "step": 11220 }, { "epoch": 1.5886600127415589, "grad_norm": 9.785614307517351, "learning_rate": 5.343412528760064e-07, "loss": 0.7028, "step": 11221 }, { "epoch": 1.5888015856161959, "grad_norm": 7.574854823568867, "learning_rate": 5.339871294434724e-07, "loss": 0.6753, "step": 11222 }, { "epoch": 1.5889431584908331, "grad_norm": 9.159598197286941, "learning_rate": 5.336331093627683e-07, "loss": 0.7192, "step": 11223 }, { "epoch": 1.5890847313654703, "grad_norm": 9.542031315883468, "learning_rate": 5.332791926525055e-07, "loss": 0.6833, "step": 11224 }, { "epoch": 1.5892263042401076, "grad_norm": 10.289358356367135, "learning_rate": 5.329253793312897e-07, "loss": 0.8026, "step": 11225 }, { "epoch": 1.5893678771147448, "grad_norm": 9.817965595035103, "learning_rate": 5.325716694177194e-07, "loss": 0.731, "step": 11226 }, { "epoch": 1.589509449989382, "grad_norm": 8.703241590612679, "learning_rate": 5.322180629303902e-07, "loss": 0.7422, "step": 11227 }, { "epoch": 1.5896510228640193, "grad_norm": 9.546954914763726, "learning_rate": 5.318645598878894e-07, "loss": 0.8198, "step": 11228 }, { "epoch": 1.5897925957386565, "grad_norm": 9.424719638853778, "learning_rate": 5.315111603088019e-07, "loss": 0.7246, "step": 11229 }, { "epoch": 1.5899341686132937, "grad_norm": 8.480214147202998, "learning_rate": 5.311578642117049e-07, "loss": 0.794, "step": 11230 }, { "epoch": 1.590075741487931, "grad_norm": 9.845220934247267, "learning_rate": 5.308046716151705e-07, "loss": 0.7454, "step": 11231 }, { "epoch": 1.590217314362568, "grad_norm": 9.772549888501066, "learning_rate": 5.304515825377666e-07, "loss": 0.7156, "step": 11232 }, { "epoch": 1.5903588872372052, "grad_norm": 9.087680114892109, "learning_rate": 5.300985969980537e-07, "loss": 0.7739, "step": 11233 }, { "epoch": 1.5905004601118424, "grad_norm": 9.318078744660792, "learning_rate": 5.297457150145898e-07, "loss": 0.7353, "step": 11234 }, { "epoch": 1.5906420329864797, "grad_norm": 9.978565131568814, "learning_rate": 5.293929366059236e-07, "loss": 0.7148, "step": 11235 }, { "epoch": 1.590783605861117, "grad_norm": 8.134632211165881, "learning_rate": 5.290402617906021e-07, "loss": 0.6069, "step": 11236 }, { "epoch": 1.5909251787357541, "grad_norm": 10.230891209896157, "learning_rate": 5.286876905871638e-07, "loss": 0.7137, "step": 11237 }, { "epoch": 1.5910667516103913, "grad_norm": 9.68159393428574, "learning_rate": 5.283352230141445e-07, "loss": 0.7799, "step": 11238 }, { "epoch": 1.5912083244850286, "grad_norm": 9.015428669872692, "learning_rate": 5.279828590900715e-07, "loss": 0.6777, "step": 11239 }, { "epoch": 1.5913498973596658, "grad_norm": 7.444031996554196, "learning_rate": 5.276305988334701e-07, "loss": 0.7408, "step": 11240 }, { "epoch": 1.591491470234303, "grad_norm": 8.536396416623251, "learning_rate": 5.272784422628574e-07, "loss": 0.6271, "step": 11241 }, { "epoch": 1.5916330431089403, "grad_norm": 9.309258428063362, "learning_rate": 5.269263893967453e-07, "loss": 0.6202, "step": 11242 }, { "epoch": 1.5917746159835775, "grad_norm": 9.72420002998419, "learning_rate": 5.265744402536424e-07, "loss": 0.7704, "step": 11243 }, { "epoch": 1.5919161888582147, "grad_norm": 9.070875625149625, "learning_rate": 5.262225948520491e-07, "loss": 0.6931, "step": 11244 }, { "epoch": 1.592057761732852, "grad_norm": 9.054523193343151, "learning_rate": 5.258708532104631e-07, "loss": 0.6781, "step": 11245 }, { "epoch": 1.5921993346074892, "grad_norm": 9.48838738876411, "learning_rate": 5.255192153473734e-07, "loss": 0.6943, "step": 11246 }, { "epoch": 1.5923409074821264, "grad_norm": 8.715080117816527, "learning_rate": 5.251676812812664e-07, "loss": 0.7408, "step": 11247 }, { "epoch": 1.5924824803567637, "grad_norm": 8.466517305433394, "learning_rate": 5.248162510306229e-07, "loss": 0.7124, "step": 11248 }, { "epoch": 1.5926240532314009, "grad_norm": 9.773258735125706, "learning_rate": 5.244649246139152e-07, "loss": 0.6972, "step": 11249 }, { "epoch": 1.5927656261060381, "grad_norm": 7.919765334457888, "learning_rate": 5.241137020496142e-07, "loss": 0.7065, "step": 11250 }, { "epoch": 1.5929071989806753, "grad_norm": 10.272101416208994, "learning_rate": 5.237625833561821e-07, "loss": 0.7324, "step": 11251 }, { "epoch": 1.5930487718553126, "grad_norm": 11.032356185131327, "learning_rate": 5.234115685520788e-07, "loss": 0.7674, "step": 11252 }, { "epoch": 1.5931903447299498, "grad_norm": 8.379725247706562, "learning_rate": 5.23060657655754e-07, "loss": 0.677, "step": 11253 }, { "epoch": 1.593331917604587, "grad_norm": 9.524324296027432, "learning_rate": 5.227098506856563e-07, "loss": 0.6302, "step": 11254 }, { "epoch": 1.5934734904792243, "grad_norm": 9.704251283893932, "learning_rate": 5.223591476602283e-07, "loss": 0.7034, "step": 11255 }, { "epoch": 1.5936150633538615, "grad_norm": 9.26830274926595, "learning_rate": 5.220085485979046e-07, "loss": 0.727, "step": 11256 }, { "epoch": 1.5937566362284987, "grad_norm": 8.994345690183986, "learning_rate": 5.216580535171173e-07, "loss": 0.8242, "step": 11257 }, { "epoch": 1.593898209103136, "grad_norm": 8.389417849180271, "learning_rate": 5.213076624362903e-07, "loss": 0.6978, "step": 11258 }, { "epoch": 1.5940397819777732, "grad_norm": 8.114849146617779, "learning_rate": 5.209573753738448e-07, "loss": 0.6776, "step": 11259 }, { "epoch": 1.5941813548524104, "grad_norm": 10.699919327761563, "learning_rate": 5.206071923481937e-07, "loss": 0.7381, "step": 11260 }, { "epoch": 1.5943229277270476, "grad_norm": 9.376978957798721, "learning_rate": 5.202571133777474e-07, "loss": 0.7592, "step": 11261 }, { "epoch": 1.5944645006016849, "grad_norm": 8.496483019099589, "learning_rate": 5.199071384809085e-07, "loss": 0.7291, "step": 11262 }, { "epoch": 1.5946060734763219, "grad_norm": 9.284682541423779, "learning_rate": 5.19557267676074e-07, "loss": 0.7538, "step": 11263 }, { "epoch": 1.5947476463509591, "grad_norm": 8.583317975768143, "learning_rate": 5.192075009816381e-07, "loss": 0.7032, "step": 11264 }, { "epoch": 1.5948892192255963, "grad_norm": 9.17617785301889, "learning_rate": 5.188578384159862e-07, "loss": 0.7106, "step": 11265 }, { "epoch": 1.5950307921002336, "grad_norm": 9.65572219936998, "learning_rate": 5.185082799975013e-07, "loss": 0.7368, "step": 11266 }, { "epoch": 1.5951723649748708, "grad_norm": 10.981086423173398, "learning_rate": 5.18158825744558e-07, "loss": 0.708, "step": 11267 }, { "epoch": 1.595313937849508, "grad_norm": 10.899693308024055, "learning_rate": 5.17809475675528e-07, "loss": 0.6554, "step": 11268 }, { "epoch": 1.5954555107241453, "grad_norm": 8.609585020407941, "learning_rate": 5.174602298087755e-07, "loss": 0.7055, "step": 11269 }, { "epoch": 1.5955970835987825, "grad_norm": 9.225350606401642, "learning_rate": 5.171110881626604e-07, "loss": 0.7189, "step": 11270 }, { "epoch": 1.5957386564734197, "grad_norm": 9.472177017516715, "learning_rate": 5.167620507555373e-07, "loss": 0.7502, "step": 11271 }, { "epoch": 1.595880229348057, "grad_norm": 10.026309308860045, "learning_rate": 5.164131176057541e-07, "loss": 0.7287, "step": 11272 }, { "epoch": 1.5960218022226942, "grad_norm": 9.172380735631018, "learning_rate": 5.160642887316555e-07, "loss": 0.6803, "step": 11273 }, { "epoch": 1.5961633750973312, "grad_norm": 9.122579175083331, "learning_rate": 5.157155641515766e-07, "loss": 0.6502, "step": 11274 }, { "epoch": 1.5963049479719684, "grad_norm": 8.215806789425605, "learning_rate": 5.153669438838507e-07, "loss": 0.7387, "step": 11275 }, { "epoch": 1.5964465208466057, "grad_norm": 8.658540388268305, "learning_rate": 5.150184279468057e-07, "loss": 0.7033, "step": 11276 }, { "epoch": 1.5965880937212429, "grad_norm": 7.606651617776938, "learning_rate": 5.146700163587612e-07, "loss": 0.7607, "step": 11277 }, { "epoch": 1.5967296665958801, "grad_norm": 9.253003019580783, "learning_rate": 5.143217091380343e-07, "loss": 0.5872, "step": 11278 }, { "epoch": 1.5968712394705173, "grad_norm": 9.202951737280324, "learning_rate": 5.139735063029338e-07, "loss": 0.69, "step": 11279 }, { "epoch": 1.5970128123451546, "grad_norm": 9.309774319938352, "learning_rate": 5.136254078717659e-07, "loss": 0.7289, "step": 11280 }, { "epoch": 1.5971543852197918, "grad_norm": 7.989088723706472, "learning_rate": 5.132774138628286e-07, "loss": 0.5978, "step": 11281 }, { "epoch": 1.597295958094429, "grad_norm": 8.867609519830664, "learning_rate": 5.129295242944168e-07, "loss": 0.7181, "step": 11282 }, { "epoch": 1.5974375309690663, "grad_norm": 8.346373870556565, "learning_rate": 5.125817391848187e-07, "loss": 0.6373, "step": 11283 }, { "epoch": 1.5975791038437035, "grad_norm": 9.98641388183157, "learning_rate": 5.122340585523156e-07, "loss": 0.7044, "step": 11284 }, { "epoch": 1.5977206767183407, "grad_norm": 10.88076489115996, "learning_rate": 5.118864824151868e-07, "loss": 0.7887, "step": 11285 }, { "epoch": 1.597862249592978, "grad_norm": 10.118434331351422, "learning_rate": 5.115390107917024e-07, "loss": 0.7642, "step": 11286 }, { "epoch": 1.5980038224676152, "grad_norm": 9.737288576311137, "learning_rate": 5.111916437001302e-07, "loss": 0.7279, "step": 11287 }, { "epoch": 1.5981453953422524, "grad_norm": 8.954652772409105, "learning_rate": 5.1084438115873e-07, "loss": 0.7747, "step": 11288 }, { "epoch": 1.5982869682168896, "grad_norm": 8.305186331149782, "learning_rate": 5.104972231857577e-07, "loss": 0.6945, "step": 11289 }, { "epoch": 1.5984285410915269, "grad_norm": 9.422065290107827, "learning_rate": 5.101501697994626e-07, "loss": 0.6927, "step": 11290 }, { "epoch": 1.598570113966164, "grad_norm": 8.972748081267122, "learning_rate": 5.098032210180901e-07, "loss": 0.7074, "step": 11291 }, { "epoch": 1.5987116868408013, "grad_norm": 8.191289825412495, "learning_rate": 5.094563768598773e-07, "loss": 0.6903, "step": 11292 }, { "epoch": 1.5988532597154386, "grad_norm": 9.326641635747572, "learning_rate": 5.091096373430588e-07, "loss": 0.6555, "step": 11293 }, { "epoch": 1.5989948325900758, "grad_norm": 10.849802576193856, "learning_rate": 5.087630024858637e-07, "loss": 0.7681, "step": 11294 }, { "epoch": 1.599136405464713, "grad_norm": 9.379378453465872, "learning_rate": 5.084164723065111e-07, "loss": 0.6782, "step": 11295 }, { "epoch": 1.5992779783393503, "grad_norm": 7.78701017501713, "learning_rate": 5.080700468232206e-07, "loss": 0.6468, "step": 11296 }, { "epoch": 1.5994195512139875, "grad_norm": 9.210451690830428, "learning_rate": 5.077237260542014e-07, "loss": 0.7008, "step": 11297 }, { "epoch": 1.5995611240886247, "grad_norm": 9.73511248836565, "learning_rate": 5.073775100176609e-07, "loss": 0.6668, "step": 11298 }, { "epoch": 1.599702696963262, "grad_norm": 9.696145414590218, "learning_rate": 5.070313987317992e-07, "loss": 0.6648, "step": 11299 }, { "epoch": 1.5998442698378992, "grad_norm": 8.87751626892232, "learning_rate": 5.066853922148104e-07, "loss": 0.769, "step": 11300 }, { "epoch": 1.5999858427125364, "grad_norm": 8.39240653732843, "learning_rate": 5.063394904848851e-07, "loss": 0.67, "step": 11301 }, { "epoch": 1.6001274155871736, "grad_norm": 8.89658149473608, "learning_rate": 5.059936935602052e-07, "loss": 0.7802, "step": 11302 }, { "epoch": 1.6002689884618109, "grad_norm": 9.632646966085812, "learning_rate": 5.05648001458951e-07, "loss": 0.6874, "step": 11303 }, { "epoch": 1.600410561336448, "grad_norm": 9.007837485736163, "learning_rate": 5.053024141992935e-07, "loss": 0.6878, "step": 11304 }, { "epoch": 1.600552134211085, "grad_norm": 9.22555239297715, "learning_rate": 5.049569317994013e-07, "loss": 0.656, "step": 11305 }, { "epoch": 1.6006937070857223, "grad_norm": 9.682170755672107, "learning_rate": 5.046115542774358e-07, "loss": 0.6737, "step": 11306 }, { "epoch": 1.6008352799603596, "grad_norm": 10.544647840658127, "learning_rate": 5.042662816515523e-07, "loss": 0.7287, "step": 11307 }, { "epoch": 1.6009768528349968, "grad_norm": 9.113863066779093, "learning_rate": 5.039211139399031e-07, "loss": 0.6576, "step": 11308 }, { "epoch": 1.601118425709634, "grad_norm": 8.431268043583593, "learning_rate": 5.035760511606319e-07, "loss": 0.6968, "step": 11309 }, { "epoch": 1.6012599985842713, "grad_norm": 8.875211041587397, "learning_rate": 5.032310933318798e-07, "loss": 0.5961, "step": 11310 }, { "epoch": 1.6014015714589085, "grad_norm": 8.78770432214341, "learning_rate": 5.028862404717796e-07, "loss": 0.7517, "step": 11311 }, { "epoch": 1.6015431443335457, "grad_norm": 8.984264180702409, "learning_rate": 5.025414925984612e-07, "loss": 0.6959, "step": 11312 }, { "epoch": 1.601684717208183, "grad_norm": 9.601212075328286, "learning_rate": 5.021968497300464e-07, "loss": 0.6885, "step": 11313 }, { "epoch": 1.6018262900828202, "grad_norm": 7.904314972875872, "learning_rate": 5.018523118846544e-07, "loss": 0.5955, "step": 11314 }, { "epoch": 1.6019678629574572, "grad_norm": 8.513093624618692, "learning_rate": 5.015078790803965e-07, "loss": 0.6903, "step": 11315 }, { "epoch": 1.6021094358320944, "grad_norm": 7.70961427785025, "learning_rate": 5.011635513353786e-07, "loss": 0.6535, "step": 11316 }, { "epoch": 1.6022510087067316, "grad_norm": 9.00786458882923, "learning_rate": 5.008193286677029e-07, "loss": 0.761, "step": 11317 }, { "epoch": 1.6023925815813689, "grad_norm": 9.045342982733876, "learning_rate": 5.004752110954642e-07, "loss": 0.6475, "step": 11318 }, { "epoch": 1.602534154456006, "grad_norm": 8.110337863568352, "learning_rate": 5.00131198636753e-07, "loss": 0.5844, "step": 11319 }, { "epoch": 1.6026757273306433, "grad_norm": 8.650867537725533, "learning_rate": 4.997872913096529e-07, "loss": 0.6799, "step": 11320 }, { "epoch": 1.6028173002052806, "grad_norm": 9.03692596427508, "learning_rate": 4.994434891322436e-07, "loss": 0.5959, "step": 11321 }, { "epoch": 1.6029588730799178, "grad_norm": 8.373136854033092, "learning_rate": 4.99099792122599e-07, "loss": 0.6237, "step": 11322 }, { "epoch": 1.603100445954555, "grad_norm": 9.7570243376647, "learning_rate": 4.987562002987858e-07, "loss": 0.6327, "step": 11323 }, { "epoch": 1.6032420188291923, "grad_norm": 8.34756028401787, "learning_rate": 4.984127136788675e-07, "loss": 0.8089, "step": 11324 }, { "epoch": 1.6033835917038295, "grad_norm": 8.206400945572437, "learning_rate": 4.980693322808999e-07, "loss": 0.6696, "step": 11325 }, { "epoch": 1.6035251645784667, "grad_norm": 10.154747860669389, "learning_rate": 4.97726056122936e-07, "loss": 0.7383, "step": 11326 }, { "epoch": 1.603666737453104, "grad_norm": 9.85103122328365, "learning_rate": 4.97382885223019e-07, "loss": 0.6748, "step": 11327 }, { "epoch": 1.6038083103277412, "grad_norm": 8.44262539640273, "learning_rate": 4.970398195991908e-07, "loss": 0.6913, "step": 11328 }, { "epoch": 1.6039498832023784, "grad_norm": 9.086743149336385, "learning_rate": 4.96696859269486e-07, "loss": 0.6555, "step": 11329 }, { "epoch": 1.6040914560770156, "grad_norm": 9.346706573285044, "learning_rate": 4.963540042519333e-07, "loss": 0.6266, "step": 11330 }, { "epoch": 1.6042330289516529, "grad_norm": 8.394842526365844, "learning_rate": 4.96011254564557e-07, "loss": 0.677, "step": 11331 }, { "epoch": 1.60437460182629, "grad_norm": 8.153212175235847, "learning_rate": 4.956686102253744e-07, "loss": 0.5537, "step": 11332 }, { "epoch": 1.6045161747009273, "grad_norm": 8.642472412842189, "learning_rate": 4.953260712523992e-07, "loss": 0.6469, "step": 11333 }, { "epoch": 1.6046577475755646, "grad_norm": 9.2869871755236, "learning_rate": 4.949836376636366e-07, "loss": 0.7087, "step": 11334 }, { "epoch": 1.6047993204502018, "grad_norm": 8.701164202788425, "learning_rate": 4.946413094770902e-07, "loss": 0.6936, "step": 11335 }, { "epoch": 1.604940893324839, "grad_norm": 9.636016090237115, "learning_rate": 4.942990867107547e-07, "loss": 0.6471, "step": 11336 }, { "epoch": 1.6050824661994763, "grad_norm": 9.615976041944874, "learning_rate": 4.939569693826202e-07, "loss": 0.691, "step": 11337 }, { "epoch": 1.6052240390741135, "grad_norm": 8.336003397115356, "learning_rate": 4.936149575106727e-07, "loss": 0.6575, "step": 11338 }, { "epoch": 1.6053656119487507, "grad_norm": 9.7780717555078, "learning_rate": 4.9327305111289e-07, "loss": 0.729, "step": 11339 }, { "epoch": 1.605507184823388, "grad_norm": 9.633207314622888, "learning_rate": 4.929312502072475e-07, "loss": 0.7261, "step": 11340 }, { "epoch": 1.6056487576980252, "grad_norm": 8.817921803581728, "learning_rate": 4.925895548117121e-07, "loss": 0.6969, "step": 11341 }, { "epoch": 1.6057903305726624, "grad_norm": 8.79875333798683, "learning_rate": 4.922479649442477e-07, "loss": 0.7834, "step": 11342 }, { "epoch": 1.6059319034472996, "grad_norm": 8.182889382724316, "learning_rate": 4.919064806228099e-07, "loss": 0.6858, "step": 11343 }, { "epoch": 1.6060734763219369, "grad_norm": 9.485754877354871, "learning_rate": 4.915651018653511e-07, "loss": 0.6831, "step": 11344 }, { "epoch": 1.606215049196574, "grad_norm": 9.951935653650224, "learning_rate": 4.91223828689818e-07, "loss": 0.7654, "step": 11345 }, { "epoch": 1.606356622071211, "grad_norm": 8.957612569039348, "learning_rate": 4.908826611141498e-07, "loss": 0.6674, "step": 11346 }, { "epoch": 1.6064981949458483, "grad_norm": 8.846939643992895, "learning_rate": 4.905415991562834e-07, "loss": 0.6377, "step": 11347 }, { "epoch": 1.6066397678204856, "grad_norm": 7.527074645715158, "learning_rate": 4.902006428341457e-07, "loss": 0.7302, "step": 11348 }, { "epoch": 1.6067813406951228, "grad_norm": 8.472891720304462, "learning_rate": 4.89859792165662e-07, "loss": 0.7326, "step": 11349 }, { "epoch": 1.60692291356976, "grad_norm": 8.760762162866676, "learning_rate": 4.895190471687497e-07, "loss": 0.7355, "step": 11350 }, { "epoch": 1.6070644864443973, "grad_norm": 9.768262534447697, "learning_rate": 4.891784078613218e-07, "loss": 0.7279, "step": 11351 }, { "epoch": 1.6072060593190345, "grad_norm": 9.05501821076928, "learning_rate": 4.888378742612865e-07, "loss": 0.6824, "step": 11352 }, { "epoch": 1.6073476321936717, "grad_norm": 7.7764973661283, "learning_rate": 4.884974463865438e-07, "loss": 0.6625, "step": 11353 }, { "epoch": 1.607489205068309, "grad_norm": 9.74678612583219, "learning_rate": 4.881571242549915e-07, "loss": 0.6791, "step": 11354 }, { "epoch": 1.6076307779429462, "grad_norm": 8.355725327699153, "learning_rate": 4.87816907884518e-07, "loss": 0.7112, "step": 11355 }, { "epoch": 1.6077723508175832, "grad_norm": 9.033638291882967, "learning_rate": 4.874767972930103e-07, "loss": 0.6373, "step": 11356 }, { "epoch": 1.6079139236922204, "grad_norm": 8.685005104182034, "learning_rate": 4.871367924983458e-07, "loss": 0.6453, "step": 11357 }, { "epoch": 1.6080554965668576, "grad_norm": 9.172355366333589, "learning_rate": 4.867968935184e-07, "loss": 0.6716, "step": 11358 }, { "epoch": 1.6081970694414949, "grad_norm": 10.828863737990105, "learning_rate": 4.864571003710405e-07, "loss": 0.8004, "step": 11359 }, { "epoch": 1.608338642316132, "grad_norm": 9.543527968546316, "learning_rate": 4.861174130741292e-07, "loss": 0.755, "step": 11360 }, { "epoch": 1.6084802151907693, "grad_norm": 10.237225271039023, "learning_rate": 4.857778316455245e-07, "loss": 0.7753, "step": 11361 }, { "epoch": 1.6086217880654066, "grad_norm": 9.472703366506535, "learning_rate": 4.854383561030768e-07, "loss": 0.7148, "step": 11362 }, { "epoch": 1.6087633609400438, "grad_norm": 10.380369336681522, "learning_rate": 4.85098986464633e-07, "loss": 0.6743, "step": 11363 }, { "epoch": 1.608904933814681, "grad_norm": 10.264363715086578, "learning_rate": 4.847597227480327e-07, "loss": 0.6401, "step": 11364 }, { "epoch": 1.6090465066893183, "grad_norm": 10.96396700048369, "learning_rate": 4.844205649711118e-07, "loss": 0.6957, "step": 11365 }, { "epoch": 1.6091880795639555, "grad_norm": 11.027173204822718, "learning_rate": 4.840815131516979e-07, "loss": 0.6919, "step": 11366 }, { "epoch": 1.6093296524385927, "grad_norm": 10.090868275752774, "learning_rate": 4.83742567307616e-07, "loss": 0.6942, "step": 11367 }, { "epoch": 1.60947122531323, "grad_norm": 8.639218759184361, "learning_rate": 4.834037274566852e-07, "loss": 0.6783, "step": 11368 }, { "epoch": 1.6096127981878672, "grad_norm": 10.055750128992779, "learning_rate": 4.830649936167156e-07, "loss": 0.7552, "step": 11369 }, { "epoch": 1.6097543710625044, "grad_norm": 10.218091471040317, "learning_rate": 4.827263658055161e-07, "loss": 0.6794, "step": 11370 }, { "epoch": 1.6098959439371416, "grad_norm": 8.786429293045025, "learning_rate": 4.823878440408866e-07, "loss": 0.7185, "step": 11371 }, { "epoch": 1.6100375168117789, "grad_norm": 9.788258926764392, "learning_rate": 4.820494283406238e-07, "loss": 0.7605, "step": 11372 }, { "epoch": 1.610179089686416, "grad_norm": 9.35128878110797, "learning_rate": 4.817111187225184e-07, "loss": 0.7751, "step": 11373 }, { "epoch": 1.6103206625610533, "grad_norm": 13.44469454474021, "learning_rate": 4.813729152043542e-07, "loss": 0.6815, "step": 11374 }, { "epoch": 1.6104622354356906, "grad_norm": 9.044980287208817, "learning_rate": 4.810348178039112e-07, "loss": 0.6193, "step": 11375 }, { "epoch": 1.6106038083103278, "grad_norm": 7.896216498130707, "learning_rate": 4.806968265389619e-07, "loss": 0.6811, "step": 11376 }, { "epoch": 1.610745381184965, "grad_norm": 9.177082825284346, "learning_rate": 4.803589414272752e-07, "loss": 0.7561, "step": 11377 }, { "epoch": 1.6108869540596022, "grad_norm": 9.452129418777359, "learning_rate": 4.800211624866128e-07, "loss": 0.7225, "step": 11378 }, { "epoch": 1.6110285269342395, "grad_norm": 9.375148517703808, "learning_rate": 4.796834897347319e-07, "loss": 0.7739, "step": 11379 }, { "epoch": 1.6111700998088767, "grad_norm": 10.513242817329619, "learning_rate": 4.793459231893838e-07, "loss": 0.7959, "step": 11380 }, { "epoch": 1.611311672683514, "grad_norm": 9.709288674257172, "learning_rate": 4.790084628683131e-07, "loss": 0.6692, "step": 11381 }, { "epoch": 1.6114532455581512, "grad_norm": 9.20418232930412, "learning_rate": 4.786711087892613e-07, "loss": 0.783, "step": 11382 }, { "epoch": 1.6115948184327884, "grad_norm": 7.7626325955330655, "learning_rate": 4.783338609699614e-07, "loss": 0.6678, "step": 11383 }, { "epoch": 1.6117363913074256, "grad_norm": 9.59504974209215, "learning_rate": 4.779967194281438e-07, "loss": 0.7117, "step": 11384 }, { "epoch": 1.6118779641820629, "grad_norm": 8.418248798995958, "learning_rate": 4.776596841815304e-07, "loss": 0.694, "step": 11385 }, { "epoch": 1.6120195370567, "grad_norm": 9.808030975616479, "learning_rate": 4.773227552478399e-07, "loss": 0.6356, "step": 11386 }, { "epoch": 1.6121611099313373, "grad_norm": 8.219636934527536, "learning_rate": 4.769859326447834e-07, "loss": 0.7579, "step": 11387 }, { "epoch": 1.6123026828059743, "grad_norm": 9.542539820359385, "learning_rate": 4.7664921639006877e-07, "loss": 0.7186, "step": 11388 }, { "epoch": 1.6124442556806116, "grad_norm": 9.730060631620931, "learning_rate": 4.7631260650139595e-07, "loss": 0.7117, "step": 11389 }, { "epoch": 1.6125858285552488, "grad_norm": 9.03914774835557, "learning_rate": 4.7597610299645993e-07, "loss": 0.7493, "step": 11390 }, { "epoch": 1.612727401429886, "grad_norm": 9.969221432818237, "learning_rate": 4.7563970589295185e-07, "loss": 0.6679, "step": 11391 }, { "epoch": 1.6128689743045233, "grad_norm": 8.609928397170009, "learning_rate": 4.753034152085542e-07, "loss": 0.634, "step": 11392 }, { "epoch": 1.6130105471791605, "grad_norm": 8.82909805201581, "learning_rate": 4.7496723096094684e-07, "loss": 0.666, "step": 11393 }, { "epoch": 1.6131521200537977, "grad_norm": 8.792997383060888, "learning_rate": 4.7463115316780163e-07, "loss": 0.6116, "step": 11394 }, { "epoch": 1.613293692928435, "grad_norm": 8.800625206672553, "learning_rate": 4.7429518184678667e-07, "loss": 0.6258, "step": 11395 }, { "epoch": 1.6134352658030722, "grad_norm": 9.836148090850235, "learning_rate": 4.73959317015564e-07, "loss": 0.7514, "step": 11396 }, { "epoch": 1.6135768386777094, "grad_norm": 7.914783079701583, "learning_rate": 4.736235586917889e-07, "loss": 0.6735, "step": 11397 }, { "epoch": 1.6137184115523464, "grad_norm": 9.28022981424886, "learning_rate": 4.732879068931132e-07, "loss": 0.5911, "step": 11398 }, { "epoch": 1.6138599844269836, "grad_norm": 9.108394615849017, "learning_rate": 4.7295236163718006e-07, "loss": 0.6351, "step": 11399 }, { "epoch": 1.6140015573016209, "grad_norm": 7.9552191055209605, "learning_rate": 4.7261692294163134e-07, "loss": 0.5912, "step": 11400 }, { "epoch": 1.614143130176258, "grad_norm": 7.490760452516315, "learning_rate": 4.72281590824098e-07, "loss": 0.7308, "step": 11401 }, { "epoch": 1.6142847030508953, "grad_norm": 8.225586807049847, "learning_rate": 4.719463653022094e-07, "loss": 0.6247, "step": 11402 }, { "epoch": 1.6144262759255326, "grad_norm": 9.858659189298622, "learning_rate": 4.7161124639358873e-07, "loss": 0.6583, "step": 11403 }, { "epoch": 1.6145678488001698, "grad_norm": 10.084339018131887, "learning_rate": 4.7127623411585194e-07, "loss": 0.6692, "step": 11404 }, { "epoch": 1.614709421674807, "grad_norm": 9.420838863658561, "learning_rate": 4.7094132848661154e-07, "loss": 0.6636, "step": 11405 }, { "epoch": 1.6148509945494443, "grad_norm": 11.347797438880692, "learning_rate": 4.706065295234719e-07, "loss": 0.7669, "step": 11406 }, { "epoch": 1.6149925674240815, "grad_norm": 9.429038188990095, "learning_rate": 4.702718372440343e-07, "loss": 0.6603, "step": 11407 }, { "epoch": 1.6151341402987187, "grad_norm": 9.375603007950758, "learning_rate": 4.699372516658923e-07, "loss": 0.7176, "step": 11408 }, { "epoch": 1.615275713173356, "grad_norm": 9.597909032080162, "learning_rate": 4.6960277280663574e-07, "loss": 0.7631, "step": 11409 }, { "epoch": 1.6154172860479932, "grad_norm": 9.582273695592612, "learning_rate": 4.692684006838477e-07, "loss": 0.7017, "step": 11410 }, { "epoch": 1.6155588589226304, "grad_norm": 9.622407241972827, "learning_rate": 4.68934135315105e-07, "loss": 0.6959, "step": 11411 }, { "epoch": 1.6157004317972676, "grad_norm": 7.297690499130102, "learning_rate": 4.685999767179808e-07, "loss": 0.6402, "step": 11412 }, { "epoch": 1.6158420046719049, "grad_norm": 9.329987562034306, "learning_rate": 4.6826592491004075e-07, "loss": 0.7022, "step": 11413 }, { "epoch": 1.615983577546542, "grad_norm": 9.453916305336152, "learning_rate": 4.679319799088466e-07, "loss": 0.7008, "step": 11414 }, { "epoch": 1.6161251504211793, "grad_norm": 8.334790623717607, "learning_rate": 4.675981417319528e-07, "loss": 0.6754, "step": 11415 }, { "epoch": 1.6162667232958166, "grad_norm": 8.923919817527944, "learning_rate": 4.6726441039690955e-07, "loss": 0.6894, "step": 11416 }, { "epoch": 1.6164082961704538, "grad_norm": 9.799759095014148, "learning_rate": 4.669307859212599e-07, "loss": 0.7094, "step": 11417 }, { "epoch": 1.616549869045091, "grad_norm": 7.171067269244467, "learning_rate": 4.665972683225431e-07, "loss": 0.6101, "step": 11418 }, { "epoch": 1.6166914419197282, "grad_norm": 9.23393504067736, "learning_rate": 4.6626385761829234e-07, "loss": 0.6712, "step": 11419 }, { "epoch": 1.6168330147943655, "grad_norm": 7.451353874333635, "learning_rate": 4.6593055382603334e-07, "loss": 0.6431, "step": 11420 }, { "epoch": 1.6169745876690027, "grad_norm": 8.64922656531916, "learning_rate": 4.655973569632899e-07, "loss": 0.7753, "step": 11421 }, { "epoch": 1.61711616054364, "grad_norm": 9.666286154087066, "learning_rate": 4.6526426704757545e-07, "loss": 0.6739, "step": 11422 }, { "epoch": 1.6172577334182772, "grad_norm": 9.26344929925791, "learning_rate": 4.6493128409640153e-07, "loss": 0.6511, "step": 11423 }, { "epoch": 1.6173993062929144, "grad_norm": 8.98556632726506, "learning_rate": 4.6459840812727227e-07, "loss": 0.6417, "step": 11424 }, { "epoch": 1.6175408791675516, "grad_norm": 8.071338628950194, "learning_rate": 4.642656391576869e-07, "loss": 0.6523, "step": 11425 }, { "epoch": 1.6176824520421889, "grad_norm": 7.5506908176491025, "learning_rate": 4.6393297720513957e-07, "loss": 0.6697, "step": 11426 }, { "epoch": 1.617824024916826, "grad_norm": 9.142190704579852, "learning_rate": 4.6360042228711684e-07, "loss": 0.7108, "step": 11427 }, { "epoch": 1.6179655977914633, "grad_norm": 8.477730694272458, "learning_rate": 4.6326797442110217e-07, "loss": 0.7915, "step": 11428 }, { "epoch": 1.6181071706661003, "grad_norm": 7.370136791186496, "learning_rate": 4.629356336245708e-07, "loss": 0.6783, "step": 11429 }, { "epoch": 1.6182487435407376, "grad_norm": 8.635371509858812, "learning_rate": 4.626033999149948e-07, "loss": 0.6874, "step": 11430 }, { "epoch": 1.6183903164153748, "grad_norm": 7.890070950536008, "learning_rate": 4.622712733098386e-07, "loss": 0.656, "step": 11431 }, { "epoch": 1.618531889290012, "grad_norm": 9.92825482325314, "learning_rate": 4.619392538265624e-07, "loss": 0.6611, "step": 11432 }, { "epoch": 1.6186734621646492, "grad_norm": 9.3956713835186, "learning_rate": 4.6160734148262027e-07, "loss": 0.7109, "step": 11433 }, { "epoch": 1.6188150350392865, "grad_norm": 10.675548987822886, "learning_rate": 4.612755362954596e-07, "loss": 0.8122, "step": 11434 }, { "epoch": 1.6189566079139237, "grad_norm": 10.835552712053206, "learning_rate": 4.609438382825246e-07, "loss": 0.6872, "step": 11435 }, { "epoch": 1.619098180788561, "grad_norm": 9.459532226223708, "learning_rate": 4.6061224746125067e-07, "loss": 0.6343, "step": 11436 }, { "epoch": 1.6192397536631982, "grad_norm": 8.819598862571908, "learning_rate": 4.602807638490711e-07, "loss": 0.7601, "step": 11437 }, { "epoch": 1.6193813265378354, "grad_norm": 6.619523573958432, "learning_rate": 4.5994938746341033e-07, "loss": 0.6275, "step": 11438 }, { "epoch": 1.6195228994124724, "grad_norm": 8.527484663717749, "learning_rate": 4.5961811832168965e-07, "loss": 0.6477, "step": 11439 }, { "epoch": 1.6196644722871096, "grad_norm": 7.962396220394752, "learning_rate": 4.592869564413227e-07, "loss": 0.6572, "step": 11440 }, { "epoch": 1.6198060451617469, "grad_norm": 9.425894163098045, "learning_rate": 4.5895590183971854e-07, "loss": 0.687, "step": 11441 }, { "epoch": 1.619947618036384, "grad_norm": 7.1917101926875135, "learning_rate": 4.5862495453428216e-07, "loss": 0.7021, "step": 11442 }, { "epoch": 1.6200891909110213, "grad_norm": 12.785266646139634, "learning_rate": 4.5829411454240856e-07, "loss": 0.7686, "step": 11443 }, { "epoch": 1.6202307637856586, "grad_norm": 9.00061965504624, "learning_rate": 4.579633818814916e-07, "loss": 0.6098, "step": 11444 }, { "epoch": 1.6203723366602958, "grad_norm": 9.498761146479634, "learning_rate": 4.576327565689165e-07, "loss": 0.654, "step": 11445 }, { "epoch": 1.620513909534933, "grad_norm": 7.7064028685320585, "learning_rate": 4.5730223862206493e-07, "loss": 0.6574, "step": 11446 }, { "epoch": 1.6206554824095702, "grad_norm": 9.146967951462283, "learning_rate": 4.569718280583113e-07, "loss": 0.7061, "step": 11447 }, { "epoch": 1.6207970552842075, "grad_norm": 8.999064926739003, "learning_rate": 4.566415248950251e-07, "loss": 0.693, "step": 11448 }, { "epoch": 1.6209386281588447, "grad_norm": 7.329992776115157, "learning_rate": 4.5631132914957076e-07, "loss": 0.7273, "step": 11449 }, { "epoch": 1.621080201033482, "grad_norm": 9.714233526522435, "learning_rate": 4.5598124083930577e-07, "loss": 0.7483, "step": 11450 }, { "epoch": 1.6212217739081192, "grad_norm": 10.647669763799296, "learning_rate": 4.556512599815832e-07, "loss": 0.6602, "step": 11451 }, { "epoch": 1.6213633467827564, "grad_norm": 9.301900476678265, "learning_rate": 4.553213865937492e-07, "loss": 0.6044, "step": 11452 }, { "epoch": 1.6215049196573936, "grad_norm": 7.8170579965505915, "learning_rate": 4.5499162069314567e-07, "loss": 0.6794, "step": 11453 }, { "epoch": 1.6216464925320309, "grad_norm": 9.588094529240946, "learning_rate": 4.5466196229710797e-07, "loss": 0.6437, "step": 11454 }, { "epoch": 1.621788065406668, "grad_norm": 10.38655788588293, "learning_rate": 4.5433241142296524e-07, "loss": 0.7143, "step": 11455 }, { "epoch": 1.6219296382813053, "grad_norm": 8.94410429545347, "learning_rate": 4.5400296808804313e-07, "loss": 0.6457, "step": 11456 }, { "epoch": 1.6220712111559425, "grad_norm": 10.000306696957004, "learning_rate": 4.536736323096586e-07, "loss": 0.573, "step": 11457 }, { "epoch": 1.6222127840305798, "grad_norm": 8.3188998030452, "learning_rate": 4.5334440410512605e-07, "loss": 0.6415, "step": 11458 }, { "epoch": 1.622354356905217, "grad_norm": 10.00998418206519, "learning_rate": 4.5301528349175144e-07, "loss": 0.6231, "step": 11459 }, { "epoch": 1.6224959297798542, "grad_norm": 9.032891675374113, "learning_rate": 4.526862704868376e-07, "loss": 0.5963, "step": 11460 }, { "epoch": 1.6226375026544915, "grad_norm": 11.569708479338406, "learning_rate": 4.5235736510767957e-07, "loss": 0.8147, "step": 11461 }, { "epoch": 1.6227790755291287, "grad_norm": 8.363112482241585, "learning_rate": 4.520285673715688e-07, "loss": 0.7044, "step": 11462 }, { "epoch": 1.622920648403766, "grad_norm": 9.564632003572825, "learning_rate": 4.5169987729578897e-07, "loss": 0.7062, "step": 11463 }, { "epoch": 1.6230622212784032, "grad_norm": 8.42123948567535, "learning_rate": 4.5137129489761874e-07, "loss": 0.6373, "step": 11464 }, { "epoch": 1.6232037941530404, "grad_norm": 11.56046817557475, "learning_rate": 4.510428201943326e-07, "loss": 0.806, "step": 11465 }, { "epoch": 1.6233453670276776, "grad_norm": 9.41217726195947, "learning_rate": 4.5071445320319706e-07, "loss": 0.603, "step": 11466 }, { "epoch": 1.6234869399023149, "grad_norm": 8.892499836644731, "learning_rate": 4.5038619394147554e-07, "loss": 0.5813, "step": 11467 }, { "epoch": 1.623628512776952, "grad_norm": 8.754317608027856, "learning_rate": 4.500580424264225e-07, "loss": 0.6344, "step": 11468 }, { "epoch": 1.6237700856515893, "grad_norm": 10.945446680677348, "learning_rate": 4.497299986752901e-07, "loss": 0.7388, "step": 11469 }, { "epoch": 1.6239116585262263, "grad_norm": 10.294733082301835, "learning_rate": 4.4940206270532333e-07, "loss": 0.7516, "step": 11470 }, { "epoch": 1.6240532314008636, "grad_norm": 9.521999385110202, "learning_rate": 4.4907423453376034e-07, "loss": 0.6498, "step": 11471 }, { "epoch": 1.6241948042755008, "grad_norm": 7.301787525956829, "learning_rate": 4.487465141778366e-07, "loss": 0.685, "step": 11472 }, { "epoch": 1.624336377150138, "grad_norm": 9.663006933813598, "learning_rate": 4.4841890165477825e-07, "loss": 0.7621, "step": 11473 }, { "epoch": 1.6244779500247752, "grad_norm": 8.837836023657301, "learning_rate": 4.480913969818099e-07, "loss": 0.7022, "step": 11474 }, { "epoch": 1.6246195228994125, "grad_norm": 9.779451540056652, "learning_rate": 4.4776400017614546e-07, "loss": 0.5978, "step": 11475 }, { "epoch": 1.6247610957740497, "grad_norm": 9.145390551253726, "learning_rate": 4.474367112549974e-07, "loss": 0.6771, "step": 11476 }, { "epoch": 1.624902668648687, "grad_norm": 9.582183326606076, "learning_rate": 4.471095302355716e-07, "loss": 0.7267, "step": 11477 }, { "epoch": 1.6250442415233242, "grad_norm": 8.833250033387698, "learning_rate": 4.467824571350665e-07, "loss": 0.5993, "step": 11478 }, { "epoch": 1.6251858143979614, "grad_norm": 8.942306399917193, "learning_rate": 4.4645549197067736e-07, "loss": 0.6711, "step": 11479 }, { "epoch": 1.6253273872725986, "grad_norm": 8.822260652464283, "learning_rate": 4.461286347595911e-07, "loss": 0.6852, "step": 11480 }, { "epoch": 1.6254689601472356, "grad_norm": 9.24863609362403, "learning_rate": 4.4580188551899164e-07, "loss": 0.6791, "step": 11481 }, { "epoch": 1.6256105330218729, "grad_norm": 9.085104066864712, "learning_rate": 4.4547524426605484e-07, "loss": 0.616, "step": 11482 }, { "epoch": 1.62575210589651, "grad_norm": 8.707752798585267, "learning_rate": 4.451487110179531e-07, "loss": 0.7745, "step": 11483 }, { "epoch": 1.6258936787711473, "grad_norm": 8.539512350198882, "learning_rate": 4.448222857918508e-07, "loss": 0.64, "step": 11484 }, { "epoch": 1.6260352516457846, "grad_norm": 9.546986081340261, "learning_rate": 4.444959686049094e-07, "loss": 0.6758, "step": 11485 }, { "epoch": 1.6261768245204218, "grad_norm": 8.363206901380174, "learning_rate": 4.441697594742819e-07, "loss": 0.7103, "step": 11486 }, { "epoch": 1.626318397395059, "grad_norm": 9.966503882806453, "learning_rate": 4.4384365841711684e-07, "loss": 0.6463, "step": 11487 }, { "epoch": 1.6264599702696962, "grad_norm": 9.712274583633363, "learning_rate": 4.4351766545055826e-07, "loss": 0.7125, "step": 11488 }, { "epoch": 1.6266015431443335, "grad_norm": 8.768532010860671, "learning_rate": 4.4319178059174186e-07, "loss": 0.618, "step": 11489 }, { "epoch": 1.6267431160189707, "grad_norm": 8.73250465635388, "learning_rate": 4.428660038578006e-07, "loss": 0.6391, "step": 11490 }, { "epoch": 1.626884688893608, "grad_norm": 8.007233210754277, "learning_rate": 4.4254033526585917e-07, "loss": 0.7029, "step": 11491 }, { "epoch": 1.6270262617682452, "grad_norm": 9.21371394819908, "learning_rate": 4.42214774833038e-07, "loss": 0.6958, "step": 11492 }, { "epoch": 1.6271678346428824, "grad_norm": 9.366643003748353, "learning_rate": 4.418893225764526e-07, "loss": 0.7206, "step": 11493 }, { "epoch": 1.6273094075175196, "grad_norm": 9.3346932192525, "learning_rate": 4.4156397851321003e-07, "loss": 0.6426, "step": 11494 }, { "epoch": 1.6274509803921569, "grad_norm": 8.919538469220605, "learning_rate": 4.412387426604156e-07, "loss": 0.7575, "step": 11495 }, { "epoch": 1.627592553266794, "grad_norm": 9.697973484046326, "learning_rate": 4.4091361503516424e-07, "loss": 0.7495, "step": 11496 }, { "epoch": 1.6277341261414313, "grad_norm": 8.326962388068333, "learning_rate": 4.405885956545494e-07, "loss": 0.7106, "step": 11497 }, { "epoch": 1.6278756990160685, "grad_norm": 7.756897594826869, "learning_rate": 4.402636845356559e-07, "loss": 0.6976, "step": 11498 }, { "epoch": 1.6280172718907058, "grad_norm": 9.2437009241376, "learning_rate": 4.3993888169556463e-07, "loss": 0.6776, "step": 11499 }, { "epoch": 1.628158844765343, "grad_norm": 11.046631547330637, "learning_rate": 4.3961418715135097e-07, "loss": 0.7905, "step": 11500 }, { "epoch": 1.6283004176399802, "grad_norm": 9.96527747059348, "learning_rate": 4.3928960092008254e-07, "loss": 0.7008, "step": 11501 }, { "epoch": 1.6284419905146175, "grad_norm": 9.651779374435236, "learning_rate": 4.389651230188241e-07, "loss": 0.6438, "step": 11502 }, { "epoch": 1.6285835633892547, "grad_norm": 9.976208422896462, "learning_rate": 4.386407534646314e-07, "loss": 0.7846, "step": 11503 }, { "epoch": 1.628725136263892, "grad_norm": 8.229301612487957, "learning_rate": 4.3831649227455806e-07, "loss": 0.7041, "step": 11504 }, { "epoch": 1.6288667091385292, "grad_norm": 8.942331995272367, "learning_rate": 4.3799233946564904e-07, "loss": 0.7351, "step": 11505 }, { "epoch": 1.6290082820131664, "grad_norm": 9.474026157387332, "learning_rate": 4.3766829505494574e-07, "loss": 0.6708, "step": 11506 }, { "epoch": 1.6291498548878036, "grad_norm": 7.862046745105043, "learning_rate": 4.3734435905948226e-07, "loss": 0.6301, "step": 11507 }, { "epoch": 1.6292914277624408, "grad_norm": 8.486172817551825, "learning_rate": 4.370205314962872e-07, "loss": 0.6563, "step": 11508 }, { "epoch": 1.629433000637078, "grad_norm": 9.446817207222741, "learning_rate": 4.366968123823856e-07, "loss": 0.6653, "step": 11509 }, { "epoch": 1.6295745735117153, "grad_norm": 9.283370575296521, "learning_rate": 4.36373201734793e-07, "loss": 0.6823, "step": 11510 }, { "epoch": 1.6297161463863525, "grad_norm": 9.120280260163378, "learning_rate": 4.360496995705235e-07, "loss": 0.614, "step": 11511 }, { "epoch": 1.6298577192609895, "grad_norm": 8.95222465954583, "learning_rate": 4.3572630590658136e-07, "loss": 0.6769, "step": 11512 }, { "epoch": 1.6299992921356268, "grad_norm": 10.042682348356937, "learning_rate": 4.354030207599691e-07, "loss": 0.6894, "step": 11513 }, { "epoch": 1.630140865010264, "grad_norm": 8.723575473564079, "learning_rate": 4.3507984414767974e-07, "loss": 0.6123, "step": 11514 }, { "epoch": 1.6302824378849012, "grad_norm": 9.291006797544156, "learning_rate": 4.347567760867036e-07, "loss": 0.6782, "step": 11515 }, { "epoch": 1.6304240107595385, "grad_norm": 8.897647812578315, "learning_rate": 4.344338165940248e-07, "loss": 0.6738, "step": 11516 }, { "epoch": 1.6305655836341757, "grad_norm": 9.245121262097047, "learning_rate": 4.341109656866188e-07, "loss": 0.6181, "step": 11517 }, { "epoch": 1.630707156508813, "grad_norm": 8.681680829842225, "learning_rate": 4.337882233814597e-07, "loss": 0.5501, "step": 11518 }, { "epoch": 1.6308487293834502, "grad_norm": 10.064499934698539, "learning_rate": 4.3346558969551253e-07, "loss": 0.704, "step": 11519 }, { "epoch": 1.6309903022580874, "grad_norm": 11.334277020228907, "learning_rate": 4.331430646457391e-07, "loss": 0.7895, "step": 11520 }, { "epoch": 1.6311318751327246, "grad_norm": 9.9397783846491, "learning_rate": 4.3282064824909265e-07, "loss": 0.6505, "step": 11521 }, { "epoch": 1.6312734480073616, "grad_norm": 11.244298210583672, "learning_rate": 4.324983405225236e-07, "loss": 0.7948, "step": 11522 }, { "epoch": 1.6314150208819989, "grad_norm": 7.6475925621773495, "learning_rate": 4.321761414829759e-07, "loss": 0.7356, "step": 11523 }, { "epoch": 1.631556593756636, "grad_norm": 10.004729106387014, "learning_rate": 4.3185405114738593e-07, "loss": 0.6943, "step": 11524 }, { "epoch": 1.6316981666312733, "grad_norm": 10.08879076181306, "learning_rate": 4.3153206953268715e-07, "loss": 0.7024, "step": 11525 }, { "epoch": 1.6318397395059105, "grad_norm": 10.342525167639755, "learning_rate": 4.312101966558044e-07, "loss": 0.7503, "step": 11526 }, { "epoch": 1.6319813123805478, "grad_norm": 10.117137352071662, "learning_rate": 4.308884325336596e-07, "loss": 0.736, "step": 11527 }, { "epoch": 1.632122885255185, "grad_norm": 9.044976491481076, "learning_rate": 4.305667771831673e-07, "loss": 0.7524, "step": 11528 }, { "epoch": 1.6322644581298222, "grad_norm": 9.12242738175806, "learning_rate": 4.302452306212357e-07, "loss": 0.7279, "step": 11529 }, { "epoch": 1.6324060310044595, "grad_norm": 9.816726843277618, "learning_rate": 4.2992379286476984e-07, "loss": 0.7049, "step": 11530 }, { "epoch": 1.6325476038790967, "grad_norm": 9.271766415620075, "learning_rate": 4.296024639306659e-07, "loss": 0.6448, "step": 11531 }, { "epoch": 1.632689176753734, "grad_norm": 7.712234279809575, "learning_rate": 4.292812438358174e-07, "loss": 0.6454, "step": 11532 }, { "epoch": 1.6328307496283712, "grad_norm": 9.58894869168246, "learning_rate": 4.2896013259710905e-07, "loss": 0.681, "step": 11533 }, { "epoch": 1.6329723225030084, "grad_norm": 8.754401707448151, "learning_rate": 4.286391302314233e-07, "loss": 0.6994, "step": 11534 }, { "epoch": 1.6331138953776456, "grad_norm": 7.709427984370268, "learning_rate": 4.2831823675563324e-07, "loss": 0.7075, "step": 11535 }, { "epoch": 1.6332554682522828, "grad_norm": 8.63938169174305, "learning_rate": 4.279974521866093e-07, "loss": 0.6691, "step": 11536 }, { "epoch": 1.63339704112692, "grad_norm": 9.295742036401624, "learning_rate": 4.2767677654121375e-07, "loss": 0.8074, "step": 11537 }, { "epoch": 1.6335386140015573, "grad_norm": 7.5337434178816185, "learning_rate": 4.2735620983630543e-07, "loss": 0.6122, "step": 11538 }, { "epoch": 1.6336801868761945, "grad_norm": 10.297381421893103, "learning_rate": 4.2703575208873585e-07, "loss": 0.6645, "step": 11539 }, { "epoch": 1.6338217597508318, "grad_norm": 8.029471946446387, "learning_rate": 4.267154033153503e-07, "loss": 0.6994, "step": 11540 }, { "epoch": 1.633963332625469, "grad_norm": 8.87694173713516, "learning_rate": 4.26395163532991e-07, "loss": 0.7059, "step": 11541 }, { "epoch": 1.6341049055001062, "grad_norm": 9.108764837385635, "learning_rate": 4.2607503275849116e-07, "loss": 0.7363, "step": 11542 }, { "epoch": 1.6342464783747435, "grad_norm": 9.173160911240771, "learning_rate": 4.2575501100868085e-07, "loss": 0.6928, "step": 11543 }, { "epoch": 1.6343880512493807, "grad_norm": 8.824209671725077, "learning_rate": 4.2543509830038243e-07, "loss": 0.6975, "step": 11544 }, { "epoch": 1.634529624124018, "grad_norm": 10.084308377457532, "learning_rate": 4.2511529465041417e-07, "loss": 0.7597, "step": 11545 }, { "epoch": 1.6346711969986552, "grad_norm": 9.654048137790559, "learning_rate": 4.2479560007558845e-07, "loss": 0.6962, "step": 11546 }, { "epoch": 1.6348127698732924, "grad_norm": 10.598318549614442, "learning_rate": 4.2447601459270987e-07, "loss": 0.6557, "step": 11547 }, { "epoch": 1.6349543427479296, "grad_norm": 8.398949384836355, "learning_rate": 4.241565382185808e-07, "loss": 0.6915, "step": 11548 }, { "epoch": 1.6350959156225668, "grad_norm": 8.925153834622666, "learning_rate": 4.238371709699937e-07, "loss": 0.7228, "step": 11549 }, { "epoch": 1.635237488497204, "grad_norm": 8.838155857553224, "learning_rate": 4.2351791286373847e-07, "loss": 0.6924, "step": 11550 }, { "epoch": 1.6353790613718413, "grad_norm": 9.507247569494352, "learning_rate": 4.231987639165988e-07, "loss": 0.6871, "step": 11551 }, { "epoch": 1.6355206342464785, "grad_norm": 9.30542337757073, "learning_rate": 4.2287972414535084e-07, "loss": 0.6682, "step": 11552 }, { "epoch": 1.6356622071211155, "grad_norm": 9.46589844322251, "learning_rate": 4.2256079356676776e-07, "loss": 0.7545, "step": 11553 }, { "epoch": 1.6358037799957528, "grad_norm": 8.734401204678937, "learning_rate": 4.222419721976143e-07, "loss": 0.7151, "step": 11554 }, { "epoch": 1.63594535287039, "grad_norm": 9.956085674991625, "learning_rate": 4.2192326005465134e-07, "loss": 0.7197, "step": 11555 }, { "epoch": 1.6360869257450272, "grad_norm": 9.443299791370972, "learning_rate": 4.216046571546328e-07, "loss": 0.663, "step": 11556 }, { "epoch": 1.6362284986196645, "grad_norm": 10.11885204393323, "learning_rate": 4.212861635143084e-07, "loss": 0.707, "step": 11557 }, { "epoch": 1.6363700714943017, "grad_norm": 10.376133868292872, "learning_rate": 4.2096777915041964e-07, "loss": 0.6965, "step": 11558 }, { "epoch": 1.636511644368939, "grad_norm": 10.400522483025643, "learning_rate": 4.206495040797051e-07, "loss": 0.7424, "step": 11559 }, { "epoch": 1.6366532172435762, "grad_norm": 8.727290301594234, "learning_rate": 4.203313383188959e-07, "loss": 0.6773, "step": 11560 }, { "epoch": 1.6367947901182134, "grad_norm": 10.536726526582916, "learning_rate": 4.200132818847169e-07, "loss": 0.5392, "step": 11561 }, { "epoch": 1.6369363629928506, "grad_norm": 9.593575441841315, "learning_rate": 4.1969533479388925e-07, "loss": 0.7417, "step": 11562 }, { "epoch": 1.6370779358674878, "grad_norm": 10.6445516771593, "learning_rate": 4.193774970631262e-07, "loss": 0.7336, "step": 11563 }, { "epoch": 1.6372195087421249, "grad_norm": 8.934118951822418, "learning_rate": 4.1905976870913747e-07, "loss": 0.6944, "step": 11564 }, { "epoch": 1.637361081616762, "grad_norm": 8.84212842976492, "learning_rate": 4.1874214974862436e-07, "loss": 0.6934, "step": 11565 }, { "epoch": 1.6375026544913993, "grad_norm": 8.844074364634634, "learning_rate": 4.1842464019828444e-07, "loss": 0.6412, "step": 11566 }, { "epoch": 1.6376442273660365, "grad_norm": 8.885276754573889, "learning_rate": 4.1810724007480987e-07, "loss": 0.7173, "step": 11567 }, { "epoch": 1.6377858002406738, "grad_norm": 10.057463528735063, "learning_rate": 4.1778994939488476e-07, "loss": 0.5894, "step": 11568 }, { "epoch": 1.637927373115311, "grad_norm": 8.792442492442595, "learning_rate": 4.174727681751906e-07, "loss": 0.731, "step": 11569 }, { "epoch": 1.6380689459899482, "grad_norm": 8.727181900158666, "learning_rate": 4.1715569643239916e-07, "loss": 0.6604, "step": 11570 }, { "epoch": 1.6382105188645855, "grad_norm": 8.347586332036391, "learning_rate": 4.1683873418318007e-07, "loss": 0.6945, "step": 11571 }, { "epoch": 1.6383520917392227, "grad_norm": 9.87123132571323, "learning_rate": 4.1652188144419516e-07, "loss": 0.74, "step": 11572 }, { "epoch": 1.63849366461386, "grad_norm": 8.408573099075728, "learning_rate": 4.1620513823210115e-07, "loss": 0.7346, "step": 11573 }, { "epoch": 1.6386352374884972, "grad_norm": 7.642006840790415, "learning_rate": 4.1588850456354995e-07, "loss": 0.7438, "step": 11574 }, { "epoch": 1.6387768103631344, "grad_norm": 8.6255484144035, "learning_rate": 4.1557198045518554e-07, "loss": 0.7651, "step": 11575 }, { "epoch": 1.6389183832377716, "grad_norm": 8.652569480689772, "learning_rate": 4.152555659236485e-07, "loss": 0.6253, "step": 11576 }, { "epoch": 1.6390599561124088, "grad_norm": 8.217565879081882, "learning_rate": 4.1493926098557127e-07, "loss": 0.7678, "step": 11577 }, { "epoch": 1.639201528987046, "grad_norm": 8.299040718255437, "learning_rate": 4.146230656575831e-07, "loss": 0.6932, "step": 11578 }, { "epoch": 1.6393431018616833, "grad_norm": 8.846484728592632, "learning_rate": 4.1430697995630486e-07, "loss": 0.6102, "step": 11579 }, { "epoch": 1.6394846747363205, "grad_norm": 10.347768688635057, "learning_rate": 4.139910038983541e-07, "loss": 0.6747, "step": 11580 }, { "epoch": 1.6396262476109578, "grad_norm": 7.951189626300499, "learning_rate": 4.136751375003406e-07, "loss": 0.6697, "step": 11581 }, { "epoch": 1.639767820485595, "grad_norm": 9.119989979387098, "learning_rate": 4.133593807788691e-07, "loss": 0.6706, "step": 11582 }, { "epoch": 1.6399093933602322, "grad_norm": 10.213481326446143, "learning_rate": 4.1304373375053995e-07, "loss": 0.6586, "step": 11583 }, { "epoch": 1.6400509662348695, "grad_norm": 9.242668339272223, "learning_rate": 4.127281964319446e-07, "loss": 0.6503, "step": 11584 }, { "epoch": 1.6401925391095067, "grad_norm": 8.576510584933512, "learning_rate": 4.1241276883967256e-07, "loss": 0.6705, "step": 11585 }, { "epoch": 1.640334111984144, "grad_norm": 8.336438579568494, "learning_rate": 4.120974509903039e-07, "loss": 0.6615, "step": 11586 }, { "epoch": 1.6404756848587811, "grad_norm": 7.865321675856447, "learning_rate": 4.117822429004159e-07, "loss": 0.6306, "step": 11587 }, { "epoch": 1.6406172577334184, "grad_norm": 8.703027693856837, "learning_rate": 4.114671445865781e-07, "loss": 0.6537, "step": 11588 }, { "epoch": 1.6407588306080556, "grad_norm": 7.860268998918012, "learning_rate": 4.11152156065355e-07, "loss": 0.6043, "step": 11589 }, { "epoch": 1.6409004034826928, "grad_norm": 10.373854976113252, "learning_rate": 4.1083727735330677e-07, "loss": 0.6454, "step": 11590 }, { "epoch": 1.64104197635733, "grad_norm": 9.728939688170273, "learning_rate": 4.105225084669839e-07, "loss": 0.6955, "step": 11591 }, { "epoch": 1.6411835492319673, "grad_norm": 8.752862516564893, "learning_rate": 4.1020784942293557e-07, "loss": 0.6753, "step": 11592 }, { "epoch": 1.6413251221066045, "grad_norm": 8.787560201468528, "learning_rate": 4.0989330023770146e-07, "loss": 0.7056, "step": 11593 }, { "epoch": 1.6414666949812418, "grad_norm": 9.77421809432697, "learning_rate": 4.0957886092781897e-07, "loss": 0.6746, "step": 11594 }, { "epoch": 1.6416082678558788, "grad_norm": 10.413809748198767, "learning_rate": 4.092645315098165e-07, "loss": 0.643, "step": 11595 }, { "epoch": 1.641749840730516, "grad_norm": 8.82785320032367, "learning_rate": 4.0895031200021836e-07, "loss": 0.6962, "step": 11596 }, { "epoch": 1.6418914136051532, "grad_norm": 9.39959907488371, "learning_rate": 4.0863620241554407e-07, "loss": 0.6817, "step": 11597 }, { "epoch": 1.6420329864797905, "grad_norm": 10.02399845590298, "learning_rate": 4.0832220277230467e-07, "loss": 0.7854, "step": 11598 }, { "epoch": 1.6421745593544277, "grad_norm": 9.341060800043921, "learning_rate": 4.0800831308700773e-07, "loss": 0.6178, "step": 11599 }, { "epoch": 1.642316132229065, "grad_norm": 9.989496818753912, "learning_rate": 4.0769453337615367e-07, "loss": 0.6786, "step": 11600 }, { "epoch": 1.6424577051037021, "grad_norm": 8.37484285577268, "learning_rate": 4.073808636562382e-07, "loss": 0.6382, "step": 11601 }, { "epoch": 1.6425992779783394, "grad_norm": 9.105087082439606, "learning_rate": 4.070673039437506e-07, "loss": 0.7091, "step": 11602 }, { "epoch": 1.6427408508529766, "grad_norm": 8.405860636603023, "learning_rate": 4.0675385425517356e-07, "loss": 0.6989, "step": 11603 }, { "epoch": 1.6428824237276138, "grad_norm": 9.23247249001093, "learning_rate": 4.0644051460698634e-07, "loss": 0.7005, "step": 11604 }, { "epoch": 1.6430239966022508, "grad_norm": 10.462320568227838, "learning_rate": 4.0612728501565973e-07, "loss": 0.6469, "step": 11605 }, { "epoch": 1.643165569476888, "grad_norm": 8.633333023051646, "learning_rate": 4.058141654976608e-07, "loss": 0.675, "step": 11606 }, { "epoch": 1.6433071423515253, "grad_norm": 11.551244917984963, "learning_rate": 4.055011560694494e-07, "loss": 0.7031, "step": 11607 }, { "epoch": 1.6434487152261625, "grad_norm": 8.935063810371483, "learning_rate": 4.0518825674748076e-07, "loss": 0.7411, "step": 11608 }, { "epoch": 1.6435902881007998, "grad_norm": 9.810517128973917, "learning_rate": 4.0487546754820304e-07, "loss": 0.7295, "step": 11609 }, { "epoch": 1.643731860975437, "grad_norm": 8.341132164089512, "learning_rate": 4.0456278848806067e-07, "loss": 0.624, "step": 11610 }, { "epoch": 1.6438734338500742, "grad_norm": 10.225777265857005, "learning_rate": 4.042502195834891e-07, "loss": 0.745, "step": 11611 }, { "epoch": 1.6440150067247115, "grad_norm": 10.773122715102987, "learning_rate": 4.039377608509218e-07, "loss": 0.7635, "step": 11612 }, { "epoch": 1.6441565795993487, "grad_norm": 9.458909968025022, "learning_rate": 4.0362541230678316e-07, "loss": 0.6626, "step": 11613 }, { "epoch": 1.644298152473986, "grad_norm": 9.347562388527924, "learning_rate": 4.033131739674931e-07, "loss": 0.6697, "step": 11614 }, { "epoch": 1.6444397253486231, "grad_norm": 9.932765384626537, "learning_rate": 4.0300104584946655e-07, "loss": 0.6771, "step": 11615 }, { "epoch": 1.6445812982232604, "grad_norm": 10.720067068431062, "learning_rate": 4.026890279691109e-07, "loss": 0.672, "step": 11616 }, { "epoch": 1.6447228710978976, "grad_norm": 9.02175055253838, "learning_rate": 4.0237712034283004e-07, "loss": 0.6939, "step": 11617 }, { "epoch": 1.6448644439725348, "grad_norm": 8.455222724494062, "learning_rate": 4.020653229870192e-07, "loss": 0.7196, "step": 11618 }, { "epoch": 1.645006016847172, "grad_norm": 8.313314527027892, "learning_rate": 4.0175363591806985e-07, "loss": 0.6715, "step": 11619 }, { "epoch": 1.6451475897218093, "grad_norm": 6.938796188648822, "learning_rate": 4.0144205915236797e-07, "loss": 0.6958, "step": 11620 }, { "epoch": 1.6452891625964465, "grad_norm": 7.619461799898808, "learning_rate": 4.0113059270629193e-07, "loss": 0.5618, "step": 11621 }, { "epoch": 1.6454307354710838, "grad_norm": 9.230262524806944, "learning_rate": 4.008192365962166e-07, "loss": 0.8625, "step": 11622 }, { "epoch": 1.645572308345721, "grad_norm": 7.974516812697563, "learning_rate": 4.0050799083850787e-07, "loss": 0.6477, "step": 11623 }, { "epoch": 1.6457138812203582, "grad_norm": 8.891771829180449, "learning_rate": 4.0019685544952835e-07, "loss": 0.6359, "step": 11624 }, { "epoch": 1.6458554540949955, "grad_norm": 7.892276039752188, "learning_rate": 3.998858304456352e-07, "loss": 0.7353, "step": 11625 }, { "epoch": 1.6459970269696327, "grad_norm": 9.64396048394631, "learning_rate": 3.995749158431772e-07, "loss": 0.7064, "step": 11626 }, { "epoch": 1.64613859984427, "grad_norm": 9.538723776362238, "learning_rate": 3.9926411165850054e-07, "loss": 0.7567, "step": 11627 }, { "epoch": 1.6462801727189071, "grad_norm": 8.53885476574284, "learning_rate": 3.989534179079427e-07, "loss": 0.6875, "step": 11628 }, { "epoch": 1.6464217455935444, "grad_norm": 7.529918252164318, "learning_rate": 3.986428346078375e-07, "loss": 0.6571, "step": 11629 }, { "epoch": 1.6465633184681816, "grad_norm": 9.07881579503816, "learning_rate": 3.983323617745111e-07, "loss": 0.6776, "step": 11630 }, { "epoch": 1.6467048913428188, "grad_norm": 8.850454424382198, "learning_rate": 3.980219994242859e-07, "loss": 0.6537, "step": 11631 }, { "epoch": 1.646846464217456, "grad_norm": 9.063815317398637, "learning_rate": 3.9771174757347626e-07, "loss": 0.7063, "step": 11632 }, { "epoch": 1.6469880370920933, "grad_norm": 8.267975615370862, "learning_rate": 3.9740160623839314e-07, "loss": 0.6531, "step": 11633 }, { "epoch": 1.6471296099667305, "grad_norm": 8.098520176719594, "learning_rate": 3.9709157543533996e-07, "loss": 0.6827, "step": 11634 }, { "epoch": 1.6472711828413678, "grad_norm": 10.523020984588497, "learning_rate": 3.967816551806139e-07, "loss": 0.5753, "step": 11635 }, { "epoch": 1.6474127557160048, "grad_norm": 10.47899888942686, "learning_rate": 3.9647184549050865e-07, "loss": 0.7089, "step": 11636 }, { "epoch": 1.647554328590642, "grad_norm": 8.447368361008207, "learning_rate": 3.9616214638130953e-07, "loss": 0.7248, "step": 11637 }, { "epoch": 1.6476959014652792, "grad_norm": 9.951591050114095, "learning_rate": 3.9585255786929816e-07, "loss": 0.6867, "step": 11638 }, { "epoch": 1.6478374743399165, "grad_norm": 9.751251531629439, "learning_rate": 3.9554307997074826e-07, "loss": 0.7026, "step": 11639 }, { "epoch": 1.6479790472145537, "grad_norm": 7.688288454390233, "learning_rate": 3.952337127019301e-07, "loss": 0.69, "step": 11640 }, { "epoch": 1.648120620089191, "grad_norm": 9.27268345120389, "learning_rate": 3.9492445607910574e-07, "loss": 0.7283, "step": 11641 }, { "epoch": 1.6482621929638281, "grad_norm": 8.933300392128102, "learning_rate": 3.946153101185332e-07, "loss": 0.7146, "step": 11642 }, { "epoch": 1.6484037658384654, "grad_norm": 8.174665102383878, "learning_rate": 3.943062748364651e-07, "loss": 0.6774, "step": 11643 }, { "epoch": 1.6485453387131026, "grad_norm": 10.184885262848217, "learning_rate": 3.939973502491448e-07, "loss": 0.6675, "step": 11644 }, { "epoch": 1.6486869115877398, "grad_norm": 9.782989639752456, "learning_rate": 3.9368853637281404e-07, "loss": 0.6405, "step": 11645 }, { "epoch": 1.6488284844623768, "grad_norm": 9.029492269881816, "learning_rate": 3.93379833223706e-07, "loss": 0.6748, "step": 11646 }, { "epoch": 1.648970057337014, "grad_norm": 8.232264557734716, "learning_rate": 3.9307124081804924e-07, "loss": 0.6537, "step": 11647 }, { "epoch": 1.6491116302116513, "grad_norm": 9.13249045425029, "learning_rate": 3.92762759172067e-07, "loss": 0.6986, "step": 11648 }, { "epoch": 1.6492532030862885, "grad_norm": 10.655781755199996, "learning_rate": 3.9245438830197464e-07, "loss": 0.728, "step": 11649 }, { "epoch": 1.6493947759609258, "grad_norm": 9.212497051761316, "learning_rate": 3.9214612822398443e-07, "loss": 0.7796, "step": 11650 }, { "epoch": 1.649536348835563, "grad_norm": 10.41979941499033, "learning_rate": 3.9183797895429973e-07, "loss": 0.6006, "step": 11651 }, { "epoch": 1.6496779217102002, "grad_norm": 7.013765152279347, "learning_rate": 3.9152994050912134e-07, "loss": 0.6422, "step": 11652 }, { "epoch": 1.6498194945848375, "grad_norm": 8.46601491306865, "learning_rate": 3.9122201290464095e-07, "loss": 0.6598, "step": 11653 }, { "epoch": 1.6499610674594747, "grad_norm": 9.313891825258503, "learning_rate": 3.909141961570478e-07, "loss": 0.6232, "step": 11654 }, { "epoch": 1.650102640334112, "grad_norm": 9.38403942462155, "learning_rate": 3.9060649028252265e-07, "loss": 0.6889, "step": 11655 }, { "epoch": 1.6502442132087491, "grad_norm": 9.13877351708009, "learning_rate": 3.9029889529724113e-07, "loss": 0.6936, "step": 11656 }, { "epoch": 1.6503857860833864, "grad_norm": 9.059179500116555, "learning_rate": 3.899914112173739e-07, "loss": 0.7319, "step": 11657 }, { "epoch": 1.6505273589580236, "grad_norm": 9.791171273760325, "learning_rate": 3.896840380590844e-07, "loss": 0.7563, "step": 11658 }, { "epoch": 1.6506689318326608, "grad_norm": 8.975991757992583, "learning_rate": 3.8937677583853224e-07, "loss": 0.5967, "step": 11659 }, { "epoch": 1.650810504707298, "grad_norm": 9.295918493993623, "learning_rate": 3.890696245718686e-07, "loss": 0.5858, "step": 11660 }, { "epoch": 1.6509520775819353, "grad_norm": 8.847707125957479, "learning_rate": 3.887625842752413e-07, "loss": 0.5973, "step": 11661 }, { "epoch": 1.6510936504565725, "grad_norm": 9.337637953141325, "learning_rate": 3.8845565496479026e-07, "loss": 0.6708, "step": 11662 }, { "epoch": 1.6512352233312098, "grad_norm": 10.622249471646326, "learning_rate": 3.8814883665665076e-07, "loss": 0.688, "step": 11663 }, { "epoch": 1.651376796205847, "grad_norm": 7.801006682552609, "learning_rate": 3.878421293669532e-07, "loss": 0.6397, "step": 11664 }, { "epoch": 1.6515183690804842, "grad_norm": 8.169233372928405, "learning_rate": 3.8753553311181966e-07, "loss": 0.6578, "step": 11665 }, { "epoch": 1.6516599419551214, "grad_norm": 8.79904251091485, "learning_rate": 3.8722904790736815e-07, "loss": 0.6642, "step": 11666 }, { "epoch": 1.6518015148297587, "grad_norm": 9.188591094331498, "learning_rate": 3.869226737697099e-07, "loss": 0.6648, "step": 11667 }, { "epoch": 1.651943087704396, "grad_norm": 9.797001157979574, "learning_rate": 3.8661641071495145e-07, "loss": 0.7234, "step": 11668 }, { "epoch": 1.6520846605790331, "grad_norm": 8.03173114211128, "learning_rate": 3.863102587591919e-07, "loss": 0.6414, "step": 11669 }, { "epoch": 1.6522262334536704, "grad_norm": 11.392145335352637, "learning_rate": 3.860042179185261e-07, "loss": 0.6717, "step": 11670 }, { "epoch": 1.6523678063283076, "grad_norm": 10.574811453074064, "learning_rate": 3.8569828820904265e-07, "loss": 0.7108, "step": 11671 }, { "epoch": 1.6525093792029448, "grad_norm": 9.995498025307482, "learning_rate": 3.8539246964682336e-07, "loss": 0.6895, "step": 11672 }, { "epoch": 1.652650952077582, "grad_norm": 7.946444540482299, "learning_rate": 3.850867622479457e-07, "loss": 0.7669, "step": 11673 }, { "epoch": 1.6527925249522193, "grad_norm": 9.131399758407884, "learning_rate": 3.847811660284795e-07, "loss": 0.7089, "step": 11674 }, { "epoch": 1.6529340978268565, "grad_norm": 9.617851079487924, "learning_rate": 3.844756810044914e-07, "loss": 0.6793, "step": 11675 }, { "epoch": 1.6530756707014937, "grad_norm": 8.350225443280754, "learning_rate": 3.841703071920383e-07, "loss": 0.657, "step": 11676 }, { "epoch": 1.653217243576131, "grad_norm": 9.659512953080123, "learning_rate": 3.8386504460717426e-07, "loss": 0.6972, "step": 11677 }, { "epoch": 1.653358816450768, "grad_norm": 9.253313450567484, "learning_rate": 3.835598932659476e-07, "loss": 0.7481, "step": 11678 }, { "epoch": 1.6535003893254052, "grad_norm": 10.206845877095875, "learning_rate": 3.8325485318439883e-07, "loss": 0.6647, "step": 11679 }, { "epoch": 1.6536419622000424, "grad_norm": 10.910642247723665, "learning_rate": 3.829499243785645e-07, "loss": 0.6364, "step": 11680 }, { "epoch": 1.6537835350746797, "grad_norm": 10.874350692833234, "learning_rate": 3.8264510686447376e-07, "loss": 0.8345, "step": 11681 }, { "epoch": 1.653925107949317, "grad_norm": 8.727335759860052, "learning_rate": 3.823404006581513e-07, "loss": 0.6603, "step": 11682 }, { "epoch": 1.6540666808239541, "grad_norm": 11.03399330155275, "learning_rate": 3.820358057756146e-07, "loss": 0.7102, "step": 11683 }, { "epoch": 1.6542082536985914, "grad_norm": 7.448139434541821, "learning_rate": 3.8173132223287693e-07, "loss": 0.6933, "step": 11684 }, { "epoch": 1.6543498265732286, "grad_norm": 9.85214872983181, "learning_rate": 3.814269500459436e-07, "loss": 0.7154, "step": 11685 }, { "epoch": 1.6544913994478658, "grad_norm": 8.222664600648967, "learning_rate": 3.8112268923081645e-07, "loss": 0.6868, "step": 11686 }, { "epoch": 1.654632972322503, "grad_norm": 9.983092701792271, "learning_rate": 3.808185398034897e-07, "loss": 0.7473, "step": 11687 }, { "epoch": 1.65477454519714, "grad_norm": 10.764718204394368, "learning_rate": 3.8051450177995136e-07, "loss": 0.6845, "step": 11688 }, { "epoch": 1.6549161180717773, "grad_norm": 8.729158266438256, "learning_rate": 3.802105751761859e-07, "loss": 0.585, "step": 11689 }, { "epoch": 1.6550576909464145, "grad_norm": 8.057260156104215, "learning_rate": 3.799067600081696e-07, "loss": 0.6837, "step": 11690 }, { "epoch": 1.6551992638210518, "grad_norm": 9.740181698837409, "learning_rate": 3.7960305629187454e-07, "loss": 0.7099, "step": 11691 }, { "epoch": 1.655340836695689, "grad_norm": 9.823120201378106, "learning_rate": 3.792994640432651e-07, "loss": 0.6285, "step": 11692 }, { "epoch": 1.6554824095703262, "grad_norm": 8.720836758844536, "learning_rate": 3.789959832783016e-07, "loss": 0.599, "step": 11693 }, { "epoch": 1.6556239824449634, "grad_norm": 8.758070793052159, "learning_rate": 3.786926140129385e-07, "loss": 0.6113, "step": 11694 }, { "epoch": 1.6557655553196007, "grad_norm": 9.269792568350917, "learning_rate": 3.7838935626312246e-07, "loss": 0.6214, "step": 11695 }, { "epoch": 1.655907128194238, "grad_norm": 8.713604027129163, "learning_rate": 3.780862100447971e-07, "loss": 0.6512, "step": 11696 }, { "epoch": 1.6560487010688751, "grad_norm": 8.435694070332458, "learning_rate": 3.7778317537389613e-07, "loss": 0.6188, "step": 11697 }, { "epoch": 1.6561902739435124, "grad_norm": 8.415654137267374, "learning_rate": 3.774802522663515e-07, "loss": 0.6974, "step": 11698 }, { "epoch": 1.6563318468181496, "grad_norm": 11.548855705879886, "learning_rate": 3.771774407380879e-07, "loss": 0.7371, "step": 11699 }, { "epoch": 1.6564734196927868, "grad_norm": 9.727104104524322, "learning_rate": 3.768747408050227e-07, "loss": 0.6617, "step": 11700 }, { "epoch": 1.656614992567424, "grad_norm": 10.126307273583171, "learning_rate": 3.765721524830701e-07, "loss": 0.6863, "step": 11701 }, { "epoch": 1.6567565654420613, "grad_norm": 8.927142777721667, "learning_rate": 3.762696757881354e-07, "loss": 0.6073, "step": 11702 }, { "epoch": 1.6568981383166985, "grad_norm": 8.028172478511319, "learning_rate": 3.7596731073612085e-07, "loss": 0.572, "step": 11703 }, { "epoch": 1.6570397111913358, "grad_norm": 8.906175044229538, "learning_rate": 3.756650573429205e-07, "loss": 0.7394, "step": 11704 }, { "epoch": 1.657181284065973, "grad_norm": 8.976610945528542, "learning_rate": 3.7536291562442483e-07, "loss": 0.6765, "step": 11705 }, { "epoch": 1.6573228569406102, "grad_norm": 8.934023734596412, "learning_rate": 3.750608855965157e-07, "loss": 0.768, "step": 11706 }, { "epoch": 1.6574644298152474, "grad_norm": 9.840913684883143, "learning_rate": 3.747589672750723e-07, "loss": 0.7413, "step": 11707 }, { "epoch": 1.6576060026898847, "grad_norm": 8.246891245188149, "learning_rate": 3.7445716067596506e-07, "loss": 0.7684, "step": 11708 }, { "epoch": 1.657747575564522, "grad_norm": 9.437152780372777, "learning_rate": 3.7415546581505954e-07, "loss": 0.6682, "step": 11709 }, { "epoch": 1.6578891484391591, "grad_norm": 9.427570298124232, "learning_rate": 3.7385388270821666e-07, "loss": 0.6039, "step": 11710 }, { "epoch": 1.6580307213137964, "grad_norm": 9.292225317704068, "learning_rate": 3.735524113712891e-07, "loss": 0.7476, "step": 11711 }, { "epoch": 1.6581722941884336, "grad_norm": 7.752236566530156, "learning_rate": 3.7325105182012656e-07, "loss": 0.6302, "step": 11712 }, { "epoch": 1.6583138670630708, "grad_norm": 9.273768225463119, "learning_rate": 3.729498040705698e-07, "loss": 0.6698, "step": 11713 }, { "epoch": 1.658455439937708, "grad_norm": 10.129403746098031, "learning_rate": 3.726486681384564e-07, "loss": 0.6262, "step": 11714 }, { "epoch": 1.6585970128123453, "grad_norm": 10.406038482624187, "learning_rate": 3.723476440396157e-07, "loss": 0.7152, "step": 11715 }, { "epoch": 1.6587385856869825, "grad_norm": 8.589372997559888, "learning_rate": 3.7204673178987294e-07, "loss": 0.7606, "step": 11716 }, { "epoch": 1.6588801585616197, "grad_norm": 8.957431150443826, "learning_rate": 3.717459314050473e-07, "loss": 0.7139, "step": 11717 }, { "epoch": 1.659021731436257, "grad_norm": 9.965204738536837, "learning_rate": 3.714452429009513e-07, "loss": 0.6517, "step": 11718 }, { "epoch": 1.659163304310894, "grad_norm": 8.046321753584493, "learning_rate": 3.711446662933915e-07, "loss": 0.5939, "step": 11719 }, { "epoch": 1.6593048771855312, "grad_norm": 10.517949701827623, "learning_rate": 3.708442015981689e-07, "loss": 0.6123, "step": 11720 }, { "epoch": 1.6594464500601684, "grad_norm": 8.849753993866333, "learning_rate": 3.705438488310792e-07, "loss": 0.6308, "step": 11721 }, { "epoch": 1.6595880229348057, "grad_norm": 8.49099299111109, "learning_rate": 3.7024360800791195e-07, "loss": 0.749, "step": 11722 }, { "epoch": 1.659729595809443, "grad_norm": 9.495762331603544, "learning_rate": 3.699434791444495e-07, "loss": 0.655, "step": 11723 }, { "epoch": 1.6598711686840801, "grad_norm": 10.98265372582274, "learning_rate": 3.6964346225647097e-07, "loss": 0.6705, "step": 11724 }, { "epoch": 1.6600127415587174, "grad_norm": 9.281671687863618, "learning_rate": 3.6934355735974647e-07, "loss": 0.598, "step": 11725 }, { "epoch": 1.6601543144333546, "grad_norm": 9.097761914371354, "learning_rate": 3.690437644700431e-07, "loss": 0.5924, "step": 11726 }, { "epoch": 1.6602958873079918, "grad_norm": 8.65287940991434, "learning_rate": 3.687440836031195e-07, "loss": 0.6622, "step": 11727 }, { "epoch": 1.660437460182629, "grad_norm": 9.784332082307662, "learning_rate": 3.684445147747309e-07, "loss": 0.6471, "step": 11728 }, { "epoch": 1.660579033057266, "grad_norm": 8.753347138129818, "learning_rate": 3.681450580006246e-07, "loss": 0.6586, "step": 11729 }, { "epoch": 1.6607206059319033, "grad_norm": 9.874855137015974, "learning_rate": 3.6784571329654265e-07, "loss": 0.8456, "step": 11730 }, { "epoch": 1.6608621788065405, "grad_norm": 9.592444116639896, "learning_rate": 3.675464806782222e-07, "loss": 0.6792, "step": 11731 }, { "epoch": 1.6610037516811778, "grad_norm": 10.387596480636473, "learning_rate": 3.6724736016139293e-07, "loss": 0.6722, "step": 11732 }, { "epoch": 1.661145324555815, "grad_norm": 9.459938708504001, "learning_rate": 3.6694835176178e-07, "loss": 0.6614, "step": 11733 }, { "epoch": 1.6612868974304522, "grad_norm": 9.102450164599828, "learning_rate": 3.666494554951014e-07, "loss": 0.6244, "step": 11734 }, { "epoch": 1.6614284703050894, "grad_norm": 9.012814194365067, "learning_rate": 3.6635067137707063e-07, "loss": 0.691, "step": 11735 }, { "epoch": 1.6615700431797267, "grad_norm": 8.316647978961058, "learning_rate": 3.660519994233935e-07, "loss": 0.8189, "step": 11736 }, { "epoch": 1.661711616054364, "grad_norm": 7.5594115453107165, "learning_rate": 3.657534396497725e-07, "loss": 0.6022, "step": 11737 }, { "epoch": 1.6618531889290011, "grad_norm": 7.352448980575308, "learning_rate": 3.654549920719011e-07, "loss": 0.669, "step": 11738 }, { "epoch": 1.6619947618036384, "grad_norm": 10.478507797935102, "learning_rate": 3.6515665670546956e-07, "loss": 0.7484, "step": 11739 }, { "epoch": 1.6621363346782756, "grad_norm": 9.495497188557856, "learning_rate": 3.6485843356616093e-07, "loss": 0.749, "step": 11740 }, { "epoch": 1.6622779075529128, "grad_norm": 9.574968175611072, "learning_rate": 3.6456032266965173e-07, "loss": 0.6351, "step": 11741 }, { "epoch": 1.66241948042755, "grad_norm": 8.966097236713592, "learning_rate": 3.6426232403161484e-07, "loss": 0.6471, "step": 11742 }, { "epoch": 1.6625610533021873, "grad_norm": 7.780793571107732, "learning_rate": 3.639644376677146e-07, "loss": 0.6884, "step": 11743 }, { "epoch": 1.6627026261768245, "grad_norm": 9.607345838220658, "learning_rate": 3.636666635936112e-07, "loss": 0.7208, "step": 11744 }, { "epoch": 1.6628441990514617, "grad_norm": 7.632132774237733, "learning_rate": 3.633690018249586e-07, "loss": 0.6573, "step": 11745 }, { "epoch": 1.662985771926099, "grad_norm": 10.170991030018405, "learning_rate": 3.6307145237740427e-07, "loss": 0.812, "step": 11746 }, { "epoch": 1.6631273448007362, "grad_norm": 8.20847295910223, "learning_rate": 3.6277401526659067e-07, "loss": 0.6906, "step": 11747 }, { "epoch": 1.6632689176753734, "grad_norm": 10.587424581319894, "learning_rate": 3.624766905081528e-07, "loss": 0.6983, "step": 11748 }, { "epoch": 1.6634104905500107, "grad_norm": 9.075291097716963, "learning_rate": 3.621794781177229e-07, "loss": 0.7335, "step": 11749 }, { "epoch": 1.663552063424648, "grad_norm": 8.398501544087205, "learning_rate": 3.618823781109226e-07, "loss": 0.707, "step": 11750 }, { "epoch": 1.6636936362992851, "grad_norm": 10.7775136471748, "learning_rate": 3.6158539050337146e-07, "loss": 0.7188, "step": 11751 }, { "epoch": 1.6638352091739224, "grad_norm": 8.605626558925254, "learning_rate": 3.6128851531068236e-07, "loss": 0.6299, "step": 11752 }, { "epoch": 1.6639767820485596, "grad_norm": 8.890462381119312, "learning_rate": 3.609917525484608e-07, "loss": 0.6349, "step": 11753 }, { "epoch": 1.6641183549231968, "grad_norm": 10.542443730559425, "learning_rate": 3.6069510223230854e-07, "loss": 0.6906, "step": 11754 }, { "epoch": 1.664259927797834, "grad_norm": 10.480050162857792, "learning_rate": 3.603985643778188e-07, "loss": 0.7764, "step": 11755 }, { "epoch": 1.6644015006724713, "grad_norm": 10.913518281774264, "learning_rate": 3.601021390005821e-07, "loss": 0.6072, "step": 11756 }, { "epoch": 1.6645430735471085, "grad_norm": 10.882803386347584, "learning_rate": 3.5980582611617966e-07, "loss": 0.6852, "step": 11757 }, { "epoch": 1.6646846464217457, "grad_norm": 9.940626888041121, "learning_rate": 3.595096257401895e-07, "loss": 0.7089, "step": 11758 }, { "epoch": 1.664826219296383, "grad_norm": 10.170632469593174, "learning_rate": 3.59213537888182e-07, "loss": 0.6397, "step": 11759 }, { "epoch": 1.66496779217102, "grad_norm": 9.624380909219157, "learning_rate": 3.58917562575723e-07, "loss": 0.7109, "step": 11760 }, { "epoch": 1.6651093650456572, "grad_norm": 8.724907782212112, "learning_rate": 3.586216998183714e-07, "loss": 0.6447, "step": 11761 }, { "epoch": 1.6652509379202944, "grad_norm": 10.299961304823402, "learning_rate": 3.583259496316796e-07, "loss": 0.7929, "step": 11762 }, { "epoch": 1.6653925107949317, "grad_norm": 10.264868767926549, "learning_rate": 3.580303120311965e-07, "loss": 0.6989, "step": 11763 }, { "epoch": 1.665534083669569, "grad_norm": 9.897752263919807, "learning_rate": 3.5773478703246213e-07, "loss": 0.6667, "step": 11764 }, { "epoch": 1.6656756565442061, "grad_norm": 7.981376189307464, "learning_rate": 3.5743937465101323e-07, "loss": 0.6959, "step": 11765 }, { "epoch": 1.6658172294188434, "grad_norm": 8.148491805308016, "learning_rate": 3.571440749023783e-07, "loss": 0.6293, "step": 11766 }, { "epoch": 1.6659588022934806, "grad_norm": 10.706575558640171, "learning_rate": 3.568488878020815e-07, "loss": 0.6891, "step": 11767 }, { "epoch": 1.6661003751681178, "grad_norm": 8.649438705314477, "learning_rate": 3.5655381336564127e-07, "loss": 0.6936, "step": 11768 }, { "epoch": 1.666241948042755, "grad_norm": 9.97146579018766, "learning_rate": 3.562588516085683e-07, "loss": 0.7419, "step": 11769 }, { "epoch": 1.6663835209173923, "grad_norm": 9.60381929399644, "learning_rate": 3.559640025463704e-07, "loss": 0.7106, "step": 11770 }, { "epoch": 1.6665250937920293, "grad_norm": 9.505825063824968, "learning_rate": 3.556692661945446e-07, "loss": 0.659, "step": 11771 }, { "epoch": 1.6666666666666665, "grad_norm": 9.247488144750697, "learning_rate": 3.553746425685875e-07, "loss": 0.6558, "step": 11772 }, { "epoch": 1.6668082395413037, "grad_norm": 9.027733774974559, "learning_rate": 3.550801316839858e-07, "loss": 0.6943, "step": 11773 }, { "epoch": 1.666949812415941, "grad_norm": 8.701957254468315, "learning_rate": 3.5478573355622213e-07, "loss": 0.595, "step": 11774 }, { "epoch": 1.6670913852905782, "grad_norm": 7.930352544849169, "learning_rate": 3.544914482007736e-07, "loss": 0.6584, "step": 11775 }, { "epoch": 1.6672329581652154, "grad_norm": 8.67079235570104, "learning_rate": 3.541972756331091e-07, "loss": 0.5975, "step": 11776 }, { "epoch": 1.6673745310398527, "grad_norm": 9.807614416213015, "learning_rate": 3.5390321586869473e-07, "loss": 0.6902, "step": 11777 }, { "epoch": 1.66751610391449, "grad_norm": 8.540595556572006, "learning_rate": 3.5360926892298723e-07, "loss": 0.7451, "step": 11778 }, { "epoch": 1.6676576767891271, "grad_norm": 9.176186167364142, "learning_rate": 3.5331543481144094e-07, "loss": 0.6837, "step": 11779 }, { "epoch": 1.6677992496637644, "grad_norm": 10.772574563767641, "learning_rate": 3.5302171354950065e-07, "loss": 0.8141, "step": 11780 }, { "epoch": 1.6679408225384016, "grad_norm": 8.39389639220466, "learning_rate": 3.527281051526088e-07, "loss": 0.6387, "step": 11781 }, { "epoch": 1.6680823954130388, "grad_norm": 9.391021466256346, "learning_rate": 3.5243460963619944e-07, "loss": 0.7606, "step": 11782 }, { "epoch": 1.668223968287676, "grad_norm": 9.184606394881941, "learning_rate": 3.521412270157007e-07, "loss": 0.7386, "step": 11783 }, { "epoch": 1.6683655411623133, "grad_norm": 8.392935152473942, "learning_rate": 3.518479573065367e-07, "loss": 0.642, "step": 11784 }, { "epoch": 1.6685071140369505, "grad_norm": 9.020963202430735, "learning_rate": 3.5155480052412344e-07, "loss": 0.6912, "step": 11785 }, { "epoch": 1.6686486869115877, "grad_norm": 8.840214430335749, "learning_rate": 3.5126175668387275e-07, "loss": 0.6592, "step": 11786 }, { "epoch": 1.668790259786225, "grad_norm": 9.990580127509098, "learning_rate": 3.5096882580118866e-07, "loss": 0.7039, "step": 11787 }, { "epoch": 1.6689318326608622, "grad_norm": 9.179657164178598, "learning_rate": 3.50676007891472e-07, "loss": 0.6994, "step": 11788 }, { "epoch": 1.6690734055354994, "grad_norm": 9.051464438635701, "learning_rate": 3.50383302970114e-07, "loss": 0.7218, "step": 11789 }, { "epoch": 1.6692149784101367, "grad_norm": 8.722092510751837, "learning_rate": 3.5009071105250314e-07, "loss": 0.6848, "step": 11790 }, { "epoch": 1.669356551284774, "grad_norm": 11.40380431828289, "learning_rate": 3.497982321540211e-07, "loss": 0.6531, "step": 11791 }, { "epoch": 1.6694981241594111, "grad_norm": 9.917516805501382, "learning_rate": 3.495058662900427e-07, "loss": 0.7471, "step": 11792 }, { "epoch": 1.6696396970340484, "grad_norm": 7.91739734155502, "learning_rate": 3.492136134759377e-07, "loss": 0.6467, "step": 11793 }, { "epoch": 1.6697812699086856, "grad_norm": 9.375496813007972, "learning_rate": 3.4892147372706854e-07, "loss": 0.6644, "step": 11794 }, { "epoch": 1.6699228427833228, "grad_norm": 10.692480521818716, "learning_rate": 3.4862944705879364e-07, "loss": 0.6518, "step": 11795 }, { "epoch": 1.67006441565796, "grad_norm": 9.843377581620826, "learning_rate": 3.48337533486465e-07, "loss": 0.69, "step": 11796 }, { "epoch": 1.6702059885325973, "grad_norm": 9.22355624844149, "learning_rate": 3.480457330254275e-07, "loss": 0.6766, "step": 11797 }, { "epoch": 1.6703475614072345, "grad_norm": 8.813917979337246, "learning_rate": 3.477540456910217e-07, "loss": 0.6956, "step": 11798 }, { "epoch": 1.6704891342818717, "grad_norm": 8.40435565464281, "learning_rate": 3.474624714985805e-07, "loss": 0.5681, "step": 11799 }, { "epoch": 1.670630707156509, "grad_norm": 12.672045385529902, "learning_rate": 3.4717101046343265e-07, "loss": 0.701, "step": 11800 }, { "epoch": 1.6707722800311462, "grad_norm": 10.65474424043588, "learning_rate": 3.4687966260089913e-07, "loss": 0.7659, "step": 11801 }, { "epoch": 1.6709138529057832, "grad_norm": 8.759543745448045, "learning_rate": 3.465884279262968e-07, "loss": 0.6976, "step": 11802 }, { "epoch": 1.6710554257804204, "grad_norm": 10.018066108764222, "learning_rate": 3.4629730645493493e-07, "loss": 0.7338, "step": 11803 }, { "epoch": 1.6711969986550577, "grad_norm": 8.127739077818003, "learning_rate": 3.4600629820211755e-07, "loss": 0.6307, "step": 11804 }, { "epoch": 1.671338571529695, "grad_norm": 9.613074110171848, "learning_rate": 3.4571540318314335e-07, "loss": 0.6835, "step": 11805 }, { "epoch": 1.6714801444043321, "grad_norm": 9.573528238435864, "learning_rate": 3.4542462141330365e-07, "loss": 0.6917, "step": 11806 }, { "epoch": 1.6716217172789694, "grad_norm": 10.07591348676503, "learning_rate": 3.4513395290788566e-07, "loss": 0.8101, "step": 11807 }, { "epoch": 1.6717632901536066, "grad_norm": 8.114437338642396, "learning_rate": 3.448433976821683e-07, "loss": 0.6559, "step": 11808 }, { "epoch": 1.6719048630282438, "grad_norm": 8.63690690517571, "learning_rate": 3.445529557514274e-07, "loss": 0.6657, "step": 11809 }, { "epoch": 1.672046435902881, "grad_norm": 8.508425407646262, "learning_rate": 3.4426262713092963e-07, "loss": 0.7189, "step": 11810 }, { "epoch": 1.6721880087775183, "grad_norm": 7.025505194367447, "learning_rate": 3.4397241183593887e-07, "loss": 0.6197, "step": 11811 }, { "epoch": 1.6723295816521553, "grad_norm": 8.092629609199918, "learning_rate": 3.436823098817102e-07, "loss": 0.6898, "step": 11812 }, { "epoch": 1.6724711545267925, "grad_norm": 8.718810378655181, "learning_rate": 3.4339232128349527e-07, "loss": 0.6811, "step": 11813 }, { "epoch": 1.6726127274014297, "grad_norm": 9.141928242430373, "learning_rate": 3.43102446056538e-07, "loss": 0.6723, "step": 11814 }, { "epoch": 1.672754300276067, "grad_norm": 9.112248538384637, "learning_rate": 3.428126842160762e-07, "loss": 0.6917, "step": 11815 }, { "epoch": 1.6728958731507042, "grad_norm": 8.368042290458524, "learning_rate": 3.4252303577734376e-07, "loss": 0.7274, "step": 11816 }, { "epoch": 1.6730374460253414, "grad_norm": 7.290985802242784, "learning_rate": 3.4223350075556605e-07, "loss": 0.623, "step": 11817 }, { "epoch": 1.6731790188999787, "grad_norm": 8.450105871857788, "learning_rate": 3.419440791659645e-07, "loss": 0.6302, "step": 11818 }, { "epoch": 1.673320591774616, "grad_norm": 8.278214729718487, "learning_rate": 3.4165477102375386e-07, "loss": 0.6465, "step": 11819 }, { "epoch": 1.6734621646492531, "grad_norm": 10.388347817903586, "learning_rate": 3.413655763441423e-07, "loss": 0.708, "step": 11820 }, { "epoch": 1.6736037375238904, "grad_norm": 8.45485501734109, "learning_rate": 3.4107649514233343e-07, "loss": 0.6232, "step": 11821 }, { "epoch": 1.6737453103985276, "grad_norm": 8.820571557726884, "learning_rate": 3.4078752743352263e-07, "loss": 0.6784, "step": 11822 }, { "epoch": 1.6738868832731648, "grad_norm": 9.94640100460141, "learning_rate": 3.404986732329027e-07, "loss": 0.7304, "step": 11823 }, { "epoch": 1.674028456147802, "grad_norm": 7.9286593968679835, "learning_rate": 3.402099325556563e-07, "loss": 0.6075, "step": 11824 }, { "epoch": 1.6741700290224393, "grad_norm": 11.592229121311249, "learning_rate": 3.3992130541696336e-07, "loss": 0.7071, "step": 11825 }, { "epoch": 1.6743116018970765, "grad_norm": 8.601268461850964, "learning_rate": 3.396327918319972e-07, "loss": 0.6914, "step": 11826 }, { "epoch": 1.6744531747717137, "grad_norm": 10.154358673054402, "learning_rate": 3.3934439181592393e-07, "loss": 0.651, "step": 11827 }, { "epoch": 1.674594747646351, "grad_norm": 7.859031351209694, "learning_rate": 3.390561053839053e-07, "loss": 0.6714, "step": 11828 }, { "epoch": 1.6747363205209882, "grad_norm": 10.0911321404477, "learning_rate": 3.3876793255109565e-07, "loss": 0.6651, "step": 11829 }, { "epoch": 1.6748778933956254, "grad_norm": 10.273488370240024, "learning_rate": 3.3847987333264473e-07, "loss": 0.7117, "step": 11830 }, { "epoch": 1.6750194662702627, "grad_norm": 9.65107267278366, "learning_rate": 3.381919277436946e-07, "loss": 0.6746, "step": 11831 }, { "epoch": 1.6751610391448999, "grad_norm": 8.175593214064376, "learning_rate": 3.3790409579938343e-07, "loss": 0.7589, "step": 11832 }, { "epoch": 1.6753026120195371, "grad_norm": 9.285728940847381, "learning_rate": 3.376163775148414e-07, "loss": 0.6859, "step": 11833 }, { "epoch": 1.6754441848941743, "grad_norm": 9.169966595005437, "learning_rate": 3.3732877290519437e-07, "loss": 0.7377, "step": 11834 }, { "epoch": 1.6755857577688116, "grad_norm": 9.525951876598654, "learning_rate": 3.370412819855615e-07, "loss": 0.6247, "step": 11835 }, { "epoch": 1.6757273306434488, "grad_norm": 11.327376726364166, "learning_rate": 3.3675390477105496e-07, "loss": 0.6842, "step": 11836 }, { "epoch": 1.675868903518086, "grad_norm": 9.427395090810727, "learning_rate": 3.364666412767831e-07, "loss": 0.7176, "step": 11837 }, { "epoch": 1.6760104763927233, "grad_norm": 9.870592896375403, "learning_rate": 3.3617949151784623e-07, "loss": 0.6239, "step": 11838 }, { "epoch": 1.6761520492673605, "grad_norm": 9.239121922637842, "learning_rate": 3.358924555093407e-07, "loss": 0.6415, "step": 11839 }, { "epoch": 1.6762936221419977, "grad_norm": 8.383596079698679, "learning_rate": 3.3560553326635467e-07, "loss": 0.6888, "step": 11840 }, { "epoch": 1.676435195016635, "grad_norm": 8.208097916017193, "learning_rate": 3.353187248039716e-07, "loss": 0.6779, "step": 11841 }, { "epoch": 1.6765767678912722, "grad_norm": 9.068603564919782, "learning_rate": 3.3503203013727006e-07, "loss": 0.6598, "step": 11842 }, { "epoch": 1.6767183407659092, "grad_norm": 9.597606567204537, "learning_rate": 3.3474544928131956e-07, "loss": 0.6771, "step": 11843 }, { "epoch": 1.6768599136405464, "grad_norm": 9.15559754634639, "learning_rate": 3.3445898225118704e-07, "loss": 0.7053, "step": 11844 }, { "epoch": 1.6770014865151837, "grad_norm": 9.015759974482489, "learning_rate": 3.3417262906193096e-07, "loss": 0.6494, "step": 11845 }, { "epoch": 1.677143059389821, "grad_norm": 9.729621914675773, "learning_rate": 3.3388638972860515e-07, "loss": 0.6125, "step": 11846 }, { "epoch": 1.6772846322644581, "grad_norm": 9.624676191159262, "learning_rate": 3.3360026426625615e-07, "loss": 0.8033, "step": 11847 }, { "epoch": 1.6774262051390953, "grad_norm": 9.9008497085438, "learning_rate": 3.333142526899255e-07, "loss": 0.6946, "step": 11848 }, { "epoch": 1.6775677780137326, "grad_norm": 8.251397679161268, "learning_rate": 3.330283550146499e-07, "loss": 0.5745, "step": 11849 }, { "epoch": 1.6777093508883698, "grad_norm": 9.328190022150892, "learning_rate": 3.3274257125545747e-07, "loss": 0.7151, "step": 11850 }, { "epoch": 1.677850923763007, "grad_norm": 8.69352853302341, "learning_rate": 3.3245690142737236e-07, "loss": 0.7285, "step": 11851 }, { "epoch": 1.6779924966376443, "grad_norm": 8.210989069028116, "learning_rate": 3.3217134554541145e-07, "loss": 0.7034, "step": 11852 }, { "epoch": 1.6781340695122815, "grad_norm": 9.342755421042654, "learning_rate": 3.3188590362458696e-07, "loss": 0.6756, "step": 11853 }, { "epoch": 1.6782756423869185, "grad_norm": 9.284752387225511, "learning_rate": 3.316005756799032e-07, "loss": 0.7552, "step": 11854 }, { "epoch": 1.6784172152615557, "grad_norm": 9.233545876086866, "learning_rate": 3.313153617263612e-07, "loss": 0.6819, "step": 11855 }, { "epoch": 1.678558788136193, "grad_norm": 9.53119957081946, "learning_rate": 3.310302617789532e-07, "loss": 0.6615, "step": 11856 }, { "epoch": 1.6787003610108302, "grad_norm": 9.136971341768177, "learning_rate": 3.307452758526669e-07, "loss": 0.7082, "step": 11857 }, { "epoch": 1.6788419338854674, "grad_norm": 10.939852913769103, "learning_rate": 3.3046040396248453e-07, "loss": 0.683, "step": 11858 }, { "epoch": 1.6789835067601047, "grad_norm": 8.554424403773922, "learning_rate": 3.3017564612338013e-07, "loss": 0.7518, "step": 11859 }, { "epoch": 1.679125079634742, "grad_norm": 9.228156201664785, "learning_rate": 3.298910023503249e-07, "loss": 0.7476, "step": 11860 }, { "epoch": 1.6792666525093791, "grad_norm": 6.760642527455006, "learning_rate": 3.296064726582812e-07, "loss": 0.6707, "step": 11861 }, { "epoch": 1.6794082253840164, "grad_norm": 8.278338226302576, "learning_rate": 3.2932205706220714e-07, "loss": 0.6152, "step": 11862 }, { "epoch": 1.6795497982586536, "grad_norm": 10.471423180741755, "learning_rate": 3.290377555770538e-07, "loss": 0.6793, "step": 11863 }, { "epoch": 1.6796913711332908, "grad_norm": 8.475278470575972, "learning_rate": 3.287535682177667e-07, "loss": 0.6214, "step": 11864 }, { "epoch": 1.679832944007928, "grad_norm": 9.200385160261932, "learning_rate": 3.2846949499928616e-07, "loss": 0.6365, "step": 11865 }, { "epoch": 1.6799745168825653, "grad_norm": 9.152444615054527, "learning_rate": 3.281855359365452e-07, "loss": 0.7803, "step": 11866 }, { "epoch": 1.6801160897572025, "grad_norm": 8.798195341771667, "learning_rate": 3.27901691044471e-07, "loss": 0.7199, "step": 11867 }, { "epoch": 1.6802576626318397, "grad_norm": 8.480965788203816, "learning_rate": 3.27617960337985e-07, "loss": 0.6165, "step": 11868 }, { "epoch": 1.680399235506477, "grad_norm": 9.423821041124281, "learning_rate": 3.273343438320034e-07, "loss": 0.6791, "step": 11869 }, { "epoch": 1.6805408083811142, "grad_norm": 8.675492365603496, "learning_rate": 3.2705084154143504e-07, "loss": 0.8037, "step": 11870 }, { "epoch": 1.6806823812557514, "grad_norm": 9.119616867182481, "learning_rate": 3.267674534811835e-07, "loss": 0.7548, "step": 11871 }, { "epoch": 1.6808239541303887, "grad_norm": 10.389998659458753, "learning_rate": 3.264841796661469e-07, "loss": 0.7137, "step": 11872 }, { "epoch": 1.6809655270050259, "grad_norm": 9.807139882543801, "learning_rate": 3.2620102011121616e-07, "loss": 0.6737, "step": 11873 }, { "epoch": 1.6811070998796631, "grad_norm": 10.824094344174114, "learning_rate": 3.259179748312774e-07, "loss": 0.6701, "step": 11874 }, { "epoch": 1.6812486727543003, "grad_norm": 9.031440072880907, "learning_rate": 3.25635043841209e-07, "loss": 0.7502, "step": 11875 }, { "epoch": 1.6813902456289376, "grad_norm": 9.819884030653737, "learning_rate": 3.253522271558857e-07, "loss": 0.7637, "step": 11876 }, { "epoch": 1.6815318185035748, "grad_norm": 8.555191373574091, "learning_rate": 3.2506952479017417e-07, "loss": 0.8108, "step": 11877 }, { "epoch": 1.681673391378212, "grad_norm": 8.54782562269356, "learning_rate": 3.247869367589354e-07, "loss": 0.7768, "step": 11878 }, { "epoch": 1.6818149642528493, "grad_norm": 9.81103854624764, "learning_rate": 3.245044630770264e-07, "loss": 0.6341, "step": 11879 }, { "epoch": 1.6819565371274865, "grad_norm": 9.354125523352817, "learning_rate": 3.242221037592949e-07, "loss": 0.6509, "step": 11880 }, { "epoch": 1.6820981100021237, "grad_norm": 9.06600400335601, "learning_rate": 3.2393985882058555e-07, "loss": 0.6421, "step": 11881 }, { "epoch": 1.682239682876761, "grad_norm": 7.744777273247245, "learning_rate": 3.2365772827573473e-07, "loss": 0.6349, "step": 11882 }, { "epoch": 1.6823812557513982, "grad_norm": 8.172094396851683, "learning_rate": 3.23375712139575e-07, "loss": 0.7244, "step": 11883 }, { "epoch": 1.6825228286260354, "grad_norm": 9.237393010285144, "learning_rate": 3.230938104269307e-07, "loss": 0.7338, "step": 11884 }, { "epoch": 1.6826644015006724, "grad_norm": 8.334811219455714, "learning_rate": 3.228120231526219e-07, "loss": 0.6553, "step": 11885 }, { "epoch": 1.6828059743753097, "grad_norm": 8.62691310729059, "learning_rate": 3.225303503314614e-07, "loss": 0.6371, "step": 11886 }, { "epoch": 1.6829475472499469, "grad_norm": 8.56619917884134, "learning_rate": 3.2224879197825717e-07, "loss": 0.6532, "step": 11887 }, { "epoch": 1.6830891201245841, "grad_norm": 10.254277151030038, "learning_rate": 3.2196734810781007e-07, "loss": 0.6437, "step": 11888 }, { "epoch": 1.6832306929992213, "grad_norm": 8.266807312948213, "learning_rate": 3.2168601873491493e-07, "loss": 0.6386, "step": 11889 }, { "epoch": 1.6833722658738586, "grad_norm": 11.120706136773364, "learning_rate": 3.214048038743622e-07, "loss": 0.7599, "step": 11890 }, { "epoch": 1.6835138387484958, "grad_norm": 10.404382119922513, "learning_rate": 3.2112370354093397e-07, "loss": 0.7493, "step": 11891 }, { "epoch": 1.683655411623133, "grad_norm": 9.203883504131133, "learning_rate": 3.208427177494081e-07, "loss": 0.758, "step": 11892 }, { "epoch": 1.6837969844977703, "grad_norm": 9.449108537880413, "learning_rate": 3.205618465145563e-07, "loss": 0.742, "step": 11893 }, { "epoch": 1.6839385573724075, "grad_norm": 10.233209022321944, "learning_rate": 3.202810898511424e-07, "loss": 0.622, "step": 11894 }, { "epoch": 1.6840801302470445, "grad_norm": 10.478192526181347, "learning_rate": 3.2000044777392684e-07, "loss": 0.7394, "step": 11895 }, { "epoch": 1.6842217031216817, "grad_norm": 8.241279848572093, "learning_rate": 3.1971992029766197e-07, "loss": 0.6432, "step": 11896 }, { "epoch": 1.684363275996319, "grad_norm": 8.709106800342136, "learning_rate": 3.194395074370957e-07, "loss": 0.6965, "step": 11897 }, { "epoch": 1.6845048488709562, "grad_norm": 9.165524313289447, "learning_rate": 3.191592092069684e-07, "loss": 0.6536, "step": 11898 }, { "epoch": 1.6846464217455934, "grad_norm": 9.114497581441853, "learning_rate": 3.1887902562201506e-07, "loss": 0.6319, "step": 11899 }, { "epoch": 1.6847879946202307, "grad_norm": 8.455434769202023, "learning_rate": 3.185989566969655e-07, "loss": 0.6434, "step": 11900 }, { "epoch": 1.6849295674948679, "grad_norm": 11.61969726378244, "learning_rate": 3.1831900244654157e-07, "loss": 0.7519, "step": 11901 }, { "epoch": 1.6850711403695051, "grad_norm": 9.614568035958058, "learning_rate": 3.1803916288546176e-07, "loss": 0.6768, "step": 11902 }, { "epoch": 1.6852127132441423, "grad_norm": 11.431252028943232, "learning_rate": 3.1775943802843546e-07, "loss": 0.6649, "step": 11903 }, { "epoch": 1.6853542861187796, "grad_norm": 8.365123333885357, "learning_rate": 3.174798278901692e-07, "loss": 0.7362, "step": 11904 }, { "epoch": 1.6854958589934168, "grad_norm": 7.816196879223626, "learning_rate": 3.172003324853601e-07, "loss": 0.665, "step": 11905 }, { "epoch": 1.685637431868054, "grad_norm": 8.237937663235828, "learning_rate": 3.169209518287028e-07, "loss": 0.6919, "step": 11906 }, { "epoch": 1.6857790047426913, "grad_norm": 8.271341636534046, "learning_rate": 3.166416859348825e-07, "loss": 0.6125, "step": 11907 }, { "epoch": 1.6859205776173285, "grad_norm": 10.220942177538333, "learning_rate": 3.163625348185814e-07, "loss": 0.7231, "step": 11908 }, { "epoch": 1.6860621504919657, "grad_norm": 9.252539621148237, "learning_rate": 3.1608349849447385e-07, "loss": 0.6894, "step": 11909 }, { "epoch": 1.686203723366603, "grad_norm": 8.510975651649389, "learning_rate": 3.1580457697722777e-07, "loss": 0.6891, "step": 11910 }, { "epoch": 1.6863452962412402, "grad_norm": 8.346233555502044, "learning_rate": 3.1552577028150677e-07, "loss": 0.6367, "step": 11911 }, { "epoch": 1.6864868691158774, "grad_norm": 8.605969650367522, "learning_rate": 3.152470784219669e-07, "loss": 0.6234, "step": 11912 }, { "epoch": 1.6866284419905146, "grad_norm": 9.362251667554183, "learning_rate": 3.1496850141325973e-07, "loss": 0.6954, "step": 11913 }, { "epoch": 1.6867700148651519, "grad_norm": 9.764985916588506, "learning_rate": 3.146900392700286e-07, "loss": 0.6719, "step": 11914 }, { "epoch": 1.686911587739789, "grad_norm": 9.070766283195454, "learning_rate": 3.1441169200691265e-07, "loss": 0.6598, "step": 11915 }, { "epoch": 1.6870531606144263, "grad_norm": 8.068503340445808, "learning_rate": 3.141334596385448e-07, "loss": 0.6357, "step": 11916 }, { "epoch": 1.6871947334890636, "grad_norm": 9.069085615640013, "learning_rate": 3.138553421795507e-07, "loss": 0.733, "step": 11917 }, { "epoch": 1.6873363063637008, "grad_norm": 9.182493753241562, "learning_rate": 3.1357733964455185e-07, "loss": 0.6923, "step": 11918 }, { "epoch": 1.687477879238338, "grad_norm": 8.378537925325235, "learning_rate": 3.1329945204816166e-07, "loss": 0.5412, "step": 11919 }, { "epoch": 1.6876194521129753, "grad_norm": 10.208795487718353, "learning_rate": 3.1302167940498893e-07, "loss": 0.6917, "step": 11920 }, { "epoch": 1.6877610249876125, "grad_norm": 11.75451119039815, "learning_rate": 3.127440217296354e-07, "loss": 0.7385, "step": 11921 }, { "epoch": 1.6879025978622497, "grad_norm": 7.9957063596926625, "learning_rate": 3.1246647903669794e-07, "loss": 0.6744, "step": 11922 }, { "epoch": 1.688044170736887, "grad_norm": 10.391043885471044, "learning_rate": 3.121890513407669e-07, "loss": 0.6322, "step": 11923 }, { "epoch": 1.6881857436115242, "grad_norm": 8.985023753750946, "learning_rate": 3.119117386564255e-07, "loss": 0.7329, "step": 11924 }, { "epoch": 1.6883273164861614, "grad_norm": 7.2441379920458955, "learning_rate": 3.1163454099825326e-07, "loss": 0.6855, "step": 11925 }, { "epoch": 1.6884688893607984, "grad_norm": 9.925531437480124, "learning_rate": 3.113574583808207e-07, "loss": 0.6228, "step": 11926 }, { "epoch": 1.6886104622354356, "grad_norm": 8.533981165534138, "learning_rate": 3.110804908186954e-07, "loss": 0.7521, "step": 11927 }, { "epoch": 1.6887520351100729, "grad_norm": 8.204061981458056, "learning_rate": 3.1080363832643593e-07, "loss": 0.6937, "step": 11928 }, { "epoch": 1.68889360798471, "grad_norm": 10.300621636159082, "learning_rate": 3.105269009185974e-07, "loss": 0.6202, "step": 11929 }, { "epoch": 1.6890351808593473, "grad_norm": 10.200877387514158, "learning_rate": 3.102502786097272e-07, "loss": 0.7203, "step": 11930 }, { "epoch": 1.6891767537339846, "grad_norm": 9.46959317422105, "learning_rate": 3.0997377141436665e-07, "loss": 0.6809, "step": 11931 }, { "epoch": 1.6893183266086218, "grad_norm": 10.68689677002834, "learning_rate": 3.096973793470523e-07, "loss": 0.6903, "step": 11932 }, { "epoch": 1.689459899483259, "grad_norm": 9.260102400079653, "learning_rate": 3.0942110242231316e-07, "loss": 0.6478, "step": 11933 }, { "epoch": 1.6896014723578963, "grad_norm": 8.332021177619323, "learning_rate": 3.091449406546737e-07, "loss": 0.6361, "step": 11934 }, { "epoch": 1.6897430452325335, "grad_norm": 9.481110917682507, "learning_rate": 3.088688940586507e-07, "loss": 0.6822, "step": 11935 }, { "epoch": 1.6898846181071705, "grad_norm": 8.535279604371768, "learning_rate": 3.0859296264875686e-07, "loss": 0.6635, "step": 11936 }, { "epoch": 1.6900261909818077, "grad_norm": 9.526997001941378, "learning_rate": 3.083171464394963e-07, "loss": 0.6458, "step": 11937 }, { "epoch": 1.690167763856445, "grad_norm": 8.598146513878506, "learning_rate": 3.0804144544536897e-07, "loss": 0.6863, "step": 11938 }, { "epoch": 1.6903093367310822, "grad_norm": 10.4874406815171, "learning_rate": 3.0776585968086914e-07, "loss": 0.6981, "step": 11939 }, { "epoch": 1.6904509096057194, "grad_norm": 8.265720532735967, "learning_rate": 3.0749038916048356e-07, "loss": 0.7332, "step": 11940 }, { "epoch": 1.6905924824803567, "grad_norm": 9.980288342302932, "learning_rate": 3.0721503389869344e-07, "loss": 0.7684, "step": 11941 }, { "epoch": 1.6907340553549939, "grad_norm": 9.23034518074821, "learning_rate": 3.0693979390997333e-07, "loss": 0.742, "step": 11942 }, { "epoch": 1.6908756282296311, "grad_norm": 8.462118219641203, "learning_rate": 3.066646692087938e-07, "loss": 0.7415, "step": 11943 }, { "epoch": 1.6910172011042683, "grad_norm": 8.652880732490372, "learning_rate": 3.063896598096164e-07, "loss": 0.7114, "step": 11944 }, { "epoch": 1.6911587739789056, "grad_norm": 8.814866201474777, "learning_rate": 3.0611476572689896e-07, "loss": 0.7502, "step": 11945 }, { "epoch": 1.6913003468535428, "grad_norm": 9.789192265824855, "learning_rate": 3.0583998697509305e-07, "loss": 0.6598, "step": 11946 }, { "epoch": 1.69144191972818, "grad_norm": 9.208497841402647, "learning_rate": 3.055653235686426e-07, "loss": 0.6705, "step": 11947 }, { "epoch": 1.6915834926028173, "grad_norm": 8.81906857094242, "learning_rate": 3.0529077552198724e-07, "loss": 0.7202, "step": 11948 }, { "epoch": 1.6917250654774545, "grad_norm": 9.26281963419798, "learning_rate": 3.0501634284955867e-07, "loss": 0.6572, "step": 11949 }, { "epoch": 1.6918666383520917, "grad_norm": 8.980729134573329, "learning_rate": 3.0474202556578513e-07, "loss": 0.7273, "step": 11950 }, { "epoch": 1.692008211226729, "grad_norm": 8.94769175509196, "learning_rate": 3.044678236850862e-07, "loss": 0.6385, "step": 11951 }, { "epoch": 1.6921497841013662, "grad_norm": 10.725816716039587, "learning_rate": 3.0419373722187645e-07, "loss": 0.6978, "step": 11952 }, { "epoch": 1.6922913569760034, "grad_norm": 8.982522312373355, "learning_rate": 3.039197661905652e-07, "loss": 0.7965, "step": 11953 }, { "epoch": 1.6924329298506406, "grad_norm": 9.130202808947868, "learning_rate": 3.0364591060555363e-07, "loss": 0.7381, "step": 11954 }, { "epoch": 1.6925745027252779, "grad_norm": 10.560309730380188, "learning_rate": 3.033721704812395e-07, "loss": 0.6905, "step": 11955 }, { "epoch": 1.692716075599915, "grad_norm": 11.075056805260463, "learning_rate": 3.030985458320118e-07, "loss": 0.7657, "step": 11956 }, { "epoch": 1.6928576484745523, "grad_norm": 7.66015674799208, "learning_rate": 3.028250366722563e-07, "loss": 0.6, "step": 11957 }, { "epoch": 1.6929992213491896, "grad_norm": 8.581640527205948, "learning_rate": 3.025516430163497e-07, "loss": 0.6317, "step": 11958 }, { "epoch": 1.6931407942238268, "grad_norm": 8.023512619840652, "learning_rate": 3.022783648786651e-07, "loss": 0.6416, "step": 11959 }, { "epoch": 1.693282367098464, "grad_norm": 9.381180823857003, "learning_rate": 3.020052022735678e-07, "loss": 0.6227, "step": 11960 }, { "epoch": 1.6934239399731013, "grad_norm": 9.082416406483558, "learning_rate": 3.017321552154187e-07, "loss": 0.7313, "step": 11961 }, { "epoch": 1.6935655128477385, "grad_norm": 7.804839261615719, "learning_rate": 3.0145922371857097e-07, "loss": 0.6919, "step": 11962 }, { "epoch": 1.6937070857223757, "grad_norm": 8.714095647378457, "learning_rate": 3.0118640779737225e-07, "loss": 0.6172, "step": 11963 }, { "epoch": 1.693848658597013, "grad_norm": 8.09582634292539, "learning_rate": 3.009137074661647e-07, "loss": 0.7163, "step": 11964 }, { "epoch": 1.6939902314716502, "grad_norm": 9.595607515616194, "learning_rate": 3.006411227392836e-07, "loss": 0.7026, "step": 11965 }, { "epoch": 1.6941318043462874, "grad_norm": 9.294591697027531, "learning_rate": 3.003686536310593e-07, "loss": 0.7019, "step": 11966 }, { "epoch": 1.6942733772209246, "grad_norm": 9.204279310700786, "learning_rate": 3.000963001558141e-07, "loss": 0.6755, "step": 11967 }, { "epoch": 1.6944149500955616, "grad_norm": 10.49062146552387, "learning_rate": 2.9982406232786614e-07, "loss": 0.7063, "step": 11968 }, { "epoch": 1.6945565229701989, "grad_norm": 10.107048038074486, "learning_rate": 2.995519401615274e-07, "loss": 0.6973, "step": 11969 }, { "epoch": 1.694698095844836, "grad_norm": 9.586451634032304, "learning_rate": 2.9927993367110165e-07, "loss": 0.6663, "step": 11970 }, { "epoch": 1.6948396687194733, "grad_norm": 8.484755814859552, "learning_rate": 2.9900804287088944e-07, "loss": 0.744, "step": 11971 }, { "epoch": 1.6949812415941106, "grad_norm": 8.87761982679268, "learning_rate": 2.9873626777518343e-07, "loss": 0.5745, "step": 11972 }, { "epoch": 1.6951228144687478, "grad_norm": 9.87817640764858, "learning_rate": 2.984646083982698e-07, "loss": 0.7559, "step": 11973 }, { "epoch": 1.695264387343385, "grad_norm": 10.427504087349917, "learning_rate": 2.9819306475443096e-07, "loss": 0.7541, "step": 11974 }, { "epoch": 1.6954059602180223, "grad_norm": 8.327354982430974, "learning_rate": 2.9792163685794015e-07, "loss": 0.6549, "step": 11975 }, { "epoch": 1.6955475330926595, "grad_norm": 9.897505598150964, "learning_rate": 2.976503247230675e-07, "loss": 0.6219, "step": 11976 }, { "epoch": 1.6956891059672967, "grad_norm": 8.862664474876519, "learning_rate": 2.9737912836407477e-07, "loss": 0.5724, "step": 11977 }, { "epoch": 1.6958306788419337, "grad_norm": 8.897485322053212, "learning_rate": 2.971080477952193e-07, "loss": 0.616, "step": 11978 }, { "epoch": 1.695972251716571, "grad_norm": 9.415684426541432, "learning_rate": 2.968370830307507e-07, "loss": 0.6134, "step": 11979 }, { "epoch": 1.6961138245912082, "grad_norm": 8.701383406168949, "learning_rate": 2.965662340849146e-07, "loss": 0.5682, "step": 11980 }, { "epoch": 1.6962553974658454, "grad_norm": 9.901948879577672, "learning_rate": 2.9629550097194787e-07, "loss": 0.8315, "step": 11981 }, { "epoch": 1.6963969703404826, "grad_norm": 9.953962784844553, "learning_rate": 2.960248837060842e-07, "loss": 0.6211, "step": 11982 }, { "epoch": 1.6965385432151199, "grad_norm": 10.48906065248848, "learning_rate": 2.957543823015491e-07, "loss": 0.7005, "step": 11983 }, { "epoch": 1.696680116089757, "grad_norm": 9.531986395732922, "learning_rate": 2.9548399677256174e-07, "loss": 0.6995, "step": 11984 }, { "epoch": 1.6968216889643943, "grad_norm": 9.617261277387605, "learning_rate": 2.9521372713333773e-07, "loss": 0.7118, "step": 11985 }, { "epoch": 1.6969632618390316, "grad_norm": 9.305470520946855, "learning_rate": 2.9494357339808347e-07, "loss": 0.769, "step": 11986 }, { "epoch": 1.6971048347136688, "grad_norm": 8.632109876666977, "learning_rate": 2.946735355810018e-07, "loss": 0.6671, "step": 11987 }, { "epoch": 1.697246407588306, "grad_norm": 8.842130586879481, "learning_rate": 2.9440361369628773e-07, "loss": 0.6585, "step": 11988 }, { "epoch": 1.6973879804629433, "grad_norm": 8.554418606647321, "learning_rate": 2.94133807758131e-07, "loss": 0.6431, "step": 11989 }, { "epoch": 1.6975295533375805, "grad_norm": 9.499950609580697, "learning_rate": 2.9386411778071584e-07, "loss": 0.7186, "step": 11990 }, { "epoch": 1.6976711262122177, "grad_norm": 9.840989661342421, "learning_rate": 2.935945437782184e-07, "loss": 0.7917, "step": 11991 }, { "epoch": 1.697812699086855, "grad_norm": 9.086530303640899, "learning_rate": 2.933250857648112e-07, "loss": 0.6552, "step": 11992 }, { "epoch": 1.6979542719614922, "grad_norm": 9.128045396937654, "learning_rate": 2.9305574375465884e-07, "loss": 0.6581, "step": 11993 }, { "epoch": 1.6980958448361294, "grad_norm": 8.007682449401276, "learning_rate": 2.9278651776192073e-07, "loss": 0.6419, "step": 11994 }, { "epoch": 1.6982374177107666, "grad_norm": 10.530839227198445, "learning_rate": 2.925174078007487e-07, "loss": 0.6778, "step": 11995 }, { "epoch": 1.6983789905854039, "grad_norm": 9.6173572664748, "learning_rate": 2.922484138852907e-07, "loss": 0.6947, "step": 11996 }, { "epoch": 1.698520563460041, "grad_norm": 8.44868914125537, "learning_rate": 2.9197953602968814e-07, "loss": 0.589, "step": 11997 }, { "epoch": 1.6986621363346783, "grad_norm": 10.833067983654333, "learning_rate": 2.917107742480743e-07, "loss": 0.7156, "step": 11998 }, { "epoch": 1.6988037092093156, "grad_norm": 9.006220469270852, "learning_rate": 2.9144212855457906e-07, "loss": 0.6678, "step": 11999 }, { "epoch": 1.6989452820839528, "grad_norm": 7.91907362905119, "learning_rate": 2.911735989633238e-07, "loss": 0.7084, "step": 12000 }, { "epoch": 1.69908685495859, "grad_norm": 9.740194231481844, "learning_rate": 2.9090518548842594e-07, "loss": 0.6351, "step": 12001 }, { "epoch": 1.6992284278332273, "grad_norm": 8.355118111506657, "learning_rate": 2.906368881439947e-07, "loss": 0.758, "step": 12002 }, { "epoch": 1.6993700007078645, "grad_norm": 9.798998442871518, "learning_rate": 2.903687069441358e-07, "loss": 0.7047, "step": 12003 }, { "epoch": 1.6995115735825017, "grad_norm": 7.950856183090564, "learning_rate": 2.901006419029459e-07, "loss": 0.7168, "step": 12004 }, { "epoch": 1.699653146457139, "grad_norm": 9.528021593993449, "learning_rate": 2.8983269303451715e-07, "loss": 0.522, "step": 12005 }, { "epoch": 1.6997947193317762, "grad_norm": 7.996383326308809, "learning_rate": 2.8956486035293635e-07, "loss": 0.6425, "step": 12006 }, { "epoch": 1.6999362922064134, "grad_norm": 8.213126343600072, "learning_rate": 2.892971438722822e-07, "loss": 0.6563, "step": 12007 }, { "epoch": 1.7000778650810506, "grad_norm": 9.114572079606148, "learning_rate": 2.8902954360662925e-07, "loss": 0.6864, "step": 12008 }, { "epoch": 1.7002194379556876, "grad_norm": 10.27533363858699, "learning_rate": 2.887620595700441e-07, "loss": 0.6859, "step": 12009 }, { "epoch": 1.7003610108303249, "grad_norm": 10.946116862396197, "learning_rate": 2.8849469177658933e-07, "loss": 0.6943, "step": 12010 }, { "epoch": 1.700502583704962, "grad_norm": 8.717013716003892, "learning_rate": 2.8822744024031904e-07, "loss": 0.7589, "step": 12011 }, { "epoch": 1.7006441565795993, "grad_norm": 8.888857057302488, "learning_rate": 2.8796030497528325e-07, "loss": 0.5598, "step": 12012 }, { "epoch": 1.7007857294542366, "grad_norm": 9.230168709416793, "learning_rate": 2.8769328599552503e-07, "loss": 0.719, "step": 12013 }, { "epoch": 1.7009273023288738, "grad_norm": 10.220019153793512, "learning_rate": 2.874263833150814e-07, "loss": 0.6473, "step": 12014 }, { "epoch": 1.701068875203511, "grad_norm": 8.895190415984422, "learning_rate": 2.871595969479832e-07, "loss": 0.6599, "step": 12015 }, { "epoch": 1.7012104480781483, "grad_norm": 9.686105541649072, "learning_rate": 2.86892926908254e-07, "loss": 0.7238, "step": 12016 }, { "epoch": 1.7013520209527855, "grad_norm": 7.878748410355028, "learning_rate": 2.866263732099145e-07, "loss": 0.7384, "step": 12017 }, { "epoch": 1.7014935938274227, "grad_norm": 8.80514804072271, "learning_rate": 2.8635993586697555e-07, "loss": 0.782, "step": 12018 }, { "epoch": 1.7016351667020597, "grad_norm": 8.583805503326907, "learning_rate": 2.86093614893444e-07, "loss": 0.5738, "step": 12019 }, { "epoch": 1.701776739576697, "grad_norm": 8.883052987070029, "learning_rate": 2.8582741030332095e-07, "loss": 0.6448, "step": 12020 }, { "epoch": 1.7019183124513342, "grad_norm": 10.009995233647054, "learning_rate": 2.8556132211059963e-07, "loss": 0.7087, "step": 12021 }, { "epoch": 1.7020598853259714, "grad_norm": 9.4395697136784, "learning_rate": 2.852953503292688e-07, "loss": 0.7276, "step": 12022 }, { "epoch": 1.7022014582006086, "grad_norm": 8.79517625902097, "learning_rate": 2.8502949497330954e-07, "loss": 0.7537, "step": 12023 }, { "epoch": 1.7023430310752459, "grad_norm": 7.833065704751935, "learning_rate": 2.8476375605669905e-07, "loss": 0.607, "step": 12024 }, { "epoch": 1.702484603949883, "grad_norm": 8.618706217454738, "learning_rate": 2.8449813359340576e-07, "loss": 0.8089, "step": 12025 }, { "epoch": 1.7026261768245203, "grad_norm": 8.804178840952549, "learning_rate": 2.8423262759739307e-07, "loss": 0.6476, "step": 12026 }, { "epoch": 1.7027677496991576, "grad_norm": 8.424753917869658, "learning_rate": 2.839672380826197e-07, "loss": 0.6657, "step": 12027 }, { "epoch": 1.7029093225737948, "grad_norm": 9.092784292849416, "learning_rate": 2.8370196506303573e-07, "loss": 0.6934, "step": 12028 }, { "epoch": 1.703050895448432, "grad_norm": 7.60826275038247, "learning_rate": 2.8343680855258764e-07, "loss": 0.6432, "step": 12029 }, { "epoch": 1.7031924683230693, "grad_norm": 9.824937458179004, "learning_rate": 2.83171768565213e-07, "loss": 0.6693, "step": 12030 }, { "epoch": 1.7033340411977065, "grad_norm": 9.73719102714541, "learning_rate": 2.8290684511484615e-07, "loss": 0.7375, "step": 12031 }, { "epoch": 1.7034756140723437, "grad_norm": 8.992417426198152, "learning_rate": 2.826420382154127e-07, "loss": 0.7125, "step": 12032 }, { "epoch": 1.703617186946981, "grad_norm": 9.088283298321354, "learning_rate": 2.823773478808348e-07, "loss": 0.7212, "step": 12033 }, { "epoch": 1.7037587598216182, "grad_norm": 10.024115666604258, "learning_rate": 2.8211277412502543e-07, "loss": 0.7771, "step": 12034 }, { "epoch": 1.7039003326962554, "grad_norm": 8.360570519067277, "learning_rate": 2.818483169618941e-07, "loss": 0.6388, "step": 12035 }, { "epoch": 1.7040419055708926, "grad_norm": 8.653679531482187, "learning_rate": 2.8158397640534326e-07, "loss": 0.539, "step": 12036 }, { "epoch": 1.7041834784455299, "grad_norm": 7.6768216037174355, "learning_rate": 2.813197524692679e-07, "loss": 0.6547, "step": 12037 }, { "epoch": 1.704325051320167, "grad_norm": 9.207062324274036, "learning_rate": 2.810556451675592e-07, "loss": 0.6413, "step": 12038 }, { "epoch": 1.7044666241948043, "grad_norm": 8.54510066777234, "learning_rate": 2.807916545141004e-07, "loss": 0.6649, "step": 12039 }, { "epoch": 1.7046081970694416, "grad_norm": 10.311737957166368, "learning_rate": 2.805277805227702e-07, "loss": 0.786, "step": 12040 }, { "epoch": 1.7047497699440788, "grad_norm": 10.346902695287232, "learning_rate": 2.8026402320743914e-07, "loss": 0.7014, "step": 12041 }, { "epoch": 1.704891342818716, "grad_norm": 9.10255954503985, "learning_rate": 2.8000038258197334e-07, "loss": 0.6616, "step": 12042 }, { "epoch": 1.7050329156933532, "grad_norm": 9.294690608004936, "learning_rate": 2.7973685866023224e-07, "loss": 0.7171, "step": 12043 }, { "epoch": 1.7051744885679905, "grad_norm": 10.80981553109979, "learning_rate": 2.7947345145606877e-07, "loss": 0.7397, "step": 12044 }, { "epoch": 1.7053160614426277, "grad_norm": 10.249631921625703, "learning_rate": 2.792101609833309e-07, "loss": 0.7208, "step": 12045 }, { "epoch": 1.705457634317265, "grad_norm": 8.379811285620768, "learning_rate": 2.7894698725585866e-07, "loss": 0.5825, "step": 12046 }, { "epoch": 1.7055992071919022, "grad_norm": 9.608735846375927, "learning_rate": 2.786839302874869e-07, "loss": 0.6974, "step": 12047 }, { "epoch": 1.7057407800665394, "grad_norm": 9.306660504635117, "learning_rate": 2.784209900920451e-07, "loss": 0.7417, "step": 12048 }, { "epoch": 1.7058823529411766, "grad_norm": 8.731232925868014, "learning_rate": 2.781581666833549e-07, "loss": 0.657, "step": 12049 }, { "epoch": 1.7060239258158136, "grad_norm": 9.058427408590259, "learning_rate": 2.778954600752337e-07, "loss": 0.5376, "step": 12050 }, { "epoch": 1.7061654986904509, "grad_norm": 7.342570238514544, "learning_rate": 2.776328702814909e-07, "loss": 0.7369, "step": 12051 }, { "epoch": 1.706307071565088, "grad_norm": 10.885496839504897, "learning_rate": 2.773703973159314e-07, "loss": 0.7, "step": 12052 }, { "epoch": 1.7064486444397253, "grad_norm": 8.51782780425442, "learning_rate": 2.771080411923524e-07, "loss": 0.6891, "step": 12053 }, { "epoch": 1.7065902173143626, "grad_norm": 9.12663693311969, "learning_rate": 2.7684580192454653e-07, "loss": 0.6762, "step": 12054 }, { "epoch": 1.7067317901889998, "grad_norm": 6.939755184901495, "learning_rate": 2.7658367952629885e-07, "loss": 0.6328, "step": 12055 }, { "epoch": 1.706873363063637, "grad_norm": 9.223135625909757, "learning_rate": 2.7632167401138996e-07, "loss": 0.6902, "step": 12056 }, { "epoch": 1.7070149359382742, "grad_norm": 8.38923823939365, "learning_rate": 2.760597853935923e-07, "loss": 0.6784, "step": 12057 }, { "epoch": 1.7071565088129115, "grad_norm": 8.869852211805862, "learning_rate": 2.757980136866731e-07, "loss": 0.6824, "step": 12058 }, { "epoch": 1.7072980816875487, "grad_norm": 10.595995639683483, "learning_rate": 2.755363589043944e-07, "loss": 0.7433, "step": 12059 }, { "epoch": 1.707439654562186, "grad_norm": 9.090593242360306, "learning_rate": 2.7527482106051025e-07, "loss": 0.6795, "step": 12060 }, { "epoch": 1.707581227436823, "grad_norm": 7.896951748860324, "learning_rate": 2.7501340016877044e-07, "loss": 0.6473, "step": 12061 }, { "epoch": 1.7077228003114602, "grad_norm": 9.686767944240556, "learning_rate": 2.7475209624291674e-07, "loss": 0.7228, "step": 12062 }, { "epoch": 1.7078643731860974, "grad_norm": 9.191627457628176, "learning_rate": 2.744909092966863e-07, "loss": 0.6295, "step": 12063 }, { "epoch": 1.7080059460607346, "grad_norm": 7.877160396634477, "learning_rate": 2.742298393438092e-07, "loss": 0.6843, "step": 12064 }, { "epoch": 1.7081475189353719, "grad_norm": 8.874929078584366, "learning_rate": 2.739688863980097e-07, "loss": 0.7869, "step": 12065 }, { "epoch": 1.708289091810009, "grad_norm": 9.946779536952565, "learning_rate": 2.7370805047300633e-07, "loss": 0.6616, "step": 12066 }, { "epoch": 1.7084306646846463, "grad_norm": 9.157098203566763, "learning_rate": 2.734473315825112e-07, "loss": 0.713, "step": 12067 }, { "epoch": 1.7085722375592836, "grad_norm": 9.189000428873422, "learning_rate": 2.7318672974022936e-07, "loss": 0.5917, "step": 12068 }, { "epoch": 1.7087138104339208, "grad_norm": 10.417199774769655, "learning_rate": 2.729262449598602e-07, "loss": 0.7152, "step": 12069 }, { "epoch": 1.708855383308558, "grad_norm": 8.30511973019716, "learning_rate": 2.7266587725509805e-07, "loss": 0.6351, "step": 12070 }, { "epoch": 1.7089969561831952, "grad_norm": 8.773115741539783, "learning_rate": 2.724056266396302e-07, "loss": 0.6187, "step": 12071 }, { "epoch": 1.7091385290578325, "grad_norm": 8.759820712967809, "learning_rate": 2.7214549312713723e-07, "loss": 0.691, "step": 12072 }, { "epoch": 1.7092801019324697, "grad_norm": 8.693858941147724, "learning_rate": 2.7188547673129477e-07, "loss": 0.6438, "step": 12073 }, { "epoch": 1.709421674807107, "grad_norm": 10.049341542702912, "learning_rate": 2.716255774657714e-07, "loss": 0.75, "step": 12074 }, { "epoch": 1.7095632476817442, "grad_norm": 8.102820538774985, "learning_rate": 2.7136579534423003e-07, "loss": 0.6562, "step": 12075 }, { "epoch": 1.7097048205563814, "grad_norm": 9.825533817922002, "learning_rate": 2.711061303803267e-07, "loss": 0.6365, "step": 12076 }, { "epoch": 1.7098463934310186, "grad_norm": 9.938142191931588, "learning_rate": 2.7084658258771265e-07, "loss": 0.6914, "step": 12077 }, { "epoch": 1.7099879663056559, "grad_norm": 9.749632217121865, "learning_rate": 2.7058715198003155e-07, "loss": 0.6018, "step": 12078 }, { "epoch": 1.710129539180293, "grad_norm": 9.187311079716865, "learning_rate": 2.7032783857092096e-07, "loss": 0.7125, "step": 12079 }, { "epoch": 1.7102711120549303, "grad_norm": 8.939542950376216, "learning_rate": 2.7006864237401426e-07, "loss": 0.6834, "step": 12080 }, { "epoch": 1.7104126849295676, "grad_norm": 10.091320773013196, "learning_rate": 2.6980956340293543e-07, "loss": 0.7528, "step": 12081 }, { "epoch": 1.7105542578042048, "grad_norm": 10.484741386458607, "learning_rate": 2.695506016713056e-07, "loss": 0.694, "step": 12082 }, { "epoch": 1.710695830678842, "grad_norm": 9.71808114157413, "learning_rate": 2.692917571927373e-07, "loss": 0.6152, "step": 12083 }, { "epoch": 1.7108374035534792, "grad_norm": 10.186810616413165, "learning_rate": 2.69033029980838e-07, "loss": 0.6849, "step": 12084 }, { "epoch": 1.7109789764281165, "grad_norm": 9.136601010624451, "learning_rate": 2.6877442004920873e-07, "loss": 0.7121, "step": 12085 }, { "epoch": 1.7111205493027537, "grad_norm": 8.66621392852614, "learning_rate": 2.685159274114443e-07, "loss": 0.6943, "step": 12086 }, { "epoch": 1.711262122177391, "grad_norm": 10.982405376065769, "learning_rate": 2.6825755208113454e-07, "loss": 0.6963, "step": 12087 }, { "epoch": 1.7114036950520282, "grad_norm": 9.4775740295111, "learning_rate": 2.6799929407186095e-07, "loss": 0.6766, "step": 12088 }, { "epoch": 1.7115452679266654, "grad_norm": 9.8733982766965, "learning_rate": 2.677411533972002e-07, "loss": 0.6656, "step": 12089 }, { "epoch": 1.7116868408013026, "grad_norm": 9.744738601928832, "learning_rate": 2.674831300707223e-07, "loss": 0.6913, "step": 12090 }, { "epoch": 1.7118284136759399, "grad_norm": 9.52371468269718, "learning_rate": 2.6722522410599196e-07, "loss": 0.6235, "step": 12091 }, { "epoch": 1.7119699865505769, "grad_norm": 8.619210331762654, "learning_rate": 2.669674355165661e-07, "loss": 0.6548, "step": 12092 }, { "epoch": 1.712111559425214, "grad_norm": 9.239083111341799, "learning_rate": 2.667097643159974e-07, "loss": 0.5625, "step": 12093 }, { "epoch": 1.7122531322998513, "grad_norm": 7.934550300455533, "learning_rate": 2.664522105178316e-07, "loss": 0.6692, "step": 12094 }, { "epoch": 1.7123947051744886, "grad_norm": 9.76020865420575, "learning_rate": 2.661947741356072e-07, "loss": 0.6704, "step": 12095 }, { "epoch": 1.7125362780491258, "grad_norm": 9.085814484200554, "learning_rate": 2.6593745518285836e-07, "loss": 0.6411, "step": 12096 }, { "epoch": 1.712677850923763, "grad_norm": 8.350422794808104, "learning_rate": 2.6568025367311125e-07, "loss": 0.6618, "step": 12097 }, { "epoch": 1.7128194237984002, "grad_norm": 8.963772233077119, "learning_rate": 2.654231696198878e-07, "loss": 0.7242, "step": 12098 }, { "epoch": 1.7129609966730375, "grad_norm": 9.390202923776835, "learning_rate": 2.651662030367019e-07, "loss": 0.6209, "step": 12099 }, { "epoch": 1.7131025695476747, "grad_norm": 9.053810320701565, "learning_rate": 2.64909353937062e-07, "loss": 0.736, "step": 12100 }, { "epoch": 1.713244142422312, "grad_norm": 8.926503919732736, "learning_rate": 2.646526223344714e-07, "loss": 0.6229, "step": 12101 }, { "epoch": 1.713385715296949, "grad_norm": 8.165926633768267, "learning_rate": 2.6439600824242515e-07, "loss": 0.598, "step": 12102 }, { "epoch": 1.7135272881715862, "grad_norm": 8.894785572917629, "learning_rate": 2.6413951167441415e-07, "loss": 0.6275, "step": 12103 }, { "epoch": 1.7136688610462234, "grad_norm": 9.055630309791649, "learning_rate": 2.6388313264392174e-07, "loss": 0.7336, "step": 12104 }, { "epoch": 1.7138104339208606, "grad_norm": 7.868881997720068, "learning_rate": 2.6362687116442605e-07, "loss": 0.5792, "step": 12105 }, { "epoch": 1.7139520067954979, "grad_norm": 10.935907738453395, "learning_rate": 2.633707272493977e-07, "loss": 0.7312, "step": 12106 }, { "epoch": 1.714093579670135, "grad_norm": 9.821536428676739, "learning_rate": 2.631147009123028e-07, "loss": 0.6893, "step": 12107 }, { "epoch": 1.7142351525447723, "grad_norm": 8.209165372811299, "learning_rate": 2.628587921666001e-07, "loss": 0.6506, "step": 12108 }, { "epoch": 1.7143767254194096, "grad_norm": 8.076656244921708, "learning_rate": 2.626030010257427e-07, "loss": 0.6003, "step": 12109 }, { "epoch": 1.7145182982940468, "grad_norm": 9.92270965669924, "learning_rate": 2.6234732750317765e-07, "loss": 0.7017, "step": 12110 }, { "epoch": 1.714659871168684, "grad_norm": 8.83836950532706, "learning_rate": 2.620917716123444e-07, "loss": 0.5994, "step": 12111 }, { "epoch": 1.7148014440433212, "grad_norm": 9.823917428281618, "learning_rate": 2.6183633336667845e-07, "loss": 0.6888, "step": 12112 }, { "epoch": 1.7149430169179585, "grad_norm": 10.07679974026333, "learning_rate": 2.615810127796073e-07, "loss": 0.6385, "step": 12113 }, { "epoch": 1.7150845897925957, "grad_norm": 8.825479242641993, "learning_rate": 2.613258098645538e-07, "loss": 0.7114, "step": 12114 }, { "epoch": 1.715226162667233, "grad_norm": 9.667324394562698, "learning_rate": 2.610707246349328e-07, "loss": 0.7349, "step": 12115 }, { "epoch": 1.7153677355418702, "grad_norm": 7.471326311834476, "learning_rate": 2.608157571041542e-07, "loss": 0.6157, "step": 12116 }, { "epoch": 1.7155093084165074, "grad_norm": 9.598245269623995, "learning_rate": 2.6056090728562216e-07, "loss": 0.6808, "step": 12117 }, { "epoch": 1.7156508812911446, "grad_norm": 10.178832316102664, "learning_rate": 2.60306175192733e-07, "loss": 0.6359, "step": 12118 }, { "epoch": 1.7157924541657819, "grad_norm": 9.554965373105755, "learning_rate": 2.600515608388787e-07, "loss": 0.623, "step": 12119 }, { "epoch": 1.715934027040419, "grad_norm": 8.691649838580929, "learning_rate": 2.5979706423744396e-07, "loss": 0.6602, "step": 12120 }, { "epoch": 1.7160755999150563, "grad_norm": 9.203272146398996, "learning_rate": 2.595426854018063e-07, "loss": 0.6084, "step": 12121 }, { "epoch": 1.7162171727896935, "grad_norm": 10.197611091459423, "learning_rate": 2.592884243453397e-07, "loss": 0.7484, "step": 12122 }, { "epoch": 1.7163587456643308, "grad_norm": 10.643024907751165, "learning_rate": 2.590342810814095e-07, "loss": 0.6352, "step": 12123 }, { "epoch": 1.716500318538968, "grad_norm": 9.916764031795084, "learning_rate": 2.587802556233765e-07, "loss": 0.6828, "step": 12124 }, { "epoch": 1.7166418914136052, "grad_norm": 8.246222353627674, "learning_rate": 2.5852634798459397e-07, "loss": 0.6159, "step": 12125 }, { "epoch": 1.7167834642882425, "grad_norm": 10.188887612793303, "learning_rate": 2.5827255817841067e-07, "loss": 0.612, "step": 12126 }, { "epoch": 1.7169250371628797, "grad_norm": 9.312783051355867, "learning_rate": 2.580188862181668e-07, "loss": 0.7238, "step": 12127 }, { "epoch": 1.717066610037517, "grad_norm": 8.749407502958816, "learning_rate": 2.5776533211719883e-07, "loss": 0.6854, "step": 12128 }, { "epoch": 1.7172081829121542, "grad_norm": 10.318441632917908, "learning_rate": 2.5751189588883506e-07, "loss": 0.6785, "step": 12129 }, { "epoch": 1.7173497557867914, "grad_norm": 10.121742760694518, "learning_rate": 2.572585775463993e-07, "loss": 0.6035, "step": 12130 }, { "epoch": 1.7174913286614286, "grad_norm": 10.220525278227662, "learning_rate": 2.57005377103208e-07, "loss": 0.7558, "step": 12131 }, { "epoch": 1.7176329015360658, "grad_norm": 8.141487646981787, "learning_rate": 2.567522945725709e-07, "loss": 0.7162, "step": 12132 }, { "epoch": 1.7177744744107029, "grad_norm": 7.945813248570114, "learning_rate": 2.564993299677937e-07, "loss": 0.6312, "step": 12133 }, { "epoch": 1.71791604728534, "grad_norm": 10.506520381173756, "learning_rate": 2.5624648330217327e-07, "loss": 0.6, "step": 12134 }, { "epoch": 1.7180576201599773, "grad_norm": 7.706373415760784, "learning_rate": 2.559937545890029e-07, "loss": 0.644, "step": 12135 }, { "epoch": 1.7181991930346145, "grad_norm": 8.148808266972232, "learning_rate": 2.557411438415669e-07, "loss": 0.7245, "step": 12136 }, { "epoch": 1.7183407659092518, "grad_norm": 9.024670590124268, "learning_rate": 2.5548865107314606e-07, "loss": 0.6968, "step": 12137 }, { "epoch": 1.718482338783889, "grad_norm": 8.628997318724958, "learning_rate": 2.552362762970129e-07, "loss": 0.6212, "step": 12138 }, { "epoch": 1.7186239116585262, "grad_norm": 8.898165704529976, "learning_rate": 2.54984019526435e-07, "loss": 0.6763, "step": 12139 }, { "epoch": 1.7187654845331635, "grad_norm": 8.876357713461317, "learning_rate": 2.547318807746738e-07, "loss": 0.6032, "step": 12140 }, { "epoch": 1.7189070574078007, "grad_norm": 9.977958802542451, "learning_rate": 2.5447986005498303e-07, "loss": 0.7897, "step": 12141 }, { "epoch": 1.719048630282438, "grad_norm": 10.63148677501709, "learning_rate": 2.542279573806122e-07, "loss": 0.7088, "step": 12142 }, { "epoch": 1.7191902031570752, "grad_norm": 9.504348562713938, "learning_rate": 2.539761727648024e-07, "loss": 0.595, "step": 12143 }, { "epoch": 1.7193317760317122, "grad_norm": 9.567687379125008, "learning_rate": 2.537245062207905e-07, "loss": 0.6159, "step": 12144 }, { "epoch": 1.7194733489063494, "grad_norm": 9.878824386770344, "learning_rate": 2.5347295776180697e-07, "loss": 0.6663, "step": 12145 }, { "epoch": 1.7196149217809866, "grad_norm": 9.915806153305029, "learning_rate": 2.5322152740107436e-07, "loss": 0.7237, "step": 12146 }, { "epoch": 1.7197564946556239, "grad_norm": 9.664722071646342, "learning_rate": 2.5297021515181123e-07, "loss": 0.7821, "step": 12147 }, { "epoch": 1.719898067530261, "grad_norm": 10.279932743550129, "learning_rate": 2.527190210272282e-07, "loss": 0.7356, "step": 12148 }, { "epoch": 1.7200396404048983, "grad_norm": 10.072219608860854, "learning_rate": 2.5246794504053094e-07, "loss": 0.696, "step": 12149 }, { "epoch": 1.7201812132795355, "grad_norm": 10.531644095000077, "learning_rate": 2.522169872049174e-07, "loss": 0.6479, "step": 12150 }, { "epoch": 1.7203227861541728, "grad_norm": 9.478210758735623, "learning_rate": 2.5196614753358136e-07, "loss": 0.684, "step": 12151 }, { "epoch": 1.72046435902881, "grad_norm": 9.351912489534644, "learning_rate": 2.5171542603970897e-07, "loss": 0.7013, "step": 12152 }, { "epoch": 1.7206059319034472, "grad_norm": 8.024769584223762, "learning_rate": 2.514648227364794e-07, "loss": 0.6379, "step": 12153 }, { "epoch": 1.7207475047780845, "grad_norm": 9.076296912806129, "learning_rate": 2.512143376370682e-07, "loss": 0.5896, "step": 12154 }, { "epoch": 1.7208890776527217, "grad_norm": 10.034305855730512, "learning_rate": 2.509639707546421e-07, "loss": 0.6348, "step": 12155 }, { "epoch": 1.721030650527359, "grad_norm": 8.794430652296194, "learning_rate": 2.507137221023634e-07, "loss": 0.6659, "step": 12156 }, { "epoch": 1.7211722234019962, "grad_norm": 9.274900171611518, "learning_rate": 2.5046359169338677e-07, "loss": 0.6405, "step": 12157 }, { "epoch": 1.7213137962766334, "grad_norm": 8.637783141953836, "learning_rate": 2.502135795408622e-07, "loss": 0.6894, "step": 12158 }, { "epoch": 1.7214553691512706, "grad_norm": 8.73605761758437, "learning_rate": 2.499636856579321e-07, "loss": 0.6637, "step": 12159 }, { "epoch": 1.7215969420259079, "grad_norm": 8.4783916697199, "learning_rate": 2.4971391005773337e-07, "loss": 0.669, "step": 12160 }, { "epoch": 1.721738514900545, "grad_norm": 9.607724626442964, "learning_rate": 2.4946425275339634e-07, "loss": 0.6784, "step": 12161 }, { "epoch": 1.7218800877751823, "grad_norm": 7.279250758674688, "learning_rate": 2.492147137580458e-07, "loss": 0.6342, "step": 12162 }, { "epoch": 1.7220216606498195, "grad_norm": 10.341942758945095, "learning_rate": 2.4896529308479966e-07, "loss": 0.6008, "step": 12163 }, { "epoch": 1.7221632335244568, "grad_norm": 9.737333628808786, "learning_rate": 2.48715990746769e-07, "loss": 0.6824, "step": 12164 }, { "epoch": 1.722304806399094, "grad_norm": 9.059584155107594, "learning_rate": 2.484668067570606e-07, "loss": 0.6452, "step": 12165 }, { "epoch": 1.7224463792737312, "grad_norm": 7.661878362085918, "learning_rate": 2.482177411287728e-07, "loss": 0.6439, "step": 12166 }, { "epoch": 1.7225879521483685, "grad_norm": 9.00757873178886, "learning_rate": 2.4796879387499947e-07, "loss": 0.6476, "step": 12167 }, { "epoch": 1.7227295250230057, "grad_norm": 11.161807089356095, "learning_rate": 2.47719965008828e-07, "loss": 0.6713, "step": 12168 }, { "epoch": 1.722871097897643, "grad_norm": 9.235772403124939, "learning_rate": 2.4747125454333805e-07, "loss": 0.6407, "step": 12169 }, { "epoch": 1.7230126707722802, "grad_norm": 8.226301430003957, "learning_rate": 2.4722266249160493e-07, "loss": 0.581, "step": 12170 }, { "epoch": 1.7231542436469174, "grad_norm": 8.831648132032171, "learning_rate": 2.4697418886669654e-07, "loss": 0.6624, "step": 12171 }, { "epoch": 1.7232958165215546, "grad_norm": 9.14581516643507, "learning_rate": 2.467258336816755e-07, "loss": 0.6266, "step": 12172 }, { "epoch": 1.7234373893961918, "grad_norm": 8.80605085593117, "learning_rate": 2.4647759694959724e-07, "loss": 0.7374, "step": 12173 }, { "epoch": 1.723578962270829, "grad_norm": 10.010578663626996, "learning_rate": 2.462294786835109e-07, "loss": 0.6845, "step": 12174 }, { "epoch": 1.723720535145466, "grad_norm": 9.292458082858362, "learning_rate": 2.4598147889646097e-07, "loss": 0.678, "step": 12175 }, { "epoch": 1.7238621080201033, "grad_norm": 7.865323858365443, "learning_rate": 2.4573359760148354e-07, "loss": 0.6941, "step": 12176 }, { "epoch": 1.7240036808947405, "grad_norm": 10.308712535432521, "learning_rate": 2.4548583481161044e-07, "loss": 0.6793, "step": 12177 }, { "epoch": 1.7241452537693778, "grad_norm": 8.991026325304816, "learning_rate": 2.4523819053986544e-07, "loss": 0.7563, "step": 12178 }, { "epoch": 1.724286826644015, "grad_norm": 9.218152870623888, "learning_rate": 2.4499066479926807e-07, "loss": 0.7047, "step": 12179 }, { "epoch": 1.7244283995186522, "grad_norm": 8.240463293439717, "learning_rate": 2.447432576028294e-07, "loss": 0.5781, "step": 12180 }, { "epoch": 1.7245699723932895, "grad_norm": 8.833707357681277, "learning_rate": 2.4449596896355677e-07, "loss": 0.6152, "step": 12181 }, { "epoch": 1.7247115452679267, "grad_norm": 9.075874088952293, "learning_rate": 2.442487988944489e-07, "loss": 0.6543, "step": 12182 }, { "epoch": 1.724853118142564, "grad_norm": 9.005043312150748, "learning_rate": 2.440017474084999e-07, "loss": 0.6762, "step": 12183 }, { "epoch": 1.7249946910172012, "grad_norm": 10.786081840149478, "learning_rate": 2.4375481451869713e-07, "loss": 0.7561, "step": 12184 }, { "epoch": 1.7251362638918382, "grad_norm": 9.457043754509494, "learning_rate": 2.4350800023802106e-07, "loss": 0.7037, "step": 12185 }, { "epoch": 1.7252778367664754, "grad_norm": 9.008588719401725, "learning_rate": 2.4326130457944713e-07, "loss": 0.6474, "step": 12186 }, { "epoch": 1.7254194096411126, "grad_norm": 8.475079975482256, "learning_rate": 2.430147275559433e-07, "loss": 0.7046, "step": 12187 }, { "epoch": 1.7255609825157499, "grad_norm": 9.235297400658697, "learning_rate": 2.4276826918047283e-07, "loss": 0.6642, "step": 12188 }, { "epoch": 1.725702555390387, "grad_norm": 8.83313256761333, "learning_rate": 2.425219294659908e-07, "loss": 0.7589, "step": 12189 }, { "epoch": 1.7258441282650243, "grad_norm": 10.315591614584532, "learning_rate": 2.422757084254479e-07, "loss": 0.7451, "step": 12190 }, { "epoch": 1.7259857011396615, "grad_norm": 8.269313054366323, "learning_rate": 2.4202960607178806e-07, "loss": 0.7397, "step": 12191 }, { "epoch": 1.7261272740142988, "grad_norm": 10.437753274551481, "learning_rate": 2.417836224179476e-07, "loss": 0.6291, "step": 12192 }, { "epoch": 1.726268846888936, "grad_norm": 9.184367158256265, "learning_rate": 2.4153775747685906e-07, "loss": 0.6858, "step": 12193 }, { "epoch": 1.7264104197635732, "grad_norm": 10.730230205871452, "learning_rate": 2.412920112614464e-07, "loss": 0.6106, "step": 12194 }, { "epoch": 1.7265519926382105, "grad_norm": 8.986345741737242, "learning_rate": 2.41046383784628e-07, "loss": 0.7046, "step": 12195 }, { "epoch": 1.7266935655128477, "grad_norm": 8.299196999553773, "learning_rate": 2.4080087505931744e-07, "loss": 0.6701, "step": 12196 }, { "epoch": 1.726835138387485, "grad_norm": 10.325586142539885, "learning_rate": 2.4055548509841984e-07, "loss": 0.6164, "step": 12197 }, { "epoch": 1.7269767112621222, "grad_norm": 9.016707277705978, "learning_rate": 2.403102139148361e-07, "loss": 0.751, "step": 12198 }, { "epoch": 1.7271182841367594, "grad_norm": 10.149540677506321, "learning_rate": 2.400650615214592e-07, "loss": 0.6823, "step": 12199 }, { "epoch": 1.7272598570113966, "grad_norm": 10.474779994356721, "learning_rate": 2.3982002793117744e-07, "loss": 0.6414, "step": 12200 }, { "epoch": 1.7274014298860338, "grad_norm": 9.498362751469124, "learning_rate": 2.3957511315687075e-07, "loss": 0.6552, "step": 12201 }, { "epoch": 1.727543002760671, "grad_norm": 10.908563843912669, "learning_rate": 2.393303172114159e-07, "loss": 0.6666, "step": 12202 }, { "epoch": 1.7276845756353083, "grad_norm": 10.255957756086406, "learning_rate": 2.3908564010767966e-07, "loss": 0.6354, "step": 12203 }, { "epoch": 1.7278261485099455, "grad_norm": 11.63110060229483, "learning_rate": 2.388410818585263e-07, "loss": 0.6901, "step": 12204 }, { "epoch": 1.7279677213845828, "grad_norm": 9.853274626946284, "learning_rate": 2.38596642476811e-07, "loss": 0.7818, "step": 12205 }, { "epoch": 1.72810929425922, "grad_norm": 8.577658051669301, "learning_rate": 2.383523219753839e-07, "loss": 0.5998, "step": 12206 }, { "epoch": 1.7282508671338572, "grad_norm": 10.020923088026013, "learning_rate": 2.381081203670893e-07, "loss": 0.7484, "step": 12207 }, { "epoch": 1.7283924400084945, "grad_norm": 10.182338038710476, "learning_rate": 2.3786403766476368e-07, "loss": 0.6267, "step": 12208 }, { "epoch": 1.7285340128831317, "grad_norm": 9.584516325096178, "learning_rate": 2.3762007388123927e-07, "loss": 0.7085, "step": 12209 }, { "epoch": 1.728675585757769, "grad_norm": 8.813869505177443, "learning_rate": 2.3737622902934022e-07, "loss": 0.5746, "step": 12210 }, { "epoch": 1.7288171586324061, "grad_norm": 9.642583861338682, "learning_rate": 2.371325031218863e-07, "loss": 0.5855, "step": 12211 }, { "epoch": 1.7289587315070434, "grad_norm": 8.457773224524317, "learning_rate": 2.368888961716889e-07, "loss": 0.5657, "step": 12212 }, { "epoch": 1.7291003043816806, "grad_norm": 10.83504891138519, "learning_rate": 2.366454081915548e-07, "loss": 0.6611, "step": 12213 }, { "epoch": 1.7292418772563178, "grad_norm": 10.73641108896679, "learning_rate": 2.3640203919428451e-07, "loss": 0.6941, "step": 12214 }, { "epoch": 1.729383450130955, "grad_norm": 9.217914504644822, "learning_rate": 2.3615878919267116e-07, "loss": 0.5976, "step": 12215 }, { "epoch": 1.729525023005592, "grad_norm": 12.169509207264817, "learning_rate": 2.359156581995023e-07, "loss": 0.7428, "step": 12216 }, { "epoch": 1.7296665958802293, "grad_norm": 9.235324662330628, "learning_rate": 2.3567264622755853e-07, "loss": 0.671, "step": 12217 }, { "epoch": 1.7298081687548665, "grad_norm": 8.275774372666595, "learning_rate": 2.3542975328961548e-07, "loss": 0.6591, "step": 12218 }, { "epoch": 1.7299497416295038, "grad_norm": 9.913960918510321, "learning_rate": 2.351869793984421e-07, "loss": 0.7565, "step": 12219 }, { "epoch": 1.730091314504141, "grad_norm": 8.55856566988205, "learning_rate": 2.3494432456680038e-07, "loss": 0.5871, "step": 12220 }, { "epoch": 1.7302328873787782, "grad_norm": 8.996271844741697, "learning_rate": 2.3470178880744681e-07, "loss": 0.6213, "step": 12221 }, { "epoch": 1.7303744602534155, "grad_norm": 8.79415694368128, "learning_rate": 2.3445937213313062e-07, "loss": 0.6616, "step": 12222 }, { "epoch": 1.7305160331280527, "grad_norm": 8.579447437624461, "learning_rate": 2.3421707455659664e-07, "loss": 0.7012, "step": 12223 }, { "epoch": 1.73065760600269, "grad_norm": 9.028809531178126, "learning_rate": 2.3397489609058104e-07, "loss": 0.6968, "step": 12224 }, { "epoch": 1.7307991788773271, "grad_norm": 10.811200190568432, "learning_rate": 2.3373283674781588e-07, "loss": 0.77, "step": 12225 }, { "epoch": 1.7309407517519642, "grad_norm": 8.166772595192182, "learning_rate": 2.3349089654102597e-07, "loss": 0.5837, "step": 12226 }, { "epoch": 1.7310823246266014, "grad_norm": 11.080131661083866, "learning_rate": 2.332490754829289e-07, "loss": 0.7084, "step": 12227 }, { "epoch": 1.7312238975012386, "grad_norm": 8.378417271571195, "learning_rate": 2.3300737358623843e-07, "loss": 0.6821, "step": 12228 }, { "epoch": 1.7313654703758758, "grad_norm": 9.18084558851824, "learning_rate": 2.3276579086365937e-07, "loss": 0.7055, "step": 12229 }, { "epoch": 1.731507043250513, "grad_norm": 10.063254760489357, "learning_rate": 2.3252432732789264e-07, "loss": 0.7297, "step": 12230 }, { "epoch": 1.7316486161251503, "grad_norm": 8.477263165886228, "learning_rate": 2.3228298299163092e-07, "loss": 0.6468, "step": 12231 }, { "epoch": 1.7317901889997875, "grad_norm": 8.994359262068436, "learning_rate": 2.3204175786756238e-07, "loss": 0.6818, "step": 12232 }, { "epoch": 1.7319317618744248, "grad_norm": 8.988231169732279, "learning_rate": 2.3180065196836716e-07, "loss": 0.6921, "step": 12233 }, { "epoch": 1.732073334749062, "grad_norm": 10.914581876287322, "learning_rate": 2.3155966530672092e-07, "loss": 0.6822, "step": 12234 }, { "epoch": 1.7322149076236992, "grad_norm": 9.328151581437211, "learning_rate": 2.3131879789529105e-07, "loss": 0.6812, "step": 12235 }, { "epoch": 1.7323564804983365, "grad_norm": 9.254251276414085, "learning_rate": 2.3107804974674074e-07, "loss": 0.6338, "step": 12236 }, { "epoch": 1.7324980533729737, "grad_norm": 9.016136115277916, "learning_rate": 2.3083742087372574e-07, "loss": 0.7135, "step": 12237 }, { "epoch": 1.732639626247611, "grad_norm": 8.55263513480047, "learning_rate": 2.3059691128889504e-07, "loss": 0.6581, "step": 12238 }, { "epoch": 1.7327811991222482, "grad_norm": 9.154209573604946, "learning_rate": 2.303565210048933e-07, "loss": 0.6681, "step": 12239 }, { "epoch": 1.7329227719968854, "grad_norm": 10.31357675190719, "learning_rate": 2.301162500343562e-07, "loss": 0.731, "step": 12240 }, { "epoch": 1.7330643448715226, "grad_norm": 9.730213531001604, "learning_rate": 2.2987609838991536e-07, "loss": 0.6724, "step": 12241 }, { "epoch": 1.7332059177461598, "grad_norm": 11.106839562665915, "learning_rate": 2.2963606608419593e-07, "loss": 0.7441, "step": 12242 }, { "epoch": 1.733347490620797, "grad_norm": 8.079870157237213, "learning_rate": 2.29396153129815e-07, "loss": 0.724, "step": 12243 }, { "epoch": 1.7334890634954343, "grad_norm": 9.715660462754915, "learning_rate": 2.2915635953938587e-07, "loss": 0.6389, "step": 12244 }, { "epoch": 1.7336306363700715, "grad_norm": 9.200867769531435, "learning_rate": 2.2891668532551315e-07, "loss": 0.6844, "step": 12245 }, { "epoch": 1.7337722092447088, "grad_norm": 8.32645249167924, "learning_rate": 2.2867713050079732e-07, "loss": 0.6334, "step": 12246 }, { "epoch": 1.733913782119346, "grad_norm": 8.705215504370875, "learning_rate": 2.2843769507783137e-07, "loss": 0.6805, "step": 12247 }, { "epoch": 1.7340553549939832, "grad_norm": 8.783236350212341, "learning_rate": 2.2819837906920134e-07, "loss": 0.6344, "step": 12248 }, { "epoch": 1.7341969278686205, "grad_norm": 7.721422975865058, "learning_rate": 2.2795918248748939e-07, "loss": 0.683, "step": 12249 }, { "epoch": 1.7343385007432577, "grad_norm": 10.875252907103219, "learning_rate": 2.2772010534526822e-07, "loss": 0.7242, "step": 12250 }, { "epoch": 1.734480073617895, "grad_norm": 9.045378829757665, "learning_rate": 2.2748114765510754e-07, "loss": 0.6962, "step": 12251 }, { "epoch": 1.7346216464925321, "grad_norm": 9.77554730300264, "learning_rate": 2.272423094295681e-07, "loss": 0.7401, "step": 12252 }, { "epoch": 1.7347632193671694, "grad_norm": 10.525681831945318, "learning_rate": 2.2700359068120624e-07, "loss": 0.6705, "step": 12253 }, { "epoch": 1.7349047922418066, "grad_norm": 10.157158538088844, "learning_rate": 2.2676499142257002e-07, "loss": 0.7034, "step": 12254 }, { "epoch": 1.7350463651164438, "grad_norm": 8.768339719302935, "learning_rate": 2.265265116662041e-07, "loss": 0.6196, "step": 12255 }, { "epoch": 1.735187937991081, "grad_norm": 7.892547916995519, "learning_rate": 2.2628815142464344e-07, "loss": 0.6992, "step": 12256 }, { "epoch": 1.7353295108657183, "grad_norm": 9.599749450592977, "learning_rate": 2.2604991071041999e-07, "loss": 0.6911, "step": 12257 }, { "epoch": 1.7354710837403553, "grad_norm": 8.885101587061476, "learning_rate": 2.258117895360573e-07, "loss": 0.6804, "step": 12258 }, { "epoch": 1.7356126566149925, "grad_norm": 8.010279726628037, "learning_rate": 2.2557378791407264e-07, "loss": 0.637, "step": 12259 }, { "epoch": 1.7357542294896298, "grad_norm": 8.779357923522449, "learning_rate": 2.2533590585697817e-07, "loss": 0.6645, "step": 12260 }, { "epoch": 1.735895802364267, "grad_norm": 8.459895047738476, "learning_rate": 2.2509814337727891e-07, "loss": 0.6237, "step": 12261 }, { "epoch": 1.7360373752389042, "grad_norm": 9.297601648992059, "learning_rate": 2.2486050048747459e-07, "loss": 0.7069, "step": 12262 }, { "epoch": 1.7361789481135415, "grad_norm": 9.090648213891596, "learning_rate": 2.246229772000566e-07, "loss": 0.6875, "step": 12263 }, { "epoch": 1.7363205209881787, "grad_norm": 9.72206025003811, "learning_rate": 2.2438557352751216e-07, "loss": 0.7497, "step": 12264 }, { "epoch": 1.736462093862816, "grad_norm": 9.9586562951574, "learning_rate": 2.2414828948232186e-07, "loss": 0.7409, "step": 12265 }, { "epoch": 1.7366036667374531, "grad_norm": 9.16040528046351, "learning_rate": 2.2391112507695877e-07, "loss": 0.6951, "step": 12266 }, { "epoch": 1.7367452396120904, "grad_norm": 10.384806950206942, "learning_rate": 2.23674080323891e-07, "loss": 0.6888, "step": 12267 }, { "epoch": 1.7368868124867274, "grad_norm": 10.516314955773407, "learning_rate": 2.2343715523557934e-07, "loss": 0.7286, "step": 12268 }, { "epoch": 1.7370283853613646, "grad_norm": 8.80811519972247, "learning_rate": 2.232003498244792e-07, "loss": 0.6535, "step": 12269 }, { "epoch": 1.7371699582360018, "grad_norm": 9.70817869224232, "learning_rate": 2.229636641030386e-07, "loss": 0.7193, "step": 12270 }, { "epoch": 1.737311531110639, "grad_norm": 9.416264168118323, "learning_rate": 2.2272709808370013e-07, "loss": 0.6075, "step": 12271 }, { "epoch": 1.7374531039852763, "grad_norm": 8.978147892934397, "learning_rate": 2.2249065177890077e-07, "loss": 0.6427, "step": 12272 }, { "epoch": 1.7375946768599135, "grad_norm": 8.519314084090274, "learning_rate": 2.222543252010692e-07, "loss": 0.605, "step": 12273 }, { "epoch": 1.7377362497345508, "grad_norm": 10.034471986694557, "learning_rate": 2.2201811836262966e-07, "loss": 0.6485, "step": 12274 }, { "epoch": 1.737877822609188, "grad_norm": 8.889112829990228, "learning_rate": 2.2178203127599883e-07, "loss": 0.6662, "step": 12275 }, { "epoch": 1.7380193954838252, "grad_norm": 11.552070491543997, "learning_rate": 2.2154606395358824e-07, "loss": 0.6673, "step": 12276 }, { "epoch": 1.7381609683584625, "grad_norm": 8.71049562710551, "learning_rate": 2.2131021640780182e-07, "loss": 0.606, "step": 12277 }, { "epoch": 1.7383025412330997, "grad_norm": 9.604960002666957, "learning_rate": 2.2107448865103853e-07, "loss": 0.6566, "step": 12278 }, { "epoch": 1.738444114107737, "grad_norm": 10.047468817501843, "learning_rate": 2.2083888069569042e-07, "loss": 0.7288, "step": 12279 }, { "epoch": 1.7385856869823741, "grad_norm": 8.71759222403585, "learning_rate": 2.2060339255414232e-07, "loss": 0.6539, "step": 12280 }, { "epoch": 1.7387272598570114, "grad_norm": 8.874144177073545, "learning_rate": 2.2036802423877458e-07, "loss": 0.6065, "step": 12281 }, { "epoch": 1.7388688327316486, "grad_norm": 7.165370676124825, "learning_rate": 2.201327757619598e-07, "loss": 0.5969, "step": 12282 }, { "epoch": 1.7390104056062858, "grad_norm": 8.044343124135626, "learning_rate": 2.198976471360656e-07, "loss": 0.7011, "step": 12283 }, { "epoch": 1.739151978480923, "grad_norm": 8.681913267302146, "learning_rate": 2.1966263837345125e-07, "loss": 0.662, "step": 12284 }, { "epoch": 1.7392935513555603, "grad_norm": 7.885077673723595, "learning_rate": 2.1942774948647245e-07, "loss": 0.6417, "step": 12285 }, { "epoch": 1.7394351242301975, "grad_norm": 9.551241761009198, "learning_rate": 2.1919298048747567e-07, "loss": 0.6316, "step": 12286 }, { "epoch": 1.7395766971048348, "grad_norm": 10.45618087280553, "learning_rate": 2.189583313888033e-07, "loss": 0.6591, "step": 12287 }, { "epoch": 1.739718269979472, "grad_norm": 10.834709510447489, "learning_rate": 2.1872380220279127e-07, "loss": 0.676, "step": 12288 }, { "epoch": 1.7398598428541092, "grad_norm": 8.699012209183715, "learning_rate": 2.184893929417678e-07, "loss": 0.6866, "step": 12289 }, { "epoch": 1.7400014157287464, "grad_norm": 9.140652126695858, "learning_rate": 2.182551036180558e-07, "loss": 0.6917, "step": 12290 }, { "epoch": 1.7401429886033837, "grad_norm": 9.368131842906259, "learning_rate": 2.1802093424397126e-07, "loss": 0.7272, "step": 12291 }, { "epoch": 1.740284561478021, "grad_norm": 8.98496389034702, "learning_rate": 2.1778688483182486e-07, "loss": 0.6672, "step": 12292 }, { "epoch": 1.7404261343526581, "grad_norm": 8.920715363560046, "learning_rate": 2.175529553939204e-07, "loss": 0.7326, "step": 12293 }, { "epoch": 1.7405677072272954, "grad_norm": 8.559287255106566, "learning_rate": 2.1731914594255498e-07, "loss": 0.6752, "step": 12294 }, { "epoch": 1.7407092801019326, "grad_norm": 9.099608326961198, "learning_rate": 2.1708545649002015e-07, "loss": 0.7506, "step": 12295 }, { "epoch": 1.7408508529765698, "grad_norm": 9.290593334750948, "learning_rate": 2.1685188704860056e-07, "loss": 0.6386, "step": 12296 }, { "epoch": 1.740992425851207, "grad_norm": 12.094010540031078, "learning_rate": 2.1661843763057522e-07, "loss": 0.6172, "step": 12297 }, { "epoch": 1.7411339987258443, "grad_norm": 8.709250029237726, "learning_rate": 2.1638510824821547e-07, "loss": 0.6142, "step": 12298 }, { "epoch": 1.7412755716004813, "grad_norm": 10.30652126050124, "learning_rate": 2.161518989137884e-07, "loss": 0.6542, "step": 12299 }, { "epoch": 1.7414171444751185, "grad_norm": 10.577275342076964, "learning_rate": 2.1591880963955314e-07, "loss": 0.6838, "step": 12300 }, { "epoch": 1.7415587173497558, "grad_norm": 11.766298718053223, "learning_rate": 2.1568584043776237e-07, "loss": 0.6977, "step": 12301 }, { "epoch": 1.741700290224393, "grad_norm": 8.578576359226176, "learning_rate": 2.1545299132066432e-07, "loss": 0.7937, "step": 12302 }, { "epoch": 1.7418418630990302, "grad_norm": 9.509395169861332, "learning_rate": 2.152202623004987e-07, "loss": 0.7133, "step": 12303 }, { "epoch": 1.7419834359736674, "grad_norm": 9.045747413268206, "learning_rate": 2.1498765338950067e-07, "loss": 0.7073, "step": 12304 }, { "epoch": 1.7421250088483047, "grad_norm": 8.393597806117432, "learning_rate": 2.1475516459989743e-07, "loss": 0.6817, "step": 12305 }, { "epoch": 1.742266581722942, "grad_norm": 10.10750811406446, "learning_rate": 2.1452279594391167e-07, "loss": 0.7647, "step": 12306 }, { "epoch": 1.7424081545975791, "grad_norm": 8.524992607762329, "learning_rate": 2.142905474337578e-07, "loss": 0.5428, "step": 12307 }, { "epoch": 1.7425497274722164, "grad_norm": 8.916163445173865, "learning_rate": 2.1405841908164636e-07, "loss": 0.6862, "step": 12308 }, { "epoch": 1.7426913003468534, "grad_norm": 9.80956754176479, "learning_rate": 2.1382641089977867e-07, "loss": 0.6935, "step": 12309 }, { "epoch": 1.7428328732214906, "grad_norm": 9.674495508412061, "learning_rate": 2.1359452290035194e-07, "loss": 0.6603, "step": 12310 }, { "epoch": 1.7429744460961278, "grad_norm": 10.622427236191529, "learning_rate": 2.1336275509555722e-07, "loss": 0.7143, "step": 12311 }, { "epoch": 1.743116018970765, "grad_norm": 8.59591947047951, "learning_rate": 2.1313110749757672e-07, "loss": 0.5637, "step": 12312 }, { "epoch": 1.7432575918454023, "grad_norm": 10.075832466964888, "learning_rate": 2.1289958011858903e-07, "loss": 0.6731, "step": 12313 }, { "epoch": 1.7433991647200395, "grad_norm": 10.684722360976552, "learning_rate": 2.1266817297076469e-07, "loss": 0.6815, "step": 12314 }, { "epoch": 1.7435407375946768, "grad_norm": 8.897117027793328, "learning_rate": 2.12436886066269e-07, "loss": 0.6206, "step": 12315 }, { "epoch": 1.743682310469314, "grad_norm": 9.964940219054801, "learning_rate": 2.1220571941726082e-07, "loss": 0.614, "step": 12316 }, { "epoch": 1.7438238833439512, "grad_norm": 9.398178540956504, "learning_rate": 2.119746730358918e-07, "loss": 0.809, "step": 12317 }, { "epoch": 1.7439654562185885, "grad_norm": 9.016932348073071, "learning_rate": 2.1174374693430865e-07, "loss": 0.6399, "step": 12318 }, { "epoch": 1.7441070290932257, "grad_norm": 9.718577687002393, "learning_rate": 2.1151294112464997e-07, "loss": 0.6856, "step": 12319 }, { "epoch": 1.744248601967863, "grad_norm": 9.95380834033257, "learning_rate": 2.1128225561905024e-07, "loss": 0.6637, "step": 12320 }, { "epoch": 1.7443901748425001, "grad_norm": 9.463299986308126, "learning_rate": 2.1105169042963585e-07, "loss": 0.6493, "step": 12321 }, { "epoch": 1.7445317477171374, "grad_norm": 11.061474423903174, "learning_rate": 2.1082124556852684e-07, "loss": 0.6035, "step": 12322 }, { "epoch": 1.7446733205917746, "grad_norm": 8.454945704885313, "learning_rate": 2.1059092104783824e-07, "loss": 0.7793, "step": 12323 }, { "epoch": 1.7448148934664118, "grad_norm": 7.74247203808306, "learning_rate": 2.1036071687967785e-07, "loss": 0.6776, "step": 12324 }, { "epoch": 1.744956466341049, "grad_norm": 8.94955975179654, "learning_rate": 2.101306330761474e-07, "loss": 0.7961, "step": 12325 }, { "epoch": 1.7450980392156863, "grad_norm": 8.631175166419574, "learning_rate": 2.0990066964934193e-07, "loss": 0.7341, "step": 12326 }, { "epoch": 1.7452396120903235, "grad_norm": 9.374708247413414, "learning_rate": 2.096708266113512e-07, "loss": 0.6895, "step": 12327 }, { "epoch": 1.7453811849649608, "grad_norm": 9.188117389306036, "learning_rate": 2.0944110397425693e-07, "loss": 0.691, "step": 12328 }, { "epoch": 1.745522757839598, "grad_norm": 7.897834490690089, "learning_rate": 2.0921150175013616e-07, "loss": 0.7774, "step": 12329 }, { "epoch": 1.7456643307142352, "grad_norm": 8.8850861309387, "learning_rate": 2.089820199510584e-07, "loss": 0.7375, "step": 12330 }, { "epoch": 1.7458059035888724, "grad_norm": 8.879246515147432, "learning_rate": 2.0875265858908782e-07, "loss": 0.6047, "step": 12331 }, { "epoch": 1.7459474764635097, "grad_norm": 8.965245429454319, "learning_rate": 2.0852341767628182e-07, "loss": 0.6891, "step": 12332 }, { "epoch": 1.746089049338147, "grad_norm": 10.333825089433434, "learning_rate": 2.082942972246907e-07, "loss": 0.6795, "step": 12333 }, { "epoch": 1.7462306222127841, "grad_norm": 8.014816392115069, "learning_rate": 2.0806529724635982e-07, "loss": 0.6438, "step": 12334 }, { "epoch": 1.7463721950874214, "grad_norm": 8.491789049558816, "learning_rate": 2.0783641775332708e-07, "loss": 0.6217, "step": 12335 }, { "epoch": 1.7465137679620586, "grad_norm": 9.404270711606909, "learning_rate": 2.0760765875762506e-07, "loss": 0.6462, "step": 12336 }, { "epoch": 1.7466553408366958, "grad_norm": 8.874733934309532, "learning_rate": 2.0737902027127888e-07, "loss": 0.6443, "step": 12337 }, { "epoch": 1.746796913711333, "grad_norm": 9.676723072475864, "learning_rate": 2.0715050230630807e-07, "loss": 0.5377, "step": 12338 }, { "epoch": 1.7469384865859703, "grad_norm": 9.577343160509608, "learning_rate": 2.069221048747261e-07, "loss": 0.759, "step": 12339 }, { "epoch": 1.7470800594606073, "grad_norm": 9.904496961410699, "learning_rate": 2.0669382798853887e-07, "loss": 0.7145, "step": 12340 }, { "epoch": 1.7472216323352445, "grad_norm": 8.199787267391072, "learning_rate": 2.064656716597474e-07, "loss": 0.7585, "step": 12341 }, { "epoch": 1.7473632052098818, "grad_norm": 9.284416300843219, "learning_rate": 2.0623763590034567e-07, "loss": 0.6068, "step": 12342 }, { "epoch": 1.747504778084519, "grad_norm": 8.38411979045174, "learning_rate": 2.0600972072232105e-07, "loss": 0.6218, "step": 12343 }, { "epoch": 1.7476463509591562, "grad_norm": 8.616005450715418, "learning_rate": 2.0578192613765453e-07, "loss": 0.5899, "step": 12344 }, { "epoch": 1.7477879238337934, "grad_norm": 10.59036101650746, "learning_rate": 2.0555425215832176e-07, "loss": 0.6114, "step": 12345 }, { "epoch": 1.7479294967084307, "grad_norm": 7.707251505500711, "learning_rate": 2.0532669879629124e-07, "loss": 0.6997, "step": 12346 }, { "epoch": 1.748071069583068, "grad_norm": 8.636415308017476, "learning_rate": 2.050992660635248e-07, "loss": 0.6177, "step": 12347 }, { "epoch": 1.7482126424577051, "grad_norm": 8.456328456652857, "learning_rate": 2.0487195397197928e-07, "loss": 0.6906, "step": 12348 }, { "epoch": 1.7483542153323424, "grad_norm": 9.30107573163583, "learning_rate": 2.0464476253360344e-07, "loss": 0.6638, "step": 12349 }, { "epoch": 1.7484957882069796, "grad_norm": 7.891408870021716, "learning_rate": 2.044176917603413e-07, "loss": 0.6593, "step": 12350 }, { "epoch": 1.7486373610816166, "grad_norm": 9.58735806634877, "learning_rate": 2.0419074166412893e-07, "loss": 0.7251, "step": 12351 }, { "epoch": 1.7487789339562538, "grad_norm": 9.262960890450083, "learning_rate": 2.0396391225689817e-07, "loss": 0.678, "step": 12352 }, { "epoch": 1.748920506830891, "grad_norm": 8.821199924910124, "learning_rate": 2.037372035505722e-07, "loss": 0.6681, "step": 12353 }, { "epoch": 1.7490620797055283, "grad_norm": 9.77718768602974, "learning_rate": 2.0351061555706901e-07, "loss": 0.6901, "step": 12354 }, { "epoch": 1.7492036525801655, "grad_norm": 7.213277220211021, "learning_rate": 2.0328414828830078e-07, "loss": 0.6712, "step": 12355 }, { "epoch": 1.7493452254548028, "grad_norm": 9.397000958505865, "learning_rate": 2.0305780175617213e-07, "loss": 0.7425, "step": 12356 }, { "epoch": 1.74948679832944, "grad_norm": 8.92398479254849, "learning_rate": 2.0283157597258241e-07, "loss": 0.6972, "step": 12357 }, { "epoch": 1.7496283712040772, "grad_norm": 9.092590467848842, "learning_rate": 2.026054709494235e-07, "loss": 0.6172, "step": 12358 }, { "epoch": 1.7497699440787144, "grad_norm": 9.15191276860847, "learning_rate": 2.0237948669858233e-07, "loss": 0.6892, "step": 12359 }, { "epoch": 1.7499115169533517, "grad_norm": 9.584163685083873, "learning_rate": 2.0215362323193822e-07, "loss": 0.7451, "step": 12360 }, { "epoch": 1.750053089827989, "grad_norm": 7.798413814752526, "learning_rate": 2.0192788056136446e-07, "loss": 0.7323, "step": 12361 }, { "epoch": 1.7501946627026261, "grad_norm": 9.548618987725602, "learning_rate": 2.0170225869872912e-07, "loss": 0.7241, "step": 12362 }, { "epoch": 1.7503362355772634, "grad_norm": 9.08743832657782, "learning_rate": 2.0147675765589236e-07, "loss": 0.7016, "step": 12363 }, { "epoch": 1.7504778084519006, "grad_norm": 9.438981299958012, "learning_rate": 2.0125137744470863e-07, "loss": 0.6036, "step": 12364 }, { "epoch": 1.7506193813265378, "grad_norm": 8.061112158743349, "learning_rate": 2.0102611807702539e-07, "loss": 0.6738, "step": 12365 }, { "epoch": 1.750760954201175, "grad_norm": 9.543038304341286, "learning_rate": 2.0080097956468537e-07, "loss": 0.761, "step": 12366 }, { "epoch": 1.7509025270758123, "grad_norm": 9.131946166940454, "learning_rate": 2.0057596191952327e-07, "loss": 0.683, "step": 12367 }, { "epoch": 1.7510440999504495, "grad_norm": 9.311035860415602, "learning_rate": 2.0035106515336798e-07, "loss": 0.712, "step": 12368 }, { "epoch": 1.7511856728250867, "grad_norm": 10.57332440171059, "learning_rate": 2.001262892780434e-07, "loss": 0.7003, "step": 12369 }, { "epoch": 1.751327245699724, "grad_norm": 8.975257772075146, "learning_rate": 1.999016343053642e-07, "loss": 0.6909, "step": 12370 }, { "epoch": 1.7514688185743612, "grad_norm": 9.09724490200901, "learning_rate": 1.996771002471415e-07, "loss": 0.6632, "step": 12371 }, { "epoch": 1.7516103914489984, "grad_norm": 7.8317782163770175, "learning_rate": 1.9945268711517807e-07, "loss": 0.7289, "step": 12372 }, { "epoch": 1.7517519643236357, "grad_norm": 9.430009509311384, "learning_rate": 1.9922839492127199e-07, "loss": 0.6479, "step": 12373 }, { "epoch": 1.751893537198273, "grad_norm": 9.959495529819575, "learning_rate": 1.9900422367721355e-07, "loss": 0.7948, "step": 12374 }, { "epoch": 1.7520351100729101, "grad_norm": 10.04798363342896, "learning_rate": 1.9878017339478695e-07, "loss": 0.6463, "step": 12375 }, { "epoch": 1.7521766829475474, "grad_norm": 10.503442835630953, "learning_rate": 1.9855624408577136e-07, "loss": 0.6632, "step": 12376 }, { "epoch": 1.7523182558221846, "grad_norm": 9.358122421791162, "learning_rate": 1.9833243576193734e-07, "loss": 0.6741, "step": 12377 }, { "epoch": 1.7524598286968218, "grad_norm": 9.895527039187998, "learning_rate": 1.9810874843505164e-07, "loss": 0.7283, "step": 12378 }, { "epoch": 1.752601401571459, "grad_norm": 8.82502235647509, "learning_rate": 1.9788518211687202e-07, "loss": 0.6477, "step": 12379 }, { "epoch": 1.7527429744460963, "grad_norm": 8.692627196545645, "learning_rate": 1.9766173681915247e-07, "loss": 0.665, "step": 12380 }, { "epoch": 1.7528845473207335, "grad_norm": 9.58837620835709, "learning_rate": 1.9743841255363827e-07, "loss": 0.6712, "step": 12381 }, { "epoch": 1.7530261201953705, "grad_norm": 8.892645688368967, "learning_rate": 1.9721520933207006e-07, "loss": 0.7137, "step": 12382 }, { "epoch": 1.7531676930700077, "grad_norm": 8.48844348455754, "learning_rate": 1.9699212716618123e-07, "loss": 0.6253, "step": 12383 }, { "epoch": 1.753309265944645, "grad_norm": 8.557896622680843, "learning_rate": 1.9676916606769874e-07, "loss": 0.6112, "step": 12384 }, { "epoch": 1.7534508388192822, "grad_norm": 9.640238007579141, "learning_rate": 1.9654632604834494e-07, "loss": 0.6765, "step": 12385 }, { "epoch": 1.7535924116939194, "grad_norm": 8.730563128684501, "learning_rate": 1.9632360711983212e-07, "loss": 0.6348, "step": 12386 }, { "epoch": 1.7537339845685567, "grad_norm": 8.057860466365135, "learning_rate": 1.9610100929387006e-07, "loss": 0.6969, "step": 12387 }, { "epoch": 1.753875557443194, "grad_norm": 10.563285482420016, "learning_rate": 1.9587853258215995e-07, "loss": 0.6941, "step": 12388 }, { "epoch": 1.7540171303178311, "grad_norm": 8.629114468826641, "learning_rate": 1.9565617699639717e-07, "loss": 0.722, "step": 12389 }, { "epoch": 1.7541587031924684, "grad_norm": 9.991186454205291, "learning_rate": 1.9543394254827125e-07, "loss": 0.7731, "step": 12390 }, { "epoch": 1.7543002760671056, "grad_norm": 8.868692655156176, "learning_rate": 1.9521182924946426e-07, "loss": 0.7073, "step": 12391 }, { "epoch": 1.7544418489417426, "grad_norm": 9.94884874594186, "learning_rate": 1.9498983711165347e-07, "loss": 0.6867, "step": 12392 }, { "epoch": 1.7545834218163798, "grad_norm": 8.975338526214832, "learning_rate": 1.9476796614650766e-07, "loss": 0.733, "step": 12393 }, { "epoch": 1.754724994691017, "grad_norm": 9.748970172976925, "learning_rate": 1.9454621636569138e-07, "loss": 0.6476, "step": 12394 }, { "epoch": 1.7548665675656543, "grad_norm": 8.420370162112363, "learning_rate": 1.9432458778086167e-07, "loss": 0.7135, "step": 12395 }, { "epoch": 1.7550081404402915, "grad_norm": 8.211992045447449, "learning_rate": 1.9410308040366867e-07, "loss": 0.6731, "step": 12396 }, { "epoch": 1.7551497133149287, "grad_norm": 9.921012588626798, "learning_rate": 1.9388169424575802e-07, "loss": 0.6876, "step": 12397 }, { "epoch": 1.755291286189566, "grad_norm": 10.883424148842739, "learning_rate": 1.936604293187666e-07, "loss": 0.7222, "step": 12398 }, { "epoch": 1.7554328590642032, "grad_norm": 8.26403216354448, "learning_rate": 1.93439285634327e-07, "loss": 0.6791, "step": 12399 }, { "epoch": 1.7555744319388404, "grad_norm": 7.857901766898251, "learning_rate": 1.932182632040641e-07, "loss": 0.6525, "step": 12400 }, { "epoch": 1.7557160048134777, "grad_norm": 10.492517984487096, "learning_rate": 1.929973620395975e-07, "loss": 0.6408, "step": 12401 }, { "epoch": 1.755857577688115, "grad_norm": 8.440567024311491, "learning_rate": 1.9277658215253904e-07, "loss": 0.6462, "step": 12402 }, { "epoch": 1.7559991505627521, "grad_norm": 9.625533448336205, "learning_rate": 1.925559235544955e-07, "loss": 0.7501, "step": 12403 }, { "epoch": 1.7561407234373894, "grad_norm": 9.103773956791088, "learning_rate": 1.9233538625706622e-07, "loss": 0.6287, "step": 12404 }, { "epoch": 1.7562822963120266, "grad_norm": 8.955914074600152, "learning_rate": 1.9211497027184556e-07, "loss": 0.6729, "step": 12405 }, { "epoch": 1.7564238691866638, "grad_norm": 8.593403757229472, "learning_rate": 1.918946756104201e-07, "loss": 0.5405, "step": 12406 }, { "epoch": 1.756565442061301, "grad_norm": 8.960862575972737, "learning_rate": 1.9167450228436995e-07, "loss": 0.7053, "step": 12407 }, { "epoch": 1.7567070149359383, "grad_norm": 9.237526396222012, "learning_rate": 1.9145445030527065e-07, "loss": 0.6799, "step": 12408 }, { "epoch": 1.7568485878105755, "grad_norm": 8.736126609779063, "learning_rate": 1.9123451968468903e-07, "loss": 0.7412, "step": 12409 }, { "epoch": 1.7569901606852127, "grad_norm": 9.621524728344491, "learning_rate": 1.910147104341875e-07, "loss": 0.5814, "step": 12410 }, { "epoch": 1.75713173355985, "grad_norm": 9.475252946280436, "learning_rate": 1.9079502256532073e-07, "loss": 0.6063, "step": 12411 }, { "epoch": 1.7572733064344872, "grad_norm": 8.343064183542108, "learning_rate": 1.9057545608963807e-07, "loss": 0.6865, "step": 12412 }, { "epoch": 1.7574148793091244, "grad_norm": 7.628975254104208, "learning_rate": 1.9035601101868168e-07, "loss": 0.5968, "step": 12413 }, { "epoch": 1.7575564521837617, "grad_norm": 8.096433688010757, "learning_rate": 1.9013668736398761e-07, "loss": 0.6119, "step": 12414 }, { "epoch": 1.757698025058399, "grad_norm": 9.544207061235332, "learning_rate": 1.899174851370858e-07, "loss": 0.6448, "step": 12415 }, { "epoch": 1.7578395979330361, "grad_norm": 8.12644640445938, "learning_rate": 1.8969840434949926e-07, "loss": 0.5969, "step": 12416 }, { "epoch": 1.7579811708076734, "grad_norm": 10.206763654165417, "learning_rate": 1.8947944501274517e-07, "loss": 0.8268, "step": 12417 }, { "epoch": 1.7581227436823106, "grad_norm": 9.004986123556728, "learning_rate": 1.892606071383332e-07, "loss": 0.6549, "step": 12418 }, { "epoch": 1.7582643165569478, "grad_norm": 8.259721980923356, "learning_rate": 1.8904189073776835e-07, "loss": 0.6668, "step": 12419 }, { "epoch": 1.758405889431585, "grad_norm": 8.887954498559106, "learning_rate": 1.8882329582254833e-07, "loss": 0.7924, "step": 12420 }, { "epoch": 1.7585474623062223, "grad_norm": 8.039034977783306, "learning_rate": 1.8860482240416424e-07, "loss": 0.6046, "step": 12421 }, { "epoch": 1.7586890351808595, "grad_norm": 11.380318875797977, "learning_rate": 1.8838647049410157e-07, "loss": 0.7478, "step": 12422 }, { "epoch": 1.7588306080554965, "grad_norm": 9.392199392252495, "learning_rate": 1.881682401038379e-07, "loss": 0.7436, "step": 12423 }, { "epoch": 1.7589721809301337, "grad_norm": 10.362931183425003, "learning_rate": 1.8795013124484674e-07, "loss": 0.7867, "step": 12424 }, { "epoch": 1.759113753804771, "grad_norm": 9.630738764518048, "learning_rate": 1.8773214392859284e-07, "loss": 0.6985, "step": 12425 }, { "epoch": 1.7592553266794082, "grad_norm": 8.55947711238835, "learning_rate": 1.8751427816653623e-07, "loss": 0.6757, "step": 12426 }, { "epoch": 1.7593968995540454, "grad_norm": 7.841183428683189, "learning_rate": 1.8729653397012993e-07, "loss": 0.7784, "step": 12427 }, { "epoch": 1.7595384724286827, "grad_norm": 8.827349765463921, "learning_rate": 1.870789113508198e-07, "loss": 0.7894, "step": 12428 }, { "epoch": 1.75968004530332, "grad_norm": 9.329886572047146, "learning_rate": 1.8686141032004724e-07, "loss": 0.6852, "step": 12429 }, { "epoch": 1.7598216181779571, "grad_norm": 8.367680325851635, "learning_rate": 1.8664403088924533e-07, "loss": 0.6893, "step": 12430 }, { "epoch": 1.7599631910525944, "grad_norm": 9.739420310439346, "learning_rate": 1.8642677306984213e-07, "loss": 0.7347, "step": 12431 }, { "epoch": 1.7601047639272316, "grad_norm": 13.004982213672177, "learning_rate": 1.8620963687325772e-07, "loss": 0.6928, "step": 12432 }, { "epoch": 1.7602463368018688, "grad_norm": 7.804647909527964, "learning_rate": 1.859926223109082e-07, "loss": 0.7392, "step": 12433 }, { "epoch": 1.7603879096765058, "grad_norm": 9.49397287414803, "learning_rate": 1.857757293942006e-07, "loss": 0.6624, "step": 12434 }, { "epoch": 1.760529482551143, "grad_norm": 11.45769115151205, "learning_rate": 1.855589581345374e-07, "loss": 0.7255, "step": 12435 }, { "epoch": 1.7606710554257803, "grad_norm": 8.725546098905276, "learning_rate": 1.8534230854331454e-07, "loss": 0.6357, "step": 12436 }, { "epoch": 1.7608126283004175, "grad_norm": 12.091837729346667, "learning_rate": 1.851257806319201e-07, "loss": 0.5653, "step": 12437 }, { "epoch": 1.7609542011750547, "grad_norm": 10.317625679302536, "learning_rate": 1.8490937441173807e-07, "loss": 0.687, "step": 12438 }, { "epoch": 1.761095774049692, "grad_norm": 10.62497558590945, "learning_rate": 1.846930898941432e-07, "loss": 0.7062, "step": 12439 }, { "epoch": 1.7612373469243292, "grad_norm": 7.8614064922336135, "learning_rate": 1.8447692709050668e-07, "loss": 0.689, "step": 12440 }, { "epoch": 1.7613789197989664, "grad_norm": 8.871841083393253, "learning_rate": 1.842608860121914e-07, "loss": 0.69, "step": 12441 }, { "epoch": 1.7615204926736037, "grad_norm": 8.414490471938498, "learning_rate": 1.8404496667055433e-07, "loss": 0.6378, "step": 12442 }, { "epoch": 1.761662065548241, "grad_norm": 10.56910814866162, "learning_rate": 1.8382916907694725e-07, "loss": 0.7425, "step": 12443 }, { "epoch": 1.7618036384228781, "grad_norm": 7.388311495804248, "learning_rate": 1.8361349324271304e-07, "loss": 0.6663, "step": 12444 }, { "epoch": 1.7619452112975154, "grad_norm": 8.085130734209423, "learning_rate": 1.8339793917919096e-07, "loss": 0.5535, "step": 12445 }, { "epoch": 1.7620867841721526, "grad_norm": 8.313273687854844, "learning_rate": 1.831825068977111e-07, "loss": 0.6264, "step": 12446 }, { "epoch": 1.7622283570467898, "grad_norm": 8.289278795186945, "learning_rate": 1.8296719640960025e-07, "loss": 0.7454, "step": 12447 }, { "epoch": 1.762369929921427, "grad_norm": 9.685099248894536, "learning_rate": 1.8275200772617603e-07, "loss": 0.7168, "step": 12448 }, { "epoch": 1.7625115027960643, "grad_norm": 12.097563544002233, "learning_rate": 1.8253694085875047e-07, "loss": 0.583, "step": 12449 }, { "epoch": 1.7626530756707015, "grad_norm": 9.331136399514673, "learning_rate": 1.8232199581863036e-07, "loss": 0.6341, "step": 12450 }, { "epoch": 1.7627946485453387, "grad_norm": 9.447667587905626, "learning_rate": 1.8210717261711448e-07, "loss": 0.6422, "step": 12451 }, { "epoch": 1.762936221419976, "grad_norm": 8.891003860419593, "learning_rate": 1.8189247126549653e-07, "loss": 0.6677, "step": 12452 }, { "epoch": 1.7630777942946132, "grad_norm": 9.034633543840302, "learning_rate": 1.816778917750625e-07, "loss": 0.6378, "step": 12453 }, { "epoch": 1.7632193671692504, "grad_norm": 8.096208942657302, "learning_rate": 1.8146343415709367e-07, "loss": 0.667, "step": 12454 }, { "epoch": 1.7633609400438877, "grad_norm": 8.309961375893852, "learning_rate": 1.8124909842286293e-07, "loss": 0.676, "step": 12455 }, { "epoch": 1.763502512918525, "grad_norm": 8.931196217144747, "learning_rate": 1.810348845836385e-07, "loss": 0.5729, "step": 12456 }, { "epoch": 1.7636440857931621, "grad_norm": 10.110322462725557, "learning_rate": 1.8082079265068053e-07, "loss": 0.6546, "step": 12457 }, { "epoch": 1.7637856586677993, "grad_norm": 9.267597265193617, "learning_rate": 1.806068226352445e-07, "loss": 0.6609, "step": 12458 }, { "epoch": 1.7639272315424366, "grad_norm": 10.397437777707959, "learning_rate": 1.8039297454857885e-07, "loss": 0.6982, "step": 12459 }, { "epoch": 1.7640688044170738, "grad_norm": 11.398891970677663, "learning_rate": 1.8017924840192435e-07, "loss": 0.663, "step": 12460 }, { "epoch": 1.764210377291711, "grad_norm": 8.688368060976025, "learning_rate": 1.7996564420651758e-07, "loss": 0.7203, "step": 12461 }, { "epoch": 1.7643519501663483, "grad_norm": 9.629666472905203, "learning_rate": 1.7975216197358648e-07, "loss": 0.6273, "step": 12462 }, { "epoch": 1.7644935230409855, "grad_norm": 9.180770381607672, "learning_rate": 1.7953880171435455e-07, "loss": 0.6583, "step": 12463 }, { "epoch": 1.7646350959156227, "grad_norm": 8.639405093728106, "learning_rate": 1.7932556344003703e-07, "loss": 0.6973, "step": 12464 }, { "epoch": 1.7647766687902597, "grad_norm": 9.04013353232277, "learning_rate": 1.7911244716184468e-07, "loss": 0.7701, "step": 12465 }, { "epoch": 1.764918241664897, "grad_norm": 9.639955469558878, "learning_rate": 1.7889945289098042e-07, "loss": 0.7217, "step": 12466 }, { "epoch": 1.7650598145395342, "grad_norm": 9.46607213190589, "learning_rate": 1.786865806386412e-07, "loss": 0.7194, "step": 12467 }, { "epoch": 1.7652013874141714, "grad_norm": 9.046037546245646, "learning_rate": 1.7847383041601772e-07, "loss": 0.6878, "step": 12468 }, { "epoch": 1.7653429602888087, "grad_norm": 8.25863380642542, "learning_rate": 1.7826120223429416e-07, "loss": 0.6033, "step": 12469 }, { "epoch": 1.765484533163446, "grad_norm": 8.897745991431051, "learning_rate": 1.7804869610464766e-07, "loss": 0.7116, "step": 12470 }, { "epoch": 1.7656261060380831, "grad_norm": 10.158605120566774, "learning_rate": 1.7783631203825007e-07, "loss": 0.6887, "step": 12471 }, { "epoch": 1.7657676789127204, "grad_norm": 8.368020864777966, "learning_rate": 1.7762405004626586e-07, "loss": 0.7237, "step": 12472 }, { "epoch": 1.7659092517873576, "grad_norm": 11.554930482570255, "learning_rate": 1.7741191013985387e-07, "loss": 0.7471, "step": 12473 }, { "epoch": 1.7660508246619948, "grad_norm": 9.354439531069175, "learning_rate": 1.7719989233016572e-07, "loss": 0.6504, "step": 12474 }, { "epoch": 1.7661923975366318, "grad_norm": 9.424807874912036, "learning_rate": 1.7698799662834776e-07, "loss": 0.6707, "step": 12475 }, { "epoch": 1.766333970411269, "grad_norm": 10.361693155975704, "learning_rate": 1.7677622304553833e-07, "loss": 0.5932, "step": 12476 }, { "epoch": 1.7664755432859063, "grad_norm": 9.274192310135211, "learning_rate": 1.76564571592871e-07, "loss": 0.6545, "step": 12477 }, { "epoch": 1.7666171161605435, "grad_norm": 9.450756337411367, "learning_rate": 1.7635304228147104e-07, "loss": 0.6323, "step": 12478 }, { "epoch": 1.7667586890351807, "grad_norm": 8.161282327909007, "learning_rate": 1.7614163512245957e-07, "loss": 0.6891, "step": 12479 }, { "epoch": 1.766900261909818, "grad_norm": 9.364991809619392, "learning_rate": 1.7593035012694992e-07, "loss": 0.6114, "step": 12480 }, { "epoch": 1.7670418347844552, "grad_norm": 7.949779713775634, "learning_rate": 1.757191873060482e-07, "loss": 0.6966, "step": 12481 }, { "epoch": 1.7671834076590924, "grad_norm": 8.733471764008536, "learning_rate": 1.755081466708561e-07, "loss": 0.5464, "step": 12482 }, { "epoch": 1.7673249805337297, "grad_norm": 9.076232187568598, "learning_rate": 1.752972282324672e-07, "loss": 0.738, "step": 12483 }, { "epoch": 1.767466553408367, "grad_norm": 8.567785706020947, "learning_rate": 1.750864320019699e-07, "loss": 0.552, "step": 12484 }, { "epoch": 1.7676081262830041, "grad_norm": 9.425410934110802, "learning_rate": 1.7487575799044505e-07, "loss": 0.6886, "step": 12485 }, { "epoch": 1.7677496991576414, "grad_norm": 8.703707937445486, "learning_rate": 1.746652062089685e-07, "loss": 0.6315, "step": 12486 }, { "epoch": 1.7678912720322786, "grad_norm": 6.946894153426137, "learning_rate": 1.7445477666860749e-07, "loss": 0.7126, "step": 12487 }, { "epoch": 1.7680328449069158, "grad_norm": 9.655524266695522, "learning_rate": 1.7424446938042517e-07, "loss": 0.7276, "step": 12488 }, { "epoch": 1.768174417781553, "grad_norm": 8.209389813848691, "learning_rate": 1.740342843554771e-07, "loss": 0.6734, "step": 12489 }, { "epoch": 1.7683159906561903, "grad_norm": 10.348862781797083, "learning_rate": 1.7382422160481193e-07, "loss": 0.7249, "step": 12490 }, { "epoch": 1.7684575635308275, "grad_norm": 8.169891758625463, "learning_rate": 1.7361428113947392e-07, "loss": 0.6332, "step": 12491 }, { "epoch": 1.7685991364054647, "grad_norm": 9.44883885591757, "learning_rate": 1.7340446297049784e-07, "loss": 0.7026, "step": 12492 }, { "epoch": 1.768740709280102, "grad_norm": 9.035748163436544, "learning_rate": 1.7319476710891431e-07, "loss": 0.6977, "step": 12493 }, { "epoch": 1.7688822821547392, "grad_norm": 10.923945982345176, "learning_rate": 1.7298519356574728e-07, "loss": 0.6452, "step": 12494 }, { "epoch": 1.7690238550293764, "grad_norm": 9.526277839871241, "learning_rate": 1.7277574235201295e-07, "loss": 0.7402, "step": 12495 }, { "epoch": 1.7691654279040137, "grad_norm": 10.66593604764537, "learning_rate": 1.7256641347872304e-07, "loss": 0.7044, "step": 12496 }, { "epoch": 1.7693070007786509, "grad_norm": 9.481174085963186, "learning_rate": 1.723572069568813e-07, "loss": 0.7052, "step": 12497 }, { "epoch": 1.7694485736532881, "grad_norm": 9.236591828216413, "learning_rate": 1.7214812279748584e-07, "loss": 0.6757, "step": 12498 }, { "epoch": 1.7695901465279253, "grad_norm": 8.863543355671652, "learning_rate": 1.719391610115273e-07, "loss": 0.652, "step": 12499 }, { "epoch": 1.7697317194025626, "grad_norm": 9.724258480878103, "learning_rate": 1.7173032160999164e-07, "loss": 0.7338, "step": 12500 }, { "epoch": 1.7698732922771998, "grad_norm": 9.529422772469738, "learning_rate": 1.7152160460385703e-07, "loss": 0.6117, "step": 12501 }, { "epoch": 1.770014865151837, "grad_norm": 10.662800406084697, "learning_rate": 1.7131301000409496e-07, "loss": 0.6427, "step": 12502 }, { "epoch": 1.7701564380264743, "grad_norm": 9.861055209817772, "learning_rate": 1.7110453782167218e-07, "loss": 0.7438, "step": 12503 }, { "epoch": 1.7702980109011115, "grad_norm": 9.451237463285024, "learning_rate": 1.7089618806754692e-07, "loss": 0.6743, "step": 12504 }, { "epoch": 1.7704395837757487, "grad_norm": 8.90983386294784, "learning_rate": 1.7068796075267264e-07, "loss": 0.6161, "step": 12505 }, { "epoch": 1.7705811566503857, "grad_norm": 8.295146065150842, "learning_rate": 1.7047985588799525e-07, "loss": 0.6519, "step": 12506 }, { "epoch": 1.770722729525023, "grad_norm": 8.471919181142866, "learning_rate": 1.7027187348445522e-07, "loss": 0.64, "step": 12507 }, { "epoch": 1.7708643023996602, "grad_norm": 9.928733941970128, "learning_rate": 1.700640135529852e-07, "loss": 0.7236, "step": 12508 }, { "epoch": 1.7710058752742974, "grad_norm": 9.64255577302706, "learning_rate": 1.6985627610451278e-07, "loss": 0.7516, "step": 12509 }, { "epoch": 1.7711474481489347, "grad_norm": 11.28839341659097, "learning_rate": 1.6964866114995871e-07, "loss": 0.5875, "step": 12510 }, { "epoch": 1.7712890210235719, "grad_norm": 8.624782393654383, "learning_rate": 1.6944116870023675e-07, "loss": 0.6424, "step": 12511 }, { "epoch": 1.7714305938982091, "grad_norm": 8.606878289010245, "learning_rate": 1.6923379876625568e-07, "loss": 0.6278, "step": 12512 }, { "epoch": 1.7715721667728463, "grad_norm": 10.072104094169651, "learning_rate": 1.690265513589151e-07, "loss": 0.7351, "step": 12513 }, { "epoch": 1.7717137396474836, "grad_norm": 10.565553844430058, "learning_rate": 1.6881942648911077e-07, "loss": 0.7096, "step": 12514 }, { "epoch": 1.7718553125221208, "grad_norm": 10.152408753759769, "learning_rate": 1.6861242416773087e-07, "loss": 0.668, "step": 12515 }, { "epoch": 1.7719968853967578, "grad_norm": 8.703807427379708, "learning_rate": 1.684055444056573e-07, "loss": 0.6775, "step": 12516 }, { "epoch": 1.772138458271395, "grad_norm": 8.441321291798292, "learning_rate": 1.6819878721376637e-07, "loss": 0.623, "step": 12517 }, { "epoch": 1.7722800311460323, "grad_norm": 10.03325122111063, "learning_rate": 1.67992152602926e-07, "loss": 0.6735, "step": 12518 }, { "epoch": 1.7724216040206695, "grad_norm": 9.430385712292853, "learning_rate": 1.6778564058399977e-07, "loss": 0.6125, "step": 12519 }, { "epoch": 1.7725631768953067, "grad_norm": 8.405601504813017, "learning_rate": 1.6757925116784313e-07, "loss": 0.6157, "step": 12520 }, { "epoch": 1.772704749769944, "grad_norm": 8.615154009132642, "learning_rate": 1.673729843653063e-07, "loss": 0.6176, "step": 12521 }, { "epoch": 1.7728463226445812, "grad_norm": 9.86567537465857, "learning_rate": 1.6716684018723256e-07, "loss": 0.6259, "step": 12522 }, { "epoch": 1.7729878955192184, "grad_norm": 8.738712114293724, "learning_rate": 1.6696081864445823e-07, "loss": 0.6347, "step": 12523 }, { "epoch": 1.7731294683938557, "grad_norm": 8.622452069307199, "learning_rate": 1.6675491974781438e-07, "loss": 0.6293, "step": 12524 }, { "epoch": 1.7732710412684929, "grad_norm": 8.995120527171375, "learning_rate": 1.665491435081243e-07, "loss": 0.6821, "step": 12525 }, { "epoch": 1.7734126141431301, "grad_norm": 9.534309671553885, "learning_rate": 1.6634348993620624e-07, "loss": 0.7319, "step": 12526 }, { "epoch": 1.7735541870177673, "grad_norm": 10.635774422711343, "learning_rate": 1.661379590428705e-07, "loss": 0.7308, "step": 12527 }, { "epoch": 1.7736957598924046, "grad_norm": 10.236893624965886, "learning_rate": 1.6593255083892228e-07, "loss": 0.6901, "step": 12528 }, { "epoch": 1.7738373327670418, "grad_norm": 7.841671368080322, "learning_rate": 1.6572726533515936e-07, "loss": 0.6181, "step": 12529 }, { "epoch": 1.773978905641679, "grad_norm": 8.868996322254292, "learning_rate": 1.6552210254237395e-07, "loss": 0.7347, "step": 12530 }, { "epoch": 1.7741204785163163, "grad_norm": 9.255730168635527, "learning_rate": 1.6531706247135042e-07, "loss": 0.6584, "step": 12531 }, { "epoch": 1.7742620513909535, "grad_norm": 8.310244146586724, "learning_rate": 1.6511214513286826e-07, "loss": 0.6483, "step": 12532 }, { "epoch": 1.7744036242655907, "grad_norm": 8.527929756834304, "learning_rate": 1.6490735053770023e-07, "loss": 0.6435, "step": 12533 }, { "epoch": 1.774545197140228, "grad_norm": 9.563228006014471, "learning_rate": 1.6470267869661105e-07, "loss": 0.6323, "step": 12534 }, { "epoch": 1.7746867700148652, "grad_norm": 9.408157915541677, "learning_rate": 1.6449812962036128e-07, "loss": 0.6352, "step": 12535 }, { "epoch": 1.7748283428895024, "grad_norm": 9.626115214156277, "learning_rate": 1.6429370331970285e-07, "loss": 0.5827, "step": 12536 }, { "epoch": 1.7749699157641396, "grad_norm": 8.671450492847338, "learning_rate": 1.640893998053833e-07, "loss": 0.6323, "step": 12537 }, { "epoch": 1.7751114886387769, "grad_norm": 8.571900490984701, "learning_rate": 1.6388521908814181e-07, "loss": 0.5348, "step": 12538 }, { "epoch": 1.775253061513414, "grad_norm": 8.555854836165645, "learning_rate": 1.6368116117871257e-07, "loss": 0.697, "step": 12539 }, { "epoch": 1.7753946343880513, "grad_norm": 11.366940504189387, "learning_rate": 1.6347722608782284e-07, "loss": 0.7807, "step": 12540 }, { "epoch": 1.7755362072626886, "grad_norm": 8.366530505689722, "learning_rate": 1.6327341382619294e-07, "loss": 0.6264, "step": 12541 }, { "epoch": 1.7756777801373258, "grad_norm": 9.475686934639661, "learning_rate": 1.6306972440453788e-07, "loss": 0.7662, "step": 12542 }, { "epoch": 1.775819353011963, "grad_norm": 8.755407787610109, "learning_rate": 1.6286615783356468e-07, "loss": 0.6688, "step": 12543 }, { "epoch": 1.7759609258866003, "grad_norm": 8.965388821256365, "learning_rate": 1.626627141239745e-07, "loss": 0.6085, "step": 12544 }, { "epoch": 1.7761024987612375, "grad_norm": 10.921879191254561, "learning_rate": 1.6245939328646322e-07, "loss": 0.6676, "step": 12545 }, { "epoch": 1.7762440716358747, "grad_norm": 7.262985964354344, "learning_rate": 1.622561953317181e-07, "loss": 0.6125, "step": 12546 }, { "epoch": 1.776385644510512, "grad_norm": 10.00268480499638, "learning_rate": 1.620531202704223e-07, "loss": 0.6369, "step": 12547 }, { "epoch": 1.776527217385149, "grad_norm": 8.46894909388897, "learning_rate": 1.6185016811325033e-07, "loss": 0.7453, "step": 12548 }, { "epoch": 1.7766687902597862, "grad_norm": 8.31884294167065, "learning_rate": 1.6164733887087168e-07, "loss": 0.6813, "step": 12549 }, { "epoch": 1.7768103631344234, "grad_norm": 8.74806279127543, "learning_rate": 1.614446325539487e-07, "loss": 0.6968, "step": 12550 }, { "epoch": 1.7769519360090607, "grad_norm": 10.64902714687143, "learning_rate": 1.6124204917313811e-07, "loss": 0.7225, "step": 12551 }, { "epoch": 1.7770935088836979, "grad_norm": 9.624996433009688, "learning_rate": 1.6103958873908893e-07, "loss": 0.7845, "step": 12552 }, { "epoch": 1.7772350817583351, "grad_norm": 8.991692417545114, "learning_rate": 1.608372512624448e-07, "loss": 0.5925, "step": 12553 }, { "epoch": 1.7773766546329723, "grad_norm": 10.364367448612736, "learning_rate": 1.6063503675384202e-07, "loss": 0.6637, "step": 12554 }, { "epoch": 1.7775182275076096, "grad_norm": 10.665882399655514, "learning_rate": 1.604329452239109e-07, "loss": 0.6457, "step": 12555 }, { "epoch": 1.7776598003822468, "grad_norm": 9.064002182208588, "learning_rate": 1.6023097668327574e-07, "loss": 0.6592, "step": 12556 }, { "epoch": 1.777801373256884, "grad_norm": 10.646720675062932, "learning_rate": 1.6002913114255309e-07, "loss": 0.7643, "step": 12557 }, { "epoch": 1.777942946131521, "grad_norm": 9.89675322958518, "learning_rate": 1.5982740861235468e-07, "loss": 0.6547, "step": 12558 }, { "epoch": 1.7780845190061583, "grad_norm": 8.315288334180986, "learning_rate": 1.5962580910328402e-07, "loss": 0.655, "step": 12559 }, { "epoch": 1.7782260918807955, "grad_norm": 9.11325780912116, "learning_rate": 1.594243326259401e-07, "loss": 0.6322, "step": 12560 }, { "epoch": 1.7783676647554327, "grad_norm": 9.885812368938005, "learning_rate": 1.5922297919091334e-07, "loss": 0.5631, "step": 12561 }, { "epoch": 1.77850923763007, "grad_norm": 8.631367862015475, "learning_rate": 1.590217488087892e-07, "loss": 0.7655, "step": 12562 }, { "epoch": 1.7786508105047072, "grad_norm": 8.89879202199285, "learning_rate": 1.5882064149014637e-07, "loss": 0.6829, "step": 12563 }, { "epoch": 1.7787923833793444, "grad_norm": 8.181458551999512, "learning_rate": 1.5861965724555673e-07, "loss": 0.6924, "step": 12564 }, { "epoch": 1.7789339562539817, "grad_norm": 9.434358794030052, "learning_rate": 1.5841879608558652e-07, "loss": 0.7042, "step": 12565 }, { "epoch": 1.7790755291286189, "grad_norm": 9.983316619128807, "learning_rate": 1.5821805802079343e-07, "loss": 0.689, "step": 12566 }, { "epoch": 1.7792171020032561, "grad_norm": 9.3668535569123, "learning_rate": 1.5801744306173094e-07, "loss": 0.6759, "step": 12567 }, { "epoch": 1.7793586748778933, "grad_norm": 9.88434593155404, "learning_rate": 1.5781695121894563e-07, "loss": 0.6906, "step": 12568 }, { "epoch": 1.7795002477525306, "grad_norm": 9.358031926406563, "learning_rate": 1.5761658250297658e-07, "loss": 0.6722, "step": 12569 }, { "epoch": 1.7796418206271678, "grad_norm": 9.407833536375128, "learning_rate": 1.5741633692435725e-07, "loss": 0.6649, "step": 12570 }, { "epoch": 1.779783393501805, "grad_norm": 9.430525267647104, "learning_rate": 1.572162144936143e-07, "loss": 0.6277, "step": 12571 }, { "epoch": 1.7799249663764423, "grad_norm": 8.988596155120318, "learning_rate": 1.5701621522126843e-07, "loss": 0.6161, "step": 12572 }, { "epoch": 1.7800665392510795, "grad_norm": 10.537966072391345, "learning_rate": 1.568163391178326e-07, "loss": 0.7198, "step": 12573 }, { "epoch": 1.7802081121257167, "grad_norm": 9.556167001407472, "learning_rate": 1.5661658619381515e-07, "loss": 0.6501, "step": 12574 }, { "epoch": 1.780349685000354, "grad_norm": 9.575452621558671, "learning_rate": 1.564169564597165e-07, "loss": 0.6491, "step": 12575 }, { "epoch": 1.7804912578749912, "grad_norm": 8.639758765067153, "learning_rate": 1.5621744992603049e-07, "loss": 0.7646, "step": 12576 }, { "epoch": 1.7806328307496284, "grad_norm": 9.421520321371473, "learning_rate": 1.5601806660324598e-07, "loss": 0.7031, "step": 12577 }, { "epoch": 1.7807744036242656, "grad_norm": 7.937494232896527, "learning_rate": 1.558188065018437e-07, "loss": 0.6514, "step": 12578 }, { "epoch": 1.7809159764989029, "grad_norm": 7.542150689148438, "learning_rate": 1.5561966963229925e-07, "loss": 0.5943, "step": 12579 }, { "epoch": 1.78105754937354, "grad_norm": 8.364314764199717, "learning_rate": 1.5542065600508e-07, "loss": 0.5657, "step": 12580 }, { "epoch": 1.7811991222481773, "grad_norm": 9.043775274508999, "learning_rate": 1.5522176563064928e-07, "loss": 0.6508, "step": 12581 }, { "epoch": 1.7813406951228146, "grad_norm": 8.389939377830562, "learning_rate": 1.550229985194618e-07, "loss": 0.6615, "step": 12582 }, { "epoch": 1.7814822679974518, "grad_norm": 8.613045631697938, "learning_rate": 1.5482435468196695e-07, "loss": 0.5353, "step": 12583 }, { "epoch": 1.781623840872089, "grad_norm": 8.688843986783292, "learning_rate": 1.5462583412860692e-07, "loss": 0.7074, "step": 12584 }, { "epoch": 1.7817654137467263, "grad_norm": 9.238817621189286, "learning_rate": 1.5442743686981787e-07, "loss": 0.7353, "step": 12585 }, { "epoch": 1.7819069866213635, "grad_norm": 8.54023420526403, "learning_rate": 1.542291629160303e-07, "loss": 0.6749, "step": 12586 }, { "epoch": 1.7820485594960007, "grad_norm": 10.060969076330709, "learning_rate": 1.5403101227766587e-07, "loss": 0.6687, "step": 12587 }, { "epoch": 1.782190132370638, "grad_norm": 8.988118700317441, "learning_rate": 1.538329849651421e-07, "loss": 0.7221, "step": 12588 }, { "epoch": 1.782331705245275, "grad_norm": 7.3726290754969686, "learning_rate": 1.536350809888687e-07, "loss": 0.6455, "step": 12589 }, { "epoch": 1.7824732781199122, "grad_norm": 10.238010372346638, "learning_rate": 1.534373003592496e-07, "loss": 0.6377, "step": 12590 }, { "epoch": 1.7826148509945494, "grad_norm": 9.413511394657348, "learning_rate": 1.5323964308668227e-07, "loss": 0.5612, "step": 12591 }, { "epoch": 1.7827564238691866, "grad_norm": 10.295129190961799, "learning_rate": 1.5304210918155677e-07, "loss": 0.6037, "step": 12592 }, { "epoch": 1.7828979967438239, "grad_norm": 8.609078570072619, "learning_rate": 1.5284469865425784e-07, "loss": 0.6702, "step": 12593 }, { "epoch": 1.783039569618461, "grad_norm": 10.028177236371624, "learning_rate": 1.5264741151516272e-07, "loss": 0.6788, "step": 12594 }, { "epoch": 1.7831811424930983, "grad_norm": 10.555304790886696, "learning_rate": 1.524502477746434e-07, "loss": 0.6355, "step": 12595 }, { "epoch": 1.7833227153677356, "grad_norm": 9.053211160920679, "learning_rate": 1.522532074430641e-07, "loss": 0.7525, "step": 12596 }, { "epoch": 1.7834642882423728, "grad_norm": 8.608367805621196, "learning_rate": 1.5205629053078262e-07, "loss": 0.5393, "step": 12597 }, { "epoch": 1.78360586111701, "grad_norm": 9.058845993550353, "learning_rate": 1.5185949704815185e-07, "loss": 0.6702, "step": 12598 }, { "epoch": 1.783747433991647, "grad_norm": 8.080965409722095, "learning_rate": 1.5166282700551594e-07, "loss": 0.6868, "step": 12599 }, { "epoch": 1.7838890068662843, "grad_norm": 10.256137778012834, "learning_rate": 1.5146628041321443e-07, "loss": 0.7066, "step": 12600 }, { "epoch": 1.7840305797409215, "grad_norm": 9.865500601176628, "learning_rate": 1.5126985728157934e-07, "loss": 0.6618, "step": 12601 }, { "epoch": 1.7841721526155587, "grad_norm": 8.857860079085922, "learning_rate": 1.5107355762093685e-07, "loss": 0.6906, "step": 12602 }, { "epoch": 1.784313725490196, "grad_norm": 10.073542820359343, "learning_rate": 1.5087738144160562e-07, "loss": 0.6184, "step": 12603 }, { "epoch": 1.7844552983648332, "grad_norm": 9.88227595504992, "learning_rate": 1.5068132875389913e-07, "loss": 0.7208, "step": 12604 }, { "epoch": 1.7845968712394704, "grad_norm": 10.063542471960902, "learning_rate": 1.5048539956812324e-07, "loss": 0.7071, "step": 12605 }, { "epoch": 1.7847384441141076, "grad_norm": 9.464108982132634, "learning_rate": 1.5028959389457782e-07, "loss": 0.6174, "step": 12606 }, { "epoch": 1.7848800169887449, "grad_norm": 9.415969237660578, "learning_rate": 1.5009391174355735e-07, "loss": 0.6686, "step": 12607 }, { "epoch": 1.785021589863382, "grad_norm": 9.995046151509296, "learning_rate": 1.49898353125347e-07, "loss": 0.5885, "step": 12608 }, { "epoch": 1.7851631627380193, "grad_norm": 10.882169968805876, "learning_rate": 1.4970291805022825e-07, "loss": 0.7073, "step": 12609 }, { "epoch": 1.7853047356126566, "grad_norm": 9.699324546971324, "learning_rate": 1.4950760652847422e-07, "loss": 0.6023, "step": 12610 }, { "epoch": 1.7854463084872938, "grad_norm": 9.771371746615051, "learning_rate": 1.4931241857035343e-07, "loss": 0.7258, "step": 12611 }, { "epoch": 1.785587881361931, "grad_norm": 9.039216115267719, "learning_rate": 1.4911735418612515e-07, "loss": 0.6667, "step": 12612 }, { "epoch": 1.7857294542365683, "grad_norm": 9.877529158417637, "learning_rate": 1.4892241338604506e-07, "loss": 0.6855, "step": 12613 }, { "epoch": 1.7858710271112055, "grad_norm": 9.487567194724582, "learning_rate": 1.4872759618036081e-07, "loss": 0.6939, "step": 12614 }, { "epoch": 1.7860125999858427, "grad_norm": 8.940144607773979, "learning_rate": 1.4853290257931364e-07, "loss": 0.6724, "step": 12615 }, { "epoch": 1.78615417286048, "grad_norm": 9.180641988275903, "learning_rate": 1.483383325931384e-07, "loss": 0.7421, "step": 12616 }, { "epoch": 1.7862957457351172, "grad_norm": 9.373223708675212, "learning_rate": 1.4814388623206333e-07, "loss": 0.6216, "step": 12617 }, { "epoch": 1.7864373186097544, "grad_norm": 9.97284941854144, "learning_rate": 1.4794956350631106e-07, "loss": 0.6514, "step": 12618 }, { "epoch": 1.7865788914843916, "grad_norm": 8.417386417647933, "learning_rate": 1.4775536442609623e-07, "loss": 0.6611, "step": 12619 }, { "epoch": 1.7867204643590289, "grad_norm": 9.061865633104555, "learning_rate": 1.4756128900162757e-07, "loss": 0.6663, "step": 12620 }, { "epoch": 1.786862037233666, "grad_norm": 9.146985050291375, "learning_rate": 1.4736733724310865e-07, "loss": 0.6798, "step": 12621 }, { "epoch": 1.7870036101083033, "grad_norm": 9.145215360463515, "learning_rate": 1.4717350916073375e-07, "loss": 0.6696, "step": 12622 }, { "epoch": 1.7871451829829406, "grad_norm": 9.528811083320997, "learning_rate": 1.4697980476469392e-07, "loss": 0.6618, "step": 12623 }, { "epoch": 1.7872867558575778, "grad_norm": 14.101823274943364, "learning_rate": 1.4678622406517074e-07, "loss": 0.6922, "step": 12624 }, { "epoch": 1.787428328732215, "grad_norm": 9.579908295194477, "learning_rate": 1.4659276707234132e-07, "loss": 0.6475, "step": 12625 }, { "epoch": 1.7875699016068523, "grad_norm": 11.193556866877984, "learning_rate": 1.4639943379637534e-07, "loss": 0.7074, "step": 12626 }, { "epoch": 1.7877114744814895, "grad_norm": 8.793934847387582, "learning_rate": 1.462062242474363e-07, "loss": 0.68, "step": 12627 }, { "epoch": 1.7878530473561267, "grad_norm": 8.228609966006182, "learning_rate": 1.460131384356811e-07, "loss": 0.7458, "step": 12628 }, { "epoch": 1.787994620230764, "grad_norm": 9.202625928212521, "learning_rate": 1.4582017637125967e-07, "loss": 0.669, "step": 12629 }, { "epoch": 1.788136193105401, "grad_norm": 8.80190251332085, "learning_rate": 1.4562733806431666e-07, "loss": 0.7039, "step": 12630 }, { "epoch": 1.7882777659800382, "grad_norm": 8.781247393516072, "learning_rate": 1.4543462352498844e-07, "loss": 0.6519, "step": 12631 }, { "epoch": 1.7884193388546754, "grad_norm": 9.321575478918769, "learning_rate": 1.4524203276340687e-07, "loss": 0.7838, "step": 12632 }, { "epoch": 1.7885609117293126, "grad_norm": 9.693271812583054, "learning_rate": 1.4504956578969554e-07, "loss": 0.6584, "step": 12633 }, { "epoch": 1.7887024846039499, "grad_norm": 10.845524975182377, "learning_rate": 1.4485722261397273e-07, "loss": 0.7258, "step": 12634 }, { "epoch": 1.788844057478587, "grad_norm": 9.16748107991426, "learning_rate": 1.4466500324634952e-07, "loss": 0.7404, "step": 12635 }, { "epoch": 1.7889856303532243, "grad_norm": 8.428424843166963, "learning_rate": 1.444729076969309e-07, "loss": 0.7476, "step": 12636 }, { "epoch": 1.7891272032278616, "grad_norm": 9.078029192157947, "learning_rate": 1.4428093597581544e-07, "loss": 0.6768, "step": 12637 }, { "epoch": 1.7892687761024988, "grad_norm": 8.698107055285929, "learning_rate": 1.4408908809309423e-07, "loss": 0.7208, "step": 12638 }, { "epoch": 1.789410348977136, "grad_norm": 7.438516755688475, "learning_rate": 1.4389736405885397e-07, "loss": 0.6147, "step": 12639 }, { "epoch": 1.7895519218517733, "grad_norm": 9.03729784727554, "learning_rate": 1.4370576388317155e-07, "loss": 0.6978, "step": 12640 }, { "epoch": 1.7896934947264103, "grad_norm": 9.373487426807984, "learning_rate": 1.435142875761203e-07, "loss": 0.6609, "step": 12641 }, { "epoch": 1.7898350676010475, "grad_norm": 9.184571091257942, "learning_rate": 1.4332293514776635e-07, "loss": 0.7486, "step": 12642 }, { "epoch": 1.7899766404756847, "grad_norm": 8.296443259865047, "learning_rate": 1.4313170660816805e-07, "loss": 0.6563, "step": 12643 }, { "epoch": 1.790118213350322, "grad_norm": 9.617385031734605, "learning_rate": 1.4294060196737874e-07, "loss": 0.6286, "step": 12644 }, { "epoch": 1.7902597862249592, "grad_norm": 10.363576828026613, "learning_rate": 1.4274962123544457e-07, "loss": 0.702, "step": 12645 }, { "epoch": 1.7904013590995964, "grad_norm": 8.027044360292857, "learning_rate": 1.4255876442240524e-07, "loss": 0.6482, "step": 12646 }, { "epoch": 1.7905429319742336, "grad_norm": 9.611208857025186, "learning_rate": 1.423680315382933e-07, "loss": 0.7723, "step": 12647 }, { "epoch": 1.7906845048488709, "grad_norm": 9.70583650846755, "learning_rate": 1.421774225931366e-07, "loss": 0.7, "step": 12648 }, { "epoch": 1.790826077723508, "grad_norm": 8.805043197257262, "learning_rate": 1.4198693759695486e-07, "loss": 0.7482, "step": 12649 }, { "epoch": 1.7909676505981453, "grad_norm": 7.945074357296397, "learning_rate": 1.417965765597612e-07, "loss": 0.6543, "step": 12650 }, { "epoch": 1.7911092234727826, "grad_norm": 8.55652315137062, "learning_rate": 1.4160633949156344e-07, "loss": 0.599, "step": 12651 }, { "epoch": 1.7912507963474198, "grad_norm": 9.72386579396301, "learning_rate": 1.4141622640236164e-07, "loss": 0.6672, "step": 12652 }, { "epoch": 1.791392369222057, "grad_norm": 8.596403177085625, "learning_rate": 1.412262373021503e-07, "loss": 0.6914, "step": 12653 }, { "epoch": 1.7915339420966943, "grad_norm": 9.786511648268714, "learning_rate": 1.410363722009167e-07, "loss": 0.6688, "step": 12654 }, { "epoch": 1.7916755149713315, "grad_norm": 8.642575256002349, "learning_rate": 1.4084663110864262e-07, "loss": 0.585, "step": 12655 }, { "epoch": 1.7918170878459687, "grad_norm": 7.86877655679283, "learning_rate": 1.406570140353014e-07, "loss": 0.6727, "step": 12656 }, { "epoch": 1.791958660720606, "grad_norm": 8.485927826073937, "learning_rate": 1.4046752099086236e-07, "loss": 0.6325, "step": 12657 }, { "epoch": 1.7921002335952432, "grad_norm": 9.445544319684133, "learning_rate": 1.4027815198528582e-07, "loss": 0.6717, "step": 12658 }, { "epoch": 1.7922418064698804, "grad_norm": 8.627605984395245, "learning_rate": 1.4008890702852774e-07, "loss": 0.6746, "step": 12659 }, { "epoch": 1.7923833793445176, "grad_norm": 9.488376131678747, "learning_rate": 1.398997861305365e-07, "loss": 0.7114, "step": 12660 }, { "epoch": 1.7925249522191549, "grad_norm": 8.152074687851467, "learning_rate": 1.397107893012531e-07, "loss": 0.694, "step": 12661 }, { "epoch": 1.792666525093792, "grad_norm": 8.39851607884751, "learning_rate": 1.3952191655061425e-07, "loss": 0.6285, "step": 12662 }, { "epoch": 1.7928080979684293, "grad_norm": 8.930890393608928, "learning_rate": 1.393331678885476e-07, "loss": 0.7064, "step": 12663 }, { "epoch": 1.7929496708430666, "grad_norm": 8.206997318914866, "learning_rate": 1.3914454332497608e-07, "loss": 0.6084, "step": 12664 }, { "epoch": 1.7930912437177038, "grad_norm": 9.669869604799649, "learning_rate": 1.3895604286981613e-07, "loss": 0.6057, "step": 12665 }, { "epoch": 1.793232816592341, "grad_norm": 8.42688767654381, "learning_rate": 1.3876766653297597e-07, "loss": 0.6079, "step": 12666 }, { "epoch": 1.7933743894669782, "grad_norm": 8.637333110074806, "learning_rate": 1.3857941432435934e-07, "loss": 0.713, "step": 12667 }, { "epoch": 1.7935159623416155, "grad_norm": 8.902732395394343, "learning_rate": 1.3839128625386193e-07, "loss": 0.6979, "step": 12668 }, { "epoch": 1.7936575352162527, "grad_norm": 9.590290054533185, "learning_rate": 1.3820328233137393e-07, "loss": 0.6487, "step": 12669 }, { "epoch": 1.79379910809089, "grad_norm": 9.854742978255723, "learning_rate": 1.380154025667782e-07, "loss": 0.7603, "step": 12670 }, { "epoch": 1.7939406809655272, "grad_norm": 10.320452219199945, "learning_rate": 1.3782764696995188e-07, "loss": 0.6746, "step": 12671 }, { "epoch": 1.7940822538401642, "grad_norm": 8.149423833970324, "learning_rate": 1.3764001555076484e-07, "loss": 0.6147, "step": 12672 }, { "epoch": 1.7942238267148014, "grad_norm": 8.096807307465008, "learning_rate": 1.374525083190803e-07, "loss": 0.6505, "step": 12673 }, { "epoch": 1.7943653995894386, "grad_norm": 9.404431747150724, "learning_rate": 1.372651252847562e-07, "loss": 0.6669, "step": 12674 }, { "epoch": 1.7945069724640759, "grad_norm": 9.413807617789207, "learning_rate": 1.370778664576422e-07, "loss": 0.7189, "step": 12675 }, { "epoch": 1.794648545338713, "grad_norm": 8.984849681753776, "learning_rate": 1.3689073184758345e-07, "loss": 0.6694, "step": 12676 }, { "epoch": 1.7947901182133503, "grad_norm": 8.96702553788436, "learning_rate": 1.3670372146441652e-07, "loss": 0.6782, "step": 12677 }, { "epoch": 1.7949316910879876, "grad_norm": 9.465494231929334, "learning_rate": 1.3651683531797327e-07, "loss": 0.6995, "step": 12678 }, { "epoch": 1.7950732639626248, "grad_norm": 8.87572207669959, "learning_rate": 1.3633007341807726e-07, "loss": 0.5451, "step": 12679 }, { "epoch": 1.795214836837262, "grad_norm": 8.787252417776044, "learning_rate": 1.3614343577454725e-07, "loss": 0.7451, "step": 12680 }, { "epoch": 1.7953564097118992, "grad_norm": 10.329278786253212, "learning_rate": 1.3595692239719404e-07, "loss": 0.6209, "step": 12681 }, { "epoch": 1.7954979825865363, "grad_norm": 10.835339013302866, "learning_rate": 1.3577053329582258e-07, "loss": 0.5314, "step": 12682 }, { "epoch": 1.7956395554611735, "grad_norm": 8.386031898690785, "learning_rate": 1.3558426848023165e-07, "loss": 0.6888, "step": 12683 }, { "epoch": 1.7957811283358107, "grad_norm": 9.177358830300669, "learning_rate": 1.3539812796021234e-07, "loss": 0.646, "step": 12684 }, { "epoch": 1.795922701210448, "grad_norm": 12.506023329053722, "learning_rate": 1.352121117455507e-07, "loss": 0.593, "step": 12685 }, { "epoch": 1.7960642740850852, "grad_norm": 9.35958367880885, "learning_rate": 1.3502621984602477e-07, "loss": 0.6784, "step": 12686 }, { "epoch": 1.7962058469597224, "grad_norm": 11.46213567356532, "learning_rate": 1.3484045227140697e-07, "loss": 0.7252, "step": 12687 }, { "epoch": 1.7963474198343596, "grad_norm": 8.582824198119189, "learning_rate": 1.3465480903146365e-07, "loss": 0.6296, "step": 12688 }, { "epoch": 1.7964889927089969, "grad_norm": 9.23889152978964, "learning_rate": 1.344692901359529e-07, "loss": 0.7398, "step": 12689 }, { "epoch": 1.796630565583634, "grad_norm": 8.4781149572024, "learning_rate": 1.3428389559462796e-07, "loss": 0.7016, "step": 12690 }, { "epoch": 1.7967721384582713, "grad_norm": 8.421805698201647, "learning_rate": 1.340986254172344e-07, "loss": 0.742, "step": 12691 }, { "epoch": 1.7969137113329086, "grad_norm": 10.163468738661905, "learning_rate": 1.3391347961351275e-07, "loss": 0.6576, "step": 12692 }, { "epoch": 1.7970552842075458, "grad_norm": 9.382655159093968, "learning_rate": 1.337284581931944e-07, "loss": 0.7648, "step": 12693 }, { "epoch": 1.797196857082183, "grad_norm": 10.02235926524784, "learning_rate": 1.3354356116600685e-07, "loss": 0.651, "step": 12694 }, { "epoch": 1.7973384299568202, "grad_norm": 8.478059613615118, "learning_rate": 1.3335878854166984e-07, "loss": 0.6146, "step": 12695 }, { "epoch": 1.7974800028314575, "grad_norm": 9.824671491804446, "learning_rate": 1.3317414032989668e-07, "loss": 0.6433, "step": 12696 }, { "epoch": 1.7976215757060947, "grad_norm": 9.290915237856346, "learning_rate": 1.3298961654039433e-07, "loss": 0.6099, "step": 12697 }, { "epoch": 1.797763148580732, "grad_norm": 10.14622251376482, "learning_rate": 1.3280521718286255e-07, "loss": 0.619, "step": 12698 }, { "epoch": 1.7979047214553692, "grad_norm": 8.723700973738108, "learning_rate": 1.3262094226699578e-07, "loss": 0.6577, "step": 12699 }, { "epoch": 1.7980462943300064, "grad_norm": 9.957926551672095, "learning_rate": 1.3243679180248075e-07, "loss": 0.7533, "step": 12700 }, { "epoch": 1.7981878672046436, "grad_norm": 9.386153846661747, "learning_rate": 1.3225276579899833e-07, "loss": 0.5663, "step": 12701 }, { "epoch": 1.7983294400792809, "grad_norm": 9.855142449636977, "learning_rate": 1.3206886426622267e-07, "loss": 0.7651, "step": 12702 }, { "epoch": 1.798471012953918, "grad_norm": 9.153763677599068, "learning_rate": 1.318850872138211e-07, "loss": 0.5958, "step": 12703 }, { "epoch": 1.7986125858285553, "grad_norm": 8.914140812950256, "learning_rate": 1.3170143465145474e-07, "loss": 0.6786, "step": 12704 }, { "epoch": 1.7987541587031926, "grad_norm": 9.842927250463154, "learning_rate": 1.3151790658877785e-07, "loss": 0.6622, "step": 12705 }, { "epoch": 1.7988957315778298, "grad_norm": 10.143081155985792, "learning_rate": 1.3133450303543904e-07, "loss": 0.6558, "step": 12706 }, { "epoch": 1.799037304452467, "grad_norm": 8.331269835257475, "learning_rate": 1.3115122400107872e-07, "loss": 0.6431, "step": 12707 }, { "epoch": 1.7991788773271042, "grad_norm": 8.248735735574972, "learning_rate": 1.3096806949533274e-07, "loss": 0.6461, "step": 12708 }, { "epoch": 1.7993204502017415, "grad_norm": 10.879260752190847, "learning_rate": 1.3078503952782845e-07, "loss": 0.6895, "step": 12709 }, { "epoch": 1.7994620230763787, "grad_norm": 9.818109354721486, "learning_rate": 1.306021341081881e-07, "loss": 0.7151, "step": 12710 }, { "epoch": 1.799603595951016, "grad_norm": 9.168214239629766, "learning_rate": 1.304193532460274e-07, "loss": 0.6033, "step": 12711 }, { "epoch": 1.7997451688256532, "grad_norm": 8.922634753799297, "learning_rate": 1.3023669695095413e-07, "loss": 0.7242, "step": 12712 }, { "epoch": 1.7998867417002902, "grad_norm": 10.972620789147044, "learning_rate": 1.3005416523257126e-07, "loss": 0.6416, "step": 12713 }, { "epoch": 1.8000283145749274, "grad_norm": 10.478281720653573, "learning_rate": 1.2987175810047297e-07, "loss": 0.6581, "step": 12714 }, { "epoch": 1.8001698874495646, "grad_norm": 8.661541034413005, "learning_rate": 1.2968947556424943e-07, "loss": 0.6611, "step": 12715 }, { "epoch": 1.8003114603242019, "grad_norm": 8.76531339965324, "learning_rate": 1.2950731763348295e-07, "loss": 0.6229, "step": 12716 }, { "epoch": 1.800453033198839, "grad_norm": 9.023597722468217, "learning_rate": 1.2932528431774892e-07, "loss": 0.6235, "step": 12717 }, { "epoch": 1.8005946060734763, "grad_norm": 10.412419503623237, "learning_rate": 1.291433756266175e-07, "loss": 0.6126, "step": 12718 }, { "epoch": 1.8007361789481136, "grad_norm": 10.138692756973775, "learning_rate": 1.289615915696507e-07, "loss": 0.6607, "step": 12719 }, { "epoch": 1.8008777518227508, "grad_norm": 9.556025688178376, "learning_rate": 1.2877993215640539e-07, "loss": 0.7635, "step": 12720 }, { "epoch": 1.801019324697388, "grad_norm": 10.253982886618376, "learning_rate": 1.2859839739643054e-07, "loss": 0.5858, "step": 12721 }, { "epoch": 1.8011608975720252, "grad_norm": 8.564814122594246, "learning_rate": 1.2841698729927022e-07, "loss": 0.6368, "step": 12722 }, { "epoch": 1.8013024704466625, "grad_norm": 8.908116837652148, "learning_rate": 1.2823570187446065e-07, "loss": 0.6215, "step": 12723 }, { "epoch": 1.8014440433212995, "grad_norm": 10.25649148957777, "learning_rate": 1.2805454113153121e-07, "loss": 0.6496, "step": 12724 }, { "epoch": 1.8015856161959367, "grad_norm": 8.821740034086467, "learning_rate": 1.2787350508000645e-07, "loss": 0.6471, "step": 12725 }, { "epoch": 1.801727189070574, "grad_norm": 8.577244892379351, "learning_rate": 1.276925937294024e-07, "loss": 0.5786, "step": 12726 }, { "epoch": 1.8018687619452112, "grad_norm": 9.367506771039386, "learning_rate": 1.2751180708923005e-07, "loss": 0.5482, "step": 12727 }, { "epoch": 1.8020103348198484, "grad_norm": 9.046756091349383, "learning_rate": 1.2733114516899293e-07, "loss": 0.649, "step": 12728 }, { "epoch": 1.8021519076944856, "grad_norm": 8.627636050552942, "learning_rate": 1.271506079781884e-07, "loss": 0.6571, "step": 12729 }, { "epoch": 1.8022934805691229, "grad_norm": 9.124502508110117, "learning_rate": 1.2697019552630696e-07, "loss": 0.6539, "step": 12730 }, { "epoch": 1.80243505344376, "grad_norm": 12.235587224427052, "learning_rate": 1.2678990782283324e-07, "loss": 0.6555, "step": 12731 }, { "epoch": 1.8025766263183973, "grad_norm": 8.557143269090123, "learning_rate": 1.266097448772441e-07, "loss": 0.7474, "step": 12732 }, { "epoch": 1.8027181991930346, "grad_norm": 10.007695479531497, "learning_rate": 1.264297066990111e-07, "loss": 0.6899, "step": 12733 }, { "epoch": 1.8028597720676718, "grad_norm": 8.28979511960338, "learning_rate": 1.2624979329759952e-07, "loss": 0.6699, "step": 12734 }, { "epoch": 1.803001344942309, "grad_norm": 9.817759665330549, "learning_rate": 1.2607000468246533e-07, "loss": 0.66, "step": 12735 }, { "epoch": 1.8031429178169462, "grad_norm": 7.809117920282104, "learning_rate": 1.2589034086306129e-07, "loss": 0.6156, "step": 12736 }, { "epoch": 1.8032844906915835, "grad_norm": 10.031334041825714, "learning_rate": 1.2571080184883178e-07, "loss": 0.6219, "step": 12737 }, { "epoch": 1.8034260635662207, "grad_norm": 9.608468659620542, "learning_rate": 1.255313876492148e-07, "loss": 0.6407, "step": 12738 }, { "epoch": 1.803567636440858, "grad_norm": 9.155418384269264, "learning_rate": 1.2535209827364282e-07, "loss": 0.6629, "step": 12739 }, { "epoch": 1.8037092093154952, "grad_norm": 8.84663953181429, "learning_rate": 1.2517293373153993e-07, "loss": 0.6463, "step": 12740 }, { "epoch": 1.8038507821901324, "grad_norm": 12.327719020564997, "learning_rate": 1.2499389403232532e-07, "loss": 0.7393, "step": 12741 }, { "epoch": 1.8039923550647696, "grad_norm": 9.13572585054993, "learning_rate": 1.2481497918541085e-07, "loss": 0.5991, "step": 12742 }, { "epoch": 1.8041339279394069, "grad_norm": 8.484533262796488, "learning_rate": 1.246361892002021e-07, "loss": 0.6989, "step": 12743 }, { "epoch": 1.804275500814044, "grad_norm": 8.271160846314467, "learning_rate": 1.2445752408609733e-07, "loss": 0.6072, "step": 12744 }, { "epoch": 1.8044170736886813, "grad_norm": 10.054996322714628, "learning_rate": 1.2427898385248965e-07, "loss": 0.6872, "step": 12745 }, { "epoch": 1.8045586465633185, "grad_norm": 9.157242756947529, "learning_rate": 1.2410056850876428e-07, "loss": 0.6782, "step": 12746 }, { "epoch": 1.8047002194379558, "grad_norm": 9.884432380213035, "learning_rate": 1.239222780643004e-07, "loss": 0.6458, "step": 12747 }, { "epoch": 1.804841792312593, "grad_norm": 8.74170225976826, "learning_rate": 1.237441125284708e-07, "loss": 0.6833, "step": 12748 }, { "epoch": 1.8049833651872302, "grad_norm": 10.01285413480116, "learning_rate": 1.2356607191064102e-07, "loss": 0.7112, "step": 12749 }, { "epoch": 1.8051249380618675, "grad_norm": 10.014508594297272, "learning_rate": 1.2338815622017137e-07, "loss": 0.7193, "step": 12750 }, { "epoch": 1.8052665109365047, "grad_norm": 9.583759685375632, "learning_rate": 1.2321036546641406e-07, "loss": 0.631, "step": 12751 }, { "epoch": 1.805408083811142, "grad_norm": 10.380076809343517, "learning_rate": 1.2303269965871583e-07, "loss": 0.5628, "step": 12752 }, { "epoch": 1.8055496566857792, "grad_norm": 7.879144213916964, "learning_rate": 1.2285515880641585e-07, "loss": 0.6506, "step": 12753 }, { "epoch": 1.8056912295604164, "grad_norm": 8.22884546610751, "learning_rate": 1.2267774291884805e-07, "loss": 0.6811, "step": 12754 }, { "epoch": 1.8058328024350534, "grad_norm": 8.977260685288195, "learning_rate": 1.2250045200533855e-07, "loss": 0.6421, "step": 12755 }, { "epoch": 1.8059743753096906, "grad_norm": 9.35473069244246, "learning_rate": 1.2232328607520743e-07, "loss": 0.7104, "step": 12756 }, { "epoch": 1.8061159481843279, "grad_norm": 9.604828542105382, "learning_rate": 1.2214624513776861e-07, "loss": 0.6179, "step": 12757 }, { "epoch": 1.806257521058965, "grad_norm": 10.276724610769033, "learning_rate": 1.219693292023283e-07, "loss": 0.7074, "step": 12758 }, { "epoch": 1.8063990939336023, "grad_norm": 8.837039194139065, "learning_rate": 1.217925382781876e-07, "loss": 0.6337, "step": 12759 }, { "epoch": 1.8065406668082395, "grad_norm": 9.744371403423187, "learning_rate": 1.216158723746394e-07, "loss": 0.6332, "step": 12760 }, { "epoch": 1.8066822396828768, "grad_norm": 7.598851629130745, "learning_rate": 1.2143933150097154e-07, "loss": 0.7591, "step": 12761 }, { "epoch": 1.806823812557514, "grad_norm": 8.066756669900656, "learning_rate": 1.2126291566646464e-07, "loss": 0.6731, "step": 12762 }, { "epoch": 1.8069653854321512, "grad_norm": 8.87711190898969, "learning_rate": 1.210866248803924e-07, "loss": 0.642, "step": 12763 }, { "epoch": 1.8071069583067885, "grad_norm": 9.347401189452714, "learning_rate": 1.20910459152023e-07, "loss": 0.62, "step": 12764 }, { "epoch": 1.8072485311814255, "grad_norm": 8.628165286203938, "learning_rate": 1.2073441849061645e-07, "loss": 0.5947, "step": 12765 }, { "epoch": 1.8073901040560627, "grad_norm": 8.245956817314035, "learning_rate": 1.205585029054279e-07, "loss": 0.6889, "step": 12766 }, { "epoch": 1.8075316769307, "grad_norm": 8.677698987450352, "learning_rate": 1.2038271240570415e-07, "loss": 0.6551, "step": 12767 }, { "epoch": 1.8076732498053372, "grad_norm": 10.220380833765786, "learning_rate": 1.2020704700068691e-07, "loss": 0.6472, "step": 12768 }, { "epoch": 1.8078148226799744, "grad_norm": 9.5429571575605, "learning_rate": 1.2003150669961105e-07, "loss": 0.6127, "step": 12769 }, { "epoch": 1.8079563955546116, "grad_norm": 8.17627021650583, "learning_rate": 1.198560915117039e-07, "loss": 0.7005, "step": 12770 }, { "epoch": 1.8080979684292489, "grad_norm": 8.658724985330233, "learning_rate": 1.1968080144618783e-07, "loss": 0.6771, "step": 12771 }, { "epoch": 1.808239541303886, "grad_norm": 8.472516225358607, "learning_rate": 1.195056365122768e-07, "loss": 0.6596, "step": 12772 }, { "epoch": 1.8083811141785233, "grad_norm": 7.992588424694625, "learning_rate": 1.193305967191796e-07, "loss": 0.5825, "step": 12773 }, { "epoch": 1.8085226870531605, "grad_norm": 8.520179132718589, "learning_rate": 1.191556820760978e-07, "loss": 0.6026, "step": 12774 }, { "epoch": 1.8086642599277978, "grad_norm": 9.532012808870777, "learning_rate": 1.1898089259222673e-07, "loss": 0.6039, "step": 12775 }, { "epoch": 1.808805832802435, "grad_norm": 8.680030740428583, "learning_rate": 1.1880622827675464e-07, "loss": 0.6089, "step": 12776 }, { "epoch": 1.8089474056770722, "grad_norm": 8.468040521048142, "learning_rate": 1.1863168913886364e-07, "loss": 0.6026, "step": 12777 }, { "epoch": 1.8090889785517095, "grad_norm": 9.729854409676236, "learning_rate": 1.1845727518772915e-07, "loss": 0.6032, "step": 12778 }, { "epoch": 1.8092305514263467, "grad_norm": 8.996004277263488, "learning_rate": 1.1828298643251967e-07, "loss": 0.6102, "step": 12779 }, { "epoch": 1.809372124300984, "grad_norm": 9.381496772341455, "learning_rate": 1.1810882288239817e-07, "loss": 0.7151, "step": 12780 }, { "epoch": 1.8095136971756212, "grad_norm": 9.72080652980213, "learning_rate": 1.1793478454651952e-07, "loss": 0.6259, "step": 12781 }, { "epoch": 1.8096552700502584, "grad_norm": 9.355310949217186, "learning_rate": 1.1776087143403337e-07, "loss": 0.6079, "step": 12782 }, { "epoch": 1.8097968429248956, "grad_norm": 8.744511217925773, "learning_rate": 1.1758708355408155e-07, "loss": 0.664, "step": 12783 }, { "epoch": 1.8099384157995329, "grad_norm": 9.599435519470948, "learning_rate": 1.1741342091580038e-07, "loss": 0.6655, "step": 12784 }, { "epoch": 1.81007998867417, "grad_norm": 10.723293042338954, "learning_rate": 1.172398835283195e-07, "loss": 0.7794, "step": 12785 }, { "epoch": 1.8102215615488073, "grad_norm": 9.941225133418934, "learning_rate": 1.1706647140076105e-07, "loss": 0.6755, "step": 12786 }, { "epoch": 1.8103631344234445, "grad_norm": 8.574034558101316, "learning_rate": 1.1689318454224191e-07, "loss": 0.7246, "step": 12787 }, { "epoch": 1.8105047072980818, "grad_norm": 9.43399892375229, "learning_rate": 1.1672002296187063e-07, "loss": 0.6589, "step": 12788 }, { "epoch": 1.810646280172719, "grad_norm": 9.10067391048597, "learning_rate": 1.1654698666875076e-07, "loss": 0.6676, "step": 12789 }, { "epoch": 1.8107878530473562, "grad_norm": 9.046185561084098, "learning_rate": 1.1637407567197862e-07, "loss": 0.6305, "step": 12790 }, { "epoch": 1.8109294259219935, "grad_norm": 8.884164296327517, "learning_rate": 1.162012899806439e-07, "loss": 0.6065, "step": 12791 }, { "epoch": 1.8110709987966307, "grad_norm": 11.43076880992427, "learning_rate": 1.1602862960383015e-07, "loss": 0.7361, "step": 12792 }, { "epoch": 1.811212571671268, "grad_norm": 10.020648999544406, "learning_rate": 1.1585609455061348e-07, "loss": 0.7393, "step": 12793 }, { "epoch": 1.8113541445459052, "grad_norm": 9.971305495699859, "learning_rate": 1.1568368483006465e-07, "loss": 0.6182, "step": 12794 }, { "epoch": 1.8114957174205424, "grad_norm": 10.216388207810787, "learning_rate": 1.1551140045124615e-07, "loss": 0.7252, "step": 12795 }, { "epoch": 1.8116372902951794, "grad_norm": 10.188084872254343, "learning_rate": 1.1533924142321601e-07, "loss": 0.7414, "step": 12796 }, { "epoch": 1.8117788631698166, "grad_norm": 9.608203847585552, "learning_rate": 1.1516720775502338e-07, "loss": 0.6453, "step": 12797 }, { "epoch": 1.8119204360444539, "grad_norm": 8.876692919384933, "learning_rate": 1.1499529945571269e-07, "loss": 0.7008, "step": 12798 }, { "epoch": 1.812062008919091, "grad_norm": 9.620403430751237, "learning_rate": 1.1482351653432089e-07, "loss": 0.6481, "step": 12799 }, { "epoch": 1.8122035817937283, "grad_norm": 7.908161882070153, "learning_rate": 1.1465185899987797e-07, "loss": 0.669, "step": 12800 }, { "epoch": 1.8123451546683655, "grad_norm": 8.46158130341468, "learning_rate": 1.1448032686140864e-07, "loss": 0.5509, "step": 12801 }, { "epoch": 1.8124867275430028, "grad_norm": 9.38746445162731, "learning_rate": 1.1430892012792933e-07, "loss": 0.5941, "step": 12802 }, { "epoch": 1.81262830041764, "grad_norm": 7.893594738180128, "learning_rate": 1.1413763880845169e-07, "loss": 0.5791, "step": 12803 }, { "epoch": 1.8127698732922772, "grad_norm": 9.809088824747043, "learning_rate": 1.139664829119791e-07, "loss": 0.6644, "step": 12804 }, { "epoch": 1.8129114461669145, "grad_norm": 9.730092779895323, "learning_rate": 1.1379545244750961e-07, "loss": 0.83, "step": 12805 }, { "epoch": 1.8130530190415515, "grad_norm": 8.336294894078254, "learning_rate": 1.1362454742403356e-07, "loss": 0.6536, "step": 12806 }, { "epoch": 1.8131945919161887, "grad_norm": 9.803662639414972, "learning_rate": 1.1345376785053596e-07, "loss": 0.6913, "step": 12807 }, { "epoch": 1.813336164790826, "grad_norm": 9.205072116020212, "learning_rate": 1.1328311373599493e-07, "loss": 0.6955, "step": 12808 }, { "epoch": 1.8134777376654632, "grad_norm": 10.428972426914727, "learning_rate": 1.1311258508938022e-07, "loss": 0.6712, "step": 12809 }, { "epoch": 1.8136193105401004, "grad_norm": 9.511395897369844, "learning_rate": 1.1294218191965745e-07, "loss": 0.617, "step": 12810 }, { "epoch": 1.8137608834147376, "grad_norm": 9.766172250291476, "learning_rate": 1.1277190423578416e-07, "loss": 0.7694, "step": 12811 }, { "epoch": 1.8139024562893749, "grad_norm": 8.402247954620119, "learning_rate": 1.1260175204671181e-07, "loss": 0.6778, "step": 12812 }, { "epoch": 1.814044029164012, "grad_norm": 9.297691911982065, "learning_rate": 1.1243172536138547e-07, "loss": 0.7287, "step": 12813 }, { "epoch": 1.8141856020386493, "grad_norm": 8.662655660268328, "learning_rate": 1.1226182418874271e-07, "loss": 0.698, "step": 12814 }, { "epoch": 1.8143271749132865, "grad_norm": 8.965617732792671, "learning_rate": 1.1209204853771582e-07, "loss": 0.7027, "step": 12815 }, { "epoch": 1.8144687477879238, "grad_norm": 10.819347176028215, "learning_rate": 1.1192239841722935e-07, "loss": 0.6641, "step": 12816 }, { "epoch": 1.814610320662561, "grad_norm": 10.018376060356227, "learning_rate": 1.1175287383620197e-07, "loss": 0.6374, "step": 12817 }, { "epoch": 1.8147518935371982, "grad_norm": 9.739027843171932, "learning_rate": 1.1158347480354493e-07, "loss": 0.6788, "step": 12818 }, { "epoch": 1.8148934664118355, "grad_norm": 9.734109694292917, "learning_rate": 1.1141420132816383e-07, "loss": 0.7166, "step": 12819 }, { "epoch": 1.8150350392864727, "grad_norm": 9.096373504056052, "learning_rate": 1.1124505341895742e-07, "loss": 0.6252, "step": 12820 }, { "epoch": 1.81517661216111, "grad_norm": 9.65935222088668, "learning_rate": 1.1107603108481718e-07, "loss": 0.686, "step": 12821 }, { "epoch": 1.8153181850357472, "grad_norm": 9.258943924797205, "learning_rate": 1.109071343346288e-07, "loss": 0.6996, "step": 12822 }, { "epoch": 1.8154597579103844, "grad_norm": 9.08741146081245, "learning_rate": 1.1073836317727071e-07, "loss": 0.6888, "step": 12823 }, { "epoch": 1.8156013307850216, "grad_norm": 9.093352318946208, "learning_rate": 1.1056971762161584e-07, "loss": 0.7468, "step": 12824 }, { "epoch": 1.8157429036596588, "grad_norm": 8.73400899976682, "learning_rate": 1.1040119767652901e-07, "loss": 0.6657, "step": 12825 }, { "epoch": 1.815884476534296, "grad_norm": 8.472536486302007, "learning_rate": 1.1023280335086956e-07, "loss": 0.6353, "step": 12826 }, { "epoch": 1.8160260494089333, "grad_norm": 7.6948586576988465, "learning_rate": 1.1006453465348954e-07, "loss": 0.6141, "step": 12827 }, { "epoch": 1.8161676222835705, "grad_norm": 9.229848820697427, "learning_rate": 1.0989639159323523e-07, "loss": 0.7074, "step": 12828 }, { "epoch": 1.8163091951582078, "grad_norm": 9.34122415039537, "learning_rate": 1.0972837417894538e-07, "loss": 0.6399, "step": 12829 }, { "epoch": 1.816450768032845, "grad_norm": 9.109344848792297, "learning_rate": 1.0956048241945238e-07, "loss": 0.7112, "step": 12830 }, { "epoch": 1.8165923409074822, "grad_norm": 10.376416213112039, "learning_rate": 1.0939271632358278e-07, "loss": 0.7017, "step": 12831 }, { "epoch": 1.8167339137821195, "grad_norm": 8.729683095242311, "learning_rate": 1.0922507590015535e-07, "loss": 0.6755, "step": 12832 }, { "epoch": 1.8168754866567567, "grad_norm": 10.194481078407463, "learning_rate": 1.0905756115798332e-07, "loss": 0.6418, "step": 12833 }, { "epoch": 1.817017059531394, "grad_norm": 8.655879374257662, "learning_rate": 1.0889017210587216e-07, "loss": 0.7236, "step": 12834 }, { "epoch": 1.8171586324060311, "grad_norm": 8.215302995514254, "learning_rate": 1.0872290875262175e-07, "loss": 0.6942, "step": 12835 }, { "epoch": 1.8173002052806684, "grad_norm": 9.35794876771401, "learning_rate": 1.0855577110702536e-07, "loss": 0.6089, "step": 12836 }, { "epoch": 1.8174417781553056, "grad_norm": 9.785829879507617, "learning_rate": 1.0838875917786845e-07, "loss": 0.6696, "step": 12837 }, { "epoch": 1.8175833510299426, "grad_norm": 10.020189123139172, "learning_rate": 1.0822187297393177e-07, "loss": 0.6216, "step": 12838 }, { "epoch": 1.8177249239045798, "grad_norm": 9.54077273072346, "learning_rate": 1.0805511250398748e-07, "loss": 0.6385, "step": 12839 }, { "epoch": 1.817866496779217, "grad_norm": 9.90856444461191, "learning_rate": 1.07888477776803e-07, "loss": 0.6732, "step": 12840 }, { "epoch": 1.8180080696538543, "grad_norm": 10.043269197056842, "learning_rate": 1.0772196880113716e-07, "loss": 0.5909, "step": 12841 }, { "epoch": 1.8181496425284915, "grad_norm": 8.773277057053665, "learning_rate": 1.0755558558574325e-07, "loss": 0.6847, "step": 12842 }, { "epoch": 1.8182912154031288, "grad_norm": 8.165903276328919, "learning_rate": 1.0738932813936897e-07, "loss": 0.6125, "step": 12843 }, { "epoch": 1.818432788277766, "grad_norm": 8.583379307102177, "learning_rate": 1.0722319647075347e-07, "loss": 0.6109, "step": 12844 }, { "epoch": 1.8185743611524032, "grad_norm": 8.248278351520405, "learning_rate": 1.0705719058863057e-07, "loss": 0.6274, "step": 12845 }, { "epoch": 1.8187159340270405, "grad_norm": 8.410195257979671, "learning_rate": 1.0689131050172635e-07, "loss": 0.6611, "step": 12846 }, { "epoch": 1.8188575069016777, "grad_norm": 8.608429844756234, "learning_rate": 1.0672555621876218e-07, "loss": 0.6658, "step": 12847 }, { "epoch": 1.8189990797763147, "grad_norm": 9.450783381209977, "learning_rate": 1.0655992774845054e-07, "loss": 0.7155, "step": 12848 }, { "epoch": 1.819140652650952, "grad_norm": 10.475464628926707, "learning_rate": 1.0639442509949944e-07, "loss": 0.7655, "step": 12849 }, { "epoch": 1.8192822255255892, "grad_norm": 10.031448504899245, "learning_rate": 1.0622904828060803e-07, "loss": 0.7051, "step": 12850 }, { "epoch": 1.8194237984002264, "grad_norm": 8.725936498361508, "learning_rate": 1.0606379730047134e-07, "loss": 0.6113, "step": 12851 }, { "epoch": 1.8195653712748636, "grad_norm": 10.13398362967609, "learning_rate": 1.0589867216777544e-07, "loss": 0.6834, "step": 12852 }, { "epoch": 1.8197069441495008, "grad_norm": 7.74789572419997, "learning_rate": 1.0573367289120118e-07, "loss": 0.7461, "step": 12853 }, { "epoch": 1.819848517024138, "grad_norm": 10.739034338361519, "learning_rate": 1.0556879947942272e-07, "loss": 0.6316, "step": 12854 }, { "epoch": 1.8199900898987753, "grad_norm": 9.277210525359223, "learning_rate": 1.0540405194110703e-07, "loss": 0.7454, "step": 12855 }, { "epoch": 1.8201316627734125, "grad_norm": 9.621964409447775, "learning_rate": 1.0523943028491496e-07, "loss": 0.677, "step": 12856 }, { "epoch": 1.8202732356480498, "grad_norm": 8.585253256970823, "learning_rate": 1.0507493451949984e-07, "loss": 0.567, "step": 12857 }, { "epoch": 1.820414808522687, "grad_norm": 10.560000513250165, "learning_rate": 1.0491056465351007e-07, "loss": 0.7147, "step": 12858 }, { "epoch": 1.8205563813973242, "grad_norm": 7.639565236368652, "learning_rate": 1.0474632069558621e-07, "loss": 0.6433, "step": 12859 }, { "epoch": 1.8206979542719615, "grad_norm": 10.210773866563766, "learning_rate": 1.045822026543622e-07, "loss": 0.6538, "step": 12860 }, { "epoch": 1.8208395271465987, "grad_norm": 9.293975634423717, "learning_rate": 1.0441821053846612e-07, "loss": 0.5934, "step": 12861 }, { "epoch": 1.820981100021236, "grad_norm": 10.654088312864706, "learning_rate": 1.0425434435651776e-07, "loss": 0.7057, "step": 12862 }, { "epoch": 1.8211226728958732, "grad_norm": 9.556508300164516, "learning_rate": 1.0409060411713273e-07, "loss": 0.7209, "step": 12863 }, { "epoch": 1.8212642457705104, "grad_norm": 11.92528288243813, "learning_rate": 1.0392698982891775e-07, "loss": 0.6997, "step": 12864 }, { "epoch": 1.8214058186451476, "grad_norm": 9.141016451286767, "learning_rate": 1.0376350150047427e-07, "loss": 0.6693, "step": 12865 }, { "epoch": 1.8215473915197848, "grad_norm": 9.02651335901134, "learning_rate": 1.0360013914039708e-07, "loss": 0.6742, "step": 12866 }, { "epoch": 1.821688964394422, "grad_norm": 8.484479309902067, "learning_rate": 1.0343690275727374e-07, "loss": 0.5418, "step": 12867 }, { "epoch": 1.8218305372690593, "grad_norm": 9.335971005431508, "learning_rate": 1.0327379235968549e-07, "loss": 0.6484, "step": 12868 }, { "epoch": 1.8219721101436965, "grad_norm": 10.104739793601155, "learning_rate": 1.0311080795620654e-07, "loss": 0.5483, "step": 12869 }, { "epoch": 1.8221136830183338, "grad_norm": 8.692916389171192, "learning_rate": 1.0294794955540587e-07, "loss": 0.6555, "step": 12870 }, { "epoch": 1.822255255892971, "grad_norm": 8.082016618069161, "learning_rate": 1.0278521716584361e-07, "loss": 0.7415, "step": 12871 }, { "epoch": 1.8223968287676082, "grad_norm": 9.574034671578024, "learning_rate": 1.0262261079607539e-07, "loss": 0.6242, "step": 12872 }, { "epoch": 1.8225384016422455, "grad_norm": 8.743985534069664, "learning_rate": 1.0246013045464881e-07, "loss": 0.6811, "step": 12873 }, { "epoch": 1.8226799745168827, "grad_norm": 10.09611781075443, "learning_rate": 1.0229777615010538e-07, "loss": 0.7295, "step": 12874 }, { "epoch": 1.82282154739152, "grad_norm": 9.38742259637488, "learning_rate": 1.0213554789098052e-07, "loss": 0.5811, "step": 12875 }, { "epoch": 1.8229631202661571, "grad_norm": 8.470837100119315, "learning_rate": 1.0197344568580153e-07, "loss": 0.661, "step": 12876 }, { "epoch": 1.8231046931407944, "grad_norm": 8.87784197798289, "learning_rate": 1.0181146954309052e-07, "loss": 0.6082, "step": 12877 }, { "epoch": 1.8232462660154316, "grad_norm": 9.766246074000474, "learning_rate": 1.0164961947136232e-07, "loss": 0.6788, "step": 12878 }, { "epoch": 1.8233878388900686, "grad_norm": 9.30851875559908, "learning_rate": 1.0148789547912569e-07, "loss": 0.7117, "step": 12879 }, { "epoch": 1.8235294117647058, "grad_norm": 10.175626798252022, "learning_rate": 1.013262975748816e-07, "loss": 0.576, "step": 12880 }, { "epoch": 1.823670984639343, "grad_norm": 9.272524652970269, "learning_rate": 1.011648257671255e-07, "loss": 0.6706, "step": 12881 }, { "epoch": 1.8238125575139803, "grad_norm": 10.533696555462313, "learning_rate": 1.0100348006434641e-07, "loss": 0.6764, "step": 12882 }, { "epoch": 1.8239541303886175, "grad_norm": 7.205509874991065, "learning_rate": 1.0084226047502505e-07, "loss": 0.6843, "step": 12883 }, { "epoch": 1.8240957032632548, "grad_norm": 9.68702155131517, "learning_rate": 1.0068116700763769e-07, "loss": 0.6825, "step": 12884 }, { "epoch": 1.824237276137892, "grad_norm": 9.500279874192564, "learning_rate": 1.0052019967065174e-07, "loss": 0.7645, "step": 12885 }, { "epoch": 1.8243788490125292, "grad_norm": 8.239321184067213, "learning_rate": 1.0035935847253015e-07, "loss": 0.7095, "step": 12886 }, { "epoch": 1.8245204218871665, "grad_norm": 9.036375076540692, "learning_rate": 1.001986434217278e-07, "loss": 0.7156, "step": 12887 }, { "epoch": 1.8246619947618037, "grad_norm": 9.687659871412688, "learning_rate": 1.0003805452669296e-07, "loss": 0.6175, "step": 12888 }, { "epoch": 1.8248035676364407, "grad_norm": 11.704124336713868, "learning_rate": 9.987759179586886e-08, "loss": 0.7, "step": 12889 }, { "epoch": 1.824945140511078, "grad_norm": 8.262612152492668, "learning_rate": 9.97172552376896e-08, "loss": 0.6397, "step": 12890 }, { "epoch": 1.8250867133857152, "grad_norm": 8.436502129216203, "learning_rate": 9.955704486058482e-08, "loss": 0.6346, "step": 12891 }, { "epoch": 1.8252282862603524, "grad_norm": 10.269868156678857, "learning_rate": 9.939696067297611e-08, "loss": 0.6624, "step": 12892 }, { "epoch": 1.8253698591349896, "grad_norm": 8.53782003984125, "learning_rate": 9.923700268327952e-08, "loss": 0.6431, "step": 12893 }, { "epoch": 1.8255114320096268, "grad_norm": 10.068256887511843, "learning_rate": 9.90771708999036e-08, "loss": 0.6617, "step": 12894 }, { "epoch": 1.825653004884264, "grad_norm": 8.137337575483938, "learning_rate": 9.891746533125024e-08, "loss": 0.6877, "step": 12895 }, { "epoch": 1.8257945777589013, "grad_norm": 8.336515454864774, "learning_rate": 9.87578859857155e-08, "loss": 0.6185, "step": 12896 }, { "epoch": 1.8259361506335385, "grad_norm": 8.712736729124504, "learning_rate": 9.859843287168825e-08, "loss": 0.7423, "step": 12897 }, { "epoch": 1.8260777235081758, "grad_norm": 9.66990984301665, "learning_rate": 9.843910599755119e-08, "loss": 0.6747, "step": 12898 }, { "epoch": 1.826219296382813, "grad_norm": 8.125363855284373, "learning_rate": 9.827990537167903e-08, "loss": 0.5826, "step": 12899 }, { "epoch": 1.8263608692574502, "grad_norm": 9.632108763197923, "learning_rate": 9.812083100244201e-08, "loss": 0.711, "step": 12900 }, { "epoch": 1.8265024421320875, "grad_norm": 8.911149882261048, "learning_rate": 9.796188289820152e-08, "loss": 0.6029, "step": 12901 }, { "epoch": 1.8266440150067247, "grad_norm": 11.301567401038444, "learning_rate": 9.780306106731419e-08, "loss": 0.6378, "step": 12902 }, { "epoch": 1.826785587881362, "grad_norm": 8.870665015371813, "learning_rate": 9.764436551812889e-08, "loss": 0.6979, "step": 12903 }, { "epoch": 1.8269271607559991, "grad_norm": 9.676354475601853, "learning_rate": 9.748579625898758e-08, "loss": 0.6623, "step": 12904 }, { "epoch": 1.8270687336306364, "grad_norm": 9.100245512280923, "learning_rate": 9.73273532982269e-08, "loss": 0.6789, "step": 12905 }, { "epoch": 1.8272103065052736, "grad_norm": 8.003582629041968, "learning_rate": 9.716903664417549e-08, "loss": 0.6726, "step": 12906 }, { "epoch": 1.8273518793799108, "grad_norm": 10.341737672458024, "learning_rate": 9.701084630515667e-08, "loss": 0.6719, "step": 12907 }, { "epoch": 1.827493452254548, "grad_norm": 10.973871584083044, "learning_rate": 9.685278228948519e-08, "loss": 0.6465, "step": 12908 }, { "epoch": 1.8276350251291853, "grad_norm": 9.08406143303427, "learning_rate": 9.669484460547135e-08, "loss": 0.6687, "step": 12909 }, { "epoch": 1.8277765980038225, "grad_norm": 9.123165416498644, "learning_rate": 9.653703326141794e-08, "loss": 0.614, "step": 12910 }, { "epoch": 1.8279181708784598, "grad_norm": 7.327811618765673, "learning_rate": 9.637934826562001e-08, "loss": 0.6726, "step": 12911 }, { "epoch": 1.828059743753097, "grad_norm": 10.973502756929879, "learning_rate": 9.622178962636813e-08, "loss": 0.7429, "step": 12912 }, { "epoch": 1.8282013166277342, "grad_norm": 8.819099714562174, "learning_rate": 9.606435735194403e-08, "loss": 0.7154, "step": 12913 }, { "epoch": 1.8283428895023714, "grad_norm": 10.690502436564495, "learning_rate": 9.590705145062468e-08, "loss": 0.6762, "step": 12914 }, { "epoch": 1.8284844623770087, "grad_norm": 9.741868761074942, "learning_rate": 9.574987193067847e-08, "loss": 0.6112, "step": 12915 }, { "epoch": 1.828626035251646, "grad_norm": 8.081369010597722, "learning_rate": 9.559281880036908e-08, "loss": 0.6663, "step": 12916 }, { "epoch": 1.8287676081262831, "grad_norm": 9.051378463383482, "learning_rate": 9.54358920679524e-08, "loss": 0.7661, "step": 12917 }, { "epoch": 1.8289091810009204, "grad_norm": 9.675331003603798, "learning_rate": 9.527909174167793e-08, "loss": 0.676, "step": 12918 }, { "epoch": 1.8290507538755576, "grad_norm": 9.970561756772057, "learning_rate": 9.512241782978853e-08, "loss": 0.6394, "step": 12919 }, { "epoch": 1.8291923267501946, "grad_norm": 10.407238246055302, "learning_rate": 9.496587034052041e-08, "loss": 0.6723, "step": 12920 }, { "epoch": 1.8293338996248318, "grad_norm": 9.984872056411648, "learning_rate": 9.480944928210362e-08, "loss": 0.6274, "step": 12921 }, { "epoch": 1.829475472499469, "grad_norm": 9.448293008987994, "learning_rate": 9.46531546627602e-08, "loss": 0.6729, "step": 12922 }, { "epoch": 1.8296170453741063, "grad_norm": 8.626104754032324, "learning_rate": 9.449698649070721e-08, "loss": 0.6895, "step": 12923 }, { "epoch": 1.8297586182487435, "grad_norm": 9.0770248297965, "learning_rate": 9.43409447741539e-08, "loss": 0.6007, "step": 12924 }, { "epoch": 1.8299001911233808, "grad_norm": 9.214633035292163, "learning_rate": 9.418502952130343e-08, "loss": 0.7374, "step": 12925 }, { "epoch": 1.830041763998018, "grad_norm": 10.874694819936687, "learning_rate": 9.40292407403523e-08, "loss": 0.6431, "step": 12926 }, { "epoch": 1.8301833368726552, "grad_norm": 8.631014730795165, "learning_rate": 9.38735784394898e-08, "loss": 0.6946, "step": 12927 }, { "epoch": 1.8303249097472925, "grad_norm": 7.823756051439833, "learning_rate": 9.371804262689938e-08, "loss": 0.6763, "step": 12928 }, { "epoch": 1.8304664826219297, "grad_norm": 8.139519028759086, "learning_rate": 9.3562633310757e-08, "loss": 0.5851, "step": 12929 }, { "epoch": 1.830608055496567, "grad_norm": 9.64006943547105, "learning_rate": 9.340735049923277e-08, "loss": 0.6739, "step": 12930 }, { "epoch": 1.830749628371204, "grad_norm": 10.064204290842495, "learning_rate": 9.325219420048964e-08, "loss": 0.5454, "step": 12931 }, { "epoch": 1.8308912012458411, "grad_norm": 7.698430671789518, "learning_rate": 9.309716442268413e-08, "loss": 0.6511, "step": 12932 }, { "epoch": 1.8310327741204784, "grad_norm": 11.644792668358784, "learning_rate": 9.29422611739661e-08, "loss": 0.6265, "step": 12933 }, { "epoch": 1.8311743469951156, "grad_norm": 9.676771954857085, "learning_rate": 9.278748446247848e-08, "loss": 0.6926, "step": 12934 }, { "epoch": 1.8313159198697528, "grad_norm": 8.218607041888813, "learning_rate": 9.263283429635839e-08, "loss": 0.6137, "step": 12935 }, { "epoch": 1.83145749274439, "grad_norm": 10.001533009325176, "learning_rate": 9.247831068373458e-08, "loss": 0.6946, "step": 12936 }, { "epoch": 1.8315990656190273, "grad_norm": 8.743421861164263, "learning_rate": 9.23239136327314e-08, "loss": 0.6846, "step": 12937 }, { "epoch": 1.8317406384936645, "grad_norm": 9.99307125379861, "learning_rate": 9.216964315146431e-08, "loss": 0.654, "step": 12938 }, { "epoch": 1.8318822113683018, "grad_norm": 8.919650092681826, "learning_rate": 9.201549924804376e-08, "loss": 0.7175, "step": 12939 }, { "epoch": 1.832023784242939, "grad_norm": 8.746030506554504, "learning_rate": 9.186148193057325e-08, "loss": 0.6928, "step": 12940 }, { "epoch": 1.8321653571175762, "grad_norm": 9.263226100451503, "learning_rate": 9.170759120714884e-08, "loss": 0.7039, "step": 12941 }, { "epoch": 1.8323069299922135, "grad_norm": 10.511627661835925, "learning_rate": 9.155382708586097e-08, "loss": 0.7476, "step": 12942 }, { "epoch": 1.8324485028668507, "grad_norm": 9.438909362247868, "learning_rate": 9.140018957479236e-08, "loss": 0.6312, "step": 12943 }, { "epoch": 1.832590075741488, "grad_norm": 8.197622782679707, "learning_rate": 9.124667868201986e-08, "loss": 0.6637, "step": 12944 }, { "epoch": 1.8327316486161251, "grad_norm": 8.790929923140908, "learning_rate": 9.109329441561343e-08, "loss": 0.6882, "step": 12945 }, { "epoch": 1.8328732214907624, "grad_norm": 8.410699623935129, "learning_rate": 9.094003678363633e-08, "loss": 0.7914, "step": 12946 }, { "epoch": 1.8330147943653996, "grad_norm": 8.813214645086724, "learning_rate": 9.078690579414546e-08, "loss": 0.69, "step": 12947 }, { "epoch": 1.8331563672400368, "grad_norm": 8.47990330551008, "learning_rate": 9.063390145519019e-08, "loss": 0.6243, "step": 12948 }, { "epoch": 1.833297940114674, "grad_norm": 8.812757988799001, "learning_rate": 9.048102377481466e-08, "loss": 0.6566, "step": 12949 }, { "epoch": 1.8334395129893113, "grad_norm": 9.429056394580929, "learning_rate": 9.032827276105466e-08, "loss": 0.6396, "step": 12950 }, { "epoch": 1.8335810858639485, "grad_norm": 9.295791691085945, "learning_rate": 9.017564842194099e-08, "loss": 0.7451, "step": 12951 }, { "epoch": 1.8337226587385858, "grad_norm": 9.231379970495635, "learning_rate": 9.002315076549639e-08, "loss": 0.627, "step": 12952 }, { "epoch": 1.833864231613223, "grad_norm": 10.373021316281353, "learning_rate": 8.987077979973807e-08, "loss": 0.7231, "step": 12953 }, { "epoch": 1.8340058044878602, "grad_norm": 11.169744114962974, "learning_rate": 8.971853553267545e-08, "loss": 0.7604, "step": 12954 }, { "epoch": 1.8341473773624974, "grad_norm": 8.850373392795047, "learning_rate": 8.956641797231214e-08, "loss": 0.623, "step": 12955 }, { "epoch": 1.8342889502371347, "grad_norm": 9.901294708362915, "learning_rate": 8.941442712664561e-08, "loss": 0.6048, "step": 12956 }, { "epoch": 1.834430523111772, "grad_norm": 9.993635441050133, "learning_rate": 8.926256300366475e-08, "loss": 0.6402, "step": 12957 }, { "epoch": 1.8345720959864091, "grad_norm": 8.83810233689792, "learning_rate": 8.911082561135348e-08, "loss": 0.6699, "step": 12958 }, { "epoch": 1.8347136688610464, "grad_norm": 8.19150357968524, "learning_rate": 8.895921495768845e-08, "loss": 0.6706, "step": 12959 }, { "epoch": 1.8348552417356836, "grad_norm": 9.60822647798469, "learning_rate": 8.880773105063994e-08, "loss": 0.6811, "step": 12960 }, { "epoch": 1.8349968146103208, "grad_norm": 7.889305321652341, "learning_rate": 8.865637389817077e-08, "loss": 0.6796, "step": 12961 }, { "epoch": 1.8351383874849578, "grad_norm": 8.126407442702137, "learning_rate": 8.850514350823819e-08, "loss": 0.7123, "step": 12962 }, { "epoch": 1.835279960359595, "grad_norm": 8.581843225210411, "learning_rate": 8.835403988879221e-08, "loss": 0.7721, "step": 12963 }, { "epoch": 1.8354215332342323, "grad_norm": 8.45082814148149, "learning_rate": 8.820306304777593e-08, "loss": 0.6031, "step": 12964 }, { "epoch": 1.8355631061088695, "grad_norm": 10.011066417003939, "learning_rate": 8.805221299312689e-08, "loss": 0.6298, "step": 12965 }, { "epoch": 1.8357046789835068, "grad_norm": 7.648658445321218, "learning_rate": 8.790148973277401e-08, "loss": 0.7012, "step": 12966 }, { "epoch": 1.835846251858144, "grad_norm": 9.769554678100658, "learning_rate": 8.775089327464154e-08, "loss": 0.6285, "step": 12967 }, { "epoch": 1.8359878247327812, "grad_norm": 7.586493396132402, "learning_rate": 8.760042362664617e-08, "loss": 0.6838, "step": 12968 }, { "epoch": 1.8361293976074184, "grad_norm": 10.846680698616254, "learning_rate": 8.745008079669742e-08, "loss": 0.6189, "step": 12969 }, { "epoch": 1.8362709704820557, "grad_norm": 10.412932395429126, "learning_rate": 8.729986479269926e-08, "loss": 0.6661, "step": 12970 }, { "epoch": 1.836412543356693, "grad_norm": 8.631800527507554, "learning_rate": 8.714977562254784e-08, "loss": 0.6222, "step": 12971 }, { "epoch": 1.83655411623133, "grad_norm": 7.8246875152106465, "learning_rate": 8.699981329413409e-08, "loss": 0.6567, "step": 12972 }, { "epoch": 1.8366956891059671, "grad_norm": 8.568402782614575, "learning_rate": 8.68499778153406e-08, "loss": 0.6605, "step": 12973 }, { "epoch": 1.8368372619806044, "grad_norm": 9.60518916073372, "learning_rate": 8.670026919404467e-08, "loss": 0.6731, "step": 12974 }, { "epoch": 1.8369788348552416, "grad_norm": 7.618738073350617, "learning_rate": 8.655068743811613e-08, "loss": 0.6915, "step": 12975 }, { "epoch": 1.8371204077298788, "grad_norm": 7.64113223648621, "learning_rate": 8.640123255541838e-08, "loss": 0.5963, "step": 12976 }, { "epoch": 1.837261980604516, "grad_norm": 9.002629213939219, "learning_rate": 8.625190455380821e-08, "loss": 0.6316, "step": 12977 }, { "epoch": 1.8374035534791533, "grad_norm": 8.763139287290658, "learning_rate": 8.610270344113575e-08, "loss": 0.6735, "step": 12978 }, { "epoch": 1.8375451263537905, "grad_norm": 9.552700631521368, "learning_rate": 8.595362922524413e-08, "loss": 0.6914, "step": 12979 }, { "epoch": 1.8376866992284278, "grad_norm": 8.866922920749877, "learning_rate": 8.580468191397018e-08, "loss": 0.6276, "step": 12980 }, { "epoch": 1.837828272103065, "grad_norm": 10.302312084058789, "learning_rate": 8.565586151514427e-08, "loss": 0.6752, "step": 12981 }, { "epoch": 1.8379698449777022, "grad_norm": 9.411460269884854, "learning_rate": 8.550716803658904e-08, "loss": 0.5925, "step": 12982 }, { "epoch": 1.8381114178523394, "grad_norm": 8.78335361466307, "learning_rate": 8.535860148612213e-08, "loss": 0.7052, "step": 12983 }, { "epoch": 1.8382529907269767, "grad_norm": 8.760459533901015, "learning_rate": 8.521016187155284e-08, "loss": 0.7035, "step": 12984 }, { "epoch": 1.838394563601614, "grad_norm": 9.260014242404342, "learning_rate": 8.506184920068466e-08, "loss": 0.58, "step": 12985 }, { "epoch": 1.8385361364762511, "grad_norm": 9.580611884659177, "learning_rate": 8.491366348131469e-08, "loss": 0.6212, "step": 12986 }, { "epoch": 1.8386777093508884, "grad_norm": 8.218179545168205, "learning_rate": 8.476560472123251e-08, "loss": 0.7013, "step": 12987 }, { "epoch": 1.8388192822255256, "grad_norm": 10.311269975768436, "learning_rate": 8.46176729282222e-08, "loss": 0.6788, "step": 12988 }, { "epoch": 1.8389608551001628, "grad_norm": 8.090030478289215, "learning_rate": 8.44698681100592e-08, "loss": 0.6635, "step": 12989 }, { "epoch": 1.8391024279748, "grad_norm": 8.150489146525913, "learning_rate": 8.432219027451421e-08, "loss": 0.643, "step": 12990 }, { "epoch": 1.8392440008494373, "grad_norm": 10.27115290662397, "learning_rate": 8.41746394293505e-08, "loss": 0.6895, "step": 12991 }, { "epoch": 1.8393855737240745, "grad_norm": 8.980941089687946, "learning_rate": 8.402721558232463e-08, "loss": 0.6816, "step": 12992 }, { "epoch": 1.8395271465987117, "grad_norm": 8.57783771860705, "learning_rate": 8.387991874118678e-08, "loss": 0.7133, "step": 12993 }, { "epoch": 1.839668719473349, "grad_norm": 9.453343715186756, "learning_rate": 8.373274891367993e-08, "loss": 0.6234, "step": 12994 }, { "epoch": 1.8398102923479862, "grad_norm": 7.946881854751517, "learning_rate": 8.358570610754097e-08, "loss": 0.5455, "step": 12995 }, { "epoch": 1.8399518652226234, "grad_norm": 8.857258431168411, "learning_rate": 8.343879033049951e-08, "loss": 0.7148, "step": 12996 }, { "epoch": 1.8400934380972607, "grad_norm": 9.629459289391834, "learning_rate": 8.329200159027939e-08, "loss": 0.6604, "step": 12997 }, { "epoch": 1.840235010971898, "grad_norm": 11.255491823612429, "learning_rate": 8.314533989459612e-08, "loss": 0.6867, "step": 12998 }, { "epoch": 1.8403765838465351, "grad_norm": 8.97416922036802, "learning_rate": 8.299880525116072e-08, "loss": 0.6149, "step": 12999 }, { "epoch": 1.8405181567211724, "grad_norm": 9.726277763151563, "learning_rate": 8.285239766767595e-08, "loss": 0.6519, "step": 13000 }, { "epoch": 1.8406597295958096, "grad_norm": 8.449004291805586, "learning_rate": 8.270611715183813e-08, "loss": 0.6341, "step": 13001 }, { "epoch": 1.8408013024704468, "grad_norm": 8.86598069637511, "learning_rate": 8.25599637113375e-08, "loss": 0.656, "step": 13002 }, { "epoch": 1.8409428753450838, "grad_norm": 9.229783519053615, "learning_rate": 8.241393735385684e-08, "loss": 0.6241, "step": 13003 }, { "epoch": 1.841084448219721, "grad_norm": 8.746663792694985, "learning_rate": 8.226803808707301e-08, "loss": 0.6466, "step": 13004 }, { "epoch": 1.8412260210943583, "grad_norm": 8.662276500994846, "learning_rate": 8.212226591865547e-08, "loss": 0.6786, "step": 13005 }, { "epoch": 1.8413675939689955, "grad_norm": 13.76662369568209, "learning_rate": 8.197662085626778e-08, "loss": 0.7463, "step": 13006 }, { "epoch": 1.8415091668436327, "grad_norm": 10.360573539617594, "learning_rate": 8.183110290756608e-08, "loss": 0.6974, "step": 13007 }, { "epoch": 1.84165073971827, "grad_norm": 10.412892830389064, "learning_rate": 8.168571208020032e-08, "loss": 0.6687, "step": 13008 }, { "epoch": 1.8417923125929072, "grad_norm": 8.952329909885453, "learning_rate": 8.154044838181385e-08, "loss": 0.7103, "step": 13009 }, { "epoch": 1.8419338854675444, "grad_norm": 10.022228712186477, "learning_rate": 8.139531182004223e-08, "loss": 0.6938, "step": 13010 }, { "epoch": 1.8420754583421817, "grad_norm": 9.135821470877914, "learning_rate": 8.125030240251575e-08, "loss": 0.616, "step": 13011 }, { "epoch": 1.842217031216819, "grad_norm": 8.759587729868663, "learning_rate": 8.110542013685745e-08, "loss": 0.6755, "step": 13012 }, { "epoch": 1.8423586040914561, "grad_norm": 9.090098063964987, "learning_rate": 8.09606650306835e-08, "loss": 0.6794, "step": 13013 }, { "epoch": 1.8425001769660931, "grad_norm": 9.318426307465025, "learning_rate": 8.081603709160362e-08, "loss": 0.6644, "step": 13014 }, { "epoch": 1.8426417498407304, "grad_norm": 10.265272717985109, "learning_rate": 8.067153632722092e-08, "loss": 0.672, "step": 13015 }, { "epoch": 1.8427833227153676, "grad_norm": 8.63026643379344, "learning_rate": 8.052716274513178e-08, "loss": 0.6195, "step": 13016 }, { "epoch": 1.8429248955900048, "grad_norm": 7.908452266311984, "learning_rate": 8.038291635292545e-08, "loss": 0.6322, "step": 13017 }, { "epoch": 1.843066468464642, "grad_norm": 9.796485223880003, "learning_rate": 8.023879715818556e-08, "loss": 0.624, "step": 13018 }, { "epoch": 1.8432080413392793, "grad_norm": 7.96316727697307, "learning_rate": 8.009480516848717e-08, "loss": 0.6431, "step": 13019 }, { "epoch": 1.8433496142139165, "grad_norm": 8.56472816156154, "learning_rate": 7.995094039140116e-08, "loss": 0.6319, "step": 13020 }, { "epoch": 1.8434911870885538, "grad_norm": 7.960430754311568, "learning_rate": 7.980720283448957e-08, "loss": 0.5569, "step": 13021 }, { "epoch": 1.843632759963191, "grad_norm": 8.625060150379024, "learning_rate": 7.966359250530824e-08, "loss": 0.55, "step": 13022 }, { "epoch": 1.8437743328378282, "grad_norm": 9.1341540262572, "learning_rate": 7.952010941140786e-08, "loss": 0.6558, "step": 13023 }, { "epoch": 1.8439159057124654, "grad_norm": 11.410996711386154, "learning_rate": 7.937675356032987e-08, "loss": 0.6676, "step": 13024 }, { "epoch": 1.8440574785871027, "grad_norm": 10.778822827443179, "learning_rate": 7.923352495961157e-08, "loss": 0.7208, "step": 13025 }, { "epoch": 1.84419905146174, "grad_norm": 8.734478071283617, "learning_rate": 7.909042361678165e-08, "loss": 0.6421, "step": 13026 }, { "epoch": 1.8443406243363771, "grad_norm": 9.557802330461936, "learning_rate": 7.894744953936329e-08, "loss": 0.6252, "step": 13027 }, { "epoch": 1.8444821972110144, "grad_norm": 9.551521731264213, "learning_rate": 7.880460273487184e-08, "loss": 0.7089, "step": 13028 }, { "epoch": 1.8446237700856516, "grad_norm": 9.803317104180461, "learning_rate": 7.866188321081741e-08, "loss": 0.606, "step": 13029 }, { "epoch": 1.8447653429602888, "grad_norm": 9.452005115095991, "learning_rate": 7.851929097470234e-08, "loss": 0.6178, "step": 13030 }, { "epoch": 1.844906915834926, "grad_norm": 9.560733806586718, "learning_rate": 7.837682603402258e-08, "loss": 0.7163, "step": 13031 }, { "epoch": 1.8450484887095633, "grad_norm": 11.517550800427568, "learning_rate": 7.823448839626768e-08, "loss": 0.7955, "step": 13032 }, { "epoch": 1.8451900615842005, "grad_norm": 8.114290662261253, "learning_rate": 7.809227806891972e-08, "loss": 0.5917, "step": 13033 }, { "epoch": 1.8453316344588377, "grad_norm": 9.688614319732386, "learning_rate": 7.795019505945495e-08, "loss": 0.8014, "step": 13034 }, { "epoch": 1.845473207333475, "grad_norm": 9.066423921514446, "learning_rate": 7.78082393753421e-08, "loss": 0.6902, "step": 13035 }, { "epoch": 1.8456147802081122, "grad_norm": 9.209766213165885, "learning_rate": 7.766641102404438e-08, "loss": 0.6234, "step": 13036 }, { "epoch": 1.8457563530827494, "grad_norm": 9.90351940752748, "learning_rate": 7.75247100130172e-08, "loss": 0.6635, "step": 13037 }, { "epoch": 1.8458979259573867, "grad_norm": 8.452180425361913, "learning_rate": 7.738313634970962e-08, "loss": 0.6207, "step": 13038 }, { "epoch": 1.846039498832024, "grad_norm": 7.365022537393783, "learning_rate": 7.724169004156457e-08, "loss": 0.6761, "step": 13039 }, { "epoch": 1.8461810717066611, "grad_norm": 9.439745907354189, "learning_rate": 7.710037109601692e-08, "loss": 0.6971, "step": 13040 }, { "epoch": 1.8463226445812984, "grad_norm": 9.570862149394632, "learning_rate": 7.695917952049658e-08, "loss": 0.6911, "step": 13041 }, { "epoch": 1.8464642174559356, "grad_norm": 9.706462587090616, "learning_rate": 7.68181153224254e-08, "loss": 0.6086, "step": 13042 }, { "epoch": 1.8466057903305728, "grad_norm": 9.157863019183296, "learning_rate": 7.66771785092188e-08, "loss": 0.6046, "step": 13043 }, { "epoch": 1.84674736320521, "grad_norm": 9.937376393443138, "learning_rate": 7.653636908828644e-08, "loss": 0.6824, "step": 13044 }, { "epoch": 1.846888936079847, "grad_norm": 7.854065696276406, "learning_rate": 7.639568706702988e-08, "loss": 0.6608, "step": 13045 }, { "epoch": 1.8470305089544843, "grad_norm": 10.122580910376039, "learning_rate": 7.625513245284515e-08, "loss": 0.6641, "step": 13046 }, { "epoch": 1.8471720818291215, "grad_norm": 8.726660855113815, "learning_rate": 7.611470525312054e-08, "loss": 0.5836, "step": 13047 }, { "epoch": 1.8473136547037587, "grad_norm": 9.953652743686336, "learning_rate": 7.597440547523872e-08, "loss": 0.5856, "step": 13048 }, { "epoch": 1.847455227578396, "grad_norm": 10.781866702810232, "learning_rate": 7.58342331265749e-08, "loss": 0.7356, "step": 13049 }, { "epoch": 1.8475968004530332, "grad_norm": 9.515408117268727, "learning_rate": 7.56941882144982e-08, "loss": 0.6186, "step": 13050 }, { "epoch": 1.8477383733276704, "grad_norm": 10.538233583738297, "learning_rate": 7.555427074636995e-08, "loss": 0.5796, "step": 13051 }, { "epoch": 1.8478799462023077, "grad_norm": 8.33418829029005, "learning_rate": 7.541448072954622e-08, "loss": 0.7428, "step": 13052 }, { "epoch": 1.848021519076945, "grad_norm": 10.085497999477315, "learning_rate": 7.527481817137555e-08, "loss": 0.6078, "step": 13053 }, { "epoch": 1.8481630919515821, "grad_norm": 10.343038206013487, "learning_rate": 7.513528307919931e-08, "loss": 0.6817, "step": 13054 }, { "epoch": 1.8483046648262191, "grad_norm": 8.424561477251167, "learning_rate": 7.499587546035358e-08, "loss": 0.6459, "step": 13055 }, { "epoch": 1.8484462377008564, "grad_norm": 8.590243869290715, "learning_rate": 7.48565953221661e-08, "loss": 0.7753, "step": 13056 }, { "epoch": 1.8485878105754936, "grad_norm": 9.693479993393716, "learning_rate": 7.471744267195962e-08, "loss": 0.7097, "step": 13057 }, { "epoch": 1.8487293834501308, "grad_norm": 9.84118774006745, "learning_rate": 7.457841751704831e-08, "loss": 0.7846, "step": 13058 }, { "epoch": 1.848870956324768, "grad_norm": 9.126224383422864, "learning_rate": 7.44395198647413e-08, "loss": 0.6258, "step": 13059 }, { "epoch": 1.8490125291994053, "grad_norm": 10.06490055616877, "learning_rate": 7.430074972234053e-08, "loss": 0.6081, "step": 13060 }, { "epoch": 1.8491541020740425, "grad_norm": 10.132330230789051, "learning_rate": 7.416210709714016e-08, "loss": 0.6573, "step": 13061 }, { "epoch": 1.8492956749486797, "grad_norm": 8.914320117059633, "learning_rate": 7.402359199642967e-08, "loss": 0.5626, "step": 13062 }, { "epoch": 1.849437247823317, "grad_norm": 11.384783551248969, "learning_rate": 7.388520442748959e-08, "loss": 0.5729, "step": 13063 }, { "epoch": 1.8495788206979542, "grad_norm": 9.948800816948367, "learning_rate": 7.374694439759523e-08, "loss": 0.7021, "step": 13064 }, { "epoch": 1.8497203935725914, "grad_norm": 8.92914922098107, "learning_rate": 7.36088119140152e-08, "loss": 0.6816, "step": 13065 }, { "epoch": 1.8498619664472287, "grad_norm": 8.654837482762305, "learning_rate": 7.347080698401038e-08, "loss": 0.6609, "step": 13066 }, { "epoch": 1.850003539321866, "grad_norm": 8.440968796508658, "learning_rate": 7.333292961483634e-08, "loss": 0.7152, "step": 13067 }, { "epoch": 1.8501451121965031, "grad_norm": 8.869805333561354, "learning_rate": 7.319517981374036e-08, "loss": 0.6811, "step": 13068 }, { "epoch": 1.8502866850711404, "grad_norm": 9.246284769757906, "learning_rate": 7.305755758796468e-08, "loss": 0.6209, "step": 13069 }, { "epoch": 1.8504282579457776, "grad_norm": 9.284921247551978, "learning_rate": 7.292006294474325e-08, "loss": 0.6903, "step": 13070 }, { "epoch": 1.8505698308204148, "grad_norm": 10.46192750811525, "learning_rate": 7.278269589130472e-08, "loss": 0.6301, "step": 13071 }, { "epoch": 1.850711403695052, "grad_norm": 9.267615376308168, "learning_rate": 7.264545643486997e-08, "loss": 0.7503, "step": 13072 }, { "epoch": 1.8508529765696893, "grad_norm": 9.870361397570624, "learning_rate": 7.250834458265355e-08, "loss": 0.6648, "step": 13073 }, { "epoch": 1.8509945494443265, "grad_norm": 8.833305310858657, "learning_rate": 7.237136034186382e-08, "loss": 0.6527, "step": 13074 }, { "epoch": 1.8511361223189637, "grad_norm": 9.12463294558206, "learning_rate": 7.223450371970114e-08, "loss": 0.6803, "step": 13075 }, { "epoch": 1.851277695193601, "grad_norm": 10.967764350969546, "learning_rate": 7.209777472336061e-08, "loss": 0.6272, "step": 13076 }, { "epoch": 1.8514192680682382, "grad_norm": 9.437547292022444, "learning_rate": 7.19611733600295e-08, "loss": 0.7625, "step": 13077 }, { "epoch": 1.8515608409428754, "grad_norm": 9.39956254955766, "learning_rate": 7.182469963688932e-08, "loss": 0.6471, "step": 13078 }, { "epoch": 1.8517024138175127, "grad_norm": 8.822405935830771, "learning_rate": 7.168835356111376e-08, "loss": 0.6514, "step": 13079 }, { "epoch": 1.85184398669215, "grad_norm": 9.727364111044066, "learning_rate": 7.155213513987124e-08, "loss": 0.7398, "step": 13080 }, { "epoch": 1.8519855595667871, "grad_norm": 8.741556071505206, "learning_rate": 7.141604438032218e-08, "loss": 0.6416, "step": 13081 }, { "epoch": 1.8521271324414244, "grad_norm": 10.414311582730157, "learning_rate": 7.128008128962055e-08, "loss": 0.727, "step": 13082 }, { "epoch": 1.8522687053160616, "grad_norm": 11.614587858509955, "learning_rate": 7.11442458749148e-08, "loss": 0.6868, "step": 13083 }, { "epoch": 1.8524102781906988, "grad_norm": 9.791526587816726, "learning_rate": 7.100853814334451e-08, "loss": 0.6547, "step": 13084 }, { "epoch": 1.852551851065336, "grad_norm": 8.98407820501623, "learning_rate": 7.087295810204425e-08, "loss": 0.6829, "step": 13085 }, { "epoch": 1.852693423939973, "grad_norm": 9.0694860431781, "learning_rate": 7.073750575814136e-08, "loss": 0.7021, "step": 13086 }, { "epoch": 1.8528349968146103, "grad_norm": 10.121663238392482, "learning_rate": 7.060218111875628e-08, "loss": 0.6742, "step": 13087 }, { "epoch": 1.8529765696892475, "grad_norm": 8.625667380344993, "learning_rate": 7.046698419100356e-08, "loss": 0.5514, "step": 13088 }, { "epoch": 1.8531181425638847, "grad_norm": 8.477965123728698, "learning_rate": 7.033191498198949e-08, "loss": 0.6415, "step": 13089 }, { "epoch": 1.853259715438522, "grad_norm": 10.37936750656926, "learning_rate": 7.019697349881532e-08, "loss": 0.6515, "step": 13090 }, { "epoch": 1.8534012883131592, "grad_norm": 8.865996185804459, "learning_rate": 7.006215974857428e-08, "loss": 0.7342, "step": 13091 }, { "epoch": 1.8535428611877964, "grad_norm": 9.506026514800896, "learning_rate": 6.992747373835401e-08, "loss": 0.7017, "step": 13092 }, { "epoch": 1.8536844340624337, "grad_norm": 10.12022720684974, "learning_rate": 6.979291547523415e-08, "loss": 0.725, "step": 13093 }, { "epoch": 1.853826006937071, "grad_norm": 9.425733089521596, "learning_rate": 6.965848496628902e-08, "loss": 0.682, "step": 13094 }, { "epoch": 1.8539675798117081, "grad_norm": 8.440765879082791, "learning_rate": 6.952418221858492e-08, "loss": 0.6144, "step": 13095 }, { "epoch": 1.8541091526863451, "grad_norm": 9.751182680047993, "learning_rate": 6.939000723918232e-08, "loss": 0.6, "step": 13096 }, { "epoch": 1.8542507255609824, "grad_norm": 8.963218977219833, "learning_rate": 6.925596003513501e-08, "loss": 0.5751, "step": 13097 }, { "epoch": 1.8543922984356196, "grad_norm": 8.512067419143651, "learning_rate": 6.912204061348904e-08, "loss": 0.6247, "step": 13098 }, { "epoch": 1.8545338713102568, "grad_norm": 10.18538751523226, "learning_rate": 6.898824898128515e-08, "loss": 0.6935, "step": 13099 }, { "epoch": 1.854675444184894, "grad_norm": 11.776086297921468, "learning_rate": 6.885458514555632e-08, "loss": 0.6937, "step": 13100 }, { "epoch": 1.8548170170595313, "grad_norm": 9.847097645055115, "learning_rate": 6.872104911332916e-08, "loss": 0.6895, "step": 13101 }, { "epoch": 1.8549585899341685, "grad_norm": 9.520570268620787, "learning_rate": 6.858764089162334e-08, "loss": 0.7186, "step": 13102 }, { "epoch": 1.8551001628088057, "grad_norm": 9.498752712887411, "learning_rate": 6.845436048745241e-08, "loss": 0.6932, "step": 13103 }, { "epoch": 1.855241735683443, "grad_norm": 11.024221974988013, "learning_rate": 6.832120790782326e-08, "loss": 0.6532, "step": 13104 }, { "epoch": 1.8553833085580802, "grad_norm": 7.8937360918975115, "learning_rate": 6.818818315973475e-08, "loss": 0.6827, "step": 13105 }, { "epoch": 1.8555248814327174, "grad_norm": 10.20441441472088, "learning_rate": 6.805528625018016e-08, "loss": 0.6317, "step": 13106 }, { "epoch": 1.8556664543073547, "grad_norm": 9.221764436974883, "learning_rate": 6.792251718614584e-08, "loss": 0.6293, "step": 13107 }, { "epoch": 1.855808027181992, "grad_norm": 9.370079275017751, "learning_rate": 6.778987597461123e-08, "loss": 0.7295, "step": 13108 }, { "epoch": 1.8559496000566291, "grad_norm": 9.524500123371181, "learning_rate": 6.765736262254935e-08, "loss": 0.7292, "step": 13109 }, { "epoch": 1.8560911729312664, "grad_norm": 10.961121256478224, "learning_rate": 6.752497713692629e-08, "loss": 0.6066, "step": 13110 }, { "epoch": 1.8562327458059036, "grad_norm": 12.479400140088678, "learning_rate": 6.73927195247015e-08, "loss": 0.7195, "step": 13111 }, { "epoch": 1.8563743186805408, "grad_norm": 9.032174139028452, "learning_rate": 6.726058979282774e-08, "loss": 0.7417, "step": 13112 }, { "epoch": 1.856515891555178, "grad_norm": 10.98173532471929, "learning_rate": 6.712858794825083e-08, "loss": 0.6611, "step": 13113 }, { "epoch": 1.8566574644298153, "grad_norm": 8.137119116710242, "learning_rate": 6.699671399790969e-08, "loss": 0.6782, "step": 13114 }, { "epoch": 1.8567990373044525, "grad_norm": 9.59473366928808, "learning_rate": 6.686496794873792e-08, "loss": 0.6397, "step": 13115 }, { "epoch": 1.8569406101790897, "grad_norm": 8.172507967240495, "learning_rate": 6.673334980765972e-08, "loss": 0.6704, "step": 13116 }, { "epoch": 1.857082183053727, "grad_norm": 8.322345152239711, "learning_rate": 6.660185958159537e-08, "loss": 0.6968, "step": 13117 }, { "epoch": 1.8572237559283642, "grad_norm": 9.605627603912149, "learning_rate": 6.647049727745685e-08, "loss": 0.5798, "step": 13118 }, { "epoch": 1.8573653288030014, "grad_norm": 8.926095368515398, "learning_rate": 6.633926290214976e-08, "loss": 0.6285, "step": 13119 }, { "epoch": 1.8575069016776387, "grad_norm": 8.660638571227054, "learning_rate": 6.620815646257301e-08, "loss": 0.6663, "step": 13120 }, { "epoch": 1.8576484745522759, "grad_norm": 9.357007881613761, "learning_rate": 6.607717796561858e-08, "loss": 0.7146, "step": 13121 }, { "epoch": 1.8577900474269131, "grad_norm": 9.176256423280726, "learning_rate": 6.594632741817237e-08, "loss": 0.7091, "step": 13122 }, { "epoch": 1.8579316203015503, "grad_norm": 9.186126813967089, "learning_rate": 6.581560482711247e-08, "loss": 0.6407, "step": 13123 }, { "epoch": 1.8580731931761876, "grad_norm": 9.197960951033565, "learning_rate": 6.568501019931173e-08, "loss": 0.6881, "step": 13124 }, { "epoch": 1.8582147660508248, "grad_norm": 9.110424786074184, "learning_rate": 6.555454354163437e-08, "loss": 0.678, "step": 13125 }, { "epoch": 1.858356338925462, "grad_norm": 9.344113346039464, "learning_rate": 6.542420486093992e-08, "loss": 0.6509, "step": 13126 }, { "epoch": 1.8584979118000993, "grad_norm": 10.582411558203878, "learning_rate": 6.529399416407955e-08, "loss": 0.6817, "step": 13127 }, { "epoch": 1.8586394846747363, "grad_norm": 8.870421182122199, "learning_rate": 6.516391145789836e-08, "loss": 0.599, "step": 13128 }, { "epoch": 1.8587810575493735, "grad_norm": 9.816560136079922, "learning_rate": 6.503395674923529e-08, "loss": 0.6624, "step": 13129 }, { "epoch": 1.8589226304240107, "grad_norm": 10.256439419240328, "learning_rate": 6.490413004492102e-08, "loss": 0.6111, "step": 13130 }, { "epoch": 1.859064203298648, "grad_norm": 8.747776293886957, "learning_rate": 6.477443135178118e-08, "loss": 0.779, "step": 13131 }, { "epoch": 1.8592057761732852, "grad_norm": 9.282790752925418, "learning_rate": 6.464486067663366e-08, "loss": 0.7222, "step": 13132 }, { "epoch": 1.8593473490479224, "grad_norm": 9.809800086361342, "learning_rate": 6.451541802628969e-08, "loss": 0.6303, "step": 13133 }, { "epoch": 1.8594889219225597, "grad_norm": 8.984446331238573, "learning_rate": 6.438610340755464e-08, "loss": 0.5765, "step": 13134 }, { "epoch": 1.8596304947971969, "grad_norm": 10.858039697017677, "learning_rate": 6.425691682722584e-08, "loss": 0.607, "step": 13135 }, { "epoch": 1.8597720676718341, "grad_norm": 8.765026595689514, "learning_rate": 6.412785829209511e-08, "loss": 0.7024, "step": 13136 }, { "epoch": 1.8599136405464713, "grad_norm": 7.985283667411161, "learning_rate": 6.399892780894618e-08, "loss": 0.6143, "step": 13137 }, { "epoch": 1.8600552134211084, "grad_norm": 8.839023364909165, "learning_rate": 6.387012538455723e-08, "loss": 0.6066, "step": 13138 }, { "epoch": 1.8601967862957456, "grad_norm": 8.899807068523092, "learning_rate": 6.37414510256995e-08, "loss": 0.704, "step": 13139 }, { "epoch": 1.8603383591703828, "grad_norm": 8.895489319009346, "learning_rate": 6.361290473913705e-08, "loss": 0.6414, "step": 13140 }, { "epoch": 1.86047993204502, "grad_norm": 9.751462777765823, "learning_rate": 6.34844865316278e-08, "loss": 0.713, "step": 13141 }, { "epoch": 1.8606215049196573, "grad_norm": 10.06803902709332, "learning_rate": 6.335619640992191e-08, "loss": 0.6748, "step": 13142 }, { "epoch": 1.8607630777942945, "grad_norm": 10.393213669547036, "learning_rate": 6.322803438076453e-08, "loss": 0.6427, "step": 13143 }, { "epoch": 1.8609046506689317, "grad_norm": 8.70685863024267, "learning_rate": 6.310000045089193e-08, "loss": 0.681, "step": 13144 }, { "epoch": 1.861046223543569, "grad_norm": 8.630491415908953, "learning_rate": 6.297209462703569e-08, "loss": 0.6848, "step": 13145 }, { "epoch": 1.8611877964182062, "grad_norm": 8.57405013003331, "learning_rate": 6.284431691591875e-08, "loss": 0.7316, "step": 13146 }, { "epoch": 1.8613293692928434, "grad_norm": 9.988390095833411, "learning_rate": 6.271666732425935e-08, "loss": 0.632, "step": 13147 }, { "epoch": 1.8614709421674807, "grad_norm": 9.59772103597894, "learning_rate": 6.258914585876741e-08, "loss": 0.581, "step": 13148 }, { "epoch": 1.8616125150421179, "grad_norm": 8.912214885072894, "learning_rate": 6.246175252614645e-08, "loss": 0.7001, "step": 13149 }, { "epoch": 1.8617540879167551, "grad_norm": 9.382129043982992, "learning_rate": 6.233448733309388e-08, "loss": 0.6199, "step": 13150 }, { "epoch": 1.8618956607913923, "grad_norm": 8.64320552983988, "learning_rate": 6.220735028629937e-08, "loss": 0.6989, "step": 13151 }, { "epoch": 1.8620372336660296, "grad_norm": 9.305912427242996, "learning_rate": 6.2080341392447e-08, "loss": 0.731, "step": 13152 }, { "epoch": 1.8621788065406668, "grad_norm": 9.197989982292624, "learning_rate": 6.195346065821312e-08, "loss": 0.7389, "step": 13153 }, { "epoch": 1.862320379415304, "grad_norm": 7.186534783610591, "learning_rate": 6.18267080902682e-08, "loss": 0.5672, "step": 13154 }, { "epoch": 1.8624619522899413, "grad_norm": 8.898644984960315, "learning_rate": 6.170008369527496e-08, "loss": 0.6305, "step": 13155 }, { "epoch": 1.8626035251645785, "grad_norm": 8.170045374320575, "learning_rate": 6.157358747989034e-08, "loss": 0.6944, "step": 13156 }, { "epoch": 1.8627450980392157, "grad_norm": 10.251059128540618, "learning_rate": 6.144721945076426e-08, "loss": 0.6851, "step": 13157 }, { "epoch": 1.862886670913853, "grad_norm": 9.086664224886922, "learning_rate": 6.132097961453948e-08, "loss": 0.656, "step": 13158 }, { "epoch": 1.8630282437884902, "grad_norm": 9.300310388123059, "learning_rate": 6.119486797785263e-08, "loss": 0.6449, "step": 13159 }, { "epoch": 1.8631698166631274, "grad_norm": 9.364523361630711, "learning_rate": 6.106888454733284e-08, "loss": 0.6211, "step": 13160 }, { "epoch": 1.8633113895377647, "grad_norm": 10.3135379153351, "learning_rate": 6.094302932960317e-08, "loss": 0.6962, "step": 13161 }, { "epoch": 1.8634529624124019, "grad_norm": 8.671146066633103, "learning_rate": 6.081730233127996e-08, "loss": 0.6626, "step": 13162 }, { "epoch": 1.8635945352870391, "grad_norm": 10.926297625333262, "learning_rate": 6.069170355897241e-08, "loss": 0.6299, "step": 13163 }, { "epoch": 1.8637361081616763, "grad_norm": 9.756532485850746, "learning_rate": 6.056623301928327e-08, "loss": 0.6951, "step": 13164 }, { "epoch": 1.8638776810363136, "grad_norm": 9.73187566667902, "learning_rate": 6.04408907188081e-08, "loss": 0.7238, "step": 13165 }, { "epoch": 1.8640192539109508, "grad_norm": 9.425733898943434, "learning_rate": 6.031567666413663e-08, "loss": 0.5438, "step": 13166 }, { "epoch": 1.864160826785588, "grad_norm": 9.529479215628404, "learning_rate": 6.019059086185053e-08, "loss": 0.675, "step": 13167 }, { "epoch": 1.8643023996602253, "grad_norm": 9.608610789607015, "learning_rate": 6.006563331852622e-08, "loss": 0.5173, "step": 13168 }, { "epoch": 1.8644439725348623, "grad_norm": 9.021225801054603, "learning_rate": 5.994080404073233e-08, "loss": 0.638, "step": 13169 }, { "epoch": 1.8645855454094995, "grad_norm": 8.687847322615323, "learning_rate": 5.981610303503054e-08, "loss": 0.6519, "step": 13170 }, { "epoch": 1.8647271182841367, "grad_norm": 9.855423463406654, "learning_rate": 5.969153030797731e-08, "loss": 0.7267, "step": 13171 }, { "epoch": 1.864868691158774, "grad_norm": 9.666766812912908, "learning_rate": 5.9567085866120144e-08, "loss": 0.6029, "step": 13172 }, { "epoch": 1.8650102640334112, "grad_norm": 8.25983282257189, "learning_rate": 5.944276971600216e-08, "loss": 0.588, "step": 13173 }, { "epoch": 1.8651518369080484, "grad_norm": 10.069613578223649, "learning_rate": 5.9318581864157563e-08, "loss": 0.5961, "step": 13174 }, { "epoch": 1.8652934097826857, "grad_norm": 8.726748718078031, "learning_rate": 5.919452231711559e-08, "loss": 0.6132, "step": 13175 }, { "epoch": 1.8654349826573229, "grad_norm": 8.441347502398536, "learning_rate": 5.9070591081397397e-08, "loss": 0.7404, "step": 13176 }, { "epoch": 1.8655765555319601, "grad_norm": 9.937830889489211, "learning_rate": 5.894678816351862e-08, "loss": 0.6169, "step": 13177 }, { "epoch": 1.8657181284065973, "grad_norm": 9.861100857390564, "learning_rate": 5.8823113569986545e-08, "loss": 0.6451, "step": 13178 }, { "epoch": 1.8658597012812344, "grad_norm": 8.981732372903183, "learning_rate": 5.8699567307303474e-08, "loss": 0.5934, "step": 13179 }, { "epoch": 1.8660012741558716, "grad_norm": 9.042478979042516, "learning_rate": 5.8576149381963935e-08, "loss": 0.6095, "step": 13180 }, { "epoch": 1.8661428470305088, "grad_norm": 9.079160332528003, "learning_rate": 5.845285980045551e-08, "loss": 0.6081, "step": 13181 }, { "epoch": 1.866284419905146, "grad_norm": 9.283530420737248, "learning_rate": 5.8329698569259963e-08, "loss": 0.7029, "step": 13182 }, { "epoch": 1.8664259927797833, "grad_norm": 9.176460536531263, "learning_rate": 5.820666569485156e-08, "loss": 0.6575, "step": 13183 }, { "epoch": 1.8665675656544205, "grad_norm": 9.212090831764295, "learning_rate": 5.808376118369791e-08, "loss": 0.6122, "step": 13184 }, { "epoch": 1.8667091385290577, "grad_norm": 10.920514852941702, "learning_rate": 5.796098504226022e-08, "loss": 0.7005, "step": 13185 }, { "epoch": 1.866850711403695, "grad_norm": 9.411192751783934, "learning_rate": 5.7838337276992787e-08, "loss": 0.5924, "step": 13186 }, { "epoch": 1.8669922842783322, "grad_norm": 10.255807115475786, "learning_rate": 5.7715817894342944e-08, "loss": 0.7009, "step": 13187 }, { "epoch": 1.8671338571529694, "grad_norm": 9.321335665235127, "learning_rate": 5.759342690075137e-08, "loss": 0.6524, "step": 13188 }, { "epoch": 1.8672754300276067, "grad_norm": 10.744540225310729, "learning_rate": 5.7471164302652646e-08, "loss": 0.6303, "step": 13189 }, { "epoch": 1.8674170029022439, "grad_norm": 8.623487256945598, "learning_rate": 5.73490301064733e-08, "loss": 0.6478, "step": 13190 }, { "epoch": 1.8675585757768811, "grad_norm": 9.482202824774223, "learning_rate": 5.722702431863403e-08, "loss": 0.6485, "step": 13191 }, { "epoch": 1.8677001486515183, "grad_norm": 10.603679493403604, "learning_rate": 5.710514694554886e-08, "loss": 0.7113, "step": 13192 }, { "epoch": 1.8678417215261556, "grad_norm": 8.802187682366553, "learning_rate": 5.6983397993624346e-08, "loss": 0.6219, "step": 13193 }, { "epoch": 1.8679832944007928, "grad_norm": 8.731103601777983, "learning_rate": 5.686177746926147e-08, "loss": 0.7291, "step": 13194 }, { "epoch": 1.86812486727543, "grad_norm": 8.994818573458478, "learning_rate": 5.67402853788529e-08, "loss": 0.7073, "step": 13195 }, { "epoch": 1.8682664401500673, "grad_norm": 9.917082150087396, "learning_rate": 5.6618921728786026e-08, "loss": 0.5878, "step": 13196 }, { "epoch": 1.8684080130247045, "grad_norm": 8.563931756151694, "learning_rate": 5.6497686525440186e-08, "loss": 0.6481, "step": 13197 }, { "epoch": 1.8685495858993417, "grad_norm": 9.895098357094891, "learning_rate": 5.6376579775189456e-08, "loss": 0.7286, "step": 13198 }, { "epoch": 1.868691158773979, "grad_norm": 9.122181078123274, "learning_rate": 5.625560148439929e-08, "loss": 0.7231, "step": 13199 }, { "epoch": 1.8688327316486162, "grad_norm": 9.42114781345919, "learning_rate": 5.6134751659430716e-08, "loss": 0.6307, "step": 13200 }, { "epoch": 1.8689743045232534, "grad_norm": 9.468731870728313, "learning_rate": 5.6014030306635606e-08, "loss": 0.5905, "step": 13201 }, { "epoch": 1.8691158773978906, "grad_norm": 8.4682599027929, "learning_rate": 5.589343743236081e-08, "loss": 0.7506, "step": 13202 }, { "epoch": 1.8692574502725279, "grad_norm": 8.156913642631007, "learning_rate": 5.577297304294543e-08, "loss": 0.69, "step": 13203 }, { "epoch": 1.869399023147165, "grad_norm": 9.581328561041465, "learning_rate": 5.5652637144722463e-08, "loss": 0.7725, "step": 13204 }, { "epoch": 1.8695405960218023, "grad_norm": 9.474025352091386, "learning_rate": 5.5532429744017957e-08, "loss": 0.7539, "step": 13205 }, { "epoch": 1.8696821688964396, "grad_norm": 7.776987647234266, "learning_rate": 5.5412350847150466e-08, "loss": 0.6896, "step": 13206 }, { "epoch": 1.8698237417710768, "grad_norm": 10.001459015267823, "learning_rate": 5.5292400460432993e-08, "loss": 0.7301, "step": 13207 }, { "epoch": 1.869965314645714, "grad_norm": 9.799165448835545, "learning_rate": 5.517257859017161e-08, "loss": 0.5891, "step": 13208 }, { "epoch": 1.8701068875203513, "grad_norm": 11.534861921617495, "learning_rate": 5.505288524266461e-08, "loss": 0.6466, "step": 13209 }, { "epoch": 1.8702484603949883, "grad_norm": 11.20720246198514, "learning_rate": 5.493332042420446e-08, "loss": 0.5999, "step": 13210 }, { "epoch": 1.8703900332696255, "grad_norm": 7.936828825200724, "learning_rate": 5.481388414107669e-08, "loss": 0.6531, "step": 13211 }, { "epoch": 1.8705316061442627, "grad_norm": 9.34408068629923, "learning_rate": 5.469457639955961e-08, "loss": 0.6274, "step": 13212 }, { "epoch": 1.8706731790189, "grad_norm": 8.307461925583501, "learning_rate": 5.457539720592514e-08, "loss": 0.7344, "step": 13213 }, { "epoch": 1.8708147518935372, "grad_norm": 9.12892756135189, "learning_rate": 5.445634656643884e-08, "loss": 0.7398, "step": 13214 }, { "epoch": 1.8709563247681744, "grad_norm": 7.936818491596912, "learning_rate": 5.4337424487359016e-08, "loss": 0.6654, "step": 13215 }, { "epoch": 1.8710978976428116, "grad_norm": 8.905539393937499, "learning_rate": 5.421863097493707e-08, "loss": 0.6432, "step": 13216 }, { "epoch": 1.8712394705174489, "grad_norm": 9.103510806089156, "learning_rate": 5.409996603541828e-08, "loss": 0.7073, "step": 13217 }, { "epoch": 1.871381043392086, "grad_norm": 9.150084831803431, "learning_rate": 5.398142967504017e-08, "loss": 0.72, "step": 13218 }, { "epoch": 1.8715226162667233, "grad_norm": 8.868317143077107, "learning_rate": 5.386302190003495e-08, "loss": 0.6141, "step": 13219 }, { "epoch": 1.8716641891413606, "grad_norm": 8.588075186000099, "learning_rate": 5.3744742716626276e-08, "loss": 0.6343, "step": 13220 }, { "epoch": 1.8718057620159976, "grad_norm": 10.070096578125428, "learning_rate": 5.362659213103277e-08, "loss": 0.6615, "step": 13221 }, { "epoch": 1.8719473348906348, "grad_norm": 8.681045439735804, "learning_rate": 5.350857014946531e-08, "loss": 0.7126, "step": 13222 }, { "epoch": 1.872088907765272, "grad_norm": 9.153821603634839, "learning_rate": 5.339067677812782e-08, "loss": 0.6908, "step": 13223 }, { "epoch": 1.8722304806399093, "grad_norm": 9.256629836114847, "learning_rate": 5.327291202321866e-08, "loss": 0.6904, "step": 13224 }, { "epoch": 1.8723720535145465, "grad_norm": 9.362305044021193, "learning_rate": 5.315527589092762e-08, "loss": 0.6712, "step": 13225 }, { "epoch": 1.8725136263891837, "grad_norm": 8.993157646814005, "learning_rate": 5.303776838743946e-08, "loss": 0.618, "step": 13226 }, { "epoch": 1.872655199263821, "grad_norm": 9.355996364297605, "learning_rate": 5.292038951893119e-08, "loss": 0.6402, "step": 13227 }, { "epoch": 1.8727967721384582, "grad_norm": 11.003153262309272, "learning_rate": 5.2803139291573716e-08, "loss": 0.6437, "step": 13228 }, { "epoch": 1.8729383450130954, "grad_norm": 8.409811974055357, "learning_rate": 5.268601771153042e-08, "loss": 0.7851, "step": 13229 }, { "epoch": 1.8730799178877326, "grad_norm": 8.592708566158269, "learning_rate": 5.2569024784958065e-08, "loss": 0.673, "step": 13230 }, { "epoch": 1.8732214907623699, "grad_norm": 10.777703008578094, "learning_rate": 5.2452160518007555e-08, "loss": 0.6829, "step": 13231 }, { "epoch": 1.873363063637007, "grad_norm": 9.722283116710027, "learning_rate": 5.233542491682203e-08, "loss": 0.5796, "step": 13232 }, { "epoch": 1.8735046365116443, "grad_norm": 9.658953735683095, "learning_rate": 5.2218817987537976e-08, "loss": 0.6671, "step": 13233 }, { "epoch": 1.8736462093862816, "grad_norm": 9.105183024528186, "learning_rate": 5.210233973628548e-08, "loss": 0.7177, "step": 13234 }, { "epoch": 1.8737877822609188, "grad_norm": 10.469096528196653, "learning_rate": 5.198599016918771e-08, "loss": 0.6818, "step": 13235 }, { "epoch": 1.873929355135556, "grad_norm": 8.897408577475312, "learning_rate": 5.1869769292361425e-08, "loss": 0.6875, "step": 13236 }, { "epoch": 1.8740709280101933, "grad_norm": 9.461351179281705, "learning_rate": 5.1753677111915645e-08, "loss": 0.5993, "step": 13237 }, { "epoch": 1.8742125008848305, "grad_norm": 8.881864659880028, "learning_rate": 5.163771363395381e-08, "loss": 0.7479, "step": 13238 }, { "epoch": 1.8743540737594677, "grad_norm": 11.427272872720646, "learning_rate": 5.152187886457161e-08, "loss": 0.7047, "step": 13239 }, { "epoch": 1.874495646634105, "grad_norm": 9.070373482380054, "learning_rate": 5.14061728098586e-08, "loss": 0.7148, "step": 13240 }, { "epoch": 1.8746372195087422, "grad_norm": 9.569978071325165, "learning_rate": 5.1290595475897434e-08, "loss": 0.7253, "step": 13241 }, { "epoch": 1.8747787923833794, "grad_norm": 10.030843471002132, "learning_rate": 5.117514686876379e-08, "loss": 0.6524, "step": 13242 }, { "epoch": 1.8749203652580166, "grad_norm": 8.809346385529086, "learning_rate": 5.105982699452699e-08, "loss": 0.6844, "step": 13243 }, { "epoch": 1.8750619381326539, "grad_norm": 11.361095607739736, "learning_rate": 5.094463585924858e-08, "loss": 0.7472, "step": 13244 }, { "epoch": 1.875203511007291, "grad_norm": 9.515625400887718, "learning_rate": 5.082957346898482e-08, "loss": 0.6619, "step": 13245 }, { "epoch": 1.8753450838819283, "grad_norm": 8.398577851298183, "learning_rate": 5.0714639829784195e-08, "loss": 0.5487, "step": 13246 }, { "epoch": 1.8754866567565656, "grad_norm": 10.783071347512585, "learning_rate": 5.0599834947688834e-08, "loss": 0.636, "step": 13247 }, { "epoch": 1.8756282296312028, "grad_norm": 10.119864963037658, "learning_rate": 5.048515882873362e-08, "loss": 0.7037, "step": 13248 }, { "epoch": 1.87576980250584, "grad_norm": 9.11636822181202, "learning_rate": 5.037061147894734e-08, "loss": 0.5303, "step": 13249 }, { "epoch": 1.8759113753804773, "grad_norm": 9.207691245038836, "learning_rate": 5.0256192904351295e-08, "loss": 0.6017, "step": 13250 }, { "epoch": 1.8760529482551145, "grad_norm": 10.027560592464495, "learning_rate": 5.014190311096068e-08, "loss": 0.689, "step": 13251 }, { "epoch": 1.8761945211297515, "grad_norm": 7.844841261862991, "learning_rate": 5.002774210478345e-08, "loss": 0.6596, "step": 13252 }, { "epoch": 1.8763360940043887, "grad_norm": 8.53632759768394, "learning_rate": 4.9913709891821207e-08, "loss": 0.6115, "step": 13253 }, { "epoch": 1.876477666879026, "grad_norm": 8.826064039712461, "learning_rate": 4.9799806478068314e-08, "loss": 0.6369, "step": 13254 }, { "epoch": 1.8766192397536632, "grad_norm": 9.364047557424879, "learning_rate": 4.9686031869512486e-08, "loss": 0.6595, "step": 13255 }, { "epoch": 1.8767608126283004, "grad_norm": 8.63591219803603, "learning_rate": 4.9572386072135046e-08, "loss": 0.6414, "step": 13256 }, { "epoch": 1.8769023855029376, "grad_norm": 10.014987014708764, "learning_rate": 4.945886909191011e-08, "loss": 0.7215, "step": 13257 }, { "epoch": 1.8770439583775749, "grad_norm": 8.745084853461302, "learning_rate": 4.9345480934805125e-08, "loss": 0.6227, "step": 13258 }, { "epoch": 1.877185531252212, "grad_norm": 9.124087771450514, "learning_rate": 4.923222160678115e-08, "loss": 0.6765, "step": 13259 }, { "epoch": 1.8773271041268493, "grad_norm": 9.428192197100605, "learning_rate": 4.911909111379176e-08, "loss": 0.6553, "step": 13260 }, { "epoch": 1.8774686770014866, "grad_norm": 16.73330904488998, "learning_rate": 4.9006089461784424e-08, "loss": 0.5275, "step": 13261 }, { "epoch": 1.8776102498761236, "grad_norm": 7.7937477582914525, "learning_rate": 4.8893216656699386e-08, "loss": 0.7773, "step": 13262 }, { "epoch": 1.8777518227507608, "grad_norm": 9.452597560828064, "learning_rate": 4.878047270447051e-08, "loss": 0.613, "step": 13263 }, { "epoch": 1.877893395625398, "grad_norm": 8.437358487673244, "learning_rate": 4.8667857611024164e-08, "loss": 0.6235, "step": 13264 }, { "epoch": 1.8780349685000353, "grad_norm": 9.33192905579247, "learning_rate": 4.8555371382280894e-08, "loss": 0.6607, "step": 13265 }, { "epoch": 1.8781765413746725, "grad_norm": 9.825806360909512, "learning_rate": 4.844301402415402e-08, "loss": 0.7079, "step": 13266 }, { "epoch": 1.8783181142493097, "grad_norm": 8.616494227835373, "learning_rate": 4.833078554254966e-08, "loss": 0.7101, "step": 13267 }, { "epoch": 1.878459687123947, "grad_norm": 7.814152901318578, "learning_rate": 4.8218685943368094e-08, "loss": 0.6998, "step": 13268 }, { "epoch": 1.8786012599985842, "grad_norm": 8.996241314464944, "learning_rate": 4.810671523250182e-08, "loss": 0.7464, "step": 13269 }, { "epoch": 1.8787428328732214, "grad_norm": 10.340845720247495, "learning_rate": 4.799487341583753e-08, "loss": 0.6295, "step": 13270 }, { "epoch": 1.8788844057478586, "grad_norm": 8.8609959971233, "learning_rate": 4.788316049925412e-08, "loss": 0.6451, "step": 13271 }, { "epoch": 1.8790259786224959, "grad_norm": 9.049785666272365, "learning_rate": 4.777157648862496e-08, "loss": 0.6237, "step": 13272 }, { "epoch": 1.879167551497133, "grad_norm": 10.883066627618863, "learning_rate": 4.766012138981535e-08, "loss": 0.6325, "step": 13273 }, { "epoch": 1.8793091243717703, "grad_norm": 8.913966640588871, "learning_rate": 4.754879520868477e-08, "loss": 0.7995, "step": 13274 }, { "epoch": 1.8794506972464076, "grad_norm": 10.286944542834075, "learning_rate": 4.743759795108549e-08, "loss": 0.638, "step": 13275 }, { "epoch": 1.8795922701210448, "grad_norm": 10.385771525654965, "learning_rate": 4.732652962286283e-08, "loss": 0.7525, "step": 13276 }, { "epoch": 1.879733842995682, "grad_norm": 9.164035442601786, "learning_rate": 4.7215590229855723e-08, "loss": 0.6298, "step": 13277 }, { "epoch": 1.8798754158703193, "grad_norm": 9.724669196379068, "learning_rate": 4.710477977789618e-08, "loss": 0.5697, "step": 13278 }, { "epoch": 1.8800169887449565, "grad_norm": 8.464348919835404, "learning_rate": 4.699409827280954e-08, "loss": 0.6739, "step": 13279 }, { "epoch": 1.8801585616195937, "grad_norm": 9.527662458793916, "learning_rate": 4.6883545720413925e-08, "loss": 0.7242, "step": 13280 }, { "epoch": 1.880300134494231, "grad_norm": 9.177346360373553, "learning_rate": 4.677312212652108e-08, "loss": 0.728, "step": 13281 }, { "epoch": 1.8804417073688682, "grad_norm": 9.574590481997088, "learning_rate": 4.666282749693607e-08, "loss": 0.657, "step": 13282 }, { "epoch": 1.8805832802435054, "grad_norm": 8.80888043463633, "learning_rate": 4.655266183745705e-08, "loss": 0.6489, "step": 13283 }, { "epoch": 1.8807248531181426, "grad_norm": 10.642382237197515, "learning_rate": 4.644262515387521e-08, "loss": 0.6923, "step": 13284 }, { "epoch": 1.8808664259927799, "grad_norm": 9.448690689048123, "learning_rate": 4.633271745197537e-08, "loss": 0.7172, "step": 13285 }, { "epoch": 1.881007998867417, "grad_norm": 9.800929932361647, "learning_rate": 4.6222938737534864e-08, "loss": 0.6601, "step": 13286 }, { "epoch": 1.8811495717420543, "grad_norm": 8.078609488916646, "learning_rate": 4.6113289016324615e-08, "loss": 0.6147, "step": 13287 }, { "epoch": 1.8812911446166916, "grad_norm": 8.795842437040701, "learning_rate": 4.600376829410919e-08, "loss": 0.5904, "step": 13288 }, { "epoch": 1.8814327174913288, "grad_norm": 10.161968041792981, "learning_rate": 4.589437657664592e-08, "loss": 0.639, "step": 13289 }, { "epoch": 1.881574290365966, "grad_norm": 7.55424919553248, "learning_rate": 4.578511386968548e-08, "loss": 0.7024, "step": 13290 }, { "epoch": 1.8817158632406032, "grad_norm": 8.675448394532205, "learning_rate": 4.567598017897162e-08, "loss": 0.6336, "step": 13291 }, { "epoch": 1.8818574361152405, "grad_norm": 8.414586127944133, "learning_rate": 4.556697551024142e-08, "loss": 0.5926, "step": 13292 }, { "epoch": 1.8819990089898775, "grad_norm": 8.648178138048607, "learning_rate": 4.545809986922528e-08, "loss": 0.711, "step": 13293 }, { "epoch": 1.8821405818645147, "grad_norm": 11.151846735312953, "learning_rate": 4.5349353261646414e-08, "loss": 0.7174, "step": 13294 }, { "epoch": 1.882282154739152, "grad_norm": 10.148816766031805, "learning_rate": 4.524073569322218e-08, "loss": 0.7118, "step": 13295 }, { "epoch": 1.8824237276137892, "grad_norm": 7.146577520146608, "learning_rate": 4.5132247169661916e-08, "loss": 0.5637, "step": 13296 }, { "epoch": 1.8825653004884264, "grad_norm": 10.28016095640291, "learning_rate": 4.5023887696668824e-08, "loss": 0.6868, "step": 13297 }, { "epoch": 1.8827068733630636, "grad_norm": 11.288712080958454, "learning_rate": 4.491565727993974e-08, "loss": 0.7487, "step": 13298 }, { "epoch": 1.8828484462377009, "grad_norm": 9.66067630588106, "learning_rate": 4.480755592516372e-08, "loss": 0.7439, "step": 13299 }, { "epoch": 1.882990019112338, "grad_norm": 9.003229621403792, "learning_rate": 4.469958363802401e-08, "loss": 0.6751, "step": 13300 }, { "epoch": 1.8831315919869753, "grad_norm": 9.071652335635047, "learning_rate": 4.459174042419634e-08, "loss": 0.6938, "step": 13301 }, { "epoch": 1.8832731648616126, "grad_norm": 10.379404626724668, "learning_rate": 4.448402628935034e-08, "loss": 0.686, "step": 13302 }, { "epoch": 1.8834147377362498, "grad_norm": 9.385162135549225, "learning_rate": 4.437644123914758e-08, "loss": 0.6721, "step": 13303 }, { "epoch": 1.8835563106108868, "grad_norm": 8.802493643743489, "learning_rate": 4.426898527924467e-08, "loss": 0.6493, "step": 13304 }, { "epoch": 1.883697883485524, "grad_norm": 8.8682483189788, "learning_rate": 4.4161658415290135e-08, "loss": 0.7061, "step": 13305 }, { "epoch": 1.8838394563601613, "grad_norm": 10.832296077271156, "learning_rate": 4.405446065292612e-08, "loss": 0.5995, "step": 13306 }, { "epoch": 1.8839810292347985, "grad_norm": 10.374195113615855, "learning_rate": 4.3947391997787857e-08, "loss": 0.747, "step": 13307 }, { "epoch": 1.8841226021094357, "grad_norm": 8.271032630447408, "learning_rate": 4.384045245550389e-08, "loss": 0.7165, "step": 13308 }, { "epoch": 1.884264174984073, "grad_norm": 9.40114072006046, "learning_rate": 4.373364203169583e-08, "loss": 0.6297, "step": 13309 }, { "epoch": 1.8844057478587102, "grad_norm": 9.532374981636389, "learning_rate": 4.362696073197864e-08, "loss": 0.7719, "step": 13310 }, { "epoch": 1.8845473207333474, "grad_norm": 9.359490752101413, "learning_rate": 4.35204085619606e-08, "loss": 0.6769, "step": 13311 }, { "epoch": 1.8846888936079846, "grad_norm": 8.524796165199678, "learning_rate": 4.3413985527243353e-08, "loss": 0.6134, "step": 13312 }, { "epoch": 1.8848304664826219, "grad_norm": 9.73786366558555, "learning_rate": 4.330769163342102e-08, "loss": 0.6652, "step": 13313 }, { "epoch": 1.884972039357259, "grad_norm": 11.61907085860085, "learning_rate": 4.320152688608165e-08, "loss": 0.6771, "step": 13314 }, { "epoch": 1.8851136122318963, "grad_norm": 8.5383266959114, "learning_rate": 4.309549129080576e-08, "loss": 0.7492, "step": 13315 }, { "epoch": 1.8852551851065336, "grad_norm": 8.386983013564784, "learning_rate": 4.298958485316834e-08, "loss": 0.6639, "step": 13316 }, { "epoch": 1.8853967579811708, "grad_norm": 8.906883886433361, "learning_rate": 4.2883807578736337e-08, "loss": 0.6718, "step": 13317 }, { "epoch": 1.885538330855808, "grad_norm": 10.530221588904281, "learning_rate": 4.277815947307029e-08, "loss": 0.7722, "step": 13318 }, { "epoch": 1.8856799037304453, "grad_norm": 9.337678805966632, "learning_rate": 4.267264054172465e-08, "loss": 0.5861, "step": 13319 }, { "epoch": 1.8858214766050825, "grad_norm": 8.878504101649545, "learning_rate": 4.256725079024554e-08, "loss": 0.6317, "step": 13320 }, { "epoch": 1.8859630494797197, "grad_norm": 9.24652240438031, "learning_rate": 4.2461990224174076e-08, "loss": 0.6202, "step": 13321 }, { "epoch": 1.886104622354357, "grad_norm": 8.119427457271032, "learning_rate": 4.235685884904306e-08, "loss": 0.5245, "step": 13322 }, { "epoch": 1.8862461952289942, "grad_norm": 11.866993343128637, "learning_rate": 4.2251856670379733e-08, "loss": 0.8013, "step": 13323 }, { "epoch": 1.8863877681036314, "grad_norm": 9.720578920054896, "learning_rate": 4.214698369370357e-08, "loss": 0.6472, "step": 13324 }, { "epoch": 1.8865293409782686, "grad_norm": 9.373735266253089, "learning_rate": 4.204223992452794e-08, "loss": 0.6551, "step": 13325 }, { "epoch": 1.8866709138529059, "grad_norm": 9.782809099733877, "learning_rate": 4.193762536835871e-08, "loss": 0.6874, "step": 13326 }, { "epoch": 1.886812486727543, "grad_norm": 8.154765377633977, "learning_rate": 4.1833140030696216e-08, "loss": 0.6888, "step": 13327 }, { "epoch": 1.8869540596021803, "grad_norm": 9.259173406697434, "learning_rate": 4.172878391703245e-08, "loss": 0.7044, "step": 13328 }, { "epoch": 1.8870956324768176, "grad_norm": 9.2767438128388, "learning_rate": 4.162455703285356e-08, "loss": 0.744, "step": 13329 }, { "epoch": 1.8872372053514548, "grad_norm": 8.484358364250495, "learning_rate": 4.152045938363852e-08, "loss": 0.7104, "step": 13330 }, { "epoch": 1.887378778226092, "grad_norm": 8.752150353097383, "learning_rate": 4.141649097485989e-08, "loss": 0.6636, "step": 13331 }, { "epoch": 1.8875203511007292, "grad_norm": 9.557849027231166, "learning_rate": 4.131265181198302e-08, "loss": 0.5664, "step": 13332 }, { "epoch": 1.8876619239753665, "grad_norm": 7.832006896782587, "learning_rate": 4.120894190046687e-08, "loss": 0.6635, "step": 13333 }, { "epoch": 1.8878034968500037, "grad_norm": 9.278735504159624, "learning_rate": 4.11053612457632e-08, "loss": 0.6726, "step": 13334 }, { "epoch": 1.8879450697246407, "grad_norm": 9.782072477525732, "learning_rate": 4.100190985331765e-08, "loss": 0.5935, "step": 13335 }, { "epoch": 1.888086642599278, "grad_norm": 9.9336271596715, "learning_rate": 4.0898587728567805e-08, "loss": 0.6648, "step": 13336 }, { "epoch": 1.8882282154739152, "grad_norm": 10.457534293442942, "learning_rate": 4.0795394876945726e-08, "loss": 0.7289, "step": 13337 }, { "epoch": 1.8883697883485524, "grad_norm": 8.584995985257915, "learning_rate": 4.0692331303876234e-08, "loss": 0.5879, "step": 13338 }, { "epoch": 1.8885113612231896, "grad_norm": 10.137867980374375, "learning_rate": 4.058939701477693e-08, "loss": 0.6109, "step": 13339 }, { "epoch": 1.8886529340978269, "grad_norm": 7.487704052317883, "learning_rate": 4.048659201505933e-08, "loss": 0.689, "step": 13340 }, { "epoch": 1.888794506972464, "grad_norm": 8.454185433427247, "learning_rate": 4.038391631012745e-08, "loss": 0.6865, "step": 13341 }, { "epoch": 1.8889360798471013, "grad_norm": 9.116202516148359, "learning_rate": 4.028136990537945e-08, "loss": 0.717, "step": 13342 }, { "epoch": 1.8890776527217386, "grad_norm": 9.551934682395908, "learning_rate": 4.0178952806205486e-08, "loss": 0.7142, "step": 13343 }, { "epoch": 1.8892192255963758, "grad_norm": 9.051177008368382, "learning_rate": 4.0076665017990124e-08, "loss": 0.6051, "step": 13344 }, { "epoch": 1.8893607984710128, "grad_norm": 8.274986114951295, "learning_rate": 3.997450654611018e-08, "loss": 0.6364, "step": 13345 }, { "epoch": 1.88950237134565, "grad_norm": 7.7385518827296895, "learning_rate": 3.987247739593636e-08, "loss": 0.5879, "step": 13346 }, { "epoch": 1.8896439442202873, "grad_norm": 7.813932485858493, "learning_rate": 3.9770577572831594e-08, "loss": 0.5942, "step": 13347 }, { "epoch": 1.8897855170949245, "grad_norm": 9.299200815297121, "learning_rate": 3.966880708215354e-08, "loss": 0.6485, "step": 13348 }, { "epoch": 1.8899270899695617, "grad_norm": 8.021991543727575, "learning_rate": 3.9567165929251804e-08, "loss": 0.5715, "step": 13349 }, { "epoch": 1.890068662844199, "grad_norm": 8.82056809790754, "learning_rate": 3.9465654119469345e-08, "loss": 0.7409, "step": 13350 }, { "epoch": 1.8902102357188362, "grad_norm": 8.785330805013299, "learning_rate": 3.9364271658142997e-08, "loss": 0.6838, "step": 13351 }, { "epoch": 1.8903518085934734, "grad_norm": 9.959646822018135, "learning_rate": 3.926301855060183e-08, "loss": 0.6554, "step": 13352 }, { "epoch": 1.8904933814681106, "grad_norm": 9.386080691225038, "learning_rate": 3.916189480216937e-08, "loss": 0.612, "step": 13353 }, { "epoch": 1.8906349543427479, "grad_norm": 7.587112602889572, "learning_rate": 3.906090041816107e-08, "loss": 0.5052, "step": 13354 }, { "epoch": 1.890776527217385, "grad_norm": 9.913030475983287, "learning_rate": 3.896003540388604e-08, "loss": 0.6229, "step": 13355 }, { "epoch": 1.8909181000920223, "grad_norm": 10.0215584115249, "learning_rate": 3.885929976464725e-08, "loss": 0.6419, "step": 13356 }, { "epoch": 1.8910596729666596, "grad_norm": 9.057423820796465, "learning_rate": 3.875869350573963e-08, "loss": 0.6415, "step": 13357 }, { "epoch": 1.8912012458412968, "grad_norm": 9.291090965967916, "learning_rate": 3.865821663245284e-08, "loss": 0.6842, "step": 13358 }, { "epoch": 1.891342818715934, "grad_norm": 8.435687287193867, "learning_rate": 3.855786915006793e-08, "loss": 0.6091, "step": 13359 }, { "epoch": 1.8914843915905712, "grad_norm": 9.896054770685845, "learning_rate": 3.8457651063860954e-08, "loss": 0.8039, "step": 13360 }, { "epoch": 1.8916259644652085, "grad_norm": 8.940315283351179, "learning_rate": 3.835756237909938e-08, "loss": 0.6852, "step": 13361 }, { "epoch": 1.8917675373398457, "grad_norm": 8.030025878712532, "learning_rate": 3.825760310104537e-08, "loss": 0.6616, "step": 13362 }, { "epoch": 1.891909110214483, "grad_norm": 9.223274594767755, "learning_rate": 3.815777323495362e-08, "loss": 0.6949, "step": 13363 }, { "epoch": 1.8920506830891202, "grad_norm": 9.481582054403924, "learning_rate": 3.805807278607215e-08, "loss": 0.6897, "step": 13364 }, { "epoch": 1.8921922559637574, "grad_norm": 8.039687418768462, "learning_rate": 3.795850175964205e-08, "loss": 0.6798, "step": 13365 }, { "epoch": 1.8923338288383946, "grad_norm": 9.015478599060462, "learning_rate": 3.785906016089774e-08, "loss": 0.7211, "step": 13366 }, { "epoch": 1.8924754017130319, "grad_norm": 8.701083534554359, "learning_rate": 3.775974799506699e-08, "loss": 0.6706, "step": 13367 }, { "epoch": 1.892616974587669, "grad_norm": 9.387254767129114, "learning_rate": 3.766056526737005e-08, "loss": 0.7131, "step": 13368 }, { "epoch": 1.8927585474623063, "grad_norm": 8.752889537538312, "learning_rate": 3.756151198302138e-08, "loss": 0.6011, "step": 13369 }, { "epoch": 1.8929001203369435, "grad_norm": 8.694728122209494, "learning_rate": 3.7462588147228193e-08, "loss": 0.7118, "step": 13370 }, { "epoch": 1.8930416932115808, "grad_norm": 9.195193436596414, "learning_rate": 3.736379376519023e-08, "loss": 0.7603, "step": 13371 }, { "epoch": 1.893183266086218, "grad_norm": 7.184604758247079, "learning_rate": 3.726512884210165e-08, "loss": 0.6636, "step": 13372 }, { "epoch": 1.8933248389608552, "grad_norm": 9.22653603933547, "learning_rate": 3.7166593383148594e-08, "loss": 0.5639, "step": 13373 }, { "epoch": 1.8934664118354925, "grad_norm": 8.54609791014274, "learning_rate": 3.706818739351164e-08, "loss": 0.6686, "step": 13374 }, { "epoch": 1.8936079847101297, "grad_norm": 8.549946067595343, "learning_rate": 3.69699108783636e-08, "loss": 0.6386, "step": 13375 }, { "epoch": 1.8937495575847667, "grad_norm": 8.667322231832053, "learning_rate": 3.687176384287089e-08, "loss": 0.6459, "step": 13376 }, { "epoch": 1.893891130459404, "grad_norm": 7.757379279664893, "learning_rate": 3.677374629219271e-08, "loss": 0.7013, "step": 13377 }, { "epoch": 1.8940327033340412, "grad_norm": 8.395415062972756, "learning_rate": 3.667585823148218e-08, "loss": 0.7327, "step": 13378 }, { "epoch": 1.8941742762086784, "grad_norm": 8.807013785687465, "learning_rate": 3.657809966588516e-08, "loss": 0.6913, "step": 13379 }, { "epoch": 1.8943158490833156, "grad_norm": 9.452053948880057, "learning_rate": 3.6480470600540606e-08, "loss": 0.7513, "step": 13380 }, { "epoch": 1.8944574219579529, "grad_norm": 11.09683124573358, "learning_rate": 3.638297104058081e-08, "loss": 0.7777, "step": 13381 }, { "epoch": 1.89459899483259, "grad_norm": 11.563067448069035, "learning_rate": 3.6285600991131095e-08, "loss": 0.7003, "step": 13382 }, { "epoch": 1.8947405677072273, "grad_norm": 8.712948198433969, "learning_rate": 3.618836045731072e-08, "loss": 0.6185, "step": 13383 }, { "epoch": 1.8948821405818645, "grad_norm": 10.258693822811718, "learning_rate": 3.609124944423087e-08, "loss": 0.6477, "step": 13384 }, { "epoch": 1.8950237134565018, "grad_norm": 7.462524628718388, "learning_rate": 3.599426795699662e-08, "loss": 0.6409, "step": 13385 }, { "epoch": 1.8951652863311388, "grad_norm": 11.06848947917402, "learning_rate": 3.5897416000706956e-08, "loss": 0.6909, "step": 13386 }, { "epoch": 1.895306859205776, "grad_norm": 9.821574103528098, "learning_rate": 3.580069358045252e-08, "loss": 0.6787, "step": 13387 }, { "epoch": 1.8954484320804132, "grad_norm": 8.79132913480908, "learning_rate": 3.570410070131841e-08, "loss": 0.6162, "step": 13388 }, { "epoch": 1.8955900049550505, "grad_norm": 10.382841157550745, "learning_rate": 3.5607637368381965e-08, "loss": 0.6974, "step": 13389 }, { "epoch": 1.8957315778296877, "grad_norm": 8.493221946397123, "learning_rate": 3.5511303586714676e-08, "loss": 0.6087, "step": 13390 }, { "epoch": 1.895873150704325, "grad_norm": 9.649132534429828, "learning_rate": 3.541509936138082e-08, "loss": 0.6881, "step": 13391 }, { "epoch": 1.8960147235789622, "grad_norm": 8.398405250664826, "learning_rate": 3.5319024697437196e-08, "loss": 0.6188, "step": 13392 }, { "epoch": 1.8961562964535994, "grad_norm": 8.229940360352616, "learning_rate": 3.522307959993476e-08, "loss": 0.6234, "step": 13393 }, { "epoch": 1.8962978693282366, "grad_norm": 8.728058252239853, "learning_rate": 3.5127264073917256e-08, "loss": 0.659, "step": 13394 }, { "epoch": 1.8964394422028739, "grad_norm": 9.108251381307921, "learning_rate": 3.503157812442148e-08, "loss": 0.7241, "step": 13395 }, { "epoch": 1.896581015077511, "grad_norm": 8.539780819512437, "learning_rate": 3.4936021756477865e-08, "loss": 0.6366, "step": 13396 }, { "epoch": 1.8967225879521483, "grad_norm": 8.508667509694297, "learning_rate": 3.4840594975109607e-08, "loss": 0.6126, "step": 13397 }, { "epoch": 1.8968641608267856, "grad_norm": 8.641218775512487, "learning_rate": 3.474529778533298e-08, "loss": 0.7385, "step": 13398 }, { "epoch": 1.8970057337014228, "grad_norm": 11.265578948414891, "learning_rate": 3.465013019215785e-08, "loss": 0.6681, "step": 13399 }, { "epoch": 1.89714730657606, "grad_norm": 9.246927524325944, "learning_rate": 3.455509220058717e-08, "loss": 0.7584, "step": 13400 }, { "epoch": 1.8972888794506972, "grad_norm": 8.886202337330637, "learning_rate": 3.4460183815617224e-08, "loss": 0.7162, "step": 13401 }, { "epoch": 1.8974304523253345, "grad_norm": 8.285846399090223, "learning_rate": 3.4365405042236785e-08, "loss": 0.6142, "step": 13402 }, { "epoch": 1.8975720251999717, "grad_norm": 8.53923181085006, "learning_rate": 3.4270755885428555e-08, "loss": 0.6167, "step": 13403 }, { "epoch": 1.897713598074609, "grad_norm": 9.60749672001493, "learning_rate": 3.4176236350168255e-08, "loss": 0.5771, "step": 13404 }, { "epoch": 1.8978551709492462, "grad_norm": 8.916142053115133, "learning_rate": 3.408184644142443e-08, "loss": 0.689, "step": 13405 }, { "epoch": 1.8979967438238834, "grad_norm": 9.53225692683221, "learning_rate": 3.398758616415948e-08, "loss": 0.6261, "step": 13406 }, { "epoch": 1.8981383166985206, "grad_norm": 8.99924211489935, "learning_rate": 3.389345552332834e-08, "loss": 0.7526, "step": 13407 }, { "epoch": 1.8982798895731579, "grad_norm": 8.304171642168631, "learning_rate": 3.379945452387928e-08, "loss": 0.6814, "step": 13408 }, { "epoch": 1.898421462447795, "grad_norm": 9.867398583698254, "learning_rate": 3.370558317075417e-08, "loss": 0.735, "step": 13409 }, { "epoch": 1.8985630353224323, "grad_norm": 9.560510765062041, "learning_rate": 3.3611841468887683e-08, "loss": 0.6783, "step": 13410 }, { "epoch": 1.8987046081970695, "grad_norm": 9.96925587103021, "learning_rate": 3.351822942320754e-08, "loss": 0.6767, "step": 13411 }, { "epoch": 1.8988461810717068, "grad_norm": 11.87033406996691, "learning_rate": 3.342474703863508e-08, "loss": 0.6944, "step": 13412 }, { "epoch": 1.898987753946344, "grad_norm": 10.420394311864888, "learning_rate": 3.333139432008442e-08, "loss": 0.6895, "step": 13413 }, { "epoch": 1.8991293268209812, "grad_norm": 10.064793295786757, "learning_rate": 3.3238171272463316e-08, "loss": 0.6895, "step": 13414 }, { "epoch": 1.8992708996956185, "grad_norm": 10.589932364300951, "learning_rate": 3.314507790067201e-08, "loss": 0.5905, "step": 13415 }, { "epoch": 1.8994124725702557, "grad_norm": 9.1943392048089, "learning_rate": 3.3052114209604636e-08, "loss": 0.66, "step": 13416 }, { "epoch": 1.899554045444893, "grad_norm": 7.951539846557803, "learning_rate": 3.295928020414812e-08, "loss": 0.6445, "step": 13417 }, { "epoch": 1.89969561831953, "grad_norm": 8.50664888521606, "learning_rate": 3.286657588918302e-08, "loss": 0.7245, "step": 13418 }, { "epoch": 1.8998371911941672, "grad_norm": 9.759375343970135, "learning_rate": 3.2774001269582354e-08, "loss": 0.6779, "step": 13419 }, { "epoch": 1.8999787640688044, "grad_norm": 11.50588573270831, "learning_rate": 3.2681556350212805e-08, "loss": 0.5586, "step": 13420 }, { "epoch": 1.9001203369434416, "grad_norm": 9.466958660225261, "learning_rate": 3.2589241135933815e-08, "loss": 0.6237, "step": 13421 }, { "epoch": 1.9002619098180789, "grad_norm": 9.136973429274358, "learning_rate": 3.2497055631598995e-08, "loss": 0.6276, "step": 13422 }, { "epoch": 1.900403482692716, "grad_norm": 10.130232600924233, "learning_rate": 3.2404999842054194e-08, "loss": 0.6696, "step": 13423 }, { "epoch": 1.9005450555673533, "grad_norm": 9.496478984241714, "learning_rate": 3.231307377213833e-08, "loss": 0.7537, "step": 13424 }, { "epoch": 1.9006866284419905, "grad_norm": 9.143314741808544, "learning_rate": 3.222127742668446e-08, "loss": 0.6811, "step": 13425 }, { "epoch": 1.9008282013166278, "grad_norm": 8.447108696157937, "learning_rate": 3.2129610810517633e-08, "loss": 0.6319, "step": 13426 }, { "epoch": 1.900969774191265, "grad_norm": 7.855397364947904, "learning_rate": 3.203807392845732e-08, "loss": 0.6556, "step": 13427 }, { "epoch": 1.901111347065902, "grad_norm": 11.222769779490156, "learning_rate": 3.1946666785315216e-08, "loss": 0.6994, "step": 13428 }, { "epoch": 1.9012529199405392, "grad_norm": 9.946187378668373, "learning_rate": 3.1855389385896383e-08, "loss": 0.6814, "step": 13429 }, { "epoch": 1.9013944928151765, "grad_norm": 8.02430894199507, "learning_rate": 3.176424173499976e-08, "loss": 0.6725, "step": 13430 }, { "epoch": 1.9015360656898137, "grad_norm": 9.182353336316826, "learning_rate": 3.167322383741622e-08, "loss": 0.7245, "step": 13431 }, { "epoch": 1.901677638564451, "grad_norm": 8.796699373108066, "learning_rate": 3.158233569793112e-08, "loss": 0.6103, "step": 13432 }, { "epoch": 1.9018192114390882, "grad_norm": 9.049164740865546, "learning_rate": 3.149157732132202e-08, "loss": 0.6973, "step": 13433 }, { "epoch": 1.9019607843137254, "grad_norm": 9.714369789529059, "learning_rate": 3.14009487123601e-08, "loss": 0.6438, "step": 13434 }, { "epoch": 1.9021023571883626, "grad_norm": 9.122259277111546, "learning_rate": 3.131044987580961e-08, "loss": 0.6227, "step": 13435 }, { "epoch": 1.9022439300629999, "grad_norm": 9.650952512545798, "learning_rate": 3.122008081642786e-08, "loss": 0.6367, "step": 13436 }, { "epoch": 1.902385502937637, "grad_norm": 9.441588065843485, "learning_rate": 3.112984153896603e-08, "loss": 0.5844, "step": 13437 }, { "epoch": 1.9025270758122743, "grad_norm": 9.27960745408733, "learning_rate": 3.1039732048167295e-08, "loss": 0.6714, "step": 13438 }, { "epoch": 1.9026686486869115, "grad_norm": 9.606974976582565, "learning_rate": 3.0949752348768956e-08, "loss": 0.7062, "step": 13439 }, { "epoch": 1.9028102215615488, "grad_norm": 9.404155105045115, "learning_rate": 3.0859902445501136e-08, "loss": 0.6618, "step": 13440 }, { "epoch": 1.902951794436186, "grad_norm": 9.56858641959908, "learning_rate": 3.077018234308726e-08, "loss": 0.6548, "step": 13441 }, { "epoch": 1.9030933673108232, "grad_norm": 7.893772336032705, "learning_rate": 3.0680592046243576e-08, "loss": 0.6249, "step": 13442 }, { "epoch": 1.9032349401854605, "grad_norm": 11.037220492771478, "learning_rate": 3.059113155968019e-08, "loss": 0.5868, "step": 13443 }, { "epoch": 1.9033765130600977, "grad_norm": 8.593867186701004, "learning_rate": 3.050180088809973e-08, "loss": 0.6127, "step": 13444 }, { "epoch": 1.903518085934735, "grad_norm": 7.623516751467928, "learning_rate": 3.041260003619817e-08, "loss": 0.6677, "step": 13445 }, { "epoch": 1.9036596588093722, "grad_norm": 9.571004040703775, "learning_rate": 3.032352900866481e-08, "loss": 0.7086, "step": 13446 }, { "epoch": 1.9038012316840094, "grad_norm": 9.319306823755348, "learning_rate": 3.0234587810182014e-08, "loss": 0.6366, "step": 13447 }, { "epoch": 1.9039428045586466, "grad_norm": 9.593003632281437, "learning_rate": 3.014577644542549e-08, "loss": 0.6829, "step": 13448 }, { "epoch": 1.9040843774332838, "grad_norm": 7.614854410323922, "learning_rate": 3.0057094919064e-08, "loss": 0.6598, "step": 13449 }, { "epoch": 1.904225950307921, "grad_norm": 9.896498058814794, "learning_rate": 2.996854323575937e-08, "loss": 0.6549, "step": 13450 }, { "epoch": 1.9043675231825583, "grad_norm": 9.164035026333497, "learning_rate": 2.98801214001665e-08, "loss": 0.7004, "step": 13451 }, { "epoch": 1.9045090960571955, "grad_norm": 8.776268031127993, "learning_rate": 2.9791829416933593e-08, "loss": 0.6313, "step": 13452 }, { "epoch": 1.9046506689318328, "grad_norm": 7.982752566094864, "learning_rate": 2.970366729070279e-08, "loss": 0.5478, "step": 13453 }, { "epoch": 1.90479224180647, "grad_norm": 9.194268257268824, "learning_rate": 2.9615635026108426e-08, "loss": 0.6997, "step": 13454 }, { "epoch": 1.9049338146811072, "grad_norm": 10.77859490913969, "learning_rate": 2.9527732627777915e-08, "loss": 0.6382, "step": 13455 }, { "epoch": 1.9050753875557445, "grad_norm": 9.75501102886818, "learning_rate": 2.9439960100332288e-08, "loss": 0.7928, "step": 13456 }, { "epoch": 1.9052169604303817, "grad_norm": 8.917863521882463, "learning_rate": 2.9352317448385902e-08, "loss": 0.6987, "step": 13457 }, { "epoch": 1.905358533305019, "grad_norm": 8.911084385570469, "learning_rate": 2.926480467654591e-08, "loss": 0.7411, "step": 13458 }, { "epoch": 1.905500106179656, "grad_norm": 9.624773295321202, "learning_rate": 2.9177421789412795e-08, "loss": 0.6853, "step": 13459 }, { "epoch": 1.9056416790542932, "grad_norm": 8.71779132344208, "learning_rate": 2.9090168791580663e-08, "loss": 0.6311, "step": 13460 }, { "epoch": 1.9057832519289304, "grad_norm": 10.505671104989483, "learning_rate": 2.9003045687635845e-08, "loss": 0.7409, "step": 13461 }, { "epoch": 1.9059248248035676, "grad_norm": 9.633971553723272, "learning_rate": 2.8916052482158284e-08, "loss": 0.5997, "step": 13462 }, { "epoch": 1.9060663976782048, "grad_norm": 10.18294381137043, "learning_rate": 2.8829189179721552e-08, "loss": 0.7084, "step": 13463 }, { "epoch": 1.906207970552842, "grad_norm": 8.696704845566419, "learning_rate": 2.8742455784891708e-08, "loss": 0.5748, "step": 13464 }, { "epoch": 1.9063495434274793, "grad_norm": 10.190783386964863, "learning_rate": 2.865585230222817e-08, "loss": 0.6286, "step": 13465 }, { "epoch": 1.9064911163021165, "grad_norm": 27.387693484383295, "learning_rate": 2.856937873628396e-08, "loss": 0.5818, "step": 13466 }, { "epoch": 1.9066326891767538, "grad_norm": 10.4100990852089, "learning_rate": 2.8483035091604604e-08, "loss": 0.693, "step": 13467 }, { "epoch": 1.906774262051391, "grad_norm": 9.26259147815088, "learning_rate": 2.8396821372729257e-08, "loss": 0.6673, "step": 13468 }, { "epoch": 1.906915834926028, "grad_norm": 8.480666219344196, "learning_rate": 2.8310737584190117e-08, "loss": 0.6566, "step": 13469 }, { "epoch": 1.9070574078006652, "grad_norm": 8.55656060044467, "learning_rate": 2.822478373051246e-08, "loss": 0.6741, "step": 13470 }, { "epoch": 1.9071989806753025, "grad_norm": 8.471169889650213, "learning_rate": 2.8138959816215174e-08, "loss": 0.6899, "step": 13471 }, { "epoch": 1.9073405535499397, "grad_norm": 9.572288539620093, "learning_rate": 2.8053265845809363e-08, "loss": 0.6826, "step": 13472 }, { "epoch": 1.907482126424577, "grad_norm": 9.190906126826066, "learning_rate": 2.796770182380032e-08, "loss": 0.6542, "step": 13473 }, { "epoch": 1.9076236992992142, "grad_norm": 9.370017800492322, "learning_rate": 2.7882267754685832e-08, "loss": 0.6632, "step": 13474 }, { "epoch": 1.9077652721738514, "grad_norm": 9.434961242757902, "learning_rate": 2.7796963642957586e-08, "loss": 0.6421, "step": 13475 }, { "epoch": 1.9079068450484886, "grad_norm": 8.349808340100164, "learning_rate": 2.7711789493099495e-08, "loss": 0.7113, "step": 13476 }, { "epoch": 1.9080484179231259, "grad_norm": 9.550776457054011, "learning_rate": 2.7626745309589088e-08, "loss": 0.656, "step": 13477 }, { "epoch": 1.908189990797763, "grad_norm": 9.190686977142482, "learning_rate": 2.7541831096897232e-08, "loss": 0.6341, "step": 13478 }, { "epoch": 1.9083315636724003, "grad_norm": 8.249201880487533, "learning_rate": 2.7457046859487578e-08, "loss": 0.6772, "step": 13479 }, { "epoch": 1.9084731365470375, "grad_norm": 8.30258941703322, "learning_rate": 2.7372392601817678e-08, "loss": 0.5687, "step": 13480 }, { "epoch": 1.9086147094216748, "grad_norm": 10.190479802299508, "learning_rate": 2.7287868328337297e-08, "loss": 0.7318, "step": 13481 }, { "epoch": 1.908756282296312, "grad_norm": 10.715497026544261, "learning_rate": 2.720347404348983e-08, "loss": 0.6396, "step": 13482 }, { "epoch": 1.9088978551709492, "grad_norm": 7.958433646590121, "learning_rate": 2.7119209751712283e-08, "loss": 0.6925, "step": 13483 }, { "epoch": 1.9090394280455865, "grad_norm": 8.948597240804002, "learning_rate": 2.7035075457433613e-08, "loss": 0.6678, "step": 13484 }, { "epoch": 1.9091810009202237, "grad_norm": 10.872115881812453, "learning_rate": 2.6951071165077504e-08, "loss": 0.679, "step": 13485 }, { "epoch": 1.909322573794861, "grad_norm": 8.670012732895739, "learning_rate": 2.686719687905931e-08, "loss": 0.6862, "step": 13486 }, { "epoch": 1.9094641466694982, "grad_norm": 9.78904574332664, "learning_rate": 2.678345260378856e-08, "loss": 0.6952, "step": 13487 }, { "epoch": 1.9096057195441354, "grad_norm": 8.02873125174179, "learning_rate": 2.669983834366785e-08, "loss": 0.6108, "step": 13488 }, { "epoch": 1.9097472924187726, "grad_norm": 9.114232647643524, "learning_rate": 2.661635410309199e-08, "loss": 0.6409, "step": 13489 }, { "epoch": 1.9098888652934098, "grad_norm": 9.359923994619752, "learning_rate": 2.653299988645053e-08, "loss": 0.6312, "step": 13490 }, { "epoch": 1.910030438168047, "grad_norm": 10.217099491013942, "learning_rate": 2.644977569812496e-08, "loss": 0.6559, "step": 13491 }, { "epoch": 1.9101720110426843, "grad_norm": 7.7250503587778825, "learning_rate": 2.6366681542490114e-08, "loss": 0.5221, "step": 13492 }, { "epoch": 1.9103135839173215, "grad_norm": 9.783482500939602, "learning_rate": 2.6283717423914445e-08, "loss": 0.6975, "step": 13493 }, { "epoch": 1.9104551567919588, "grad_norm": 9.225173626017874, "learning_rate": 2.6200883346759466e-08, "loss": 0.7157, "step": 13494 }, { "epoch": 1.910596729666596, "grad_norm": 9.964657317077057, "learning_rate": 2.6118179315379467e-08, "loss": 0.6886, "step": 13495 }, { "epoch": 1.9107383025412332, "grad_norm": 10.106109327517988, "learning_rate": 2.6035605334122084e-08, "loss": 0.6016, "step": 13496 }, { "epoch": 1.9108798754158705, "grad_norm": 9.501836900304092, "learning_rate": 2.5953161407328565e-08, "loss": 0.6123, "step": 13497 }, { "epoch": 1.9110214482905077, "grad_norm": 8.826355343233102, "learning_rate": 2.587084753933211e-08, "loss": 0.5657, "step": 13498 }, { "epoch": 1.911163021165145, "grad_norm": 8.88795578615469, "learning_rate": 2.578866373446065e-08, "loss": 0.6966, "step": 13499 }, { "epoch": 1.911304594039782, "grad_norm": 8.532665987543428, "learning_rate": 2.5706609997034337e-08, "loss": 0.5936, "step": 13500 }, { "epoch": 1.9114461669144192, "grad_norm": 10.866666911618594, "learning_rate": 2.5624686331366666e-08, "loss": 0.6613, "step": 13501 }, { "epoch": 1.9115877397890564, "grad_norm": 8.590535176384485, "learning_rate": 2.554289274176419e-08, "loss": 0.6249, "step": 13502 }, { "epoch": 1.9117293126636936, "grad_norm": 9.903796352406555, "learning_rate": 2.546122923252681e-08, "loss": 0.648, "step": 13503 }, { "epoch": 1.9118708855383308, "grad_norm": 10.27666559015401, "learning_rate": 2.5379695807947467e-08, "loss": 0.6357, "step": 13504 }, { "epoch": 1.912012458412968, "grad_norm": 9.384825173783499, "learning_rate": 2.5298292472312192e-08, "loss": 0.715, "step": 13505 }, { "epoch": 1.9121540312876053, "grad_norm": 9.277616772298412, "learning_rate": 2.5217019229900607e-08, "loss": 0.6645, "step": 13506 }, { "epoch": 1.9122956041622425, "grad_norm": 9.21465704624868, "learning_rate": 2.513587608498541e-08, "loss": 0.6729, "step": 13507 }, { "epoch": 1.9124371770368798, "grad_norm": 9.293706786844728, "learning_rate": 2.5054863041831524e-08, "loss": 0.791, "step": 13508 }, { "epoch": 1.912578749911517, "grad_norm": 9.068607771406871, "learning_rate": 2.4973980104698036e-08, "loss": 0.7589, "step": 13509 }, { "epoch": 1.9127203227861542, "grad_norm": 8.40843109991464, "learning_rate": 2.4893227277837106e-08, "loss": 0.6709, "step": 13510 }, { "epoch": 1.9128618956607912, "grad_norm": 9.361867836974236, "learning_rate": 2.481260456549367e-08, "loss": 0.7087, "step": 13511 }, { "epoch": 1.9130034685354285, "grad_norm": 10.447360743105506, "learning_rate": 2.4732111971906004e-08, "loss": 0.6304, "step": 13512 }, { "epoch": 1.9131450414100657, "grad_norm": 9.458792609492743, "learning_rate": 2.4651749501305446e-08, "loss": 0.6144, "step": 13513 }, { "epoch": 1.913286614284703, "grad_norm": 8.532906507705231, "learning_rate": 2.4571517157916946e-08, "loss": 0.5612, "step": 13514 }, { "epoch": 1.9134281871593402, "grad_norm": 8.771369343837689, "learning_rate": 2.449141494595797e-08, "loss": 0.6554, "step": 13515 }, { "epoch": 1.9135697600339774, "grad_norm": 8.598989436191353, "learning_rate": 2.441144286963931e-08, "loss": 0.6082, "step": 13516 }, { "epoch": 1.9137113329086146, "grad_norm": 8.661397457004448, "learning_rate": 2.433160093316539e-08, "loss": 0.6003, "step": 13517 }, { "epoch": 1.9138529057832518, "grad_norm": 9.346897577470852, "learning_rate": 2.4251889140733398e-08, "loss": 0.8109, "step": 13518 }, { "epoch": 1.913994478657889, "grad_norm": 9.771538834360033, "learning_rate": 2.417230749653332e-08, "loss": 0.6919, "step": 13519 }, { "epoch": 1.9141360515325263, "grad_norm": 10.17382043848097, "learning_rate": 2.409285600474931e-08, "loss": 0.7042, "step": 13520 }, { "epoch": 1.9142776244071635, "grad_norm": 8.157840968215506, "learning_rate": 2.401353466955747e-08, "loss": 0.6702, "step": 13521 }, { "epoch": 1.9144191972818008, "grad_norm": 9.800722866256951, "learning_rate": 2.3934343495128075e-08, "loss": 0.6801, "step": 13522 }, { "epoch": 1.914560770156438, "grad_norm": 8.824894839084397, "learning_rate": 2.385528248562391e-08, "loss": 0.6901, "step": 13523 }, { "epoch": 1.9147023430310752, "grad_norm": 9.441694729508807, "learning_rate": 2.3776351645201367e-08, "loss": 0.7282, "step": 13524 }, { "epoch": 1.9148439159057125, "grad_norm": 9.50327485507977, "learning_rate": 2.3697550978009632e-08, "loss": 0.6329, "step": 13525 }, { "epoch": 1.9149854887803497, "grad_norm": 9.14826695440415, "learning_rate": 2.3618880488190942e-08, "loss": 0.6534, "step": 13526 }, { "epoch": 1.915127061654987, "grad_norm": 10.173572966960158, "learning_rate": 2.3540340179881717e-08, "loss": 0.7111, "step": 13527 }, { "epoch": 1.9152686345296241, "grad_norm": 9.497763922695173, "learning_rate": 2.3461930057210037e-08, "loss": 0.7048, "step": 13528 }, { "epoch": 1.9154102074042614, "grad_norm": 9.452214977504322, "learning_rate": 2.338365012429816e-08, "loss": 0.7129, "step": 13529 }, { "epoch": 1.9155517802788986, "grad_norm": 8.615550296364109, "learning_rate": 2.3305500385261137e-08, "loss": 0.6178, "step": 13530 }, { "epoch": 1.9156933531535358, "grad_norm": 10.88842470991898, "learning_rate": 2.322748084420734e-08, "loss": 0.6623, "step": 13531 }, { "epoch": 1.915834926028173, "grad_norm": 10.635321417122654, "learning_rate": 2.3149591505237935e-08, "loss": 0.645, "step": 13532 }, { "epoch": 1.9159764989028103, "grad_norm": 8.971615429946358, "learning_rate": 2.30718323724477e-08, "loss": 0.5956, "step": 13533 }, { "epoch": 1.9161180717774475, "grad_norm": 8.706746031247077, "learning_rate": 2.299420344992448e-08, "loss": 0.6573, "step": 13534 }, { "epoch": 1.9162596446520848, "grad_norm": 11.766546408351212, "learning_rate": 2.2916704741748897e-08, "loss": 0.6087, "step": 13535 }, { "epoch": 1.916401217526722, "grad_norm": 8.64234396752002, "learning_rate": 2.283933625199547e-08, "loss": 0.688, "step": 13536 }, { "epoch": 1.9165427904013592, "grad_norm": 9.962821513005238, "learning_rate": 2.2762097984730948e-08, "loss": 0.6575, "step": 13537 }, { "epoch": 1.9166843632759964, "grad_norm": 8.073415538181374, "learning_rate": 2.268498994401569e-08, "loss": 0.6554, "step": 13538 }, { "epoch": 1.9168259361506337, "grad_norm": 9.806311727393956, "learning_rate": 2.2608012133903402e-08, "loss": 0.7004, "step": 13539 }, { "epoch": 1.916967509025271, "grad_norm": 8.552122189568651, "learning_rate": 2.2531164558440843e-08, "loss": 0.6727, "step": 13540 }, { "epoch": 1.9171090818999081, "grad_norm": 9.117350677363651, "learning_rate": 2.2454447221667563e-08, "loss": 0.5882, "step": 13541 }, { "epoch": 1.9172506547745451, "grad_norm": 9.103020520957891, "learning_rate": 2.2377860127616447e-08, "loss": 0.7575, "step": 13542 }, { "epoch": 1.9173922276491824, "grad_norm": 8.790461694181655, "learning_rate": 2.230140328031427e-08, "loss": 0.669, "step": 13543 }, { "epoch": 1.9175338005238196, "grad_norm": 9.387163333468676, "learning_rate": 2.222507668377949e-08, "loss": 0.6104, "step": 13544 }, { "epoch": 1.9176753733984568, "grad_norm": 8.912137839300916, "learning_rate": 2.214888034202528e-08, "loss": 0.64, "step": 13545 }, { "epoch": 1.917816946273094, "grad_norm": 9.950408804465098, "learning_rate": 2.2072814259056496e-08, "loss": 0.6814, "step": 13546 }, { "epoch": 1.9179585191477313, "grad_norm": 10.1335785869567, "learning_rate": 2.199687843887244e-08, "loss": 0.7572, "step": 13547 }, { "epoch": 1.9181000920223685, "grad_norm": 9.828027188006915, "learning_rate": 2.1921072885464633e-08, "loss": 0.805, "step": 13548 }, { "epoch": 1.9182416648970058, "grad_norm": 9.247220008243525, "learning_rate": 2.1845397602818508e-08, "loss": 0.6846, "step": 13549 }, { "epoch": 1.918383237771643, "grad_norm": 9.866184984082587, "learning_rate": 2.1769852594912265e-08, "loss": 0.7234, "step": 13550 }, { "epoch": 1.9185248106462802, "grad_norm": 8.785902209603675, "learning_rate": 2.169443786571662e-08, "loss": 0.6575, "step": 13551 }, { "epoch": 1.9186663835209172, "grad_norm": 9.68571682177228, "learning_rate": 2.161915341919646e-08, "loss": 0.7023, "step": 13552 }, { "epoch": 1.9188079563955545, "grad_norm": 8.182665613388302, "learning_rate": 2.1543999259309724e-08, "loss": 0.6114, "step": 13553 }, { "epoch": 1.9189495292701917, "grad_norm": 10.509518260132507, "learning_rate": 2.1468975390006587e-08, "loss": 0.7463, "step": 13554 }, { "epoch": 1.919091102144829, "grad_norm": 8.59306993047759, "learning_rate": 2.139408181523167e-08, "loss": 0.7005, "step": 13555 }, { "epoch": 1.9192326750194661, "grad_norm": 8.624519224173863, "learning_rate": 2.1319318538921552e-08, "loss": 0.844, "step": 13556 }, { "epoch": 1.9193742478941034, "grad_norm": 9.616773384158828, "learning_rate": 2.1244685565006695e-08, "loss": 0.58, "step": 13557 }, { "epoch": 1.9195158207687406, "grad_norm": 10.353271159099798, "learning_rate": 2.1170182897410353e-08, "loss": 0.6718, "step": 13558 }, { "epoch": 1.9196573936433778, "grad_norm": 8.583753507632029, "learning_rate": 2.109581054004939e-08, "loss": 0.6279, "step": 13559 }, { "epoch": 1.919798966518015, "grad_norm": 10.417223577235225, "learning_rate": 2.1021568496833454e-08, "loss": 0.58, "step": 13560 }, { "epoch": 1.9199405393926523, "grad_norm": 8.965815579132968, "learning_rate": 2.0947456771664987e-08, "loss": 0.6572, "step": 13561 }, { "epoch": 1.9200821122672895, "grad_norm": 9.39655846435923, "learning_rate": 2.087347536844059e-08, "loss": 0.6508, "step": 13562 }, { "epoch": 1.9202236851419268, "grad_norm": 9.653837526056266, "learning_rate": 2.0799624291048816e-08, "loss": 0.6807, "step": 13563 }, { "epoch": 1.920365258016564, "grad_norm": 10.048421737713108, "learning_rate": 2.0725903543372117e-08, "loss": 0.7328, "step": 13564 }, { "epoch": 1.9205068308912012, "grad_norm": 10.961800571865178, "learning_rate": 2.0652313129286284e-08, "loss": 0.7339, "step": 13565 }, { "epoch": 1.9206484037658385, "grad_norm": 9.269072381841896, "learning_rate": 2.057885305265961e-08, "loss": 0.6817, "step": 13566 }, { "epoch": 1.9207899766404757, "grad_norm": 9.119140834158811, "learning_rate": 2.0505523317353727e-08, "loss": 0.7312, "step": 13567 }, { "epoch": 1.920931549515113, "grad_norm": 9.222312937013314, "learning_rate": 2.0432323927223883e-08, "loss": 0.6564, "step": 13568 }, { "epoch": 1.9210731223897501, "grad_norm": 8.774007505275836, "learning_rate": 2.0359254886117842e-08, "loss": 0.6217, "step": 13569 }, { "epoch": 1.9212146952643874, "grad_norm": 9.906995101488542, "learning_rate": 2.0286316197876964e-08, "loss": 0.6821, "step": 13570 }, { "epoch": 1.9213562681390246, "grad_norm": 9.261326222043648, "learning_rate": 2.0213507866335412e-08, "loss": 0.5974, "step": 13571 }, { "epoch": 1.9214978410136618, "grad_norm": 8.493362527953813, "learning_rate": 2.0140829895320955e-08, "loss": 0.6946, "step": 13572 }, { "epoch": 1.921639413888299, "grad_norm": 10.016085085961418, "learning_rate": 2.0068282288653872e-08, "loss": 0.6099, "step": 13573 }, { "epoch": 1.9217809867629363, "grad_norm": 9.25754713490988, "learning_rate": 1.9995865050147777e-08, "loss": 0.7035, "step": 13574 }, { "epoch": 1.9219225596375735, "grad_norm": 8.478126655800612, "learning_rate": 1.9923578183610182e-08, "loss": 0.6847, "step": 13575 }, { "epoch": 1.9220641325122108, "grad_norm": 9.085751926039112, "learning_rate": 1.9851421692840822e-08, "loss": 0.6961, "step": 13576 }, { "epoch": 1.922205705386848, "grad_norm": 9.76884712468712, "learning_rate": 1.9779395581633055e-08, "loss": 0.6409, "step": 13577 }, { "epoch": 1.9223472782614852, "grad_norm": 8.953050010593458, "learning_rate": 1.9707499853773016e-08, "loss": 0.6672, "step": 13578 }, { "epoch": 1.9224888511361224, "grad_norm": 9.553637018729805, "learning_rate": 1.9635734513040182e-08, "loss": 0.748, "step": 13579 }, { "epoch": 1.9226304240107597, "grad_norm": 9.328942037867876, "learning_rate": 1.956409956320737e-08, "loss": 0.6437, "step": 13580 }, { "epoch": 1.922771996885397, "grad_norm": 8.018330078547226, "learning_rate": 1.949259500804074e-08, "loss": 0.6671, "step": 13581 }, { "epoch": 1.9229135697600341, "grad_norm": 9.59753224126235, "learning_rate": 1.942122085129866e-08, "loss": 0.6688, "step": 13582 }, { "epoch": 1.9230551426346711, "grad_norm": 10.481988990677465, "learning_rate": 1.9349977096733142e-08, "loss": 0.6263, "step": 13583 }, { "epoch": 1.9231967155093084, "grad_norm": 8.736947924372855, "learning_rate": 1.9278863748089794e-08, "loss": 0.6836, "step": 13584 }, { "epoch": 1.9233382883839456, "grad_norm": 8.519604234413425, "learning_rate": 1.9207880809107014e-08, "loss": 0.7437, "step": 13585 }, { "epoch": 1.9234798612585828, "grad_norm": 8.718420097645183, "learning_rate": 1.913702828351599e-08, "loss": 0.6784, "step": 13586 }, { "epoch": 1.92362143413322, "grad_norm": 8.085198675485366, "learning_rate": 1.9066306175041792e-08, "loss": 0.7235, "step": 13587 }, { "epoch": 1.9237630070078573, "grad_norm": 8.264066322053143, "learning_rate": 1.899571448740173e-08, "loss": 0.6336, "step": 13588 }, { "epoch": 1.9239045798824945, "grad_norm": 9.850880199162011, "learning_rate": 1.892525322430755e-08, "loss": 0.6334, "step": 13589 }, { "epoch": 1.9240461527571318, "grad_norm": 10.438154554115659, "learning_rate": 1.8854922389462405e-08, "loss": 0.5955, "step": 13590 }, { "epoch": 1.924187725631769, "grad_norm": 8.765445265397737, "learning_rate": 1.8784721986564168e-08, "loss": 0.599, "step": 13591 }, { "epoch": 1.9243292985064062, "grad_norm": 10.337038011848387, "learning_rate": 1.871465201930295e-08, "loss": 0.6167, "step": 13592 }, { "epoch": 1.9244708713810434, "grad_norm": 9.468541712720462, "learning_rate": 1.864471249136218e-08, "loss": 0.6677, "step": 13593 }, { "epoch": 1.9246124442556805, "grad_norm": 10.315973979844658, "learning_rate": 1.8574903406418933e-08, "loss": 0.74, "step": 13594 }, { "epoch": 1.9247540171303177, "grad_norm": 9.683139533392284, "learning_rate": 1.850522476814276e-08, "loss": 0.8025, "step": 13595 }, { "epoch": 1.924895590004955, "grad_norm": 10.510472479307543, "learning_rate": 1.843567658019657e-08, "loss": 0.7071, "step": 13596 }, { "epoch": 1.9250371628795921, "grad_norm": 8.333173928325664, "learning_rate": 1.8366258846236607e-08, "loss": 0.7295, "step": 13597 }, { "epoch": 1.9251787357542294, "grad_norm": 10.159361001893616, "learning_rate": 1.8296971569911893e-08, "loss": 0.7339, "step": 13598 }, { "epoch": 1.9253203086288666, "grad_norm": 10.01718151358326, "learning_rate": 1.822781475486507e-08, "loss": 0.7423, "step": 13599 }, { "epoch": 1.9254618815035038, "grad_norm": 9.084552321460187, "learning_rate": 1.8158788404731565e-08, "loss": 0.6938, "step": 13600 }, { "epoch": 1.925603454378141, "grad_norm": 10.128474263469386, "learning_rate": 1.8089892523139864e-08, "loss": 0.7657, "step": 13601 }, { "epoch": 1.9257450272527783, "grad_norm": 10.564341367672423, "learning_rate": 1.8021127113712066e-08, "loss": 0.7496, "step": 13602 }, { "epoch": 1.9258866001274155, "grad_norm": 10.234501728700113, "learning_rate": 1.7952492180063064e-08, "loss": 0.7111, "step": 13603 }, { "epoch": 1.9260281730020528, "grad_norm": 9.244884416714898, "learning_rate": 1.7883987725800522e-08, "loss": 0.6114, "step": 13604 }, { "epoch": 1.92616974587669, "grad_norm": 9.022603367154964, "learning_rate": 1.7815613754526283e-08, "loss": 0.6274, "step": 13605 }, { "epoch": 1.9263113187513272, "grad_norm": 8.93290538996438, "learning_rate": 1.774737026983414e-08, "loss": 0.6532, "step": 13606 }, { "epoch": 1.9264528916259644, "grad_norm": 9.901518934460139, "learning_rate": 1.7679257275312057e-08, "loss": 0.6727, "step": 13607 }, { "epoch": 1.9265944645006017, "grad_norm": 9.536359574316819, "learning_rate": 1.7611274774540777e-08, "loss": 0.7613, "step": 13608 }, { "epoch": 1.926736037375239, "grad_norm": 8.848143871082732, "learning_rate": 1.7543422771093554e-08, "loss": 0.6033, "step": 13609 }, { "epoch": 1.9268776102498761, "grad_norm": 10.041315560646252, "learning_rate": 1.7475701268537814e-08, "loss": 0.7203, "step": 13610 }, { "epoch": 1.9270191831245134, "grad_norm": 8.963403257771335, "learning_rate": 1.7408110270432932e-08, "loss": 0.641, "step": 13611 }, { "epoch": 1.9271607559991506, "grad_norm": 9.65852324260947, "learning_rate": 1.7340649780333007e-08, "loss": 0.7556, "step": 13612 }, { "epoch": 1.9273023288737878, "grad_norm": 9.759282315241924, "learning_rate": 1.7273319801784094e-08, "loss": 0.659, "step": 13613 }, { "epoch": 1.927443901748425, "grad_norm": 8.988764213176255, "learning_rate": 1.7206120338325305e-08, "loss": 0.6258, "step": 13614 }, { "epoch": 1.9275854746230623, "grad_norm": 8.282666815710876, "learning_rate": 1.7139051393489647e-08, "loss": 0.6046, "step": 13615 }, { "epoch": 1.9277270474976995, "grad_norm": 9.5652609435888, "learning_rate": 1.7072112970802634e-08, "loss": 0.6836, "step": 13616 }, { "epoch": 1.9278686203723367, "grad_norm": 8.159990760947588, "learning_rate": 1.7005305073783396e-08, "loss": 0.6032, "step": 13617 }, { "epoch": 1.928010193246974, "grad_norm": 8.892572333830204, "learning_rate": 1.6938627705943566e-08, "loss": 0.7773, "step": 13618 }, { "epoch": 1.9281517661216112, "grad_norm": 10.042960013731129, "learning_rate": 1.6872080870788955e-08, "loss": 0.7236, "step": 13619 }, { "epoch": 1.9282933389962484, "grad_norm": 8.815010308461428, "learning_rate": 1.6805664571817593e-08, "loss": 0.6626, "step": 13620 }, { "epoch": 1.9284349118708857, "grad_norm": 8.497003363663532, "learning_rate": 1.6739378812520858e-08, "loss": 0.6611, "step": 13621 }, { "epoch": 1.928576484745523, "grad_norm": 10.18754193818055, "learning_rate": 1.667322359638318e-08, "loss": 0.6524, "step": 13622 }, { "epoch": 1.9287180576201601, "grad_norm": 7.6046423636970815, "learning_rate": 1.660719892688262e-08, "loss": 0.6159, "step": 13623 }, { "epoch": 1.9288596304947974, "grad_norm": 10.250365087938826, "learning_rate": 1.6541304807489998e-08, "loss": 0.635, "step": 13624 }, { "epoch": 1.9290012033694344, "grad_norm": 9.56163908629695, "learning_rate": 1.6475541241669224e-08, "loss": 0.6254, "step": 13625 }, { "epoch": 1.9291427762440716, "grad_norm": 8.006466636138907, "learning_rate": 1.6409908232877246e-08, "loss": 0.7364, "step": 13626 }, { "epoch": 1.9292843491187088, "grad_norm": 8.125908434040957, "learning_rate": 1.6344405784564642e-08, "loss": 0.5475, "step": 13627 }, { "epoch": 1.929425921993346, "grad_norm": 9.79076023099733, "learning_rate": 1.6279033900175047e-08, "loss": 0.6868, "step": 13628 }, { "epoch": 1.9295674948679833, "grad_norm": 9.633615180339678, "learning_rate": 1.6213792583144318e-08, "loss": 0.6853, "step": 13629 }, { "epoch": 1.9297090677426205, "grad_norm": 8.735493869805763, "learning_rate": 1.614868183690249e-08, "loss": 0.6515, "step": 13630 }, { "epoch": 1.9298506406172578, "grad_norm": 9.311133777324772, "learning_rate": 1.6083701664872663e-08, "loss": 0.7008, "step": 13631 }, { "epoch": 1.929992213491895, "grad_norm": 10.17899796206477, "learning_rate": 1.6018852070470437e-08, "loss": 0.7583, "step": 13632 }, { "epoch": 1.9301337863665322, "grad_norm": 9.67898441252038, "learning_rate": 1.5954133057105027e-08, "loss": 0.7835, "step": 13633 }, { "epoch": 1.9302753592411694, "grad_norm": 10.184112547319224, "learning_rate": 1.588954462817871e-08, "loss": 0.6891, "step": 13634 }, { "epoch": 1.9304169321158064, "grad_norm": 8.648627605605945, "learning_rate": 1.582508678708683e-08, "loss": 0.5956, "step": 13635 }, { "epoch": 1.9305585049904437, "grad_norm": 10.73903291748981, "learning_rate": 1.5760759537217783e-08, "loss": 0.6564, "step": 13636 }, { "epoch": 1.930700077865081, "grad_norm": 9.305869385344383, "learning_rate": 1.5696562881953314e-08, "loss": 0.7086, "step": 13637 }, { "epoch": 1.9308416507397181, "grad_norm": 10.175750134909242, "learning_rate": 1.563249682466822e-08, "loss": 0.6175, "step": 13638 }, { "epoch": 1.9309832236143554, "grad_norm": 8.064066889894175, "learning_rate": 1.5568561368730082e-08, "loss": 0.6499, "step": 13639 }, { "epoch": 1.9311247964889926, "grad_norm": 9.366990800817392, "learning_rate": 1.5504756517500385e-08, "loss": 0.6782, "step": 13640 }, { "epoch": 1.9312663693636298, "grad_norm": 8.02141803410295, "learning_rate": 1.544108227433311e-08, "loss": 0.7606, "step": 13641 }, { "epoch": 1.931407942238267, "grad_norm": 9.099957107610539, "learning_rate": 1.5377538642575574e-08, "loss": 0.7344, "step": 13642 }, { "epoch": 1.9315495151129043, "grad_norm": 8.742383857546825, "learning_rate": 1.5314125625568167e-08, "loss": 0.6741, "step": 13643 }, { "epoch": 1.9316910879875415, "grad_norm": 8.728531576890212, "learning_rate": 1.5250843226644608e-08, "loss": 0.5999, "step": 13644 }, { "epoch": 1.9318326608621788, "grad_norm": 10.825792906444436, "learning_rate": 1.518769144913168e-08, "loss": 0.6897, "step": 13645 }, { "epoch": 1.931974233736816, "grad_norm": 8.825174078156511, "learning_rate": 1.5124670296348676e-08, "loss": 0.7296, "step": 13646 }, { "epoch": 1.9321158066114532, "grad_norm": 8.303496798459745, "learning_rate": 1.506177977160933e-08, "loss": 0.6901, "step": 13647 }, { "epoch": 1.9322573794860904, "grad_norm": 9.032603231769794, "learning_rate": 1.4999019878219056e-08, "loss": 0.6558, "step": 13648 }, { "epoch": 1.9323989523607277, "grad_norm": 9.86997143154783, "learning_rate": 1.4936390619477715e-08, "loss": 0.7247, "step": 13649 }, { "epoch": 1.932540525235365, "grad_norm": 10.94703337314962, "learning_rate": 1.4873891998677115e-08, "loss": 0.6252, "step": 13650 }, { "epoch": 1.9326820981100021, "grad_norm": 10.044873075238744, "learning_rate": 1.4811524019103241e-08, "loss": 0.6601, "step": 13651 }, { "epoch": 1.9328236709846394, "grad_norm": 9.673325536169603, "learning_rate": 1.4749286684034303e-08, "loss": 0.6148, "step": 13652 }, { "epoch": 1.9329652438592766, "grad_norm": 9.761338512377423, "learning_rate": 1.468717999674213e-08, "loss": 0.7035, "step": 13653 }, { "epoch": 1.9331068167339138, "grad_norm": 9.783194741648934, "learning_rate": 1.4625203960492162e-08, "loss": 0.739, "step": 13654 }, { "epoch": 1.933248389608551, "grad_norm": 9.275745339277114, "learning_rate": 1.4563358578542074e-08, "loss": 0.6538, "step": 13655 }, { "epoch": 1.9333899624831883, "grad_norm": 10.09841744894439, "learning_rate": 1.4501643854142877e-08, "loss": 0.6758, "step": 13656 }, { "epoch": 1.9335315353578255, "grad_norm": 8.244802513926718, "learning_rate": 1.4440059790538918e-08, "loss": 0.7601, "step": 13657 }, { "epoch": 1.9336731082324627, "grad_norm": 10.03979318534948, "learning_rate": 1.4378606390967609e-08, "loss": 0.7092, "step": 13658 }, { "epoch": 1.9338146811071, "grad_norm": 8.329518729385486, "learning_rate": 1.4317283658659698e-08, "loss": 0.6632, "step": 13659 }, { "epoch": 1.9339562539817372, "grad_norm": 12.26146301800062, "learning_rate": 1.4256091596838717e-08, "loss": 0.6502, "step": 13660 }, { "epoch": 1.9340978268563744, "grad_norm": 10.827863593751207, "learning_rate": 1.4195030208721816e-08, "loss": 0.6602, "step": 13661 }, { "epoch": 1.9342393997310117, "grad_norm": 8.007651484661697, "learning_rate": 1.4134099497518372e-08, "loss": 0.6976, "step": 13662 }, { "epoch": 1.934380972605649, "grad_norm": 10.51990438961777, "learning_rate": 1.4073299466431933e-08, "loss": 0.7176, "step": 13663 }, { "epoch": 1.9345225454802861, "grad_norm": 10.578185223586877, "learning_rate": 1.4012630118658555e-08, "loss": 0.6677, "step": 13664 }, { "epoch": 1.9346641183549234, "grad_norm": 8.485163137455771, "learning_rate": 1.395209145738763e-08, "loss": 0.6604, "step": 13665 }, { "epoch": 1.9348056912295604, "grad_norm": 10.325616067236314, "learning_rate": 1.389168348580161e-08, "loss": 0.721, "step": 13666 }, { "epoch": 1.9349472641041976, "grad_norm": 7.774687546710453, "learning_rate": 1.3831406207076014e-08, "loss": 0.695, "step": 13667 }, { "epoch": 1.9350888369788348, "grad_norm": 8.914644909353557, "learning_rate": 1.3771259624379696e-08, "loss": 0.6354, "step": 13668 }, { "epoch": 1.935230409853472, "grad_norm": 9.545707768636081, "learning_rate": 1.3711243740874292e-08, "loss": 0.6755, "step": 13669 }, { "epoch": 1.9353719827281093, "grad_norm": 9.657092208922965, "learning_rate": 1.3651358559715056e-08, "loss": 0.6232, "step": 13670 }, { "epoch": 1.9355135556027465, "grad_norm": 11.380680217023428, "learning_rate": 1.3591604084049747e-08, "loss": 0.7352, "step": 13671 }, { "epoch": 1.9356551284773837, "grad_norm": 10.109508201039976, "learning_rate": 1.3531980317020299e-08, "loss": 0.6586, "step": 13672 }, { "epoch": 1.935796701352021, "grad_norm": 8.880961820046357, "learning_rate": 1.3472487261760313e-08, "loss": 0.7229, "step": 13673 }, { "epoch": 1.9359382742266582, "grad_norm": 10.492922259385319, "learning_rate": 1.3413124921397846e-08, "loss": 0.7702, "step": 13674 }, { "epoch": 1.9360798471012954, "grad_norm": 9.290771532491007, "learning_rate": 1.3353893299053178e-08, "loss": 0.7796, "step": 13675 }, { "epoch": 1.9362214199759324, "grad_norm": 8.731536567807577, "learning_rate": 1.3294792397840206e-08, "loss": 0.6428, "step": 13676 }, { "epoch": 1.9363629928505697, "grad_norm": 10.514072615377248, "learning_rate": 1.323582222086589e-08, "loss": 0.7058, "step": 13677 }, { "epoch": 1.936504565725207, "grad_norm": 9.06640625, "learning_rate": 1.3176982771230252e-08, "loss": 0.6594, "step": 13678 }, { "epoch": 1.9366461385998441, "grad_norm": 8.904604682865235, "learning_rate": 1.311827405202637e-08, "loss": 0.7443, "step": 13679 }, { "epoch": 1.9367877114744814, "grad_norm": 10.037158972622642, "learning_rate": 1.3059696066340388e-08, "loss": 0.6862, "step": 13680 }, { "epoch": 1.9369292843491186, "grad_norm": 10.94342996335212, "learning_rate": 1.3001248817251788e-08, "loss": 0.6541, "step": 13681 }, { "epoch": 1.9370708572237558, "grad_norm": 7.71248628933521, "learning_rate": 1.294293230783339e-08, "loss": 0.7123, "step": 13682 }, { "epoch": 1.937212430098393, "grad_norm": 9.604081843753907, "learning_rate": 1.2884746541150516e-08, "loss": 0.6199, "step": 13683 }, { "epoch": 1.9373540029730303, "grad_norm": 9.058102297066483, "learning_rate": 1.2826691520262114e-08, "loss": 0.6936, "step": 13684 }, { "epoch": 1.9374955758476675, "grad_norm": 8.63281382565035, "learning_rate": 1.2768767248219903e-08, "loss": 0.7068, "step": 13685 }, { "epoch": 1.9376371487223047, "grad_norm": 10.686513040937212, "learning_rate": 1.2710973728069231e-08, "loss": 0.67, "step": 13686 }, { "epoch": 1.937778721596942, "grad_norm": 9.148474610967641, "learning_rate": 1.2653310962847943e-08, "loss": 0.5785, "step": 13687 }, { "epoch": 1.9379202944715792, "grad_norm": 10.613732680406683, "learning_rate": 1.2595778955587501e-08, "loss": 0.7148, "step": 13688 }, { "epoch": 1.9380618673462164, "grad_norm": 9.300958022128395, "learning_rate": 1.2538377709312155e-08, "loss": 0.6425, "step": 13689 }, { "epoch": 1.9382034402208537, "grad_norm": 9.6383685125133, "learning_rate": 1.248110722703949e-08, "loss": 0.7519, "step": 13690 }, { "epoch": 1.938345013095491, "grad_norm": 10.19275553124984, "learning_rate": 1.2423967511780432e-08, "loss": 0.5549, "step": 13691 }, { "epoch": 1.9384865859701281, "grad_norm": 8.904819735073819, "learning_rate": 1.2366958566538689e-08, "loss": 0.6973, "step": 13692 }, { "epoch": 1.9386281588447654, "grad_norm": 7.516227206334062, "learning_rate": 1.231008039431103e-08, "loss": 0.7196, "step": 13693 }, { "epoch": 1.9387697317194026, "grad_norm": 9.2363593072137, "learning_rate": 1.2253332998087286e-08, "loss": 0.6847, "step": 13694 }, { "epoch": 1.9389113045940398, "grad_norm": 11.000013178037205, "learning_rate": 1.2196716380851181e-08, "loss": 0.7298, "step": 13695 }, { "epoch": 1.939052877468677, "grad_norm": 7.09625898851501, "learning_rate": 1.214023054557839e-08, "loss": 0.5946, "step": 13696 }, { "epoch": 1.9391944503433143, "grad_norm": 7.8430708025005815, "learning_rate": 1.2083875495238761e-08, "loss": 0.5665, "step": 13697 }, { "epoch": 1.9393360232179515, "grad_norm": 9.271529016574116, "learning_rate": 1.2027651232794924e-08, "loss": 0.6505, "step": 13698 }, { "epoch": 1.9394775960925887, "grad_norm": 11.30390629049606, "learning_rate": 1.197155776120229e-08, "loss": 0.5944, "step": 13699 }, { "epoch": 1.939619168967226, "grad_norm": 9.19423672485306, "learning_rate": 1.1915595083409615e-08, "loss": 0.6802, "step": 13700 }, { "epoch": 1.9397607418418632, "grad_norm": 10.09934176575694, "learning_rate": 1.1859763202358987e-08, "loss": 0.6914, "step": 13701 }, { "epoch": 1.9399023147165004, "grad_norm": 9.624941738992705, "learning_rate": 1.1804062120985282e-08, "loss": 0.5714, "step": 13702 }, { "epoch": 1.9400438875911377, "grad_norm": 8.080499945555175, "learning_rate": 1.1748491842216714e-08, "loss": 0.6234, "step": 13703 }, { "epoch": 1.940185460465775, "grad_norm": 7.524643909228752, "learning_rate": 1.1693052368974834e-08, "loss": 0.6922, "step": 13704 }, { "epoch": 1.9403270333404121, "grad_norm": 11.22663929275232, "learning_rate": 1.1637743704173698e-08, "loss": 0.7286, "step": 13705 }, { "epoch": 1.9404686062150494, "grad_norm": 9.523435897764426, "learning_rate": 1.1582565850720984e-08, "loss": 0.554, "step": 13706 }, { "epoch": 1.9406101790896866, "grad_norm": 9.029428476558262, "learning_rate": 1.1527518811517146e-08, "loss": 0.7148, "step": 13707 }, { "epoch": 1.9407517519643236, "grad_norm": 11.020064348147187, "learning_rate": 1.1472602589456538e-08, "loss": 0.584, "step": 13708 }, { "epoch": 1.9408933248389608, "grad_norm": 9.446810746297778, "learning_rate": 1.1417817187425461e-08, "loss": 0.7002, "step": 13709 }, { "epoch": 1.941034897713598, "grad_norm": 9.385494207865609, "learning_rate": 1.1363162608304112e-08, "loss": 0.6323, "step": 13710 }, { "epoch": 1.9411764705882353, "grad_norm": 8.043124789798885, "learning_rate": 1.1308638854965748e-08, "loss": 0.7315, "step": 13711 }, { "epoch": 1.9413180434628725, "grad_norm": 10.403252430505207, "learning_rate": 1.1254245930276686e-08, "loss": 0.6446, "step": 13712 }, { "epoch": 1.9414596163375097, "grad_norm": 9.921102562820915, "learning_rate": 1.1199983837096307e-08, "loss": 0.6528, "step": 13713 }, { "epoch": 1.941601189212147, "grad_norm": 8.085410044692868, "learning_rate": 1.1145852578276772e-08, "loss": 0.5877, "step": 13714 }, { "epoch": 1.9417427620867842, "grad_norm": 9.597934468901938, "learning_rate": 1.109185215666414e-08, "loss": 0.6495, "step": 13715 }, { "epoch": 1.9418843349614214, "grad_norm": 10.007222809641863, "learning_rate": 1.103798257509725e-08, "loss": 0.6178, "step": 13716 }, { "epoch": 1.9420259078360587, "grad_norm": 9.637958077356371, "learning_rate": 1.0984243836407449e-08, "loss": 0.6582, "step": 13717 }, { "epoch": 1.9421674807106957, "grad_norm": 9.861719786674072, "learning_rate": 1.0930635943420254e-08, "loss": 0.7337, "step": 13718 }, { "epoch": 1.942309053585333, "grad_norm": 10.087947157694934, "learning_rate": 1.0877158898953411e-08, "loss": 0.7428, "step": 13719 }, { "epoch": 1.9424506264599701, "grad_norm": 10.22381671503633, "learning_rate": 1.082381270581856e-08, "loss": 0.7412, "step": 13720 }, { "epoch": 1.9425921993346074, "grad_norm": 8.528709396630182, "learning_rate": 1.0770597366819847e-08, "loss": 0.6872, "step": 13721 }, { "epoch": 1.9427337722092446, "grad_norm": 8.338192349673655, "learning_rate": 1.0717512884754478e-08, "loss": 0.6863, "step": 13722 }, { "epoch": 1.9428753450838818, "grad_norm": 7.693127317044725, "learning_rate": 1.0664559262413831e-08, "loss": 0.6524, "step": 13723 }, { "epoch": 1.943016917958519, "grad_norm": 9.775999177174135, "learning_rate": 1.061173650258096e-08, "loss": 0.6763, "step": 13724 }, { "epoch": 1.9431584908331563, "grad_norm": 9.385149128805672, "learning_rate": 1.0559044608032809e-08, "loss": 0.7093, "step": 13725 }, { "epoch": 1.9433000637077935, "grad_norm": 9.957046957740651, "learning_rate": 1.0506483581539662e-08, "loss": 0.7206, "step": 13726 }, { "epoch": 1.9434416365824307, "grad_norm": 10.09981276839046, "learning_rate": 1.0454053425864308e-08, "loss": 0.7384, "step": 13727 }, { "epoch": 1.943583209457068, "grad_norm": 9.33124228168636, "learning_rate": 1.0401754143763154e-08, "loss": 0.7021, "step": 13728 }, { "epoch": 1.9437247823317052, "grad_norm": 8.755379031618487, "learning_rate": 1.034958573798539e-08, "loss": 0.5919, "step": 13729 }, { "epoch": 1.9438663552063424, "grad_norm": 8.713061592906213, "learning_rate": 1.0297548211273544e-08, "loss": 0.6376, "step": 13730 }, { "epoch": 1.9440079280809797, "grad_norm": 11.24644549695227, "learning_rate": 1.0245641566363208e-08, "loss": 0.6395, "step": 13731 }, { "epoch": 1.944149500955617, "grad_norm": 7.137010323049727, "learning_rate": 1.0193865805983028e-08, "loss": 0.7095, "step": 13732 }, { "epoch": 1.9442910738302541, "grad_norm": 7.8595645343230975, "learning_rate": 1.0142220932854995e-08, "loss": 0.6338, "step": 13733 }, { "epoch": 1.9444326467048914, "grad_norm": 8.339016260909139, "learning_rate": 1.0090706949693884e-08, "loss": 0.5928, "step": 13734 }, { "epoch": 1.9445742195795286, "grad_norm": 9.967178268655585, "learning_rate": 1.0039323859207529e-08, "loss": 0.7001, "step": 13735 }, { "epoch": 1.9447157924541658, "grad_norm": 9.702489270010453, "learning_rate": 9.988071664097376e-09, "loss": 0.6539, "step": 13736 }, { "epoch": 1.944857365328803, "grad_norm": 9.574456612329847, "learning_rate": 9.93695036705794e-09, "loss": 0.6376, "step": 13737 }, { "epoch": 1.9449989382034403, "grad_norm": 8.35825918005634, "learning_rate": 9.885959970775961e-09, "loss": 0.6962, "step": 13738 }, { "epoch": 1.9451405110780775, "grad_norm": 9.101330720627093, "learning_rate": 9.835100477932624e-09, "loss": 0.6057, "step": 13739 }, { "epoch": 1.9452820839527147, "grad_norm": 8.818757993828322, "learning_rate": 9.784371891201349e-09, "loss": 0.6633, "step": 13740 }, { "epoch": 1.945423656827352, "grad_norm": 10.145403613338361, "learning_rate": 9.733774213248615e-09, "loss": 0.6686, "step": 13741 }, { "epoch": 1.9455652297019892, "grad_norm": 7.746689119990381, "learning_rate": 9.683307446734792e-09, "loss": 0.6486, "step": 13742 }, { "epoch": 1.9457068025766264, "grad_norm": 9.127554340017948, "learning_rate": 9.632971594312478e-09, "loss": 0.7049, "step": 13743 }, { "epoch": 1.9458483754512637, "grad_norm": 11.199994318824418, "learning_rate": 9.582766658628173e-09, "loss": 0.6647, "step": 13744 }, { "epoch": 1.9459899483259009, "grad_norm": 9.71967470731483, "learning_rate": 9.532692642320596e-09, "loss": 0.6571, "step": 13745 }, { "epoch": 1.9461315212005381, "grad_norm": 8.31658055242423, "learning_rate": 9.482749548022641e-09, "loss": 0.6336, "step": 13746 }, { "epoch": 1.9462730940751753, "grad_norm": 10.635114455654122, "learning_rate": 9.43293737835943e-09, "loss": 0.6195, "step": 13747 }, { "epoch": 1.9464146669498126, "grad_norm": 7.6183257362707835, "learning_rate": 9.383256135949704e-09, "loss": 0.5615, "step": 13748 }, { "epoch": 1.9465562398244496, "grad_norm": 12.52138124981906, "learning_rate": 9.333705823404981e-09, "loss": 0.6371, "step": 13749 }, { "epoch": 1.9466978126990868, "grad_norm": 9.11865682783106, "learning_rate": 9.284286443330127e-09, "loss": 0.7381, "step": 13750 }, { "epoch": 1.946839385573724, "grad_norm": 9.454546961750898, "learning_rate": 9.234997998323613e-09, "loss": 0.7112, "step": 13751 }, { "epoch": 1.9469809584483613, "grad_norm": 7.969278235785549, "learning_rate": 9.185840490975594e-09, "loss": 0.5732, "step": 13752 }, { "epoch": 1.9471225313229985, "grad_norm": 10.129332192524041, "learning_rate": 9.136813923871224e-09, "loss": 0.72, "step": 13753 }, { "epoch": 1.9472641041976357, "grad_norm": 10.490222920248353, "learning_rate": 9.087918299586772e-09, "loss": 0.6841, "step": 13754 }, { "epoch": 1.947405677072273, "grad_norm": 9.767740786425527, "learning_rate": 9.039153620693242e-09, "loss": 0.6971, "step": 13755 }, { "epoch": 1.9475472499469102, "grad_norm": 9.166507742688998, "learning_rate": 8.990519889754412e-09, "loss": 0.5769, "step": 13756 }, { "epoch": 1.9476888228215474, "grad_norm": 8.342836937254683, "learning_rate": 8.942017109326295e-09, "loss": 0.7122, "step": 13757 }, { "epoch": 1.9478303956961847, "grad_norm": 9.300229584516021, "learning_rate": 8.893645281959073e-09, "loss": 0.7448, "step": 13758 }, { "epoch": 1.9479719685708217, "grad_norm": 10.062601977477243, "learning_rate": 8.845404410195157e-09, "loss": 0.6402, "step": 13759 }, { "epoch": 1.948113541445459, "grad_norm": 8.642793297109593, "learning_rate": 8.79729449657113e-09, "loss": 0.6683, "step": 13760 }, { "epoch": 1.9482551143200961, "grad_norm": 10.331427572747048, "learning_rate": 8.7493155436158e-09, "loss": 0.6824, "step": 13761 }, { "epoch": 1.9483966871947334, "grad_norm": 10.443996212406706, "learning_rate": 8.701467553851317e-09, "loss": 0.693, "step": 13762 }, { "epoch": 1.9485382600693706, "grad_norm": 10.081495847973892, "learning_rate": 8.65375052979317e-09, "loss": 0.6927, "step": 13763 }, { "epoch": 1.9486798329440078, "grad_norm": 9.237427699073237, "learning_rate": 8.60616447394963e-09, "loss": 0.7107, "step": 13764 }, { "epoch": 1.948821405818645, "grad_norm": 11.185532721911336, "learning_rate": 8.558709388822584e-09, "loss": 0.5866, "step": 13765 }, { "epoch": 1.9489629786932823, "grad_norm": 9.74628592934272, "learning_rate": 8.511385276906148e-09, "loss": 0.6263, "step": 13766 }, { "epoch": 1.9491045515679195, "grad_norm": 7.891564039498251, "learning_rate": 8.464192140688888e-09, "loss": 0.6419, "step": 13767 }, { "epoch": 1.9492461244425567, "grad_norm": 10.011466890486949, "learning_rate": 8.417129982650762e-09, "loss": 0.6672, "step": 13768 }, { "epoch": 1.949387697317194, "grad_norm": 8.565512913691123, "learning_rate": 8.370198805266739e-09, "loss": 0.7103, "step": 13769 }, { "epoch": 1.9495292701918312, "grad_norm": 9.076775613056938, "learning_rate": 8.323398611003176e-09, "loss": 0.6638, "step": 13770 }, { "epoch": 1.9496708430664684, "grad_norm": 8.720695469971444, "learning_rate": 8.27672940232116e-09, "loss": 0.6556, "step": 13771 }, { "epoch": 1.9498124159411057, "grad_norm": 8.296719134747114, "learning_rate": 8.230191181673175e-09, "loss": 0.6202, "step": 13772 }, { "epoch": 1.949953988815743, "grad_norm": 11.050999412392317, "learning_rate": 8.183783951506152e-09, "loss": 0.6995, "step": 13773 }, { "epoch": 1.9500955616903801, "grad_norm": 11.709879455206563, "learning_rate": 8.137507714259806e-09, "loss": 0.7289, "step": 13774 }, { "epoch": 1.9502371345650173, "grad_norm": 10.000166319417675, "learning_rate": 8.09136247236636e-09, "loss": 0.5784, "step": 13775 }, { "epoch": 1.9503787074396546, "grad_norm": 10.123448005690836, "learning_rate": 8.045348228252204e-09, "loss": 0.6207, "step": 13776 }, { "epoch": 1.9505202803142918, "grad_norm": 8.685879123887528, "learning_rate": 7.999464984335959e-09, "loss": 0.6697, "step": 13777 }, { "epoch": 1.950661853188929, "grad_norm": 8.370265704350476, "learning_rate": 7.953712743029585e-09, "loss": 0.7088, "step": 13778 }, { "epoch": 1.9508034260635663, "grad_norm": 8.617396001982296, "learning_rate": 7.908091506738658e-09, "loss": 0.6658, "step": 13779 }, { "epoch": 1.9509449989382035, "grad_norm": 11.107412087047296, "learning_rate": 7.862601277860982e-09, "loss": 0.5484, "step": 13780 }, { "epoch": 1.9510865718128407, "grad_norm": 9.281568117180802, "learning_rate": 7.817242058788255e-09, "loss": 0.6655, "step": 13781 }, { "epoch": 1.951228144687478, "grad_norm": 9.70392343673001, "learning_rate": 7.772013851904681e-09, "loss": 0.6389, "step": 13782 }, { "epoch": 1.9513697175621152, "grad_norm": 10.532869027765164, "learning_rate": 7.72691665958808e-09, "loss": 0.7286, "step": 13783 }, { "epoch": 1.9515112904367524, "grad_norm": 9.218754551773241, "learning_rate": 7.681950484209334e-09, "loss": 0.664, "step": 13784 }, { "epoch": 1.9516528633113897, "grad_norm": 11.08674783144396, "learning_rate": 7.637115328131828e-09, "loss": 0.6957, "step": 13785 }, { "epoch": 1.9517944361860269, "grad_norm": 8.974013216393645, "learning_rate": 7.592411193713123e-09, "loss": 0.7052, "step": 13786 }, { "epoch": 1.9519360090606641, "grad_norm": 8.872995499452301, "learning_rate": 7.547838083302728e-09, "loss": 0.768, "step": 13787 }, { "epoch": 1.9520775819353013, "grad_norm": 9.033346493794706, "learning_rate": 7.503395999244045e-09, "loss": 0.5089, "step": 13788 }, { "epoch": 1.9522191548099386, "grad_norm": 9.64479901979944, "learning_rate": 7.45908494387354e-09, "loss": 0.5977, "step": 13789 }, { "epoch": 1.9523607276845756, "grad_norm": 8.53717617688632, "learning_rate": 7.414904919520183e-09, "loss": 0.6639, "step": 13790 }, { "epoch": 1.9525023005592128, "grad_norm": 7.724043911035178, "learning_rate": 7.3708559285068374e-09, "loss": 0.7021, "step": 13791 }, { "epoch": 1.95264387343385, "grad_norm": 11.095150453528143, "learning_rate": 7.326937973148873e-09, "loss": 0.6839, "step": 13792 }, { "epoch": 1.9527854463084873, "grad_norm": 10.437396203170268, "learning_rate": 7.283151055755555e-09, "loss": 0.7455, "step": 13793 }, { "epoch": 1.9529270191831245, "grad_norm": 8.161347298190396, "learning_rate": 7.23949517862782e-09, "loss": 0.6566, "step": 13794 }, { "epoch": 1.9530685920577617, "grad_norm": 8.540418232809714, "learning_rate": 7.195970344061609e-09, "loss": 0.6196, "step": 13795 }, { "epoch": 1.953210164932399, "grad_norm": 10.005525207002817, "learning_rate": 7.152576554344259e-09, "loss": 0.6351, "step": 13796 }, { "epoch": 1.9533517378070362, "grad_norm": 11.66360724161778, "learning_rate": 7.109313811757279e-09, "loss": 0.6977, "step": 13797 }, { "epoch": 1.9534933106816734, "grad_norm": 10.626965150298746, "learning_rate": 7.066182118574683e-09, "loss": 0.6216, "step": 13798 }, { "epoch": 1.9536348835563107, "grad_norm": 8.398824028169201, "learning_rate": 7.023181477064378e-09, "loss": 0.7164, "step": 13799 }, { "epoch": 1.9537764564309479, "grad_norm": 10.569271648218155, "learning_rate": 6.980311889486502e-09, "loss": 0.6693, "step": 13800 }, { "epoch": 1.953918029305585, "grad_norm": 11.172394683061531, "learning_rate": 6.937573358094529e-09, "loss": 0.6429, "step": 13801 }, { "epoch": 1.9540596021802221, "grad_norm": 9.0382141928535, "learning_rate": 6.894965885135829e-09, "loss": 0.6683, "step": 13802 }, { "epoch": 1.9542011750548594, "grad_norm": 7.584143319381301, "learning_rate": 6.852489472849444e-09, "loss": 0.604, "step": 13803 }, { "epoch": 1.9543427479294966, "grad_norm": 10.520990734370734, "learning_rate": 6.810144123469142e-09, "loss": 0.6516, "step": 13804 }, { "epoch": 1.9544843208041338, "grad_norm": 9.88079972710055, "learning_rate": 6.7679298392200885e-09, "loss": 0.7156, "step": 13805 }, { "epoch": 1.954625893678771, "grad_norm": 9.293929664091191, "learning_rate": 6.7258466223221745e-09, "loss": 0.5633, "step": 13806 }, { "epoch": 1.9547674665534083, "grad_norm": 10.242320463596318, "learning_rate": 6.683894474987518e-09, "loss": 0.6387, "step": 13807 }, { "epoch": 1.9549090394280455, "grad_norm": 8.577921769422021, "learning_rate": 6.6420733994213006e-09, "loss": 0.6639, "step": 13808 }, { "epoch": 1.9550506123026827, "grad_norm": 9.28816392047814, "learning_rate": 6.600383397822319e-09, "loss": 0.6853, "step": 13809 }, { "epoch": 1.95519218517732, "grad_norm": 8.10456979201684, "learning_rate": 6.558824472381875e-09, "loss": 0.6005, "step": 13810 }, { "epoch": 1.9553337580519572, "grad_norm": 10.487788047062983, "learning_rate": 6.5173966252848885e-09, "loss": 0.7101, "step": 13811 }, { "epoch": 1.9554753309265944, "grad_norm": 8.398813581695817, "learning_rate": 6.476099858709062e-09, "loss": 0.6561, "step": 13812 }, { "epoch": 1.9556169038012317, "grad_norm": 11.473771412221959, "learning_rate": 6.4349341748254354e-09, "loss": 0.6977, "step": 13813 }, { "epoch": 1.9557584766758689, "grad_norm": 8.76591918234067, "learning_rate": 6.3938995757981125e-09, "loss": 0.5893, "step": 13814 }, { "epoch": 1.9559000495505061, "grad_norm": 9.339344631948052, "learning_rate": 6.3529960637842555e-09, "loss": 0.6442, "step": 13815 }, { "epoch": 1.9560416224251433, "grad_norm": 9.274869324619594, "learning_rate": 6.3122236409338125e-09, "loss": 0.6549, "step": 13816 }, { "epoch": 1.9561831952997806, "grad_norm": 9.54282364362151, "learning_rate": 6.271582309390622e-09, "loss": 0.6667, "step": 13817 }, { "epoch": 1.9563247681744178, "grad_norm": 9.213668405463691, "learning_rate": 6.231072071290756e-09, "loss": 0.6837, "step": 13818 }, { "epoch": 1.956466341049055, "grad_norm": 11.05922499633366, "learning_rate": 6.190692928764175e-09, "loss": 0.6582, "step": 13819 }, { "epoch": 1.9566079139236923, "grad_norm": 8.19396112025997, "learning_rate": 6.150444883933348e-09, "loss": 0.65, "step": 13820 }, { "epoch": 1.9567494867983295, "grad_norm": 9.633242162077243, "learning_rate": 6.110327938914085e-09, "loss": 0.6555, "step": 13821 }, { "epoch": 1.9568910596729667, "grad_norm": 9.20610643159652, "learning_rate": 6.070342095815529e-09, "loss": 0.6517, "step": 13822 }, { "epoch": 1.957032632547604, "grad_norm": 9.208073216787682, "learning_rate": 6.030487356739334e-09, "loss": 0.7342, "step": 13823 }, { "epoch": 1.9571742054222412, "grad_norm": 9.387652188424955, "learning_rate": 5.990763723780768e-09, "loss": 0.6234, "step": 13824 }, { "epoch": 1.9573157782968784, "grad_norm": 9.648359216534647, "learning_rate": 5.951171199028438e-09, "loss": 0.6222, "step": 13825 }, { "epoch": 1.9574573511715156, "grad_norm": 8.70499289173272, "learning_rate": 5.91170978456318e-09, "loss": 0.6045, "step": 13826 }, { "epoch": 1.9575989240461529, "grad_norm": 9.187711752183507, "learning_rate": 5.8723794824597226e-09, "loss": 0.6034, "step": 13827 }, { "epoch": 1.95774049692079, "grad_norm": 8.674590034581831, "learning_rate": 5.833180294785579e-09, "loss": 0.6937, "step": 13828 }, { "epoch": 1.9578820697954273, "grad_norm": 9.363605950382736, "learning_rate": 5.794112223601322e-09, "loss": 0.6671, "step": 13829 }, { "epoch": 1.9580236426700646, "grad_norm": 10.653887086814546, "learning_rate": 5.755175270961144e-09, "loss": 0.7525, "step": 13830 }, { "epoch": 1.9581652155447018, "grad_norm": 9.388251539438716, "learning_rate": 5.716369438911185e-09, "loss": 0.6422, "step": 13831 }, { "epoch": 1.9583067884193388, "grad_norm": 10.396187347520536, "learning_rate": 5.6776947294923115e-09, "loss": 0.6769, "step": 13832 }, { "epoch": 1.958448361293976, "grad_norm": 10.743175380712268, "learning_rate": 5.639151144736787e-09, "loss": 0.641, "step": 13833 }, { "epoch": 1.9585899341686133, "grad_norm": 9.512935465044919, "learning_rate": 5.6007386866713255e-09, "loss": 0.7048, "step": 13834 }, { "epoch": 1.9587315070432505, "grad_norm": 7.596622881203118, "learning_rate": 5.5624573573154205e-09, "loss": 0.6558, "step": 13835 }, { "epoch": 1.9588730799178877, "grad_norm": 10.97510763823747, "learning_rate": 5.524307158680797e-09, "loss": 0.7278, "step": 13836 }, { "epoch": 1.959014652792525, "grad_norm": 8.2251490060591, "learning_rate": 5.486288092773628e-09, "loss": 0.6666, "step": 13837 }, { "epoch": 1.9591562256671622, "grad_norm": 8.20500118053365, "learning_rate": 5.4484001615920375e-09, "loss": 0.7014, "step": 13838 }, { "epoch": 1.9592977985417994, "grad_norm": 8.644290305329438, "learning_rate": 5.410643367128321e-09, "loss": 0.6117, "step": 13839 }, { "epoch": 1.9594393714164366, "grad_norm": 10.554514738824817, "learning_rate": 5.373017711367001e-09, "loss": 0.6488, "step": 13840 }, { "epoch": 1.9595809442910739, "grad_norm": 8.754928508752048, "learning_rate": 5.335523196285941e-09, "loss": 0.6517, "step": 13841 }, { "epoch": 1.9597225171657109, "grad_norm": 8.942671979957293, "learning_rate": 5.2981598238563415e-09, "loss": 0.6511, "step": 13842 }, { "epoch": 1.9598640900403481, "grad_norm": 8.32522595531384, "learning_rate": 5.260927596042464e-09, "loss": 0.64, "step": 13843 }, { "epoch": 1.9600056629149853, "grad_norm": 8.062996782478448, "learning_rate": 5.223826514801356e-09, "loss": 0.6243, "step": 13844 }, { "epoch": 1.9601472357896226, "grad_norm": 8.246690490902054, "learning_rate": 5.186856582083677e-09, "loss": 0.7076, "step": 13845 }, { "epoch": 1.9602888086642598, "grad_norm": 8.549990684102655, "learning_rate": 5.1500177998325965e-09, "loss": 0.684, "step": 13846 }, { "epoch": 1.960430381538897, "grad_norm": 9.749630260792866, "learning_rate": 5.1133101699848975e-09, "loss": 0.685, "step": 13847 }, { "epoch": 1.9605719544135343, "grad_norm": 7.808084445543621, "learning_rate": 5.076733694470149e-09, "loss": 0.6607, "step": 13848 }, { "epoch": 1.9607135272881715, "grad_norm": 9.566746784720957, "learning_rate": 5.040288375211255e-09, "loss": 0.7208, "step": 13849 }, { "epoch": 1.9608551001628087, "grad_norm": 10.04349025350092, "learning_rate": 5.003974214124186e-09, "loss": 0.6597, "step": 13850 }, { "epoch": 1.960996673037446, "grad_norm": 9.945248444044498, "learning_rate": 4.96779121311769e-09, "loss": 0.6292, "step": 13851 }, { "epoch": 1.9611382459120832, "grad_norm": 8.335217173950182, "learning_rate": 4.931739374093858e-09, "loss": 0.6439, "step": 13852 }, { "epoch": 1.9612798187867204, "grad_norm": 9.144034403551851, "learning_rate": 4.895818698948396e-09, "loss": 0.6107, "step": 13853 }, { "epoch": 1.9614213916613576, "grad_norm": 9.819100462197072, "learning_rate": 4.860029189569237e-09, "loss": 0.6201, "step": 13854 }, { "epoch": 1.9615629645359949, "grad_norm": 8.163196630967542, "learning_rate": 4.824370847837933e-09, "loss": 0.6792, "step": 13855 }, { "epoch": 1.961704537410632, "grad_norm": 8.999345119810064, "learning_rate": 4.788843675629096e-09, "loss": 0.6038, "step": 13856 }, { "epoch": 1.9618461102852693, "grad_norm": 7.619119847046137, "learning_rate": 4.7534476748098416e-09, "loss": 0.7558, "step": 13857 }, { "epoch": 1.9619876831599066, "grad_norm": 8.45439976019444, "learning_rate": 4.7181828472417365e-09, "loss": 0.5859, "step": 13858 }, { "epoch": 1.9621292560345438, "grad_norm": 8.700550842385642, "learning_rate": 4.6830491947777445e-09, "loss": 0.6321, "step": 13859 }, { "epoch": 1.962270828909181, "grad_norm": 8.009034300846071, "learning_rate": 4.648046719265553e-09, "loss": 0.6679, "step": 13860 }, { "epoch": 1.9624124017838183, "grad_norm": 9.846494843006793, "learning_rate": 4.61317542254508e-09, "loss": 0.5788, "step": 13861 }, { "epoch": 1.9625539746584555, "grad_norm": 9.802726397498747, "learning_rate": 4.578435306449025e-09, "loss": 0.6722, "step": 13862 }, { "epoch": 1.9626955475330927, "grad_norm": 10.222700282910516, "learning_rate": 4.543826372803983e-09, "loss": 0.7903, "step": 13863 }, { "epoch": 1.96283712040773, "grad_norm": 8.694609662464279, "learning_rate": 4.50934862342961e-09, "loss": 0.6405, "step": 13864 }, { "epoch": 1.9629786932823672, "grad_norm": 10.184378865496734, "learning_rate": 4.475002060137789e-09, "loss": 0.7385, "step": 13865 }, { "epoch": 1.9631202661570044, "grad_norm": 8.828303458511517, "learning_rate": 4.440786684734577e-09, "loss": 0.6644, "step": 13866 }, { "epoch": 1.9632618390316416, "grad_norm": 9.00643690710809, "learning_rate": 4.406702499018256e-09, "loss": 0.7384, "step": 13867 }, { "epoch": 1.9634034119062789, "grad_norm": 7.994311694587274, "learning_rate": 4.372749504780727e-09, "loss": 0.6408, "step": 13868 }, { "epoch": 1.963544984780916, "grad_norm": 9.830138468732429, "learning_rate": 4.338927703807227e-09, "loss": 0.6358, "step": 13869 }, { "epoch": 1.9636865576555533, "grad_norm": 9.559497237133227, "learning_rate": 4.305237097875226e-09, "loss": 0.6884, "step": 13870 }, { "epoch": 1.9638281305301906, "grad_norm": 8.713418403721946, "learning_rate": 4.271677688756082e-09, "loss": 0.7257, "step": 13871 }, { "epoch": 1.9639697034048278, "grad_norm": 9.174217120039453, "learning_rate": 4.23824947821394e-09, "loss": 0.7544, "step": 13872 }, { "epoch": 1.9641112762794648, "grad_norm": 8.635388296438872, "learning_rate": 4.204952468006007e-09, "loss": 0.6959, "step": 13873 }, { "epoch": 1.964252849154102, "grad_norm": 9.095236954918397, "learning_rate": 4.171786659882826e-09, "loss": 0.7026, "step": 13874 }, { "epoch": 1.9643944220287393, "grad_norm": 9.012933550814976, "learning_rate": 4.138752055588002e-09, "loss": 0.6479, "step": 13875 }, { "epoch": 1.9645359949033765, "grad_norm": 10.268643056042134, "learning_rate": 4.105848656857925e-09, "loss": 0.6204, "step": 13876 }, { "epoch": 1.9646775677780137, "grad_norm": 9.189194295969697, "learning_rate": 4.073076465422321e-09, "loss": 0.6632, "step": 13877 }, { "epoch": 1.964819140652651, "grad_norm": 9.915429131574752, "learning_rate": 4.0404354830042566e-09, "loss": 0.712, "step": 13878 }, { "epoch": 1.9649607135272882, "grad_norm": 9.81868981161434, "learning_rate": 4.0079257113190275e-09, "loss": 0.6458, "step": 13879 }, { "epoch": 1.9651022864019254, "grad_norm": 8.896352950799294, "learning_rate": 3.9755471520763754e-09, "loss": 0.6464, "step": 13880 }, { "epoch": 1.9652438592765626, "grad_norm": 10.60838796437219, "learning_rate": 3.943299806977996e-09, "loss": 0.6777, "step": 13881 }, { "epoch": 1.9653854321511999, "grad_norm": 8.979671408029995, "learning_rate": 3.911183677719199e-09, "loss": 0.7587, "step": 13882 }, { "epoch": 1.965527005025837, "grad_norm": 10.24776713544014, "learning_rate": 3.8791987659883565e-09, "loss": 0.7604, "step": 13883 }, { "epoch": 1.9656685779004741, "grad_norm": 9.19505570279166, "learning_rate": 3.847345073466624e-09, "loss": 0.6202, "step": 13884 }, { "epoch": 1.9658101507751113, "grad_norm": 8.97000634302704, "learning_rate": 3.81562260182905e-09, "loss": 0.7162, "step": 13885 }, { "epoch": 1.9659517236497486, "grad_norm": 9.06736298673452, "learning_rate": 3.784031352742912e-09, "loss": 0.5458, "step": 13886 }, { "epoch": 1.9660932965243858, "grad_norm": 9.41701644202842, "learning_rate": 3.752571327868826e-09, "loss": 0.6856, "step": 13887 }, { "epoch": 1.966234869399023, "grad_norm": 8.055162980025024, "learning_rate": 3.721242528861024e-09, "loss": 0.5673, "step": 13888 }, { "epoch": 1.9663764422736603, "grad_norm": 7.861600102049886, "learning_rate": 3.6900449573659682e-09, "loss": 0.6167, "step": 13889 }, { "epoch": 1.9665180151482975, "grad_norm": 9.21994372898895, "learning_rate": 3.6589786150240112e-09, "loss": 0.6402, "step": 13890 }, { "epoch": 1.9666595880229347, "grad_norm": 10.47046358118782, "learning_rate": 3.6280435034682927e-09, "loss": 0.664, "step": 13891 }, { "epoch": 1.966801160897572, "grad_norm": 9.706993523568539, "learning_rate": 3.597239624325011e-09, "loss": 0.6291, "step": 13892 }, { "epoch": 1.9669427337722092, "grad_norm": 11.210315817366505, "learning_rate": 3.5665669792131484e-09, "loss": 0.757, "step": 13893 }, { "epoch": 1.9670843066468464, "grad_norm": 10.50174989196834, "learning_rate": 3.5360255697455826e-09, "loss": 0.6334, "step": 13894 }, { "epoch": 1.9672258795214836, "grad_norm": 8.878915272350234, "learning_rate": 3.505615397527695e-09, "loss": 0.6155, "step": 13895 }, { "epoch": 1.9673674523961209, "grad_norm": 9.764377264039535, "learning_rate": 3.4753364641582076e-09, "loss": 0.6844, "step": 13896 }, { "epoch": 1.967509025270758, "grad_norm": 8.681161008616275, "learning_rate": 3.445188771228625e-09, "loss": 0.6373, "step": 13897 }, { "epoch": 1.9676505981453953, "grad_norm": 8.859860253878823, "learning_rate": 3.4151723203240673e-09, "loss": 0.6255, "step": 13898 }, { "epoch": 1.9677921710200326, "grad_norm": 10.507176807560251, "learning_rate": 3.385287113022717e-09, "loss": 0.6831, "step": 13899 }, { "epoch": 1.9679337438946698, "grad_norm": 9.238851891731123, "learning_rate": 3.3555331508947076e-09, "loss": 0.6392, "step": 13900 }, { "epoch": 1.968075316769307, "grad_norm": 8.908780993165097, "learning_rate": 3.325910435505175e-09, "loss": 0.7083, "step": 13901 }, { "epoch": 1.9682168896439443, "grad_norm": 9.253759496514697, "learning_rate": 3.296418968410653e-09, "loss": 0.6881, "step": 13902 }, { "epoch": 1.9683584625185815, "grad_norm": 8.257522852210895, "learning_rate": 3.2670587511618448e-09, "loss": 0.5766, "step": 13903 }, { "epoch": 1.9685000353932187, "grad_norm": 8.793093259752624, "learning_rate": 3.2378297853022377e-09, "loss": 0.7465, "step": 13904 }, { "epoch": 1.968641608267856, "grad_norm": 9.184908754480091, "learning_rate": 3.208732072368104e-09, "loss": 0.6941, "step": 13905 }, { "epoch": 1.9687831811424932, "grad_norm": 10.09223931407488, "learning_rate": 3.179765613889052e-09, "loss": 0.7071, "step": 13906 }, { "epoch": 1.9689247540171304, "grad_norm": 10.47816813409143, "learning_rate": 3.150930411388309e-09, "loss": 0.6519, "step": 13907 }, { "epoch": 1.9690663268917676, "grad_norm": 9.1984154870904, "learning_rate": 3.1222264663813285e-09, "loss": 0.6492, "step": 13908 }, { "epoch": 1.9692078997664049, "grad_norm": 8.39442082363968, "learning_rate": 3.0936537803771814e-09, "loss": 0.5508, "step": 13909 }, { "epoch": 1.969349472641042, "grad_norm": 8.362313297850436, "learning_rate": 3.065212354878e-09, "loss": 0.6511, "step": 13910 }, { "epoch": 1.9694910455156793, "grad_norm": 9.910195512584817, "learning_rate": 3.036902191378699e-09, "loss": 0.684, "step": 13911 }, { "epoch": 1.9696326183903166, "grad_norm": 9.659954064301449, "learning_rate": 3.0087232913675325e-09, "loss": 0.7149, "step": 13912 }, { "epoch": 1.9697741912649538, "grad_norm": 10.825842238295229, "learning_rate": 2.980675656326093e-09, "loss": 0.6728, "step": 13913 }, { "epoch": 1.969915764139591, "grad_norm": 8.43145173483983, "learning_rate": 2.9527592877284793e-09, "loss": 0.6515, "step": 13914 }, { "epoch": 1.970057337014228, "grad_norm": 10.023306199817153, "learning_rate": 2.924974187042684e-09, "loss": 0.7455, "step": 13915 }, { "epoch": 1.9701989098888653, "grad_norm": 11.751658870391648, "learning_rate": 2.8973203557289274e-09, "loss": 0.6694, "step": 13916 }, { "epoch": 1.9703404827635025, "grad_norm": 8.753660471783986, "learning_rate": 2.869797795241325e-09, "loss": 0.65, "step": 13917 }, { "epoch": 1.9704820556381397, "grad_norm": 8.810849298941184, "learning_rate": 2.8424065070262186e-09, "loss": 0.6284, "step": 13918 }, { "epoch": 1.970623628512777, "grad_norm": 8.82940135270055, "learning_rate": 2.8151464925241235e-09, "loss": 0.6948, "step": 13919 }, { "epoch": 1.9707652013874142, "grad_norm": 10.198761689059824, "learning_rate": 2.7880177531677822e-09, "loss": 0.7682, "step": 13920 }, { "epoch": 1.9709067742620514, "grad_norm": 10.826065638291219, "learning_rate": 2.7610202903829986e-09, "loss": 0.6423, "step": 13921 }, { "epoch": 1.9710483471366886, "grad_norm": 8.657101806154811, "learning_rate": 2.734154105589748e-09, "loss": 0.6589, "step": 13922 }, { "epoch": 1.9711899200113259, "grad_norm": 9.351258186099127, "learning_rate": 2.7074192001996792e-09, "loss": 0.737, "step": 13923 }, { "epoch": 1.971331492885963, "grad_norm": 9.579621190364414, "learning_rate": 2.680815575618889e-09, "loss": 0.7438, "step": 13924 }, { "epoch": 1.9714730657606, "grad_norm": 9.422106991469127, "learning_rate": 2.654343233245149e-09, "loss": 0.5951, "step": 13925 }, { "epoch": 1.9716146386352373, "grad_norm": 11.002059743680528, "learning_rate": 2.6280021744706783e-09, "loss": 0.7247, "step": 13926 }, { "epoch": 1.9717562115098746, "grad_norm": 9.596345730505677, "learning_rate": 2.6017924006799254e-09, "loss": 0.6633, "step": 13927 }, { "epoch": 1.9718977843845118, "grad_norm": 9.249360345941877, "learning_rate": 2.5757139132509545e-09, "loss": 0.655, "step": 13928 }, { "epoch": 1.972039357259149, "grad_norm": 9.321673285117466, "learning_rate": 2.5497667135546135e-09, "loss": 0.6971, "step": 13929 }, { "epoch": 1.9721809301337863, "grad_norm": 9.267418210224632, "learning_rate": 2.5239508029545332e-09, "loss": 0.6186, "step": 13930 }, { "epoch": 1.9723225030084235, "grad_norm": 8.628334617071834, "learning_rate": 2.4982661828085175e-09, "loss": 0.7005, "step": 13931 }, { "epoch": 1.9724640758830607, "grad_norm": 9.900701370826075, "learning_rate": 2.4727128544660415e-09, "loss": 0.6792, "step": 13932 }, { "epoch": 1.972605648757698, "grad_norm": 12.85795872839592, "learning_rate": 2.447290819271031e-09, "loss": 0.6386, "step": 13933 }, { "epoch": 1.9727472216323352, "grad_norm": 8.618820851832144, "learning_rate": 2.4220000785599162e-09, "loss": 0.6591, "step": 13934 }, { "epoch": 1.9728887945069724, "grad_norm": 9.148709365623144, "learning_rate": 2.3968406336616344e-09, "loss": 0.7296, "step": 13935 }, { "epoch": 1.9730303673816096, "grad_norm": 8.777921758616365, "learning_rate": 2.3718124858992943e-09, "loss": 0.5422, "step": 13936 }, { "epoch": 1.9731719402562469, "grad_norm": 10.654305647051842, "learning_rate": 2.3469156365885095e-09, "loss": 0.7692, "step": 13937 }, { "epoch": 1.973313513130884, "grad_norm": 8.875471075746134, "learning_rate": 2.3221500870379552e-09, "loss": 0.6786, "step": 13938 }, { "epoch": 1.9734550860055213, "grad_norm": 7.461628219826955, "learning_rate": 2.2975158385496466e-09, "loss": 0.5951, "step": 13939 }, { "epoch": 1.9735966588801586, "grad_norm": 7.793144725065136, "learning_rate": 2.273012892418658e-09, "loss": 0.7117, "step": 13940 }, { "epoch": 1.9737382317547958, "grad_norm": 8.242339305474053, "learning_rate": 2.248641249932848e-09, "loss": 0.614, "step": 13941 }, { "epoch": 1.973879804629433, "grad_norm": 8.796107419609237, "learning_rate": 2.2244009123734145e-09, "loss": 0.703, "step": 13942 }, { "epoch": 1.9740213775040703, "grad_norm": 8.627576360284449, "learning_rate": 2.200291881015171e-09, "loss": 0.7553, "step": 13943 }, { "epoch": 1.9741629503787075, "grad_norm": 10.813731217156919, "learning_rate": 2.1763141571248813e-09, "loss": 0.6438, "step": 13944 }, { "epoch": 1.9743045232533447, "grad_norm": 9.051320303190968, "learning_rate": 2.152467741963482e-09, "loss": 0.7173, "step": 13945 }, { "epoch": 1.974446096127982, "grad_norm": 8.853312899248065, "learning_rate": 2.1287526367844147e-09, "loss": 0.5903, "step": 13946 }, { "epoch": 1.9745876690026192, "grad_norm": 9.7839242612663, "learning_rate": 2.105168842834182e-09, "loss": 0.6669, "step": 13947 }, { "epoch": 1.9747292418772564, "grad_norm": 9.227725040541038, "learning_rate": 2.081716361352626e-09, "loss": 0.623, "step": 13948 }, { "epoch": 1.9748708147518936, "grad_norm": 9.488683686530358, "learning_rate": 2.058395193572926e-09, "loss": 0.6659, "step": 13949 }, { "epoch": 1.9750123876265309, "grad_norm": 7.487658455370885, "learning_rate": 2.0352053407207696e-09, "loss": 0.7406, "step": 13950 }, { "epoch": 1.975153960501168, "grad_norm": 10.033026136931742, "learning_rate": 2.0121468040151803e-09, "loss": 0.6923, "step": 13951 }, { "epoch": 1.9752955333758053, "grad_norm": 8.73538076652818, "learning_rate": 1.9892195846685227e-09, "loss": 0.6509, "step": 13952 }, { "epoch": 1.9754371062504426, "grad_norm": 9.69057765187663, "learning_rate": 1.9664236838862204e-09, "loss": 0.585, "step": 13953 }, { "epoch": 1.9755786791250798, "grad_norm": 9.761798469062242, "learning_rate": 1.9437591028662053e-09, "loss": 0.6881, "step": 13954 }, { "epoch": 1.975720251999717, "grad_norm": 7.333005666637221, "learning_rate": 1.921225842800023e-09, "loss": 0.6391, "step": 13955 }, { "epoch": 1.975861824874354, "grad_norm": 8.405548860600907, "learning_rate": 1.8988239048725598e-09, "loss": 0.6939, "step": 13956 }, { "epoch": 1.9760033977489913, "grad_norm": 9.723304785145029, "learning_rate": 1.876553290261207e-09, "loss": 0.7063, "step": 13957 }, { "epoch": 1.9761449706236285, "grad_norm": 10.293010629065657, "learning_rate": 1.854414000136695e-09, "loss": 0.6917, "step": 13958 }, { "epoch": 1.9762865434982657, "grad_norm": 9.957730028932374, "learning_rate": 1.8324060356630925e-09, "loss": 0.7026, "step": 13959 }, { "epoch": 1.976428116372903, "grad_norm": 8.191777866212865, "learning_rate": 1.8105293979972516e-09, "loss": 0.7144, "step": 13960 }, { "epoch": 1.9765696892475402, "grad_norm": 7.515250912435024, "learning_rate": 1.7887840882888085e-09, "loss": 0.5942, "step": 13961 }, { "epoch": 1.9767112621221774, "grad_norm": 8.908575028892677, "learning_rate": 1.7671701076815706e-09, "loss": 0.676, "step": 13962 }, { "epoch": 1.9768528349968146, "grad_norm": 8.883434317505674, "learning_rate": 1.7456874573112958e-09, "loss": 0.6647, "step": 13963 }, { "epoch": 1.9769944078714519, "grad_norm": 9.311602453207612, "learning_rate": 1.7243361383076363e-09, "loss": 0.6941, "step": 13964 }, { "epoch": 1.977135980746089, "grad_norm": 9.79641630081466, "learning_rate": 1.703116151792472e-09, "loss": 0.6286, "step": 13965 }, { "epoch": 1.977277553620726, "grad_norm": 7.899570704517672, "learning_rate": 1.6820274988818552e-09, "loss": 0.6778, "step": 13966 }, { "epoch": 1.9774191264953633, "grad_norm": 8.38677014786882, "learning_rate": 1.6610701806843432e-09, "loss": 0.6631, "step": 13967 }, { "epoch": 1.9775606993700006, "grad_norm": 10.359861796867529, "learning_rate": 1.6402441983015548e-09, "loss": 0.6937, "step": 13968 }, { "epoch": 1.9777022722446378, "grad_norm": 11.268245228290976, "learning_rate": 1.6195495528281701e-09, "loss": 0.8069, "step": 13969 }, { "epoch": 1.977843845119275, "grad_norm": 10.301876630467625, "learning_rate": 1.5989862453522075e-09, "loss": 0.7114, "step": 13970 }, { "epoch": 1.9779854179939123, "grad_norm": 9.142732295818783, "learning_rate": 1.5785542769544692e-09, "loss": 0.7202, "step": 13971 }, { "epoch": 1.9781269908685495, "grad_norm": 8.576313098430157, "learning_rate": 1.5582536487093737e-09, "loss": 0.6186, "step": 13972 }, { "epoch": 1.9782685637431867, "grad_norm": 10.444740936002905, "learning_rate": 1.5380843616841223e-09, "loss": 0.7658, "step": 13973 }, { "epoch": 1.978410136617824, "grad_norm": 10.010659830485661, "learning_rate": 1.518046416938701e-09, "loss": 0.6715, "step": 13974 }, { "epoch": 1.9785517094924612, "grad_norm": 10.210327036791828, "learning_rate": 1.4981398155267112e-09, "loss": 0.7011, "step": 13975 }, { "epoch": 1.9786932823670984, "grad_norm": 9.049523474861553, "learning_rate": 1.4783645584942607e-09, "loss": 0.6732, "step": 13976 }, { "epoch": 1.9788348552417356, "grad_norm": 11.282357370230294, "learning_rate": 1.4587206468816285e-09, "loss": 0.6918, "step": 13977 }, { "epoch": 1.9789764281163729, "grad_norm": 8.13349587741878, "learning_rate": 1.4392080817207666e-09, "loss": 0.6438, "step": 13978 }, { "epoch": 1.97911800099101, "grad_norm": 7.519529981736906, "learning_rate": 1.4198268640377987e-09, "loss": 0.6802, "step": 13979 }, { "epoch": 1.9792595738656473, "grad_norm": 9.248207949692834, "learning_rate": 1.4005769948516324e-09, "loss": 0.5681, "step": 13980 }, { "epoch": 1.9794011467402846, "grad_norm": 8.273970949663704, "learning_rate": 1.381458475173958e-09, "loss": 0.6939, "step": 13981 }, { "epoch": 1.9795427196149218, "grad_norm": 8.689934478618108, "learning_rate": 1.3624713060100825e-09, "loss": 0.6609, "step": 13982 }, { "epoch": 1.979684292489559, "grad_norm": 9.517062474808814, "learning_rate": 1.343615488357819e-09, "loss": 0.6459, "step": 13983 }, { "epoch": 1.9798258653641962, "grad_norm": 8.296567864489395, "learning_rate": 1.324891023208874e-09, "loss": 0.6692, "step": 13984 }, { "epoch": 1.9799674382388335, "grad_norm": 8.536279334720197, "learning_rate": 1.306297911547183e-09, "loss": 0.6272, "step": 13985 }, { "epoch": 1.9801090111134707, "grad_norm": 8.99465317316718, "learning_rate": 1.287836154350297e-09, "loss": 0.6615, "step": 13986 }, { "epoch": 1.980250583988108, "grad_norm": 8.547410575084275, "learning_rate": 1.2695057525888288e-09, "loss": 0.6495, "step": 13987 }, { "epoch": 1.9803921568627452, "grad_norm": 8.324756735606828, "learning_rate": 1.2513067072261742e-09, "loss": 0.6594, "step": 13988 }, { "epoch": 1.9805337297373824, "grad_norm": 9.123396223039867, "learning_rate": 1.2332390192193456e-09, "loss": 0.6051, "step": 13989 }, { "epoch": 1.9806753026120196, "grad_norm": 8.359228514341527, "learning_rate": 1.215302689517861e-09, "loss": 0.6629, "step": 13990 }, { "epoch": 1.9808168754866569, "grad_norm": 10.079836867087177, "learning_rate": 1.1974977190645777e-09, "loss": 0.7232, "step": 13991 }, { "epoch": 1.980958448361294, "grad_norm": 7.973815265537321, "learning_rate": 1.1798241087959684e-09, "loss": 0.626, "step": 13992 }, { "epoch": 1.9811000212359313, "grad_norm": 8.043601901833268, "learning_rate": 1.1622818596407348e-09, "loss": 0.5779, "step": 13993 }, { "epoch": 1.9812415941105685, "grad_norm": 8.175617010414355, "learning_rate": 1.1448709725209173e-09, "loss": 0.6055, "step": 13994 }, { "epoch": 1.9813831669852058, "grad_norm": 10.633027040221704, "learning_rate": 1.1275914483521721e-09, "loss": 0.6403, "step": 13995 }, { "epoch": 1.981524739859843, "grad_norm": 9.289647269230342, "learning_rate": 1.1104432880429394e-09, "loss": 0.6966, "step": 13996 }, { "epoch": 1.9816663127344802, "grad_norm": 7.38179478972607, "learning_rate": 1.0934264924941651e-09, "loss": 0.632, "step": 13997 }, { "epoch": 1.9818078856091172, "grad_norm": 11.335630595431986, "learning_rate": 1.076541062600689e-09, "loss": 0.6637, "step": 13998 }, { "epoch": 1.9819494584837545, "grad_norm": 8.731009228935777, "learning_rate": 1.059786999250134e-09, "loss": 0.682, "step": 13999 }, { "epoch": 1.9820910313583917, "grad_norm": 9.299912105678196, "learning_rate": 1.0431643033234629e-09, "loss": 0.724, "step": 14000 }, { "epoch": 1.982232604233029, "grad_norm": 8.941141733745734, "learning_rate": 1.0266729756944205e-09, "loss": 0.5788, "step": 14001 }, { "epoch": 1.9823741771076662, "grad_norm": 8.863706898797645, "learning_rate": 1.0103130172295362e-09, "loss": 0.725, "step": 14002 }, { "epoch": 1.9825157499823034, "grad_norm": 7.830689518410256, "learning_rate": 9.940844287895101e-10, "loss": 0.6005, "step": 14003 }, { "epoch": 1.9826573228569406, "grad_norm": 8.958248782682997, "learning_rate": 9.779872112267163e-10, "loss": 0.6765, "step": 14004 }, { "epoch": 1.9827988957315779, "grad_norm": 9.277693249995693, "learning_rate": 9.62021365388255e-10, "loss": 0.6702, "step": 14005 }, { "epoch": 1.982940468606215, "grad_norm": 8.974462942562184, "learning_rate": 9.461868921126216e-10, "loss": 0.646, "step": 14006 }, { "epoch": 1.9830820414808523, "grad_norm": 10.042218162830501, "learning_rate": 9.304837922327614e-10, "loss": 0.6176, "step": 14007 }, { "epoch": 1.9832236143554893, "grad_norm": 11.666048814671104, "learning_rate": 9.149120665738476e-10, "loss": 0.6098, "step": 14008 }, { "epoch": 1.9833651872301266, "grad_norm": 8.46226427659948, "learning_rate": 8.994717159546695e-10, "loss": 0.6415, "step": 14009 }, { "epoch": 1.9835067601047638, "grad_norm": 9.256064411700834, "learning_rate": 8.841627411870779e-10, "loss": 0.7541, "step": 14010 }, { "epoch": 1.983648332979401, "grad_norm": 10.44408606417145, "learning_rate": 8.689851430754293e-10, "loss": 0.6655, "step": 14011 }, { "epoch": 1.9837899058540382, "grad_norm": 8.846034102123815, "learning_rate": 8.539389224176964e-10, "loss": 0.7154, "step": 14012 }, { "epoch": 1.9839314787286755, "grad_norm": 10.273749772464143, "learning_rate": 8.390240800051907e-10, "loss": 0.6285, "step": 14013 }, { "epoch": 1.9840730516033127, "grad_norm": 8.890394586617724, "learning_rate": 8.242406166214522e-10, "loss": 0.7102, "step": 14014 }, { "epoch": 1.98421462447795, "grad_norm": 7.340539108559867, "learning_rate": 8.095885330441921e-10, "loss": 0.6486, "step": 14015 }, { "epoch": 1.9843561973525872, "grad_norm": 10.710843476064339, "learning_rate": 7.950678300430725e-10, "loss": 0.6645, "step": 14016 }, { "epoch": 1.9844977702272244, "grad_norm": 9.658162245553022, "learning_rate": 7.806785083819268e-10, "loss": 0.7854, "step": 14017 }, { "epoch": 1.9846393431018616, "grad_norm": 10.782896231312527, "learning_rate": 7.664205688170945e-10, "loss": 0.6941, "step": 14018 }, { "epoch": 1.9847809159764989, "grad_norm": 8.841588703162936, "learning_rate": 7.52294012097976e-10, "loss": 0.7142, "step": 14019 }, { "epoch": 1.984922488851136, "grad_norm": 9.092549772464602, "learning_rate": 7.382988389673107e-10, "loss": 0.6246, "step": 14020 }, { "epoch": 1.9850640617257733, "grad_norm": 10.077586362180531, "learning_rate": 7.244350501606212e-10, "loss": 0.643, "step": 14021 }, { "epoch": 1.9852056346004106, "grad_norm": 9.594751560479521, "learning_rate": 7.10702646406769e-10, "loss": 0.698, "step": 14022 }, { "epoch": 1.9853472074750478, "grad_norm": 10.105048596672521, "learning_rate": 6.971016284279541e-10, "loss": 0.6915, "step": 14023 }, { "epoch": 1.985488780349685, "grad_norm": 9.817899542581532, "learning_rate": 6.836319969388828e-10, "loss": 0.6825, "step": 14024 }, { "epoch": 1.9856303532243222, "grad_norm": 10.326606858213474, "learning_rate": 6.702937526475994e-10, "loss": 0.6709, "step": 14025 }, { "epoch": 1.9857719260989595, "grad_norm": 8.741260632836196, "learning_rate": 6.570868962554877e-10, "loss": 0.7563, "step": 14026 }, { "epoch": 1.9859134989735967, "grad_norm": 9.48393135352935, "learning_rate": 6.440114284567145e-10, "loss": 0.6357, "step": 14027 }, { "epoch": 1.986055071848234, "grad_norm": 8.476704257887237, "learning_rate": 6.310673499387854e-10, "loss": 0.6685, "step": 14028 }, { "epoch": 1.9861966447228712, "grad_norm": 8.684997637303129, "learning_rate": 6.182546613817119e-10, "loss": 0.6591, "step": 14029 }, { "epoch": 1.9863382175975084, "grad_norm": 9.907748617421397, "learning_rate": 6.055733634596772e-10, "loss": 0.6992, "step": 14030 }, { "epoch": 1.9864797904721456, "grad_norm": 8.7909116978277, "learning_rate": 5.930234568388149e-10, "loss": 0.7211, "step": 14031 }, { "epoch": 1.9866213633467829, "grad_norm": 9.189146971188647, "learning_rate": 5.806049421791527e-10, "loss": 0.6258, "step": 14032 }, { "epoch": 1.98676293622142, "grad_norm": 8.268247826422513, "learning_rate": 5.683178201335015e-10, "loss": 0.6075, "step": 14033 }, { "epoch": 1.9869045090960573, "grad_norm": 9.083192970054638, "learning_rate": 5.561620913477339e-10, "loss": 0.7392, "step": 14034 }, { "epoch": 1.9870460819706945, "grad_norm": 8.390103059301664, "learning_rate": 5.44137756460783e-10, "loss": 0.6178, "step": 14035 }, { "epoch": 1.9871876548453318, "grad_norm": 9.746539553306095, "learning_rate": 5.322448161049209e-10, "loss": 0.7003, "step": 14036 }, { "epoch": 1.987329227719969, "grad_norm": 7.440275299557344, "learning_rate": 5.204832709052032e-10, "loss": 0.6114, "step": 14037 }, { "epoch": 1.9874708005946062, "grad_norm": 7.448215234992043, "learning_rate": 5.088531214800241e-10, "loss": 0.7554, "step": 14038 }, { "epoch": 1.9876123734692432, "grad_norm": 9.069212223317, "learning_rate": 4.973543684408389e-10, "loss": 0.6733, "step": 14039 }, { "epoch": 1.9877539463438805, "grad_norm": 11.367645904631527, "learning_rate": 4.859870123918864e-10, "loss": 0.7274, "step": 14040 }, { "epoch": 1.9878955192185177, "grad_norm": 9.412992272542734, "learning_rate": 4.747510539307442e-10, "loss": 0.7774, "step": 14041 }, { "epoch": 1.988037092093155, "grad_norm": 8.219449901876228, "learning_rate": 4.636464936483287e-10, "loss": 0.7343, "step": 14042 }, { "epoch": 1.9881786649677922, "grad_norm": 9.99524079559926, "learning_rate": 4.5267333212833943e-10, "loss": 0.6496, "step": 14043 }, { "epoch": 1.9883202378424294, "grad_norm": 9.21505445988956, "learning_rate": 4.418315699475373e-10, "loss": 0.7458, "step": 14044 }, { "epoch": 1.9884618107170666, "grad_norm": 10.975024219390631, "learning_rate": 4.311212076760218e-10, "loss": 0.6646, "step": 14045 }, { "epoch": 1.9886033835917039, "grad_norm": 10.313727658342023, "learning_rate": 4.2054224587667615e-10, "loss": 0.6953, "step": 14046 }, { "epoch": 1.988744956466341, "grad_norm": 8.80136638783824, "learning_rate": 4.1009468510544434e-10, "loss": 0.6918, "step": 14047 }, { "epoch": 1.9888865293409783, "grad_norm": 8.772458274048494, "learning_rate": 3.9977852591188694e-10, "loss": 0.6786, "step": 14048 }, { "epoch": 1.9890281022156153, "grad_norm": 9.941750439154747, "learning_rate": 3.8959376883834776e-10, "loss": 0.6825, "step": 14049 }, { "epoch": 1.9891696750902526, "grad_norm": 10.387306727551955, "learning_rate": 3.795404144199544e-10, "loss": 0.6834, "step": 14050 }, { "epoch": 1.9893112479648898, "grad_norm": 11.009424507144578, "learning_rate": 3.696184631851729e-10, "loss": 0.7033, "step": 14051 }, { "epoch": 1.989452820839527, "grad_norm": 9.813942189747754, "learning_rate": 3.5982791565608575e-10, "loss": 0.6591, "step": 14052 }, { "epoch": 1.9895943937141642, "grad_norm": 9.116658617907591, "learning_rate": 3.501687723467262e-10, "loss": 0.6154, "step": 14053 }, { "epoch": 1.9897359665888015, "grad_norm": 9.803163399346017, "learning_rate": 3.4064103376529876e-10, "loss": 0.7165, "step": 14054 }, { "epoch": 1.9898775394634387, "grad_norm": 8.500318184675363, "learning_rate": 3.3124470041251413e-10, "loss": 0.5848, "step": 14055 }, { "epoch": 1.990019112338076, "grad_norm": 8.571726485025174, "learning_rate": 3.219797727824214e-10, "loss": 0.6529, "step": 14056 }, { "epoch": 1.9901606852127132, "grad_norm": 8.805666607605405, "learning_rate": 3.128462513618535e-10, "loss": 0.6647, "step": 14057 }, { "epoch": 1.9903022580873504, "grad_norm": 8.789145832938276, "learning_rate": 3.0384413663125944e-10, "loss": 0.536, "step": 14058 }, { "epoch": 1.9904438309619876, "grad_norm": 10.96661060477385, "learning_rate": 2.9497342906387173e-10, "loss": 0.7527, "step": 14059 }, { "epoch": 1.9905854038366249, "grad_norm": 10.260607557247653, "learning_rate": 2.862341291257065e-10, "loss": 0.6398, "step": 14060 }, { "epoch": 1.990726976711262, "grad_norm": 9.819162621621242, "learning_rate": 2.776262372761185e-10, "loss": 0.7331, "step": 14061 }, { "epoch": 1.9908685495858993, "grad_norm": 8.534033464479242, "learning_rate": 2.6914975396807873e-10, "loss": 0.6509, "step": 14062 }, { "epoch": 1.9910101224605365, "grad_norm": 10.49783738889825, "learning_rate": 2.6080467964706424e-10, "loss": 0.6469, "step": 14063 }, { "epoch": 1.9911516953351738, "grad_norm": 6.835181598832153, "learning_rate": 2.525910147516131e-10, "loss": 0.6968, "step": 14064 }, { "epoch": 1.991293268209811, "grad_norm": 9.999214522984301, "learning_rate": 2.4450875971332445e-10, "loss": 0.7068, "step": 14065 }, { "epoch": 1.9914348410844482, "grad_norm": 8.661049073705154, "learning_rate": 2.3655791495769134e-10, "loss": 0.6296, "step": 14066 }, { "epoch": 1.9915764139590855, "grad_norm": 9.845694794866045, "learning_rate": 2.2873848090188e-10, "loss": 0.6407, "step": 14067 }, { "epoch": 1.9917179868337227, "grad_norm": 10.700077619894977, "learning_rate": 2.2105045795778323e-10, "loss": 0.6532, "step": 14068 }, { "epoch": 1.99185955970836, "grad_norm": 9.086436263717992, "learning_rate": 2.134938465289671e-10, "loss": 0.6645, "step": 14069 }, { "epoch": 1.9920011325829972, "grad_norm": 9.967321024393682, "learning_rate": 2.0606864701289142e-10, "loss": 0.7026, "step": 14070 }, { "epoch": 1.9921427054576344, "grad_norm": 10.649097716028685, "learning_rate": 1.987748597997996e-10, "loss": 0.7455, "step": 14071 }, { "epoch": 1.9922842783322716, "grad_norm": 10.350134040591122, "learning_rate": 1.916124852732737e-10, "loss": 0.7362, "step": 14072 }, { "epoch": 1.9924258512069088, "grad_norm": 9.536830781628762, "learning_rate": 1.845815238096793e-10, "loss": 0.7278, "step": 14073 }, { "epoch": 1.992567424081546, "grad_norm": 9.47152739729939, "learning_rate": 1.776819757787207e-10, "loss": 0.6358, "step": 14074 }, { "epoch": 1.9927089969561833, "grad_norm": 7.749080972632184, "learning_rate": 1.7091384154288571e-10, "loss": 0.6877, "step": 14075 }, { "epoch": 1.9928505698308205, "grad_norm": 9.673047513868957, "learning_rate": 1.6427712145827834e-10, "loss": 0.7492, "step": 14076 }, { "epoch": 1.9929921427054578, "grad_norm": 8.033442691464899, "learning_rate": 1.577718158737862e-10, "loss": 0.6912, "step": 14077 }, { "epoch": 1.993133715580095, "grad_norm": 8.263227089705035, "learning_rate": 1.5139792513135799e-10, "loss": 0.6069, "step": 14078 }, { "epoch": 1.9932752884547322, "grad_norm": 10.291599247720619, "learning_rate": 1.451554495657259e-10, "loss": 0.6761, "step": 14079 }, { "epoch": 1.9934168613293692, "grad_norm": 9.53766993699973, "learning_rate": 1.39044389505516e-10, "loss": 0.6622, "step": 14080 }, { "epoch": 1.9935584342040065, "grad_norm": 10.453317683201707, "learning_rate": 1.3306474527158275e-10, "loss": 0.6811, "step": 14081 }, { "epoch": 1.9937000070786437, "grad_norm": 8.426918911518527, "learning_rate": 1.2721651717839678e-10, "loss": 0.6717, "step": 14082 }, { "epoch": 1.993841579953281, "grad_norm": 10.522497870077297, "learning_rate": 1.2149970553376745e-10, "loss": 0.6467, "step": 14083 }, { "epoch": 1.9939831528279182, "grad_norm": 10.264657310269332, "learning_rate": 1.1591431063745495e-10, "loss": 0.6971, "step": 14084 }, { "epoch": 1.9941247257025554, "grad_norm": 7.499776455408756, "learning_rate": 1.1046033278394597e-10, "loss": 0.5673, "step": 14085 }, { "epoch": 1.9942662985771926, "grad_norm": 9.432384595127079, "learning_rate": 1.0513777225940047e-10, "loss": 0.6596, "step": 14086 }, { "epoch": 1.9944078714518299, "grad_norm": 9.829012638150282, "learning_rate": 9.994662934387223e-11, "loss": 0.6348, "step": 14087 }, { "epoch": 1.994549444326467, "grad_norm": 8.736771529450252, "learning_rate": 9.488690430992098e-11, "loss": 0.5866, "step": 14088 }, { "epoch": 1.9946910172011043, "grad_norm": 8.847062100635442, "learning_rate": 8.99585974237227e-11, "loss": 0.5988, "step": 14089 }, { "epoch": 1.9948325900757415, "grad_norm": 9.198821896296566, "learning_rate": 8.516170894479203e-11, "loss": 0.6335, "step": 14090 }, { "epoch": 1.9949741629503785, "grad_norm": 9.98628362283503, "learning_rate": 8.049623912459448e-11, "loss": 0.5859, "step": 14091 }, { "epoch": 1.9951157358250158, "grad_norm": 9.265597827479128, "learning_rate": 7.596218820876688e-11, "loss": 0.7308, "step": 14092 }, { "epoch": 1.995257308699653, "grad_norm": 9.010065172575752, "learning_rate": 7.15595564354521e-11, "loss": 0.6035, "step": 14093 }, { "epoch": 1.9953988815742902, "grad_norm": 11.95417683280618, "learning_rate": 6.728834403640916e-11, "loss": 0.7995, "step": 14094 }, { "epoch": 1.9955404544489275, "grad_norm": 9.430989628276151, "learning_rate": 6.314855123590313e-11, "loss": 0.5816, "step": 14095 }, { "epoch": 1.9956820273235647, "grad_norm": 9.18340973404006, "learning_rate": 5.914017825153773e-11, "loss": 0.6859, "step": 14096 }, { "epoch": 1.995823600198202, "grad_norm": 10.342692249070566, "learning_rate": 5.526322529425532e-11, "loss": 0.811, "step": 14097 }, { "epoch": 1.9959651730728392, "grad_norm": 8.742524359573405, "learning_rate": 5.151769256778183e-11, "loss": 0.6192, "step": 14098 }, { "epoch": 1.9961067459474764, "grad_norm": 8.123722856355638, "learning_rate": 4.790358026890429e-11, "loss": 0.7551, "step": 14099 }, { "epoch": 1.9962483188221136, "grad_norm": 10.051366487285136, "learning_rate": 4.44208885877484e-11, "loss": 0.7096, "step": 14100 }, { "epoch": 1.9963898916967509, "grad_norm": 9.327159020073701, "learning_rate": 4.1069617707223396e-11, "loss": 0.723, "step": 14101 }, { "epoch": 1.996531464571388, "grad_norm": 10.671039598867837, "learning_rate": 3.7849767803854745e-11, "loss": 0.6929, "step": 14102 }, { "epoch": 1.9966730374460253, "grad_norm": 9.301191389004035, "learning_rate": 3.4761339046396336e-11, "loss": 0.67, "step": 14103 }, { "epoch": 1.9968146103206625, "grad_norm": 9.510060655017368, "learning_rate": 3.1804331597773406e-11, "loss": 0.7193, "step": 14104 }, { "epoch": 1.9969561831952998, "grad_norm": 8.874550391615358, "learning_rate": 2.897874561286207e-11, "loss": 0.6343, "step": 14105 }, { "epoch": 1.997097756069937, "grad_norm": 9.450891151996965, "learning_rate": 2.6284581240709762e-11, "loss": 0.6536, "step": 14106 }, { "epoch": 1.9972393289445742, "grad_norm": 8.78829901197756, "learning_rate": 2.3721838622592362e-11, "loss": 0.5847, "step": 14107 }, { "epoch": 1.9973809018192115, "grad_norm": 10.519940288649828, "learning_rate": 2.1290517893401974e-11, "loss": 0.7101, "step": 14108 }, { "epoch": 1.9975224746938487, "grad_norm": 9.886079005625474, "learning_rate": 1.899061918081424e-11, "loss": 0.7336, "step": 14109 }, { "epoch": 1.997664047568486, "grad_norm": 8.904171135531922, "learning_rate": 1.682214260584347e-11, "loss": 0.7248, "step": 14110 }, { "epoch": 1.9978056204431232, "grad_norm": 7.5007585777668755, "learning_rate": 1.4785088282565084e-11, "loss": 0.6555, "step": 14111 }, { "epoch": 1.9979471933177604, "grad_norm": 8.756355375408722, "learning_rate": 1.2879456318115602e-11, "loss": 0.6219, "step": 14112 }, { "epoch": 1.9980887661923976, "grad_norm": 10.062127337455822, "learning_rate": 1.1105246812137538e-11, "loss": 0.644, "step": 14113 }, { "epoch": 1.9982303390670348, "grad_norm": 7.995203010505688, "learning_rate": 9.462459858444739e-12, "loss": 0.7152, "step": 14114 }, { "epoch": 1.998371911941672, "grad_norm": 9.017308863006711, "learning_rate": 7.951095543357046e-12, "loss": 0.7196, "step": 14115 }, { "epoch": 1.9985134848163093, "grad_norm": 9.19895584151912, "learning_rate": 6.5711539462554044e-12, "loss": 0.7096, "step": 14116 }, { "epoch": 1.9986550576909465, "grad_norm": 9.38129020748379, "learning_rate": 5.322635139304311e-12, "loss": 0.6468, "step": 14117 }, { "epoch": 1.9987966305655838, "grad_norm": 8.091701410325717, "learning_rate": 4.205539188839591e-12, "loss": 0.7469, "step": 14118 }, { "epoch": 1.998938203440221, "grad_norm": 9.36585165620798, "learning_rate": 3.219866153147955e-12, "loss": 0.6826, "step": 14119 }, { "epoch": 1.9990797763148582, "grad_norm": 9.700371442152237, "learning_rate": 2.3656160838547713e-12, "loss": 0.7048, "step": 14120 }, { "epoch": 1.9992213491894955, "grad_norm": 9.765553515363365, "learning_rate": 1.64278902620163e-12, "loss": 0.7694, "step": 14121 }, { "epoch": 1.9993629220641325, "grad_norm": 13.12791889975985, "learning_rate": 1.0513850182136687e-12, "loss": 0.6613, "step": 14122 }, { "epoch": 1.9995044949387697, "grad_norm": 7.490383849958997, "learning_rate": 5.914040909771324e-13, "loss": 0.6273, "step": 14123 }, { "epoch": 1.999646067813407, "grad_norm": 8.59453698456199, "learning_rate": 2.62846268361816e-13, "loss": 0.6627, "step": 14124 }, { "epoch": 1.9997876406880442, "grad_norm": 8.875822217222373, "learning_rate": 6.571156785373234e-14, "loss": 0.6961, "step": 14125 }, { "epoch": 1.9999292135626814, "grad_norm": 8.67635898993729, "learning_rate": 0.0, "loss": 0.7015, "step": 14126 }, { "epoch": 1.9999292135626814, "step": 14126, "total_flos": 3114173628186624.0, "train_loss": 0.2012747019786732, "train_runtime": 133919.7138, "train_samples_per_second": 13.502, "train_steps_per_second": 0.105 } ], "logging_steps": 1.0, "max_steps": 14126, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3114173628186624.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }