{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7813, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012799180852425445, "grad_norm": 11.5625, "learning_rate": 8.510638297872341e-06, "loss": 15.304, "step": 1 }, { "epoch": 0.0002559836170485089, "grad_norm": 12.875, "learning_rate": 1.7021276595744682e-05, "loss": 15.0659, "step": 2 }, { "epoch": 0.00038397542557276333, "grad_norm": 13.3125, "learning_rate": 2.5531914893617022e-05, "loss": 15.3122, "step": 3 }, { "epoch": 0.0005119672340970178, "grad_norm": 10.1875, "learning_rate": 3.4042553191489365e-05, "loss": 15.3051, "step": 4 }, { "epoch": 0.0006399590426212722, "grad_norm": 11.0625, "learning_rate": 4.2553191489361704e-05, "loss": 15.0721, "step": 5 }, { "epoch": 0.0007679508511455267, "grad_norm": 11.5, "learning_rate": 5.1063829787234044e-05, "loss": 15.0955, "step": 6 }, { "epoch": 0.0008959426596697811, "grad_norm": 8.9375, "learning_rate": 5.957446808510639e-05, "loss": 14.8674, "step": 7 }, { "epoch": 0.0010239344681940356, "grad_norm": 7.75, "learning_rate": 6.808510638297873e-05, "loss": 14.8543, "step": 8 }, { "epoch": 0.00115192627671829, "grad_norm": 9.0, "learning_rate": 7.659574468085106e-05, "loss": 14.8684, "step": 9 }, { "epoch": 0.0012799180852425445, "grad_norm": 8.25, "learning_rate": 8.510638297872341e-05, "loss": 14.8875, "step": 10 }, { "epoch": 0.001407909893766799, "grad_norm": 8.125, "learning_rate": 9.361702127659574e-05, "loss": 14.8473, "step": 11 }, { "epoch": 0.0015359017022910533, "grad_norm": 8.0, "learning_rate": 0.00010212765957446809, "loss": 14.4045, "step": 12 }, { "epoch": 0.0016638935108153079, "grad_norm": 7.15625, "learning_rate": 0.00011063829787234042, "loss": 14.5803, "step": 13 }, { "epoch": 0.0017918853193395622, "grad_norm": 7.9375, "learning_rate": 0.00011914893617021278, "loss": 14.0811, "step": 14 }, { "epoch": 0.0019198771278638167, "grad_norm": 7.875, "learning_rate": 0.0001276595744680851, "loss": 14.1291, "step": 15 }, { "epoch": 0.0020478689363880712, "grad_norm": 9.125, "learning_rate": 0.00013617021276595746, "loss": 13.6935, "step": 16 }, { "epoch": 0.0021758607449123258, "grad_norm": 11.75, "learning_rate": 0.00014468085106382977, "loss": 13.5914, "step": 17 }, { "epoch": 0.00230385255343658, "grad_norm": 10.9375, "learning_rate": 0.00015319148936170213, "loss": 13.6709, "step": 18 }, { "epoch": 0.0024318443619608344, "grad_norm": 13.6875, "learning_rate": 0.00016170212765957449, "loss": 13.1205, "step": 19 }, { "epoch": 0.002559836170485089, "grad_norm": 12.9375, "learning_rate": 0.00017021276595744682, "loss": 13.4647, "step": 20 }, { "epoch": 0.0026878279790093435, "grad_norm": 11.0625, "learning_rate": 0.00017872340425531915, "loss": 12.7896, "step": 21 }, { "epoch": 0.002815819787533598, "grad_norm": 12.3125, "learning_rate": 0.00018723404255319148, "loss": 12.3087, "step": 22 }, { "epoch": 0.002943811596057852, "grad_norm": 8.0, "learning_rate": 0.00019574468085106384, "loss": 12.8961, "step": 23 }, { "epoch": 0.0030718034045821067, "grad_norm": 9.5, "learning_rate": 0.00020425531914893618, "loss": 11.93, "step": 24 }, { "epoch": 0.003199795213106361, "grad_norm": 7.21875, "learning_rate": 0.0002127659574468085, "loss": 12.0264, "step": 25 }, { "epoch": 0.0033277870216306157, "grad_norm": 7.46875, "learning_rate": 0.00022127659574468084, "loss": 11.706, "step": 26 }, { "epoch": 0.0034557788301548703, "grad_norm": 7.8125, "learning_rate": 0.0002297872340425532, "loss": 11.9963, "step": 27 }, { "epoch": 0.0035837706386791244, "grad_norm": 7.4375, "learning_rate": 0.00023829787234042556, "loss": 11.552, "step": 28 }, { "epoch": 0.003711762447203379, "grad_norm": 8.75, "learning_rate": 0.00024680851063829787, "loss": 11.6734, "step": 29 }, { "epoch": 0.0038397542557276334, "grad_norm": 6.875, "learning_rate": 0.0002553191489361702, "loss": 11.8145, "step": 30 }, { "epoch": 0.003967746064251888, "grad_norm": 17.75, "learning_rate": 0.00026382978723404253, "loss": 12.1279, "step": 31 }, { "epoch": 0.0040957378727761425, "grad_norm": 7.71875, "learning_rate": 0.0002723404255319149, "loss": 11.6992, "step": 32 }, { "epoch": 0.004223729681300397, "grad_norm": 6.0625, "learning_rate": 0.00028085106382978725, "loss": 11.4785, "step": 33 }, { "epoch": 0.0043517214898246516, "grad_norm": 10.125, "learning_rate": 0.00028936170212765953, "loss": 11.3125, "step": 34 }, { "epoch": 0.004479713298348906, "grad_norm": 8.6875, "learning_rate": 0.0002978723404255319, "loss": 11.2372, "step": 35 }, { "epoch": 0.00460770510687316, "grad_norm": 5.65625, "learning_rate": 0.00030638297872340425, "loss": 11.3262, "step": 36 }, { "epoch": 0.004735696915397414, "grad_norm": 5.59375, "learning_rate": 0.00031489361702127664, "loss": 11.1482, "step": 37 }, { "epoch": 0.004863688723921669, "grad_norm": 5.90625, "learning_rate": 0.00032340425531914897, "loss": 10.9973, "step": 38 }, { "epoch": 0.004991680532445923, "grad_norm": 7.46875, "learning_rate": 0.00033191489361702125, "loss": 11.0743, "step": 39 }, { "epoch": 0.005119672340970178, "grad_norm": 5.375, "learning_rate": 0.00034042553191489364, "loss": 11.0801, "step": 40 }, { "epoch": 0.005247664149494432, "grad_norm": 6.59375, "learning_rate": 0.00034893617021276597, "loss": 10.7041, "step": 41 }, { "epoch": 0.005375655958018687, "grad_norm": 5.625, "learning_rate": 0.0003574468085106383, "loss": 10.452, "step": 42 }, { "epoch": 0.0055036477665429415, "grad_norm": 4.6875, "learning_rate": 0.00036595744680851063, "loss": 10.4645, "step": 43 }, { "epoch": 0.005631639575067196, "grad_norm": 5.25, "learning_rate": 0.00037446808510638297, "loss": 10.4276, "step": 44 }, { "epoch": 0.005759631383591451, "grad_norm": 6.34375, "learning_rate": 0.0003829787234042553, "loss": 11.0687, "step": 45 }, { "epoch": 0.005887623192115704, "grad_norm": 6.75, "learning_rate": 0.0003914893617021277, "loss": 10.4207, "step": 46 }, { "epoch": 0.006015615000639959, "grad_norm": 6.09375, "learning_rate": 0.0004, "loss": 10.0271, "step": 47 }, { "epoch": 0.006143606809164213, "grad_norm": 5.5, "learning_rate": 0.00040851063829787235, "loss": 10.7434, "step": 48 }, { "epoch": 0.006271598617688468, "grad_norm": 4.90625, "learning_rate": 0.0004170212765957447, "loss": 10.2806, "step": 49 }, { "epoch": 0.006399590426212722, "grad_norm": 5.8125, "learning_rate": 0.000425531914893617, "loss": 10.2049, "step": 50 }, { "epoch": 0.006527582234736977, "grad_norm": 5.21875, "learning_rate": 0.0004340425531914894, "loss": 10.5081, "step": 51 }, { "epoch": 0.0066555740432612314, "grad_norm": 4.46875, "learning_rate": 0.0004425531914893617, "loss": 10.194, "step": 52 }, { "epoch": 0.006783565851785486, "grad_norm": 4.4375, "learning_rate": 0.000451063829787234, "loss": 9.9446, "step": 53 }, { "epoch": 0.0069115576603097405, "grad_norm": 3.765625, "learning_rate": 0.0004595744680851064, "loss": 9.823, "step": 54 }, { "epoch": 0.007039549468833995, "grad_norm": 5.0, "learning_rate": 0.00046808510638297874, "loss": 9.4676, "step": 55 }, { "epoch": 0.007167541277358249, "grad_norm": 4.875, "learning_rate": 0.0004765957446808511, "loss": 9.8968, "step": 56 }, { "epoch": 0.007295533085882503, "grad_norm": 5.53125, "learning_rate": 0.0004851063829787234, "loss": 9.6318, "step": 57 }, { "epoch": 0.007423524894406758, "grad_norm": 4.6875, "learning_rate": 0.0004936170212765957, "loss": 9.6559, "step": 58 }, { "epoch": 0.007551516702931012, "grad_norm": 6.15625, "learning_rate": 0.0005021276595744681, "loss": 9.6202, "step": 59 }, { "epoch": 0.007679508511455267, "grad_norm": 3.765625, "learning_rate": 0.0005106382978723404, "loss": 9.545, "step": 60 }, { "epoch": 0.007807500319979521, "grad_norm": 5.46875, "learning_rate": 0.0005191489361702127, "loss": 9.8627, "step": 61 }, { "epoch": 0.007935492128503776, "grad_norm": 4.34375, "learning_rate": 0.0005276595744680851, "loss": 9.7528, "step": 62 }, { "epoch": 0.00806348393702803, "grad_norm": 3.21875, "learning_rate": 0.0005361702127659575, "loss": 9.4818, "step": 63 }, { "epoch": 0.008191475745552285, "grad_norm": 4.59375, "learning_rate": 0.0005446808510638298, "loss": 9.5754, "step": 64 }, { "epoch": 0.008319467554076539, "grad_norm": 3.40625, "learning_rate": 0.0005531914893617022, "loss": 9.704, "step": 65 }, { "epoch": 0.008447459362600794, "grad_norm": 2.96875, "learning_rate": 0.0005617021276595745, "loss": 9.126, "step": 66 }, { "epoch": 0.008575451171125048, "grad_norm": 11.375, "learning_rate": 0.0005702127659574468, "loss": 9.9577, "step": 67 }, { "epoch": 0.008703442979649303, "grad_norm": 6.3125, "learning_rate": 0.0005787234042553191, "loss": 9.7423, "step": 68 }, { "epoch": 0.008831434788173557, "grad_norm": 6.09375, "learning_rate": 0.0005872340425531915, "loss": 9.7837, "step": 69 }, { "epoch": 0.008959426596697812, "grad_norm": 5.21875, "learning_rate": 0.0005957446808510638, "loss": 9.4718, "step": 70 }, { "epoch": 0.009087418405222066, "grad_norm": 2.953125, "learning_rate": 0.0006042553191489362, "loss": 9.6516, "step": 71 }, { "epoch": 0.00921541021374632, "grad_norm": 3.703125, "learning_rate": 0.0006127659574468085, "loss": 9.6938, "step": 72 }, { "epoch": 0.009343402022270575, "grad_norm": 4.03125, "learning_rate": 0.0006212765957446808, "loss": 9.1787, "step": 73 }, { "epoch": 0.009471393830794829, "grad_norm": 2.796875, "learning_rate": 0.0006297872340425533, "loss": 9.5102, "step": 74 }, { "epoch": 0.009599385639319084, "grad_norm": 2.78125, "learning_rate": 0.0006382978723404256, "loss": 9.433, "step": 75 }, { "epoch": 0.009727377447843338, "grad_norm": 4.25, "learning_rate": 0.0006468085106382979, "loss": 9.8887, "step": 76 }, { "epoch": 0.009855369256367593, "grad_norm": 2.578125, "learning_rate": 0.0006553191489361702, "loss": 9.3536, "step": 77 }, { "epoch": 0.009983361064891847, "grad_norm": 2.875, "learning_rate": 0.0006638297872340425, "loss": 9.356, "step": 78 }, { "epoch": 0.010111352873416102, "grad_norm": 3.734375, "learning_rate": 0.0006723404255319148, "loss": 9.0571, "step": 79 }, { "epoch": 0.010239344681940356, "grad_norm": 2.390625, "learning_rate": 0.0006808510638297873, "loss": 9.1523, "step": 80 }, { "epoch": 0.01036733649046461, "grad_norm": 2.546875, "learning_rate": 0.0006893617021276596, "loss": 9.4769, "step": 81 }, { "epoch": 0.010495328298988865, "grad_norm": 2.8125, "learning_rate": 0.0006978723404255319, "loss": 9.2867, "step": 82 }, { "epoch": 0.010623320107513119, "grad_norm": 3.125, "learning_rate": 0.0007063829787234043, "loss": 9.328, "step": 83 }, { "epoch": 0.010751311916037374, "grad_norm": 3.1875, "learning_rate": 0.0007148936170212766, "loss": 9.1725, "step": 84 }, { "epoch": 0.010879303724561628, "grad_norm": 2.421875, "learning_rate": 0.000723404255319149, "loss": 9.0622, "step": 85 }, { "epoch": 0.011007295533085883, "grad_norm": 5.4375, "learning_rate": 0.0007319148936170213, "loss": 9.0586, "step": 86 }, { "epoch": 0.011135287341610137, "grad_norm": 2.421875, "learning_rate": 0.0007404255319148936, "loss": 9.1943, "step": 87 }, { "epoch": 0.011263279150134392, "grad_norm": 2.703125, "learning_rate": 0.0007489361702127659, "loss": 8.9902, "step": 88 }, { "epoch": 0.011391270958658646, "grad_norm": 2.59375, "learning_rate": 0.0007574468085106383, "loss": 9.2359, "step": 89 }, { "epoch": 0.011519262767182901, "grad_norm": 3.375, "learning_rate": 0.0007659574468085106, "loss": 8.8138, "step": 90 }, { "epoch": 0.011647254575707155, "grad_norm": 2.8125, "learning_rate": 0.000774468085106383, "loss": 9.4292, "step": 91 }, { "epoch": 0.011775246384231408, "grad_norm": 2.625, "learning_rate": 0.0007829787234042554, "loss": 9.4025, "step": 92 }, { "epoch": 0.011903238192755664, "grad_norm": 3.90625, "learning_rate": 0.0007914893617021277, "loss": 9.2509, "step": 93 }, { "epoch": 0.012031230001279918, "grad_norm": 2.328125, "learning_rate": 0.0008, "loss": 9.0982, "step": 94 }, { "epoch": 0.012159221809804173, "grad_norm": 3.4375, "learning_rate": 0.0008085106382978723, "loss": 9.3691, "step": 95 }, { "epoch": 0.012287213618328427, "grad_norm": 2.25, "learning_rate": 0.0008170212765957447, "loss": 8.8108, "step": 96 }, { "epoch": 0.012415205426852682, "grad_norm": 2.09375, "learning_rate": 0.000825531914893617, "loss": 8.7706, "step": 97 }, { "epoch": 0.012543197235376936, "grad_norm": 2.8125, "learning_rate": 0.0008340425531914894, "loss": 9.2015, "step": 98 }, { "epoch": 0.012671189043901191, "grad_norm": 2.296875, "learning_rate": 0.0008425531914893617, "loss": 9.0042, "step": 99 }, { "epoch": 0.012799180852425445, "grad_norm": 2.171875, "learning_rate": 0.000851063829787234, "loss": 8.9005, "step": 100 }, { "epoch": 0.012927172660949698, "grad_norm": 4.1875, "learning_rate": 0.0008595744680851064, "loss": 8.7074, "step": 101 }, { "epoch": 0.013055164469473954, "grad_norm": 1.9140625, "learning_rate": 0.0008680851063829788, "loss": 9.098, "step": 102 }, { "epoch": 0.013183156277998207, "grad_norm": 2.390625, "learning_rate": 0.0008765957446808511, "loss": 9.0511, "step": 103 }, { "epoch": 0.013311148086522463, "grad_norm": 2.921875, "learning_rate": 0.0008851063829787234, "loss": 9.0597, "step": 104 }, { "epoch": 0.013439139895046717, "grad_norm": 2.96875, "learning_rate": 0.0008936170212765957, "loss": 9.014, "step": 105 }, { "epoch": 0.013567131703570972, "grad_norm": 2.25, "learning_rate": 0.000902127659574468, "loss": 8.687, "step": 106 }, { "epoch": 0.013695123512095226, "grad_norm": 2.359375, "learning_rate": 0.0009106382978723405, "loss": 9.2103, "step": 107 }, { "epoch": 0.013823115320619481, "grad_norm": 2.609375, "learning_rate": 0.0009191489361702128, "loss": 9.077, "step": 108 }, { "epoch": 0.013951107129143735, "grad_norm": 2.984375, "learning_rate": 0.0009276595744680851, "loss": 9.2118, "step": 109 }, { "epoch": 0.01407909893766799, "grad_norm": 5.53125, "learning_rate": 0.0009361702127659575, "loss": 9.2105, "step": 110 }, { "epoch": 0.014207090746192244, "grad_norm": 2.703125, "learning_rate": 0.0009446808510638298, "loss": 8.9497, "step": 111 }, { "epoch": 0.014335082554716497, "grad_norm": 6.09375, "learning_rate": 0.0009531914893617022, "loss": 9.3107, "step": 112 }, { "epoch": 0.014463074363240753, "grad_norm": 1.671875, "learning_rate": 0.0009617021276595745, "loss": 9.1432, "step": 113 }, { "epoch": 0.014591066171765006, "grad_norm": 4.0625, "learning_rate": 0.0009702127659574468, "loss": 9.3258, "step": 114 }, { "epoch": 0.014719057980289262, "grad_norm": 1.7109375, "learning_rate": 0.0009787234042553192, "loss": 9.4424, "step": 115 }, { "epoch": 0.014847049788813516, "grad_norm": 2.234375, "learning_rate": 0.0009872340425531915, "loss": 8.9795, "step": 116 }, { "epoch": 0.014975041597337771, "grad_norm": 3.359375, "learning_rate": 0.000995744680851064, "loss": 8.8052, "step": 117 }, { "epoch": 0.015103033405862025, "grad_norm": 1.875, "learning_rate": 0.0010042553191489361, "loss": 9.0904, "step": 118 }, { "epoch": 0.01523102521438628, "grad_norm": 3.71875, "learning_rate": 0.0010127659574468084, "loss": 8.3967, "step": 119 }, { "epoch": 0.015359017022910534, "grad_norm": 2.171875, "learning_rate": 0.0010212765957446808, "loss": 9.2066, "step": 120 }, { "epoch": 0.015487008831434787, "grad_norm": 4.65625, "learning_rate": 0.0010297872340425532, "loss": 9.24, "step": 121 }, { "epoch": 0.015615000639959043, "grad_norm": 1.734375, "learning_rate": 0.0010382978723404255, "loss": 8.9721, "step": 122 }, { "epoch": 0.015742992448483296, "grad_norm": 1.9375, "learning_rate": 0.001046808510638298, "loss": 9.3257, "step": 123 }, { "epoch": 0.015870984257007552, "grad_norm": 1.7890625, "learning_rate": 0.0010553191489361701, "loss": 8.7488, "step": 124 }, { "epoch": 0.015998976065531807, "grad_norm": 1.7578125, "learning_rate": 0.0010638297872340426, "loss": 9.3335, "step": 125 }, { "epoch": 0.01612696787405606, "grad_norm": 1.8359375, "learning_rate": 0.001072340425531915, "loss": 9.3622, "step": 126 }, { "epoch": 0.016254959682580315, "grad_norm": 1.765625, "learning_rate": 0.0010808510638297872, "loss": 9.0444, "step": 127 }, { "epoch": 0.01638295149110457, "grad_norm": 1.65625, "learning_rate": 0.0010893617021276597, "loss": 8.8469, "step": 128 }, { "epoch": 0.016510943299628825, "grad_norm": 4.5, "learning_rate": 0.001097872340425532, "loss": 9.2005, "step": 129 }, { "epoch": 0.016638935108153077, "grad_norm": 2.0, "learning_rate": 0.0011063829787234043, "loss": 8.6386, "step": 130 }, { "epoch": 0.016766926916677333, "grad_norm": 3.65625, "learning_rate": 0.0011148936170212766, "loss": 9.0343, "step": 131 }, { "epoch": 0.016894918725201588, "grad_norm": 1.7890625, "learning_rate": 0.001123404255319149, "loss": 9.0456, "step": 132 }, { "epoch": 0.01702291053372584, "grad_norm": 3.34375, "learning_rate": 0.0011319148936170214, "loss": 8.9293, "step": 133 }, { "epoch": 0.017150902342250095, "grad_norm": 1.6953125, "learning_rate": 0.0011404255319148937, "loss": 9.1034, "step": 134 }, { "epoch": 0.01727889415077435, "grad_norm": 2.125, "learning_rate": 0.0011489361702127661, "loss": 8.9307, "step": 135 }, { "epoch": 0.017406885959298606, "grad_norm": 1.828125, "learning_rate": 0.0011574468085106381, "loss": 8.9665, "step": 136 }, { "epoch": 0.017534877767822858, "grad_norm": 3.015625, "learning_rate": 0.0011659574468085106, "loss": 8.7356, "step": 137 }, { "epoch": 0.017662869576347114, "grad_norm": 1.9609375, "learning_rate": 0.001174468085106383, "loss": 8.8461, "step": 138 }, { "epoch": 0.01779086138487137, "grad_norm": 3.0, "learning_rate": 0.0011829787234042552, "loss": 8.7686, "step": 139 }, { "epoch": 0.017918853193395624, "grad_norm": 1.3984375, "learning_rate": 0.0011914893617021277, "loss": 8.7767, "step": 140 }, { "epoch": 0.018046845001919876, "grad_norm": 2.0, "learning_rate": 0.0012, "loss": 8.629, "step": 141 }, { "epoch": 0.01817483681044413, "grad_norm": 1.515625, "learning_rate": 0.0012085106382978723, "loss": 8.7012, "step": 142 }, { "epoch": 0.018302828618968387, "grad_norm": 1.5625, "learning_rate": 0.0012170212765957448, "loss": 8.5763, "step": 143 }, { "epoch": 0.01843082042749264, "grad_norm": 1.7734375, "learning_rate": 0.001225531914893617, "loss": 9.1844, "step": 144 }, { "epoch": 0.018558812236016894, "grad_norm": 1.8046875, "learning_rate": 0.0012340425531914894, "loss": 8.7128, "step": 145 }, { "epoch": 0.01868680404454115, "grad_norm": 1.8359375, "learning_rate": 0.0012425531914893617, "loss": 8.6462, "step": 146 }, { "epoch": 0.018814795853065405, "grad_norm": 1.8984375, "learning_rate": 0.001251063829787234, "loss": 8.5614, "step": 147 }, { "epoch": 0.018942787661589657, "grad_norm": 2.96875, "learning_rate": 0.0012595744680851065, "loss": 8.9475, "step": 148 }, { "epoch": 0.019070779470113913, "grad_norm": 1.828125, "learning_rate": 0.0012680851063829788, "loss": 8.6816, "step": 149 }, { "epoch": 0.019198771278638168, "grad_norm": 1.8984375, "learning_rate": 0.0012765957446808512, "loss": 8.9585, "step": 150 }, { "epoch": 0.01932676308716242, "grad_norm": 1.5859375, "learning_rate": 0.0012851063829787234, "loss": 9.0933, "step": 151 }, { "epoch": 0.019454754895686675, "grad_norm": 1.8046875, "learning_rate": 0.0012936170212765959, "loss": 8.6693, "step": 152 }, { "epoch": 0.01958274670421093, "grad_norm": 2.40625, "learning_rate": 0.0013021276595744683, "loss": 8.7413, "step": 153 }, { "epoch": 0.019710738512735186, "grad_norm": 1.625, "learning_rate": 0.0013106382978723403, "loss": 8.9221, "step": 154 }, { "epoch": 0.019838730321259438, "grad_norm": 1.7421875, "learning_rate": 0.0013191489361702128, "loss": 8.8036, "step": 155 }, { "epoch": 0.019966722129783693, "grad_norm": 2.1875, "learning_rate": 0.001327659574468085, "loss": 8.2535, "step": 156 }, { "epoch": 0.02009471393830795, "grad_norm": 1.5703125, "learning_rate": 0.0013361702127659574, "loss": 9.0529, "step": 157 }, { "epoch": 0.020222705746832204, "grad_norm": 1.578125, "learning_rate": 0.0013446808510638297, "loss": 9.0868, "step": 158 }, { "epoch": 0.020350697555356456, "grad_norm": 1.6328125, "learning_rate": 0.001353191489361702, "loss": 8.6749, "step": 159 }, { "epoch": 0.02047868936388071, "grad_norm": 2.5625, "learning_rate": 0.0013617021276595745, "loss": 8.9905, "step": 160 }, { "epoch": 0.020606681172404967, "grad_norm": 1.546875, "learning_rate": 0.0013702127659574468, "loss": 9.0812, "step": 161 }, { "epoch": 0.02073467298092922, "grad_norm": 2.109375, "learning_rate": 0.0013787234042553192, "loss": 8.9843, "step": 162 }, { "epoch": 0.020862664789453474, "grad_norm": 2.046875, "learning_rate": 0.0013872340425531914, "loss": 9.0285, "step": 163 }, { "epoch": 0.02099065659797773, "grad_norm": 1.5546875, "learning_rate": 0.0013957446808510639, "loss": 9.0726, "step": 164 }, { "epoch": 0.021118648406501985, "grad_norm": 1.296875, "learning_rate": 0.0014042553191489363, "loss": 8.6532, "step": 165 }, { "epoch": 0.021246640215026237, "grad_norm": 3.578125, "learning_rate": 0.0014127659574468085, "loss": 9.1676, "step": 166 }, { "epoch": 0.021374632023550492, "grad_norm": 1.546875, "learning_rate": 0.001421276595744681, "loss": 8.7166, "step": 167 }, { "epoch": 0.021502623832074748, "grad_norm": 1.796875, "learning_rate": 0.0014297872340425532, "loss": 8.8643, "step": 168 }, { "epoch": 0.021630615640599003, "grad_norm": 1.859375, "learning_rate": 0.0014382978723404256, "loss": 8.4166, "step": 169 }, { "epoch": 0.021758607449123255, "grad_norm": 1.203125, "learning_rate": 0.001446808510638298, "loss": 8.7252, "step": 170 }, { "epoch": 0.02188659925764751, "grad_norm": 1.1953125, "learning_rate": 0.0014553191489361703, "loss": 8.3994, "step": 171 }, { "epoch": 0.022014591066171766, "grad_norm": 1.5703125, "learning_rate": 0.0014638297872340425, "loss": 8.5685, "step": 172 }, { "epoch": 0.022142582874696018, "grad_norm": 1.9609375, "learning_rate": 0.0014723404255319148, "loss": 8.4525, "step": 173 }, { "epoch": 0.022270574683220273, "grad_norm": 1.46875, "learning_rate": 0.0014808510638297872, "loss": 8.4888, "step": 174 }, { "epoch": 0.02239856649174453, "grad_norm": 2.03125, "learning_rate": 0.0014893617021276596, "loss": 8.6854, "step": 175 }, { "epoch": 0.022526558300268784, "grad_norm": 1.4140625, "learning_rate": 0.0014978723404255319, "loss": 8.7856, "step": 176 }, { "epoch": 0.022654550108793036, "grad_norm": 1.515625, "learning_rate": 0.0015063829787234043, "loss": 9.0148, "step": 177 }, { "epoch": 0.02278254191731729, "grad_norm": 1.09375, "learning_rate": 0.0015148936170212765, "loss": 8.732, "step": 178 }, { "epoch": 0.022910533725841547, "grad_norm": 2.484375, "learning_rate": 0.001523404255319149, "loss": 8.8451, "step": 179 }, { "epoch": 0.023038525534365802, "grad_norm": 1.515625, "learning_rate": 0.0015319148936170212, "loss": 8.9048, "step": 180 }, { "epoch": 0.023166517342890054, "grad_norm": 1.2109375, "learning_rate": 0.0015404255319148936, "loss": 8.2626, "step": 181 }, { "epoch": 0.02329450915141431, "grad_norm": 3.203125, "learning_rate": 0.001548936170212766, "loss": 9.0012, "step": 182 }, { "epoch": 0.023422500959938565, "grad_norm": 1.3984375, "learning_rate": 0.0015574468085106383, "loss": 8.4391, "step": 183 }, { "epoch": 0.023550492768462817, "grad_norm": 1.3671875, "learning_rate": 0.0015659574468085107, "loss": 8.6437, "step": 184 }, { "epoch": 0.023678484576987072, "grad_norm": 1.2734375, "learning_rate": 0.001574468085106383, "loss": 8.4913, "step": 185 }, { "epoch": 0.023806476385511328, "grad_norm": 1.4453125, "learning_rate": 0.0015829787234042554, "loss": 8.2586, "step": 186 }, { "epoch": 0.023934468194035583, "grad_norm": 2.3125, "learning_rate": 0.0015914893617021279, "loss": 8.6049, "step": 187 }, { "epoch": 0.024062460002559835, "grad_norm": 1.2890625, "learning_rate": 0.0016, "loss": 9.1021, "step": 188 }, { "epoch": 0.02419045181108409, "grad_norm": 1.515625, "learning_rate": 0.0016085106382978725, "loss": 8.7149, "step": 189 }, { "epoch": 0.024318443619608346, "grad_norm": 1.8359375, "learning_rate": 0.0016170212765957445, "loss": 8.8414, "step": 190 }, { "epoch": 0.024446435428132598, "grad_norm": 1.4375, "learning_rate": 0.001625531914893617, "loss": 8.7747, "step": 191 }, { "epoch": 0.024574427236656853, "grad_norm": 1.4296875, "learning_rate": 0.0016340425531914894, "loss": 8.3365, "step": 192 }, { "epoch": 0.02470241904518111, "grad_norm": 1.6015625, "learning_rate": 0.0016425531914893616, "loss": 8.3302, "step": 193 }, { "epoch": 0.024830410853705364, "grad_norm": 3.25, "learning_rate": 0.001651063829787234, "loss": 8.8121, "step": 194 }, { "epoch": 0.024958402662229616, "grad_norm": 1.28125, "learning_rate": 0.0016595744680851063, "loss": 8.7776, "step": 195 }, { "epoch": 0.02508639447075387, "grad_norm": 1.5078125, "learning_rate": 0.0016680851063829787, "loss": 8.5077, "step": 196 }, { "epoch": 0.025214386279278127, "grad_norm": 1.515625, "learning_rate": 0.0016765957446808512, "loss": 8.94, "step": 197 }, { "epoch": 0.025342378087802382, "grad_norm": 1.171875, "learning_rate": 0.0016851063829787234, "loss": 8.5434, "step": 198 }, { "epoch": 0.025470369896326634, "grad_norm": 1.28125, "learning_rate": 0.0016936170212765958, "loss": 8.7839, "step": 199 }, { "epoch": 0.02559836170485089, "grad_norm": 1.1953125, "learning_rate": 0.001702127659574468, "loss": 8.6118, "step": 200 }, { "epoch": 0.025726353513375145, "grad_norm": 1.484375, "learning_rate": 0.0017106382978723405, "loss": 8.5147, "step": 201 }, { "epoch": 0.025854345321899397, "grad_norm": 1.3828125, "learning_rate": 0.0017191489361702127, "loss": 8.9552, "step": 202 }, { "epoch": 0.025982337130423652, "grad_norm": 1.3125, "learning_rate": 0.0017276595744680852, "loss": 7.8895, "step": 203 }, { "epoch": 0.026110328938947908, "grad_norm": 1.8125, "learning_rate": 0.0017361702127659576, "loss": 8.7647, "step": 204 }, { "epoch": 0.026238320747472163, "grad_norm": 1.6484375, "learning_rate": 0.0017446808510638298, "loss": 8.6143, "step": 205 }, { "epoch": 0.026366312555996415, "grad_norm": 1.28125, "learning_rate": 0.0017531914893617023, "loss": 8.629, "step": 206 }, { "epoch": 0.02649430436452067, "grad_norm": 1.0625, "learning_rate": 0.0017617021276595745, "loss": 8.4275, "step": 207 }, { "epoch": 0.026622296173044926, "grad_norm": 1.84375, "learning_rate": 0.0017702127659574467, "loss": 8.3776, "step": 208 }, { "epoch": 0.02675028798156918, "grad_norm": 1.375, "learning_rate": 0.0017787234042553192, "loss": 8.4555, "step": 209 }, { "epoch": 0.026878279790093433, "grad_norm": 1.546875, "learning_rate": 0.0017872340425531914, "loss": 8.9744, "step": 210 }, { "epoch": 0.02700627159861769, "grad_norm": 1.0859375, "learning_rate": 0.0017957446808510638, "loss": 8.721, "step": 211 }, { "epoch": 0.027134263407141944, "grad_norm": 1.71875, "learning_rate": 0.001804255319148936, "loss": 8.7357, "step": 212 }, { "epoch": 0.027262255215666196, "grad_norm": 1.2734375, "learning_rate": 0.0018127659574468085, "loss": 8.2169, "step": 213 }, { "epoch": 0.02739024702419045, "grad_norm": 1.2421875, "learning_rate": 0.001821276595744681, "loss": 8.5497, "step": 214 }, { "epoch": 0.027518238832714707, "grad_norm": 1.171875, "learning_rate": 0.0018297872340425532, "loss": 8.6894, "step": 215 }, { "epoch": 0.027646230641238962, "grad_norm": 1.21875, "learning_rate": 0.0018382978723404256, "loss": 8.6858, "step": 216 }, { "epoch": 0.027774222449763214, "grad_norm": 1.5859375, "learning_rate": 0.0018468085106382978, "loss": 8.6044, "step": 217 }, { "epoch": 0.02790221425828747, "grad_norm": 1.1796875, "learning_rate": 0.0018553191489361703, "loss": 8.4637, "step": 218 }, { "epoch": 0.028030206066811725, "grad_norm": 1.625, "learning_rate": 0.0018638297872340427, "loss": 8.7671, "step": 219 }, { "epoch": 0.02815819787533598, "grad_norm": 1.203125, "learning_rate": 0.001872340425531915, "loss": 8.6098, "step": 220 }, { "epoch": 0.028286189683860232, "grad_norm": 1.1953125, "learning_rate": 0.0018808510638297874, "loss": 8.3476, "step": 221 }, { "epoch": 0.028414181492384488, "grad_norm": 2.203125, "learning_rate": 0.0018893617021276596, "loss": 8.5881, "step": 222 }, { "epoch": 0.028542173300908743, "grad_norm": 1.53125, "learning_rate": 0.001897872340425532, "loss": 9.3298, "step": 223 }, { "epoch": 0.028670165109432995, "grad_norm": 1.53125, "learning_rate": 0.0019063829787234045, "loss": 8.8214, "step": 224 }, { "epoch": 0.02879815691795725, "grad_norm": 1.234375, "learning_rate": 0.0019148936170212767, "loss": 8.4738, "step": 225 }, { "epoch": 0.028926148726481506, "grad_norm": 1.7109375, "learning_rate": 0.001923404255319149, "loss": 8.6569, "step": 226 }, { "epoch": 0.02905414053500576, "grad_norm": 1.4296875, "learning_rate": 0.0019319148936170212, "loss": 8.6104, "step": 227 }, { "epoch": 0.029182132343530013, "grad_norm": 1.078125, "learning_rate": 0.0019404255319148936, "loss": 8.1561, "step": 228 }, { "epoch": 0.02931012415205427, "grad_norm": 1.1796875, "learning_rate": 0.0019489361702127658, "loss": 8.3613, "step": 229 }, { "epoch": 0.029438115960578524, "grad_norm": 1.0078125, "learning_rate": 0.0019574468085106385, "loss": 7.9761, "step": 230 }, { "epoch": 0.029566107769102776, "grad_norm": 1.8046875, "learning_rate": 0.0019659574468085107, "loss": 7.9273, "step": 231 }, { "epoch": 0.02969409957762703, "grad_norm": 1.2734375, "learning_rate": 0.001974468085106383, "loss": 9.0582, "step": 232 }, { "epoch": 0.029822091386151287, "grad_norm": 1.0703125, "learning_rate": 0.001982978723404255, "loss": 8.6655, "step": 233 }, { "epoch": 0.029950083194675542, "grad_norm": 1.078125, "learning_rate": 0.001991489361702128, "loss": 8.4073, "step": 234 }, { "epoch": 0.030078075003199794, "grad_norm": 1.4609375, "learning_rate": 0.002, "loss": 8.0217, "step": 235 }, { "epoch": 0.03020606681172405, "grad_norm": 1.4296875, "learning_rate": 0.001999999914066888, "loss": 8.6746, "step": 236 }, { "epoch": 0.030334058620248305, "grad_norm": 1.0, "learning_rate": 0.001999999656267565, "loss": 8.306, "step": 237 }, { "epoch": 0.03046205042877256, "grad_norm": 0.87109375, "learning_rate": 0.001999999226602077, "loss": 8.6114, "step": 238 }, { "epoch": 0.030590042237296812, "grad_norm": 0.92578125, "learning_rate": 0.0019999986250704973, "loss": 8.2038, "step": 239 }, { "epoch": 0.030718034045821067, "grad_norm": 2.109375, "learning_rate": 0.001999997851672929, "loss": 8.7213, "step": 240 }, { "epoch": 0.030846025854345323, "grad_norm": 1.0078125, "learning_rate": 0.001999996906409505, "loss": 8.6042, "step": 241 }, { "epoch": 0.030974017662869575, "grad_norm": 1.1015625, "learning_rate": 0.001999995789280388, "loss": 8.2493, "step": 242 }, { "epoch": 0.03110200947139383, "grad_norm": 1.1484375, "learning_rate": 0.0019999945002857704, "loss": 8.5548, "step": 243 }, { "epoch": 0.031230001279918086, "grad_norm": 0.88671875, "learning_rate": 0.0019999930394258725, "loss": 8.5153, "step": 244 }, { "epoch": 0.03135799308844234, "grad_norm": 1.5390625, "learning_rate": 0.001999991406700947, "loss": 8.0701, "step": 245 }, { "epoch": 0.03148598489696659, "grad_norm": 1.3359375, "learning_rate": 0.0019999896021112725, "loss": 8.3185, "step": 246 }, { "epoch": 0.03161397670549085, "grad_norm": 1.0859375, "learning_rate": 0.001999987625657161, "loss": 8.3867, "step": 247 }, { "epoch": 0.031741968514015104, "grad_norm": 1.1171875, "learning_rate": 0.0019999854773389515, "loss": 8.4251, "step": 248 }, { "epoch": 0.03186996032253936, "grad_norm": 0.953125, "learning_rate": 0.001999983157157013, "loss": 8.5273, "step": 249 }, { "epoch": 0.031997952131063614, "grad_norm": 0.87890625, "learning_rate": 0.0019999806651117445, "loss": 9.0086, "step": 250 }, { "epoch": 0.03212594393958786, "grad_norm": 1.4375, "learning_rate": 0.001999978001203574, "loss": 8.4195, "step": 251 }, { "epoch": 0.03225393574811212, "grad_norm": 0.9296875, "learning_rate": 0.00199997516543296, "loss": 8.1317, "step": 252 }, { "epoch": 0.032381927556636374, "grad_norm": 1.078125, "learning_rate": 0.001999972157800389, "loss": 8.2909, "step": 253 }, { "epoch": 0.03250991936516063, "grad_norm": 1.3515625, "learning_rate": 0.001999968978306379, "loss": 8.4478, "step": 254 }, { "epoch": 0.032637911173684885, "grad_norm": 0.96484375, "learning_rate": 0.001999965626951476, "loss": 8.4762, "step": 255 }, { "epoch": 0.03276590298220914, "grad_norm": 1.3203125, "learning_rate": 0.001999962103736255, "loss": 8.2412, "step": 256 }, { "epoch": 0.032893894790733395, "grad_norm": 1.09375, "learning_rate": 0.0019999584086613227, "loss": 8.489, "step": 257 }, { "epoch": 0.03302188659925765, "grad_norm": 1.3359375, "learning_rate": 0.0019999545417273136, "loss": 8.2531, "step": 258 }, { "epoch": 0.0331498784077819, "grad_norm": 1.03125, "learning_rate": 0.0019999505029348924, "loss": 8.4081, "step": 259 }, { "epoch": 0.033277870216306155, "grad_norm": 1.8125, "learning_rate": 0.001999946292284754, "loss": 8.06, "step": 260 }, { "epoch": 0.03340586202483041, "grad_norm": 1.1640625, "learning_rate": 0.0019999419097776207, "loss": 8.3496, "step": 261 }, { "epoch": 0.033533853833354665, "grad_norm": 1.171875, "learning_rate": 0.0019999373554142465, "loss": 7.6989, "step": 262 }, { "epoch": 0.03366184564187892, "grad_norm": 1.0078125, "learning_rate": 0.0019999326291954144, "loss": 8.7002, "step": 263 }, { "epoch": 0.033789837450403176, "grad_norm": 1.2890625, "learning_rate": 0.001999927731121936, "loss": 8.364, "step": 264 }, { "epoch": 0.03391782925892743, "grad_norm": 1.5859375, "learning_rate": 0.0019999226611946536, "loss": 8.5855, "step": 265 }, { "epoch": 0.03404582106745168, "grad_norm": 0.75390625, "learning_rate": 0.0019999174194144384, "loss": 8.5421, "step": 266 }, { "epoch": 0.034173812875975935, "grad_norm": 1.15625, "learning_rate": 0.001999912005782191, "loss": 7.9455, "step": 267 }, { "epoch": 0.03430180468450019, "grad_norm": 1.265625, "learning_rate": 0.001999906420298842, "loss": 8.0294, "step": 268 }, { "epoch": 0.034429796493024446, "grad_norm": 1.1640625, "learning_rate": 0.0019999006629653517, "loss": 7.9632, "step": 269 }, { "epoch": 0.0345577883015487, "grad_norm": 1.1015625, "learning_rate": 0.001999894733782709, "loss": 8.2043, "step": 270 }, { "epoch": 0.03468578011007296, "grad_norm": 0.92578125, "learning_rate": 0.0019998886327519338, "loss": 8.4007, "step": 271 }, { "epoch": 0.03481377191859721, "grad_norm": 0.82421875, "learning_rate": 0.001999882359874074, "loss": 8.3737, "step": 272 }, { "epoch": 0.03494176372712146, "grad_norm": 1.0625, "learning_rate": 0.0019998759151502073, "loss": 8.2074, "step": 273 }, { "epoch": 0.035069755535645716, "grad_norm": 1.296875, "learning_rate": 0.001999869298581442, "loss": 7.8895, "step": 274 }, { "epoch": 0.03519774734416997, "grad_norm": 1.0078125, "learning_rate": 0.0019998625101689156, "loss": 8.2668, "step": 275 }, { "epoch": 0.03532573915269423, "grad_norm": 1.3046875, "learning_rate": 0.001999855549913794, "loss": 8.3547, "step": 276 }, { "epoch": 0.03545373096121848, "grad_norm": 1.0234375, "learning_rate": 0.0019998484178172732, "loss": 8.3062, "step": 277 }, { "epoch": 0.03558172276974274, "grad_norm": 0.859375, "learning_rate": 0.00199984111388058, "loss": 8.4267, "step": 278 }, { "epoch": 0.03570971457826699, "grad_norm": 0.94140625, "learning_rate": 0.001999833638104969, "loss": 8.2584, "step": 279 }, { "epoch": 0.03583770638679125, "grad_norm": 1.0078125, "learning_rate": 0.0019998259904917257, "loss": 8.4601, "step": 280 }, { "epoch": 0.0359656981953155, "grad_norm": 0.94921875, "learning_rate": 0.0019998181710421635, "loss": 7.9099, "step": 281 }, { "epoch": 0.03609369000383975, "grad_norm": 1.1875, "learning_rate": 0.001999810179757627, "loss": 8.3067, "step": 282 }, { "epoch": 0.03622168181236401, "grad_norm": 1.171875, "learning_rate": 0.00199980201663949, "loss": 7.9643, "step": 283 }, { "epoch": 0.03634967362088826, "grad_norm": 1.0546875, "learning_rate": 0.001999793681689154, "loss": 8.4406, "step": 284 }, { "epoch": 0.03647766542941252, "grad_norm": 0.96484375, "learning_rate": 0.0019997851749080525, "loss": 8.4436, "step": 285 }, { "epoch": 0.036605657237936774, "grad_norm": 0.9765625, "learning_rate": 0.0019997764962976475, "loss": 7.9318, "step": 286 }, { "epoch": 0.03673364904646103, "grad_norm": 1.0390625, "learning_rate": 0.0019997676458594306, "loss": 8.3004, "step": 287 }, { "epoch": 0.03686164085498528, "grad_norm": 0.90625, "learning_rate": 0.001999758623594923, "loss": 8.4003, "step": 288 }, { "epoch": 0.036989632663509533, "grad_norm": 1.0078125, "learning_rate": 0.001999749429505675, "loss": 7.9758, "step": 289 }, { "epoch": 0.03711762447203379, "grad_norm": 0.8671875, "learning_rate": 0.001999740063593267, "loss": 8.1537, "step": 290 }, { "epoch": 0.037245616280558044, "grad_norm": 1.0078125, "learning_rate": 0.001999730525859308, "loss": 7.8036, "step": 291 }, { "epoch": 0.0373736080890823, "grad_norm": 1.3671875, "learning_rate": 0.0019997208163054377, "loss": 8.3486, "step": 292 }, { "epoch": 0.037501599897606555, "grad_norm": 0.92578125, "learning_rate": 0.0019997109349333252, "loss": 8.439, "step": 293 }, { "epoch": 0.03762959170613081, "grad_norm": 1.0, "learning_rate": 0.0019997008817446687, "loss": 7.7537, "step": 294 }, { "epoch": 0.03775758351465506, "grad_norm": 1.0859375, "learning_rate": 0.0019996906567411954, "loss": 8.2072, "step": 295 }, { "epoch": 0.037885575323179314, "grad_norm": 1.03125, "learning_rate": 0.001999680259924663, "loss": 7.9602, "step": 296 }, { "epoch": 0.03801356713170357, "grad_norm": 0.8515625, "learning_rate": 0.0019996696912968587, "loss": 8.1023, "step": 297 }, { "epoch": 0.038141558940227825, "grad_norm": 0.9375, "learning_rate": 0.001999658950859598, "loss": 8.3889, "step": 298 }, { "epoch": 0.03826955074875208, "grad_norm": 0.94921875, "learning_rate": 0.001999648038614728, "loss": 8.2012, "step": 299 }, { "epoch": 0.038397542557276336, "grad_norm": 1.1171875, "learning_rate": 0.001999636954564123, "loss": 8.5743, "step": 300 }, { "epoch": 0.03852553436580059, "grad_norm": 1.1953125, "learning_rate": 0.0019996256987096887, "loss": 8.1439, "step": 301 }, { "epoch": 0.03865352617432484, "grad_norm": 1.0078125, "learning_rate": 0.0019996142710533597, "loss": 8.3462, "step": 302 }, { "epoch": 0.038781517982849095, "grad_norm": 1.1640625, "learning_rate": 0.001999602671597099, "loss": 8.2519, "step": 303 }, { "epoch": 0.03890950979137335, "grad_norm": 1.0625, "learning_rate": 0.0019995909003429017, "loss": 7.9275, "step": 304 }, { "epoch": 0.039037501599897606, "grad_norm": 0.9296875, "learning_rate": 0.0019995789572927894, "loss": 7.8728, "step": 305 }, { "epoch": 0.03916549340842186, "grad_norm": 0.75390625, "learning_rate": 0.001999566842448816, "loss": 7.7588, "step": 306 }, { "epoch": 0.03929348521694612, "grad_norm": 0.88671875, "learning_rate": 0.0019995545558130624, "loss": 7.8929, "step": 307 }, { "epoch": 0.03942147702547037, "grad_norm": 1.0390625, "learning_rate": 0.001999542097387641, "loss": 7.9069, "step": 308 }, { "epoch": 0.03954946883399463, "grad_norm": 0.90234375, "learning_rate": 0.001999529467174693, "loss": 8.1361, "step": 309 }, { "epoch": 0.039677460642518876, "grad_norm": 0.94140625, "learning_rate": 0.0019995166651763886, "loss": 8.17, "step": 310 }, { "epoch": 0.03980545245104313, "grad_norm": 1.0078125, "learning_rate": 0.001999503691394929, "loss": 8.1647, "step": 311 }, { "epoch": 0.03993344425956739, "grad_norm": 1.0625, "learning_rate": 0.0019994905458325424, "loss": 8.086, "step": 312 }, { "epoch": 0.04006143606809164, "grad_norm": 1.1015625, "learning_rate": 0.0019994772284914896, "loss": 8.39, "step": 313 }, { "epoch": 0.0401894278766159, "grad_norm": 0.890625, "learning_rate": 0.001999463739374059, "loss": 8.5374, "step": 314 }, { "epoch": 0.04031741968514015, "grad_norm": 1.078125, "learning_rate": 0.0019994500784825684, "loss": 7.6325, "step": 315 }, { "epoch": 0.04044541149366441, "grad_norm": 0.84765625, "learning_rate": 0.001999436245819366, "loss": 8.1332, "step": 316 }, { "epoch": 0.04057340330218866, "grad_norm": 1.1484375, "learning_rate": 0.001999422241386829, "loss": 8.4218, "step": 317 }, { "epoch": 0.04070139511071291, "grad_norm": 0.92578125, "learning_rate": 0.001999408065187365, "loss": 7.845, "step": 318 }, { "epoch": 0.04082938691923717, "grad_norm": 1.015625, "learning_rate": 0.0019993937172234095, "loss": 8.0856, "step": 319 }, { "epoch": 0.04095737872776142, "grad_norm": 0.93359375, "learning_rate": 0.0019993791974974285, "loss": 7.8986, "step": 320 }, { "epoch": 0.04108537053628568, "grad_norm": 0.9453125, "learning_rate": 0.001999364506011918, "loss": 7.7555, "step": 321 }, { "epoch": 0.041213362344809934, "grad_norm": 1.0625, "learning_rate": 0.0019993496427694025, "loss": 8.1576, "step": 322 }, { "epoch": 0.04134135415333419, "grad_norm": 0.9140625, "learning_rate": 0.001999334607772437, "loss": 8.3152, "step": 323 }, { "epoch": 0.04146934596185844, "grad_norm": 1.1015625, "learning_rate": 0.0019993194010236052, "loss": 7.9105, "step": 324 }, { "epoch": 0.04159733777038269, "grad_norm": 0.875, "learning_rate": 0.0019993040225255204, "loss": 8.0405, "step": 325 }, { "epoch": 0.04172532957890695, "grad_norm": 0.97265625, "learning_rate": 0.001999288472280826, "loss": 7.8964, "step": 326 }, { "epoch": 0.041853321387431204, "grad_norm": 0.96875, "learning_rate": 0.001999272750292194, "loss": 7.6787, "step": 327 }, { "epoch": 0.04198131319595546, "grad_norm": 1.1015625, "learning_rate": 0.0019992568565623276, "loss": 8.4053, "step": 328 }, { "epoch": 0.042109305004479715, "grad_norm": 1.078125, "learning_rate": 0.0019992407910939575, "loss": 8.4506, "step": 329 }, { "epoch": 0.04223729681300397, "grad_norm": 0.97265625, "learning_rate": 0.0019992245538898445, "loss": 8.1245, "step": 330 }, { "epoch": 0.04236528862152822, "grad_norm": 1.0078125, "learning_rate": 0.0019992081449527804, "loss": 7.8487, "step": 331 }, { "epoch": 0.042493280430052474, "grad_norm": 1.015625, "learning_rate": 0.0019991915642855844, "loss": 7.8075, "step": 332 }, { "epoch": 0.04262127223857673, "grad_norm": 0.98828125, "learning_rate": 0.0019991748118911063, "loss": 7.8185, "step": 333 }, { "epoch": 0.042749264047100985, "grad_norm": 1.203125, "learning_rate": 0.001999157887772225, "loss": 7.8235, "step": 334 }, { "epoch": 0.04287725585562524, "grad_norm": 0.90234375, "learning_rate": 0.0019991407919318503, "loss": 7.9787, "step": 335 }, { "epoch": 0.043005247664149496, "grad_norm": 1.171875, "learning_rate": 0.00199912352437292, "loss": 7.2365, "step": 336 }, { "epoch": 0.04313323947267375, "grad_norm": 0.98046875, "learning_rate": 0.0019991060850984006, "loss": 7.9981, "step": 337 }, { "epoch": 0.04326123128119801, "grad_norm": 1.28125, "learning_rate": 0.0019990884741112906, "loss": 8.5765, "step": 338 }, { "epoch": 0.043389223089722255, "grad_norm": 0.875, "learning_rate": 0.0019990706914146165, "loss": 8.0139, "step": 339 }, { "epoch": 0.04351721489824651, "grad_norm": 0.92578125, "learning_rate": 0.0019990527370114344, "loss": 8.0336, "step": 340 }, { "epoch": 0.043645206706770766, "grad_norm": 1.1328125, "learning_rate": 0.00199903461090483, "loss": 8.3171, "step": 341 }, { "epoch": 0.04377319851529502, "grad_norm": 1.0078125, "learning_rate": 0.0019990163130979184, "loss": 7.6641, "step": 342 }, { "epoch": 0.04390119032381928, "grad_norm": 1.1953125, "learning_rate": 0.0019989978435938447, "loss": 7.9447, "step": 343 }, { "epoch": 0.04402918213234353, "grad_norm": 1.3671875, "learning_rate": 0.001998979202395783, "loss": 8.3972, "step": 344 }, { "epoch": 0.04415717394086779, "grad_norm": 0.89453125, "learning_rate": 0.0019989603895069373, "loss": 8.1053, "step": 345 }, { "epoch": 0.044285165749392036, "grad_norm": 1.03125, "learning_rate": 0.001998941404930541, "loss": 7.4835, "step": 346 }, { "epoch": 0.04441315755791629, "grad_norm": 0.92578125, "learning_rate": 0.001998922248669856, "loss": 8.315, "step": 347 }, { "epoch": 0.04454114936644055, "grad_norm": 0.8515625, "learning_rate": 0.0019989029207281763, "loss": 7.7607, "step": 348 }, { "epoch": 0.0446691411749648, "grad_norm": 1.1953125, "learning_rate": 0.0019988834211088218, "loss": 8.0765, "step": 349 }, { "epoch": 0.04479713298348906, "grad_norm": 1.1015625, "learning_rate": 0.0019988637498151453, "loss": 7.8337, "step": 350 }, { "epoch": 0.04492512479201331, "grad_norm": 1.046875, "learning_rate": 0.0019988439068505267, "loss": 7.4024, "step": 351 }, { "epoch": 0.04505311660053757, "grad_norm": 1.09375, "learning_rate": 0.001998823892218377, "loss": 7.6323, "step": 352 }, { "epoch": 0.04518110840906182, "grad_norm": 0.98828125, "learning_rate": 0.0019988037059221354, "loss": 7.9424, "step": 353 }, { "epoch": 0.04530910021758607, "grad_norm": 0.90234375, "learning_rate": 0.001998783347965272, "loss": 7.439, "step": 354 }, { "epoch": 0.04543709202611033, "grad_norm": 0.9765625, "learning_rate": 0.001998762818351285, "loss": 7.5989, "step": 355 }, { "epoch": 0.04556508383463458, "grad_norm": 0.99609375, "learning_rate": 0.0019987421170837034, "loss": 8.237, "step": 356 }, { "epoch": 0.04569307564315884, "grad_norm": 0.95703125, "learning_rate": 0.0019987212441660842, "loss": 7.6642, "step": 357 }, { "epoch": 0.045821067451683094, "grad_norm": 1.09375, "learning_rate": 0.001998700199602015, "loss": 7.5337, "step": 358 }, { "epoch": 0.04594905926020735, "grad_norm": 1.03125, "learning_rate": 0.001998678983395113, "loss": 7.8855, "step": 359 }, { "epoch": 0.046077051068731605, "grad_norm": 0.98828125, "learning_rate": 0.0019986575955490246, "loss": 7.677, "step": 360 }, { "epoch": 0.04620504287725585, "grad_norm": 1.1953125, "learning_rate": 0.0019986360360674253, "loss": 7.7139, "step": 361 }, { "epoch": 0.04633303468578011, "grad_norm": 0.83984375, "learning_rate": 0.0019986143049540205, "loss": 7.895, "step": 362 }, { "epoch": 0.046461026494304364, "grad_norm": 1.1484375, "learning_rate": 0.001998592402212545, "loss": 8.281, "step": 363 }, { "epoch": 0.04658901830282862, "grad_norm": 0.9765625, "learning_rate": 0.0019985703278467635, "loss": 7.3744, "step": 364 }, { "epoch": 0.046717010111352875, "grad_norm": 1.1171875, "learning_rate": 0.0019985480818604695, "loss": 7.9484, "step": 365 }, { "epoch": 0.04684500191987713, "grad_norm": 1.125, "learning_rate": 0.0019985256642574864, "loss": 7.2767, "step": 366 }, { "epoch": 0.046972993728401385, "grad_norm": 0.953125, "learning_rate": 0.001998503075041667, "loss": 7.9239, "step": 367 }, { "epoch": 0.047100985536925634, "grad_norm": 1.0234375, "learning_rate": 0.001998480314216894, "loss": 7.4436, "step": 368 }, { "epoch": 0.04722897734544989, "grad_norm": 1.0546875, "learning_rate": 0.0019984573817870784, "loss": 7.6712, "step": 369 }, { "epoch": 0.047356969153974145, "grad_norm": 0.91015625, "learning_rate": 0.0019984342777561626, "loss": 7.953, "step": 370 }, { "epoch": 0.0474849609624984, "grad_norm": 1.0625, "learning_rate": 0.0019984110021281165, "loss": 7.39, "step": 371 }, { "epoch": 0.047612952771022655, "grad_norm": 1.03125, "learning_rate": 0.0019983875549069406, "loss": 7.7632, "step": 372 }, { "epoch": 0.04774094457954691, "grad_norm": 0.98828125, "learning_rate": 0.001998363936096665, "loss": 7.7866, "step": 373 }, { "epoch": 0.047868936388071166, "grad_norm": 1.078125, "learning_rate": 0.0019983401457013485, "loss": 7.5003, "step": 374 }, { "epoch": 0.047996928196595415, "grad_norm": 1.03125, "learning_rate": 0.0019983161837250807, "loss": 7.3998, "step": 375 }, { "epoch": 0.04812492000511967, "grad_norm": 0.99609375, "learning_rate": 0.0019982920501719786, "loss": 7.1467, "step": 376 }, { "epoch": 0.048252911813643926, "grad_norm": 1.0078125, "learning_rate": 0.001998267745046191, "loss": 7.8815, "step": 377 }, { "epoch": 0.04838090362216818, "grad_norm": 1.125, "learning_rate": 0.0019982432683518943, "loss": 7.6482, "step": 378 }, { "epoch": 0.048508895430692436, "grad_norm": 1.28125, "learning_rate": 0.0019982186200932965, "loss": 7.4682, "step": 379 }, { "epoch": 0.04863688723921669, "grad_norm": 1.1484375, "learning_rate": 0.0019981938002746327, "loss": 7.2471, "step": 380 }, { "epoch": 0.04876487904774095, "grad_norm": 1.1015625, "learning_rate": 0.0019981688089001685, "loss": 8.0438, "step": 381 }, { "epoch": 0.048892870856265196, "grad_norm": 1.1171875, "learning_rate": 0.0019981436459742, "loss": 7.0828, "step": 382 }, { "epoch": 0.04902086266478945, "grad_norm": 1.0625, "learning_rate": 0.0019981183115010508, "loss": 8.1522, "step": 383 }, { "epoch": 0.049148854473313706, "grad_norm": 1.171875, "learning_rate": 0.0019980928054850757, "loss": 7.5713, "step": 384 }, { "epoch": 0.04927684628183796, "grad_norm": 1.046875, "learning_rate": 0.001998067127930658, "loss": 7.7339, "step": 385 }, { "epoch": 0.04940483809036222, "grad_norm": 1.03125, "learning_rate": 0.0019980412788422114, "loss": 7.0573, "step": 386 }, { "epoch": 0.04953282989888647, "grad_norm": 1.2421875, "learning_rate": 0.0019980152582241774, "loss": 8.1425, "step": 387 }, { "epoch": 0.04966082170741073, "grad_norm": 0.98828125, "learning_rate": 0.001997989066081029, "loss": 7.5571, "step": 388 }, { "epoch": 0.04978881351593498, "grad_norm": 0.87890625, "learning_rate": 0.001997962702417268, "loss": 7.9552, "step": 389 }, { "epoch": 0.04991680532445923, "grad_norm": 1.0703125, "learning_rate": 0.001997936167237424, "loss": 7.8074, "step": 390 }, { "epoch": 0.05004479713298349, "grad_norm": 1.296875, "learning_rate": 0.001997909460546059, "loss": 7.6329, "step": 391 }, { "epoch": 0.05017278894150774, "grad_norm": 1.046875, "learning_rate": 0.0019978825823477623, "loss": 7.607, "step": 392 }, { "epoch": 0.050300780750032, "grad_norm": 1.125, "learning_rate": 0.0019978555326471536, "loss": 7.5571, "step": 393 }, { "epoch": 0.050428772558556254, "grad_norm": 1.0078125, "learning_rate": 0.001997828311448881, "loss": 7.5765, "step": 394 }, { "epoch": 0.05055676436708051, "grad_norm": 1.0, "learning_rate": 0.001997800918757624, "loss": 7.6145, "step": 395 }, { "epoch": 0.050684756175604764, "grad_norm": 0.890625, "learning_rate": 0.00199777335457809, "loss": 7.7618, "step": 396 }, { "epoch": 0.05081274798412901, "grad_norm": 1.09375, "learning_rate": 0.0019977456189150164, "loss": 7.5914, "step": 397 }, { "epoch": 0.05094073979265327, "grad_norm": 0.9921875, "learning_rate": 0.00199771771177317, "loss": 7.3126, "step": 398 }, { "epoch": 0.051068731601177524, "grad_norm": 1.1484375, "learning_rate": 0.001997689633157347, "loss": 7.1157, "step": 399 }, { "epoch": 0.05119672340970178, "grad_norm": 1.03125, "learning_rate": 0.0019976613830723733, "loss": 7.4847, "step": 400 }, { "epoch": 0.051324715218226034, "grad_norm": 1.3828125, "learning_rate": 0.0019976329615231042, "loss": 7.9189, "step": 401 }, { "epoch": 0.05145270702675029, "grad_norm": 1.421875, "learning_rate": 0.0019976043685144243, "loss": 7.7608, "step": 402 }, { "epoch": 0.051580698835274545, "grad_norm": 1.1015625, "learning_rate": 0.0019975756040512475, "loss": 7.4357, "step": 403 }, { "epoch": 0.051708690643798794, "grad_norm": 1.265625, "learning_rate": 0.0019975466681385183, "loss": 7.6623, "step": 404 }, { "epoch": 0.05183668245232305, "grad_norm": 1.578125, "learning_rate": 0.0019975175607812087, "loss": 7.4823, "step": 405 }, { "epoch": 0.051964674260847304, "grad_norm": 1.5703125, "learning_rate": 0.0019974882819843217, "loss": 7.1651, "step": 406 }, { "epoch": 0.05209266606937156, "grad_norm": 1.0234375, "learning_rate": 0.00199745883175289, "loss": 7.367, "step": 407 }, { "epoch": 0.052220657877895815, "grad_norm": 1.203125, "learning_rate": 0.001997429210091974, "loss": 7.6021, "step": 408 }, { "epoch": 0.05234864968642007, "grad_norm": 1.390625, "learning_rate": 0.0019973994170066653, "loss": 7.5098, "step": 409 }, { "epoch": 0.052476641494944326, "grad_norm": 1.09375, "learning_rate": 0.0019973694525020846, "loss": 7.162, "step": 410 }, { "epoch": 0.05260463330346858, "grad_norm": 1.640625, "learning_rate": 0.001997339316583381, "loss": 7.7455, "step": 411 }, { "epoch": 0.05273262511199283, "grad_norm": 1.1484375, "learning_rate": 0.0019973090092557343, "loss": 7.2754, "step": 412 }, { "epoch": 0.052860616920517085, "grad_norm": 0.84765625, "learning_rate": 0.0019972785305243535, "loss": 7.4922, "step": 413 }, { "epoch": 0.05298860872904134, "grad_norm": 1.1953125, "learning_rate": 0.0019972478803944767, "loss": 7.5516, "step": 414 }, { "epoch": 0.053116600537565596, "grad_norm": 1.2265625, "learning_rate": 0.0019972170588713712, "loss": 7.6435, "step": 415 }, { "epoch": 0.05324459234608985, "grad_norm": 0.91796875, "learning_rate": 0.0019971860659603346, "loss": 7.5155, "step": 416 }, { "epoch": 0.05337258415461411, "grad_norm": 1.1875, "learning_rate": 0.0019971549016666937, "loss": 6.827, "step": 417 }, { "epoch": 0.05350057596313836, "grad_norm": 1.1875, "learning_rate": 0.0019971235659958044, "loss": 7.3864, "step": 418 }, { "epoch": 0.05362856777166261, "grad_norm": 1.4921875, "learning_rate": 0.001997092058953052, "loss": 7.1051, "step": 419 }, { "epoch": 0.053756559580186866, "grad_norm": 0.953125, "learning_rate": 0.0019970603805438514, "loss": 7.9602, "step": 420 }, { "epoch": 0.05388455138871112, "grad_norm": 1.1328125, "learning_rate": 0.0019970285307736478, "loss": 7.4691, "step": 421 }, { "epoch": 0.05401254319723538, "grad_norm": 1.0859375, "learning_rate": 0.001996996509647914, "loss": 7.1073, "step": 422 }, { "epoch": 0.05414053500575963, "grad_norm": 1.1796875, "learning_rate": 0.001996964317172155, "loss": 7.0894, "step": 423 }, { "epoch": 0.05426852681428389, "grad_norm": 1.2578125, "learning_rate": 0.0019969319533519017, "loss": 7.395, "step": 424 }, { "epoch": 0.05439651862280814, "grad_norm": 1.0078125, "learning_rate": 0.0019968994181927178, "loss": 7.2452, "step": 425 }, { "epoch": 0.05452451043133239, "grad_norm": 1.1328125, "learning_rate": 0.0019968667117001944, "loss": 6.8562, "step": 426 }, { "epoch": 0.05465250223985665, "grad_norm": 1.3515625, "learning_rate": 0.0019968338338799524, "loss": 7.2328, "step": 427 }, { "epoch": 0.0547804940483809, "grad_norm": 1.59375, "learning_rate": 0.0019968007847376425, "loss": 7.0666, "step": 428 }, { "epoch": 0.05490848585690516, "grad_norm": 1.21875, "learning_rate": 0.001996767564278945, "loss": 7.1076, "step": 429 }, { "epoch": 0.05503647766542941, "grad_norm": 1.1484375, "learning_rate": 0.00199673417250957, "loss": 7.576, "step": 430 }, { "epoch": 0.05516446947395367, "grad_norm": 1.3359375, "learning_rate": 0.001996700609435255, "loss": 7.3135, "step": 431 }, { "epoch": 0.055292461282477924, "grad_norm": 1.1015625, "learning_rate": 0.0019966668750617685, "loss": 7.4828, "step": 432 }, { "epoch": 0.05542045309100217, "grad_norm": 1.140625, "learning_rate": 0.0019966329693949098, "loss": 7.2372, "step": 433 }, { "epoch": 0.05554844489952643, "grad_norm": 1.21875, "learning_rate": 0.0019965988924405048, "loss": 7.0769, "step": 434 }, { "epoch": 0.05567643670805068, "grad_norm": 1.0078125, "learning_rate": 0.0019965646442044105, "loss": 7.2683, "step": 435 }, { "epoch": 0.05580442851657494, "grad_norm": 0.95703125, "learning_rate": 0.0019965302246925136, "loss": 7.0883, "step": 436 }, { "epoch": 0.055932420325099194, "grad_norm": 1.25, "learning_rate": 0.001996495633910728, "loss": 6.5323, "step": 437 }, { "epoch": 0.05606041213362345, "grad_norm": 0.953125, "learning_rate": 0.001996460871865001, "loss": 6.7232, "step": 438 }, { "epoch": 0.056188403942147705, "grad_norm": 1.1484375, "learning_rate": 0.001996425938561305, "loss": 7.2941, "step": 439 }, { "epoch": 0.05631639575067196, "grad_norm": 1.4921875, "learning_rate": 0.001996390834005645, "loss": 7.2678, "step": 440 }, { "epoch": 0.05644438755919621, "grad_norm": 1.21875, "learning_rate": 0.0019963555582040546, "loss": 6.7167, "step": 441 }, { "epoch": 0.056572379367720464, "grad_norm": 1.0078125, "learning_rate": 0.001996320111162595, "loss": 6.805, "step": 442 }, { "epoch": 0.05670037117624472, "grad_norm": 0.9609375, "learning_rate": 0.0019962844928873595, "loss": 7.389, "step": 443 }, { "epoch": 0.056828362984768975, "grad_norm": 0.9453125, "learning_rate": 0.0019962487033844695, "loss": 6.7295, "step": 444 }, { "epoch": 0.05695635479329323, "grad_norm": 1.125, "learning_rate": 0.001996212742660076, "loss": 6.6994, "step": 445 }, { "epoch": 0.057084346601817486, "grad_norm": 1.265625, "learning_rate": 0.0019961766107203594, "loss": 7.1095, "step": 446 }, { "epoch": 0.05721233841034174, "grad_norm": 1.0234375, "learning_rate": 0.001996140307571529, "loss": 6.4544, "step": 447 }, { "epoch": 0.05734033021886599, "grad_norm": 1.03125, "learning_rate": 0.0019961038332198255, "loss": 6.6338, "step": 448 }, { "epoch": 0.057468322027390245, "grad_norm": 1.28125, "learning_rate": 0.001996067187671516, "loss": 7.0341, "step": 449 }, { "epoch": 0.0575963138359145, "grad_norm": 1.34375, "learning_rate": 0.0019960303709328996, "loss": 6.896, "step": 450 }, { "epoch": 0.057724305644438756, "grad_norm": 1.3046875, "learning_rate": 0.0019959933830103033, "loss": 6.3652, "step": 451 }, { "epoch": 0.05785229745296301, "grad_norm": 1.1953125, "learning_rate": 0.0019959562239100846, "loss": 6.6138, "step": 452 }, { "epoch": 0.05798028926148727, "grad_norm": 1.1640625, "learning_rate": 0.00199591889363863, "loss": 6.873, "step": 453 }, { "epoch": 0.05810828107001152, "grad_norm": 1.046875, "learning_rate": 0.0019958813922023545, "loss": 7.1165, "step": 454 }, { "epoch": 0.05823627287853577, "grad_norm": 1.2421875, "learning_rate": 0.001995843719607704, "loss": 7.2551, "step": 455 }, { "epoch": 0.058364264687060026, "grad_norm": 1.0078125, "learning_rate": 0.001995805875861153, "loss": 6.3529, "step": 456 }, { "epoch": 0.05849225649558428, "grad_norm": 1.09375, "learning_rate": 0.0019957678609692055, "loss": 6.6698, "step": 457 }, { "epoch": 0.05862024830410854, "grad_norm": 1.484375, "learning_rate": 0.001995729674938395, "loss": 6.826, "step": 458 }, { "epoch": 0.05874824011263279, "grad_norm": 0.97265625, "learning_rate": 0.0019956913177752843, "loss": 6.489, "step": 459 }, { "epoch": 0.05887623192115705, "grad_norm": 1.3671875, "learning_rate": 0.001995652789486466, "loss": 6.3699, "step": 460 }, { "epoch": 0.0590042237296813, "grad_norm": 1.328125, "learning_rate": 0.0019956140900785615, "loss": 6.7426, "step": 461 }, { "epoch": 0.05913221553820555, "grad_norm": 1.3515625, "learning_rate": 0.001995575219558222, "loss": 7.3886, "step": 462 }, { "epoch": 0.05926020734672981, "grad_norm": 1.0859375, "learning_rate": 0.0019955361779321282, "loss": 6.8035, "step": 463 }, { "epoch": 0.05938819915525406, "grad_norm": 1.078125, "learning_rate": 0.0019954969652069897, "loss": 6.5063, "step": 464 }, { "epoch": 0.05951619096377832, "grad_norm": 1.2734375, "learning_rate": 0.001995457581389546, "loss": 6.6054, "step": 465 }, { "epoch": 0.05964418277230257, "grad_norm": 1.2890625, "learning_rate": 0.0019954180264865656, "loss": 6.6185, "step": 466 }, { "epoch": 0.05977217458082683, "grad_norm": 1.3671875, "learning_rate": 0.0019953783005048475, "loss": 6.3587, "step": 467 }, { "epoch": 0.059900166389351084, "grad_norm": 1.5234375, "learning_rate": 0.0019953384034512183, "loss": 6.8593, "step": 468 }, { "epoch": 0.06002815819787534, "grad_norm": 1.0234375, "learning_rate": 0.001995298335332536, "loss": 7.2911, "step": 469 }, { "epoch": 0.06015615000639959, "grad_norm": 1.109375, "learning_rate": 0.0019952580961556856, "loss": 6.387, "step": 470 }, { "epoch": 0.06028414181492384, "grad_norm": 1.015625, "learning_rate": 0.0019952176859275835, "loss": 6.2956, "step": 471 }, { "epoch": 0.0604121336234481, "grad_norm": 0.97265625, "learning_rate": 0.0019951771046551755, "loss": 6.6801, "step": 472 }, { "epoch": 0.060540125431972354, "grad_norm": 0.99609375, "learning_rate": 0.001995136352345435, "loss": 6.5915, "step": 473 }, { "epoch": 0.06066811724049661, "grad_norm": 1.4140625, "learning_rate": 0.001995095429005367, "loss": 6.4799, "step": 474 }, { "epoch": 0.060796109049020865, "grad_norm": 1.21875, "learning_rate": 0.0019950543346420042, "loss": 6.1293, "step": 475 }, { "epoch": 0.06092410085754512, "grad_norm": 1.5, "learning_rate": 0.001995013069262409, "loss": 6.9277, "step": 476 }, { "epoch": 0.06105209266606937, "grad_norm": 1.359375, "learning_rate": 0.0019949716328736744, "loss": 6.5933, "step": 477 }, { "epoch": 0.061180084474593624, "grad_norm": 1.125, "learning_rate": 0.0019949300254829216, "loss": 7.1021, "step": 478 }, { "epoch": 0.06130807628311788, "grad_norm": 1.3515625, "learning_rate": 0.001994888247097301, "loss": 6.2418, "step": 479 }, { "epoch": 0.061436068091642135, "grad_norm": 1.0390625, "learning_rate": 0.0019948462977239937, "loss": 6.8638, "step": 480 }, { "epoch": 0.06156405990016639, "grad_norm": 1.3125, "learning_rate": 0.001994804177370209, "loss": 6.6927, "step": 481 }, { "epoch": 0.061692051708690646, "grad_norm": 1.1640625, "learning_rate": 0.0019947618860431857, "loss": 5.5488, "step": 482 }, { "epoch": 0.0618200435172149, "grad_norm": 2.453125, "learning_rate": 0.0019947194237501928, "loss": 6.6076, "step": 483 }, { "epoch": 0.06194803532573915, "grad_norm": 1.1875, "learning_rate": 0.0019946767904985277, "loss": 6.5381, "step": 484 }, { "epoch": 0.062076027134263405, "grad_norm": 1.2578125, "learning_rate": 0.0019946339862955175, "loss": 5.8563, "step": 485 }, { "epoch": 0.06220401894278766, "grad_norm": 1.578125, "learning_rate": 0.0019945910111485196, "loss": 6.5543, "step": 486 }, { "epoch": 0.062332010751311916, "grad_norm": 1.265625, "learning_rate": 0.001994547865064919, "loss": 6.2423, "step": 487 }, { "epoch": 0.06246000255983617, "grad_norm": 1.3203125, "learning_rate": 0.0019945045480521317, "loss": 6.5485, "step": 488 }, { "epoch": 0.06258799436836042, "grad_norm": 1.0546875, "learning_rate": 0.001994461060117602, "loss": 6.746, "step": 489 }, { "epoch": 0.06271598617688467, "grad_norm": 1.6015625, "learning_rate": 0.0019944174012688047, "loss": 6.1284, "step": 490 }, { "epoch": 0.06284397798540893, "grad_norm": 1.1328125, "learning_rate": 0.0019943735715132424, "loss": 6.4353, "step": 491 }, { "epoch": 0.06297196979393319, "grad_norm": 1.1953125, "learning_rate": 0.0019943295708584486, "loss": 6.1249, "step": 492 }, { "epoch": 0.06309996160245744, "grad_norm": 1.78125, "learning_rate": 0.001994285399311985, "loss": 6.3034, "step": 493 }, { "epoch": 0.0632279534109817, "grad_norm": 1.171875, "learning_rate": 0.001994241056881444, "loss": 6.1632, "step": 494 }, { "epoch": 0.06335594521950595, "grad_norm": 1.1875, "learning_rate": 0.0019941965435744456, "loss": 6.0351, "step": 495 }, { "epoch": 0.06348393702803021, "grad_norm": 1.0625, "learning_rate": 0.001994151859398641, "loss": 6.5344, "step": 496 }, { "epoch": 0.06361192883655446, "grad_norm": 0.99609375, "learning_rate": 0.001994107004361709, "loss": 6.8119, "step": 497 }, { "epoch": 0.06373992064507872, "grad_norm": 1.234375, "learning_rate": 0.001994061978471359, "loss": 6.022, "step": 498 }, { "epoch": 0.06386791245360297, "grad_norm": 1.734375, "learning_rate": 0.00199401678173533, "loss": 6.316, "step": 499 }, { "epoch": 0.06399590426212723, "grad_norm": 1.2578125, "learning_rate": 0.0019939714141613894, "loss": 7.0251, "step": 500 }, { "epoch": 0.06412389607065148, "grad_norm": 1.6953125, "learning_rate": 0.0019939258757573344, "loss": 6.5996, "step": 501 }, { "epoch": 0.06425188787917573, "grad_norm": 1.328125, "learning_rate": 0.001993880166530991, "loss": 6.4581, "step": 502 }, { "epoch": 0.06437987968769998, "grad_norm": 1.625, "learning_rate": 0.001993834286490216, "loss": 6.393, "step": 503 }, { "epoch": 0.06450787149622424, "grad_norm": 1.2265625, "learning_rate": 0.001993788235642894, "loss": 5.9731, "step": 504 }, { "epoch": 0.06463586330474849, "grad_norm": 1.0859375, "learning_rate": 0.0019937420139969396, "loss": 5.8187, "step": 505 }, { "epoch": 0.06476385511327275, "grad_norm": 1.2578125, "learning_rate": 0.001993695621560297, "loss": 6.6461, "step": 506 }, { "epoch": 0.064891846921797, "grad_norm": 1.296875, "learning_rate": 0.0019936490583409394, "loss": 6.7567, "step": 507 }, { "epoch": 0.06501983873032126, "grad_norm": 1.125, "learning_rate": 0.001993602324346869, "loss": 7.0416, "step": 508 }, { "epoch": 0.06514783053884551, "grad_norm": 0.99609375, "learning_rate": 0.0019935554195861186, "loss": 6.1935, "step": 509 }, { "epoch": 0.06527582234736977, "grad_norm": 1.0703125, "learning_rate": 0.0019935083440667492, "loss": 5.8587, "step": 510 }, { "epoch": 0.06540381415589402, "grad_norm": 1.0703125, "learning_rate": 0.001993461097796851, "loss": 6.0048, "step": 511 }, { "epoch": 0.06553180596441828, "grad_norm": 1.4375, "learning_rate": 0.001993413680784545, "loss": 6.3664, "step": 512 }, { "epoch": 0.06565979777294254, "grad_norm": 1.2265625, "learning_rate": 0.00199336609303798, "loss": 5.6121, "step": 513 }, { "epoch": 0.06578778958146679, "grad_norm": 1.140625, "learning_rate": 0.0019933183345653346, "loss": 5.7348, "step": 514 }, { "epoch": 0.06591578138999105, "grad_norm": 1.109375, "learning_rate": 0.001993270405374817, "loss": 5.0338, "step": 515 }, { "epoch": 0.0660437731985153, "grad_norm": 1.296875, "learning_rate": 0.001993222305474665, "loss": 6.3537, "step": 516 }, { "epoch": 0.06617176500703954, "grad_norm": 1.265625, "learning_rate": 0.0019931740348731446, "loss": 6.5067, "step": 517 }, { "epoch": 0.0662997568155638, "grad_norm": 1.234375, "learning_rate": 0.0019931255935785523, "loss": 6.4928, "step": 518 }, { "epoch": 0.06642774862408805, "grad_norm": 1.0546875, "learning_rate": 0.0019930769815992137, "loss": 6.11, "step": 519 }, { "epoch": 0.06655574043261231, "grad_norm": 1.109375, "learning_rate": 0.001993028198943483, "loss": 6.3656, "step": 520 }, { "epoch": 0.06668373224113656, "grad_norm": 1.734375, "learning_rate": 0.0019929792456197457, "loss": 5.7358, "step": 521 }, { "epoch": 0.06681172404966082, "grad_norm": 1.2265625, "learning_rate": 0.001992930121636413, "loss": 6.2351, "step": 522 }, { "epoch": 0.06693971585818508, "grad_norm": 1.0234375, "learning_rate": 0.0019928808270019294, "loss": 6.9005, "step": 523 }, { "epoch": 0.06706770766670933, "grad_norm": 1.25, "learning_rate": 0.0019928313617247665, "loss": 6.2306, "step": 524 }, { "epoch": 0.06719569947523359, "grad_norm": 0.91015625, "learning_rate": 0.0019927817258134257, "loss": 5.0563, "step": 525 }, { "epoch": 0.06732369128375784, "grad_norm": 1.2265625, "learning_rate": 0.0019927319192764373, "loss": 6.1055, "step": 526 }, { "epoch": 0.0674516830922821, "grad_norm": 1.2578125, "learning_rate": 0.001992681942122362, "loss": 5.4302, "step": 527 }, { "epoch": 0.06757967490080635, "grad_norm": 1.1171875, "learning_rate": 0.001992631794359789, "loss": 5.721, "step": 528 }, { "epoch": 0.06770766670933061, "grad_norm": 2.296875, "learning_rate": 0.001992581475997337, "loss": 5.5592, "step": 529 }, { "epoch": 0.06783565851785486, "grad_norm": 1.25, "learning_rate": 0.0019925309870436536, "loss": 4.6098, "step": 530 }, { "epoch": 0.06796365032637912, "grad_norm": 1.78125, "learning_rate": 0.0019924803275074166, "loss": 6.1123, "step": 531 }, { "epoch": 0.06809164213490336, "grad_norm": 1.1953125, "learning_rate": 0.0019924294973973325, "loss": 5.4104, "step": 532 }, { "epoch": 0.06821963394342762, "grad_norm": 1.125, "learning_rate": 0.0019923784967221378, "loss": 5.9449, "step": 533 }, { "epoch": 0.06834762575195187, "grad_norm": 0.95703125, "learning_rate": 0.0019923273254905966, "loss": 6.1232, "step": 534 }, { "epoch": 0.06847561756047613, "grad_norm": 0.90234375, "learning_rate": 0.001992275983711505, "loss": 6.0036, "step": 535 }, { "epoch": 0.06860360936900038, "grad_norm": 1.0078125, "learning_rate": 0.0019922244713936857, "loss": 6.3455, "step": 536 }, { "epoch": 0.06873160117752464, "grad_norm": 1.203125, "learning_rate": 0.0019921727885459928, "loss": 5.4057, "step": 537 }, { "epoch": 0.06885959298604889, "grad_norm": 0.91015625, "learning_rate": 0.0019921209351773076, "loss": 5.4196, "step": 538 }, { "epoch": 0.06898758479457315, "grad_norm": 1.484375, "learning_rate": 0.0019920689112965435, "loss": 5.6834, "step": 539 }, { "epoch": 0.0691155766030974, "grad_norm": 1.03125, "learning_rate": 0.0019920167169126405, "loss": 6.2026, "step": 540 }, { "epoch": 0.06924356841162166, "grad_norm": 0.89453125, "learning_rate": 0.0019919643520345696, "loss": 5.4044, "step": 541 }, { "epoch": 0.06937156022014591, "grad_norm": 1.0390625, "learning_rate": 0.0019919118166713307, "loss": 6.3794, "step": 542 }, { "epoch": 0.06949955202867017, "grad_norm": 1.0859375, "learning_rate": 0.0019918591108319525, "loss": 5.7818, "step": 543 }, { "epoch": 0.06962754383719442, "grad_norm": 1.0703125, "learning_rate": 0.0019918062345254933, "loss": 4.6935, "step": 544 }, { "epoch": 0.06975553564571868, "grad_norm": 0.88671875, "learning_rate": 0.001991753187761041, "loss": 5.5346, "step": 545 }, { "epoch": 0.06988352745424292, "grad_norm": 1.125, "learning_rate": 0.001991699970547712, "loss": 5.6045, "step": 546 }, { "epoch": 0.07001151926276718, "grad_norm": 1.015625, "learning_rate": 0.0019916465828946532, "loss": 5.8812, "step": 547 }, { "epoch": 0.07013951107129143, "grad_norm": 1.046875, "learning_rate": 0.00199159302481104, "loss": 5.8691, "step": 548 }, { "epoch": 0.07026750287981569, "grad_norm": 1.0234375, "learning_rate": 0.0019915392963060774, "loss": 6.3146, "step": 549 }, { "epoch": 0.07039549468833994, "grad_norm": 1.0703125, "learning_rate": 0.0019914853973889987, "loss": 5.5429, "step": 550 }, { "epoch": 0.0705234864968642, "grad_norm": 0.828125, "learning_rate": 0.001991431328069068, "loss": 5.3731, "step": 551 }, { "epoch": 0.07065147830538845, "grad_norm": 0.94921875, "learning_rate": 0.001991377088355578, "loss": 5.0177, "step": 552 }, { "epoch": 0.07077947011391271, "grad_norm": 0.9453125, "learning_rate": 0.0019913226782578507, "loss": 5.3153, "step": 553 }, { "epoch": 0.07090746192243697, "grad_norm": 1.078125, "learning_rate": 0.0019912680977852365, "loss": 5.8298, "step": 554 }, { "epoch": 0.07103545373096122, "grad_norm": 0.96484375, "learning_rate": 0.0019912133469471172, "loss": 5.7878, "step": 555 }, { "epoch": 0.07116344553948548, "grad_norm": 0.87890625, "learning_rate": 0.001991158425752902, "loss": 5.3954, "step": 556 }, { "epoch": 0.07129143734800973, "grad_norm": 0.77734375, "learning_rate": 0.0019911033342120297, "loss": 4.953, "step": 557 }, { "epoch": 0.07141942915653399, "grad_norm": 0.94921875, "learning_rate": 0.001991048072333969, "loss": 5.0921, "step": 558 }, { "epoch": 0.07154742096505824, "grad_norm": 1.2109375, "learning_rate": 0.001990992640128218, "loss": 5.314, "step": 559 }, { "epoch": 0.0716754127735825, "grad_norm": 1.0625, "learning_rate": 0.001990937037604303, "loss": 6.0037, "step": 560 }, { "epoch": 0.07180340458210674, "grad_norm": 1.1328125, "learning_rate": 0.0019908812647717804, "loss": 5.4362, "step": 561 }, { "epoch": 0.071931396390631, "grad_norm": 1.0625, "learning_rate": 0.0019908253216402356, "loss": 5.5438, "step": 562 }, { "epoch": 0.07205938819915525, "grad_norm": 1.2109375, "learning_rate": 0.001990769208219283, "loss": 5.5765, "step": 563 }, { "epoch": 0.0721873800076795, "grad_norm": 1.2265625, "learning_rate": 0.001990712924518567, "loss": 5.669, "step": 564 }, { "epoch": 0.07231537181620376, "grad_norm": 0.890625, "learning_rate": 0.0019906564705477613, "loss": 5.5131, "step": 565 }, { "epoch": 0.07244336362472802, "grad_norm": 1.0546875, "learning_rate": 0.001990599846316568, "loss": 4.9516, "step": 566 }, { "epoch": 0.07257135543325227, "grad_norm": 1.1640625, "learning_rate": 0.0019905430518347184, "loss": 6.1631, "step": 567 }, { "epoch": 0.07269934724177653, "grad_norm": 0.98828125, "learning_rate": 0.0019904860871119744, "loss": 6.1165, "step": 568 }, { "epoch": 0.07282733905030078, "grad_norm": 0.8671875, "learning_rate": 0.0019904289521581256, "loss": 4.8725, "step": 569 }, { "epoch": 0.07295533085882504, "grad_norm": 0.82421875, "learning_rate": 0.0019903716469829922, "loss": 4.482, "step": 570 }, { "epoch": 0.07308332266734929, "grad_norm": 0.9140625, "learning_rate": 0.0019903141715964232, "loss": 5.8826, "step": 571 }, { "epoch": 0.07321131447587355, "grad_norm": 1.03125, "learning_rate": 0.0019902565260082952, "loss": 5.0312, "step": 572 }, { "epoch": 0.0733393062843978, "grad_norm": 0.9453125, "learning_rate": 0.0019901987102285174, "loss": 5.3519, "step": 573 }, { "epoch": 0.07346729809292206, "grad_norm": 0.98828125, "learning_rate": 0.001990140724267025, "loss": 5.8923, "step": 574 }, { "epoch": 0.0735952899014463, "grad_norm": 1.125, "learning_rate": 0.001990082568133784, "loss": 5.8639, "step": 575 }, { "epoch": 0.07372328170997056, "grad_norm": 0.9921875, "learning_rate": 0.0019900242418387903, "loss": 5.8164, "step": 576 }, { "epoch": 0.07385127351849481, "grad_norm": 0.9375, "learning_rate": 0.0019899657453920677, "loss": 5.5802, "step": 577 }, { "epoch": 0.07397926532701907, "grad_norm": 1.0546875, "learning_rate": 0.00198990707880367, "loss": 5.3828, "step": 578 }, { "epoch": 0.07410725713554332, "grad_norm": 0.89453125, "learning_rate": 0.0019898482420836797, "loss": 5.7168, "step": 579 }, { "epoch": 0.07423524894406758, "grad_norm": 0.91015625, "learning_rate": 0.0019897892352422086, "loss": 5.8738, "step": 580 }, { "epoch": 0.07436324075259183, "grad_norm": 1.046875, "learning_rate": 0.001989730058289399, "loss": 6.1339, "step": 581 }, { "epoch": 0.07449123256111609, "grad_norm": 0.98046875, "learning_rate": 0.00198967071123542, "loss": 5.2345, "step": 582 }, { "epoch": 0.07461922436964034, "grad_norm": 0.9453125, "learning_rate": 0.0019896111940904725, "loss": 5.3292, "step": 583 }, { "epoch": 0.0747472161781646, "grad_norm": 0.8515625, "learning_rate": 0.001989551506864785, "loss": 5.065, "step": 584 }, { "epoch": 0.07487520798668885, "grad_norm": 0.890625, "learning_rate": 0.001989491649568616, "loss": 4.7571, "step": 585 }, { "epoch": 0.07500319979521311, "grad_norm": 0.95703125, "learning_rate": 0.0019894316222122525, "loss": 5.265, "step": 586 }, { "epoch": 0.07513119160373737, "grad_norm": 0.765625, "learning_rate": 0.0019893714248060113, "loss": 4.7523, "step": 587 }, { "epoch": 0.07525918341226162, "grad_norm": 0.9140625, "learning_rate": 0.0019893110573602386, "loss": 5.5748, "step": 588 }, { "epoch": 0.07538717522078588, "grad_norm": 0.7421875, "learning_rate": 0.00198925051988531, "loss": 4.8974, "step": 589 }, { "epoch": 0.07551516702931012, "grad_norm": 0.62890625, "learning_rate": 0.0019891898123916285, "loss": 5.3623, "step": 590 }, { "epoch": 0.07564315883783437, "grad_norm": 0.83203125, "learning_rate": 0.001989128934889628, "loss": 5.249, "step": 591 }, { "epoch": 0.07577115064635863, "grad_norm": 0.890625, "learning_rate": 0.001989067887389772, "loss": 5.1926, "step": 592 }, { "epoch": 0.07589914245488288, "grad_norm": 0.98046875, "learning_rate": 0.001989006669902553, "loss": 5.4034, "step": 593 }, { "epoch": 0.07602713426340714, "grad_norm": 0.84375, "learning_rate": 0.0019889452824384908, "loss": 6.3733, "step": 594 }, { "epoch": 0.0761551260719314, "grad_norm": 0.96484375, "learning_rate": 0.001988883725008136, "loss": 5.3162, "step": 595 }, { "epoch": 0.07628311788045565, "grad_norm": 0.9140625, "learning_rate": 0.001988821997622069, "loss": 4.7493, "step": 596 }, { "epoch": 0.0764111096889799, "grad_norm": 0.83984375, "learning_rate": 0.0019887601002908984, "loss": 5.9357, "step": 597 }, { "epoch": 0.07653910149750416, "grad_norm": 1.078125, "learning_rate": 0.001988698033025262, "loss": 5.8667, "step": 598 }, { "epoch": 0.07666709330602842, "grad_norm": 0.82421875, "learning_rate": 0.0019886357958358272, "loss": 4.9038, "step": 599 }, { "epoch": 0.07679508511455267, "grad_norm": 0.96484375, "learning_rate": 0.0019885733887332908, "loss": 5.0805, "step": 600 }, { "epoch": 0.07692307692307693, "grad_norm": 0.77734375, "learning_rate": 0.001988510811728378, "loss": 4.9848, "step": 601 }, { "epoch": 0.07705106873160118, "grad_norm": 0.953125, "learning_rate": 0.0019884480648318438, "loss": 4.7929, "step": 602 }, { "epoch": 0.07717906054012544, "grad_norm": 0.69140625, "learning_rate": 0.0019883851480544724, "loss": 5.4275, "step": 603 }, { "epoch": 0.07730705234864968, "grad_norm": 0.79296875, "learning_rate": 0.0019883220614070767, "loss": 5.4174, "step": 604 }, { "epoch": 0.07743504415717394, "grad_norm": 1.0703125, "learning_rate": 0.0019882588049005, "loss": 5.7157, "step": 605 }, { "epoch": 0.07756303596569819, "grad_norm": 1.1171875, "learning_rate": 0.001988195378545613, "loss": 5.2198, "step": 606 }, { "epoch": 0.07769102777422245, "grad_norm": 1.1015625, "learning_rate": 0.0019881317823533173, "loss": 5.3102, "step": 607 }, { "epoch": 0.0778190195827467, "grad_norm": 0.94921875, "learning_rate": 0.001988068016334542, "loss": 4.9465, "step": 608 }, { "epoch": 0.07794701139127096, "grad_norm": 1.109375, "learning_rate": 0.0019880040805002478, "loss": 5.0658, "step": 609 }, { "epoch": 0.07807500319979521, "grad_norm": 0.9453125, "learning_rate": 0.001987939974861422, "loss": 5.0365, "step": 610 }, { "epoch": 0.07820299500831947, "grad_norm": 0.81640625, "learning_rate": 0.001987875699429082, "loss": 5.0959, "step": 611 }, { "epoch": 0.07833098681684372, "grad_norm": 0.83984375, "learning_rate": 0.0019878112542142754, "loss": 4.6309, "step": 612 }, { "epoch": 0.07845897862536798, "grad_norm": 0.78515625, "learning_rate": 0.001987746639228077, "loss": 4.9451, "step": 613 }, { "epoch": 0.07858697043389223, "grad_norm": 1.0703125, "learning_rate": 0.001987681854481594, "loss": 5.8263, "step": 614 }, { "epoch": 0.07871496224241649, "grad_norm": 0.82421875, "learning_rate": 0.0019876168999859584, "loss": 4.6283, "step": 615 }, { "epoch": 0.07884295405094074, "grad_norm": 0.796875, "learning_rate": 0.001987551775752335, "loss": 5.0157, "step": 616 }, { "epoch": 0.078970945859465, "grad_norm": 0.98828125, "learning_rate": 0.001987486481791916, "loss": 5.3779, "step": 617 }, { "epoch": 0.07909893766798926, "grad_norm": 0.7734375, "learning_rate": 0.0019874210181159238, "loss": 4.5538, "step": 618 }, { "epoch": 0.0792269294765135, "grad_norm": 0.92578125, "learning_rate": 0.0019873553847356085, "loss": 4.5813, "step": 619 }, { "epoch": 0.07935492128503775, "grad_norm": 0.875, "learning_rate": 0.001987289581662251, "loss": 5.5369, "step": 620 }, { "epoch": 0.07948291309356201, "grad_norm": 0.88671875, "learning_rate": 0.00198722360890716, "loss": 5.185, "step": 621 }, { "epoch": 0.07961090490208626, "grad_norm": 0.87890625, "learning_rate": 0.001987157466481675, "loss": 4.3254, "step": 622 }, { "epoch": 0.07973889671061052, "grad_norm": 1.0390625, "learning_rate": 0.0019870911543971624, "loss": 4.7239, "step": 623 }, { "epoch": 0.07986688851913477, "grad_norm": 0.796875, "learning_rate": 0.00198702467266502, "loss": 4.8223, "step": 624 }, { "epoch": 0.07999488032765903, "grad_norm": 0.89453125, "learning_rate": 0.001986958021296673, "loss": 5.1434, "step": 625 }, { "epoch": 0.08012287213618328, "grad_norm": 0.828125, "learning_rate": 0.001986891200303577, "loss": 4.5331, "step": 626 }, { "epoch": 0.08025086394470754, "grad_norm": 0.6640625, "learning_rate": 0.0019868242096972165, "loss": 4.4901, "step": 627 }, { "epoch": 0.0803788557532318, "grad_norm": 0.7578125, "learning_rate": 0.0019867570494891042, "loss": 4.7608, "step": 628 }, { "epoch": 0.08050684756175605, "grad_norm": 0.921875, "learning_rate": 0.0019866897196907833, "loss": 5.24, "step": 629 }, { "epoch": 0.0806348393702803, "grad_norm": 0.81640625, "learning_rate": 0.001986622220313825, "loss": 5.4133, "step": 630 }, { "epoch": 0.08076283117880456, "grad_norm": 0.73828125, "learning_rate": 0.0019865545513698306, "loss": 5.0365, "step": 631 }, { "epoch": 0.08089082298732882, "grad_norm": 0.94140625, "learning_rate": 0.0019864867128704298, "loss": 4.9229, "step": 632 }, { "epoch": 0.08101881479585306, "grad_norm": 0.828125, "learning_rate": 0.001986418704827282, "loss": 5.6099, "step": 633 }, { "epoch": 0.08114680660437731, "grad_norm": 1.0703125, "learning_rate": 0.0019863505272520753, "loss": 4.0732, "step": 634 }, { "epoch": 0.08127479841290157, "grad_norm": 1.1796875, "learning_rate": 0.001986282180156527, "loss": 4.8182, "step": 635 }, { "epoch": 0.08140279022142582, "grad_norm": 0.98046875, "learning_rate": 0.001986213663552384, "loss": 4.6104, "step": 636 }, { "epoch": 0.08153078202995008, "grad_norm": 0.78515625, "learning_rate": 0.0019861449774514223, "loss": 5.0221, "step": 637 }, { "epoch": 0.08165877383847434, "grad_norm": 0.57421875, "learning_rate": 0.001986076121865446, "loss": 4.1284, "step": 638 }, { "epoch": 0.08178676564699859, "grad_norm": 1.1953125, "learning_rate": 0.0019860070968062893, "loss": 4.1344, "step": 639 }, { "epoch": 0.08191475745552285, "grad_norm": 0.66796875, "learning_rate": 0.0019859379022858148, "loss": 4.4689, "step": 640 }, { "epoch": 0.0820427492640471, "grad_norm": 0.90234375, "learning_rate": 0.0019858685383159157, "loss": 4.8545, "step": 641 }, { "epoch": 0.08217074107257136, "grad_norm": 1.0390625, "learning_rate": 0.0019857990049085127, "loss": 5.2177, "step": 642 }, { "epoch": 0.08229873288109561, "grad_norm": 0.7421875, "learning_rate": 0.0019857293020755563, "loss": 4.6558, "step": 643 }, { "epoch": 0.08242672468961987, "grad_norm": 0.8203125, "learning_rate": 0.0019856594298290264, "loss": 4.6432, "step": 644 }, { "epoch": 0.08255471649814412, "grad_norm": 0.80859375, "learning_rate": 0.001985589388180931, "loss": 5.1168, "step": 645 }, { "epoch": 0.08268270830666838, "grad_norm": 0.78125, "learning_rate": 0.0019855191771433085, "loss": 4.9751, "step": 646 }, { "epoch": 0.08281070011519263, "grad_norm": 0.828125, "learning_rate": 0.0019854487967282254, "loss": 4.6476, "step": 647 }, { "epoch": 0.08293869192371688, "grad_norm": 0.9765625, "learning_rate": 0.001985378246947778, "loss": 4.5548, "step": 648 }, { "epoch": 0.08306668373224113, "grad_norm": 0.8125, "learning_rate": 0.0019853075278140914, "loss": 5.4269, "step": 649 }, { "epoch": 0.08319467554076539, "grad_norm": 1.0078125, "learning_rate": 0.00198523663933932, "loss": 4.3664, "step": 650 }, { "epoch": 0.08332266734928964, "grad_norm": 0.828125, "learning_rate": 0.0019851655815356466, "loss": 4.2599, "step": 651 }, { "epoch": 0.0834506591578139, "grad_norm": 0.921875, "learning_rate": 0.0019850943544152838, "loss": 4.4381, "step": 652 }, { "epoch": 0.08357865096633815, "grad_norm": 0.76953125, "learning_rate": 0.0019850229579904733, "loss": 4.0914, "step": 653 }, { "epoch": 0.08370664277486241, "grad_norm": 0.85546875, "learning_rate": 0.0019849513922734863, "loss": 4.7392, "step": 654 }, { "epoch": 0.08383463458338666, "grad_norm": 0.80859375, "learning_rate": 0.0019848796572766213, "loss": 4.6851, "step": 655 }, { "epoch": 0.08396262639191092, "grad_norm": 1.8984375, "learning_rate": 0.001984807753012208, "loss": 4.8299, "step": 656 }, { "epoch": 0.08409061820043517, "grad_norm": 0.8359375, "learning_rate": 0.001984735679492604, "loss": 4.8623, "step": 657 }, { "epoch": 0.08421861000895943, "grad_norm": 0.9765625, "learning_rate": 0.001984663436730197, "loss": 4.7052, "step": 658 }, { "epoch": 0.08434660181748369, "grad_norm": 0.7578125, "learning_rate": 0.0019845910247374013, "loss": 4.7845, "step": 659 }, { "epoch": 0.08447459362600794, "grad_norm": 0.82421875, "learning_rate": 0.001984518443526664, "loss": 5.0864, "step": 660 }, { "epoch": 0.0846025854345322, "grad_norm": 0.92578125, "learning_rate": 0.001984445693110459, "loss": 4.5949, "step": 661 }, { "epoch": 0.08473057724305644, "grad_norm": 0.79296875, "learning_rate": 0.0019843727735012885, "loss": 5.2515, "step": 662 }, { "epoch": 0.08485856905158069, "grad_norm": 0.83984375, "learning_rate": 0.0019842996847116855, "loss": 5.3414, "step": 663 }, { "epoch": 0.08498656086010495, "grad_norm": 1.3984375, "learning_rate": 0.0019842264267542122, "loss": 4.752, "step": 664 }, { "epoch": 0.0851145526686292, "grad_norm": 0.9765625, "learning_rate": 0.0019841529996414585, "loss": 4.4128, "step": 665 }, { "epoch": 0.08524254447715346, "grad_norm": 0.94140625, "learning_rate": 0.001984079403386044, "loss": 4.9011, "step": 666 }, { "epoch": 0.08537053628567771, "grad_norm": 0.85546875, "learning_rate": 0.001984005638000618, "loss": 4.722, "step": 667 }, { "epoch": 0.08549852809420197, "grad_norm": 1.0703125, "learning_rate": 0.0019839317034978576, "loss": 4.8795, "step": 668 }, { "epoch": 0.08562651990272623, "grad_norm": 0.7890625, "learning_rate": 0.0019838575998904697, "loss": 4.3069, "step": 669 }, { "epoch": 0.08575451171125048, "grad_norm": 0.73046875, "learning_rate": 0.0019837833271911906, "loss": 4.3752, "step": 670 }, { "epoch": 0.08588250351977474, "grad_norm": 0.84765625, "learning_rate": 0.001983708885412785, "loss": 5.349, "step": 671 }, { "epoch": 0.08601049532829899, "grad_norm": 0.79296875, "learning_rate": 0.0019836342745680473, "loss": 4.7595, "step": 672 }, { "epoch": 0.08613848713682325, "grad_norm": 0.8359375, "learning_rate": 0.0019835594946698, "loss": 5.0233, "step": 673 }, { "epoch": 0.0862664789453475, "grad_norm": 0.859375, "learning_rate": 0.0019834845457308956, "loss": 3.208, "step": 674 }, { "epoch": 0.08639447075387176, "grad_norm": 0.95703125, "learning_rate": 0.0019834094277642153, "loss": 5.8435, "step": 675 }, { "epoch": 0.08652246256239601, "grad_norm": 1.140625, "learning_rate": 0.0019833341407826687, "loss": 5.1078, "step": 676 }, { "epoch": 0.08665045437092025, "grad_norm": 0.8515625, "learning_rate": 0.0019832586847991964, "loss": 4.787, "step": 677 }, { "epoch": 0.08677844617944451, "grad_norm": 0.80078125, "learning_rate": 0.001983183059826766, "loss": 4.0605, "step": 678 }, { "epoch": 0.08690643798796877, "grad_norm": 1.265625, "learning_rate": 0.001983107265878374, "loss": 4.4668, "step": 679 }, { "epoch": 0.08703442979649302, "grad_norm": 1.1640625, "learning_rate": 0.0019830313029670483, "loss": 4.1574, "step": 680 }, { "epoch": 0.08716242160501728, "grad_norm": 0.8359375, "learning_rate": 0.0019829551711058437, "loss": 4.8034, "step": 681 }, { "epoch": 0.08729041341354153, "grad_norm": 0.87890625, "learning_rate": 0.001982878870307845, "loss": 5.7189, "step": 682 }, { "epoch": 0.08741840522206579, "grad_norm": 1.0859375, "learning_rate": 0.001982802400586165, "loss": 4.3374, "step": 683 }, { "epoch": 0.08754639703059004, "grad_norm": 1.421875, "learning_rate": 0.001982725761953947, "loss": 5.032, "step": 684 }, { "epoch": 0.0876743888391143, "grad_norm": 0.96875, "learning_rate": 0.001982648954424362, "loss": 4.5456, "step": 685 }, { "epoch": 0.08780238064763855, "grad_norm": 1.34375, "learning_rate": 0.001982571978010611, "loss": 4.662, "step": 686 }, { "epoch": 0.08793037245616281, "grad_norm": 0.78515625, "learning_rate": 0.001982494832725924, "loss": 4.433, "step": 687 }, { "epoch": 0.08805836426468706, "grad_norm": 0.70703125, "learning_rate": 0.001982417518583559, "loss": 3.8644, "step": 688 }, { "epoch": 0.08818635607321132, "grad_norm": 0.80859375, "learning_rate": 0.0019823400355968037, "loss": 4.7663, "step": 689 }, { "epoch": 0.08831434788173557, "grad_norm": 0.703125, "learning_rate": 0.0019822623837789754, "loss": 3.9368, "step": 690 }, { "epoch": 0.08844233969025983, "grad_norm": 1.1796875, "learning_rate": 0.001982184563143419, "loss": 4.4762, "step": 691 }, { "epoch": 0.08857033149878407, "grad_norm": 0.85546875, "learning_rate": 0.0019821065737035105, "loss": 4.1119, "step": 692 }, { "epoch": 0.08869832330730833, "grad_norm": 0.82421875, "learning_rate": 0.0019820284154726526, "loss": 5.4806, "step": 693 }, { "epoch": 0.08882631511583258, "grad_norm": 0.69140625, "learning_rate": 0.001981950088464278, "loss": 4.2745, "step": 694 }, { "epoch": 0.08895430692435684, "grad_norm": 0.796875, "learning_rate": 0.001981871592691849, "loss": 4.0399, "step": 695 }, { "epoch": 0.0890822987328811, "grad_norm": 0.77734375, "learning_rate": 0.001981792928168856, "loss": 4.6626, "step": 696 }, { "epoch": 0.08921029054140535, "grad_norm": 0.75390625, "learning_rate": 0.0019817140949088194, "loss": 4.7864, "step": 697 }, { "epoch": 0.0893382823499296, "grad_norm": 0.9375, "learning_rate": 0.001981635092925287, "loss": 4.4117, "step": 698 }, { "epoch": 0.08946627415845386, "grad_norm": 0.84765625, "learning_rate": 0.001981555922231837, "loss": 4.7975, "step": 699 }, { "epoch": 0.08959426596697811, "grad_norm": 0.8671875, "learning_rate": 0.0019814765828420765, "loss": 4.8129, "step": 700 }, { "epoch": 0.08972225777550237, "grad_norm": 0.8671875, "learning_rate": 0.0019813970747696416, "loss": 5.0728, "step": 701 }, { "epoch": 0.08985024958402663, "grad_norm": 0.83984375, "learning_rate": 0.0019813173980281957, "loss": 4.0898, "step": 702 }, { "epoch": 0.08997824139255088, "grad_norm": 0.81640625, "learning_rate": 0.0019812375526314335, "loss": 4.3238, "step": 703 }, { "epoch": 0.09010623320107514, "grad_norm": 0.87890625, "learning_rate": 0.0019811575385930776, "loss": 4.3598, "step": 704 }, { "epoch": 0.09023422500959939, "grad_norm": 1.015625, "learning_rate": 0.0019810773559268795, "loss": 4.0851, "step": 705 }, { "epoch": 0.09036221681812363, "grad_norm": 0.60546875, "learning_rate": 0.0019809970046466203, "loss": 4.5524, "step": 706 }, { "epoch": 0.09049020862664789, "grad_norm": 0.8359375, "learning_rate": 0.0019809164847661093, "loss": 4.7088, "step": 707 }, { "epoch": 0.09061820043517214, "grad_norm": 0.703125, "learning_rate": 0.0019808357962991852, "loss": 5.3506, "step": 708 }, { "epoch": 0.0907461922436964, "grad_norm": 0.8203125, "learning_rate": 0.0019807549392597155, "loss": 5.1162, "step": 709 }, { "epoch": 0.09087418405222066, "grad_norm": 0.88671875, "learning_rate": 0.0019806739136615974, "loss": 4.65, "step": 710 }, { "epoch": 0.09100217586074491, "grad_norm": 1.0859375, "learning_rate": 0.001980592719518756, "loss": 4.3019, "step": 711 }, { "epoch": 0.09113016766926917, "grad_norm": 0.78515625, "learning_rate": 0.001980511356845146, "loss": 4.2662, "step": 712 }, { "epoch": 0.09125815947779342, "grad_norm": 0.7734375, "learning_rate": 0.00198042982565475, "loss": 4.3935, "step": 713 }, { "epoch": 0.09138615128631768, "grad_norm": 0.7890625, "learning_rate": 0.0019803481259615817, "loss": 4.2838, "step": 714 }, { "epoch": 0.09151414309484193, "grad_norm": 0.73046875, "learning_rate": 0.001980266257779682, "loss": 3.8178, "step": 715 }, { "epoch": 0.09164213490336619, "grad_norm": 0.9140625, "learning_rate": 0.0019801842211231214, "loss": 3.8339, "step": 716 }, { "epoch": 0.09177012671189044, "grad_norm": 0.88671875, "learning_rate": 0.001980102016005999, "loss": 4.864, "step": 717 }, { "epoch": 0.0918981185204147, "grad_norm": 0.8828125, "learning_rate": 0.0019800196424424434, "loss": 4.958, "step": 718 }, { "epoch": 0.09202611032893895, "grad_norm": 0.86328125, "learning_rate": 0.0019799371004466117, "loss": 4.9387, "step": 719 }, { "epoch": 0.09215410213746321, "grad_norm": 0.75, "learning_rate": 0.00197985439003269, "loss": 4.7775, "step": 720 }, { "epoch": 0.09228209394598745, "grad_norm": 0.83984375, "learning_rate": 0.0019797715112148937, "loss": 4.7766, "step": 721 }, { "epoch": 0.0924100857545117, "grad_norm": 0.71875, "learning_rate": 0.001979688464007466, "loss": 4.7705, "step": 722 }, { "epoch": 0.09253807756303596, "grad_norm": 0.8125, "learning_rate": 0.001979605248424681, "loss": 4.4002, "step": 723 }, { "epoch": 0.09266606937156022, "grad_norm": 0.859375, "learning_rate": 0.0019795218644808404, "loss": 4.2165, "step": 724 }, { "epoch": 0.09279406118008447, "grad_norm": 0.82421875, "learning_rate": 0.001979438312190274, "loss": 4.1008, "step": 725 }, { "epoch": 0.09292205298860873, "grad_norm": 1.078125, "learning_rate": 0.0019793545915673433, "loss": 4.3263, "step": 726 }, { "epoch": 0.09305004479713298, "grad_norm": 0.8359375, "learning_rate": 0.001979270702626436, "loss": 3.8691, "step": 727 }, { "epoch": 0.09317803660565724, "grad_norm": 1.0546875, "learning_rate": 0.00197918664538197, "loss": 4.0446, "step": 728 }, { "epoch": 0.0933060284141815, "grad_norm": 0.703125, "learning_rate": 0.001979102419848392, "loss": 3.8539, "step": 729 }, { "epoch": 0.09343402022270575, "grad_norm": 0.84375, "learning_rate": 0.0019790180260401777, "loss": 4.5445, "step": 730 }, { "epoch": 0.09356201203123, "grad_norm": 0.78125, "learning_rate": 0.001978933463971831, "loss": 4.5544, "step": 731 }, { "epoch": 0.09369000383975426, "grad_norm": 0.84765625, "learning_rate": 0.0019788487336578854, "loss": 4.1572, "step": 732 }, { "epoch": 0.09381799564827852, "grad_norm": 0.83203125, "learning_rate": 0.0019787638351129034, "loss": 4.1803, "step": 733 }, { "epoch": 0.09394598745680277, "grad_norm": 0.94140625, "learning_rate": 0.001978678768351477, "loss": 4.7113, "step": 734 }, { "epoch": 0.09407397926532701, "grad_norm": 0.875, "learning_rate": 0.0019785935333882245, "loss": 4.1744, "step": 735 }, { "epoch": 0.09420197107385127, "grad_norm": 0.7578125, "learning_rate": 0.0019785081302377957, "loss": 4.7301, "step": 736 }, { "epoch": 0.09432996288237552, "grad_norm": 0.8125, "learning_rate": 0.0019784225589148693, "loss": 3.7484, "step": 737 }, { "epoch": 0.09445795469089978, "grad_norm": 0.89453125, "learning_rate": 0.001978336819434151, "loss": 4.2035, "step": 738 }, { "epoch": 0.09458594649942403, "grad_norm": 0.859375, "learning_rate": 0.001978250911810377, "loss": 4.1146, "step": 739 }, { "epoch": 0.09471393830794829, "grad_norm": 0.8671875, "learning_rate": 0.0019781648360583126, "loss": 5.0394, "step": 740 }, { "epoch": 0.09484193011647254, "grad_norm": 0.76171875, "learning_rate": 0.0019780785921927496, "loss": 3.593, "step": 741 }, { "epoch": 0.0949699219249968, "grad_norm": 0.66796875, "learning_rate": 0.0019779921802285123, "loss": 4.7858, "step": 742 }, { "epoch": 0.09509791373352106, "grad_norm": 0.91796875, "learning_rate": 0.0019779056001804508, "loss": 4.7605, "step": 743 }, { "epoch": 0.09522590554204531, "grad_norm": 0.8203125, "learning_rate": 0.0019778188520634455, "loss": 3.895, "step": 744 }, { "epoch": 0.09535389735056957, "grad_norm": 0.70703125, "learning_rate": 0.0019777319358924056, "loss": 4.1992, "step": 745 }, { "epoch": 0.09548188915909382, "grad_norm": 1.03125, "learning_rate": 0.001977644851682269, "loss": 3.8095, "step": 746 }, { "epoch": 0.09560988096761808, "grad_norm": 0.8515625, "learning_rate": 0.0019775575994480034, "loss": 3.6871, "step": 747 }, { "epoch": 0.09573787277614233, "grad_norm": 0.73828125, "learning_rate": 0.001977470179204603, "loss": 4.58, "step": 748 }, { "epoch": 0.09586586458466659, "grad_norm": 0.7265625, "learning_rate": 0.001977382590967093, "loss": 4.2213, "step": 749 }, { "epoch": 0.09599385639319083, "grad_norm": 0.69921875, "learning_rate": 0.001977294834750527, "loss": 3.2659, "step": 750 }, { "epoch": 0.09612184820171508, "grad_norm": 0.7734375, "learning_rate": 0.0019772069105699875, "loss": 3.6689, "step": 751 }, { "epoch": 0.09624984001023934, "grad_norm": 0.80859375, "learning_rate": 0.0019771188184405856, "loss": 4.9027, "step": 752 }, { "epoch": 0.0963778318187636, "grad_norm": 0.67578125, "learning_rate": 0.001977030558377461, "loss": 3.7807, "step": 753 }, { "epoch": 0.09650582362728785, "grad_norm": 0.8359375, "learning_rate": 0.0019769421303957825, "loss": 4.5244, "step": 754 }, { "epoch": 0.0966338154358121, "grad_norm": 0.74609375, "learning_rate": 0.001976853534510749, "loss": 4.5131, "step": 755 }, { "epoch": 0.09676180724433636, "grad_norm": 0.6953125, "learning_rate": 0.001976764770737586, "loss": 3.9611, "step": 756 }, { "epoch": 0.09688979905286062, "grad_norm": 0.82421875, "learning_rate": 0.001976675839091549, "loss": 3.9749, "step": 757 }, { "epoch": 0.09701779086138487, "grad_norm": 0.72265625, "learning_rate": 0.0019765867395879233, "loss": 4.9185, "step": 758 }, { "epoch": 0.09714578266990913, "grad_norm": 1.0234375, "learning_rate": 0.0019764974722420213, "loss": 3.7599, "step": 759 }, { "epoch": 0.09727377447843338, "grad_norm": 0.73046875, "learning_rate": 0.001976408037069185, "loss": 4.2933, "step": 760 }, { "epoch": 0.09740176628695764, "grad_norm": 0.921875, "learning_rate": 0.001976318434084786, "loss": 4.2614, "step": 761 }, { "epoch": 0.0975297580954819, "grad_norm": 0.89453125, "learning_rate": 0.0019762286633042236, "loss": 4.316, "step": 762 }, { "epoch": 0.09765774990400615, "grad_norm": 0.68359375, "learning_rate": 0.001976138724742926, "loss": 3.9049, "step": 763 }, { "epoch": 0.09778574171253039, "grad_norm": 1.3671875, "learning_rate": 0.001976048618416351, "loss": 3.9992, "step": 764 }, { "epoch": 0.09791373352105465, "grad_norm": 0.87890625, "learning_rate": 0.001975958344339985, "loss": 4.1841, "step": 765 }, { "epoch": 0.0980417253295789, "grad_norm": 0.78515625, "learning_rate": 0.0019758679025293426, "loss": 4.4145, "step": 766 }, { "epoch": 0.09816971713810316, "grad_norm": 0.83984375, "learning_rate": 0.0019757772929999678, "loss": 4.5701, "step": 767 }, { "epoch": 0.09829770894662741, "grad_norm": 1.1171875, "learning_rate": 0.0019756865157674335, "loss": 4.8722, "step": 768 }, { "epoch": 0.09842570075515167, "grad_norm": 1.140625, "learning_rate": 0.0019755955708473413, "loss": 4.5436, "step": 769 }, { "epoch": 0.09855369256367592, "grad_norm": 0.7890625, "learning_rate": 0.0019755044582553216, "loss": 3.654, "step": 770 }, { "epoch": 0.09868168437220018, "grad_norm": 0.7109375, "learning_rate": 0.0019754131780070332, "loss": 4.826, "step": 771 }, { "epoch": 0.09880967618072443, "grad_norm": 0.8984375, "learning_rate": 0.0019753217301181646, "loss": 4.34, "step": 772 }, { "epoch": 0.09893766798924869, "grad_norm": 0.703125, "learning_rate": 0.001975230114604432, "loss": 4.0559, "step": 773 }, { "epoch": 0.09906565979777295, "grad_norm": 0.6875, "learning_rate": 0.0019751383314815817, "loss": 3.83, "step": 774 }, { "epoch": 0.0991936516062972, "grad_norm": 0.796875, "learning_rate": 0.001975046380765387, "loss": 4.0673, "step": 775 }, { "epoch": 0.09932164341482146, "grad_norm": 0.765625, "learning_rate": 0.0019749542624716526, "loss": 4.2356, "step": 776 }, { "epoch": 0.09944963522334571, "grad_norm": 0.71875, "learning_rate": 0.0019748619766162094, "loss": 3.3531, "step": 777 }, { "epoch": 0.09957762703186997, "grad_norm": 1.8046875, "learning_rate": 0.0019747695232149192, "loss": 4.5328, "step": 778 }, { "epoch": 0.09970561884039421, "grad_norm": 0.7734375, "learning_rate": 0.0019746769022836704, "loss": 4.4564, "step": 779 }, { "epoch": 0.09983361064891846, "grad_norm": 0.7578125, "learning_rate": 0.0019745841138383824, "loss": 4.7321, "step": 780 }, { "epoch": 0.09996160245744272, "grad_norm": 0.7578125, "learning_rate": 0.001974491157895002, "loss": 4.0785, "step": 781 }, { "epoch": 0.10008959426596697, "grad_norm": 0.8359375, "learning_rate": 0.001974398034469505, "loss": 3.8121, "step": 782 }, { "epoch": 0.10021758607449123, "grad_norm": 0.796875, "learning_rate": 0.001974304743577897, "loss": 3.75, "step": 783 }, { "epoch": 0.10034557788301549, "grad_norm": 0.734375, "learning_rate": 0.00197421128523621, "loss": 4.1297, "step": 784 }, { "epoch": 0.10047356969153974, "grad_norm": 0.85546875, "learning_rate": 0.001974117659460508, "loss": 3.7657, "step": 785 }, { "epoch": 0.100601561500064, "grad_norm": 0.73046875, "learning_rate": 0.0019740238662668812, "loss": 4.3152, "step": 786 }, { "epoch": 0.10072955330858825, "grad_norm": 0.67578125, "learning_rate": 0.00197392990567145, "loss": 4.8603, "step": 787 }, { "epoch": 0.10085754511711251, "grad_norm": 0.7890625, "learning_rate": 0.0019738357776903624, "loss": 4.3303, "step": 788 }, { "epoch": 0.10098553692563676, "grad_norm": 0.8671875, "learning_rate": 0.001973741482339796, "loss": 4.1671, "step": 789 }, { "epoch": 0.10111352873416102, "grad_norm": 0.97265625, "learning_rate": 0.001973647019635957, "loss": 4.0281, "step": 790 }, { "epoch": 0.10124152054268527, "grad_norm": 0.71484375, "learning_rate": 0.001973552389595081, "loss": 3.7274, "step": 791 }, { "epoch": 0.10136951235120953, "grad_norm": 0.9453125, "learning_rate": 0.001973457592233431, "loss": 4.1331, "step": 792 }, { "epoch": 0.10149750415973377, "grad_norm": 0.8203125, "learning_rate": 0.0019733626275673, "loss": 3.1635, "step": 793 }, { "epoch": 0.10162549596825803, "grad_norm": 0.76171875, "learning_rate": 0.0019732674956130084, "loss": 4.3879, "step": 794 }, { "epoch": 0.10175348777678228, "grad_norm": 0.87109375, "learning_rate": 0.0019731721963869068, "loss": 4.3729, "step": 795 }, { "epoch": 0.10188147958530654, "grad_norm": 0.6484375, "learning_rate": 0.0019730767299053734, "loss": 3.4163, "step": 796 }, { "epoch": 0.10200947139383079, "grad_norm": 1.140625, "learning_rate": 0.0019729810961848166, "loss": 3.9185, "step": 797 }, { "epoch": 0.10213746320235505, "grad_norm": 0.83203125, "learning_rate": 0.001972885295241672, "loss": 3.5473, "step": 798 }, { "epoch": 0.1022654550108793, "grad_norm": 0.66015625, "learning_rate": 0.001972789327092404, "loss": 4.071, "step": 799 }, { "epoch": 0.10239344681940356, "grad_norm": 0.6640625, "learning_rate": 0.0019726931917535074, "loss": 3.8979, "step": 800 }, { "epoch": 0.10252143862792781, "grad_norm": 0.8046875, "learning_rate": 0.001972596889241504, "loss": 4.2525, "step": 801 }, { "epoch": 0.10264943043645207, "grad_norm": 0.7265625, "learning_rate": 0.001972500419572945, "loss": 4.0423, "step": 802 }, { "epoch": 0.10277742224497632, "grad_norm": 1.9453125, "learning_rate": 0.0019724037827644103, "loss": 4.3696, "step": 803 }, { "epoch": 0.10290541405350058, "grad_norm": 0.7734375, "learning_rate": 0.0019723069788325086, "loss": 3.9245, "step": 804 }, { "epoch": 0.10303340586202483, "grad_norm": 0.75, "learning_rate": 0.001972210007793877, "loss": 3.7018, "step": 805 }, { "epoch": 0.10316139767054909, "grad_norm": 1.0546875, "learning_rate": 0.0019721128696651817, "loss": 4.2149, "step": 806 }, { "epoch": 0.10328938947907335, "grad_norm": 0.796875, "learning_rate": 0.0019720155644631174, "loss": 4.0404, "step": 807 }, { "epoch": 0.10341738128759759, "grad_norm": 0.8515625, "learning_rate": 0.0019719180922044078, "loss": 3.7965, "step": 808 }, { "epoch": 0.10354537309612184, "grad_norm": 0.859375, "learning_rate": 0.001971820452905805, "loss": 4.0452, "step": 809 }, { "epoch": 0.1036733649046461, "grad_norm": 1.71875, "learning_rate": 0.0019717226465840896, "loss": 4.3392, "step": 810 }, { "epoch": 0.10380135671317035, "grad_norm": 0.69921875, "learning_rate": 0.0019716246732560715, "loss": 4.3587, "step": 811 }, { "epoch": 0.10392934852169461, "grad_norm": 0.8125, "learning_rate": 0.0019715265329385886, "loss": 3.2758, "step": 812 }, { "epoch": 0.10405734033021886, "grad_norm": 0.8515625, "learning_rate": 0.001971428225648509, "loss": 3.3008, "step": 813 }, { "epoch": 0.10418533213874312, "grad_norm": 0.76171875, "learning_rate": 0.001971329751402727, "loss": 4.4381, "step": 814 }, { "epoch": 0.10431332394726738, "grad_norm": 0.8671875, "learning_rate": 0.001971231110218168, "loss": 4.4996, "step": 815 }, { "epoch": 0.10444131575579163, "grad_norm": 0.765625, "learning_rate": 0.0019711323021117844, "loss": 3.783, "step": 816 }, { "epoch": 0.10456930756431589, "grad_norm": 0.67578125, "learning_rate": 0.0019710333271005585, "loss": 4.3179, "step": 817 }, { "epoch": 0.10469729937284014, "grad_norm": 1.046875, "learning_rate": 0.0019709341852015005, "loss": 4.5188, "step": 818 }, { "epoch": 0.1048252911813644, "grad_norm": 0.9375, "learning_rate": 0.0019708348764316493, "loss": 4.0402, "step": 819 }, { "epoch": 0.10495328298988865, "grad_norm": 0.890625, "learning_rate": 0.0019707354008080733, "loss": 4.2469, "step": 820 }, { "epoch": 0.10508127479841291, "grad_norm": 0.9921875, "learning_rate": 0.001970635758347869, "loss": 4.6693, "step": 821 }, { "epoch": 0.10520926660693716, "grad_norm": 0.92578125, "learning_rate": 0.001970535949068161, "loss": 3.86, "step": 822 }, { "epoch": 0.1053372584154614, "grad_norm": 0.8671875, "learning_rate": 0.0019704359729861036, "loss": 3.6659, "step": 823 }, { "epoch": 0.10546525022398566, "grad_norm": 0.83984375, "learning_rate": 0.0019703358301188787, "loss": 3.8871, "step": 824 }, { "epoch": 0.10559324203250992, "grad_norm": 1.0078125, "learning_rate": 0.0019702355204836985, "loss": 3.6283, "step": 825 }, { "epoch": 0.10572123384103417, "grad_norm": 0.97265625, "learning_rate": 0.001970135044097802, "loss": 4.9221, "step": 826 }, { "epoch": 0.10584922564955843, "grad_norm": 1.109375, "learning_rate": 0.0019700344009784578, "loss": 4.3436, "step": 827 }, { "epoch": 0.10597721745808268, "grad_norm": 0.76953125, "learning_rate": 0.0019699335911429632, "loss": 3.1992, "step": 828 }, { "epoch": 0.10610520926660694, "grad_norm": 0.76171875, "learning_rate": 0.0019698326146086443, "loss": 4.189, "step": 829 }, { "epoch": 0.10623320107513119, "grad_norm": 1.15625, "learning_rate": 0.0019697314713928555, "loss": 4.6437, "step": 830 }, { "epoch": 0.10636119288365545, "grad_norm": 0.8359375, "learning_rate": 0.001969630161512979, "loss": 3.8279, "step": 831 }, { "epoch": 0.1064891846921797, "grad_norm": 0.84765625, "learning_rate": 0.001969528684986428, "loss": 4.116, "step": 832 }, { "epoch": 0.10661717650070396, "grad_norm": 0.76953125, "learning_rate": 0.0019694270418306415, "loss": 3.7031, "step": 833 }, { "epoch": 0.10674516830922821, "grad_norm": 0.75, "learning_rate": 0.0019693252320630893, "loss": 4.4006, "step": 834 }, { "epoch": 0.10687316011775247, "grad_norm": 0.75, "learning_rate": 0.001969223255701269, "loss": 4.0007, "step": 835 }, { "epoch": 0.10700115192627672, "grad_norm": 0.65234375, "learning_rate": 0.001969121112762707, "loss": 4.3481, "step": 836 }, { "epoch": 0.10712914373480097, "grad_norm": 0.79296875, "learning_rate": 0.001969018803264958, "loss": 4.1664, "step": 837 }, { "epoch": 0.10725713554332522, "grad_norm": 0.77734375, "learning_rate": 0.001968916327225605, "loss": 4.546, "step": 838 }, { "epoch": 0.10738512735184948, "grad_norm": 0.75, "learning_rate": 0.001968813684662261, "loss": 4.1408, "step": 839 }, { "epoch": 0.10751311916037373, "grad_norm": 0.671875, "learning_rate": 0.0019687108755925666, "loss": 3.9003, "step": 840 }, { "epoch": 0.10764111096889799, "grad_norm": 0.96875, "learning_rate": 0.001968607900034191, "loss": 3.8362, "step": 841 }, { "epoch": 0.10776910277742224, "grad_norm": 0.8203125, "learning_rate": 0.0019685047580048328, "loss": 3.9856, "step": 842 }, { "epoch": 0.1078970945859465, "grad_norm": 0.71484375, "learning_rate": 0.001968401449522218, "loss": 4.0759, "step": 843 }, { "epoch": 0.10802508639447075, "grad_norm": 0.890625, "learning_rate": 0.0019682979746041018, "loss": 3.3251, "step": 844 }, { "epoch": 0.10815307820299501, "grad_norm": 1.0, "learning_rate": 0.001968194333268268, "loss": 3.3372, "step": 845 }, { "epoch": 0.10828107001151926, "grad_norm": 1.3671875, "learning_rate": 0.0019680905255325303, "loss": 3.4541, "step": 846 }, { "epoch": 0.10840906182004352, "grad_norm": 0.66015625, "learning_rate": 0.001967986551414728, "loss": 3.8992, "step": 847 }, { "epoch": 0.10853705362856778, "grad_norm": 0.7421875, "learning_rate": 0.001967882410932731, "loss": 3.5374, "step": 848 }, { "epoch": 0.10866504543709203, "grad_norm": 0.72265625, "learning_rate": 0.001967778104104439, "loss": 3.0129, "step": 849 }, { "epoch": 0.10879303724561629, "grad_norm": 0.79296875, "learning_rate": 0.0019676736309477776, "loss": 4.4006, "step": 850 }, { "epoch": 0.10892102905414054, "grad_norm": 1.125, "learning_rate": 0.001967568991480702, "loss": 3.6127, "step": 851 }, { "epoch": 0.10904902086266478, "grad_norm": 0.89453125, "learning_rate": 0.001967464185721197, "loss": 4.4741, "step": 852 }, { "epoch": 0.10917701267118904, "grad_norm": 0.671875, "learning_rate": 0.001967359213687275, "loss": 3.7461, "step": 853 }, { "epoch": 0.1093050044797133, "grad_norm": 0.74609375, "learning_rate": 0.0019672540753969767, "loss": 3.8386, "step": 854 }, { "epoch": 0.10943299628823755, "grad_norm": 0.953125, "learning_rate": 0.0019671487708683723, "loss": 4.2028, "step": 855 }, { "epoch": 0.1095609880967618, "grad_norm": 0.796875, "learning_rate": 0.0019670433001195593, "loss": 2.9854, "step": 856 }, { "epoch": 0.10968897990528606, "grad_norm": 0.82421875, "learning_rate": 0.0019669376631686656, "loss": 3.6348, "step": 857 }, { "epoch": 0.10981697171381032, "grad_norm": 0.76171875, "learning_rate": 0.0019668318600338463, "loss": 3.4518, "step": 858 }, { "epoch": 0.10994496352233457, "grad_norm": 0.86328125, "learning_rate": 0.0019667258907332848, "loss": 3.0528, "step": 859 }, { "epoch": 0.11007295533085883, "grad_norm": 0.69140625, "learning_rate": 0.0019666197552851944, "loss": 4.175, "step": 860 }, { "epoch": 0.11020094713938308, "grad_norm": 1.046875, "learning_rate": 0.0019665134537078157, "loss": 3.7107, "step": 861 }, { "epoch": 0.11032893894790734, "grad_norm": 0.99609375, "learning_rate": 0.001966406986019419, "loss": 3.5773, "step": 862 }, { "epoch": 0.11045693075643159, "grad_norm": 1.0625, "learning_rate": 0.0019663003522383013, "loss": 3.7372, "step": 863 }, { "epoch": 0.11058492256495585, "grad_norm": 0.94140625, "learning_rate": 0.0019661935523827903, "loss": 4.4449, "step": 864 }, { "epoch": 0.1107129143734801, "grad_norm": 0.98046875, "learning_rate": 0.001966086586471241, "loss": 3.0235, "step": 865 }, { "epoch": 0.11084090618200435, "grad_norm": 0.87109375, "learning_rate": 0.0019659794545220377, "loss": 3.2442, "step": 866 }, { "epoch": 0.1109688979905286, "grad_norm": 0.6484375, "learning_rate": 0.001965872156553592, "loss": 3.7961, "step": 867 }, { "epoch": 0.11109688979905286, "grad_norm": 0.78515625, "learning_rate": 0.0019657646925843454, "loss": 4.2102, "step": 868 }, { "epoch": 0.11122488160757711, "grad_norm": 0.76953125, "learning_rate": 0.001965657062632767, "loss": 4.6731, "step": 869 }, { "epoch": 0.11135287341610137, "grad_norm": 0.703125, "learning_rate": 0.001965549266717355, "loss": 3.9743, "step": 870 }, { "epoch": 0.11148086522462562, "grad_norm": 0.81640625, "learning_rate": 0.0019654413048566352, "loss": 3.733, "step": 871 }, { "epoch": 0.11160885703314988, "grad_norm": 0.80859375, "learning_rate": 0.0019653331770691637, "loss": 4.0487, "step": 872 }, { "epoch": 0.11173684884167413, "grad_norm": 0.734375, "learning_rate": 0.001965224883373523, "loss": 4.494, "step": 873 }, { "epoch": 0.11186484065019839, "grad_norm": 0.7578125, "learning_rate": 0.001965116423788326, "loss": 3.341, "step": 874 }, { "epoch": 0.11199283245872264, "grad_norm": 0.67578125, "learning_rate": 0.0019650077983322124, "loss": 4.061, "step": 875 }, { "epoch": 0.1121208242672469, "grad_norm": 0.82421875, "learning_rate": 0.0019648990070238515, "loss": 3.4607, "step": 876 }, { "epoch": 0.11224881607577115, "grad_norm": 0.9921875, "learning_rate": 0.001964790049881941, "loss": 3.4764, "step": 877 }, { "epoch": 0.11237680788429541, "grad_norm": 0.71484375, "learning_rate": 0.0019646809269252075, "loss": 3.6304, "step": 878 }, { "epoch": 0.11250479969281967, "grad_norm": 0.984375, "learning_rate": 0.0019645716381724047, "loss": 4.4826, "step": 879 }, { "epoch": 0.11263279150134392, "grad_norm": 1.0546875, "learning_rate": 0.001964462183642316, "loss": 4.3733, "step": 880 }, { "epoch": 0.11276078330986816, "grad_norm": 0.77734375, "learning_rate": 0.0019643525633537525, "loss": 3.9349, "step": 881 }, { "epoch": 0.11288877511839242, "grad_norm": 0.69921875, "learning_rate": 0.0019642427773255548, "loss": 3.2893, "step": 882 }, { "epoch": 0.11301676692691667, "grad_norm": 0.78515625, "learning_rate": 0.0019641328255765914, "loss": 3.0339, "step": 883 }, { "epoch": 0.11314475873544093, "grad_norm": 0.7265625, "learning_rate": 0.0019640227081257587, "loss": 3.6765, "step": 884 }, { "epoch": 0.11327275054396518, "grad_norm": 0.79296875, "learning_rate": 0.001963912424991983, "loss": 3.2677, "step": 885 }, { "epoch": 0.11340074235248944, "grad_norm": 1.46875, "learning_rate": 0.0019638019761942177, "loss": 2.7998, "step": 886 }, { "epoch": 0.1135287341610137, "grad_norm": 0.9140625, "learning_rate": 0.0019636913617514453, "loss": 4.5996, "step": 887 }, { "epoch": 0.11365672596953795, "grad_norm": 0.6875, "learning_rate": 0.0019635805816826768, "loss": 4.1142, "step": 888 }, { "epoch": 0.1137847177780622, "grad_norm": 0.75390625, "learning_rate": 0.0019634696360069513, "loss": 3.8396, "step": 889 }, { "epoch": 0.11391270958658646, "grad_norm": 0.82421875, "learning_rate": 0.001963358524743337, "loss": 3.8086, "step": 890 }, { "epoch": 0.11404070139511072, "grad_norm": 0.76953125, "learning_rate": 0.00196324724791093, "loss": 3.4109, "step": 891 }, { "epoch": 0.11416869320363497, "grad_norm": 0.75, "learning_rate": 0.0019631358055288545, "loss": 3.7986, "step": 892 }, { "epoch": 0.11429668501215923, "grad_norm": 0.64453125, "learning_rate": 0.0019630241976162648, "loss": 3.6985, "step": 893 }, { "epoch": 0.11442467682068348, "grad_norm": 0.80078125, "learning_rate": 0.0019629124241923417, "loss": 3.6048, "step": 894 }, { "epoch": 0.11455266862920772, "grad_norm": 0.65234375, "learning_rate": 0.0019628004852762955, "loss": 3.5543, "step": 895 }, { "epoch": 0.11468066043773198, "grad_norm": 0.80859375, "learning_rate": 0.0019626883808873644, "loss": 4.0461, "step": 896 }, { "epoch": 0.11480865224625623, "grad_norm": 0.65625, "learning_rate": 0.001962576111044816, "loss": 2.938, "step": 897 }, { "epoch": 0.11493664405478049, "grad_norm": 0.84375, "learning_rate": 0.0019624636757679453, "loss": 2.5969, "step": 898 }, { "epoch": 0.11506463586330475, "grad_norm": 0.75390625, "learning_rate": 0.001962351075076076, "loss": 5.0781, "step": 899 }, { "epoch": 0.115192627671829, "grad_norm": 1.28125, "learning_rate": 0.0019622383089885607, "loss": 3.4843, "step": 900 }, { "epoch": 0.11532061948035326, "grad_norm": 0.65234375, "learning_rate": 0.00196212537752478, "loss": 3.7312, "step": 901 }, { "epoch": 0.11544861128887751, "grad_norm": 0.89453125, "learning_rate": 0.0019620122807041426, "loss": 3.6822, "step": 902 }, { "epoch": 0.11557660309740177, "grad_norm": 0.78515625, "learning_rate": 0.001961899018546086, "loss": 3.4886, "step": 903 }, { "epoch": 0.11570459490592602, "grad_norm": 0.66015625, "learning_rate": 0.0019617855910700776, "loss": 3.3682, "step": 904 }, { "epoch": 0.11583258671445028, "grad_norm": 0.83984375, "learning_rate": 0.0019616719982956097, "loss": 3.3302, "step": 905 }, { "epoch": 0.11596057852297453, "grad_norm": 0.80859375, "learning_rate": 0.0019615582402422067, "loss": 3.7622, "step": 906 }, { "epoch": 0.11608857033149879, "grad_norm": 0.83984375, "learning_rate": 0.0019614443169294186, "loss": 3.6165, "step": 907 }, { "epoch": 0.11621656214002304, "grad_norm": 0.73828125, "learning_rate": 0.001961330228376825, "loss": 3.3886, "step": 908 }, { "epoch": 0.1163445539485473, "grad_norm": 0.6484375, "learning_rate": 0.001961215974604035, "loss": 3.6523, "step": 909 }, { "epoch": 0.11647254575707154, "grad_norm": 0.80078125, "learning_rate": 0.0019611015556306845, "loss": 3.3891, "step": 910 }, { "epoch": 0.1166005375655958, "grad_norm": 0.796875, "learning_rate": 0.0019609869714764376, "loss": 3.6532, "step": 911 }, { "epoch": 0.11672852937412005, "grad_norm": 0.68359375, "learning_rate": 0.0019608722221609877, "loss": 3.5814, "step": 912 }, { "epoch": 0.11685652118264431, "grad_norm": 0.73828125, "learning_rate": 0.0019607573077040567, "loss": 3.7993, "step": 913 }, { "epoch": 0.11698451299116856, "grad_norm": 0.70703125, "learning_rate": 0.0019606422281253943, "loss": 3.587, "step": 914 }, { "epoch": 0.11711250479969282, "grad_norm": 0.77734375, "learning_rate": 0.001960526983444779, "loss": 4.2087, "step": 915 }, { "epoch": 0.11724049660821707, "grad_norm": 0.75390625, "learning_rate": 0.0019604115736820173, "loss": 3.4123, "step": 916 }, { "epoch": 0.11736848841674133, "grad_norm": 0.81640625, "learning_rate": 0.0019602959988569437, "loss": 3.3716, "step": 917 }, { "epoch": 0.11749648022526558, "grad_norm": 0.68359375, "learning_rate": 0.0019601802589894225, "loss": 3.3243, "step": 918 }, { "epoch": 0.11762447203378984, "grad_norm": 1.046875, "learning_rate": 0.001960064354099345, "loss": 3.7429, "step": 919 }, { "epoch": 0.1177524638423141, "grad_norm": 0.7109375, "learning_rate": 0.0019599482842066313, "loss": 3.3318, "step": 920 }, { "epoch": 0.11788045565083835, "grad_norm": 0.80078125, "learning_rate": 0.0019598320493312304, "loss": 4.0289, "step": 921 }, { "epoch": 0.1180084474593626, "grad_norm": 0.84375, "learning_rate": 0.0019597156494931185, "loss": 3.6527, "step": 922 }, { "epoch": 0.11813643926788686, "grad_norm": 1.0078125, "learning_rate": 0.001959599084712301, "loss": 3.641, "step": 923 }, { "epoch": 0.1182644310764111, "grad_norm": 0.81640625, "learning_rate": 0.001959482355008812, "loss": 3.377, "step": 924 }, { "epoch": 0.11839242288493536, "grad_norm": 0.82421875, "learning_rate": 0.0019593654604027127, "loss": 4.121, "step": 925 }, { "epoch": 0.11852041469345961, "grad_norm": 1.0859375, "learning_rate": 0.0019592484009140934, "loss": 4.4571, "step": 926 }, { "epoch": 0.11864840650198387, "grad_norm": 0.734375, "learning_rate": 0.001959131176563073, "loss": 3.3217, "step": 927 }, { "epoch": 0.11877639831050812, "grad_norm": 0.66015625, "learning_rate": 0.001959013787369798, "loss": 4.0584, "step": 928 }, { "epoch": 0.11890439011903238, "grad_norm": 0.625, "learning_rate": 0.001958896233354444, "loss": 3.2632, "step": 929 }, { "epoch": 0.11903238192755664, "grad_norm": 0.734375, "learning_rate": 0.0019587785145372145, "loss": 3.5658, "step": 930 }, { "epoch": 0.11916037373608089, "grad_norm": 0.90625, "learning_rate": 0.001958660630938341, "loss": 3.6855, "step": 931 }, { "epoch": 0.11928836554460515, "grad_norm": 0.765625, "learning_rate": 0.001958542582578085, "loss": 3.7663, "step": 932 }, { "epoch": 0.1194163573531294, "grad_norm": 1.09375, "learning_rate": 0.0019584243694767333, "loss": 3.7094, "step": 933 }, { "epoch": 0.11954434916165366, "grad_norm": 1.234375, "learning_rate": 0.0019583059916546035, "loss": 3.7818, "step": 934 }, { "epoch": 0.11967234097017791, "grad_norm": 0.80078125, "learning_rate": 0.0019581874491320407, "loss": 3.2882, "step": 935 }, { "epoch": 0.11980033277870217, "grad_norm": 0.8671875, "learning_rate": 0.0019580687419294184, "loss": 3.2619, "step": 936 }, { "epoch": 0.11992832458722642, "grad_norm": 0.98828125, "learning_rate": 0.0019579498700671386, "loss": 4.2336, "step": 937 }, { "epoch": 0.12005631639575068, "grad_norm": 0.7578125, "learning_rate": 0.0019578308335656313, "loss": 3.2063, "step": 938 }, { "epoch": 0.12018430820427492, "grad_norm": 0.7890625, "learning_rate": 0.001957711632445354, "loss": 3.2424, "step": 939 }, { "epoch": 0.12031230001279918, "grad_norm": 0.671875, "learning_rate": 0.0019575922667267945, "loss": 3.0782, "step": 940 }, { "epoch": 0.12044029182132343, "grad_norm": 0.9453125, "learning_rate": 0.0019574727364304674, "loss": 3.9098, "step": 941 }, { "epoch": 0.12056828362984769, "grad_norm": 1.3046875, "learning_rate": 0.0019573530415769157, "loss": 4.1458, "step": 942 }, { "epoch": 0.12069627543837194, "grad_norm": 0.7109375, "learning_rate": 0.0019572331821867107, "loss": 3.5935, "step": 943 }, { "epoch": 0.1208242672468962, "grad_norm": 0.7890625, "learning_rate": 0.0019571131582804525, "loss": 3.132, "step": 944 }, { "epoch": 0.12095225905542045, "grad_norm": 0.75, "learning_rate": 0.0019569929698787696, "loss": 4.0456, "step": 945 }, { "epoch": 0.12108025086394471, "grad_norm": 0.75390625, "learning_rate": 0.0019568726170023174, "loss": 4.2372, "step": 946 }, { "epoch": 0.12120824267246896, "grad_norm": 0.91796875, "learning_rate": 0.0019567520996717808, "loss": 4.0716, "step": 947 }, { "epoch": 0.12133623448099322, "grad_norm": 0.87109375, "learning_rate": 0.001956631417907873, "loss": 3.842, "step": 948 }, { "epoch": 0.12146422628951747, "grad_norm": 0.85546875, "learning_rate": 0.001956510571731335, "loss": 3.4917, "step": 949 }, { "epoch": 0.12159221809804173, "grad_norm": 0.72265625, "learning_rate": 0.0019563895611629358, "loss": 3.5495, "step": 950 }, { "epoch": 0.12172020990656598, "grad_norm": 0.75390625, "learning_rate": 0.001956268386223474, "loss": 3.7297, "step": 951 }, { "epoch": 0.12184820171509024, "grad_norm": 0.921875, "learning_rate": 0.0019561470469337742, "loss": 3.8693, "step": 952 }, { "epoch": 0.12197619352361448, "grad_norm": 0.83984375, "learning_rate": 0.0019560255433146915, "loss": 3.9551, "step": 953 }, { "epoch": 0.12210418533213874, "grad_norm": 0.76171875, "learning_rate": 0.0019559038753871076, "loss": 4.2624, "step": 954 }, { "epoch": 0.12223217714066299, "grad_norm": 0.77734375, "learning_rate": 0.001955782043171933, "loss": 3.4484, "step": 955 }, { "epoch": 0.12236016894918725, "grad_norm": 0.65625, "learning_rate": 0.0019556600466901076, "loss": 3.3516, "step": 956 }, { "epoch": 0.1224881607577115, "grad_norm": 0.703125, "learning_rate": 0.001955537885962598, "loss": 4.0045, "step": 957 }, { "epoch": 0.12261615256623576, "grad_norm": 0.72265625, "learning_rate": 0.001955415561010399, "loss": 3.3169, "step": 958 }, { "epoch": 0.12274414437476001, "grad_norm": 0.76953125, "learning_rate": 0.0019552930718545344, "loss": 3.5144, "step": 959 }, { "epoch": 0.12287213618328427, "grad_norm": 0.88671875, "learning_rate": 0.001955170418516056, "loss": 4.4434, "step": 960 }, { "epoch": 0.12300012799180852, "grad_norm": 0.8046875, "learning_rate": 0.001955047601016044, "loss": 3.717, "step": 961 }, { "epoch": 0.12312811980033278, "grad_norm": 0.6875, "learning_rate": 0.001954924619375606, "loss": 3.8565, "step": 962 }, { "epoch": 0.12325611160885704, "grad_norm": 1.0, "learning_rate": 0.0019548014736158787, "loss": 3.5405, "step": 963 }, { "epoch": 0.12338410341738129, "grad_norm": 0.640625, "learning_rate": 0.001954678163758027, "loss": 3.2001, "step": 964 }, { "epoch": 0.12351209522590555, "grad_norm": 0.8046875, "learning_rate": 0.0019545546898232435, "loss": 3.7179, "step": 965 }, { "epoch": 0.1236400870344298, "grad_norm": 0.8828125, "learning_rate": 0.0019544310518327494, "loss": 2.9281, "step": 966 }, { "epoch": 0.12376807884295406, "grad_norm": 0.7890625, "learning_rate": 0.001954307249807793, "loss": 3.5028, "step": 967 }, { "epoch": 0.1238960706514783, "grad_norm": 0.75, "learning_rate": 0.0019541832837696527, "loss": 2.6589, "step": 968 }, { "epoch": 0.12402406246000255, "grad_norm": 0.8125, "learning_rate": 0.0019540591537396336, "loss": 3.7952, "step": 969 }, { "epoch": 0.12415205426852681, "grad_norm": 0.71484375, "learning_rate": 0.001953934859739069, "loss": 3.2427, "step": 970 }, { "epoch": 0.12428004607705107, "grad_norm": 0.86328125, "learning_rate": 0.001953810401789322, "loss": 3.7648, "step": 971 }, { "epoch": 0.12440803788557532, "grad_norm": 0.83203125, "learning_rate": 0.001953685779911782, "loss": 3.4908, "step": 972 }, { "epoch": 0.12453602969409958, "grad_norm": 0.75390625, "learning_rate": 0.0019535609941278677, "loss": 3.3113, "step": 973 }, { "epoch": 0.12466402150262383, "grad_norm": 0.83203125, "learning_rate": 0.001953436044459025, "loss": 4.4434, "step": 974 }, { "epoch": 0.12479201331114809, "grad_norm": 0.8125, "learning_rate": 0.0019533109309267287, "loss": 3.4783, "step": 975 }, { "epoch": 0.12492000511967234, "grad_norm": 0.6484375, "learning_rate": 0.0019531856535524817, "loss": 2.8561, "step": 976 }, { "epoch": 0.1250479969281966, "grad_norm": 1.46875, "learning_rate": 0.0019530602123578149, "loss": 3.0004, "step": 977 }, { "epoch": 0.12517598873672084, "grad_norm": 0.80078125, "learning_rate": 0.0019529346073642877, "loss": 3.2929, "step": 978 }, { "epoch": 0.1253039805452451, "grad_norm": 0.76953125, "learning_rate": 0.0019528088385934869, "loss": 3.2741, "step": 979 }, { "epoch": 0.12543197235376935, "grad_norm": 0.76171875, "learning_rate": 0.001952682906067028, "loss": 3.7331, "step": 980 }, { "epoch": 0.12555996416229362, "grad_norm": 0.75390625, "learning_rate": 0.0019525568098065548, "loss": 3.3573, "step": 981 }, { "epoch": 0.12568795597081786, "grad_norm": 0.67578125, "learning_rate": 0.0019524305498337387, "loss": 2.9779, "step": 982 }, { "epoch": 0.12581594777934213, "grad_norm": 0.96484375, "learning_rate": 0.00195230412617028, "loss": 3.574, "step": 983 }, { "epoch": 0.12594393958786637, "grad_norm": 0.765625, "learning_rate": 0.0019521775388379058, "loss": 3.7596, "step": 984 }, { "epoch": 0.12607193139639064, "grad_norm": 0.76953125, "learning_rate": 0.0019520507878583728, "loss": 3.3743, "step": 985 }, { "epoch": 0.12619992320491488, "grad_norm": 0.76953125, "learning_rate": 0.0019519238732534654, "loss": 3.1035, "step": 986 }, { "epoch": 0.12632791501343915, "grad_norm": 0.75, "learning_rate": 0.0019517967950449955, "loss": 3.5453, "step": 987 }, { "epoch": 0.1264559068219634, "grad_norm": 0.9453125, "learning_rate": 0.0019516695532548036, "loss": 3.414, "step": 988 }, { "epoch": 0.12658389863048766, "grad_norm": 0.8046875, "learning_rate": 0.0019515421479047587, "loss": 3.3166, "step": 989 }, { "epoch": 0.1267118904390119, "grad_norm": 0.875, "learning_rate": 0.0019514145790167565, "loss": 3.2411, "step": 990 }, { "epoch": 0.12683988224753615, "grad_norm": 0.890625, "learning_rate": 0.001951286846612723, "loss": 3.7565, "step": 991 }, { "epoch": 0.12696787405606041, "grad_norm": 0.8828125, "learning_rate": 0.0019511589507146103, "loss": 3.4705, "step": 992 }, { "epoch": 0.12709586586458466, "grad_norm": 1.0703125, "learning_rate": 0.0019510308913443996, "loss": 4.0593, "step": 993 }, { "epoch": 0.12722385767310893, "grad_norm": 1.46875, "learning_rate": 0.0019509026685241, "loss": 3.4814, "step": 994 }, { "epoch": 0.12735184948163317, "grad_norm": 0.7734375, "learning_rate": 0.0019507742822757485, "loss": 3.6799, "step": 995 }, { "epoch": 0.12747984129015744, "grad_norm": 0.6875, "learning_rate": 0.0019506457326214108, "loss": 2.9499, "step": 996 }, { "epoch": 0.12760783309868168, "grad_norm": 1.0390625, "learning_rate": 0.00195051701958318, "loss": 3.3708, "step": 997 }, { "epoch": 0.12773582490720595, "grad_norm": 0.859375, "learning_rate": 0.001950388143183177, "loss": 3.4441, "step": 998 }, { "epoch": 0.1278638167157302, "grad_norm": 0.78125, "learning_rate": 0.0019502591034435523, "loss": 3.6524, "step": 999 }, { "epoch": 0.12799180852425446, "grad_norm": 0.6796875, "learning_rate": 0.0019501299003864827, "loss": 2.4237, "step": 1000 }, { "epoch": 0.1281198003327787, "grad_norm": 0.75390625, "learning_rate": 0.001950000534034174, "loss": 3.9275, "step": 1001 }, { "epoch": 0.12824779214130297, "grad_norm": 0.9140625, "learning_rate": 0.0019498710044088604, "loss": 3.0411, "step": 1002 }, { "epoch": 0.1283757839498272, "grad_norm": 0.76171875, "learning_rate": 0.0019497413115328027, "loss": 3.4542, "step": 1003 }, { "epoch": 0.12850377575835145, "grad_norm": 0.72265625, "learning_rate": 0.0019496114554282916, "loss": 3.3073, "step": 1004 }, { "epoch": 0.12863176756687572, "grad_norm": 0.81640625, "learning_rate": 0.0019494814361176447, "loss": 3.9253, "step": 1005 }, { "epoch": 0.12875975937539996, "grad_norm": 0.890625, "learning_rate": 0.001949351253623208, "loss": 3.639, "step": 1006 }, { "epoch": 0.12888775118392423, "grad_norm": 0.8203125, "learning_rate": 0.0019492209079673552, "loss": 2.9475, "step": 1007 }, { "epoch": 0.12901574299244847, "grad_norm": 0.74609375, "learning_rate": 0.0019490903991724881, "loss": 3.7006, "step": 1008 }, { "epoch": 0.12914373480097274, "grad_norm": 0.7109375, "learning_rate": 0.0019489597272610375, "loss": 3.2428, "step": 1009 }, { "epoch": 0.12927172660949698, "grad_norm": 0.70703125, "learning_rate": 0.0019488288922554612, "loss": 3.2863, "step": 1010 }, { "epoch": 0.12939971841802125, "grad_norm": 0.8203125, "learning_rate": 0.0019486978941782454, "loss": 3.351, "step": 1011 }, { "epoch": 0.1295277102265455, "grad_norm": 1.015625, "learning_rate": 0.0019485667330519038, "loss": 3.6498, "step": 1012 }, { "epoch": 0.12965570203506976, "grad_norm": 0.796875, "learning_rate": 0.001948435408898979, "loss": 3.0958, "step": 1013 }, { "epoch": 0.129783693843594, "grad_norm": 0.734375, "learning_rate": 0.001948303921742041, "loss": 3.1954, "step": 1014 }, { "epoch": 0.12991168565211828, "grad_norm": 0.875, "learning_rate": 0.001948172271603688, "loss": 3.3837, "step": 1015 }, { "epoch": 0.13003967746064252, "grad_norm": 0.734375, "learning_rate": 0.0019480404585065464, "loss": 3.9452, "step": 1016 }, { "epoch": 0.13016766926916679, "grad_norm": 0.71875, "learning_rate": 0.00194790848247327, "loss": 2.9277, "step": 1017 }, { "epoch": 0.13029566107769103, "grad_norm": 0.7734375, "learning_rate": 0.0019477763435265417, "loss": 3.8614, "step": 1018 }, { "epoch": 0.13042365288621527, "grad_norm": 0.64453125, "learning_rate": 0.001947644041689071, "loss": 2.7134, "step": 1019 }, { "epoch": 0.13055164469473954, "grad_norm": 0.71484375, "learning_rate": 0.0019475115769835964, "loss": 3.3132, "step": 1020 }, { "epoch": 0.13067963650326378, "grad_norm": 0.78515625, "learning_rate": 0.0019473789494328844, "loss": 3.5545, "step": 1021 }, { "epoch": 0.13080762831178805, "grad_norm": 0.83984375, "learning_rate": 0.0019472461590597288, "loss": 4.4281, "step": 1022 }, { "epoch": 0.1309356201203123, "grad_norm": 0.76171875, "learning_rate": 0.001947113205886952, "loss": 3.2657, "step": 1023 }, { "epoch": 0.13106361192883656, "grad_norm": 0.7421875, "learning_rate": 0.001946980089937404, "loss": 3.9357, "step": 1024 }, { "epoch": 0.1311916037373608, "grad_norm": 0.83203125, "learning_rate": 0.001946846811233963, "loss": 3.8255, "step": 1025 }, { "epoch": 0.13131959554588507, "grad_norm": 0.8515625, "learning_rate": 0.0019467133697995353, "loss": 3.7634, "step": 1026 }, { "epoch": 0.1314475873544093, "grad_norm": 0.80859375, "learning_rate": 0.0019465797656570546, "loss": 3.7425, "step": 1027 }, { "epoch": 0.13157557916293358, "grad_norm": 0.96484375, "learning_rate": 0.0019464459988294833, "loss": 4.4747, "step": 1028 }, { "epoch": 0.13170357097145782, "grad_norm": 0.71875, "learning_rate": 0.0019463120693398112, "loss": 3.3728, "step": 1029 }, { "epoch": 0.1318315627799821, "grad_norm": 0.6640625, "learning_rate": 0.001946177977211056, "loss": 3.3651, "step": 1030 }, { "epoch": 0.13195955458850633, "grad_norm": 0.71484375, "learning_rate": 0.0019460437224662643, "loss": 3.2182, "step": 1031 }, { "epoch": 0.1320875463970306, "grad_norm": 1.078125, "learning_rate": 0.0019459093051285095, "loss": 3.6344, "step": 1032 }, { "epoch": 0.13221553820555484, "grad_norm": 0.765625, "learning_rate": 0.0019457747252208935, "loss": 3.5366, "step": 1033 }, { "epoch": 0.13234353001407909, "grad_norm": 0.828125, "learning_rate": 0.0019456399827665458, "loss": 2.5467, "step": 1034 }, { "epoch": 0.13247152182260336, "grad_norm": 0.734375, "learning_rate": 0.0019455050777886244, "loss": 3.0982, "step": 1035 }, { "epoch": 0.1325995136311276, "grad_norm": 0.80859375, "learning_rate": 0.001945370010310315, "loss": 3.695, "step": 1036 }, { "epoch": 0.13272750543965187, "grad_norm": 0.72265625, "learning_rate": 0.0019452347803548308, "loss": 3.2746, "step": 1037 }, { "epoch": 0.1328554972481761, "grad_norm": 0.91796875, "learning_rate": 0.001945099387945413, "loss": 3.0667, "step": 1038 }, { "epoch": 0.13298348905670038, "grad_norm": 0.90234375, "learning_rate": 0.001944963833105332, "loss": 4.8267, "step": 1039 }, { "epoch": 0.13311148086522462, "grad_norm": 0.875, "learning_rate": 0.001944828115857884, "loss": 3.1858, "step": 1040 }, { "epoch": 0.1332394726737489, "grad_norm": 0.72265625, "learning_rate": 0.0019446922362263948, "loss": 3.1982, "step": 1041 }, { "epoch": 0.13336746448227313, "grad_norm": 0.671875, "learning_rate": 0.0019445561942342173, "loss": 3.6458, "step": 1042 }, { "epoch": 0.1334954562907974, "grad_norm": 1.1015625, "learning_rate": 0.0019444199899047328, "loss": 2.5773, "step": 1043 }, { "epoch": 0.13362344809932164, "grad_norm": 0.78515625, "learning_rate": 0.00194428362326135, "loss": 3.7655, "step": 1044 }, { "epoch": 0.1337514399078459, "grad_norm": 0.81640625, "learning_rate": 0.001944147094327506, "loss": 3.6526, "step": 1045 }, { "epoch": 0.13387943171637015, "grad_norm": 0.6796875, "learning_rate": 0.001944010403126665, "loss": 2.7566, "step": 1046 }, { "epoch": 0.13400742352489442, "grad_norm": 0.828125, "learning_rate": 0.0019438735496823199, "loss": 3.2002, "step": 1047 }, { "epoch": 0.13413541533341866, "grad_norm": 1.0078125, "learning_rate": 0.001943736534017991, "loss": 4.015, "step": 1048 }, { "epoch": 0.1342634071419429, "grad_norm": 1.0234375, "learning_rate": 0.0019435993561572273, "loss": 2.5212, "step": 1049 }, { "epoch": 0.13439139895046717, "grad_norm": 0.75, "learning_rate": 0.0019434620161236044, "loss": 3.9998, "step": 1050 }, { "epoch": 0.13451939075899141, "grad_norm": 0.76953125, "learning_rate": 0.0019433245139407265, "loss": 3.5611, "step": 1051 }, { "epoch": 0.13464738256751568, "grad_norm": 0.62890625, "learning_rate": 0.0019431868496322254, "loss": 2.9752, "step": 1052 }, { "epoch": 0.13477537437603992, "grad_norm": 0.82421875, "learning_rate": 0.0019430490232217616, "loss": 3.282, "step": 1053 }, { "epoch": 0.1349033661845642, "grad_norm": 0.87890625, "learning_rate": 0.0019429110347330223, "loss": 2.6788, "step": 1054 }, { "epoch": 0.13503135799308844, "grad_norm": 0.88671875, "learning_rate": 0.0019427728841897231, "loss": 4.5743, "step": 1055 }, { "epoch": 0.1351593498016127, "grad_norm": 0.75, "learning_rate": 0.0019426345716156077, "loss": 2.6773, "step": 1056 }, { "epoch": 0.13528734161013695, "grad_norm": 0.98046875, "learning_rate": 0.001942496097034447, "loss": 3.6924, "step": 1057 }, { "epoch": 0.13541533341866122, "grad_norm": 0.8515625, "learning_rate": 0.0019423574604700402, "loss": 4.4911, "step": 1058 }, { "epoch": 0.13554332522718546, "grad_norm": 0.6796875, "learning_rate": 0.0019422186619462143, "loss": 3.733, "step": 1059 }, { "epoch": 0.13567131703570973, "grad_norm": 0.84765625, "learning_rate": 0.0019420797014868243, "loss": 3.3893, "step": 1060 }, { "epoch": 0.13579930884423397, "grad_norm": 0.9296875, "learning_rate": 0.0019419405791157526, "loss": 3.5231, "step": 1061 }, { "epoch": 0.13592730065275824, "grad_norm": 0.75, "learning_rate": 0.001941801294856909, "loss": 2.7523, "step": 1062 }, { "epoch": 0.13605529246128248, "grad_norm": 0.8125, "learning_rate": 0.001941661848734233, "loss": 2.8682, "step": 1063 }, { "epoch": 0.13618328426980672, "grad_norm": 0.73046875, "learning_rate": 0.00194152224077169, "loss": 3.4219, "step": 1064 }, { "epoch": 0.136311276078331, "grad_norm": 1.015625, "learning_rate": 0.0019413824709932736, "loss": 3.1043, "step": 1065 }, { "epoch": 0.13643926788685523, "grad_norm": 0.77734375, "learning_rate": 0.0019412425394230062, "loss": 2.6258, "step": 1066 }, { "epoch": 0.1365672596953795, "grad_norm": 0.8203125, "learning_rate": 0.0019411024460849369, "loss": 3.6408, "step": 1067 }, { "epoch": 0.13669525150390374, "grad_norm": 0.7265625, "learning_rate": 0.001940962191003143, "loss": 3.8137, "step": 1068 }, { "epoch": 0.136823243312428, "grad_norm": 0.84765625, "learning_rate": 0.0019408217742017298, "loss": 2.2635, "step": 1069 }, { "epoch": 0.13695123512095225, "grad_norm": 0.6484375, "learning_rate": 0.0019406811957048298, "loss": 3.6414, "step": 1070 }, { "epoch": 0.13707922692947652, "grad_norm": 0.7734375, "learning_rate": 0.0019405404555366045, "loss": 3.1282, "step": 1071 }, { "epoch": 0.13720721873800076, "grad_norm": 1.2421875, "learning_rate": 0.0019403995537212416, "loss": 3.629, "step": 1072 }, { "epoch": 0.13733521054652503, "grad_norm": 0.76953125, "learning_rate": 0.0019402584902829575, "loss": 3.1235, "step": 1073 }, { "epoch": 0.13746320235504927, "grad_norm": 0.75, "learning_rate": 0.0019401172652459967, "loss": 3.0351, "step": 1074 }, { "epoch": 0.13759119416357354, "grad_norm": 0.79296875, "learning_rate": 0.0019399758786346304, "loss": 3.168, "step": 1075 }, { "epoch": 0.13771918597209779, "grad_norm": 0.80859375, "learning_rate": 0.0019398343304731585, "loss": 2.8223, "step": 1076 }, { "epoch": 0.13784717778062203, "grad_norm": 0.81640625, "learning_rate": 0.0019396926207859084, "loss": 3.45, "step": 1077 }, { "epoch": 0.1379751695891463, "grad_norm": 0.859375, "learning_rate": 0.0019395507495972351, "loss": 2.9948, "step": 1078 }, { "epoch": 0.13810316139767054, "grad_norm": 0.84765625, "learning_rate": 0.0019394087169315216, "loss": 3.4169, "step": 1079 }, { "epoch": 0.1382311532061948, "grad_norm": 0.953125, "learning_rate": 0.0019392665228131784, "loss": 2.8572, "step": 1080 }, { "epoch": 0.13835914501471905, "grad_norm": 0.80078125, "learning_rate": 0.0019391241672666439, "loss": 3.1166, "step": 1081 }, { "epoch": 0.13848713682324332, "grad_norm": 0.8046875, "learning_rate": 0.0019389816503163839, "loss": 3.8743, "step": 1082 }, { "epoch": 0.13861512863176756, "grad_norm": 0.734375, "learning_rate": 0.001938838971986893, "loss": 3.135, "step": 1083 }, { "epoch": 0.13874312044029183, "grad_norm": 0.7578125, "learning_rate": 0.001938696132302692, "loss": 3.3525, "step": 1084 }, { "epoch": 0.13887111224881607, "grad_norm": 0.875, "learning_rate": 0.0019385531312883307, "loss": 2.9379, "step": 1085 }, { "epoch": 0.13899910405734034, "grad_norm": 0.68359375, "learning_rate": 0.0019384099689683859, "loss": 2.9513, "step": 1086 }, { "epoch": 0.13912709586586458, "grad_norm": 0.76171875, "learning_rate": 0.0019382666453674625, "loss": 2.7024, "step": 1087 }, { "epoch": 0.13925508767438885, "grad_norm": 0.6953125, "learning_rate": 0.0019381231605101927, "loss": 2.989, "step": 1088 }, { "epoch": 0.1393830794829131, "grad_norm": 1.078125, "learning_rate": 0.0019379795144212374, "loss": 3.9851, "step": 1089 }, { "epoch": 0.13951107129143736, "grad_norm": 0.72265625, "learning_rate": 0.0019378357071252838, "loss": 3.0601, "step": 1090 }, { "epoch": 0.1396390630999616, "grad_norm": 1.0234375, "learning_rate": 0.0019376917386470477, "loss": 3.3714, "step": 1091 }, { "epoch": 0.13976705490848584, "grad_norm": 0.859375, "learning_rate": 0.0019375476090112726, "loss": 3.8742, "step": 1092 }, { "epoch": 0.1398950467170101, "grad_norm": 0.70703125, "learning_rate": 0.0019374033182427296, "loss": 4.2344, "step": 1093 }, { "epoch": 0.14002303852553435, "grad_norm": 0.78125, "learning_rate": 0.001937258866366217, "loss": 3.7389, "step": 1094 }, { "epoch": 0.14015103033405862, "grad_norm": 0.67578125, "learning_rate": 0.0019371142534065613, "loss": 3.0411, "step": 1095 }, { "epoch": 0.14027902214258287, "grad_norm": 0.78125, "learning_rate": 0.0019369694793886169, "loss": 3.3294, "step": 1096 }, { "epoch": 0.14040701395110713, "grad_norm": 0.77734375, "learning_rate": 0.0019368245443372651, "loss": 2.6087, "step": 1097 }, { "epoch": 0.14053500575963138, "grad_norm": 0.9453125, "learning_rate": 0.001936679448277416, "loss": 2.9046, "step": 1098 }, { "epoch": 0.14066299756815565, "grad_norm": 0.5390625, "learning_rate": 0.001936534191234006, "loss": 2.6771, "step": 1099 }, { "epoch": 0.1407909893766799, "grad_norm": 0.80859375, "learning_rate": 0.0019363887732320004, "loss": 3.3206, "step": 1100 }, { "epoch": 0.14091898118520416, "grad_norm": 0.69140625, "learning_rate": 0.0019362431942963913, "loss": 2.7643, "step": 1101 }, { "epoch": 0.1410469729937284, "grad_norm": 0.74609375, "learning_rate": 0.0019360974544521993, "loss": 3.333, "step": 1102 }, { "epoch": 0.14117496480225267, "grad_norm": 1.0390625, "learning_rate": 0.0019359515537244714, "loss": 3.5429, "step": 1103 }, { "epoch": 0.1413029566107769, "grad_norm": 0.74609375, "learning_rate": 0.0019358054921382833, "loss": 3.2524, "step": 1104 }, { "epoch": 0.14143094841930118, "grad_norm": 0.69921875, "learning_rate": 0.0019356592697187384, "loss": 2.9627, "step": 1105 }, { "epoch": 0.14155894022782542, "grad_norm": 0.80078125, "learning_rate": 0.001935512886490967, "loss": 2.9596, "step": 1106 }, { "epoch": 0.14168693203634966, "grad_norm": 0.76171875, "learning_rate": 0.001935366342480128, "loss": 3.3662, "step": 1107 }, { "epoch": 0.14181492384487393, "grad_norm": 0.6171875, "learning_rate": 0.0019352196377114064, "loss": 2.8613, "step": 1108 }, { "epoch": 0.14194291565339817, "grad_norm": 0.98046875, "learning_rate": 0.0019350727722100168, "loss": 3.0555, "step": 1109 }, { "epoch": 0.14207090746192244, "grad_norm": 0.6796875, "learning_rate": 0.0019349257460011997, "loss": 2.5011, "step": 1110 }, { "epoch": 0.14219889927044668, "grad_norm": 0.6953125, "learning_rate": 0.0019347785591102243, "loss": 3.5901, "step": 1111 }, { "epoch": 0.14232689107897095, "grad_norm": 0.93359375, "learning_rate": 0.0019346312115623868, "loss": 3.9179, "step": 1112 }, { "epoch": 0.1424548828874952, "grad_norm": 0.70703125, "learning_rate": 0.0019344837033830116, "loss": 2.5353, "step": 1113 }, { "epoch": 0.14258287469601946, "grad_norm": 0.9609375, "learning_rate": 0.0019343360345974504, "loss": 3.1144, "step": 1114 }, { "epoch": 0.1427108665045437, "grad_norm": 0.69921875, "learning_rate": 0.0019341882052310818, "loss": 3.0103, "step": 1115 }, { "epoch": 0.14283885831306797, "grad_norm": 0.84375, "learning_rate": 0.0019340402153093135, "loss": 2.8897, "step": 1116 }, { "epoch": 0.14296685012159221, "grad_norm": 1.2265625, "learning_rate": 0.0019338920648575796, "loss": 3.7469, "step": 1117 }, { "epoch": 0.14309484193011648, "grad_norm": 0.90625, "learning_rate": 0.001933743753901342, "loss": 3.8551, "step": 1118 }, { "epoch": 0.14322283373864073, "grad_norm": 0.75390625, "learning_rate": 0.0019335952824660904, "loss": 3.0105, "step": 1119 }, { "epoch": 0.143350825547165, "grad_norm": 0.8203125, "learning_rate": 0.0019334466505773426, "loss": 3.1833, "step": 1120 }, { "epoch": 0.14347881735568924, "grad_norm": 0.83203125, "learning_rate": 0.0019332978582606427, "loss": 3.9847, "step": 1121 }, { "epoch": 0.14360680916421348, "grad_norm": 0.7734375, "learning_rate": 0.0019331489055415632, "loss": 2.4844, "step": 1122 }, { "epoch": 0.14373480097273775, "grad_norm": 0.91015625, "learning_rate": 0.0019329997924457042, "loss": 3.1257, "step": 1123 }, { "epoch": 0.143862792781262, "grad_norm": 0.703125, "learning_rate": 0.0019328505189986935, "loss": 3.5153, "step": 1124 }, { "epoch": 0.14399078458978626, "grad_norm": 0.78515625, "learning_rate": 0.0019327010852261855, "loss": 2.6724, "step": 1125 }, { "epoch": 0.1441187763983105, "grad_norm": 0.83984375, "learning_rate": 0.0019325514911538633, "loss": 3.6588, "step": 1126 }, { "epoch": 0.14424676820683477, "grad_norm": 0.828125, "learning_rate": 0.001932401736807437, "loss": 3.0896, "step": 1127 }, { "epoch": 0.144374760015359, "grad_norm": 0.84375, "learning_rate": 0.0019322518222126442, "loss": 3.5395, "step": 1128 }, { "epoch": 0.14450275182388328, "grad_norm": 0.76953125, "learning_rate": 0.00193210174739525, "loss": 2.5968, "step": 1129 }, { "epoch": 0.14463074363240752, "grad_norm": 0.6875, "learning_rate": 0.0019319515123810476, "loss": 2.7883, "step": 1130 }, { "epoch": 0.1447587354409318, "grad_norm": 0.7265625, "learning_rate": 0.0019318011171958569, "loss": 2.9237, "step": 1131 }, { "epoch": 0.14488672724945603, "grad_norm": 0.73828125, "learning_rate": 0.001931650561865526, "loss": 3.5064, "step": 1132 }, { "epoch": 0.1450147190579803, "grad_norm": 0.7734375, "learning_rate": 0.0019314998464159303, "loss": 4.2771, "step": 1133 }, { "epoch": 0.14514271086650454, "grad_norm": 0.859375, "learning_rate": 0.0019313489708729728, "loss": 3.556, "step": 1134 }, { "epoch": 0.14527070267502878, "grad_norm": 0.8203125, "learning_rate": 0.0019311979352625835, "loss": 3.1248, "step": 1135 }, { "epoch": 0.14539869448355305, "grad_norm": 0.6796875, "learning_rate": 0.0019310467396107208, "loss": 3.4066, "step": 1136 }, { "epoch": 0.1455266862920773, "grad_norm": 0.64453125, "learning_rate": 0.0019308953839433697, "loss": 3.0167, "step": 1137 }, { "epoch": 0.14565467810060156, "grad_norm": 0.81640625, "learning_rate": 0.0019307438682865433, "loss": 3.4392, "step": 1138 }, { "epoch": 0.1457826699091258, "grad_norm": 0.6796875, "learning_rate": 0.001930592192666282, "loss": 3.042, "step": 1139 }, { "epoch": 0.14591066171765008, "grad_norm": 0.8046875, "learning_rate": 0.0019304403571086541, "loss": 2.7384, "step": 1140 }, { "epoch": 0.14603865352617432, "grad_norm": 0.78125, "learning_rate": 0.0019302883616397544, "loss": 3.0887, "step": 1141 }, { "epoch": 0.14616664533469859, "grad_norm": 0.6796875, "learning_rate": 0.001930136206285706, "loss": 3.0912, "step": 1142 }, { "epoch": 0.14629463714322283, "grad_norm": 0.75390625, "learning_rate": 0.0019299838910726592, "loss": 2.6067, "step": 1143 }, { "epoch": 0.1464226289517471, "grad_norm": 0.83203125, "learning_rate": 0.0019298314160267921, "loss": 3.3939, "step": 1144 }, { "epoch": 0.14655062076027134, "grad_norm": 0.69921875, "learning_rate": 0.0019296787811743101, "loss": 3.216, "step": 1145 }, { "epoch": 0.1466786125687956, "grad_norm": 0.7734375, "learning_rate": 0.0019295259865414456, "loss": 3.2284, "step": 1146 }, { "epoch": 0.14680660437731985, "grad_norm": 0.8203125, "learning_rate": 0.0019293730321544589, "loss": 3.2624, "step": 1147 }, { "epoch": 0.14693459618584412, "grad_norm": 1.0234375, "learning_rate": 0.0019292199180396378, "loss": 3.297, "step": 1148 }, { "epoch": 0.14706258799436836, "grad_norm": 0.8359375, "learning_rate": 0.0019290666442232977, "loss": 3.3239, "step": 1149 }, { "epoch": 0.1471905798028926, "grad_norm": 0.69921875, "learning_rate": 0.0019289132107317803, "loss": 2.9973, "step": 1150 }, { "epoch": 0.14731857161141687, "grad_norm": 0.88671875, "learning_rate": 0.0019287596175914567, "loss": 3.487, "step": 1151 }, { "epoch": 0.1474465634199411, "grad_norm": 0.6015625, "learning_rate": 0.001928605864828724, "loss": 2.6146, "step": 1152 }, { "epoch": 0.14757455522846538, "grad_norm": 0.73828125, "learning_rate": 0.0019284519524700066, "loss": 3.4662, "step": 1153 }, { "epoch": 0.14770254703698962, "grad_norm": 0.75390625, "learning_rate": 0.0019282978805417574, "loss": 3.3205, "step": 1154 }, { "epoch": 0.1478305388455139, "grad_norm": 0.86328125, "learning_rate": 0.001928143649070456, "loss": 3.1488, "step": 1155 }, { "epoch": 0.14795853065403813, "grad_norm": 0.671875, "learning_rate": 0.0019279892580826097, "loss": 3.4151, "step": 1156 }, { "epoch": 0.1480865224625624, "grad_norm": 0.60546875, "learning_rate": 0.001927834707604753, "loss": 3.2225, "step": 1157 }, { "epoch": 0.14821451427108664, "grad_norm": 0.84375, "learning_rate": 0.0019276799976634478, "loss": 3.3367, "step": 1158 }, { "epoch": 0.14834250607961091, "grad_norm": 0.7421875, "learning_rate": 0.001927525128285284, "loss": 2.9605, "step": 1159 }, { "epoch": 0.14847049788813516, "grad_norm": 0.7734375, "learning_rate": 0.0019273700994968776, "loss": 2.9721, "step": 1160 }, { "epoch": 0.14859848969665942, "grad_norm": 0.6875, "learning_rate": 0.0019272149113248736, "loss": 3.028, "step": 1161 }, { "epoch": 0.14872648150518367, "grad_norm": 0.7265625, "learning_rate": 0.0019270595637959427, "loss": 3.9092, "step": 1162 }, { "epoch": 0.14885447331370794, "grad_norm": 0.77734375, "learning_rate": 0.001926904056936785, "loss": 2.806, "step": 1163 }, { "epoch": 0.14898246512223218, "grad_norm": 0.83203125, "learning_rate": 0.001926748390774126, "loss": 3.1402, "step": 1164 }, { "epoch": 0.14911045693075642, "grad_norm": 0.73046875, "learning_rate": 0.0019265925653347198, "loss": 3.7091, "step": 1165 }, { "epoch": 0.1492384487392807, "grad_norm": 0.73828125, "learning_rate": 0.0019264365806453477, "loss": 3.5886, "step": 1166 }, { "epoch": 0.14936644054780493, "grad_norm": 0.76171875, "learning_rate": 0.001926280436732818, "loss": 2.6779, "step": 1167 }, { "epoch": 0.1494944323563292, "grad_norm": 0.7578125, "learning_rate": 0.0019261241336239663, "loss": 2.7906, "step": 1168 }, { "epoch": 0.14962242416485344, "grad_norm": 0.79296875, "learning_rate": 0.0019259676713456564, "loss": 2.8208, "step": 1169 }, { "epoch": 0.1497504159733777, "grad_norm": 0.83984375, "learning_rate": 0.0019258110499247784, "loss": 3.129, "step": 1170 }, { "epoch": 0.14987840778190195, "grad_norm": 0.87890625, "learning_rate": 0.0019256542693882504, "loss": 2.9581, "step": 1171 }, { "epoch": 0.15000639959042622, "grad_norm": 0.73046875, "learning_rate": 0.0019254973297630179, "loss": 2.7063, "step": 1172 }, { "epoch": 0.15013439139895046, "grad_norm": 0.78515625, "learning_rate": 0.001925340231076053, "loss": 2.9345, "step": 1173 }, { "epoch": 0.15026238320747473, "grad_norm": 0.91015625, "learning_rate": 0.0019251829733543562, "loss": 2.9553, "step": 1174 }, { "epoch": 0.15039037501599897, "grad_norm": 0.75, "learning_rate": 0.0019250255566249544, "loss": 3.142, "step": 1175 }, { "epoch": 0.15051836682452324, "grad_norm": 0.64453125, "learning_rate": 0.0019248679809149025, "loss": 3.2804, "step": 1176 }, { "epoch": 0.15064635863304748, "grad_norm": 0.703125, "learning_rate": 0.0019247102462512822, "loss": 2.6575, "step": 1177 }, { "epoch": 0.15077435044157175, "grad_norm": 0.7890625, "learning_rate": 0.001924552352661203, "loss": 2.6658, "step": 1178 }, { "epoch": 0.150902342250096, "grad_norm": 0.8046875, "learning_rate": 0.001924394300171801, "loss": 2.4312, "step": 1179 }, { "epoch": 0.15103033405862024, "grad_norm": 0.78515625, "learning_rate": 0.001924236088810241, "loss": 3.1609, "step": 1180 }, { "epoch": 0.1511583258671445, "grad_norm": 0.71875, "learning_rate": 0.0019240777186037136, "loss": 2.6922, "step": 1181 }, { "epoch": 0.15128631767566875, "grad_norm": 0.7734375, "learning_rate": 0.0019239191895794368, "loss": 3.1654, "step": 1182 }, { "epoch": 0.15141430948419302, "grad_norm": 0.8515625, "learning_rate": 0.0019237605017646574, "loss": 2.8786, "step": 1183 }, { "epoch": 0.15154230129271726, "grad_norm": 0.7421875, "learning_rate": 0.0019236016551866478, "loss": 2.8341, "step": 1184 }, { "epoch": 0.15167029310124153, "grad_norm": 0.703125, "learning_rate": 0.0019234426498727084, "loss": 3.9237, "step": 1185 }, { "epoch": 0.15179828490976577, "grad_norm": 0.6796875, "learning_rate": 0.0019232834858501672, "loss": 2.6052, "step": 1186 }, { "epoch": 0.15192627671829004, "grad_norm": 0.90625, "learning_rate": 0.0019231241631463788, "loss": 2.8804, "step": 1187 }, { "epoch": 0.15205426852681428, "grad_norm": 0.7265625, "learning_rate": 0.0019229646817887254, "loss": 2.9122, "step": 1188 }, { "epoch": 0.15218226033533855, "grad_norm": 0.8671875, "learning_rate": 0.0019228050418046167, "loss": 2.3908, "step": 1189 }, { "epoch": 0.1523102521438628, "grad_norm": 0.76171875, "learning_rate": 0.0019226452432214893, "loss": 2.6428, "step": 1190 }, { "epoch": 0.15243824395238706, "grad_norm": 0.77734375, "learning_rate": 0.0019224852860668072, "loss": 3.1206, "step": 1191 }, { "epoch": 0.1525662357609113, "grad_norm": 0.77734375, "learning_rate": 0.0019223251703680614, "loss": 3.2855, "step": 1192 }, { "epoch": 0.15269422756943557, "grad_norm": 0.6953125, "learning_rate": 0.0019221648961527708, "loss": 3.9638, "step": 1193 }, { "epoch": 0.1528222193779598, "grad_norm": 0.8125, "learning_rate": 0.0019220044634484807, "loss": 3.7907, "step": 1194 }, { "epoch": 0.15295021118648405, "grad_norm": 0.8046875, "learning_rate": 0.0019218438722827642, "loss": 3.2759, "step": 1195 }, { "epoch": 0.15307820299500832, "grad_norm": 0.6953125, "learning_rate": 0.0019216831226832216, "loss": 2.4429, "step": 1196 }, { "epoch": 0.15320619480353256, "grad_norm": 0.7734375, "learning_rate": 0.0019215222146774804, "loss": 2.9268, "step": 1197 }, { "epoch": 0.15333418661205683, "grad_norm": 0.7734375, "learning_rate": 0.0019213611482931951, "loss": 3.4134, "step": 1198 }, { "epoch": 0.15346217842058107, "grad_norm": 0.59375, "learning_rate": 0.0019211999235580475, "loss": 2.5718, "step": 1199 }, { "epoch": 0.15359017022910534, "grad_norm": 0.546875, "learning_rate": 0.001921038540499747, "loss": 2.3142, "step": 1200 }, { "epoch": 0.15371816203762959, "grad_norm": 0.73828125, "learning_rate": 0.0019208769991460298, "loss": 2.812, "step": 1201 }, { "epoch": 0.15384615384615385, "grad_norm": 0.78515625, "learning_rate": 0.001920715299524659, "loss": 3.1725, "step": 1202 }, { "epoch": 0.1539741456546781, "grad_norm": 0.7265625, "learning_rate": 0.0019205534416634254, "loss": 2.6004, "step": 1203 }, { "epoch": 0.15410213746320237, "grad_norm": 0.7265625, "learning_rate": 0.0019203914255901476, "loss": 3.0801, "step": 1204 }, { "epoch": 0.1542301292717266, "grad_norm": 0.69140625, "learning_rate": 0.0019202292513326698, "loss": 3.1377, "step": 1205 }, { "epoch": 0.15435812108025088, "grad_norm": 0.77734375, "learning_rate": 0.0019200669189188648, "loss": 2.4937, "step": 1206 }, { "epoch": 0.15448611288877512, "grad_norm": 0.68359375, "learning_rate": 0.0019199044283766317, "loss": 3.0643, "step": 1207 }, { "epoch": 0.15461410469729936, "grad_norm": 0.8046875, "learning_rate": 0.0019197417797338977, "loss": 3.5516, "step": 1208 }, { "epoch": 0.15474209650582363, "grad_norm": 0.72265625, "learning_rate": 0.0019195789730186161, "loss": 2.5554, "step": 1209 }, { "epoch": 0.15487008831434787, "grad_norm": 0.82421875, "learning_rate": 0.0019194160082587682, "loss": 2.9125, "step": 1210 }, { "epoch": 0.15499808012287214, "grad_norm": 0.84765625, "learning_rate": 0.001919252885482362, "loss": 3.3681, "step": 1211 }, { "epoch": 0.15512607193139638, "grad_norm": 1.203125, "learning_rate": 0.0019190896047174325, "loss": 3.2294, "step": 1212 }, { "epoch": 0.15525406373992065, "grad_norm": 0.6796875, "learning_rate": 0.0019189261659920424, "loss": 2.887, "step": 1213 }, { "epoch": 0.1553820555484449, "grad_norm": 0.76171875, "learning_rate": 0.0019187625693342816, "loss": 2.424, "step": 1214 }, { "epoch": 0.15551004735696916, "grad_norm": 0.67578125, "learning_rate": 0.0019185988147722665, "loss": 3.0079, "step": 1215 }, { "epoch": 0.1556380391654934, "grad_norm": 0.64453125, "learning_rate": 0.0019184349023341407, "loss": 2.8385, "step": 1216 }, { "epoch": 0.15576603097401767, "grad_norm": 0.7578125, "learning_rate": 0.001918270832048076, "loss": 2.5947, "step": 1217 }, { "epoch": 0.1558940227825419, "grad_norm": 0.80078125, "learning_rate": 0.0019181066039422698, "loss": 3.0168, "step": 1218 }, { "epoch": 0.15602201459106618, "grad_norm": 0.63671875, "learning_rate": 0.0019179422180449477, "loss": 2.724, "step": 1219 }, { "epoch": 0.15615000639959042, "grad_norm": 0.73046875, "learning_rate": 0.0019177776743843622, "loss": 2.7551, "step": 1220 }, { "epoch": 0.1562779982081147, "grad_norm": 0.78125, "learning_rate": 0.0019176129729887924, "loss": 2.9282, "step": 1221 }, { "epoch": 0.15640599001663893, "grad_norm": 0.796875, "learning_rate": 0.0019174481138865454, "loss": 2.907, "step": 1222 }, { "epoch": 0.15653398182516318, "grad_norm": 0.79296875, "learning_rate": 0.0019172830971059544, "loss": 3.1977, "step": 1223 }, { "epoch": 0.15666197363368745, "grad_norm": 0.9609375, "learning_rate": 0.0019171179226753808, "loss": 2.5214, "step": 1224 }, { "epoch": 0.1567899654422117, "grad_norm": 0.8984375, "learning_rate": 0.001916952590623212, "loss": 2.4556, "step": 1225 }, { "epoch": 0.15691795725073596, "grad_norm": 0.80859375, "learning_rate": 0.001916787100977863, "loss": 3.6011, "step": 1226 }, { "epoch": 0.1570459490592602, "grad_norm": 0.890625, "learning_rate": 0.001916621453767776, "loss": 2.954, "step": 1227 }, { "epoch": 0.15717394086778447, "grad_norm": 0.71875, "learning_rate": 0.0019164556490214207, "loss": 3.195, "step": 1228 }, { "epoch": 0.1573019326763087, "grad_norm": 1.0703125, "learning_rate": 0.0019162896867672924, "loss": 3.4031, "step": 1229 }, { "epoch": 0.15742992448483298, "grad_norm": 0.86328125, "learning_rate": 0.001916123567033915, "loss": 3.2088, "step": 1230 }, { "epoch": 0.15755791629335722, "grad_norm": 0.84765625, "learning_rate": 0.0019159572898498386, "loss": 2.9164, "step": 1231 }, { "epoch": 0.1576859081018815, "grad_norm": 0.79296875, "learning_rate": 0.001915790855243641, "loss": 3.108, "step": 1232 }, { "epoch": 0.15781389991040573, "grad_norm": 0.75390625, "learning_rate": 0.0019156242632439263, "loss": 2.9857, "step": 1233 }, { "epoch": 0.15794189171893, "grad_norm": 0.77734375, "learning_rate": 0.0019154575138793262, "loss": 3.0548, "step": 1234 }, { "epoch": 0.15806988352745424, "grad_norm": 0.7890625, "learning_rate": 0.0019152906071784992, "loss": 3.0768, "step": 1235 }, { "epoch": 0.1581978753359785, "grad_norm": 0.82421875, "learning_rate": 0.001915123543170131, "loss": 3.4871, "step": 1236 }, { "epoch": 0.15832586714450275, "grad_norm": 0.80078125, "learning_rate": 0.0019149563218829341, "loss": 2.9833, "step": 1237 }, { "epoch": 0.158453858953027, "grad_norm": 0.875, "learning_rate": 0.0019147889433456485, "loss": 2.8796, "step": 1238 }, { "epoch": 0.15858185076155126, "grad_norm": 0.83203125, "learning_rate": 0.001914621407587041, "loss": 3.6634, "step": 1239 }, { "epoch": 0.1587098425700755, "grad_norm": 0.80078125, "learning_rate": 0.0019144537146359047, "loss": 2.8981, "step": 1240 }, { "epoch": 0.15883783437859977, "grad_norm": 0.78125, "learning_rate": 0.001914285864521061, "loss": 2.8205, "step": 1241 }, { "epoch": 0.15896582618712402, "grad_norm": 0.9921875, "learning_rate": 0.0019141178572713573, "loss": 3.5973, "step": 1242 }, { "epoch": 0.15909381799564828, "grad_norm": 0.59765625, "learning_rate": 0.0019139496929156685, "loss": 2.8491, "step": 1243 }, { "epoch": 0.15922180980417253, "grad_norm": 0.734375, "learning_rate": 0.0019137813714828963, "loss": 2.3974, "step": 1244 }, { "epoch": 0.1593498016126968, "grad_norm": 0.85546875, "learning_rate": 0.0019136128930019698, "loss": 2.7961, "step": 1245 }, { "epoch": 0.15947779342122104, "grad_norm": 0.8359375, "learning_rate": 0.0019134442575018443, "loss": 2.7662, "step": 1246 }, { "epoch": 0.1596057852297453, "grad_norm": 0.8125, "learning_rate": 0.0019132754650115027, "loss": 3.2365, "step": 1247 }, { "epoch": 0.15973377703826955, "grad_norm": 0.6875, "learning_rate": 0.0019131065155599547, "loss": 3.1491, "step": 1248 }, { "epoch": 0.15986176884679382, "grad_norm": 0.9375, "learning_rate": 0.0019129374091762374, "loss": 2.2943, "step": 1249 }, { "epoch": 0.15998976065531806, "grad_norm": 0.69140625, "learning_rate": 0.0019127681458894136, "loss": 3.297, "step": 1250 }, { "epoch": 0.16011775246384233, "grad_norm": 0.73828125, "learning_rate": 0.0019125987257285748, "loss": 3.1192, "step": 1251 }, { "epoch": 0.16024574427236657, "grad_norm": 0.68359375, "learning_rate": 0.0019124291487228385, "loss": 3.3185, "step": 1252 }, { "epoch": 0.1603737360808908, "grad_norm": 0.6953125, "learning_rate": 0.0019122594149013487, "loss": 2.8022, "step": 1253 }, { "epoch": 0.16050172788941508, "grad_norm": 0.9375, "learning_rate": 0.0019120895242932775, "loss": 2.6998, "step": 1254 }, { "epoch": 0.16062971969793932, "grad_norm": 0.69140625, "learning_rate": 0.001911919476927823, "loss": 3.0829, "step": 1255 }, { "epoch": 0.1607577115064636, "grad_norm": 0.78125, "learning_rate": 0.0019117492728342106, "loss": 2.939, "step": 1256 }, { "epoch": 0.16088570331498783, "grad_norm": 0.69140625, "learning_rate": 0.001911578912041693, "loss": 2.9716, "step": 1257 }, { "epoch": 0.1610136951235121, "grad_norm": 0.76953125, "learning_rate": 0.001911408394579549, "loss": 3.2818, "step": 1258 }, { "epoch": 0.16114168693203634, "grad_norm": 1.296875, "learning_rate": 0.001911237720477085, "loss": 2.8096, "step": 1259 }, { "epoch": 0.1612696787405606, "grad_norm": 0.828125, "learning_rate": 0.0019110668897636342, "loss": 2.8765, "step": 1260 }, { "epoch": 0.16139767054908485, "grad_norm": 0.74609375, "learning_rate": 0.0019108959024685568, "loss": 2.7983, "step": 1261 }, { "epoch": 0.16152566235760912, "grad_norm": 0.74609375, "learning_rate": 0.0019107247586212393, "loss": 3.197, "step": 1262 }, { "epoch": 0.16165365416613336, "grad_norm": 0.74609375, "learning_rate": 0.0019105534582510954, "loss": 2.7668, "step": 1263 }, { "epoch": 0.16178164597465763, "grad_norm": 0.64453125, "learning_rate": 0.001910382001387567, "loss": 2.8204, "step": 1264 }, { "epoch": 0.16190963778318188, "grad_norm": 0.6640625, "learning_rate": 0.0019102103880601204, "loss": 2.8118, "step": 1265 }, { "epoch": 0.16203762959170612, "grad_norm": 0.73828125, "learning_rate": 0.0019100386182982505, "loss": 2.9399, "step": 1266 }, { "epoch": 0.1621656214002304, "grad_norm": 0.75390625, "learning_rate": 0.0019098666921314792, "loss": 2.491, "step": 1267 }, { "epoch": 0.16229361320875463, "grad_norm": 0.71484375, "learning_rate": 0.0019096946095893544, "loss": 3.0527, "step": 1268 }, { "epoch": 0.1624216050172789, "grad_norm": 0.6484375, "learning_rate": 0.0019095223707014514, "loss": 2.592, "step": 1269 }, { "epoch": 0.16254959682580314, "grad_norm": 0.78515625, "learning_rate": 0.001909349975497372, "loss": 2.4592, "step": 1270 }, { "epoch": 0.1626775886343274, "grad_norm": 0.8203125, "learning_rate": 0.0019091774240067455, "loss": 3.2201, "step": 1271 }, { "epoch": 0.16280558044285165, "grad_norm": 3.03125, "learning_rate": 0.0019090047162592274, "loss": 2.8591, "step": 1272 }, { "epoch": 0.16293357225137592, "grad_norm": 0.83203125, "learning_rate": 0.0019088318522845007, "loss": 2.8622, "step": 1273 }, { "epoch": 0.16306156405990016, "grad_norm": 0.8515625, "learning_rate": 0.0019086588321122744, "loss": 2.4009, "step": 1274 }, { "epoch": 0.16318955586842443, "grad_norm": 0.8828125, "learning_rate": 0.0019084856557722851, "loss": 3.1515, "step": 1275 }, { "epoch": 0.16331754767694867, "grad_norm": 0.6171875, "learning_rate": 0.0019083123232942957, "loss": 2.7374, "step": 1276 }, { "epoch": 0.16344553948547294, "grad_norm": 0.7578125, "learning_rate": 0.0019081388347080969, "loss": 2.9997, "step": 1277 }, { "epoch": 0.16357353129399718, "grad_norm": 0.79296875, "learning_rate": 0.0019079651900435043, "loss": 2.8366, "step": 1278 }, { "epoch": 0.16370152310252145, "grad_norm": 0.67578125, "learning_rate": 0.001907791389330363, "loss": 2.8378, "step": 1279 }, { "epoch": 0.1638295149110457, "grad_norm": 0.84765625, "learning_rate": 0.0019076174325985422, "loss": 3.6618, "step": 1280 }, { "epoch": 0.16395750671956993, "grad_norm": 0.69140625, "learning_rate": 0.0019074433198779399, "loss": 2.6502, "step": 1281 }, { "epoch": 0.1640854985280942, "grad_norm": 0.78125, "learning_rate": 0.00190726905119848, "loss": 3.0741, "step": 1282 }, { "epoch": 0.16421349033661845, "grad_norm": 0.69140625, "learning_rate": 0.0019070946265901136, "loss": 3.0057, "step": 1283 }, { "epoch": 0.16434148214514271, "grad_norm": 0.57421875, "learning_rate": 0.0019069200460828182, "loss": 2.1336, "step": 1284 }, { "epoch": 0.16446947395366696, "grad_norm": 0.80859375, "learning_rate": 0.0019067453097065983, "loss": 2.732, "step": 1285 }, { "epoch": 0.16459746576219123, "grad_norm": 0.82421875, "learning_rate": 0.0019065704174914854, "loss": 3.4839, "step": 1286 }, { "epoch": 0.16472545757071547, "grad_norm": 1.015625, "learning_rate": 0.0019063953694675368, "loss": 2.4278, "step": 1287 }, { "epoch": 0.16485344937923974, "grad_norm": 0.765625, "learning_rate": 0.001906220165664838, "loss": 3.2791, "step": 1288 }, { "epoch": 0.16498144118776398, "grad_norm": 0.859375, "learning_rate": 0.001906044806113501, "loss": 2.9178, "step": 1289 }, { "epoch": 0.16510943299628825, "grad_norm": 0.86328125, "learning_rate": 0.0019058692908436635, "loss": 3.374, "step": 1290 }, { "epoch": 0.1652374248048125, "grad_norm": 0.6640625, "learning_rate": 0.0019056936198854908, "loss": 2.7851, "step": 1291 }, { "epoch": 0.16536541661333676, "grad_norm": 0.8125, "learning_rate": 0.001905517793269175, "loss": 2.7953, "step": 1292 }, { "epoch": 0.165493408421861, "grad_norm": 0.62890625, "learning_rate": 0.0019053418110249343, "loss": 2.8631, "step": 1293 }, { "epoch": 0.16562140023038527, "grad_norm": 0.671875, "learning_rate": 0.0019051656731830149, "loss": 2.7794, "step": 1294 }, { "epoch": 0.1657493920389095, "grad_norm": 0.93359375, "learning_rate": 0.0019049893797736881, "loss": 2.8277, "step": 1295 }, { "epoch": 0.16587738384743375, "grad_norm": 0.6953125, "learning_rate": 0.0019048129308272533, "loss": 2.6165, "step": 1296 }, { "epoch": 0.16600537565595802, "grad_norm": 1.15625, "learning_rate": 0.0019046363263740358, "loss": 3.4371, "step": 1297 }, { "epoch": 0.16613336746448226, "grad_norm": 0.79296875, "learning_rate": 0.0019044595664443884, "loss": 2.7327, "step": 1298 }, { "epoch": 0.16626135927300653, "grad_norm": 0.91796875, "learning_rate": 0.0019042826510686894, "loss": 3.5604, "step": 1299 }, { "epoch": 0.16638935108153077, "grad_norm": 1.09375, "learning_rate": 0.0019041055802773456, "loss": 3.2296, "step": 1300 }, { "epoch": 0.16651734289005504, "grad_norm": 0.63671875, "learning_rate": 0.0019039283541007887, "loss": 2.8312, "step": 1301 }, { "epoch": 0.16664533469857928, "grad_norm": 0.6328125, "learning_rate": 0.001903750972569478, "loss": 2.2702, "step": 1302 }, { "epoch": 0.16677332650710355, "grad_norm": 0.70703125, "learning_rate": 0.0019035734357138998, "loss": 3.1609, "step": 1303 }, { "epoch": 0.1669013183156278, "grad_norm": 0.7265625, "learning_rate": 0.001903395743564566, "loss": 2.5168, "step": 1304 }, { "epoch": 0.16702931012415206, "grad_norm": 0.765625, "learning_rate": 0.0019032178961520168, "loss": 4.4863, "step": 1305 }, { "epoch": 0.1671573019326763, "grad_norm": 0.83984375, "learning_rate": 0.0019030398935068172, "loss": 3.0494, "step": 1306 }, { "epoch": 0.16728529374120057, "grad_norm": 0.68359375, "learning_rate": 0.0019028617356595607, "loss": 2.7109, "step": 1307 }, { "epoch": 0.16741328554972482, "grad_norm": 0.86328125, "learning_rate": 0.0019026834226408657, "loss": 2.5678, "step": 1308 }, { "epoch": 0.16754127735824909, "grad_norm": 0.70703125, "learning_rate": 0.0019025049544813793, "loss": 3.4073, "step": 1309 }, { "epoch": 0.16766926916677333, "grad_norm": 0.75390625, "learning_rate": 0.001902326331211773, "loss": 2.8455, "step": 1310 }, { "epoch": 0.16779726097529757, "grad_norm": 0.875, "learning_rate": 0.001902147552862747, "loss": 2.8332, "step": 1311 }, { "epoch": 0.16792525278382184, "grad_norm": 0.60546875, "learning_rate": 0.0019019686194650268, "loss": 2.2093, "step": 1312 }, { "epoch": 0.16805324459234608, "grad_norm": 0.67578125, "learning_rate": 0.0019017895310493652, "loss": 2.5917, "step": 1313 }, { "epoch": 0.16818123640087035, "grad_norm": 0.72265625, "learning_rate": 0.0019016102876465411, "loss": 3.3356, "step": 1314 }, { "epoch": 0.1683092282093946, "grad_norm": 0.6875, "learning_rate": 0.0019014308892873612, "loss": 2.9746, "step": 1315 }, { "epoch": 0.16843722001791886, "grad_norm": 0.59375, "learning_rate": 0.0019012513360026572, "loss": 2.2447, "step": 1316 }, { "epoch": 0.1685652118264431, "grad_norm": 0.89453125, "learning_rate": 0.0019010716278232882, "loss": 2.5181, "step": 1317 }, { "epoch": 0.16869320363496737, "grad_norm": 0.9453125, "learning_rate": 0.0019008917647801407, "loss": 2.2988, "step": 1318 }, { "epoch": 0.1688211954434916, "grad_norm": 0.90234375, "learning_rate": 0.0019007117469041265, "loss": 3.3072, "step": 1319 }, { "epoch": 0.16894918725201588, "grad_norm": 0.9765625, "learning_rate": 0.0019005315742261848, "loss": 2.3308, "step": 1320 }, { "epoch": 0.16907717906054012, "grad_norm": 0.7109375, "learning_rate": 0.001900351246777281, "loss": 3.0191, "step": 1321 }, { "epoch": 0.1692051708690644, "grad_norm": 0.71875, "learning_rate": 0.0019001707645884076, "loss": 3.1153, "step": 1322 }, { "epoch": 0.16933316267758863, "grad_norm": 0.73828125, "learning_rate": 0.0018999901276905834, "loss": 2.2271, "step": 1323 }, { "epoch": 0.16946115448611287, "grad_norm": 0.83203125, "learning_rate": 0.0018998093361148533, "loss": 3.1218, "step": 1324 }, { "epoch": 0.16958914629463714, "grad_norm": 0.75390625, "learning_rate": 0.0018996283898922899, "loss": 3.2087, "step": 1325 }, { "epoch": 0.16971713810316139, "grad_norm": 1.0234375, "learning_rate": 0.0018994472890539913, "loss": 3.0561, "step": 1326 }, { "epoch": 0.16984512991168565, "grad_norm": 0.67578125, "learning_rate": 0.0018992660336310827, "loss": 2.7923, "step": 1327 }, { "epoch": 0.1699731217202099, "grad_norm": 0.765625, "learning_rate": 0.001899084623654716, "loss": 2.9133, "step": 1328 }, { "epoch": 0.17010111352873417, "grad_norm": 1.0390625, "learning_rate": 0.0018989030591560693, "loss": 2.528, "step": 1329 }, { "epoch": 0.1702291053372584, "grad_norm": 0.68359375, "learning_rate": 0.0018987213401663475, "loss": 2.7901, "step": 1330 }, { "epoch": 0.17035709714578268, "grad_norm": 0.9296875, "learning_rate": 0.0018985394667167816, "loss": 3.2481, "step": 1331 }, { "epoch": 0.17048508895430692, "grad_norm": 0.69921875, "learning_rate": 0.0018983574388386298, "loss": 2.4459, "step": 1332 }, { "epoch": 0.1706130807628312, "grad_norm": 0.94140625, "learning_rate": 0.0018981752565631767, "loss": 2.4015, "step": 1333 }, { "epoch": 0.17074107257135543, "grad_norm": 0.671875, "learning_rate": 0.001897992919921733, "loss": 2.535, "step": 1334 }, { "epoch": 0.1708690643798797, "grad_norm": 0.62109375, "learning_rate": 0.0018978104289456362, "loss": 2.6201, "step": 1335 }, { "epoch": 0.17099705618840394, "grad_norm": 0.703125, "learning_rate": 0.0018976277836662506, "loss": 2.0525, "step": 1336 }, { "epoch": 0.1711250479969282, "grad_norm": 0.78125, "learning_rate": 0.001897444984114966, "loss": 3.2052, "step": 1337 }, { "epoch": 0.17125303980545245, "grad_norm": 2.015625, "learning_rate": 0.0018972620303232008, "loss": 3.0579, "step": 1338 }, { "epoch": 0.1713810316139767, "grad_norm": 0.92578125, "learning_rate": 0.0018970789223223977, "loss": 2.5945, "step": 1339 }, { "epoch": 0.17150902342250096, "grad_norm": 1.0625, "learning_rate": 0.0018968956601440267, "loss": 2.5495, "step": 1340 }, { "epoch": 0.1716370152310252, "grad_norm": 0.9375, "learning_rate": 0.0018967122438195846, "loss": 2.9128, "step": 1341 }, { "epoch": 0.17176500703954947, "grad_norm": 0.76171875, "learning_rate": 0.0018965286733805945, "loss": 3.3376, "step": 1342 }, { "epoch": 0.1718929988480737, "grad_norm": 0.84765625, "learning_rate": 0.0018963449488586057, "loss": 3.4248, "step": 1343 }, { "epoch": 0.17202099065659798, "grad_norm": 0.640625, "learning_rate": 0.0018961610702851947, "loss": 2.7956, "step": 1344 }, { "epoch": 0.17214898246512222, "grad_norm": 0.765625, "learning_rate": 0.0018959770376919638, "loss": 2.8848, "step": 1345 }, { "epoch": 0.1722769742736465, "grad_norm": 0.80078125, "learning_rate": 0.0018957928511105417, "loss": 2.2574, "step": 1346 }, { "epoch": 0.17240496608217074, "grad_norm": 0.7578125, "learning_rate": 0.0018956085105725844, "loss": 2.9681, "step": 1347 }, { "epoch": 0.172532957890695, "grad_norm": 0.76953125, "learning_rate": 0.0018954240161097736, "loss": 2.422, "step": 1348 }, { "epoch": 0.17266094969921925, "grad_norm": 0.7109375, "learning_rate": 0.0018952393677538174, "loss": 2.5005, "step": 1349 }, { "epoch": 0.17278894150774352, "grad_norm": 0.75, "learning_rate": 0.0018950545655364508, "loss": 3.1804, "step": 1350 }, { "epoch": 0.17291693331626776, "grad_norm": 0.703125, "learning_rate": 0.0018948696094894352, "loss": 3.092, "step": 1351 }, { "epoch": 0.17304492512479203, "grad_norm": 0.71875, "learning_rate": 0.001894684499644558, "loss": 2.6194, "step": 1352 }, { "epoch": 0.17317291693331627, "grad_norm": 1.0703125, "learning_rate": 0.0018944992360336336, "loss": 2.5943, "step": 1353 }, { "epoch": 0.1733009087418405, "grad_norm": 0.69921875, "learning_rate": 0.0018943138186885025, "loss": 2.4157, "step": 1354 }, { "epoch": 0.17342890055036478, "grad_norm": 0.79296875, "learning_rate": 0.0018941282476410315, "loss": 2.3782, "step": 1355 }, { "epoch": 0.17355689235888902, "grad_norm": 0.78515625, "learning_rate": 0.0018939425229231141, "loss": 2.057, "step": 1356 }, { "epoch": 0.1736848841674133, "grad_norm": 0.875, "learning_rate": 0.0018937566445666706, "loss": 2.6457, "step": 1357 }, { "epoch": 0.17381287597593753, "grad_norm": 0.7734375, "learning_rate": 0.001893570612603646, "loss": 3.1611, "step": 1358 }, { "epoch": 0.1739408677844618, "grad_norm": 1.0078125, "learning_rate": 0.0018933844270660144, "loss": 2.1942, "step": 1359 }, { "epoch": 0.17406885959298604, "grad_norm": 0.984375, "learning_rate": 0.0018931980879857736, "loss": 2.8986, "step": 1360 }, { "epoch": 0.1741968514015103, "grad_norm": 0.77734375, "learning_rate": 0.0018930115953949495, "loss": 2.6838, "step": 1361 }, { "epoch": 0.17432484321003455, "grad_norm": 0.66796875, "learning_rate": 0.0018928249493255938, "loss": 2.0056, "step": 1362 }, { "epoch": 0.17445283501855882, "grad_norm": 0.75, "learning_rate": 0.001892638149809785, "loss": 2.6713, "step": 1363 }, { "epoch": 0.17458082682708306, "grad_norm": 0.83984375, "learning_rate": 0.0018924511968796268, "loss": 3.1198, "step": 1364 }, { "epoch": 0.17470881863560733, "grad_norm": 0.69921875, "learning_rate": 0.001892264090567251, "loss": 2.7258, "step": 1365 }, { "epoch": 0.17483681044413157, "grad_norm": 0.9296875, "learning_rate": 0.0018920768309048143, "loss": 2.7344, "step": 1366 }, { "epoch": 0.17496480225265584, "grad_norm": 0.5703125, "learning_rate": 0.0018918894179245005, "loss": 1.8712, "step": 1367 }, { "epoch": 0.17509279406118008, "grad_norm": 0.7734375, "learning_rate": 0.0018917018516585195, "loss": 2.4761, "step": 1368 }, { "epoch": 0.17522078586970433, "grad_norm": 0.90234375, "learning_rate": 0.001891514132139108, "loss": 2.9963, "step": 1369 }, { "epoch": 0.1753487776782286, "grad_norm": 0.6640625, "learning_rate": 0.0018913262593985279, "loss": 3.5638, "step": 1370 }, { "epoch": 0.17547676948675284, "grad_norm": 0.68359375, "learning_rate": 0.0018911382334690688, "loss": 2.7291, "step": 1371 }, { "epoch": 0.1756047612952771, "grad_norm": 0.66796875, "learning_rate": 0.0018909500543830456, "loss": 2.4118, "step": 1372 }, { "epoch": 0.17573275310380135, "grad_norm": 0.734375, "learning_rate": 0.0018907617221728002, "loss": 2.9008, "step": 1373 }, { "epoch": 0.17586074491232562, "grad_norm": 0.68359375, "learning_rate": 0.0018905732368707003, "loss": 3.037, "step": 1374 }, { "epoch": 0.17598873672084986, "grad_norm": 0.7421875, "learning_rate": 0.0018903845985091406, "loss": 2.4574, "step": 1375 }, { "epoch": 0.17611672852937413, "grad_norm": 0.8203125, "learning_rate": 0.0018901958071205409, "loss": 2.9773, "step": 1376 }, { "epoch": 0.17624472033789837, "grad_norm": 0.69140625, "learning_rate": 0.001890006862737349, "loss": 2.6701, "step": 1377 }, { "epoch": 0.17637271214642264, "grad_norm": 0.78125, "learning_rate": 0.0018898177653920375, "loss": 3.5373, "step": 1378 }, { "epoch": 0.17650070395494688, "grad_norm": 0.7109375, "learning_rate": 0.001889628515117106, "loss": 2.758, "step": 1379 }, { "epoch": 0.17662869576347115, "grad_norm": 0.734375, "learning_rate": 0.0018894391119450799, "loss": 3.6988, "step": 1380 }, { "epoch": 0.1767566875719954, "grad_norm": 0.97265625, "learning_rate": 0.0018892495559085117, "loss": 3.6966, "step": 1381 }, { "epoch": 0.17688467938051966, "grad_norm": 0.7734375, "learning_rate": 0.0018890598470399792, "loss": 2.5999, "step": 1382 }, { "epoch": 0.1770126711890439, "grad_norm": 0.73046875, "learning_rate": 0.0018888699853720874, "loss": 3.1421, "step": 1383 }, { "epoch": 0.17714066299756814, "grad_norm": 0.75, "learning_rate": 0.0018886799709374668, "loss": 2.849, "step": 1384 }, { "epoch": 0.1772686548060924, "grad_norm": 0.9765625, "learning_rate": 0.0018884898037687748, "loss": 3.2061, "step": 1385 }, { "epoch": 0.17739664661461665, "grad_norm": 0.72265625, "learning_rate": 0.0018882994838986945, "loss": 3.291, "step": 1386 }, { "epoch": 0.17752463842314092, "grad_norm": 0.70703125, "learning_rate": 0.0018881090113599354, "loss": 3.1074, "step": 1387 }, { "epoch": 0.17765263023166517, "grad_norm": 0.7734375, "learning_rate": 0.0018879183861852332, "loss": 2.3129, "step": 1388 }, { "epoch": 0.17778062204018943, "grad_norm": 0.76953125, "learning_rate": 0.0018877276084073497, "loss": 2.5993, "step": 1389 }, { "epoch": 0.17790861384871368, "grad_norm": 0.66015625, "learning_rate": 0.0018875366780590742, "loss": 2.6015, "step": 1390 }, { "epoch": 0.17803660565723795, "grad_norm": 0.640625, "learning_rate": 0.0018873455951732203, "loss": 2.6752, "step": 1391 }, { "epoch": 0.1781645974657622, "grad_norm": 0.67578125, "learning_rate": 0.0018871543597826289, "loss": 2.6697, "step": 1392 }, { "epoch": 0.17829258927428646, "grad_norm": 0.66796875, "learning_rate": 0.0018869629719201666, "loss": 2.7342, "step": 1393 }, { "epoch": 0.1784205810828107, "grad_norm": 0.84375, "learning_rate": 0.0018867714316187273, "loss": 3.1976, "step": 1394 }, { "epoch": 0.17854857289133497, "grad_norm": 0.64453125, "learning_rate": 0.0018865797389112295, "loss": 2.5477, "step": 1395 }, { "epoch": 0.1786765646998592, "grad_norm": 0.796875, "learning_rate": 0.0018863878938306191, "loss": 2.923, "step": 1396 }, { "epoch": 0.17880455650838345, "grad_norm": 0.59375, "learning_rate": 0.0018861958964098678, "loss": 2.8756, "step": 1397 }, { "epoch": 0.17893254831690772, "grad_norm": 0.7890625, "learning_rate": 0.0018860037466819732, "loss": 2.5697, "step": 1398 }, { "epoch": 0.17906054012543196, "grad_norm": 0.71875, "learning_rate": 0.00188581144467996, "loss": 2.9197, "step": 1399 }, { "epoch": 0.17918853193395623, "grad_norm": 0.6875, "learning_rate": 0.0018856189904368774, "loss": 2.3186, "step": 1400 }, { "epoch": 0.17931652374248047, "grad_norm": 0.78515625, "learning_rate": 0.0018854263839858025, "loss": 2.8476, "step": 1401 }, { "epoch": 0.17944451555100474, "grad_norm": 0.64453125, "learning_rate": 0.001885233625359838, "loss": 2.2761, "step": 1402 }, { "epoch": 0.17957250735952898, "grad_norm": 0.640625, "learning_rate": 0.0018850407145921118, "loss": 1.9299, "step": 1403 }, { "epoch": 0.17970049916805325, "grad_norm": 0.8359375, "learning_rate": 0.0018848476517157795, "loss": 2.463, "step": 1404 }, { "epoch": 0.1798284909765775, "grad_norm": 1.3984375, "learning_rate": 0.0018846544367640218, "loss": 2.7922, "step": 1405 }, { "epoch": 0.17995648278510176, "grad_norm": 0.78125, "learning_rate": 0.0018844610697700457, "loss": 3.1053, "step": 1406 }, { "epoch": 0.180084474593626, "grad_norm": 0.80078125, "learning_rate": 0.0018842675507670847, "loss": 2.8666, "step": 1407 }, { "epoch": 0.18021246640215027, "grad_norm": 0.81640625, "learning_rate": 0.0018840738797883982, "loss": 2.0912, "step": 1408 }, { "epoch": 0.18034045821067451, "grad_norm": 0.69921875, "learning_rate": 0.0018838800568672714, "loss": 2.5875, "step": 1409 }, { "epoch": 0.18046845001919878, "grad_norm": 0.6640625, "learning_rate": 0.0018836860820370164, "loss": 2.6742, "step": 1410 }, { "epoch": 0.18059644182772303, "grad_norm": 0.73828125, "learning_rate": 0.00188349195533097, "loss": 2.808, "step": 1411 }, { "epoch": 0.18072443363624727, "grad_norm": 0.7578125, "learning_rate": 0.001883297676782497, "loss": 2.5162, "step": 1412 }, { "epoch": 0.18085242544477154, "grad_norm": 0.71875, "learning_rate": 0.001883103246424987, "loss": 2.8881, "step": 1413 }, { "epoch": 0.18098041725329578, "grad_norm": 0.92578125, "learning_rate": 0.0018829086642918559, "loss": 3.3223, "step": 1414 }, { "epoch": 0.18110840906182005, "grad_norm": 0.62890625, "learning_rate": 0.001882713930416546, "loss": 2.2337, "step": 1415 }, { "epoch": 0.1812364008703443, "grad_norm": 0.75390625, "learning_rate": 0.001882519044832525, "loss": 2.6395, "step": 1416 }, { "epoch": 0.18136439267886856, "grad_norm": 0.77734375, "learning_rate": 0.0018823240075732878, "loss": 2.8588, "step": 1417 }, { "epoch": 0.1814923844873928, "grad_norm": 0.78125, "learning_rate": 0.0018821288186723541, "loss": 2.7788, "step": 1418 }, { "epoch": 0.18162037629591707, "grad_norm": 0.80078125, "learning_rate": 0.0018819334781632708, "loss": 2.5142, "step": 1419 }, { "epoch": 0.1817483681044413, "grad_norm": 0.72265625, "learning_rate": 0.0018817379860796102, "loss": 2.4458, "step": 1420 }, { "epoch": 0.18187635991296558, "grad_norm": 0.87890625, "learning_rate": 0.0018815423424549708, "loss": 3.6767, "step": 1421 }, { "epoch": 0.18200435172148982, "grad_norm": 0.734375, "learning_rate": 0.0018813465473229765, "loss": 2.2514, "step": 1422 }, { "epoch": 0.1821323435300141, "grad_norm": 0.88671875, "learning_rate": 0.001881150600717279, "loss": 3.2388, "step": 1423 }, { "epoch": 0.18226033533853833, "grad_norm": 0.65625, "learning_rate": 0.001880954502671554, "loss": 2.5086, "step": 1424 }, { "epoch": 0.1823883271470626, "grad_norm": 0.66796875, "learning_rate": 0.0018807582532195043, "loss": 2.1166, "step": 1425 }, { "epoch": 0.18251631895558684, "grad_norm": 0.921875, "learning_rate": 0.001880561852394859, "loss": 2.837, "step": 1426 }, { "epoch": 0.18264431076411108, "grad_norm": 0.7421875, "learning_rate": 0.0018803653002313723, "loss": 2.4763, "step": 1427 }, { "epoch": 0.18277230257263535, "grad_norm": 0.6953125, "learning_rate": 0.0018801685967628253, "loss": 2.5659, "step": 1428 }, { "epoch": 0.1829002943811596, "grad_norm": 0.66015625, "learning_rate": 0.001879971742023024, "loss": 2.2847, "step": 1429 }, { "epoch": 0.18302828618968386, "grad_norm": 0.7109375, "learning_rate": 0.0018797747360458016, "loss": 2.5156, "step": 1430 }, { "epoch": 0.1831562779982081, "grad_norm": 0.7734375, "learning_rate": 0.0018795775788650172, "loss": 3.3477, "step": 1431 }, { "epoch": 0.18328426980673238, "grad_norm": 0.72265625, "learning_rate": 0.0018793802705145543, "loss": 2.5286, "step": 1432 }, { "epoch": 0.18341226161525662, "grad_norm": 0.7890625, "learning_rate": 0.0018791828110283244, "loss": 2.8812, "step": 1433 }, { "epoch": 0.18354025342378089, "grad_norm": 0.90625, "learning_rate": 0.0018789852004402641, "loss": 2.9333, "step": 1434 }, { "epoch": 0.18366824523230513, "grad_norm": 0.7578125, "learning_rate": 0.0018787874387843355, "loss": 3.843, "step": 1435 }, { "epoch": 0.1837962370408294, "grad_norm": 0.70703125, "learning_rate": 0.0018785895260945275, "loss": 2.3381, "step": 1436 }, { "epoch": 0.18392422884935364, "grad_norm": 0.69921875, "learning_rate": 0.0018783914624048545, "loss": 2.3582, "step": 1437 }, { "epoch": 0.1840522206578779, "grad_norm": 0.65625, "learning_rate": 0.001878193247749357, "loss": 2.1914, "step": 1438 }, { "epoch": 0.18418021246640215, "grad_norm": 0.6640625, "learning_rate": 0.0018779948821621016, "loss": 2.2368, "step": 1439 }, { "epoch": 0.18430820427492642, "grad_norm": 0.73828125, "learning_rate": 0.00187779636567718, "loss": 2.1871, "step": 1440 }, { "epoch": 0.18443619608345066, "grad_norm": 0.71484375, "learning_rate": 0.0018775976983287114, "loss": 3.151, "step": 1441 }, { "epoch": 0.1845641878919749, "grad_norm": 0.8984375, "learning_rate": 0.0018773988801508395, "loss": 3.6242, "step": 1442 }, { "epoch": 0.18469217970049917, "grad_norm": 0.77734375, "learning_rate": 0.0018771999111777344, "loss": 2.7166, "step": 1443 }, { "epoch": 0.1848201715090234, "grad_norm": 0.76953125, "learning_rate": 0.001877000791443592, "loss": 3.1726, "step": 1444 }, { "epoch": 0.18494816331754768, "grad_norm": 0.7109375, "learning_rate": 0.0018768015209826346, "loss": 2.6089, "step": 1445 }, { "epoch": 0.18507615512607192, "grad_norm": 0.66015625, "learning_rate": 0.00187660209982911, "loss": 1.9994, "step": 1446 }, { "epoch": 0.1852041469345962, "grad_norm": 0.703125, "learning_rate": 0.0018764025280172919, "loss": 2.4597, "step": 1447 }, { "epoch": 0.18533213874312043, "grad_norm": 0.73046875, "learning_rate": 0.0018762028055814798, "loss": 3.0217, "step": 1448 }, { "epoch": 0.1854601305516447, "grad_norm": 0.765625, "learning_rate": 0.0018760029325559993, "loss": 2.5456, "step": 1449 }, { "epoch": 0.18558812236016894, "grad_norm": 0.69140625, "learning_rate": 0.001875802908975202, "loss": 2.3325, "step": 1450 }, { "epoch": 0.1857161141686932, "grad_norm": 0.67578125, "learning_rate": 0.0018756027348734651, "loss": 2.3227, "step": 1451 }, { "epoch": 0.18584410597721746, "grad_norm": 1.2890625, "learning_rate": 0.0018754024102851919, "loss": 2.4312, "step": 1452 }, { "epoch": 0.18597209778574172, "grad_norm": 0.875, "learning_rate": 0.001875201935244811, "loss": 2.9671, "step": 1453 }, { "epoch": 0.18610008959426597, "grad_norm": 0.9609375, "learning_rate": 0.001875001309786778, "loss": 2.3042, "step": 1454 }, { "epoch": 0.1862280814027902, "grad_norm": 0.65625, "learning_rate": 0.0018748005339455731, "loss": 2.316, "step": 1455 }, { "epoch": 0.18635607321131448, "grad_norm": 0.859375, "learning_rate": 0.001874599607755703, "loss": 3.209, "step": 1456 }, { "epoch": 0.18648406501983872, "grad_norm": 0.765625, "learning_rate": 0.0018743985312517, "loss": 2.3886, "step": 1457 }, { "epoch": 0.186612056828363, "grad_norm": 1.0703125, "learning_rate": 0.0018741973044681223, "loss": 2.1587, "step": 1458 }, { "epoch": 0.18674004863688723, "grad_norm": 0.69140625, "learning_rate": 0.0018739959274395549, "loss": 2.5026, "step": 1459 }, { "epoch": 0.1868680404454115, "grad_norm": 1.046875, "learning_rate": 0.0018737944002006062, "loss": 3.3803, "step": 1460 }, { "epoch": 0.18699603225393574, "grad_norm": 0.671875, "learning_rate": 0.0018735927227859132, "loss": 3.0664, "step": 1461 }, { "epoch": 0.18712402406246, "grad_norm": 0.78125, "learning_rate": 0.001873390895230137, "loss": 3.0797, "step": 1462 }, { "epoch": 0.18725201587098425, "grad_norm": 0.6640625, "learning_rate": 0.0018731889175679646, "loss": 2.2693, "step": 1463 }, { "epoch": 0.18738000767950852, "grad_norm": 0.7421875, "learning_rate": 0.0018729867898341097, "loss": 3.7972, "step": 1464 }, { "epoch": 0.18750799948803276, "grad_norm": 0.734375, "learning_rate": 0.0018727845120633108, "loss": 1.7963, "step": 1465 }, { "epoch": 0.18763599129655703, "grad_norm": 1.2734375, "learning_rate": 0.0018725820842903328, "loss": 2.7796, "step": 1466 }, { "epoch": 0.18776398310508127, "grad_norm": 0.66015625, "learning_rate": 0.0018723795065499662, "loss": 2.2446, "step": 1467 }, { "epoch": 0.18789197491360554, "grad_norm": 0.60546875, "learning_rate": 0.0018721767788770275, "loss": 1.9024, "step": 1468 }, { "epoch": 0.18801996672212978, "grad_norm": 1.2890625, "learning_rate": 0.001871973901306358, "loss": 2.7457, "step": 1469 }, { "epoch": 0.18814795853065402, "grad_norm": 0.70703125, "learning_rate": 0.0018717708738728263, "loss": 2.1508, "step": 1470 }, { "epoch": 0.1882759503391783, "grad_norm": 1.0078125, "learning_rate": 0.0018715676966113255, "loss": 3.3466, "step": 1471 }, { "epoch": 0.18840394214770254, "grad_norm": 0.7890625, "learning_rate": 0.0018713643695567753, "loss": 3.7621, "step": 1472 }, { "epoch": 0.1885319339562268, "grad_norm": 0.79296875, "learning_rate": 0.0018711608927441201, "loss": 2.8481, "step": 1473 }, { "epoch": 0.18865992576475105, "grad_norm": 0.98828125, "learning_rate": 0.0018709572662083316, "loss": 2.7465, "step": 1474 }, { "epoch": 0.18878791757327532, "grad_norm": 0.84765625, "learning_rate": 0.0018707534899844057, "loss": 2.9764, "step": 1475 }, { "epoch": 0.18891590938179956, "grad_norm": 0.62890625, "learning_rate": 0.0018705495641073645, "loss": 2.4355, "step": 1476 }, { "epoch": 0.18904390119032383, "grad_norm": 0.8671875, "learning_rate": 0.0018703454886122566, "loss": 3.0336, "step": 1477 }, { "epoch": 0.18917189299884807, "grad_norm": 0.76171875, "learning_rate": 0.0018701412635341553, "loss": 3.1863, "step": 1478 }, { "epoch": 0.18929988480737234, "grad_norm": 0.6640625, "learning_rate": 0.0018699368889081597, "loss": 2.3703, "step": 1479 }, { "epoch": 0.18942787661589658, "grad_norm": 0.90625, "learning_rate": 0.0018697323647693955, "loss": 2.6363, "step": 1480 }, { "epoch": 0.18955586842442085, "grad_norm": 0.70703125, "learning_rate": 0.0018695276911530132, "loss": 1.9722, "step": 1481 }, { "epoch": 0.1896838602329451, "grad_norm": 0.8046875, "learning_rate": 0.0018693228680941893, "loss": 3.1991, "step": 1482 }, { "epoch": 0.18981185204146936, "grad_norm": 0.91015625, "learning_rate": 0.001869117895628126, "loss": 2.3034, "step": 1483 }, { "epoch": 0.1899398438499936, "grad_norm": 0.7578125, "learning_rate": 0.0018689127737900508, "loss": 2.9374, "step": 1484 }, { "epoch": 0.19006783565851784, "grad_norm": 0.703125, "learning_rate": 0.001868707502615218, "loss": 2.7309, "step": 1485 }, { "epoch": 0.1901958274670421, "grad_norm": 0.67578125, "learning_rate": 0.0018685020821389059, "loss": 2.6579, "step": 1486 }, { "epoch": 0.19032381927556635, "grad_norm": 0.68359375, "learning_rate": 0.00186829651239642, "loss": 2.4343, "step": 1487 }, { "epoch": 0.19045181108409062, "grad_norm": 0.97265625, "learning_rate": 0.0018680907934230905, "loss": 2.795, "step": 1488 }, { "epoch": 0.19057980289261486, "grad_norm": 0.75390625, "learning_rate": 0.0018678849252542734, "loss": 2.5973, "step": 1489 }, { "epoch": 0.19070779470113913, "grad_norm": 0.8515625, "learning_rate": 0.0018676789079253508, "loss": 2.3545, "step": 1490 }, { "epoch": 0.19083578650966337, "grad_norm": 0.8203125, "learning_rate": 0.0018674727414717302, "loss": 2.1406, "step": 1491 }, { "epoch": 0.19096377831818764, "grad_norm": 1.0703125, "learning_rate": 0.001867266425928844, "loss": 2.5358, "step": 1492 }, { "epoch": 0.19109177012671189, "grad_norm": 0.88671875, "learning_rate": 0.0018670599613321518, "loss": 2.5614, "step": 1493 }, { "epoch": 0.19121976193523615, "grad_norm": 0.8125, "learning_rate": 0.001866853347717137, "loss": 2.8529, "step": 1494 }, { "epoch": 0.1913477537437604, "grad_norm": 0.78125, "learning_rate": 0.0018666465851193102, "loss": 2.5765, "step": 1495 }, { "epoch": 0.19147574555228467, "grad_norm": 0.8359375, "learning_rate": 0.0018664396735742065, "loss": 2.7882, "step": 1496 }, { "epoch": 0.1916037373608089, "grad_norm": 0.65234375, "learning_rate": 0.001866232613117387, "loss": 2.9408, "step": 1497 }, { "epoch": 0.19173172916933318, "grad_norm": 0.59375, "learning_rate": 0.001866025403784439, "loss": 1.8605, "step": 1498 }, { "epoch": 0.19185972097785742, "grad_norm": 0.94140625, "learning_rate": 0.0018658180456109738, "loss": 2.6042, "step": 1499 }, { "epoch": 0.19198771278638166, "grad_norm": 0.80078125, "learning_rate": 0.0018656105386326298, "loss": 2.9134, "step": 1500 }, { "epoch": 0.19211570459490593, "grad_norm": 0.73046875, "learning_rate": 0.0018654028828850703, "loss": 2.7119, "step": 1501 }, { "epoch": 0.19224369640343017, "grad_norm": 0.671875, "learning_rate": 0.0018651950784039847, "loss": 2.3791, "step": 1502 }, { "epoch": 0.19237168821195444, "grad_norm": 0.765625, "learning_rate": 0.0018649871252250875, "loss": 2.3598, "step": 1503 }, { "epoch": 0.19249968002047868, "grad_norm": 0.62890625, "learning_rate": 0.0018647790233841182, "loss": 2.6392, "step": 1504 }, { "epoch": 0.19262767182900295, "grad_norm": 0.84765625, "learning_rate": 0.001864570772916843, "loss": 2.8219, "step": 1505 }, { "epoch": 0.1927556636375272, "grad_norm": 0.77734375, "learning_rate": 0.0018643623738590534, "loss": 2.5432, "step": 1506 }, { "epoch": 0.19288365544605146, "grad_norm": 0.6875, "learning_rate": 0.0018641538262465654, "loss": 2.167, "step": 1507 }, { "epoch": 0.1930116472545757, "grad_norm": 0.875, "learning_rate": 0.0018639451301152216, "loss": 2.97, "step": 1508 }, { "epoch": 0.19313963906309997, "grad_norm": 0.8828125, "learning_rate": 0.0018637362855008902, "loss": 3.1321, "step": 1509 }, { "epoch": 0.1932676308716242, "grad_norm": 1.2265625, "learning_rate": 0.0018635272924394642, "loss": 2.8977, "step": 1510 }, { "epoch": 0.19339562268014848, "grad_norm": 0.921875, "learning_rate": 0.0018633181509668622, "loss": 3.0134, "step": 1511 }, { "epoch": 0.19352361448867272, "grad_norm": 0.68359375, "learning_rate": 0.001863108861119029, "loss": 2.2776, "step": 1512 }, { "epoch": 0.193651606297197, "grad_norm": 0.70703125, "learning_rate": 0.001862899422931934, "loss": 2.6121, "step": 1513 }, { "epoch": 0.19377959810572123, "grad_norm": 0.68359375, "learning_rate": 0.0018626898364415732, "loss": 2.5245, "step": 1514 }, { "epoch": 0.19390758991424548, "grad_norm": 0.8828125, "learning_rate": 0.001862480101683967, "loss": 2.4596, "step": 1515 }, { "epoch": 0.19403558172276975, "grad_norm": 0.7109375, "learning_rate": 0.0018622702186951617, "loss": 2.3409, "step": 1516 }, { "epoch": 0.194163573531294, "grad_norm": 0.69140625, "learning_rate": 0.0018620601875112292, "loss": 2.0608, "step": 1517 }, { "epoch": 0.19429156533981826, "grad_norm": 0.84375, "learning_rate": 0.001861850008168267, "loss": 3.1202, "step": 1518 }, { "epoch": 0.1944195571483425, "grad_norm": 0.984375, "learning_rate": 0.0018616396807023974, "loss": 3.7863, "step": 1519 }, { "epoch": 0.19454754895686677, "grad_norm": 0.8828125, "learning_rate": 0.0018614292051497686, "loss": 3.2588, "step": 1520 }, { "epoch": 0.194675540765391, "grad_norm": 1.0078125, "learning_rate": 0.0018612185815465546, "loss": 2.4066, "step": 1521 }, { "epoch": 0.19480353257391528, "grad_norm": 0.83203125, "learning_rate": 0.0018610078099289542, "loss": 3.1907, "step": 1522 }, { "epoch": 0.19493152438243952, "grad_norm": 0.88671875, "learning_rate": 0.0018607968903331922, "loss": 3.1121, "step": 1523 }, { "epoch": 0.1950595161909638, "grad_norm": 0.53515625, "learning_rate": 0.0018605858227955185, "loss": 1.7815, "step": 1524 }, { "epoch": 0.19518750799948803, "grad_norm": 0.6640625, "learning_rate": 0.001860374607352208, "loss": 2.689, "step": 1525 }, { "epoch": 0.1953154998080123, "grad_norm": 0.63671875, "learning_rate": 0.001860163244039562, "loss": 2.8225, "step": 1526 }, { "epoch": 0.19544349161653654, "grad_norm": 0.90625, "learning_rate": 0.0018599517328939067, "loss": 3.5599, "step": 1527 }, { "epoch": 0.19557148342506078, "grad_norm": 0.62890625, "learning_rate": 0.0018597400739515932, "loss": 2.2174, "step": 1528 }, { "epoch": 0.19569947523358505, "grad_norm": 0.625, "learning_rate": 0.001859528267248999, "loss": 2.305, "step": 1529 }, { "epoch": 0.1958274670421093, "grad_norm": 0.6171875, "learning_rate": 0.0018593163128225267, "loss": 2.0925, "step": 1530 }, { "epoch": 0.19595545885063356, "grad_norm": 0.671875, "learning_rate": 0.0018591042107086039, "loss": 2.0219, "step": 1531 }, { "epoch": 0.1960834506591578, "grad_norm": 0.73828125, "learning_rate": 0.0018588919609436837, "loss": 2.6427, "step": 1532 }, { "epoch": 0.19621144246768207, "grad_norm": 0.73828125, "learning_rate": 0.0018586795635642443, "loss": 2.4987, "step": 1533 }, { "epoch": 0.19633943427620631, "grad_norm": 0.5859375, "learning_rate": 0.0018584670186067905, "loss": 1.7306, "step": 1534 }, { "epoch": 0.19646742608473058, "grad_norm": 0.65625, "learning_rate": 0.0018582543261078507, "loss": 2.872, "step": 1535 }, { "epoch": 0.19659541789325483, "grad_norm": 0.6875, "learning_rate": 0.0018580414861039804, "loss": 3.0412, "step": 1536 }, { "epoch": 0.1967234097017791, "grad_norm": 0.83984375, "learning_rate": 0.001857828498631759, "loss": 2.3755, "step": 1537 }, { "epoch": 0.19685140151030334, "grad_norm": 0.86328125, "learning_rate": 0.001857615363727792, "loss": 2.9821, "step": 1538 }, { "epoch": 0.1969793933188276, "grad_norm": 0.80859375, "learning_rate": 0.0018574020814287103, "loss": 2.4472, "step": 1539 }, { "epoch": 0.19710738512735185, "grad_norm": 0.78125, "learning_rate": 0.0018571886517711699, "loss": 2.8286, "step": 1540 }, { "epoch": 0.19723537693587612, "grad_norm": 0.67578125, "learning_rate": 0.0018569750747918516, "loss": 2.4344, "step": 1541 }, { "epoch": 0.19736336874440036, "grad_norm": 0.625, "learning_rate": 0.0018567613505274625, "loss": 2.3426, "step": 1542 }, { "epoch": 0.1974913605529246, "grad_norm": 0.7421875, "learning_rate": 0.001856547479014735, "loss": 2.6438, "step": 1543 }, { "epoch": 0.19761935236144887, "grad_norm": 0.84375, "learning_rate": 0.0018563334602904255, "loss": 3.1213, "step": 1544 }, { "epoch": 0.1977473441699731, "grad_norm": 0.7578125, "learning_rate": 0.0018561192943913171, "loss": 2.881, "step": 1545 }, { "epoch": 0.19787533597849738, "grad_norm": 0.7109375, "learning_rate": 0.001855904981354218, "loss": 2.9395, "step": 1546 }, { "epoch": 0.19800332778702162, "grad_norm": 0.6484375, "learning_rate": 0.0018556905212159607, "loss": 2.3717, "step": 1547 }, { "epoch": 0.1981313195955459, "grad_norm": 0.66015625, "learning_rate": 0.0018554759140134041, "loss": 2.3607, "step": 1548 }, { "epoch": 0.19825931140407013, "grad_norm": 0.93359375, "learning_rate": 0.0018552611597834317, "loss": 2.8245, "step": 1549 }, { "epoch": 0.1983873032125944, "grad_norm": 0.765625, "learning_rate": 0.0018550462585629527, "loss": 2.5345, "step": 1550 }, { "epoch": 0.19851529502111864, "grad_norm": 0.76953125, "learning_rate": 0.001854831210388901, "loss": 2.9771, "step": 1551 }, { "epoch": 0.1986432868296429, "grad_norm": 0.71484375, "learning_rate": 0.0018546160152982365, "loss": 2.7484, "step": 1552 }, { "epoch": 0.19877127863816715, "grad_norm": 0.7421875, "learning_rate": 0.001854400673327944, "loss": 2.4425, "step": 1553 }, { "epoch": 0.19889927044669142, "grad_norm": 0.859375, "learning_rate": 0.0018541851845150332, "loss": 3.3745, "step": 1554 }, { "epoch": 0.19902726225521566, "grad_norm": 0.71875, "learning_rate": 0.0018539695488965396, "loss": 2.3475, "step": 1555 }, { "epoch": 0.19915525406373993, "grad_norm": 1.0078125, "learning_rate": 0.0018537537665095233, "loss": 2.9094, "step": 1556 }, { "epoch": 0.19928324587226418, "grad_norm": 0.9140625, "learning_rate": 0.0018535378373910705, "loss": 2.8408, "step": 1557 }, { "epoch": 0.19941123768078842, "grad_norm": 0.79296875, "learning_rate": 0.0018533217615782918, "loss": 2.2625, "step": 1558 }, { "epoch": 0.19953922948931269, "grad_norm": 0.953125, "learning_rate": 0.0018531055391083236, "loss": 3.0588, "step": 1559 }, { "epoch": 0.19966722129783693, "grad_norm": 0.703125, "learning_rate": 0.0018528891700183268, "loss": 2.421, "step": 1560 }, { "epoch": 0.1997952131063612, "grad_norm": 0.7109375, "learning_rate": 0.0018526726543454883, "loss": 2.2038, "step": 1561 }, { "epoch": 0.19992320491488544, "grad_norm": 0.7890625, "learning_rate": 0.00185245599212702, "loss": 3.2602, "step": 1562 }, { "epoch": 0.2000511967234097, "grad_norm": 0.71484375, "learning_rate": 0.0018522391834001582, "loss": 3.0152, "step": 1563 }, { "epoch": 0.20017918853193395, "grad_norm": 0.671875, "learning_rate": 0.0018520222282021655, "loss": 2.5999, "step": 1564 }, { "epoch": 0.20030718034045822, "grad_norm": 0.796875, "learning_rate": 0.001851805126570329, "loss": 2.734, "step": 1565 }, { "epoch": 0.20043517214898246, "grad_norm": 0.72265625, "learning_rate": 0.0018515878785419612, "loss": 2.8684, "step": 1566 }, { "epoch": 0.20056316395750673, "grad_norm": 0.7421875, "learning_rate": 0.0018513704841543997, "loss": 2.5574, "step": 1567 }, { "epoch": 0.20069115576603097, "grad_norm": 0.81640625, "learning_rate": 0.0018511529434450075, "loss": 2.9679, "step": 1568 }, { "epoch": 0.20081914757455524, "grad_norm": 0.62890625, "learning_rate": 0.0018509352564511717, "loss": 2.5509, "step": 1569 }, { "epoch": 0.20094713938307948, "grad_norm": 0.65234375, "learning_rate": 0.001850717423210306, "loss": 2.1389, "step": 1570 }, { "epoch": 0.20107513119160375, "grad_norm": 0.7890625, "learning_rate": 0.0018504994437598486, "loss": 3.0495, "step": 1571 }, { "epoch": 0.201203123000128, "grad_norm": 1.1796875, "learning_rate": 0.0018502813181372627, "loss": 2.8328, "step": 1572 }, { "epoch": 0.20133111480865223, "grad_norm": 0.796875, "learning_rate": 0.0018500630463800364, "loss": 2.8888, "step": 1573 }, { "epoch": 0.2014591066171765, "grad_norm": 0.57421875, "learning_rate": 0.0018498446285256836, "loss": 1.7912, "step": 1574 }, { "epoch": 0.20158709842570074, "grad_norm": 0.6640625, "learning_rate": 0.0018496260646117429, "loss": 2.1146, "step": 1575 }, { "epoch": 0.20171509023422501, "grad_norm": 0.65234375, "learning_rate": 0.0018494073546757779, "loss": 2.7042, "step": 1576 }, { "epoch": 0.20184308204274926, "grad_norm": 0.75390625, "learning_rate": 0.0018491884987553778, "loss": 2.4871, "step": 1577 }, { "epoch": 0.20197107385127352, "grad_norm": 0.66015625, "learning_rate": 0.001848969496888156, "loss": 2.5486, "step": 1578 }, { "epoch": 0.20209906565979777, "grad_norm": 0.6875, "learning_rate": 0.0018487503491117522, "loss": 2.3281, "step": 1579 }, { "epoch": 0.20222705746832204, "grad_norm": 0.75, "learning_rate": 0.00184853105546383, "loss": 2.8293, "step": 1580 }, { "epoch": 0.20235504927684628, "grad_norm": 0.63671875, "learning_rate": 0.0018483116159820785, "loss": 1.8803, "step": 1581 }, { "epoch": 0.20248304108537055, "grad_norm": 0.703125, "learning_rate": 0.0018480920307042124, "loss": 2.9437, "step": 1582 }, { "epoch": 0.2026110328938948, "grad_norm": 0.6640625, "learning_rate": 0.0018478722996679707, "loss": 2.1813, "step": 1583 }, { "epoch": 0.20273902470241906, "grad_norm": 0.71875, "learning_rate": 0.0018476524229111178, "loss": 2.6455, "step": 1584 }, { "epoch": 0.2028670165109433, "grad_norm": 0.58984375, "learning_rate": 0.0018474324004714429, "loss": 2.2491, "step": 1585 }, { "epoch": 0.20299500831946754, "grad_norm": 0.74609375, "learning_rate": 0.0018472122323867609, "loss": 2.0946, "step": 1586 }, { "epoch": 0.2031230001279918, "grad_norm": 0.84765625, "learning_rate": 0.0018469919186949104, "loss": 1.9669, "step": 1587 }, { "epoch": 0.20325099193651605, "grad_norm": 0.7109375, "learning_rate": 0.0018467714594337568, "loss": 2.7646, "step": 1588 }, { "epoch": 0.20337898374504032, "grad_norm": 0.703125, "learning_rate": 0.0018465508546411894, "loss": 3.0168, "step": 1589 }, { "epoch": 0.20350697555356456, "grad_norm": 0.73828125, "learning_rate": 0.0018463301043551224, "loss": 3.0956, "step": 1590 }, { "epoch": 0.20363496736208883, "grad_norm": 0.99609375, "learning_rate": 0.0018461092086134953, "loss": 3.3363, "step": 1591 }, { "epoch": 0.20376295917061307, "grad_norm": 0.828125, "learning_rate": 0.0018458881674542728, "loss": 2.804, "step": 1592 }, { "epoch": 0.20389095097913734, "grad_norm": 0.6875, "learning_rate": 0.0018456669809154445, "loss": 2.6905, "step": 1593 }, { "epoch": 0.20401894278766158, "grad_norm": 0.93359375, "learning_rate": 0.0018454456490350252, "loss": 3.2209, "step": 1594 }, { "epoch": 0.20414693459618585, "grad_norm": 0.87109375, "learning_rate": 0.0018452241718510533, "loss": 2.2401, "step": 1595 }, { "epoch": 0.2042749264047101, "grad_norm": 0.921875, "learning_rate": 0.001845002549401594, "loss": 2.722, "step": 1596 }, { "epoch": 0.20440291821323436, "grad_norm": 0.65234375, "learning_rate": 0.0018447807817247369, "loss": 2.3071, "step": 1597 }, { "epoch": 0.2045309100217586, "grad_norm": 0.60546875, "learning_rate": 0.001844558868858596, "loss": 2.1875, "step": 1598 }, { "epoch": 0.20465890183028287, "grad_norm": 0.58203125, "learning_rate": 0.0018443368108413108, "loss": 2.3423, "step": 1599 }, { "epoch": 0.20478689363880712, "grad_norm": 0.68359375, "learning_rate": 0.0018441146077110453, "loss": 2.0963, "step": 1600 }, { "epoch": 0.20491488544733136, "grad_norm": 0.8671875, "learning_rate": 0.0018438922595059892, "loss": 2.5987, "step": 1601 }, { "epoch": 0.20504287725585563, "grad_norm": 0.73046875, "learning_rate": 0.0018436697662643558, "loss": 2.6389, "step": 1602 }, { "epoch": 0.20517086906437987, "grad_norm": 0.73046875, "learning_rate": 0.0018434471280243852, "loss": 2.5303, "step": 1603 }, { "epoch": 0.20529886087290414, "grad_norm": 0.76953125, "learning_rate": 0.0018432243448243408, "loss": 2.1759, "step": 1604 }, { "epoch": 0.20542685268142838, "grad_norm": 0.72265625, "learning_rate": 0.0018430014167025118, "loss": 2.9475, "step": 1605 }, { "epoch": 0.20555484448995265, "grad_norm": 0.62890625, "learning_rate": 0.0018427783436972115, "loss": 2.3167, "step": 1606 }, { "epoch": 0.2056828362984769, "grad_norm": 0.8125, "learning_rate": 0.0018425551258467792, "loss": 1.8225, "step": 1607 }, { "epoch": 0.20581082810700116, "grad_norm": 0.8984375, "learning_rate": 0.0018423317631895782, "loss": 2.9559, "step": 1608 }, { "epoch": 0.2059388199155254, "grad_norm": 0.609375, "learning_rate": 0.0018421082557639973, "loss": 2.3132, "step": 1609 }, { "epoch": 0.20606681172404967, "grad_norm": 0.6796875, "learning_rate": 0.0018418846036084493, "loss": 2.0487, "step": 1610 }, { "epoch": 0.2061948035325739, "grad_norm": 0.98828125, "learning_rate": 0.001841660806761373, "loss": 2.7038, "step": 1611 }, { "epoch": 0.20632279534109818, "grad_norm": 0.75, "learning_rate": 0.001841436865261231, "loss": 3.0643, "step": 1612 }, { "epoch": 0.20645078714962242, "grad_norm": 0.69921875, "learning_rate": 0.001841212779146512, "loss": 2.4436, "step": 1613 }, { "epoch": 0.2065787789581467, "grad_norm": 1.21875, "learning_rate": 0.0018409885484557283, "loss": 2.6367, "step": 1614 }, { "epoch": 0.20670677076667093, "grad_norm": 0.65234375, "learning_rate": 0.0018407641732274174, "loss": 2.1925, "step": 1615 }, { "epoch": 0.20683476257519517, "grad_norm": 0.6875, "learning_rate": 0.0018405396535001426, "loss": 2.7517, "step": 1616 }, { "epoch": 0.20696275438371944, "grad_norm": 0.75, "learning_rate": 0.0018403149893124904, "loss": 2.9116, "step": 1617 }, { "epoch": 0.20709074619224369, "grad_norm": 0.703125, "learning_rate": 0.001840090180703074, "loss": 2.5406, "step": 1618 }, { "epoch": 0.20721873800076795, "grad_norm": 0.640625, "learning_rate": 0.0018398652277105292, "loss": 2.3838, "step": 1619 }, { "epoch": 0.2073467298092922, "grad_norm": 0.6875, "learning_rate": 0.0018396401303735184, "loss": 2.0291, "step": 1620 }, { "epoch": 0.20747472161781647, "grad_norm": 0.62890625, "learning_rate": 0.0018394148887307285, "loss": 2.233, "step": 1621 }, { "epoch": 0.2076027134263407, "grad_norm": 0.828125, "learning_rate": 0.0018391895028208704, "loss": 3.2632, "step": 1622 }, { "epoch": 0.20773070523486498, "grad_norm": 0.70703125, "learning_rate": 0.0018389639726826808, "loss": 2.7884, "step": 1623 }, { "epoch": 0.20785869704338922, "grad_norm": 0.89453125, "learning_rate": 0.0018387382983549205, "loss": 2.2448, "step": 1624 }, { "epoch": 0.2079866888519135, "grad_norm": 0.80859375, "learning_rate": 0.0018385124798763752, "loss": 2.3446, "step": 1625 }, { "epoch": 0.20811468066043773, "grad_norm": 0.81640625, "learning_rate": 0.0018382865172858558, "loss": 2.4702, "step": 1626 }, { "epoch": 0.208242672468962, "grad_norm": 0.65625, "learning_rate": 0.0018380604106221971, "loss": 2.4118, "step": 1627 }, { "epoch": 0.20837066427748624, "grad_norm": 0.734375, "learning_rate": 0.00183783415992426, "loss": 2.8217, "step": 1628 }, { "epoch": 0.2084986560860105, "grad_norm": 0.71875, "learning_rate": 0.0018376077652309285, "loss": 2.3049, "step": 1629 }, { "epoch": 0.20862664789453475, "grad_norm": 1.4140625, "learning_rate": 0.0018373812265811125, "loss": 3.035, "step": 1630 }, { "epoch": 0.208754639703059, "grad_norm": 0.609375, "learning_rate": 0.0018371545440137465, "loss": 2.3663, "step": 1631 }, { "epoch": 0.20888263151158326, "grad_norm": 0.84375, "learning_rate": 0.0018369277175677894, "loss": 1.9331, "step": 1632 }, { "epoch": 0.2090106233201075, "grad_norm": 0.671875, "learning_rate": 0.0018367007472822252, "loss": 2.5558, "step": 1633 }, { "epoch": 0.20913861512863177, "grad_norm": 0.77734375, "learning_rate": 0.0018364736331960623, "loss": 1.6706, "step": 1634 }, { "epoch": 0.209266606937156, "grad_norm": 0.7890625, "learning_rate": 0.0018362463753483337, "loss": 2.6203, "step": 1635 }, { "epoch": 0.20939459874568028, "grad_norm": 0.6484375, "learning_rate": 0.0018360189737780981, "loss": 2.2446, "step": 1636 }, { "epoch": 0.20952259055420452, "grad_norm": 0.6796875, "learning_rate": 0.0018357914285244373, "loss": 2.0096, "step": 1637 }, { "epoch": 0.2096505823627288, "grad_norm": 0.7890625, "learning_rate": 0.0018355637396264594, "loss": 2.3997, "step": 1638 }, { "epoch": 0.20977857417125303, "grad_norm": 0.77734375, "learning_rate": 0.0018353359071232953, "loss": 2.318, "step": 1639 }, { "epoch": 0.2099065659797773, "grad_norm": 0.796875, "learning_rate": 0.001835107931054103, "loss": 2.1892, "step": 1640 }, { "epoch": 0.21003455778830155, "grad_norm": 0.7578125, "learning_rate": 0.001834879811458063, "loss": 2.743, "step": 1641 }, { "epoch": 0.21016254959682582, "grad_norm": 0.68359375, "learning_rate": 0.001834651548374382, "loss": 2.3935, "step": 1642 }, { "epoch": 0.21029054140535006, "grad_norm": 0.71484375, "learning_rate": 0.00183442314184229, "loss": 2.422, "step": 1643 }, { "epoch": 0.21041853321387433, "grad_norm": 0.75390625, "learning_rate": 0.0018341945919010433, "loss": 2.4298, "step": 1644 }, { "epoch": 0.21054652502239857, "grad_norm": 0.9765625, "learning_rate": 0.001833965898589921, "loss": 2.5201, "step": 1645 }, { "epoch": 0.2106745168309228, "grad_norm": 0.85546875, "learning_rate": 0.001833737061948228, "loss": 3.0443, "step": 1646 }, { "epoch": 0.21080250863944708, "grad_norm": 0.7578125, "learning_rate": 0.001833508082015294, "loss": 2.2609, "step": 1647 }, { "epoch": 0.21093050044797132, "grad_norm": 0.65234375, "learning_rate": 0.0018332789588304725, "loss": 2.3134, "step": 1648 }, { "epoch": 0.2110584922564956, "grad_norm": 0.8125, "learning_rate": 0.0018330496924331423, "loss": 2.7518, "step": 1649 }, { "epoch": 0.21118648406501983, "grad_norm": 0.6875, "learning_rate": 0.0018328202828627065, "loss": 2.1195, "step": 1650 }, { "epoch": 0.2113144758735441, "grad_norm": 0.63671875, "learning_rate": 0.0018325907301585925, "loss": 2.7252, "step": 1651 }, { "epoch": 0.21144246768206834, "grad_norm": 0.671875, "learning_rate": 0.001832361034360253, "loss": 2.8943, "step": 1652 }, { "epoch": 0.2115704594905926, "grad_norm": 0.7109375, "learning_rate": 0.001832131195507165, "loss": 2.7617, "step": 1653 }, { "epoch": 0.21169845129911685, "grad_norm": 0.6328125, "learning_rate": 0.00183190121363883, "loss": 2.2553, "step": 1654 }, { "epoch": 0.21182644310764112, "grad_norm": 0.70703125, "learning_rate": 0.0018316710887947738, "loss": 2.5289, "step": 1655 }, { "epoch": 0.21195443491616536, "grad_norm": 0.69921875, "learning_rate": 0.0018314408210145477, "loss": 2.7355, "step": 1656 }, { "epoch": 0.21208242672468963, "grad_norm": 0.8203125, "learning_rate": 0.0018312104103377263, "loss": 2.4167, "step": 1657 }, { "epoch": 0.21221041853321387, "grad_norm": 0.71484375, "learning_rate": 0.0018309798568039099, "loss": 2.6566, "step": 1658 }, { "epoch": 0.21233841034173812, "grad_norm": 0.65234375, "learning_rate": 0.0018307491604527223, "loss": 2.1224, "step": 1659 }, { "epoch": 0.21246640215026238, "grad_norm": 0.625, "learning_rate": 0.001830518321323813, "loss": 2.2352, "step": 1660 }, { "epoch": 0.21259439395878663, "grad_norm": 0.65234375, "learning_rate": 0.0018302873394568553, "loss": 1.8442, "step": 1661 }, { "epoch": 0.2127223857673109, "grad_norm": 0.74609375, "learning_rate": 0.0018300562148915467, "loss": 2.8491, "step": 1662 }, { "epoch": 0.21285037757583514, "grad_norm": 0.84375, "learning_rate": 0.0018298249476676105, "loss": 2.6131, "step": 1663 }, { "epoch": 0.2129783693843594, "grad_norm": 0.8359375, "learning_rate": 0.0018295935378247932, "loss": 2.3656, "step": 1664 }, { "epoch": 0.21310636119288365, "grad_norm": 0.80859375, "learning_rate": 0.0018293619854028666, "loss": 2.1728, "step": 1665 }, { "epoch": 0.21323435300140792, "grad_norm": 0.8125, "learning_rate": 0.0018291302904416265, "loss": 2.6414, "step": 1666 }, { "epoch": 0.21336234480993216, "grad_norm": 0.9375, "learning_rate": 0.0018288984529808937, "loss": 2.4337, "step": 1667 }, { "epoch": 0.21349033661845643, "grad_norm": 0.61328125, "learning_rate": 0.0018286664730605127, "loss": 1.952, "step": 1668 }, { "epoch": 0.21361832842698067, "grad_norm": 0.8046875, "learning_rate": 0.001828434350720354, "loss": 2.8447, "step": 1669 }, { "epoch": 0.21374632023550494, "grad_norm": 0.83984375, "learning_rate": 0.0018282020860003105, "loss": 2.0582, "step": 1670 }, { "epoch": 0.21387431204402918, "grad_norm": 0.6796875, "learning_rate": 0.0018279696789403014, "loss": 2.3149, "step": 1671 }, { "epoch": 0.21400230385255345, "grad_norm": 0.73046875, "learning_rate": 0.0018277371295802691, "loss": 2.3914, "step": 1672 }, { "epoch": 0.2141302956610777, "grad_norm": 0.671875, "learning_rate": 0.0018275044379601813, "loss": 2.0256, "step": 1673 }, { "epoch": 0.21425828746960193, "grad_norm": 0.90234375, "learning_rate": 0.00182727160412003, "loss": 3.1432, "step": 1674 }, { "epoch": 0.2143862792781262, "grad_norm": 0.65625, "learning_rate": 0.001827038628099831, "loss": 2.5148, "step": 1675 }, { "epoch": 0.21451427108665044, "grad_norm": 0.73828125, "learning_rate": 0.0018268055099396254, "loss": 2.1723, "step": 1676 }, { "epoch": 0.2146422628951747, "grad_norm": 0.7421875, "learning_rate": 0.001826572249679478, "loss": 2.1861, "step": 1677 }, { "epoch": 0.21477025470369895, "grad_norm": 0.703125, "learning_rate": 0.0018263388473594789, "loss": 2.1927, "step": 1678 }, { "epoch": 0.21489824651222322, "grad_norm": 0.734375, "learning_rate": 0.0018261053030197414, "loss": 2.2532, "step": 1679 }, { "epoch": 0.21502623832074746, "grad_norm": 1.0078125, "learning_rate": 0.0018258716167004045, "loss": 1.9148, "step": 1680 }, { "epoch": 0.21515423012927173, "grad_norm": 0.78125, "learning_rate": 0.0018256377884416307, "loss": 2.2142, "step": 1681 }, { "epoch": 0.21528222193779598, "grad_norm": 1.078125, "learning_rate": 0.0018254038182836069, "loss": 3.1401, "step": 1682 }, { "epoch": 0.21541021374632024, "grad_norm": 0.9296875, "learning_rate": 0.001825169706266545, "loss": 2.7409, "step": 1683 }, { "epoch": 0.2155382055548445, "grad_norm": 0.79296875, "learning_rate": 0.001824935452430681, "loss": 1.7577, "step": 1684 }, { "epoch": 0.21566619736336876, "grad_norm": 0.68359375, "learning_rate": 0.0018247010568162752, "loss": 1.7475, "step": 1685 }, { "epoch": 0.215794189171893, "grad_norm": 1.8203125, "learning_rate": 0.0018244665194636122, "loss": 2.1506, "step": 1686 }, { "epoch": 0.21592218098041727, "grad_norm": 0.83203125, "learning_rate": 0.0018242318404130009, "loss": 2.3686, "step": 1687 }, { "epoch": 0.2160501727889415, "grad_norm": 0.76171875, "learning_rate": 0.001823997019704775, "loss": 2.0091, "step": 1688 }, { "epoch": 0.21617816459746575, "grad_norm": 0.70703125, "learning_rate": 0.001823762057379292, "loss": 2.331, "step": 1689 }, { "epoch": 0.21630615640599002, "grad_norm": 0.77734375, "learning_rate": 0.0018235269534769341, "loss": 2.555, "step": 1690 }, { "epoch": 0.21643414821451426, "grad_norm": 0.6328125, "learning_rate": 0.0018232917080381078, "loss": 2.1175, "step": 1691 }, { "epoch": 0.21656214002303853, "grad_norm": 0.74609375, "learning_rate": 0.0018230563211032438, "loss": 2.5716, "step": 1692 }, { "epoch": 0.21669013183156277, "grad_norm": 0.703125, "learning_rate": 0.001822820792712797, "loss": 2.6372, "step": 1693 }, { "epoch": 0.21681812364008704, "grad_norm": 0.890625, "learning_rate": 0.0018225851229072471, "loss": 2.1453, "step": 1694 }, { "epoch": 0.21694611544861128, "grad_norm": 0.828125, "learning_rate": 0.0018223493117270975, "loss": 1.935, "step": 1695 }, { "epoch": 0.21707410725713555, "grad_norm": 0.74609375, "learning_rate": 0.0018221133592128762, "loss": 2.7957, "step": 1696 }, { "epoch": 0.2172020990656598, "grad_norm": 0.83203125, "learning_rate": 0.0018218772654051353, "loss": 2.1966, "step": 1697 }, { "epoch": 0.21733009087418406, "grad_norm": 0.68359375, "learning_rate": 0.001821641030344452, "loss": 2.2411, "step": 1698 }, { "epoch": 0.2174580826827083, "grad_norm": 0.80859375, "learning_rate": 0.0018214046540714266, "loss": 2.3865, "step": 1699 }, { "epoch": 0.21758607449123257, "grad_norm": 0.96875, "learning_rate": 0.0018211681366266843, "loss": 1.9136, "step": 1700 }, { "epoch": 0.21771406629975681, "grad_norm": 0.65625, "learning_rate": 0.0018209314780508743, "loss": 1.9783, "step": 1701 }, { "epoch": 0.21784205810828108, "grad_norm": 0.81640625, "learning_rate": 0.0018206946783846708, "loss": 2.8464, "step": 1702 }, { "epoch": 0.21797004991680533, "grad_norm": 0.5859375, "learning_rate": 0.0018204577376687708, "loss": 1.816, "step": 1703 }, { "epoch": 0.21809804172532957, "grad_norm": 0.6796875, "learning_rate": 0.001820220655943897, "loss": 1.8659, "step": 1704 }, { "epoch": 0.21822603353385384, "grad_norm": 0.9453125, "learning_rate": 0.0018199834332507953, "loss": 2.701, "step": 1705 }, { "epoch": 0.21835402534237808, "grad_norm": 0.81640625, "learning_rate": 0.001819746069630237, "loss": 2.6888, "step": 1706 }, { "epoch": 0.21848201715090235, "grad_norm": 0.78125, "learning_rate": 0.0018195085651230164, "loss": 2.4091, "step": 1707 }, { "epoch": 0.2186100089594266, "grad_norm": 0.6640625, "learning_rate": 0.0018192709197699527, "loss": 2.0764, "step": 1708 }, { "epoch": 0.21873800076795086, "grad_norm": 0.9140625, "learning_rate": 0.0018190331336118883, "loss": 2.4103, "step": 1709 }, { "epoch": 0.2188659925764751, "grad_norm": 0.78515625, "learning_rate": 0.0018187952066896918, "loss": 1.6044, "step": 1710 }, { "epoch": 0.21899398438499937, "grad_norm": 0.859375, "learning_rate": 0.0018185571390442541, "loss": 2.733, "step": 1711 }, { "epoch": 0.2191219761935236, "grad_norm": 0.6875, "learning_rate": 0.0018183189307164913, "loss": 2.6466, "step": 1712 }, { "epoch": 0.21924996800204788, "grad_norm": 0.76953125, "learning_rate": 0.0018180805817473434, "loss": 2.4262, "step": 1713 }, { "epoch": 0.21937795981057212, "grad_norm": 0.734375, "learning_rate": 0.0018178420921777742, "loss": 2.2208, "step": 1714 }, { "epoch": 0.2195059516190964, "grad_norm": 0.84375, "learning_rate": 0.0018176034620487717, "loss": 1.8573, "step": 1715 }, { "epoch": 0.21963394342762063, "grad_norm": 0.796875, "learning_rate": 0.0018173646914013495, "loss": 2.4445, "step": 1716 }, { "epoch": 0.21976193523614487, "grad_norm": 0.69140625, "learning_rate": 0.0018171257802765432, "loss": 2.8525, "step": 1717 }, { "epoch": 0.21988992704466914, "grad_norm": 0.84375, "learning_rate": 0.001816886728715414, "loss": 2.616, "step": 1718 }, { "epoch": 0.22001791885319338, "grad_norm": 0.765625, "learning_rate": 0.0018166475367590467, "loss": 2.9834, "step": 1719 }, { "epoch": 0.22014591066171765, "grad_norm": 1.015625, "learning_rate": 0.0018164082044485501, "loss": 2.4314, "step": 1720 }, { "epoch": 0.2202739024702419, "grad_norm": 0.98828125, "learning_rate": 0.0018161687318250578, "loss": 2.7677, "step": 1721 }, { "epoch": 0.22040189427876616, "grad_norm": 0.71484375, "learning_rate": 0.0018159291189297267, "loss": 2.5941, "step": 1722 }, { "epoch": 0.2205298860872904, "grad_norm": 0.640625, "learning_rate": 0.0018156893658037383, "loss": 2.5202, "step": 1723 }, { "epoch": 0.22065787789581467, "grad_norm": 0.7734375, "learning_rate": 0.0018154494724882978, "loss": 2.3138, "step": 1724 }, { "epoch": 0.22078586970433892, "grad_norm": 0.703125, "learning_rate": 0.0018152094390246351, "loss": 1.8814, "step": 1725 }, { "epoch": 0.22091386151286319, "grad_norm": 0.6484375, "learning_rate": 0.0018149692654540037, "loss": 2.2193, "step": 1726 }, { "epoch": 0.22104185332138743, "grad_norm": 0.76953125, "learning_rate": 0.0018147289518176818, "loss": 2.1171, "step": 1727 }, { "epoch": 0.2211698451299117, "grad_norm": 0.65625, "learning_rate": 0.0018144884981569703, "loss": 2.4629, "step": 1728 }, { "epoch": 0.22129783693843594, "grad_norm": 0.78515625, "learning_rate": 0.0018142479045131954, "loss": 1.9991, "step": 1729 }, { "epoch": 0.2214258287469602, "grad_norm": 0.6484375, "learning_rate": 0.0018140071709277072, "loss": 1.9095, "step": 1730 }, { "epoch": 0.22155382055548445, "grad_norm": 1.0859375, "learning_rate": 0.0018137662974418798, "loss": 3.4107, "step": 1731 }, { "epoch": 0.2216818123640087, "grad_norm": 0.77734375, "learning_rate": 0.0018135252840971111, "loss": 1.5575, "step": 1732 }, { "epoch": 0.22180980417253296, "grad_norm": 0.88671875, "learning_rate": 0.001813284130934823, "loss": 2.7152, "step": 1733 }, { "epoch": 0.2219377959810572, "grad_norm": 0.75390625, "learning_rate": 0.0018130428379964615, "loss": 2.6445, "step": 1734 }, { "epoch": 0.22206578778958147, "grad_norm": 0.69140625, "learning_rate": 0.001812801405323497, "loss": 1.793, "step": 1735 }, { "epoch": 0.2221937795981057, "grad_norm": 0.69140625, "learning_rate": 0.0018125598329574233, "loss": 2.1555, "step": 1736 }, { "epoch": 0.22232177140662998, "grad_norm": 0.69140625, "learning_rate": 0.001812318120939759, "loss": 2.2048, "step": 1737 }, { "epoch": 0.22244976321515422, "grad_norm": 0.68359375, "learning_rate": 0.001812076269312046, "loss": 2.3838, "step": 1738 }, { "epoch": 0.2225777550236785, "grad_norm": 0.66796875, "learning_rate": 0.00181183427811585, "loss": 2.3338, "step": 1739 }, { "epoch": 0.22270574683220273, "grad_norm": 0.79296875, "learning_rate": 0.0018115921473927617, "loss": 2.952, "step": 1740 }, { "epoch": 0.222833738640727, "grad_norm": 0.78515625, "learning_rate": 0.0018113498771843949, "loss": 2.3862, "step": 1741 }, { "epoch": 0.22296173044925124, "grad_norm": 0.609375, "learning_rate": 0.0018111074675323878, "loss": 2.0935, "step": 1742 }, { "epoch": 0.2230897222577755, "grad_norm": 0.625, "learning_rate": 0.0018108649184784022, "loss": 1.8587, "step": 1743 }, { "epoch": 0.22321771406629975, "grad_norm": 0.7421875, "learning_rate": 0.0018106222300641247, "loss": 2.6438, "step": 1744 }, { "epoch": 0.22334570587482402, "grad_norm": 0.765625, "learning_rate": 0.0018103794023312644, "loss": 2.1204, "step": 1745 }, { "epoch": 0.22347369768334827, "grad_norm": 1.484375, "learning_rate": 0.0018101364353215559, "loss": 2.2933, "step": 1746 }, { "epoch": 0.2236016894918725, "grad_norm": 0.65234375, "learning_rate": 0.0018098933290767566, "loss": 2.2464, "step": 1747 }, { "epoch": 0.22372968130039678, "grad_norm": 0.6953125, "learning_rate": 0.0018096500836386484, "loss": 2.2585, "step": 1748 }, { "epoch": 0.22385767310892102, "grad_norm": 0.59765625, "learning_rate": 0.001809406699049037, "loss": 1.7101, "step": 1749 }, { "epoch": 0.2239856649174453, "grad_norm": 0.734375, "learning_rate": 0.001809163175349752, "loss": 1.9935, "step": 1750 }, { "epoch": 0.22411365672596953, "grad_norm": 0.703125, "learning_rate": 0.0018089195125826464, "loss": 1.9987, "step": 1751 }, { "epoch": 0.2242416485344938, "grad_norm": 1.765625, "learning_rate": 0.0018086757107895984, "loss": 2.2874, "step": 1752 }, { "epoch": 0.22436964034301804, "grad_norm": 0.67578125, "learning_rate": 0.001808431770012509, "loss": 2.0606, "step": 1753 }, { "epoch": 0.2244976321515423, "grad_norm": 0.66796875, "learning_rate": 0.0018081876902933031, "loss": 2.2475, "step": 1754 }, { "epoch": 0.22462562396006655, "grad_norm": 0.87890625, "learning_rate": 0.00180794347167393, "loss": 2.2051, "step": 1755 }, { "epoch": 0.22475361576859082, "grad_norm": 0.67578125, "learning_rate": 0.0018076991141963627, "loss": 2.2479, "step": 1756 }, { "epoch": 0.22488160757711506, "grad_norm": 0.74609375, "learning_rate": 0.001807454617902598, "loss": 2.3265, "step": 1757 }, { "epoch": 0.22500959938563933, "grad_norm": 1.65625, "learning_rate": 0.0018072099828346563, "loss": 2.5512, "step": 1758 }, { "epoch": 0.22513759119416357, "grad_norm": 0.71484375, "learning_rate": 0.0018069652090345824, "loss": 2.1756, "step": 1759 }, { "epoch": 0.22526558300268784, "grad_norm": 1.40625, "learning_rate": 0.0018067202965444445, "loss": 2.7058, "step": 1760 }, { "epoch": 0.22539357481121208, "grad_norm": 0.8203125, "learning_rate": 0.0018064752454063347, "loss": 2.8025, "step": 1761 }, { "epoch": 0.22552156661973632, "grad_norm": 0.6875, "learning_rate": 0.001806230055662369, "loss": 2.4627, "step": 1762 }, { "epoch": 0.2256495584282606, "grad_norm": 0.64453125, "learning_rate": 0.0018059847273546874, "loss": 2.1257, "step": 1763 }, { "epoch": 0.22577755023678484, "grad_norm": 0.75390625, "learning_rate": 0.0018057392605254536, "loss": 2.1263, "step": 1764 }, { "epoch": 0.2259055420453091, "grad_norm": 0.6953125, "learning_rate": 0.0018054936552168548, "loss": 2.4031, "step": 1765 }, { "epoch": 0.22603353385383335, "grad_norm": 0.82421875, "learning_rate": 0.0018052479114711026, "loss": 3.4957, "step": 1766 }, { "epoch": 0.22616152566235762, "grad_norm": 1.046875, "learning_rate": 0.001805002029330432, "loss": 3.1277, "step": 1767 }, { "epoch": 0.22628951747088186, "grad_norm": 0.7734375, "learning_rate": 0.0018047560088371012, "loss": 2.2773, "step": 1768 }, { "epoch": 0.22641750927940613, "grad_norm": 0.76171875, "learning_rate": 0.0018045098500333939, "loss": 2.4087, "step": 1769 }, { "epoch": 0.22654550108793037, "grad_norm": 0.55859375, "learning_rate": 0.0018042635529616155, "loss": 1.6272, "step": 1770 }, { "epoch": 0.22667349289645464, "grad_norm": 0.765625, "learning_rate": 0.0018040171176640966, "loss": 2.1152, "step": 1771 }, { "epoch": 0.22680148470497888, "grad_norm": 0.66015625, "learning_rate": 0.001803770544183191, "loss": 1.9902, "step": 1772 }, { "epoch": 0.22692947651350315, "grad_norm": 0.78515625, "learning_rate": 0.0018035238325612764, "loss": 2.1461, "step": 1773 }, { "epoch": 0.2270574683220274, "grad_norm": 0.78515625, "learning_rate": 0.0018032769828407542, "loss": 1.7703, "step": 1774 }, { "epoch": 0.22718546013055166, "grad_norm": 0.88671875, "learning_rate": 0.0018030299950640497, "loss": 2.6201, "step": 1775 }, { "epoch": 0.2273134519390759, "grad_norm": 0.66796875, "learning_rate": 0.0018027828692736114, "loss": 1.8118, "step": 1776 }, { "epoch": 0.22744144374760014, "grad_norm": 0.734375, "learning_rate": 0.0018025356055119122, "loss": 2.0372, "step": 1777 }, { "epoch": 0.2275694355561244, "grad_norm": 0.78125, "learning_rate": 0.0018022882038214482, "loss": 2.617, "step": 1778 }, { "epoch": 0.22769742736464865, "grad_norm": 0.81640625, "learning_rate": 0.0018020406642447393, "loss": 2.8302, "step": 1779 }, { "epoch": 0.22782541917317292, "grad_norm": 0.828125, "learning_rate": 0.0018017929868243297, "loss": 2.7023, "step": 1780 }, { "epoch": 0.22795341098169716, "grad_norm": 0.88671875, "learning_rate": 0.0018015451716027861, "loss": 1.9147, "step": 1781 }, { "epoch": 0.22808140279022143, "grad_norm": 0.75, "learning_rate": 0.0018012972186227, "loss": 2.1965, "step": 1782 }, { "epoch": 0.22820939459874567, "grad_norm": 0.64453125, "learning_rate": 0.0018010491279266858, "loss": 2.2661, "step": 1783 }, { "epoch": 0.22833738640726994, "grad_norm": 0.79296875, "learning_rate": 0.0018008008995573823, "loss": 2.7743, "step": 1784 }, { "epoch": 0.22846537821579418, "grad_norm": 0.66015625, "learning_rate": 0.0018005525335574512, "loss": 2.0464, "step": 1785 }, { "epoch": 0.22859337002431845, "grad_norm": 0.76171875, "learning_rate": 0.001800304029969579, "loss": 1.859, "step": 1786 }, { "epoch": 0.2287213618328427, "grad_norm": 0.77734375, "learning_rate": 0.0018000553888364741, "loss": 2.5598, "step": 1787 }, { "epoch": 0.22884935364136696, "grad_norm": 0.67578125, "learning_rate": 0.00179980661020087, "loss": 1.5413, "step": 1788 }, { "epoch": 0.2289773454498912, "grad_norm": 0.671875, "learning_rate": 0.0017995576941055233, "loss": 2.4955, "step": 1789 }, { "epoch": 0.22910533725841545, "grad_norm": 0.69921875, "learning_rate": 0.001799308640593214, "loss": 2.5884, "step": 1790 }, { "epoch": 0.22923332906693972, "grad_norm": 0.70703125, "learning_rate": 0.0017990594497067467, "loss": 2.3885, "step": 1791 }, { "epoch": 0.22936132087546396, "grad_norm": 0.7421875, "learning_rate": 0.001798810121488948, "loss": 2.7853, "step": 1792 }, { "epoch": 0.22948931268398823, "grad_norm": 0.765625, "learning_rate": 0.0017985606559826697, "loss": 2.3762, "step": 1793 }, { "epoch": 0.22961730449251247, "grad_norm": 0.6953125, "learning_rate": 0.001798311053230786, "loss": 2.3493, "step": 1794 }, { "epoch": 0.22974529630103674, "grad_norm": 0.79296875, "learning_rate": 0.0017980613132761955, "loss": 2.6752, "step": 1795 }, { "epoch": 0.22987328810956098, "grad_norm": 0.73828125, "learning_rate": 0.0017978114361618198, "loss": 2.8626, "step": 1796 }, { "epoch": 0.23000127991808525, "grad_norm": 0.6484375, "learning_rate": 0.0017975614219306046, "loss": 2.5789, "step": 1797 }, { "epoch": 0.2301292717266095, "grad_norm": 0.76953125, "learning_rate": 0.001797311270625519, "loss": 1.89, "step": 1798 }, { "epoch": 0.23025726353513376, "grad_norm": 0.765625, "learning_rate": 0.001797060982289555, "loss": 1.8042, "step": 1799 }, { "epoch": 0.230385255343658, "grad_norm": 0.6796875, "learning_rate": 0.0017968105569657294, "loss": 2.3328, "step": 1800 }, { "epoch": 0.23051324715218227, "grad_norm": 0.76171875, "learning_rate": 0.0017965599946970815, "loss": 2.253, "step": 1801 }, { "epoch": 0.2306412389607065, "grad_norm": 0.640625, "learning_rate": 0.0017963092955266742, "loss": 2.0552, "step": 1802 }, { "epoch": 0.23076923076923078, "grad_norm": 0.65625, "learning_rate": 0.001796058459497595, "loss": 2.3548, "step": 1803 }, { "epoch": 0.23089722257775502, "grad_norm": 0.63671875, "learning_rate": 0.0017958074866529534, "loss": 2.136, "step": 1804 }, { "epoch": 0.23102521438627927, "grad_norm": 1.3671875, "learning_rate": 0.0017955563770358834, "loss": 1.7016, "step": 1805 }, { "epoch": 0.23115320619480353, "grad_norm": 0.78125, "learning_rate": 0.0017953051306895422, "loss": 2.4346, "step": 1806 }, { "epoch": 0.23128119800332778, "grad_norm": 0.765625, "learning_rate": 0.001795053747657111, "loss": 2.1799, "step": 1807 }, { "epoch": 0.23140918981185205, "grad_norm": 0.63671875, "learning_rate": 0.0017948022279817935, "loss": 2.3423, "step": 1808 }, { "epoch": 0.2315371816203763, "grad_norm": 0.6484375, "learning_rate": 0.0017945505717068175, "loss": 2.1734, "step": 1809 }, { "epoch": 0.23166517342890056, "grad_norm": 0.8046875, "learning_rate": 0.0017942987788754348, "loss": 2.6265, "step": 1810 }, { "epoch": 0.2317931652374248, "grad_norm": 0.68359375, "learning_rate": 0.0017940468495309192, "loss": 1.864, "step": 1811 }, { "epoch": 0.23192115704594907, "grad_norm": 1.0859375, "learning_rate": 0.0017937947837165695, "loss": 2.2471, "step": 1812 }, { "epoch": 0.2320491488544733, "grad_norm": 0.72265625, "learning_rate": 0.0017935425814757072, "loss": 1.803, "step": 1813 }, { "epoch": 0.23217714066299758, "grad_norm": 1.0546875, "learning_rate": 0.001793290242851677, "loss": 2.9665, "step": 1814 }, { "epoch": 0.23230513247152182, "grad_norm": 0.7109375, "learning_rate": 0.0017930377678878479, "loss": 3.1015, "step": 1815 }, { "epoch": 0.2324331242800461, "grad_norm": 0.7109375, "learning_rate": 0.0017927851566276111, "loss": 2.2253, "step": 1816 }, { "epoch": 0.23256111608857033, "grad_norm": 0.55078125, "learning_rate": 0.0017925324091143829, "loss": 1.6026, "step": 1817 }, { "epoch": 0.2326891078970946, "grad_norm": 0.94921875, "learning_rate": 0.001792279525391601, "loss": 3.0021, "step": 1818 }, { "epoch": 0.23281709970561884, "grad_norm": 0.7890625, "learning_rate": 0.0017920265055027286, "loss": 3.3357, "step": 1819 }, { "epoch": 0.23294509151414308, "grad_norm": 0.7578125, "learning_rate": 0.0017917733494912508, "loss": 2.4859, "step": 1820 }, { "epoch": 0.23307308332266735, "grad_norm": 0.796875, "learning_rate": 0.001791520057400676, "loss": 2.3131, "step": 1821 }, { "epoch": 0.2332010751311916, "grad_norm": 0.69921875, "learning_rate": 0.0017912666292745376, "loss": 2.0643, "step": 1822 }, { "epoch": 0.23332906693971586, "grad_norm": 0.7578125, "learning_rate": 0.0017910130651563909, "loss": 2.5073, "step": 1823 }, { "epoch": 0.2334570587482401, "grad_norm": 0.7421875, "learning_rate": 0.0017907593650898146, "loss": 3.3201, "step": 1824 }, { "epoch": 0.23358505055676437, "grad_norm": 0.703125, "learning_rate": 0.0017905055291184118, "loss": 1.8804, "step": 1825 }, { "epoch": 0.23371304236528861, "grad_norm": 0.65625, "learning_rate": 0.0017902515572858079, "loss": 2.662, "step": 1826 }, { "epoch": 0.23384103417381288, "grad_norm": 0.8046875, "learning_rate": 0.0017899974496356523, "loss": 2.985, "step": 1827 }, { "epoch": 0.23396902598233713, "grad_norm": 0.55859375, "learning_rate": 0.0017897432062116178, "loss": 1.7263, "step": 1828 }, { "epoch": 0.2340970177908614, "grad_norm": 0.7734375, "learning_rate": 0.0017894888270573994, "loss": 2.3007, "step": 1829 }, { "epoch": 0.23422500959938564, "grad_norm": 0.75390625, "learning_rate": 0.001789234312216717, "loss": 2.5953, "step": 1830 }, { "epoch": 0.2343530014079099, "grad_norm": 0.625, "learning_rate": 0.001788979661733313, "loss": 1.9172, "step": 1831 }, { "epoch": 0.23448099321643415, "grad_norm": 0.67578125, "learning_rate": 0.0017887248756509528, "loss": 1.7856, "step": 1832 }, { "epoch": 0.23460898502495842, "grad_norm": 0.59765625, "learning_rate": 0.0017884699540134262, "loss": 1.6776, "step": 1833 }, { "epoch": 0.23473697683348266, "grad_norm": 0.67578125, "learning_rate": 0.0017882148968645453, "loss": 2.0662, "step": 1834 }, { "epoch": 0.2348649686420069, "grad_norm": 0.73046875, "learning_rate": 0.0017879597042481455, "loss": 2.0161, "step": 1835 }, { "epoch": 0.23499296045053117, "grad_norm": 0.84375, "learning_rate": 0.0017877043762080862, "loss": 2.0219, "step": 1836 }, { "epoch": 0.2351209522590554, "grad_norm": 0.64453125, "learning_rate": 0.0017874489127882494, "loss": 1.6734, "step": 1837 }, { "epoch": 0.23524894406757968, "grad_norm": 0.703125, "learning_rate": 0.0017871933140325411, "loss": 2.414, "step": 1838 }, { "epoch": 0.23537693587610392, "grad_norm": 0.703125, "learning_rate": 0.0017869375799848895, "loss": 2.1322, "step": 1839 }, { "epoch": 0.2355049276846282, "grad_norm": 0.69921875, "learning_rate": 0.001786681710689247, "loss": 2.4683, "step": 1840 }, { "epoch": 0.23563291949315243, "grad_norm": 0.75390625, "learning_rate": 0.0017864257061895887, "loss": 2.3969, "step": 1841 }, { "epoch": 0.2357609113016767, "grad_norm": 0.80078125, "learning_rate": 0.0017861695665299133, "loss": 1.9364, "step": 1842 }, { "epoch": 0.23588890311020094, "grad_norm": 0.703125, "learning_rate": 0.0017859132917542424, "loss": 2.4063, "step": 1843 }, { "epoch": 0.2360168949187252, "grad_norm": 0.76171875, "learning_rate": 0.0017856568819066211, "loss": 2.4707, "step": 1844 }, { "epoch": 0.23614488672724945, "grad_norm": 0.67578125, "learning_rate": 0.0017854003370311178, "loss": 2.5788, "step": 1845 }, { "epoch": 0.23627287853577372, "grad_norm": 0.828125, "learning_rate": 0.0017851436571718234, "loss": 2.6901, "step": 1846 }, { "epoch": 0.23640087034429796, "grad_norm": 0.609375, "learning_rate": 0.0017848868423728525, "loss": 1.9484, "step": 1847 }, { "epoch": 0.2365288621528222, "grad_norm": 0.5703125, "learning_rate": 0.0017846298926783438, "loss": 1.8713, "step": 1848 }, { "epoch": 0.23665685396134648, "grad_norm": 0.6875, "learning_rate": 0.0017843728081324572, "loss": 1.9743, "step": 1849 }, { "epoch": 0.23678484576987072, "grad_norm": 0.6953125, "learning_rate": 0.0017841155887793772, "loss": 2.1513, "step": 1850 }, { "epoch": 0.23691283757839499, "grad_norm": 0.65234375, "learning_rate": 0.0017838582346633115, "loss": 2.3248, "step": 1851 }, { "epoch": 0.23704082938691923, "grad_norm": 0.72265625, "learning_rate": 0.0017836007458284899, "loss": 2.2742, "step": 1852 }, { "epoch": 0.2371688211954435, "grad_norm": 0.67578125, "learning_rate": 0.0017833431223191665, "loss": 1.4592, "step": 1853 }, { "epoch": 0.23729681300396774, "grad_norm": 0.921875, "learning_rate": 0.0017830853641796181, "loss": 2.1433, "step": 1854 }, { "epoch": 0.237424804812492, "grad_norm": 0.7109375, "learning_rate": 0.0017828274714541445, "loss": 2.3519, "step": 1855 }, { "epoch": 0.23755279662101625, "grad_norm": 0.71875, "learning_rate": 0.0017825694441870685, "loss": 2.7425, "step": 1856 }, { "epoch": 0.23768078842954052, "grad_norm": 0.93359375, "learning_rate": 0.0017823112824227369, "loss": 2.6476, "step": 1857 }, { "epoch": 0.23780878023806476, "grad_norm": 0.84375, "learning_rate": 0.0017820529862055182, "loss": 2.7962, "step": 1858 }, { "epoch": 0.23793677204658903, "grad_norm": 0.71484375, "learning_rate": 0.0017817945555798054, "loss": 1.4136, "step": 1859 }, { "epoch": 0.23806476385511327, "grad_norm": 0.68359375, "learning_rate": 0.0017815359905900138, "loss": 1.4896, "step": 1860 }, { "epoch": 0.23819275566363754, "grad_norm": 0.74609375, "learning_rate": 0.0017812772912805818, "loss": 2.8909, "step": 1861 }, { "epoch": 0.23832074747216178, "grad_norm": 1.25, "learning_rate": 0.0017810184576959715, "loss": 2.7408, "step": 1862 }, { "epoch": 0.23844873928068602, "grad_norm": 0.79296875, "learning_rate": 0.0017807594898806673, "loss": 2.2106, "step": 1863 }, { "epoch": 0.2385767310892103, "grad_norm": 0.7421875, "learning_rate": 0.0017805003878791773, "loss": 2.0883, "step": 1864 }, { "epoch": 0.23870472289773453, "grad_norm": 0.59375, "learning_rate": 0.001780241151736032, "loss": 1.4579, "step": 1865 }, { "epoch": 0.2388327147062588, "grad_norm": 0.84375, "learning_rate": 0.0017799817814957858, "loss": 1.875, "step": 1866 }, { "epoch": 0.23896070651478304, "grad_norm": 0.72265625, "learning_rate": 0.0017797222772030152, "loss": 2.0201, "step": 1867 }, { "epoch": 0.2390886983233073, "grad_norm": 0.6953125, "learning_rate": 0.0017794626389023205, "loss": 2.225, "step": 1868 }, { "epoch": 0.23921669013183156, "grad_norm": 0.6796875, "learning_rate": 0.0017792028666383246, "loss": 1.9855, "step": 1869 }, { "epoch": 0.23934468194035582, "grad_norm": 0.79296875, "learning_rate": 0.001778942960455674, "loss": 1.8229, "step": 1870 }, { "epoch": 0.23947267374888007, "grad_norm": 0.88671875, "learning_rate": 0.0017786829203990373, "loss": 2.3429, "step": 1871 }, { "epoch": 0.23960066555740434, "grad_norm": 0.8515625, "learning_rate": 0.0017784227465131067, "loss": 2.1904, "step": 1872 }, { "epoch": 0.23972865736592858, "grad_norm": 0.70703125, "learning_rate": 0.0017781624388425975, "loss": 2.8277, "step": 1873 }, { "epoch": 0.23985664917445285, "grad_norm": 0.7265625, "learning_rate": 0.0017779019974322474, "loss": 2.1523, "step": 1874 }, { "epoch": 0.2399846409829771, "grad_norm": 0.72265625, "learning_rate": 0.0017776414223268182, "loss": 2.7125, "step": 1875 }, { "epoch": 0.24011263279150136, "grad_norm": 0.65625, "learning_rate": 0.0017773807135710933, "loss": 1.8931, "step": 1876 }, { "epoch": 0.2402406246000256, "grad_norm": 0.63671875, "learning_rate": 0.0017771198712098802, "loss": 1.5403, "step": 1877 }, { "epoch": 0.24036861640854984, "grad_norm": 0.6484375, "learning_rate": 0.0017768588952880085, "loss": 1.9959, "step": 1878 }, { "epoch": 0.2404966082170741, "grad_norm": 0.640625, "learning_rate": 0.0017765977858503312, "loss": 1.6762, "step": 1879 }, { "epoch": 0.24062460002559835, "grad_norm": 0.734375, "learning_rate": 0.0017763365429417242, "loss": 2.1619, "step": 1880 }, { "epoch": 0.24075259183412262, "grad_norm": 0.83203125, "learning_rate": 0.0017760751666070866, "loss": 2.1427, "step": 1881 }, { "epoch": 0.24088058364264686, "grad_norm": 0.890625, "learning_rate": 0.0017758136568913402, "loss": 2.4352, "step": 1882 }, { "epoch": 0.24100857545117113, "grad_norm": 0.68359375, "learning_rate": 0.001775552013839429, "loss": 1.7955, "step": 1883 }, { "epoch": 0.24113656725969537, "grad_norm": 0.58984375, "learning_rate": 0.0017752902374963213, "loss": 1.8275, "step": 1884 }, { "epoch": 0.24126455906821964, "grad_norm": 0.80859375, "learning_rate": 0.0017750283279070076, "loss": 2.0326, "step": 1885 }, { "epoch": 0.24139255087674388, "grad_norm": 0.7578125, "learning_rate": 0.0017747662851165008, "loss": 2.526, "step": 1886 }, { "epoch": 0.24152054268526815, "grad_norm": 0.80078125, "learning_rate": 0.0017745041091698378, "loss": 1.8035, "step": 1887 }, { "epoch": 0.2416485344937924, "grad_norm": 0.7109375, "learning_rate": 0.001774241800112077, "loss": 2.3406, "step": 1888 }, { "epoch": 0.24177652630231666, "grad_norm": 0.9453125, "learning_rate": 0.0017739793579883015, "loss": 3.0092, "step": 1889 }, { "epoch": 0.2419045181108409, "grad_norm": 0.7421875, "learning_rate": 0.0017737167828436155, "loss": 2.4114, "step": 1890 }, { "epoch": 0.24203250991936517, "grad_norm": 0.84375, "learning_rate": 0.001773454074723147, "loss": 2.4877, "step": 1891 }, { "epoch": 0.24216050172788942, "grad_norm": 0.703125, "learning_rate": 0.0017731912336720467, "loss": 2.2426, "step": 1892 }, { "epoch": 0.24228849353641366, "grad_norm": 0.81640625, "learning_rate": 0.001772928259735488, "loss": 2.891, "step": 1893 }, { "epoch": 0.24241648534493793, "grad_norm": 0.66015625, "learning_rate": 0.001772665152958667, "loss": 2.5282, "step": 1894 }, { "epoch": 0.24254447715346217, "grad_norm": 0.6875, "learning_rate": 0.0017724019133868035, "loss": 2.0409, "step": 1895 }, { "epoch": 0.24267246896198644, "grad_norm": 0.8125, "learning_rate": 0.0017721385410651392, "loss": 2.1872, "step": 1896 }, { "epoch": 0.24280046077051068, "grad_norm": 0.84765625, "learning_rate": 0.0017718750360389386, "loss": 2.6889, "step": 1897 }, { "epoch": 0.24292845257903495, "grad_norm": 0.64453125, "learning_rate": 0.00177161139835349, "loss": 2.0247, "step": 1898 }, { "epoch": 0.2430564443875592, "grad_norm": 0.59375, "learning_rate": 0.001771347628054103, "loss": 1.234, "step": 1899 }, { "epoch": 0.24318443619608346, "grad_norm": 0.72265625, "learning_rate": 0.0017710837251861112, "loss": 2.0191, "step": 1900 }, { "epoch": 0.2433124280046077, "grad_norm": 0.9609375, "learning_rate": 0.0017708196897948706, "loss": 2.3977, "step": 1901 }, { "epoch": 0.24344041981313197, "grad_norm": 0.83203125, "learning_rate": 0.00177055552192576, "loss": 2.53, "step": 1902 }, { "epoch": 0.2435684116216562, "grad_norm": 0.65625, "learning_rate": 0.001770291221624181, "loss": 1.7784, "step": 1903 }, { "epoch": 0.24369640343018048, "grad_norm": 0.69921875, "learning_rate": 0.0017700267889355578, "loss": 2.3733, "step": 1904 }, { "epoch": 0.24382439523870472, "grad_norm": 0.734375, "learning_rate": 0.001769762223905337, "loss": 2.2235, "step": 1905 }, { "epoch": 0.24395238704722896, "grad_norm": 0.75, "learning_rate": 0.0017694975265789893, "loss": 2.177, "step": 1906 }, { "epoch": 0.24408037885575323, "grad_norm": 0.734375, "learning_rate": 0.0017692326970020066, "loss": 2.0946, "step": 1907 }, { "epoch": 0.24420837066427747, "grad_norm": 0.65625, "learning_rate": 0.001768967735219904, "loss": 2.5018, "step": 1908 }, { "epoch": 0.24433636247280174, "grad_norm": 0.78515625, "learning_rate": 0.00176870264127822, "loss": 2.585, "step": 1909 }, { "epoch": 0.24446435428132599, "grad_norm": 0.94140625, "learning_rate": 0.0017684374152225153, "loss": 1.9592, "step": 1910 }, { "epoch": 0.24459234608985025, "grad_norm": 0.671875, "learning_rate": 0.001768172057098373, "loss": 1.7779, "step": 1911 }, { "epoch": 0.2447203378983745, "grad_norm": 0.69140625, "learning_rate": 0.001767906566951399, "loss": 1.9582, "step": 1912 }, { "epoch": 0.24484832970689877, "grad_norm": 0.640625, "learning_rate": 0.0017676409448272228, "loss": 1.791, "step": 1913 }, { "epoch": 0.244976321515423, "grad_norm": 0.74609375, "learning_rate": 0.001767375190771495, "loss": 2.9113, "step": 1914 }, { "epoch": 0.24510431332394728, "grad_norm": 0.828125, "learning_rate": 0.0017671093048298905, "loss": 2.4855, "step": 1915 }, { "epoch": 0.24523230513247152, "grad_norm": 0.7265625, "learning_rate": 0.0017668432870481057, "loss": 2.3397, "step": 1916 }, { "epoch": 0.2453602969409958, "grad_norm": 0.6953125, "learning_rate": 0.0017665771374718601, "loss": 1.7093, "step": 1917 }, { "epoch": 0.24548828874952003, "grad_norm": 0.6484375, "learning_rate": 0.001766310856146896, "loss": 2.5175, "step": 1918 }, { "epoch": 0.2456162805580443, "grad_norm": 0.81640625, "learning_rate": 0.001766044443118978, "loss": 2.2833, "step": 1919 }, { "epoch": 0.24574427236656854, "grad_norm": 0.73046875, "learning_rate": 0.0017657778984338937, "loss": 2.0257, "step": 1920 }, { "epoch": 0.24587226417509278, "grad_norm": 0.6484375, "learning_rate": 0.001765511222137453, "loss": 1.968, "step": 1921 }, { "epoch": 0.24600025598361705, "grad_norm": 0.6640625, "learning_rate": 0.0017652444142754883, "loss": 2.1304, "step": 1922 }, { "epoch": 0.2461282477921413, "grad_norm": 0.9296875, "learning_rate": 0.0017649774748938552, "loss": 2.5489, "step": 1923 }, { "epoch": 0.24625623960066556, "grad_norm": 0.6953125, "learning_rate": 0.0017647104040384317, "loss": 2.9323, "step": 1924 }, { "epoch": 0.2463842314091898, "grad_norm": 0.8203125, "learning_rate": 0.0017644432017551175, "loss": 2.5324, "step": 1925 }, { "epoch": 0.24651222321771407, "grad_norm": 0.57421875, "learning_rate": 0.0017641758680898368, "loss": 1.8928, "step": 1926 }, { "epoch": 0.2466402150262383, "grad_norm": 0.72265625, "learning_rate": 0.001763908403088534, "loss": 1.9783, "step": 1927 }, { "epoch": 0.24676820683476258, "grad_norm": 0.8515625, "learning_rate": 0.0017636408067971782, "loss": 2.6888, "step": 1928 }, { "epoch": 0.24689619864328682, "grad_norm": 0.71875, "learning_rate": 0.0017633730792617599, "loss": 2.2435, "step": 1929 }, { "epoch": 0.2470241904518111, "grad_norm": 0.734375, "learning_rate": 0.0017631052205282925, "loss": 2.698, "step": 1930 }, { "epoch": 0.24715218226033533, "grad_norm": 0.64453125, "learning_rate": 0.0017628372306428112, "loss": 2.325, "step": 1931 }, { "epoch": 0.2472801740688596, "grad_norm": 0.66015625, "learning_rate": 0.0017625691096513757, "loss": 2.3106, "step": 1932 }, { "epoch": 0.24740816587738385, "grad_norm": 1.6875, "learning_rate": 0.001762300857600066, "loss": 2.1838, "step": 1933 }, { "epoch": 0.24753615768590811, "grad_norm": 0.82421875, "learning_rate": 0.0017620324745349857, "loss": 2.99, "step": 1934 }, { "epoch": 0.24766414949443236, "grad_norm": 0.80078125, "learning_rate": 0.0017617639605022606, "loss": 2.394, "step": 1935 }, { "epoch": 0.2477921413029566, "grad_norm": 0.79296875, "learning_rate": 0.0017614953155480398, "loss": 2.4231, "step": 1936 }, { "epoch": 0.24792013311148087, "grad_norm": 0.76953125, "learning_rate": 0.0017612265397184937, "loss": 2.4116, "step": 1937 }, { "epoch": 0.2480481249200051, "grad_norm": 0.49609375, "learning_rate": 0.001760957633059816, "loss": 1.0815, "step": 1938 }, { "epoch": 0.24817611672852938, "grad_norm": 0.7890625, "learning_rate": 0.0017606885956182228, "loss": 2.1413, "step": 1939 }, { "epoch": 0.24830410853705362, "grad_norm": 0.890625, "learning_rate": 0.0017604194274399526, "loss": 2.4926, "step": 1940 }, { "epoch": 0.2484321003455779, "grad_norm": 0.8515625, "learning_rate": 0.001760150128571266, "loss": 2.6715, "step": 1941 }, { "epoch": 0.24856009215410213, "grad_norm": 0.65234375, "learning_rate": 0.0017598806990584467, "loss": 2.1951, "step": 1942 }, { "epoch": 0.2486880839626264, "grad_norm": 0.61328125, "learning_rate": 0.0017596111389478002, "loss": 2.1095, "step": 1943 }, { "epoch": 0.24881607577115064, "grad_norm": 0.671875, "learning_rate": 0.0017593414482856551, "loss": 2.2912, "step": 1944 }, { "epoch": 0.2489440675796749, "grad_norm": 0.703125, "learning_rate": 0.001759071627118362, "loss": 2.397, "step": 1945 }, { "epoch": 0.24907205938819915, "grad_norm": 0.77734375, "learning_rate": 0.001758801675492294, "loss": 2.8487, "step": 1946 }, { "epoch": 0.24920005119672342, "grad_norm": 0.85546875, "learning_rate": 0.0017585315934538466, "loss": 2.7305, "step": 1947 }, { "epoch": 0.24932804300524766, "grad_norm": 0.6875, "learning_rate": 0.001758261381049438, "loss": 2.0185, "step": 1948 }, { "epoch": 0.24945603481377193, "grad_norm": 0.80859375, "learning_rate": 0.0017579910383255081, "loss": 2.5714, "step": 1949 }, { "epoch": 0.24958402662229617, "grad_norm": 0.6328125, "learning_rate": 0.0017577205653285205, "loss": 2.0828, "step": 1950 }, { "epoch": 0.24971201843082041, "grad_norm": 0.6953125, "learning_rate": 0.00175744996210496, "loss": 1.9161, "step": 1951 }, { "epoch": 0.24984001023934468, "grad_norm": 0.703125, "learning_rate": 0.0017571792287013337, "loss": 2.0155, "step": 1952 }, { "epoch": 0.24996800204786893, "grad_norm": 0.7578125, "learning_rate": 0.0017569083651641719, "loss": 2.9505, "step": 1953 }, { "epoch": 0.2500959938563932, "grad_norm": 0.9140625, "learning_rate": 0.0017566373715400272, "loss": 2.5936, "step": 1954 }, { "epoch": 0.25022398566491744, "grad_norm": 0.73046875, "learning_rate": 0.0017563662478754737, "loss": 2.5969, "step": 1955 }, { "epoch": 0.2503519774734417, "grad_norm": 0.7109375, "learning_rate": 0.0017560949942171087, "loss": 2.5183, "step": 1956 }, { "epoch": 0.250479969281966, "grad_norm": 0.71875, "learning_rate": 0.0017558236106115514, "loss": 2.3839, "step": 1957 }, { "epoch": 0.2506079610904902, "grad_norm": 0.67578125, "learning_rate": 0.0017555520971054435, "loss": 2.2929, "step": 1958 }, { "epoch": 0.25073595289901446, "grad_norm": 0.6484375, "learning_rate": 0.0017552804537454493, "loss": 1.8651, "step": 1959 }, { "epoch": 0.2508639447075387, "grad_norm": 0.71484375, "learning_rate": 0.0017550086805782547, "loss": 2.2155, "step": 1960 }, { "epoch": 0.250991936516063, "grad_norm": 0.6953125, "learning_rate": 0.0017547367776505687, "loss": 2.2835, "step": 1961 }, { "epoch": 0.25111992832458724, "grad_norm": 0.68359375, "learning_rate": 0.001754464745009122, "loss": 2.3756, "step": 1962 }, { "epoch": 0.2512479201331115, "grad_norm": 0.90625, "learning_rate": 0.001754192582700668, "loss": 2.7671, "step": 1963 }, { "epoch": 0.2513759119416357, "grad_norm": 0.7578125, "learning_rate": 0.0017539202907719815, "loss": 2.199, "step": 1964 }, { "epoch": 0.25150390375015996, "grad_norm": 0.59765625, "learning_rate": 0.0017536478692698615, "loss": 2.158, "step": 1965 }, { "epoch": 0.25163189555868426, "grad_norm": 0.66015625, "learning_rate": 0.001753375318241127, "loss": 1.554, "step": 1966 }, { "epoch": 0.2517598873672085, "grad_norm": 0.7109375, "learning_rate": 0.001753102637732621, "loss": 1.9319, "step": 1967 }, { "epoch": 0.25188787917573274, "grad_norm": 0.6875, "learning_rate": 0.0017528298277912076, "loss": 2.7169, "step": 1968 }, { "epoch": 0.252015870984257, "grad_norm": 0.88671875, "learning_rate": 0.0017525568884637739, "loss": 2.8589, "step": 1969 }, { "epoch": 0.2521438627927813, "grad_norm": 0.609375, "learning_rate": 0.0017522838197972287, "loss": 1.7315, "step": 1970 }, { "epoch": 0.2522718546013055, "grad_norm": 1.140625, "learning_rate": 0.0017520106218385037, "loss": 2.052, "step": 1971 }, { "epoch": 0.25239984640982976, "grad_norm": 0.7578125, "learning_rate": 0.001751737294634552, "loss": 2.3838, "step": 1972 }, { "epoch": 0.252527838218354, "grad_norm": 1.328125, "learning_rate": 0.0017514638382323496, "loss": 2.0976, "step": 1973 }, { "epoch": 0.2526558300268783, "grad_norm": 0.859375, "learning_rate": 0.001751190252678894, "loss": 2.3574, "step": 1974 }, { "epoch": 0.25278382183540254, "grad_norm": 0.69140625, "learning_rate": 0.0017509165380212059, "loss": 2.5292, "step": 1975 }, { "epoch": 0.2529118136439268, "grad_norm": 0.7421875, "learning_rate": 0.0017506426943063272, "loss": 2.6169, "step": 1976 }, { "epoch": 0.253039805452451, "grad_norm": 0.67578125, "learning_rate": 0.0017503687215813222, "loss": 1.9097, "step": 1977 }, { "epoch": 0.2531677972609753, "grad_norm": 0.69921875, "learning_rate": 0.0017500946198932784, "loss": 2.3459, "step": 1978 }, { "epoch": 0.25329578906949957, "grad_norm": 1.0078125, "learning_rate": 0.0017498203892893037, "loss": 2.5091, "step": 1979 }, { "epoch": 0.2534237808780238, "grad_norm": 0.57421875, "learning_rate": 0.0017495460298165293, "loss": 1.6508, "step": 1980 }, { "epoch": 0.25355177268654805, "grad_norm": 0.81640625, "learning_rate": 0.0017492715415221088, "loss": 2.7352, "step": 1981 }, { "epoch": 0.2536797644950723, "grad_norm": 0.66796875, "learning_rate": 0.0017489969244532172, "loss": 1.9812, "step": 1982 }, { "epoch": 0.2538077563035966, "grad_norm": 0.66796875, "learning_rate": 0.0017487221786570515, "loss": 2.0637, "step": 1983 }, { "epoch": 0.25393574811212083, "grad_norm": 0.703125, "learning_rate": 0.0017484473041808318, "loss": 2.0245, "step": 1984 }, { "epoch": 0.25406373992064507, "grad_norm": 1.15625, "learning_rate": 0.0017481723010717993, "loss": 2.5201, "step": 1985 }, { "epoch": 0.2541917317291693, "grad_norm": 0.61328125, "learning_rate": 0.0017478971693772182, "loss": 1.5426, "step": 1986 }, { "epoch": 0.2543197235376936, "grad_norm": 0.8984375, "learning_rate": 0.0017476219091443737, "loss": 2.4598, "step": 1987 }, { "epoch": 0.25444771534621785, "grad_norm": 0.66796875, "learning_rate": 0.0017473465204205746, "loss": 2.2481, "step": 1988 }, { "epoch": 0.2545757071547421, "grad_norm": 0.6875, "learning_rate": 0.0017470710032531504, "loss": 2.2061, "step": 1989 }, { "epoch": 0.25470369896326633, "grad_norm": 0.66796875, "learning_rate": 0.001746795357689453, "loss": 2.437, "step": 1990 }, { "epoch": 0.25483169077179063, "grad_norm": 0.7265625, "learning_rate": 0.001746519583776857, "loss": 2.3337, "step": 1991 }, { "epoch": 0.2549596825803149, "grad_norm": 0.69140625, "learning_rate": 0.001746243681562758, "loss": 2.7445, "step": 1992 }, { "epoch": 0.2550876743888391, "grad_norm": 0.65625, "learning_rate": 0.001745967651094575, "loss": 2.015, "step": 1993 }, { "epoch": 0.25521566619736336, "grad_norm": 0.6484375, "learning_rate": 0.001745691492419748, "loss": 1.7049, "step": 1994 }, { "epoch": 0.2553436580058876, "grad_norm": 0.78515625, "learning_rate": 0.0017454152055857396, "loss": 2.3196, "step": 1995 }, { "epoch": 0.2554716498144119, "grad_norm": 0.70703125, "learning_rate": 0.0017451387906400337, "loss": 2.328, "step": 1996 }, { "epoch": 0.25559964162293614, "grad_norm": 0.65625, "learning_rate": 0.0017448622476301367, "loss": 1.8435, "step": 1997 }, { "epoch": 0.2557276334314604, "grad_norm": 0.87109375, "learning_rate": 0.0017445855766035777, "loss": 2.4604, "step": 1998 }, { "epoch": 0.2558556252399846, "grad_norm": 0.75, "learning_rate": 0.0017443087776079064, "loss": 2.5163, "step": 1999 }, { "epoch": 0.2559836170485089, "grad_norm": 0.76171875, "learning_rate": 0.0017440318506906957, "loss": 2.378, "step": 2000 }, { "epoch": 0.25611160885703316, "grad_norm": 0.64453125, "learning_rate": 0.0017437547958995396, "loss": 1.6142, "step": 2001 }, { "epoch": 0.2562396006655574, "grad_norm": 0.796875, "learning_rate": 0.0017434776132820545, "loss": 1.9128, "step": 2002 }, { "epoch": 0.25636759247408164, "grad_norm": 0.5703125, "learning_rate": 0.0017432003028858786, "loss": 1.5662, "step": 2003 }, { "epoch": 0.25649558428260594, "grad_norm": 0.62109375, "learning_rate": 0.0017429228647586725, "loss": 2.1046, "step": 2004 }, { "epoch": 0.2566235760911302, "grad_norm": 0.87109375, "learning_rate": 0.0017426452989481186, "loss": 3.4469, "step": 2005 }, { "epoch": 0.2567515678996544, "grad_norm": 0.6484375, "learning_rate": 0.0017423676055019206, "loss": 2.3081, "step": 2006 }, { "epoch": 0.25687955970817866, "grad_norm": 1.0234375, "learning_rate": 0.001742089784467805, "loss": 1.9514, "step": 2007 }, { "epoch": 0.2570075515167029, "grad_norm": 0.67578125, "learning_rate": 0.0017418118358935196, "loss": 2.2717, "step": 2008 }, { "epoch": 0.2571355433252272, "grad_norm": 0.7890625, "learning_rate": 0.0017415337598268347, "loss": 2.0243, "step": 2009 }, { "epoch": 0.25726353513375144, "grad_norm": 0.65625, "learning_rate": 0.0017412555563155417, "loss": 1.7304, "step": 2010 }, { "epoch": 0.2573915269422757, "grad_norm": 0.703125, "learning_rate": 0.0017409772254074547, "loss": 2.1934, "step": 2011 }, { "epoch": 0.2575195187507999, "grad_norm": 0.62890625, "learning_rate": 0.0017406987671504095, "loss": 2.1479, "step": 2012 }, { "epoch": 0.2576475105593242, "grad_norm": 0.703125, "learning_rate": 0.0017404201815922633, "loss": 1.8402, "step": 2013 }, { "epoch": 0.25777550236784846, "grad_norm": 0.89453125, "learning_rate": 0.0017401414687808958, "loss": 2.8554, "step": 2014 }, { "epoch": 0.2579034941763727, "grad_norm": 0.66015625, "learning_rate": 0.0017398626287642082, "loss": 2.2097, "step": 2015 }, { "epoch": 0.25803148598489695, "grad_norm": 0.7421875, "learning_rate": 0.001739583661590124, "loss": 2.0928, "step": 2016 }, { "epoch": 0.25815947779342124, "grad_norm": 0.76171875, "learning_rate": 0.0017393045673065879, "loss": 2.405, "step": 2017 }, { "epoch": 0.2582874696019455, "grad_norm": 0.77734375, "learning_rate": 0.0017390253459615668, "loss": 1.977, "step": 2018 }, { "epoch": 0.2584154614104697, "grad_norm": 0.76953125, "learning_rate": 0.0017387459976030497, "loss": 2.4766, "step": 2019 }, { "epoch": 0.25854345321899397, "grad_norm": 0.7734375, "learning_rate": 0.0017384665222790468, "loss": 2.0874, "step": 2020 }, { "epoch": 0.25867144502751827, "grad_norm": 0.73828125, "learning_rate": 0.0017381869200375907, "loss": 1.8557, "step": 2021 }, { "epoch": 0.2587994368360425, "grad_norm": 0.71484375, "learning_rate": 0.0017379071909267353, "loss": 2.7191, "step": 2022 }, { "epoch": 0.25892742864456675, "grad_norm": 0.74609375, "learning_rate": 0.0017376273349945569, "loss": 2.3939, "step": 2023 }, { "epoch": 0.259055420453091, "grad_norm": 0.65234375, "learning_rate": 0.001737347352289153, "loss": 1.7861, "step": 2024 }, { "epoch": 0.25918341226161523, "grad_norm": 0.72265625, "learning_rate": 0.0017370672428586435, "loss": 2.1994, "step": 2025 }, { "epoch": 0.25931140407013953, "grad_norm": 0.71484375, "learning_rate": 0.0017367870067511696, "loss": 1.7971, "step": 2026 }, { "epoch": 0.25943939587866377, "grad_norm": 0.71875, "learning_rate": 0.0017365066440148942, "loss": 2.3634, "step": 2027 }, { "epoch": 0.259567387687188, "grad_norm": 0.7890625, "learning_rate": 0.0017362261546980022, "loss": 2.4356, "step": 2028 }, { "epoch": 0.25969537949571225, "grad_norm": 0.75390625, "learning_rate": 0.001735945538848701, "loss": 2.2904, "step": 2029 }, { "epoch": 0.25982337130423655, "grad_norm": 0.66796875, "learning_rate": 0.0017356647965152182, "loss": 1.8924, "step": 2030 }, { "epoch": 0.2599513631127608, "grad_norm": 0.57421875, "learning_rate": 0.0017353839277458039, "loss": 1.8996, "step": 2031 }, { "epoch": 0.26007935492128503, "grad_norm": 0.88671875, "learning_rate": 0.0017351029325887303, "loss": 1.676, "step": 2032 }, { "epoch": 0.2602073467298093, "grad_norm": 0.84765625, "learning_rate": 0.0017348218110922909, "loss": 2.3148, "step": 2033 }, { "epoch": 0.26033533853833357, "grad_norm": 0.8046875, "learning_rate": 0.001734540563304801, "loss": 2.3384, "step": 2034 }, { "epoch": 0.2604633303468578, "grad_norm": 0.6875, "learning_rate": 0.0017342591892745977, "loss": 2.6498, "step": 2035 }, { "epoch": 0.26059132215538205, "grad_norm": 0.59765625, "learning_rate": 0.001733977689050039, "loss": 1.6003, "step": 2036 }, { "epoch": 0.2607193139639063, "grad_norm": 0.60546875, "learning_rate": 0.0017336960626795062, "loss": 1.7326, "step": 2037 }, { "epoch": 0.26084730577243054, "grad_norm": 0.70703125, "learning_rate": 0.0017334143102114012, "loss": 1.8704, "step": 2038 }, { "epoch": 0.26097529758095483, "grad_norm": 0.7421875, "learning_rate": 0.0017331324316941473, "loss": 2.1407, "step": 2039 }, { "epoch": 0.2611032893894791, "grad_norm": 0.85546875, "learning_rate": 0.00173285042717619, "loss": 2.6705, "step": 2040 }, { "epoch": 0.2612312811980033, "grad_norm": 0.80078125, "learning_rate": 0.0017325682967059969, "loss": 2.7275, "step": 2041 }, { "epoch": 0.26135927300652756, "grad_norm": 0.83984375, "learning_rate": 0.0017322860403320558, "loss": 2.2856, "step": 2042 }, { "epoch": 0.26148726481505186, "grad_norm": 0.65625, "learning_rate": 0.0017320036581028777, "loss": 2.1854, "step": 2043 }, { "epoch": 0.2616152566235761, "grad_norm": 0.6015625, "learning_rate": 0.0017317211500669943, "loss": 2.0709, "step": 2044 }, { "epoch": 0.26174324843210034, "grad_norm": 0.6484375, "learning_rate": 0.0017314385162729594, "loss": 1.8443, "step": 2045 }, { "epoch": 0.2618712402406246, "grad_norm": 0.67578125, "learning_rate": 0.0017311557567693478, "loss": 2.0368, "step": 2046 }, { "epoch": 0.2619992320491489, "grad_norm": 0.60546875, "learning_rate": 0.0017308728716047565, "loss": 1.3372, "step": 2047 }, { "epoch": 0.2621272238576731, "grad_norm": 0.6953125, "learning_rate": 0.0017305898608278043, "loss": 1.7686, "step": 2048 }, { "epoch": 0.26225521566619736, "grad_norm": 0.66796875, "learning_rate": 0.001730306724487131, "loss": 1.8186, "step": 2049 }, { "epoch": 0.2623832074747216, "grad_norm": 0.6796875, "learning_rate": 0.0017300234626313978, "loss": 2.2157, "step": 2050 }, { "epoch": 0.2625111992832459, "grad_norm": 0.671875, "learning_rate": 0.0017297400753092881, "loss": 2.1314, "step": 2051 }, { "epoch": 0.26263919109177014, "grad_norm": 0.63671875, "learning_rate": 0.0017294565625695065, "loss": 1.6682, "step": 2052 }, { "epoch": 0.2627671829002944, "grad_norm": 0.7578125, "learning_rate": 0.0017291729244607796, "loss": 2.0211, "step": 2053 }, { "epoch": 0.2628951747088186, "grad_norm": 0.78125, "learning_rate": 0.0017288891610318547, "loss": 1.9789, "step": 2054 }, { "epoch": 0.26302316651734287, "grad_norm": 0.8515625, "learning_rate": 0.0017286052723315018, "loss": 2.8453, "step": 2055 }, { "epoch": 0.26315115832586716, "grad_norm": 1.140625, "learning_rate": 0.001728321258408511, "loss": 2.4635, "step": 2056 }, { "epoch": 0.2632791501343914, "grad_norm": 0.72265625, "learning_rate": 0.0017280371193116953, "loss": 1.9552, "step": 2057 }, { "epoch": 0.26340714194291565, "grad_norm": 0.7421875, "learning_rate": 0.0017277528550898884, "loss": 2.2466, "step": 2058 }, { "epoch": 0.2635351337514399, "grad_norm": 0.6796875, "learning_rate": 0.001727468465791946, "loss": 1.8553, "step": 2059 }, { "epoch": 0.2636631255599642, "grad_norm": 0.90625, "learning_rate": 0.0017271839514667446, "loss": 1.8293, "step": 2060 }, { "epoch": 0.2637911173684884, "grad_norm": 0.828125, "learning_rate": 0.0017268993121631825, "loss": 2.5633, "step": 2061 }, { "epoch": 0.26391910917701267, "grad_norm": 0.6953125, "learning_rate": 0.00172661454793018, "loss": 2.3554, "step": 2062 }, { "epoch": 0.2640471009855369, "grad_norm": 0.70703125, "learning_rate": 0.0017263296588166783, "loss": 2.4191, "step": 2063 }, { "epoch": 0.2641750927940612, "grad_norm": 0.90625, "learning_rate": 0.0017260446448716402, "loss": 2.2564, "step": 2064 }, { "epoch": 0.26430308460258545, "grad_norm": 0.76171875, "learning_rate": 0.0017257595061440502, "loss": 2.5156, "step": 2065 }, { "epoch": 0.2644310764111097, "grad_norm": 0.72265625, "learning_rate": 0.0017254742426829135, "loss": 2.6111, "step": 2066 }, { "epoch": 0.26455906821963393, "grad_norm": 0.765625, "learning_rate": 0.0017251888545372577, "loss": 2.8257, "step": 2067 }, { "epoch": 0.26468706002815817, "grad_norm": 0.80859375, "learning_rate": 0.0017249033417561308, "loss": 2.1171, "step": 2068 }, { "epoch": 0.26481505183668247, "grad_norm": 0.66015625, "learning_rate": 0.0017246177043886038, "loss": 1.7448, "step": 2069 }, { "epoch": 0.2649430436452067, "grad_norm": 0.6953125, "learning_rate": 0.0017243319424837674, "loss": 2.3069, "step": 2070 }, { "epoch": 0.26507103545373095, "grad_norm": 0.67578125, "learning_rate": 0.0017240460560907344, "loss": 2.2455, "step": 2071 }, { "epoch": 0.2651990272622552, "grad_norm": 0.828125, "learning_rate": 0.0017237600452586395, "loss": 1.6921, "step": 2072 }, { "epoch": 0.2653270190707795, "grad_norm": 0.671875, "learning_rate": 0.001723473910036638, "loss": 2.2602, "step": 2073 }, { "epoch": 0.26545501087930373, "grad_norm": 0.72265625, "learning_rate": 0.0017231876504739067, "loss": 2.0165, "step": 2074 }, { "epoch": 0.265583002687828, "grad_norm": 0.67578125, "learning_rate": 0.0017229012666196439, "loss": 2.0499, "step": 2075 }, { "epoch": 0.2657109944963522, "grad_norm": 0.703125, "learning_rate": 0.0017226147585230698, "loss": 2.1833, "step": 2076 }, { "epoch": 0.2658389863048765, "grad_norm": 0.6171875, "learning_rate": 0.0017223281262334252, "loss": 1.8366, "step": 2077 }, { "epoch": 0.26596697811340075, "grad_norm": 0.671875, "learning_rate": 0.0017220413697999724, "loss": 2.4381, "step": 2078 }, { "epoch": 0.266094969921925, "grad_norm": 0.64453125, "learning_rate": 0.0017217544892719951, "loss": 1.7741, "step": 2079 }, { "epoch": 0.26622296173044924, "grad_norm": 0.59765625, "learning_rate": 0.001721467484698799, "loss": 1.9059, "step": 2080 }, { "epoch": 0.2663509535389735, "grad_norm": 0.72265625, "learning_rate": 0.0017211803561297096, "loss": 2.3177, "step": 2081 }, { "epoch": 0.2664789453474978, "grad_norm": 0.6484375, "learning_rate": 0.001720893103614075, "loss": 1.8283, "step": 2082 }, { "epoch": 0.266606937156022, "grad_norm": 0.62890625, "learning_rate": 0.0017206057272012645, "loss": 1.8037, "step": 2083 }, { "epoch": 0.26673492896454626, "grad_norm": 0.7421875, "learning_rate": 0.001720318226940668, "loss": 2.4558, "step": 2084 }, { "epoch": 0.2668629207730705, "grad_norm": 0.6640625, "learning_rate": 0.0017200306028816975, "loss": 2.0105, "step": 2085 }, { "epoch": 0.2669909125815948, "grad_norm": 0.921875, "learning_rate": 0.0017197428550737852, "loss": 2.3929, "step": 2086 }, { "epoch": 0.26711890439011904, "grad_norm": 0.71484375, "learning_rate": 0.0017194549835663855, "loss": 1.6709, "step": 2087 }, { "epoch": 0.2672468961986433, "grad_norm": 0.73828125, "learning_rate": 0.0017191669884089743, "loss": 2.5074, "step": 2088 }, { "epoch": 0.2673748880071675, "grad_norm": 0.65625, "learning_rate": 0.0017188788696510476, "loss": 1.8727, "step": 2089 }, { "epoch": 0.2675028798156918, "grad_norm": 0.67578125, "learning_rate": 0.0017185906273421234, "loss": 1.591, "step": 2090 }, { "epoch": 0.26763087162421606, "grad_norm": 0.6875, "learning_rate": 0.0017183022615317413, "loss": 2.2147, "step": 2091 }, { "epoch": 0.2677588634327403, "grad_norm": 0.72265625, "learning_rate": 0.0017180137722694608, "loss": 1.9292, "step": 2092 }, { "epoch": 0.26788685524126454, "grad_norm": 0.76953125, "learning_rate": 0.0017177251596048645, "loss": 2.3658, "step": 2093 }, { "epoch": 0.26801484704978884, "grad_norm": 0.8359375, "learning_rate": 0.001717436423587554, "loss": 2.3886, "step": 2094 }, { "epoch": 0.2681428388583131, "grad_norm": 0.87109375, "learning_rate": 0.0017171475642671543, "loss": 2.051, "step": 2095 }, { "epoch": 0.2682708306668373, "grad_norm": 0.87109375, "learning_rate": 0.0017168585816933102, "loss": 1.7686, "step": 2096 }, { "epoch": 0.26839882247536156, "grad_norm": 0.6953125, "learning_rate": 0.0017165694759156875, "loss": 2.26, "step": 2097 }, { "epoch": 0.2685268142838858, "grad_norm": 0.875, "learning_rate": 0.0017162802469839744, "loss": 2.0321, "step": 2098 }, { "epoch": 0.2686548060924101, "grad_norm": 0.64453125, "learning_rate": 0.0017159908949478796, "loss": 2.2058, "step": 2099 }, { "epoch": 0.26878279790093434, "grad_norm": 0.80859375, "learning_rate": 0.0017157014198571328, "loss": 2.7688, "step": 2100 }, { "epoch": 0.2689107897094586, "grad_norm": 0.73046875, "learning_rate": 0.0017154118217614844, "loss": 2.7215, "step": 2101 }, { "epoch": 0.26903878151798283, "grad_norm": 0.72265625, "learning_rate": 0.0017151221007107075, "loss": 2.1338, "step": 2102 }, { "epoch": 0.2691667733265071, "grad_norm": 0.6796875, "learning_rate": 0.0017148322567545947, "loss": 2.135, "step": 2103 }, { "epoch": 0.26929476513503137, "grad_norm": 0.62109375, "learning_rate": 0.0017145422899429606, "loss": 1.3878, "step": 2104 }, { "epoch": 0.2694227569435556, "grad_norm": 0.84765625, "learning_rate": 0.001714252200325641, "loss": 2.9014, "step": 2105 }, { "epoch": 0.26955074875207985, "grad_norm": 0.65234375, "learning_rate": 0.0017139619879524917, "loss": 2.1395, "step": 2106 }, { "epoch": 0.26967874056060415, "grad_norm": 0.71484375, "learning_rate": 0.0017136716528733914, "loss": 1.8354, "step": 2107 }, { "epoch": 0.2698067323691284, "grad_norm": 0.91796875, "learning_rate": 0.0017133811951382382, "loss": 1.9129, "step": 2108 }, { "epoch": 0.26993472417765263, "grad_norm": 0.6328125, "learning_rate": 0.0017130906147969519, "loss": 1.9945, "step": 2109 }, { "epoch": 0.27006271598617687, "grad_norm": 0.76953125, "learning_rate": 0.0017127999118994742, "loss": 1.7722, "step": 2110 }, { "epoch": 0.2701907077947011, "grad_norm": 0.7109375, "learning_rate": 0.0017125090864957661, "loss": 2.0798, "step": 2111 }, { "epoch": 0.2703186996032254, "grad_norm": 0.6484375, "learning_rate": 0.0017122181386358118, "loss": 1.4662, "step": 2112 }, { "epoch": 0.27044669141174965, "grad_norm": 0.640625, "learning_rate": 0.0017119270683696142, "loss": 2.2357, "step": 2113 }, { "epoch": 0.2705746832202739, "grad_norm": 0.703125, "learning_rate": 0.0017116358757471996, "loss": 1.9167, "step": 2114 }, { "epoch": 0.27070267502879813, "grad_norm": 0.7109375, "learning_rate": 0.0017113445608186134, "loss": 2.4054, "step": 2115 }, { "epoch": 0.27083066683732243, "grad_norm": 0.73828125, "learning_rate": 0.001711053123633923, "loss": 1.6191, "step": 2116 }, { "epoch": 0.2709586586458467, "grad_norm": 0.80078125, "learning_rate": 0.0017107615642432167, "loss": 2.2597, "step": 2117 }, { "epoch": 0.2710866504543709, "grad_norm": 0.65234375, "learning_rate": 0.0017104698826966036, "loss": 2.0519, "step": 2118 }, { "epoch": 0.27121464226289516, "grad_norm": 0.87109375, "learning_rate": 0.0017101780790442138, "loss": 2.2515, "step": 2119 }, { "epoch": 0.27134263407141945, "grad_norm": 0.765625, "learning_rate": 0.0017098861533361989, "loss": 2.0212, "step": 2120 }, { "epoch": 0.2714706258799437, "grad_norm": 0.80859375, "learning_rate": 0.0017095941056227307, "loss": 2.0456, "step": 2121 }, { "epoch": 0.27159861768846794, "grad_norm": 0.65625, "learning_rate": 0.0017093019359540025, "loss": 2.2132, "step": 2122 }, { "epoch": 0.2717266094969922, "grad_norm": 0.70703125, "learning_rate": 0.0017090096443802283, "loss": 2.5875, "step": 2123 }, { "epoch": 0.2718546013055165, "grad_norm": 0.58984375, "learning_rate": 0.0017087172309516432, "loss": 1.9798, "step": 2124 }, { "epoch": 0.2719825931140407, "grad_norm": 0.71484375, "learning_rate": 0.0017084246957185035, "loss": 2.8135, "step": 2125 }, { "epoch": 0.27211058492256496, "grad_norm": 0.59375, "learning_rate": 0.0017081320387310854, "loss": 1.9758, "step": 2126 }, { "epoch": 0.2722385767310892, "grad_norm": 0.63671875, "learning_rate": 0.0017078392600396875, "loss": 1.7119, "step": 2127 }, { "epoch": 0.27236656853961344, "grad_norm": 0.7109375, "learning_rate": 0.0017075463596946281, "loss": 1.6204, "step": 2128 }, { "epoch": 0.27249456034813774, "grad_norm": 0.875, "learning_rate": 0.001707253337746247, "loss": 2.6273, "step": 2129 }, { "epoch": 0.272622552156662, "grad_norm": 0.6640625, "learning_rate": 0.0017069601942449047, "loss": 2.4039, "step": 2130 }, { "epoch": 0.2727505439651862, "grad_norm": 0.76171875, "learning_rate": 0.001706666929240983, "loss": 1.4214, "step": 2131 }, { "epoch": 0.27287853577371046, "grad_norm": 0.671875, "learning_rate": 0.0017063735427848836, "loss": 2.1941, "step": 2132 }, { "epoch": 0.27300652758223476, "grad_norm": 0.75390625, "learning_rate": 0.0017060800349270307, "loss": 1.7908, "step": 2133 }, { "epoch": 0.273134519390759, "grad_norm": 0.69921875, "learning_rate": 0.0017057864057178672, "loss": 1.914, "step": 2134 }, { "epoch": 0.27326251119928324, "grad_norm": 0.6640625, "learning_rate": 0.0017054926552078592, "loss": 1.9833, "step": 2135 }, { "epoch": 0.2733905030078075, "grad_norm": 0.8515625, "learning_rate": 0.0017051987834474916, "loss": 2.7316, "step": 2136 }, { "epoch": 0.2735184948163318, "grad_norm": 0.640625, "learning_rate": 0.0017049047904872715, "loss": 1.7264, "step": 2137 }, { "epoch": 0.273646486624856, "grad_norm": 0.7109375, "learning_rate": 0.0017046106763777265, "loss": 1.7297, "step": 2138 }, { "epoch": 0.27377447843338026, "grad_norm": 0.8125, "learning_rate": 0.0017043164411694042, "loss": 1.8993, "step": 2139 }, { "epoch": 0.2739024702419045, "grad_norm": 0.66015625, "learning_rate": 0.0017040220849128742, "loss": 2.0728, "step": 2140 }, { "epoch": 0.27403046205042875, "grad_norm": 0.5546875, "learning_rate": 0.0017037276076587267, "loss": 1.5208, "step": 2141 }, { "epoch": 0.27415845385895304, "grad_norm": 0.640625, "learning_rate": 0.0017034330094575715, "loss": 2.0964, "step": 2142 }, { "epoch": 0.2742864456674773, "grad_norm": 1.078125, "learning_rate": 0.001703138290360041, "loss": 1.6441, "step": 2143 }, { "epoch": 0.2744144374760015, "grad_norm": 0.8359375, "learning_rate": 0.0017028434504167869, "loss": 2.101, "step": 2144 }, { "epoch": 0.27454242928452577, "grad_norm": 0.69140625, "learning_rate": 0.0017025484896784824, "loss": 1.808, "step": 2145 }, { "epoch": 0.27467042109305007, "grad_norm": 0.83203125, "learning_rate": 0.001702253408195821, "loss": 2.0066, "step": 2146 }, { "epoch": 0.2747984129015743, "grad_norm": 0.671875, "learning_rate": 0.0017019582060195178, "loss": 2.3131, "step": 2147 }, { "epoch": 0.27492640471009855, "grad_norm": 0.7734375, "learning_rate": 0.0017016628832003079, "loss": 2.5719, "step": 2148 }, { "epoch": 0.2750543965186228, "grad_norm": 0.73046875, "learning_rate": 0.0017013674397889469, "loss": 2.2379, "step": 2149 }, { "epoch": 0.2751823883271471, "grad_norm": 0.71484375, "learning_rate": 0.0017010718758362123, "loss": 2.2928, "step": 2150 }, { "epoch": 0.27531038013567133, "grad_norm": 0.62109375, "learning_rate": 0.0017007761913929004, "loss": 1.627, "step": 2151 }, { "epoch": 0.27543837194419557, "grad_norm": 0.48828125, "learning_rate": 0.0017004803865098305, "loss": 1.2555, "step": 2152 }, { "epoch": 0.2755663637527198, "grad_norm": 0.72265625, "learning_rate": 0.0017001844612378415, "loss": 2.4845, "step": 2153 }, { "epoch": 0.27569435556124405, "grad_norm": 0.73828125, "learning_rate": 0.0016998884156277918, "loss": 2.2966, "step": 2154 }, { "epoch": 0.27582234736976835, "grad_norm": 0.83984375, "learning_rate": 0.0016995922497305628, "loss": 2.2351, "step": 2155 }, { "epoch": 0.2759503391782926, "grad_norm": 0.83203125, "learning_rate": 0.0016992959635970548, "loss": 2.2393, "step": 2156 }, { "epoch": 0.27607833098681683, "grad_norm": 1.0546875, "learning_rate": 0.0016989995572781896, "loss": 2.0211, "step": 2157 }, { "epoch": 0.2762063227953411, "grad_norm": 0.8203125, "learning_rate": 0.0016987030308249094, "loss": 2.1111, "step": 2158 }, { "epoch": 0.27633431460386537, "grad_norm": 0.71875, "learning_rate": 0.0016984063842881768, "loss": 1.897, "step": 2159 }, { "epoch": 0.2764623064123896, "grad_norm": 0.85546875, "learning_rate": 0.001698109617718976, "loss": 2.2141, "step": 2160 }, { "epoch": 0.27659029822091385, "grad_norm": 0.65625, "learning_rate": 0.0016978127311683103, "loss": 1.8505, "step": 2161 }, { "epoch": 0.2767182900294381, "grad_norm": 0.70703125, "learning_rate": 0.0016975157246872051, "loss": 2.6878, "step": 2162 }, { "epoch": 0.2768462818379624, "grad_norm": 0.7578125, "learning_rate": 0.0016972185983267056, "loss": 2.4146, "step": 2163 }, { "epoch": 0.27697427364648664, "grad_norm": 0.62109375, "learning_rate": 0.0016969213521378778, "loss": 1.8581, "step": 2164 }, { "epoch": 0.2771022654550109, "grad_norm": 0.8359375, "learning_rate": 0.0016966239861718082, "loss": 2.5136, "step": 2165 }, { "epoch": 0.2772302572635351, "grad_norm": 0.546875, "learning_rate": 0.0016963265004796043, "loss": 1.6245, "step": 2166 }, { "epoch": 0.2773582490720594, "grad_norm": 0.78515625, "learning_rate": 0.0016960288951123933, "loss": 2.2333, "step": 2167 }, { "epoch": 0.27748624088058366, "grad_norm": 0.69140625, "learning_rate": 0.0016957311701213235, "loss": 2.2131, "step": 2168 }, { "epoch": 0.2776142326891079, "grad_norm": 0.6796875, "learning_rate": 0.0016954333255575645, "loss": 1.6567, "step": 2169 }, { "epoch": 0.27774222449763214, "grad_norm": 0.6171875, "learning_rate": 0.0016951353614723052, "loss": 2.1451, "step": 2170 }, { "epoch": 0.2778702163061564, "grad_norm": 1.1015625, "learning_rate": 0.0016948372779167556, "loss": 1.6597, "step": 2171 }, { "epoch": 0.2779982081146807, "grad_norm": 0.65625, "learning_rate": 0.0016945390749421461, "loss": 2.5175, "step": 2172 }, { "epoch": 0.2781261999232049, "grad_norm": 0.7421875, "learning_rate": 0.0016942407525997276, "loss": 1.9936, "step": 2173 }, { "epoch": 0.27825419173172916, "grad_norm": 0.69140625, "learning_rate": 0.0016939423109407724, "loss": 2.2665, "step": 2174 }, { "epoch": 0.2783821835402534, "grad_norm": 0.625, "learning_rate": 0.0016936437500165717, "loss": 1.833, "step": 2175 }, { "epoch": 0.2785101753487777, "grad_norm": 0.703125, "learning_rate": 0.0016933450698784385, "loss": 2.2214, "step": 2176 }, { "epoch": 0.27863816715730194, "grad_norm": 0.76171875, "learning_rate": 0.0016930462705777053, "loss": 2.2661, "step": 2177 }, { "epoch": 0.2787661589658262, "grad_norm": 0.63671875, "learning_rate": 0.0016927473521657266, "loss": 1.7815, "step": 2178 }, { "epoch": 0.2788941507743504, "grad_norm": 0.65625, "learning_rate": 0.0016924483146938756, "loss": 2.374, "step": 2179 }, { "epoch": 0.2790221425828747, "grad_norm": 0.69140625, "learning_rate": 0.0016921491582135469, "loss": 2.2158, "step": 2180 }, { "epoch": 0.27915013439139896, "grad_norm": 0.82421875, "learning_rate": 0.0016918498827761552, "loss": 2.3447, "step": 2181 }, { "epoch": 0.2792781261999232, "grad_norm": 1.0859375, "learning_rate": 0.0016915504884331364, "loss": 2.7944, "step": 2182 }, { "epoch": 0.27940611800844745, "grad_norm": 0.71875, "learning_rate": 0.0016912509752359454, "loss": 2.5773, "step": 2183 }, { "epoch": 0.2795341098169717, "grad_norm": 0.84765625, "learning_rate": 0.0016909513432360594, "loss": 1.7594, "step": 2184 }, { "epoch": 0.279662101625496, "grad_norm": 0.625, "learning_rate": 0.0016906515924849743, "loss": 1.699, "step": 2185 }, { "epoch": 0.2797900934340202, "grad_norm": 0.6953125, "learning_rate": 0.0016903517230342077, "loss": 1.9426, "step": 2186 }, { "epoch": 0.27991808524254447, "grad_norm": 0.67578125, "learning_rate": 0.0016900517349352961, "loss": 2.0168, "step": 2187 }, { "epoch": 0.2800460770510687, "grad_norm": 0.95703125, "learning_rate": 0.0016897516282397983, "loss": 2.2125, "step": 2188 }, { "epoch": 0.280174068859593, "grad_norm": 0.58984375, "learning_rate": 0.0016894514029992922, "loss": 1.9918, "step": 2189 }, { "epoch": 0.28030206066811725, "grad_norm": 0.69921875, "learning_rate": 0.0016891510592653763, "loss": 1.6391, "step": 2190 }, { "epoch": 0.2804300524766415, "grad_norm": 0.71484375, "learning_rate": 0.0016888505970896696, "loss": 2.5625, "step": 2191 }, { "epoch": 0.28055804428516573, "grad_norm": 0.7109375, "learning_rate": 0.001688550016523811, "loss": 2.9778, "step": 2192 }, { "epoch": 0.28068603609369003, "grad_norm": 0.6484375, "learning_rate": 0.0016882493176194609, "loss": 2.677, "step": 2193 }, { "epoch": 0.28081402790221427, "grad_norm": 0.60546875, "learning_rate": 0.0016879485004282987, "loss": 1.5124, "step": 2194 }, { "epoch": 0.2809420197107385, "grad_norm": 0.70703125, "learning_rate": 0.0016876475650020248, "loss": 2.4775, "step": 2195 }, { "epoch": 0.28107001151926275, "grad_norm": 0.703125, "learning_rate": 0.00168734651139236, "loss": 1.8563, "step": 2196 }, { "epoch": 0.281198003327787, "grad_norm": 0.60546875, "learning_rate": 0.0016870453396510454, "loss": 1.1608, "step": 2197 }, { "epoch": 0.2813259951363113, "grad_norm": 0.68359375, "learning_rate": 0.0016867440498298419, "loss": 2.3063, "step": 2198 }, { "epoch": 0.28145398694483553, "grad_norm": 0.78515625, "learning_rate": 0.001686442641980531, "loss": 1.6122, "step": 2199 }, { "epoch": 0.2815819787533598, "grad_norm": 0.6484375, "learning_rate": 0.0016861411161549153, "loss": 2.436, "step": 2200 }, { "epoch": 0.281709970561884, "grad_norm": 0.67578125, "learning_rate": 0.0016858394724048155, "loss": 2.1263, "step": 2201 }, { "epoch": 0.2818379623704083, "grad_norm": 0.640625, "learning_rate": 0.0016855377107820755, "loss": 2.4547, "step": 2202 }, { "epoch": 0.28196595417893255, "grad_norm": 0.62890625, "learning_rate": 0.0016852358313385568, "loss": 1.8633, "step": 2203 }, { "epoch": 0.2820939459874568, "grad_norm": 0.703125, "learning_rate": 0.001684933834126143, "loss": 2.0092, "step": 2204 }, { "epoch": 0.28222193779598104, "grad_norm": 0.671875, "learning_rate": 0.0016846317191967365, "loss": 2.0841, "step": 2205 }, { "epoch": 0.28234992960450533, "grad_norm": 0.65234375, "learning_rate": 0.0016843294866022611, "loss": 2.2062, "step": 2206 }, { "epoch": 0.2824779214130296, "grad_norm": 0.6875, "learning_rate": 0.0016840271363946606, "loss": 1.8663, "step": 2207 }, { "epoch": 0.2826059132215538, "grad_norm": 0.69140625, "learning_rate": 0.0016837246686258987, "loss": 1.8661, "step": 2208 }, { "epoch": 0.28273390503007806, "grad_norm": 0.625, "learning_rate": 0.001683422083347959, "loss": 1.7528, "step": 2209 }, { "epoch": 0.28286189683860236, "grad_norm": 0.62890625, "learning_rate": 0.0016831193806128458, "loss": 1.7468, "step": 2210 }, { "epoch": 0.2829898886471266, "grad_norm": 0.671875, "learning_rate": 0.0016828165604725839, "loss": 1.9191, "step": 2211 }, { "epoch": 0.28311788045565084, "grad_norm": 1.046875, "learning_rate": 0.0016825136229792173, "loss": 2.3941, "step": 2212 }, { "epoch": 0.2832458722641751, "grad_norm": 0.72265625, "learning_rate": 0.001682210568184811, "loss": 2.3444, "step": 2213 }, { "epoch": 0.2833738640726993, "grad_norm": 0.8125, "learning_rate": 0.0016819073961414498, "loss": 1.9588, "step": 2214 }, { "epoch": 0.2835018558812236, "grad_norm": 0.69921875, "learning_rate": 0.001681604106901239, "loss": 1.7908, "step": 2215 }, { "epoch": 0.28362984768974786, "grad_norm": 0.640625, "learning_rate": 0.0016813007005163035, "loss": 1.5282, "step": 2216 }, { "epoch": 0.2837578394982721, "grad_norm": 0.53125, "learning_rate": 0.0016809971770387886, "loss": 1.4676, "step": 2217 }, { "epoch": 0.28388583130679634, "grad_norm": 0.66796875, "learning_rate": 0.0016806935365208597, "loss": 2.1402, "step": 2218 }, { "epoch": 0.28401382311532064, "grad_norm": 0.95703125, "learning_rate": 0.0016803897790147024, "loss": 2.2143, "step": 2219 }, { "epoch": 0.2841418149238449, "grad_norm": 0.67578125, "learning_rate": 0.0016800859045725228, "loss": 1.8457, "step": 2220 }, { "epoch": 0.2842698067323691, "grad_norm": 0.60546875, "learning_rate": 0.001679781913246546, "loss": 1.6046, "step": 2221 }, { "epoch": 0.28439779854089337, "grad_norm": 0.6796875, "learning_rate": 0.0016794778050890184, "loss": 1.6472, "step": 2222 }, { "epoch": 0.28452579034941766, "grad_norm": 0.76953125, "learning_rate": 0.0016791735801522055, "loss": 2.4384, "step": 2223 }, { "epoch": 0.2846537821579419, "grad_norm": 0.625, "learning_rate": 0.0016788692384883932, "loss": 1.8868, "step": 2224 }, { "epoch": 0.28478177396646615, "grad_norm": 0.78125, "learning_rate": 0.001678564780149888, "loss": 2.2071, "step": 2225 }, { "epoch": 0.2849097657749904, "grad_norm": 0.8671875, "learning_rate": 0.0016782602051890157, "loss": 2.0174, "step": 2226 }, { "epoch": 0.28503775758351463, "grad_norm": 0.80859375, "learning_rate": 0.0016779555136581226, "loss": 1.6741, "step": 2227 }, { "epoch": 0.2851657493920389, "grad_norm": 0.703125, "learning_rate": 0.0016776507056095746, "loss": 1.7213, "step": 2228 }, { "epoch": 0.28529374120056317, "grad_norm": 0.82421875, "learning_rate": 0.0016773457810957582, "loss": 1.9, "step": 2229 }, { "epoch": 0.2854217330090874, "grad_norm": 0.75390625, "learning_rate": 0.0016770407401690798, "loss": 2.2314, "step": 2230 }, { "epoch": 0.28554972481761165, "grad_norm": 0.57421875, "learning_rate": 0.0016767355828819653, "loss": 1.5668, "step": 2231 }, { "epoch": 0.28567771662613595, "grad_norm": 0.62890625, "learning_rate": 0.001676430309286861, "loss": 1.6114, "step": 2232 }, { "epoch": 0.2858057084346602, "grad_norm": 0.68359375, "learning_rate": 0.0016761249194362329, "loss": 2.0352, "step": 2233 }, { "epoch": 0.28593370024318443, "grad_norm": 0.73046875, "learning_rate": 0.0016758194133825676, "loss": 1.8338, "step": 2234 }, { "epoch": 0.28606169205170867, "grad_norm": 0.63671875, "learning_rate": 0.001675513791178371, "loss": 1.9193, "step": 2235 }, { "epoch": 0.28618968386023297, "grad_norm": 0.67578125, "learning_rate": 0.0016752080528761695, "loss": 1.3698, "step": 2236 }, { "epoch": 0.2863176756687572, "grad_norm": 0.69140625, "learning_rate": 0.001674902198528509, "loss": 1.6389, "step": 2237 }, { "epoch": 0.28644566747728145, "grad_norm": 0.7890625, "learning_rate": 0.0016745962281879555, "loss": 2.198, "step": 2238 }, { "epoch": 0.2865736592858057, "grad_norm": 0.96484375, "learning_rate": 0.001674290141907095, "loss": 2.5121, "step": 2239 }, { "epoch": 0.28670165109433, "grad_norm": 0.65625, "learning_rate": 0.0016739839397385335, "loss": 2.536, "step": 2240 }, { "epoch": 0.28682964290285423, "grad_norm": 0.7265625, "learning_rate": 0.0016736776217348966, "loss": 2.4081, "step": 2241 }, { "epoch": 0.2869576347113785, "grad_norm": 0.7265625, "learning_rate": 0.0016733711879488302, "loss": 1.7229, "step": 2242 }, { "epoch": 0.2870856265199027, "grad_norm": 0.63671875, "learning_rate": 0.001673064638433, "loss": 2.0983, "step": 2243 }, { "epoch": 0.28721361832842696, "grad_norm": 0.6796875, "learning_rate": 0.0016727579732400911, "loss": 1.489, "step": 2244 }, { "epoch": 0.28734161013695125, "grad_norm": 0.67578125, "learning_rate": 0.0016724511924228094, "loss": 1.8602, "step": 2245 }, { "epoch": 0.2874696019454755, "grad_norm": 0.5703125, "learning_rate": 0.00167214429603388, "loss": 1.3103, "step": 2246 }, { "epoch": 0.28759759375399974, "grad_norm": 0.640625, "learning_rate": 0.0016718372841260476, "loss": 1.9089, "step": 2247 }, { "epoch": 0.287725585562524, "grad_norm": 0.81640625, "learning_rate": 0.0016715301567520777, "loss": 1.7999, "step": 2248 }, { "epoch": 0.2878535773710483, "grad_norm": 0.94921875, "learning_rate": 0.001671222913964755, "loss": 2.4093, "step": 2249 }, { "epoch": 0.2879815691795725, "grad_norm": 0.78125, "learning_rate": 0.0016709155558168843, "loss": 2.1312, "step": 2250 }, { "epoch": 0.28810956098809676, "grad_norm": 0.60546875, "learning_rate": 0.0016706080823612895, "loss": 1.8269, "step": 2251 }, { "epoch": 0.288237552796621, "grad_norm": 0.96875, "learning_rate": 0.0016703004936508155, "loss": 2.5678, "step": 2252 }, { "epoch": 0.2883655446051453, "grad_norm": 0.703125, "learning_rate": 0.001669992789738326, "loss": 2.1799, "step": 2253 }, { "epoch": 0.28849353641366954, "grad_norm": 0.875, "learning_rate": 0.0016696849706767055, "loss": 2.1193, "step": 2254 }, { "epoch": 0.2886215282221938, "grad_norm": 0.7734375, "learning_rate": 0.0016693770365188573, "loss": 1.6492, "step": 2255 }, { "epoch": 0.288749520030718, "grad_norm": 0.5859375, "learning_rate": 0.0016690689873177043, "loss": 1.766, "step": 2256 }, { "epoch": 0.28887751183924226, "grad_norm": 0.65625, "learning_rate": 0.0016687608231261907, "loss": 1.9411, "step": 2257 }, { "epoch": 0.28900550364776656, "grad_norm": 0.70703125, "learning_rate": 0.0016684525439972793, "loss": 2.3292, "step": 2258 }, { "epoch": 0.2891334954562908, "grad_norm": 0.63671875, "learning_rate": 0.0016681441499839527, "loss": 1.2673, "step": 2259 }, { "epoch": 0.28926148726481504, "grad_norm": 0.765625, "learning_rate": 0.0016678356411392132, "loss": 1.8628, "step": 2260 }, { "epoch": 0.2893894790733393, "grad_norm": 0.68359375, "learning_rate": 0.0016675270175160834, "loss": 2.0259, "step": 2261 }, { "epoch": 0.2895174708818636, "grad_norm": 0.82421875, "learning_rate": 0.0016672182791676055, "loss": 2.522, "step": 2262 }, { "epoch": 0.2896454626903878, "grad_norm": 0.6953125, "learning_rate": 0.0016669094261468405, "loss": 2.7048, "step": 2263 }, { "epoch": 0.28977345449891206, "grad_norm": 0.70703125, "learning_rate": 0.0016666004585068703, "loss": 1.595, "step": 2264 }, { "epoch": 0.2899014463074363, "grad_norm": 0.578125, "learning_rate": 0.0016662913763007962, "loss": 1.6943, "step": 2265 }, { "epoch": 0.2900294381159606, "grad_norm": 0.6484375, "learning_rate": 0.001665982179581738, "loss": 1.8181, "step": 2266 }, { "epoch": 0.29015742992448484, "grad_norm": 0.7421875, "learning_rate": 0.001665672868402837, "loss": 2.3454, "step": 2267 }, { "epoch": 0.2902854217330091, "grad_norm": 0.7421875, "learning_rate": 0.0016653634428172538, "loss": 1.8072, "step": 2268 }, { "epoch": 0.2904134135415333, "grad_norm": 0.6875, "learning_rate": 0.0016650539028781669, "loss": 2.2904, "step": 2269 }, { "epoch": 0.29054140535005757, "grad_norm": 0.6328125, "learning_rate": 0.001664744248638777, "loss": 1.7903, "step": 2270 }, { "epoch": 0.29066939715858187, "grad_norm": 0.66015625, "learning_rate": 0.0016644344801523018, "loss": 2.4759, "step": 2271 }, { "epoch": 0.2907973889671061, "grad_norm": 0.828125, "learning_rate": 0.0016641245974719817, "loss": 2.28, "step": 2272 }, { "epoch": 0.29092538077563035, "grad_norm": 0.7734375, "learning_rate": 0.0016638146006510742, "loss": 2.1623, "step": 2273 }, { "epoch": 0.2910533725841546, "grad_norm": 0.8203125, "learning_rate": 0.001663504489742857, "loss": 1.7928, "step": 2274 }, { "epoch": 0.2911813643926789, "grad_norm": 0.69921875, "learning_rate": 0.0016631942648006283, "loss": 2.806, "step": 2275 }, { "epoch": 0.29130935620120313, "grad_norm": 0.72265625, "learning_rate": 0.0016628839258777046, "loss": 2.6083, "step": 2276 }, { "epoch": 0.29143734800972737, "grad_norm": 0.82421875, "learning_rate": 0.0016625734730274235, "loss": 1.6767, "step": 2277 }, { "epoch": 0.2915653398182516, "grad_norm": 0.62109375, "learning_rate": 0.0016622629063031411, "loss": 1.733, "step": 2278 }, { "epoch": 0.2916933316267759, "grad_norm": 0.6640625, "learning_rate": 0.0016619522257582328, "loss": 2.1218, "step": 2279 }, { "epoch": 0.29182132343530015, "grad_norm": 0.7265625, "learning_rate": 0.0016616414314460947, "loss": 1.5823, "step": 2280 }, { "epoch": 0.2919493152438244, "grad_norm": 0.7421875, "learning_rate": 0.001661330523420142, "loss": 2.4062, "step": 2281 }, { "epoch": 0.29207730705234863, "grad_norm": 0.67578125, "learning_rate": 0.0016610195017338083, "loss": 1.9095, "step": 2282 }, { "epoch": 0.29220529886087293, "grad_norm": 0.64453125, "learning_rate": 0.0016607083664405484, "loss": 1.8142, "step": 2283 }, { "epoch": 0.29233329066939717, "grad_norm": 0.83203125, "learning_rate": 0.0016603971175938362, "loss": 2.1675, "step": 2284 }, { "epoch": 0.2924612824779214, "grad_norm": 0.91015625, "learning_rate": 0.0016600857552471645, "loss": 2.121, "step": 2285 }, { "epoch": 0.29258927428644566, "grad_norm": 0.609375, "learning_rate": 0.001659774279454046, "loss": 1.4831, "step": 2286 }, { "epoch": 0.2927172660949699, "grad_norm": 0.7421875, "learning_rate": 0.0016594626902680127, "loss": 2.33, "step": 2287 }, { "epoch": 0.2928452579034942, "grad_norm": 0.6796875, "learning_rate": 0.0016591509877426167, "loss": 1.6101, "step": 2288 }, { "epoch": 0.29297324971201844, "grad_norm": 0.80078125, "learning_rate": 0.0016588391719314286, "loss": 1.8637, "step": 2289 }, { "epoch": 0.2931012415205427, "grad_norm": 0.5703125, "learning_rate": 0.00165852724288804, "loss": 1.3932, "step": 2290 }, { "epoch": 0.2932292333290669, "grad_norm": 0.703125, "learning_rate": 0.0016582152006660595, "loss": 2.0144, "step": 2291 }, { "epoch": 0.2933572251375912, "grad_norm": 0.84375, "learning_rate": 0.0016579030453191177, "loss": 2.4574, "step": 2292 }, { "epoch": 0.29348521694611546, "grad_norm": 0.7109375, "learning_rate": 0.001657590776900863, "loss": 2.0244, "step": 2293 }, { "epoch": 0.2936132087546397, "grad_norm": 0.75, "learning_rate": 0.0016572783954649643, "loss": 2.1161, "step": 2294 }, { "epoch": 0.29374120056316394, "grad_norm": 0.79296875, "learning_rate": 0.0016569659010651093, "loss": 2.3055, "step": 2295 }, { "epoch": 0.29386919237168824, "grad_norm": 0.6484375, "learning_rate": 0.0016566532937550048, "loss": 1.7893, "step": 2296 }, { "epoch": 0.2939971841802125, "grad_norm": 0.640625, "learning_rate": 0.0016563405735883778, "loss": 1.7875, "step": 2297 }, { "epoch": 0.2941251759887367, "grad_norm": 0.69140625, "learning_rate": 0.0016560277406189742, "loss": 1.791, "step": 2298 }, { "epoch": 0.29425316779726096, "grad_norm": 0.78125, "learning_rate": 0.0016557147949005596, "loss": 1.9242, "step": 2299 }, { "epoch": 0.2943811596057852, "grad_norm": 0.73046875, "learning_rate": 0.0016554017364869185, "loss": 1.9023, "step": 2300 }, { "epoch": 0.2945091514143095, "grad_norm": 0.703125, "learning_rate": 0.0016550885654318555, "loss": 2.4082, "step": 2301 }, { "epoch": 0.29463714322283374, "grad_norm": 0.69140625, "learning_rate": 0.0016547752817891936, "loss": 1.8531, "step": 2302 }, { "epoch": 0.294765135031358, "grad_norm": 0.68359375, "learning_rate": 0.0016544618856127761, "loss": 1.8782, "step": 2303 }, { "epoch": 0.2948931268398822, "grad_norm": 0.7109375, "learning_rate": 0.0016541483769564654, "loss": 1.7767, "step": 2304 }, { "epoch": 0.2950211186484065, "grad_norm": 0.8046875, "learning_rate": 0.0016538347558741422, "loss": 2.0353, "step": 2305 }, { "epoch": 0.29514911045693076, "grad_norm": 0.7109375, "learning_rate": 0.001653521022419708, "loss": 2.4879, "step": 2306 }, { "epoch": 0.295277102265455, "grad_norm": 0.62109375, "learning_rate": 0.001653207176647083, "loss": 2.1444, "step": 2307 }, { "epoch": 0.29540509407397925, "grad_norm": 0.65234375, "learning_rate": 0.0016528932186102067, "loss": 2.059, "step": 2308 }, { "epoch": 0.29553308588250354, "grad_norm": 0.7421875, "learning_rate": 0.0016525791483630375, "loss": 2.133, "step": 2309 }, { "epoch": 0.2956610776910278, "grad_norm": 0.62890625, "learning_rate": 0.001652264965959554, "loss": 1.8686, "step": 2310 }, { "epoch": 0.295789069499552, "grad_norm": 0.8671875, "learning_rate": 0.001651950671453753, "loss": 2.5475, "step": 2311 }, { "epoch": 0.29591706130807627, "grad_norm": 0.68359375, "learning_rate": 0.0016516362648996514, "loss": 2.5135, "step": 2312 }, { "epoch": 0.29604505311660056, "grad_norm": 0.77734375, "learning_rate": 0.0016513217463512851, "loss": 2.1897, "step": 2313 }, { "epoch": 0.2961730449251248, "grad_norm": 0.55078125, "learning_rate": 0.0016510071158627089, "loss": 1.9733, "step": 2314 }, { "epoch": 0.29630103673364905, "grad_norm": 0.66015625, "learning_rate": 0.0016506923734879977, "loss": 1.6965, "step": 2315 }, { "epoch": 0.2964290285421733, "grad_norm": 1.7421875, "learning_rate": 0.0016503775192812447, "loss": 2.1632, "step": 2316 }, { "epoch": 0.29655702035069753, "grad_norm": 0.8125, "learning_rate": 0.001650062553296563, "loss": 2.12, "step": 2317 }, { "epoch": 0.29668501215922183, "grad_norm": 0.67578125, "learning_rate": 0.0016497474755880841, "loss": 1.7067, "step": 2318 }, { "epoch": 0.29681300396774607, "grad_norm": 0.7421875, "learning_rate": 0.0016494322862099598, "loss": 2.3262, "step": 2319 }, { "epoch": 0.2969409957762703, "grad_norm": 0.54296875, "learning_rate": 0.0016491169852163598, "loss": 1.3074, "step": 2320 }, { "epoch": 0.29706898758479455, "grad_norm": 0.65234375, "learning_rate": 0.0016488015726614745, "loss": 2.0108, "step": 2321 }, { "epoch": 0.29719697939331885, "grad_norm": 0.6015625, "learning_rate": 0.0016484860485995122, "loss": 1.7387, "step": 2322 }, { "epoch": 0.2973249712018431, "grad_norm": 0.75390625, "learning_rate": 0.001648170413084701, "loss": 2.0087, "step": 2323 }, { "epoch": 0.29745296301036733, "grad_norm": 0.83203125, "learning_rate": 0.0016478546661712876, "loss": 1.9789, "step": 2324 }, { "epoch": 0.2975809548188916, "grad_norm": 0.6953125, "learning_rate": 0.0016475388079135386, "loss": 2.2509, "step": 2325 }, { "epoch": 0.29770894662741587, "grad_norm": 0.73828125, "learning_rate": 0.0016472228383657397, "loss": 2.175, "step": 2326 }, { "epoch": 0.2978369384359401, "grad_norm": 0.828125, "learning_rate": 0.0016469067575821945, "loss": 2.0688, "step": 2327 }, { "epoch": 0.29796493024446435, "grad_norm": 0.88671875, "learning_rate": 0.0016465905656172276, "loss": 1.6159, "step": 2328 }, { "epoch": 0.2980929220529886, "grad_norm": 0.65234375, "learning_rate": 0.0016462742625251807, "loss": 2.6066, "step": 2329 }, { "epoch": 0.29822091386151284, "grad_norm": 0.80078125, "learning_rate": 0.0016459578483604168, "loss": 1.7553, "step": 2330 }, { "epoch": 0.29834890567003713, "grad_norm": 0.6875, "learning_rate": 0.0016456413231773156, "loss": 1.9752, "step": 2331 }, { "epoch": 0.2984768974785614, "grad_norm": 0.6015625, "learning_rate": 0.0016453246870302776, "loss": 2.1476, "step": 2332 }, { "epoch": 0.2986048892870856, "grad_norm": 0.67578125, "learning_rate": 0.001645007939973722, "loss": 1.6078, "step": 2333 }, { "epoch": 0.29873288109560986, "grad_norm": 0.77734375, "learning_rate": 0.0016446910820620874, "loss": 2.376, "step": 2334 }, { "epoch": 0.29886087290413416, "grad_norm": 0.67578125, "learning_rate": 0.0016443741133498296, "loss": 1.7918, "step": 2335 }, { "epoch": 0.2989888647126584, "grad_norm": 0.6953125, "learning_rate": 0.0016440570338914259, "loss": 1.7562, "step": 2336 }, { "epoch": 0.29911685652118264, "grad_norm": 0.6875, "learning_rate": 0.001643739843741371, "loss": 2.2888, "step": 2337 }, { "epoch": 0.2992448483297069, "grad_norm": 0.62109375, "learning_rate": 0.00164342254295418, "loss": 1.587, "step": 2338 }, { "epoch": 0.2993728401382312, "grad_norm": 0.6640625, "learning_rate": 0.0016431051315843853, "loss": 1.816, "step": 2339 }, { "epoch": 0.2995008319467554, "grad_norm": 0.65234375, "learning_rate": 0.0016427876096865393, "loss": 2.1615, "step": 2340 }, { "epoch": 0.29962882375527966, "grad_norm": 0.6796875, "learning_rate": 0.001642469977315214, "loss": 2.2574, "step": 2341 }, { "epoch": 0.2997568155638039, "grad_norm": 0.71875, "learning_rate": 0.0016421522345249986, "loss": 2.2724, "step": 2342 }, { "epoch": 0.29988480737232814, "grad_norm": 0.7734375, "learning_rate": 0.0016418343813705035, "loss": 2.1061, "step": 2343 }, { "epoch": 0.30001279918085244, "grad_norm": 0.69140625, "learning_rate": 0.001641516417906356, "loss": 1.4284, "step": 2344 }, { "epoch": 0.3001407909893767, "grad_norm": 0.69140625, "learning_rate": 0.0016411983441872034, "loss": 2.0532, "step": 2345 }, { "epoch": 0.3002687827979009, "grad_norm": 0.94140625, "learning_rate": 0.0016408801602677127, "loss": 2.3173, "step": 2346 }, { "epoch": 0.30039677460642517, "grad_norm": 0.68359375, "learning_rate": 0.0016405618662025678, "loss": 1.599, "step": 2347 }, { "epoch": 0.30052476641494946, "grad_norm": 0.62109375, "learning_rate": 0.0016402434620464736, "loss": 1.926, "step": 2348 }, { "epoch": 0.3006527582234737, "grad_norm": 0.75, "learning_rate": 0.0016399249478541524, "loss": 2.1439, "step": 2349 }, { "epoch": 0.30078075003199795, "grad_norm": 0.703125, "learning_rate": 0.0016396063236803465, "loss": 2.1182, "step": 2350 }, { "epoch": 0.3009087418405222, "grad_norm": 0.640625, "learning_rate": 0.001639287589579816, "loss": 1.9703, "step": 2351 }, { "epoch": 0.3010367336490465, "grad_norm": 0.75390625, "learning_rate": 0.0016389687456073415, "loss": 2.249, "step": 2352 }, { "epoch": 0.3011647254575707, "grad_norm": 0.65234375, "learning_rate": 0.0016386497918177206, "loss": 1.7458, "step": 2353 }, { "epoch": 0.30129271726609497, "grad_norm": 0.69140625, "learning_rate": 0.0016383307282657712, "loss": 1.6477, "step": 2354 }, { "epoch": 0.3014207090746192, "grad_norm": 0.78125, "learning_rate": 0.0016380115550063293, "loss": 2.0693, "step": 2355 }, { "epoch": 0.3015487008831435, "grad_norm": 0.6484375, "learning_rate": 0.00163769227209425, "loss": 1.9739, "step": 2356 }, { "epoch": 0.30167669269166775, "grad_norm": 0.67578125, "learning_rate": 0.0016373728795844077, "loss": 2.0428, "step": 2357 }, { "epoch": 0.301804684500192, "grad_norm": 0.640625, "learning_rate": 0.0016370533775316944, "loss": 1.4827, "step": 2358 }, { "epoch": 0.30193267630871623, "grad_norm": 1.09375, "learning_rate": 0.0016367337659910222, "loss": 2.7486, "step": 2359 }, { "epoch": 0.30206066811724047, "grad_norm": 0.5546875, "learning_rate": 0.0016364140450173218, "loss": 1.3969, "step": 2360 }, { "epoch": 0.30218865992576477, "grad_norm": 0.75390625, "learning_rate": 0.0016360942146655417, "loss": 1.9208, "step": 2361 }, { "epoch": 0.302316651734289, "grad_norm": 0.62890625, "learning_rate": 0.0016357742749906503, "loss": 2.0457, "step": 2362 }, { "epoch": 0.30244464354281325, "grad_norm": 0.703125, "learning_rate": 0.0016354542260476348, "loss": 2.0139, "step": 2363 }, { "epoch": 0.3025726353513375, "grad_norm": 0.7734375, "learning_rate": 0.0016351340678915003, "loss": 1.7628, "step": 2364 }, { "epoch": 0.3027006271598618, "grad_norm": 0.59375, "learning_rate": 0.0016348138005772715, "loss": 1.1659, "step": 2365 }, { "epoch": 0.30282861896838603, "grad_norm": 0.71484375, "learning_rate": 0.001634493424159991, "loss": 1.6071, "step": 2366 }, { "epoch": 0.3029566107769103, "grad_norm": 0.76953125, "learning_rate": 0.0016341729386947213, "loss": 1.888, "step": 2367 }, { "epoch": 0.3030846025854345, "grad_norm": 0.703125, "learning_rate": 0.001633852344236543, "loss": 2.1852, "step": 2368 }, { "epoch": 0.3032125943939588, "grad_norm": 0.67578125, "learning_rate": 0.0016335316408405548, "loss": 1.9292, "step": 2369 }, { "epoch": 0.30334058620248305, "grad_norm": 0.73828125, "learning_rate": 0.0016332108285618755, "loss": 2.2784, "step": 2370 }, { "epoch": 0.3034685780110073, "grad_norm": 0.62109375, "learning_rate": 0.0016328899074556414, "loss": 1.9515, "step": 2371 }, { "epoch": 0.30359656981953154, "grad_norm": 0.54296875, "learning_rate": 0.0016325688775770085, "loss": 1.3651, "step": 2372 }, { "epoch": 0.3037245616280558, "grad_norm": 0.7734375, "learning_rate": 0.0016322477389811504, "loss": 2.4638, "step": 2373 }, { "epoch": 0.3038525534365801, "grad_norm": 0.734375, "learning_rate": 0.0016319264917232608, "loss": 2.0643, "step": 2374 }, { "epoch": 0.3039805452451043, "grad_norm": 0.6484375, "learning_rate": 0.0016316051358585503, "loss": 1.3587, "step": 2375 }, { "epoch": 0.30410853705362856, "grad_norm": 0.71484375, "learning_rate": 0.0016312836714422498, "loss": 2.0454, "step": 2376 }, { "epoch": 0.3042365288621528, "grad_norm": 0.78125, "learning_rate": 0.0016309620985296073, "loss": 2.2278, "step": 2377 }, { "epoch": 0.3043645206706771, "grad_norm": 0.6015625, "learning_rate": 0.0016306404171758917, "loss": 1.349, "step": 2378 }, { "epoch": 0.30449251247920134, "grad_norm": 0.8828125, "learning_rate": 0.0016303186274363882, "loss": 2.7405, "step": 2379 }, { "epoch": 0.3046205042877256, "grad_norm": 0.8359375, "learning_rate": 0.0016299967293664018, "loss": 2.0896, "step": 2380 }, { "epoch": 0.3047484960962498, "grad_norm": 0.609375, "learning_rate": 0.001629674723021256, "loss": 1.4432, "step": 2381 }, { "epoch": 0.3048764879047741, "grad_norm": 1.0859375, "learning_rate": 0.0016293526084562927, "loss": 2.7991, "step": 2382 }, { "epoch": 0.30500447971329836, "grad_norm": 0.73046875, "learning_rate": 0.0016290303857268724, "loss": 2.5463, "step": 2383 }, { "epoch": 0.3051324715218226, "grad_norm": 0.6484375, "learning_rate": 0.0016287080548883744, "loss": 1.2692, "step": 2384 }, { "epoch": 0.30526046333034684, "grad_norm": 0.70703125, "learning_rate": 0.0016283856159961967, "loss": 2.2823, "step": 2385 }, { "epoch": 0.30538845513887114, "grad_norm": 0.5703125, "learning_rate": 0.0016280630691057553, "loss": 1.6169, "step": 2386 }, { "epoch": 0.3055164469473954, "grad_norm": 0.65234375, "learning_rate": 0.0016277404142724854, "loss": 1.395, "step": 2387 }, { "epoch": 0.3056444387559196, "grad_norm": 0.7109375, "learning_rate": 0.0016274176515518406, "loss": 2.2512, "step": 2388 }, { "epoch": 0.30577243056444386, "grad_norm": 0.7578125, "learning_rate": 0.0016270947809992925, "loss": 1.8281, "step": 2389 }, { "epoch": 0.3059004223729681, "grad_norm": 0.80859375, "learning_rate": 0.0016267718026703318, "loss": 2.8004, "step": 2390 }, { "epoch": 0.3060284141814924, "grad_norm": 0.69140625, "learning_rate": 0.0016264487166204678, "loss": 1.7855, "step": 2391 }, { "epoch": 0.30615640599001664, "grad_norm": 0.7265625, "learning_rate": 0.0016261255229052275, "loss": 2.2853, "step": 2392 }, { "epoch": 0.3062843977985409, "grad_norm": 0.70703125, "learning_rate": 0.0016258022215801576, "loss": 2.1509, "step": 2393 }, { "epoch": 0.3064123896070651, "grad_norm": 0.65625, "learning_rate": 0.0016254788127008225, "loss": 2.0921, "step": 2394 }, { "epoch": 0.3065403814155894, "grad_norm": 0.80859375, "learning_rate": 0.001625155296322805, "loss": 1.9499, "step": 2395 }, { "epoch": 0.30666837322411367, "grad_norm": 0.734375, "learning_rate": 0.0016248316725017066, "loss": 2.1459, "step": 2396 }, { "epoch": 0.3067963650326379, "grad_norm": 0.67578125, "learning_rate": 0.001624507941293148, "loss": 1.9803, "step": 2397 }, { "epoch": 0.30692435684116215, "grad_norm": 0.7109375, "learning_rate": 0.0016241841027527669, "loss": 2.2875, "step": 2398 }, { "epoch": 0.30705234864968645, "grad_norm": 0.79296875, "learning_rate": 0.001623860156936221, "loss": 1.7612, "step": 2399 }, { "epoch": 0.3071803404582107, "grad_norm": 0.85546875, "learning_rate": 0.0016235361038991845, "loss": 2.5925, "step": 2400 }, { "epoch": 0.30730833226673493, "grad_norm": 1.171875, "learning_rate": 0.0016232119436973523, "loss": 2.2328, "step": 2401 }, { "epoch": 0.30743632407525917, "grad_norm": 0.734375, "learning_rate": 0.0016228876763864357, "loss": 2.2595, "step": 2402 }, { "epoch": 0.3075643158837834, "grad_norm": 0.625, "learning_rate": 0.001622563302022166, "loss": 1.9799, "step": 2403 }, { "epoch": 0.3076923076923077, "grad_norm": 0.65234375, "learning_rate": 0.0016222388206602921, "loss": 1.8567, "step": 2404 }, { "epoch": 0.30782029950083195, "grad_norm": 0.76171875, "learning_rate": 0.0016219142323565807, "loss": 2.0031, "step": 2405 }, { "epoch": 0.3079482913093562, "grad_norm": 0.6171875, "learning_rate": 0.0016215895371668183, "loss": 2.0958, "step": 2406 }, { "epoch": 0.30807628311788043, "grad_norm": 0.6953125, "learning_rate": 0.0016212647351468087, "loss": 1.6928, "step": 2407 }, { "epoch": 0.30820427492640473, "grad_norm": 0.55078125, "learning_rate": 0.0016209398263523746, "loss": 1.8195, "step": 2408 }, { "epoch": 0.308332266734929, "grad_norm": 0.75390625, "learning_rate": 0.0016206148108393569, "loss": 2.2985, "step": 2409 }, { "epoch": 0.3084602585434532, "grad_norm": 0.6484375, "learning_rate": 0.0016202896886636144, "loss": 1.6776, "step": 2410 }, { "epoch": 0.30858825035197746, "grad_norm": 0.640625, "learning_rate": 0.0016199644598810248, "loss": 1.9467, "step": 2411 }, { "epoch": 0.30871624216050175, "grad_norm": 0.93359375, "learning_rate": 0.0016196391245474843, "loss": 1.9306, "step": 2412 }, { "epoch": 0.308844233969026, "grad_norm": 0.71484375, "learning_rate": 0.0016193136827189066, "loss": 2.1253, "step": 2413 }, { "epoch": 0.30897222577755024, "grad_norm": 0.640625, "learning_rate": 0.0016189881344512243, "loss": 2.7192, "step": 2414 }, { "epoch": 0.3091002175860745, "grad_norm": 0.6484375, "learning_rate": 0.0016186624798003879, "loss": 1.5971, "step": 2415 }, { "epoch": 0.3092282093945987, "grad_norm": 0.64453125, "learning_rate": 0.0016183367188223672, "loss": 1.7307, "step": 2416 }, { "epoch": 0.309356201203123, "grad_norm": 0.60546875, "learning_rate": 0.0016180108515731484, "loss": 2.052, "step": 2417 }, { "epoch": 0.30948419301164726, "grad_norm": 0.6953125, "learning_rate": 0.0016176848781087384, "loss": 2.0716, "step": 2418 }, { "epoch": 0.3096121848201715, "grad_norm": 0.703125, "learning_rate": 0.0016173587984851597, "loss": 2.4103, "step": 2419 }, { "epoch": 0.30974017662869574, "grad_norm": 0.62109375, "learning_rate": 0.001617032612758455, "loss": 1.9785, "step": 2420 }, { "epoch": 0.30986816843722004, "grad_norm": 0.7890625, "learning_rate": 0.001616706320984685, "loss": 2.4731, "step": 2421 }, { "epoch": 0.3099961602457443, "grad_norm": 0.7890625, "learning_rate": 0.0016163799232199274, "loss": 2.2947, "step": 2422 }, { "epoch": 0.3101241520542685, "grad_norm": 0.671875, "learning_rate": 0.00161605341952028, "loss": 2.1623, "step": 2423 }, { "epoch": 0.31025214386279276, "grad_norm": 0.69140625, "learning_rate": 0.0016157268099418567, "loss": 1.8731, "step": 2424 }, { "epoch": 0.31038013567131706, "grad_norm": 0.6328125, "learning_rate": 0.0016154000945407911, "loss": 1.9689, "step": 2425 }, { "epoch": 0.3105081274798413, "grad_norm": 0.76953125, "learning_rate": 0.0016150732733732345, "loss": 1.6827, "step": 2426 }, { "epoch": 0.31063611928836554, "grad_norm": 0.8671875, "learning_rate": 0.0016147463464953567, "loss": 2.0468, "step": 2427 }, { "epoch": 0.3107641110968898, "grad_norm": 0.80859375, "learning_rate": 0.0016144193139633449, "loss": 2.2374, "step": 2428 }, { "epoch": 0.3108921029054141, "grad_norm": 0.62109375, "learning_rate": 0.001614092175833405, "loss": 1.5525, "step": 2429 }, { "epoch": 0.3110200947139383, "grad_norm": 0.69921875, "learning_rate": 0.0016137649321617618, "loss": 1.8048, "step": 2430 }, { "epoch": 0.31114808652246256, "grad_norm": 0.65234375, "learning_rate": 0.0016134375830046564, "loss": 1.862, "step": 2431 }, { "epoch": 0.3112760783309868, "grad_norm": 0.6796875, "learning_rate": 0.0016131101284183496, "loss": 2.4082, "step": 2432 }, { "epoch": 0.31140407013951105, "grad_norm": 0.9140625, "learning_rate": 0.0016127825684591195, "loss": 1.9432, "step": 2433 }, { "epoch": 0.31153206194803534, "grad_norm": 0.67578125, "learning_rate": 0.001612454903183263, "loss": 2.1635, "step": 2434 }, { "epoch": 0.3116600537565596, "grad_norm": 0.59375, "learning_rate": 0.0016121271326470944, "loss": 1.5595, "step": 2435 }, { "epoch": 0.3117880455650838, "grad_norm": 0.69921875, "learning_rate": 0.0016117992569069464, "loss": 1.8602, "step": 2436 }, { "epoch": 0.31191603737360807, "grad_norm": 0.60546875, "learning_rate": 0.0016114712760191697, "loss": 1.8645, "step": 2437 }, { "epoch": 0.31204402918213237, "grad_norm": 0.7109375, "learning_rate": 0.0016111431900401331, "loss": 1.7426, "step": 2438 }, { "epoch": 0.3121720209906566, "grad_norm": 0.8046875, "learning_rate": 0.001610814999026224, "loss": 2.184, "step": 2439 }, { "epoch": 0.31230001279918085, "grad_norm": 0.79296875, "learning_rate": 0.0016104867030338468, "loss": 1.8106, "step": 2440 }, { "epoch": 0.3124280046077051, "grad_norm": 0.6171875, "learning_rate": 0.0016101583021194247, "loss": 2.0057, "step": 2441 }, { "epoch": 0.3125559964162294, "grad_norm": 0.640625, "learning_rate": 0.0016098297963393988, "loss": 2.1331, "step": 2442 }, { "epoch": 0.31268398822475363, "grad_norm": 0.71875, "learning_rate": 0.0016095011857502283, "loss": 2.1841, "step": 2443 }, { "epoch": 0.31281198003327787, "grad_norm": 0.703125, "learning_rate": 0.0016091724704083896, "loss": 1.8123, "step": 2444 }, { "epoch": 0.3129399718418021, "grad_norm": 0.58203125, "learning_rate": 0.0016088436503703782, "loss": 1.5645, "step": 2445 }, { "epoch": 0.31306796365032635, "grad_norm": 0.73046875, "learning_rate": 0.0016085147256927072, "loss": 2.1576, "step": 2446 }, { "epoch": 0.31319595545885065, "grad_norm": 0.65234375, "learning_rate": 0.0016081856964319078, "loss": 2.5769, "step": 2447 }, { "epoch": 0.3133239472673749, "grad_norm": 0.7265625, "learning_rate": 0.0016078565626445288, "loss": 2.5833, "step": 2448 }, { "epoch": 0.31345193907589913, "grad_norm": 0.6484375, "learning_rate": 0.0016075273243871368, "loss": 2.0971, "step": 2449 }, { "epoch": 0.3135799308844234, "grad_norm": 0.6328125, "learning_rate": 0.0016071979817163173, "loss": 2.2803, "step": 2450 }, { "epoch": 0.31370792269294767, "grad_norm": 0.65625, "learning_rate": 0.0016068685346886731, "loss": 1.572, "step": 2451 }, { "epoch": 0.3138359145014719, "grad_norm": 0.734375, "learning_rate": 0.001606538983360825, "loss": 1.9218, "step": 2452 }, { "epoch": 0.31396390630999615, "grad_norm": 0.734375, "learning_rate": 0.0016062093277894114, "loss": 2.2559, "step": 2453 }, { "epoch": 0.3140918981185204, "grad_norm": 0.71484375, "learning_rate": 0.0016058795680310896, "loss": 1.5226, "step": 2454 }, { "epoch": 0.3142198899270447, "grad_norm": 0.65625, "learning_rate": 0.0016055497041425333, "loss": 2.114, "step": 2455 }, { "epoch": 0.31434788173556893, "grad_norm": 0.65625, "learning_rate": 0.0016052197361804358, "loss": 2.1792, "step": 2456 }, { "epoch": 0.3144758735440932, "grad_norm": 0.75390625, "learning_rate": 0.001604889664201507, "loss": 2.1734, "step": 2457 }, { "epoch": 0.3146038653526174, "grad_norm": 0.8125, "learning_rate": 0.001604559488262475, "loss": 1.8134, "step": 2458 }, { "epoch": 0.31473185716114166, "grad_norm": 0.72265625, "learning_rate": 0.0016042292084200863, "loss": 2.4217, "step": 2459 }, { "epoch": 0.31485984896966596, "grad_norm": 0.71484375, "learning_rate": 0.001603898824731105, "loss": 1.483, "step": 2460 }, { "epoch": 0.3149878407781902, "grad_norm": 0.8984375, "learning_rate": 0.001603568337252312, "loss": 1.6852, "step": 2461 }, { "epoch": 0.31511583258671444, "grad_norm": 0.7109375, "learning_rate": 0.0016032377460405076, "loss": 2.1321, "step": 2462 }, { "epoch": 0.3152438243952387, "grad_norm": 0.78515625, "learning_rate": 0.0016029070511525095, "loss": 2.7639, "step": 2463 }, { "epoch": 0.315371816203763, "grad_norm": 0.6640625, "learning_rate": 0.001602576252645152, "loss": 1.8637, "step": 2464 }, { "epoch": 0.3154998080122872, "grad_norm": 0.77734375, "learning_rate": 0.0016022453505752897, "loss": 2.017, "step": 2465 }, { "epoch": 0.31562779982081146, "grad_norm": 0.5703125, "learning_rate": 0.001601914344999792, "loss": 1.7595, "step": 2466 }, { "epoch": 0.3157557916293357, "grad_norm": 0.65234375, "learning_rate": 0.0016015832359755483, "loss": 1.5627, "step": 2467 }, { "epoch": 0.31588378343786, "grad_norm": 0.5234375, "learning_rate": 0.0016012520235594652, "loss": 1.0548, "step": 2468 }, { "epoch": 0.31601177524638424, "grad_norm": 0.62109375, "learning_rate": 0.0016009207078084664, "loss": 1.8276, "step": 2469 }, { "epoch": 0.3161397670549085, "grad_norm": 0.79296875, "learning_rate": 0.0016005892887794942, "loss": 1.805, "step": 2470 }, { "epoch": 0.3162677588634327, "grad_norm": 0.70703125, "learning_rate": 0.0016002577665295084, "loss": 1.7979, "step": 2471 }, { "epoch": 0.316395750671957, "grad_norm": 0.7734375, "learning_rate": 0.0015999261411154865, "loss": 1.8488, "step": 2472 }, { "epoch": 0.31652374248048126, "grad_norm": 1.0234375, "learning_rate": 0.0015995944125944232, "loss": 2.2302, "step": 2473 }, { "epoch": 0.3166517342890055, "grad_norm": 0.62890625, "learning_rate": 0.001599262581023332, "loss": 1.8294, "step": 2474 }, { "epoch": 0.31677972609752975, "grad_norm": 0.80859375, "learning_rate": 0.0015989306464592435, "loss": 2.2851, "step": 2475 }, { "epoch": 0.316907717906054, "grad_norm": 0.69140625, "learning_rate": 0.0015985986089592059, "loss": 2.043, "step": 2476 }, { "epoch": 0.3170357097145783, "grad_norm": 0.70703125, "learning_rate": 0.001598266468580285, "loss": 1.8559, "step": 2477 }, { "epoch": 0.3171637015231025, "grad_norm": 0.64453125, "learning_rate": 0.0015979342253795651, "loss": 1.7928, "step": 2478 }, { "epoch": 0.31729169333162677, "grad_norm": 0.69140625, "learning_rate": 0.001597601879414147, "loss": 2.0841, "step": 2479 }, { "epoch": 0.317419685140151, "grad_norm": 0.7421875, "learning_rate": 0.0015972694307411501, "loss": 2.2738, "step": 2480 }, { "epoch": 0.3175476769486753, "grad_norm": 0.92578125, "learning_rate": 0.0015969368794177107, "loss": 2.1731, "step": 2481 }, { "epoch": 0.31767566875719955, "grad_norm": 0.60546875, "learning_rate": 0.0015966042255009835, "loss": 2.1865, "step": 2482 }, { "epoch": 0.3178036605657238, "grad_norm": 0.828125, "learning_rate": 0.0015962714690481406, "loss": 2.7187, "step": 2483 }, { "epoch": 0.31793165237424803, "grad_norm": 0.66015625, "learning_rate": 0.0015959386101163714, "loss": 2.2262, "step": 2484 }, { "epoch": 0.3180596441827723, "grad_norm": 0.76953125, "learning_rate": 0.001595605648762883, "loss": 1.9918, "step": 2485 }, { "epoch": 0.31818763599129657, "grad_norm": 0.59375, "learning_rate": 0.0015952725850449001, "loss": 1.59, "step": 2486 }, { "epoch": 0.3183156277998208, "grad_norm": 0.53515625, "learning_rate": 0.0015949394190196658, "loss": 1.8648, "step": 2487 }, { "epoch": 0.31844361960834505, "grad_norm": 0.76171875, "learning_rate": 0.001594606150744439, "loss": 1.8528, "step": 2488 }, { "epoch": 0.3185716114168693, "grad_norm": 0.6796875, "learning_rate": 0.0015942727802764982, "loss": 1.7796, "step": 2489 }, { "epoch": 0.3186996032253936, "grad_norm": 0.578125, "learning_rate": 0.001593939307673138, "loss": 1.3752, "step": 2490 }, { "epoch": 0.31882759503391783, "grad_norm": 0.765625, "learning_rate": 0.0015936057329916716, "loss": 1.636, "step": 2491 }, { "epoch": 0.3189555868424421, "grad_norm": 0.6875, "learning_rate": 0.0015932720562894286, "loss": 1.9692, "step": 2492 }, { "epoch": 0.3190835786509663, "grad_norm": 0.65234375, "learning_rate": 0.0015929382776237568, "loss": 1.6485, "step": 2493 }, { "epoch": 0.3192115704594906, "grad_norm": 0.7265625, "learning_rate": 0.0015926043970520222, "loss": 2.2342, "step": 2494 }, { "epoch": 0.31933956226801485, "grad_norm": 0.71875, "learning_rate": 0.0015922704146316066, "loss": 1.9326, "step": 2495 }, { "epoch": 0.3194675540765391, "grad_norm": 0.58203125, "learning_rate": 0.0015919363304199112, "loss": 1.4872, "step": 2496 }, { "epoch": 0.31959554588506334, "grad_norm": 0.671875, "learning_rate": 0.0015916021444743534, "loss": 1.4434, "step": 2497 }, { "epoch": 0.31972353769358763, "grad_norm": 0.90234375, "learning_rate": 0.0015912678568523685, "loss": 2.0994, "step": 2498 }, { "epoch": 0.3198515295021119, "grad_norm": 0.67578125, "learning_rate": 0.001590933467611409, "loss": 1.659, "step": 2499 }, { "epoch": 0.3199795213106361, "grad_norm": 0.625, "learning_rate": 0.0015905989768089453, "loss": 1.5854, "step": 2500 }, { "epoch": 0.32010751311916036, "grad_norm": 0.83984375, "learning_rate": 0.0015902643845024651, "loss": 2.3666, "step": 2501 }, { "epoch": 0.32023550492768466, "grad_norm": 0.796875, "learning_rate": 0.0015899296907494738, "loss": 1.8525, "step": 2502 }, { "epoch": 0.3203634967362089, "grad_norm": 0.6796875, "learning_rate": 0.0015895948956074935, "loss": 1.7156, "step": 2503 }, { "epoch": 0.32049148854473314, "grad_norm": 0.6328125, "learning_rate": 0.001589259999134064, "loss": 1.3789, "step": 2504 }, { "epoch": 0.3206194803532574, "grad_norm": 0.578125, "learning_rate": 0.0015889250013867437, "loss": 1.61, "step": 2505 }, { "epoch": 0.3207474721617816, "grad_norm": 0.625, "learning_rate": 0.0015885899024231064, "loss": 1.8361, "step": 2506 }, { "epoch": 0.3208754639703059, "grad_norm": 0.6953125, "learning_rate": 0.0015882547023007448, "loss": 2.7552, "step": 2507 }, { "epoch": 0.32100345577883016, "grad_norm": 0.93359375, "learning_rate": 0.001587919401077268, "loss": 2.1026, "step": 2508 }, { "epoch": 0.3211314475873544, "grad_norm": 0.8359375, "learning_rate": 0.0015875839988103038, "loss": 2.1325, "step": 2509 }, { "epoch": 0.32125943939587864, "grad_norm": 0.6328125, "learning_rate": 0.0015872484955574955, "loss": 2.1547, "step": 2510 }, { "epoch": 0.32138743120440294, "grad_norm": 0.51953125, "learning_rate": 0.0015869128913765054, "loss": 1.0042, "step": 2511 }, { "epoch": 0.3215154230129272, "grad_norm": 0.8515625, "learning_rate": 0.0015865771863250125, "loss": 1.9183, "step": 2512 }, { "epoch": 0.3216434148214514, "grad_norm": 0.6953125, "learning_rate": 0.001586241380460713, "loss": 1.7418, "step": 2513 }, { "epoch": 0.32177140662997566, "grad_norm": 0.64453125, "learning_rate": 0.0015859054738413209, "loss": 1.9188, "step": 2514 }, { "epoch": 0.32189939843849996, "grad_norm": 0.7421875, "learning_rate": 0.0015855694665245666, "loss": 2.4601, "step": 2515 }, { "epoch": 0.3220273902470242, "grad_norm": 0.6328125, "learning_rate": 0.001585233358568199, "loss": 1.9397, "step": 2516 }, { "epoch": 0.32215538205554844, "grad_norm": 0.59375, "learning_rate": 0.0015848971500299828, "loss": 1.9245, "step": 2517 }, { "epoch": 0.3222833738640727, "grad_norm": 0.58203125, "learning_rate": 0.0015845608409677024, "loss": 1.6154, "step": 2518 }, { "epoch": 0.32241136567259693, "grad_norm": 0.69921875, "learning_rate": 0.0015842244314391567, "loss": 2.3726, "step": 2519 }, { "epoch": 0.3225393574811212, "grad_norm": 0.6953125, "learning_rate": 0.0015838879215021638, "loss": 2.3714, "step": 2520 }, { "epoch": 0.32266734928964547, "grad_norm": 0.64453125, "learning_rate": 0.0015835513112145581, "loss": 1.7793, "step": 2521 }, { "epoch": 0.3227953410981697, "grad_norm": 0.64453125, "learning_rate": 0.0015832146006341914, "loss": 1.4316, "step": 2522 }, { "epoch": 0.32292333290669395, "grad_norm": 0.81640625, "learning_rate": 0.001582877789818933, "loss": 1.6525, "step": 2523 }, { "epoch": 0.32305132471521825, "grad_norm": 0.7890625, "learning_rate": 0.0015825408788266695, "loss": 1.7757, "step": 2524 }, { "epoch": 0.3231793165237425, "grad_norm": 0.9453125, "learning_rate": 0.0015822038677153045, "loss": 2.8677, "step": 2525 }, { "epoch": 0.32330730833226673, "grad_norm": 0.66796875, "learning_rate": 0.0015818667565427585, "loss": 1.7011, "step": 2526 }, { "epoch": 0.32343530014079097, "grad_norm": 0.71484375, "learning_rate": 0.00158152954536697, "loss": 2.1351, "step": 2527 }, { "epoch": 0.32356329194931527, "grad_norm": 0.89453125, "learning_rate": 0.0015811922342458937, "loss": 2.0418, "step": 2528 }, { "epoch": 0.3236912837578395, "grad_norm": 0.625, "learning_rate": 0.0015808548232375027, "loss": 1.4328, "step": 2529 }, { "epoch": 0.32381927556636375, "grad_norm": 0.74609375, "learning_rate": 0.0015805173123997855, "loss": 1.9849, "step": 2530 }, { "epoch": 0.323947267374888, "grad_norm": 0.59765625, "learning_rate": 0.0015801797017907498, "loss": 1.4235, "step": 2531 }, { "epoch": 0.32407525918341223, "grad_norm": 0.6171875, "learning_rate": 0.001579841991468419, "loss": 1.4632, "step": 2532 }, { "epoch": 0.32420325099193653, "grad_norm": 0.61328125, "learning_rate": 0.0015795041814908339, "loss": 1.6792, "step": 2533 }, { "epoch": 0.3243312428004608, "grad_norm": 0.5703125, "learning_rate": 0.0015791662719160532, "loss": 1.4653, "step": 2534 }, { "epoch": 0.324459234608985, "grad_norm": 0.68359375, "learning_rate": 0.0015788282628021518, "loss": 1.6253, "step": 2535 }, { "epoch": 0.32458722641750926, "grad_norm": 0.69140625, "learning_rate": 0.0015784901542072222, "loss": 2.49, "step": 2536 }, { "epoch": 0.32471521822603355, "grad_norm": 0.55859375, "learning_rate": 0.0015781519461893732, "loss": 1.3281, "step": 2537 }, { "epoch": 0.3248432100345578, "grad_norm": 0.765625, "learning_rate": 0.0015778136388067323, "loss": 1.833, "step": 2538 }, { "epoch": 0.32497120184308204, "grad_norm": 0.5859375, "learning_rate": 0.0015774752321174427, "loss": 1.7214, "step": 2539 }, { "epoch": 0.3250991936516063, "grad_norm": 0.62890625, "learning_rate": 0.0015771367261796653, "loss": 1.3661, "step": 2540 }, { "epoch": 0.3252271854601306, "grad_norm": 0.640625, "learning_rate": 0.0015767981210515769, "loss": 1.6082, "step": 2541 }, { "epoch": 0.3253551772686548, "grad_norm": 0.6640625, "learning_rate": 0.0015764594167913737, "loss": 1.6422, "step": 2542 }, { "epoch": 0.32548316907717906, "grad_norm": 0.6015625, "learning_rate": 0.0015761206134572664, "loss": 1.3524, "step": 2543 }, { "epoch": 0.3256111608857033, "grad_norm": 0.78125, "learning_rate": 0.0015757817111074844, "loss": 2.2247, "step": 2544 }, { "epoch": 0.3257391526942276, "grad_norm": 0.75, "learning_rate": 0.0015754427098002733, "loss": 1.2803, "step": 2545 }, { "epoch": 0.32586714450275184, "grad_norm": 0.65234375, "learning_rate": 0.0015751036095938962, "loss": 2.1646, "step": 2546 }, { "epoch": 0.3259951363112761, "grad_norm": 0.703125, "learning_rate": 0.0015747644105466327, "loss": 2.3063, "step": 2547 }, { "epoch": 0.3261231281198003, "grad_norm": 0.67578125, "learning_rate": 0.00157442511271678, "loss": 1.6586, "step": 2548 }, { "epoch": 0.32625111992832456, "grad_norm": 0.62890625, "learning_rate": 0.001574085716162652, "loss": 1.6548, "step": 2549 }, { "epoch": 0.32637911173684886, "grad_norm": 0.6875, "learning_rate": 0.001573746220942579, "loss": 1.8254, "step": 2550 }, { "epoch": 0.3265071035453731, "grad_norm": 0.66796875, "learning_rate": 0.0015734066271149091, "loss": 1.6683, "step": 2551 }, { "epoch": 0.32663509535389734, "grad_norm": 0.6953125, "learning_rate": 0.001573066934738007, "loss": 1.4089, "step": 2552 }, { "epoch": 0.3267630871624216, "grad_norm": 0.9140625, "learning_rate": 0.001572727143870254, "loss": 1.8392, "step": 2553 }, { "epoch": 0.3268910789709459, "grad_norm": 0.6640625, "learning_rate": 0.001572387254570049, "loss": 1.9895, "step": 2554 }, { "epoch": 0.3270190707794701, "grad_norm": 0.6015625, "learning_rate": 0.0015720472668958078, "loss": 1.3728, "step": 2555 }, { "epoch": 0.32714706258799436, "grad_norm": 0.6171875, "learning_rate": 0.0015717071809059622, "loss": 1.7139, "step": 2556 }, { "epoch": 0.3272750543965186, "grad_norm": 0.81640625, "learning_rate": 0.0015713669966589618, "loss": 2.6925, "step": 2557 }, { "epoch": 0.3274030462050429, "grad_norm": 0.8671875, "learning_rate": 0.001571026714213273, "loss": 2.1207, "step": 2558 }, { "epoch": 0.32753103801356714, "grad_norm": 0.60546875, "learning_rate": 0.0015706863336273783, "loss": 1.4671, "step": 2559 }, { "epoch": 0.3276590298220914, "grad_norm": 0.58984375, "learning_rate": 0.001570345854959778, "loss": 1.6438, "step": 2560 }, { "epoch": 0.3277870216306156, "grad_norm": 0.5859375, "learning_rate": 0.0015700052782689888, "loss": 1.302, "step": 2561 }, { "epoch": 0.32791501343913987, "grad_norm": 0.60546875, "learning_rate": 0.0015696646036135445, "loss": 1.9853, "step": 2562 }, { "epoch": 0.32804300524766417, "grad_norm": 0.69921875, "learning_rate": 0.001569323831051995, "loss": 1.5743, "step": 2563 }, { "epoch": 0.3281709970561884, "grad_norm": 1.0234375, "learning_rate": 0.0015689829606429083, "loss": 1.9172, "step": 2564 }, { "epoch": 0.32829898886471265, "grad_norm": 0.63671875, "learning_rate": 0.001568641992444868, "loss": 1.8761, "step": 2565 }, { "epoch": 0.3284269806732369, "grad_norm": 0.703125, "learning_rate": 0.0015683009265164754, "loss": 1.736, "step": 2566 }, { "epoch": 0.3285549724817612, "grad_norm": 0.7890625, "learning_rate": 0.001567959762916348, "loss": 2.5999, "step": 2567 }, { "epoch": 0.32868296429028543, "grad_norm": 0.9453125, "learning_rate": 0.0015676185017031202, "loss": 2.2335, "step": 2568 }, { "epoch": 0.32881095609880967, "grad_norm": 0.640625, "learning_rate": 0.0015672771429354436, "loss": 1.6185, "step": 2569 }, { "epoch": 0.3289389479073339, "grad_norm": 0.8984375, "learning_rate": 0.001566935686671986, "loss": 2.5729, "step": 2570 }, { "epoch": 0.3290669397158582, "grad_norm": 0.68359375, "learning_rate": 0.0015665941329714322, "loss": 1.8442, "step": 2571 }, { "epoch": 0.32919493152438245, "grad_norm": 0.66015625, "learning_rate": 0.0015662524818924838, "loss": 2.1598, "step": 2572 }, { "epoch": 0.3293229233329067, "grad_norm": 0.65625, "learning_rate": 0.0015659107334938592, "loss": 1.945, "step": 2573 }, { "epoch": 0.32945091514143093, "grad_norm": 0.65234375, "learning_rate": 0.001565568887834293, "loss": 1.7213, "step": 2574 }, { "epoch": 0.32957890694995523, "grad_norm": 0.67578125, "learning_rate": 0.0015652269449725374, "loss": 1.8133, "step": 2575 }, { "epoch": 0.32970689875847947, "grad_norm": 0.77734375, "learning_rate": 0.0015648849049673603, "loss": 1.5971, "step": 2576 }, { "epoch": 0.3298348905670037, "grad_norm": 0.80859375, "learning_rate": 0.0015645427678775477, "loss": 1.5584, "step": 2577 }, { "epoch": 0.32996288237552795, "grad_norm": 0.8359375, "learning_rate": 0.0015642005337619005, "loss": 2.3067, "step": 2578 }, { "epoch": 0.3300908741840522, "grad_norm": 0.9140625, "learning_rate": 0.0015638582026792375, "loss": 2.2607, "step": 2579 }, { "epoch": 0.3302188659925765, "grad_norm": 0.55078125, "learning_rate": 0.0015635157746883945, "loss": 1.41, "step": 2580 }, { "epoch": 0.33034685780110074, "grad_norm": 0.67578125, "learning_rate": 0.0015631732498482224, "loss": 1.9094, "step": 2581 }, { "epoch": 0.330474849609625, "grad_norm": 0.68359375, "learning_rate": 0.00156283062821759, "loss": 1.475, "step": 2582 }, { "epoch": 0.3306028414181492, "grad_norm": 0.75390625, "learning_rate": 0.001562487909855382, "loss": 1.8897, "step": 2583 }, { "epoch": 0.3307308332266735, "grad_norm": 0.69921875, "learning_rate": 0.0015621450948205007, "loss": 1.7415, "step": 2584 }, { "epoch": 0.33085882503519776, "grad_norm": 0.71875, "learning_rate": 0.0015618021831718641, "loss": 1.5678, "step": 2585 }, { "epoch": 0.330986816843722, "grad_norm": 0.64453125, "learning_rate": 0.0015614591749684075, "loss": 1.7243, "step": 2586 }, { "epoch": 0.33111480865224624, "grad_norm": 0.6484375, "learning_rate": 0.0015611160702690818, "loss": 2.295, "step": 2587 }, { "epoch": 0.33124280046077054, "grad_norm": 0.70703125, "learning_rate": 0.0015607728691328555, "loss": 1.8111, "step": 2588 }, { "epoch": 0.3313707922692948, "grad_norm": 0.59375, "learning_rate": 0.0015604295716187134, "loss": 1.6041, "step": 2589 }, { "epoch": 0.331498784077819, "grad_norm": 0.7578125, "learning_rate": 0.0015600861777856564, "loss": 1.9043, "step": 2590 }, { "epoch": 0.33162677588634326, "grad_norm": 0.5859375, "learning_rate": 0.0015597426876927027, "loss": 1.5951, "step": 2591 }, { "epoch": 0.3317547676948675, "grad_norm": 0.7265625, "learning_rate": 0.0015593991013988862, "loss": 2.0831, "step": 2592 }, { "epoch": 0.3318827595033918, "grad_norm": 0.67578125, "learning_rate": 0.0015590554189632581, "loss": 1.7642, "step": 2593 }, { "epoch": 0.33201075131191604, "grad_norm": 0.6796875, "learning_rate": 0.0015587116404448859, "loss": 1.8615, "step": 2594 }, { "epoch": 0.3321387431204403, "grad_norm": 0.6328125, "learning_rate": 0.0015583677659028532, "loss": 1.6543, "step": 2595 }, { "epoch": 0.3322667349289645, "grad_norm": 0.72265625, "learning_rate": 0.0015580237953962603, "loss": 2.3826, "step": 2596 }, { "epoch": 0.3323947267374888, "grad_norm": 0.7734375, "learning_rate": 0.0015576797289842245, "loss": 1.7929, "step": 2597 }, { "epoch": 0.33252271854601306, "grad_norm": 0.51953125, "learning_rate": 0.0015573355667258793, "loss": 1.2576, "step": 2598 }, { "epoch": 0.3326507103545373, "grad_norm": 0.66015625, "learning_rate": 0.0015569913086803742, "loss": 1.846, "step": 2599 }, { "epoch": 0.33277870216306155, "grad_norm": 0.5859375, "learning_rate": 0.0015566469549068756, "loss": 1.8493, "step": 2600 }, { "epoch": 0.33290669397158584, "grad_norm": 0.64453125, "learning_rate": 0.0015563025054645663, "loss": 2.0497, "step": 2601 }, { "epoch": 0.3330346857801101, "grad_norm": 0.73828125, "learning_rate": 0.0015559579604126456, "loss": 1.8803, "step": 2602 }, { "epoch": 0.3331626775886343, "grad_norm": 0.8203125, "learning_rate": 0.001555613319810329, "loss": 2.11, "step": 2603 }, { "epoch": 0.33329066939715857, "grad_norm": 0.77734375, "learning_rate": 0.001555268583716849, "loss": 2.0013, "step": 2604 }, { "epoch": 0.3334186612056828, "grad_norm": 0.7578125, "learning_rate": 0.0015549237521914536, "loss": 2.1363, "step": 2605 }, { "epoch": 0.3335466530142071, "grad_norm": 0.5859375, "learning_rate": 0.001554578825293408, "loss": 2.0327, "step": 2606 }, { "epoch": 0.33367464482273135, "grad_norm": 1.1015625, "learning_rate": 0.0015542338030819932, "loss": 2.2868, "step": 2607 }, { "epoch": 0.3338026366312556, "grad_norm": 0.6640625, "learning_rate": 0.0015538886856165072, "loss": 1.5871, "step": 2608 }, { "epoch": 0.33393062843977983, "grad_norm": 0.6171875, "learning_rate": 0.0015535434729562637, "loss": 1.6191, "step": 2609 }, { "epoch": 0.33405862024830413, "grad_norm": 0.5859375, "learning_rate": 0.0015531981651605933, "loss": 1.4873, "step": 2610 }, { "epoch": 0.33418661205682837, "grad_norm": 0.78125, "learning_rate": 0.001552852762288843, "loss": 2.2037, "step": 2611 }, { "epoch": 0.3343146038653526, "grad_norm": 0.65234375, "learning_rate": 0.001552507264400375, "loss": 1.4406, "step": 2612 }, { "epoch": 0.33444259567387685, "grad_norm": 0.65625, "learning_rate": 0.0015521616715545696, "loss": 1.5472, "step": 2613 }, { "epoch": 0.33457058748240115, "grad_norm": 0.74609375, "learning_rate": 0.0015518159838108225, "loss": 2.2052, "step": 2614 }, { "epoch": 0.3346985792909254, "grad_norm": 0.72265625, "learning_rate": 0.0015514702012285452, "loss": 1.8324, "step": 2615 }, { "epoch": 0.33482657109944963, "grad_norm": 0.7734375, "learning_rate": 0.0015511243238671667, "loss": 2.1583, "step": 2616 }, { "epoch": 0.3349545629079739, "grad_norm": 0.6796875, "learning_rate": 0.0015507783517861307, "loss": 1.9711, "step": 2617 }, { "epoch": 0.33508255471649817, "grad_norm": 0.73046875, "learning_rate": 0.0015504322850448992, "loss": 2.0613, "step": 2618 }, { "epoch": 0.3352105465250224, "grad_norm": 0.58984375, "learning_rate": 0.0015500861237029487, "loss": 1.4455, "step": 2619 }, { "epoch": 0.33533853833354665, "grad_norm": 0.63671875, "learning_rate": 0.0015497398678197727, "loss": 1.5199, "step": 2620 }, { "epoch": 0.3354665301420709, "grad_norm": 0.65234375, "learning_rate": 0.0015493935174548813, "loss": 1.422, "step": 2621 }, { "epoch": 0.33559452195059514, "grad_norm": 0.90625, "learning_rate": 0.0015490470726678002, "loss": 2.3619, "step": 2622 }, { "epoch": 0.33572251375911943, "grad_norm": 0.7109375, "learning_rate": 0.0015487005335180712, "loss": 1.3433, "step": 2623 }, { "epoch": 0.3358505055676437, "grad_norm": 0.97265625, "learning_rate": 0.0015483539000652532, "loss": 2.2245, "step": 2624 }, { "epoch": 0.3359784973761679, "grad_norm": 0.640625, "learning_rate": 0.0015480071723689203, "loss": 2.0421, "step": 2625 }, { "epoch": 0.33610648918469216, "grad_norm": 0.61328125, "learning_rate": 0.0015476603504886637, "loss": 1.4549, "step": 2626 }, { "epoch": 0.33623448099321646, "grad_norm": 0.8125, "learning_rate": 0.0015473134344840902, "loss": 1.927, "step": 2627 }, { "epoch": 0.3363624728017407, "grad_norm": 0.625, "learning_rate": 0.0015469664244148233, "loss": 1.4221, "step": 2628 }, { "epoch": 0.33649046461026494, "grad_norm": 0.7265625, "learning_rate": 0.0015466193203405015, "loss": 2.0512, "step": 2629 }, { "epoch": 0.3366184564187892, "grad_norm": 0.70703125, "learning_rate": 0.0015462721223207807, "loss": 1.7954, "step": 2630 }, { "epoch": 0.3367464482273135, "grad_norm": 0.68359375, "learning_rate": 0.0015459248304153328, "loss": 1.753, "step": 2631 }, { "epoch": 0.3368744400358377, "grad_norm": 0.59765625, "learning_rate": 0.001545577444683845, "loss": 1.4968, "step": 2632 }, { "epoch": 0.33700243184436196, "grad_norm": 0.70703125, "learning_rate": 0.0015452299651860218, "loss": 1.9184, "step": 2633 }, { "epoch": 0.3371304236528862, "grad_norm": 0.66015625, "learning_rate": 0.0015448823919815829, "loss": 2.206, "step": 2634 }, { "epoch": 0.33725841546141044, "grad_norm": 0.62890625, "learning_rate": 0.0015445347251302642, "loss": 1.5815, "step": 2635 }, { "epoch": 0.33738640726993474, "grad_norm": 0.734375, "learning_rate": 0.0015441869646918179, "loss": 1.9161, "step": 2636 }, { "epoch": 0.337514399078459, "grad_norm": 0.6640625, "learning_rate": 0.0015438391107260125, "loss": 2.0598, "step": 2637 }, { "epoch": 0.3376423908869832, "grad_norm": 0.69140625, "learning_rate": 0.0015434911632926324, "loss": 1.7627, "step": 2638 }, { "epoch": 0.33777038269550747, "grad_norm": 0.89453125, "learning_rate": 0.0015431431224514779, "loss": 2.4773, "step": 2639 }, { "epoch": 0.33789837450403176, "grad_norm": 0.59765625, "learning_rate": 0.0015427949882623653, "loss": 1.8044, "step": 2640 }, { "epoch": 0.338026366312556, "grad_norm": 0.6640625, "learning_rate": 0.0015424467607851274, "loss": 1.7626, "step": 2641 }, { "epoch": 0.33815435812108025, "grad_norm": 0.73046875, "learning_rate": 0.0015420984400796125, "loss": 2.3309, "step": 2642 }, { "epoch": 0.3382823499296045, "grad_norm": 0.625, "learning_rate": 0.0015417500262056851, "loss": 1.7784, "step": 2643 }, { "epoch": 0.3384103417381288, "grad_norm": 0.5703125, "learning_rate": 0.0015414015192232262, "loss": 2.0481, "step": 2644 }, { "epoch": 0.338538333546653, "grad_norm": 0.88671875, "learning_rate": 0.001541052919192132, "loss": 2.2609, "step": 2645 }, { "epoch": 0.33866632535517727, "grad_norm": 0.69140625, "learning_rate": 0.001540704226172315, "loss": 2.1834, "step": 2646 }, { "epoch": 0.3387943171637015, "grad_norm": 0.5703125, "learning_rate": 0.001540355440223704, "loss": 1.4493, "step": 2647 }, { "epoch": 0.33892230897222575, "grad_norm": 0.7265625, "learning_rate": 0.0015400065614062438, "loss": 2.0324, "step": 2648 }, { "epoch": 0.33905030078075005, "grad_norm": 0.68359375, "learning_rate": 0.001539657589779894, "loss": 1.8173, "step": 2649 }, { "epoch": 0.3391782925892743, "grad_norm": 0.5703125, "learning_rate": 0.001539308525404632, "loss": 1.351, "step": 2650 }, { "epoch": 0.33930628439779853, "grad_norm": 0.78515625, "learning_rate": 0.0015389593683404498, "loss": 1.9675, "step": 2651 }, { "epoch": 0.33943427620632277, "grad_norm": 0.64453125, "learning_rate": 0.0015386101186473553, "loss": 1.6773, "step": 2652 }, { "epoch": 0.33956226801484707, "grad_norm": 0.67578125, "learning_rate": 0.0015382607763853736, "loss": 1.9425, "step": 2653 }, { "epoch": 0.3396902598233713, "grad_norm": 0.71484375, "learning_rate": 0.0015379113416145438, "loss": 1.9117, "step": 2654 }, { "epoch": 0.33981825163189555, "grad_norm": 0.65234375, "learning_rate": 0.0015375618143949228, "loss": 1.9229, "step": 2655 }, { "epoch": 0.3399462434404198, "grad_norm": 0.66015625, "learning_rate": 0.001537212194786582, "loss": 2.0164, "step": 2656 }, { "epoch": 0.3400742352489441, "grad_norm": 0.76953125, "learning_rate": 0.0015368624828496096, "loss": 1.8742, "step": 2657 }, { "epoch": 0.34020222705746833, "grad_norm": 0.69140625, "learning_rate": 0.0015365126786441087, "loss": 1.9559, "step": 2658 }, { "epoch": 0.3403302188659926, "grad_norm": 0.58203125, "learning_rate": 0.0015361627822301997, "loss": 1.3905, "step": 2659 }, { "epoch": 0.3404582106745168, "grad_norm": 0.71484375, "learning_rate": 0.001535812793668017, "loss": 2.0236, "step": 2660 }, { "epoch": 0.3405862024830411, "grad_norm": 0.8125, "learning_rate": 0.0015354627130177124, "loss": 1.9235, "step": 2661 }, { "epoch": 0.34071419429156535, "grad_norm": 0.578125, "learning_rate": 0.001535112540339453, "loss": 1.4778, "step": 2662 }, { "epoch": 0.3408421861000896, "grad_norm": 0.703125, "learning_rate": 0.0015347622756934213, "loss": 1.9269, "step": 2663 }, { "epoch": 0.34097017790861384, "grad_norm": 0.734375, "learning_rate": 0.0015344119191398162, "loss": 1.7719, "step": 2664 }, { "epoch": 0.3410981697171381, "grad_norm": 0.58203125, "learning_rate": 0.0015340614707388517, "loss": 1.7263, "step": 2665 }, { "epoch": 0.3412261615256624, "grad_norm": 0.6484375, "learning_rate": 0.0015337109305507593, "loss": 1.9125, "step": 2666 }, { "epoch": 0.3413541533341866, "grad_norm": 0.58203125, "learning_rate": 0.0015333602986357833, "loss": 1.5131, "step": 2667 }, { "epoch": 0.34148214514271086, "grad_norm": 0.8125, "learning_rate": 0.001533009575054187, "loss": 2.1511, "step": 2668 }, { "epoch": 0.3416101369512351, "grad_norm": 0.65625, "learning_rate": 0.0015326587598662466, "loss": 1.4644, "step": 2669 }, { "epoch": 0.3417381287597594, "grad_norm": 0.8046875, "learning_rate": 0.0015323078531322568, "loss": 1.684, "step": 2670 }, { "epoch": 0.34186612056828364, "grad_norm": 0.671875, "learning_rate": 0.0015319568549125254, "loss": 2.244, "step": 2671 }, { "epoch": 0.3419941123768079, "grad_norm": 0.76171875, "learning_rate": 0.0015316057652673776, "loss": 2.4634, "step": 2672 }, { "epoch": 0.3421221041853321, "grad_norm": 0.78125, "learning_rate": 0.0015312545842571538, "loss": 1.6878, "step": 2673 }, { "epoch": 0.3422500959938564, "grad_norm": 0.73046875, "learning_rate": 0.0015309033119422107, "loss": 1.876, "step": 2674 }, { "epoch": 0.34237808780238066, "grad_norm": 0.69921875, "learning_rate": 0.0015305519483829193, "loss": 1.8942, "step": 2675 }, { "epoch": 0.3425060796109049, "grad_norm": 0.97265625, "learning_rate": 0.0015302004936396673, "loss": 1.7484, "step": 2676 }, { "epoch": 0.34263407141942914, "grad_norm": 0.7109375, "learning_rate": 0.0015298489477728586, "loss": 1.8411, "step": 2677 }, { "epoch": 0.3427620632279534, "grad_norm": 0.6328125, "learning_rate": 0.0015294973108429112, "loss": 1.6062, "step": 2678 }, { "epoch": 0.3428900550364777, "grad_norm": 1.046875, "learning_rate": 0.0015291455829102602, "loss": 2.3653, "step": 2679 }, { "epoch": 0.3430180468450019, "grad_norm": 0.64453125, "learning_rate": 0.0015287937640353552, "loss": 1.5391, "step": 2680 }, { "epoch": 0.34314603865352616, "grad_norm": 0.73828125, "learning_rate": 0.0015284418542786625, "loss": 1.8314, "step": 2681 }, { "epoch": 0.3432740304620504, "grad_norm": 0.66015625, "learning_rate": 0.0015280898537006634, "loss": 1.9002, "step": 2682 }, { "epoch": 0.3434020222705747, "grad_norm": 0.62109375, "learning_rate": 0.0015277377623618548, "loss": 1.7858, "step": 2683 }, { "epoch": 0.34353001407909894, "grad_norm": 0.6328125, "learning_rate": 0.0015273855803227493, "loss": 1.5348, "step": 2684 }, { "epoch": 0.3436580058876232, "grad_norm": 0.6796875, "learning_rate": 0.0015270333076438747, "loss": 1.8471, "step": 2685 }, { "epoch": 0.3437859976961474, "grad_norm": 0.6796875, "learning_rate": 0.0015266809443857756, "loss": 1.9142, "step": 2686 }, { "epoch": 0.3439139895046717, "grad_norm": 0.87109375, "learning_rate": 0.0015263284906090106, "loss": 2.1114, "step": 2687 }, { "epoch": 0.34404198131319597, "grad_norm": 0.67578125, "learning_rate": 0.0015259759463741554, "loss": 2.1347, "step": 2688 }, { "epoch": 0.3441699731217202, "grad_norm": 0.8125, "learning_rate": 0.0015256233117418, "loss": 1.7064, "step": 2689 }, { "epoch": 0.34429796493024445, "grad_norm": 1.1015625, "learning_rate": 0.00152527058677255, "loss": 1.3559, "step": 2690 }, { "epoch": 0.34442595673876875, "grad_norm": 0.703125, "learning_rate": 0.0015249177715270276, "loss": 1.7366, "step": 2691 }, { "epoch": 0.344553948547293, "grad_norm": 0.7421875, "learning_rate": 0.001524564866065869, "loss": 1.9508, "step": 2692 }, { "epoch": 0.34468194035581723, "grad_norm": 0.58984375, "learning_rate": 0.0015242118704497276, "loss": 1.2293, "step": 2693 }, { "epoch": 0.34480993216434147, "grad_norm": 0.87109375, "learning_rate": 0.001523858784739271, "loss": 1.5188, "step": 2694 }, { "epoch": 0.3449379239728657, "grad_norm": 0.72265625, "learning_rate": 0.001523505608995183, "loss": 2.4958, "step": 2695 }, { "epoch": 0.34506591578139, "grad_norm": 0.90625, "learning_rate": 0.001523152343278162, "loss": 2.0149, "step": 2696 }, { "epoch": 0.34519390758991425, "grad_norm": 0.734375, "learning_rate": 0.0015227989876489226, "loss": 1.9293, "step": 2697 }, { "epoch": 0.3453218993984385, "grad_norm": 0.5859375, "learning_rate": 0.001522445542168195, "loss": 1.8729, "step": 2698 }, { "epoch": 0.34544989120696273, "grad_norm": 0.92578125, "learning_rate": 0.001522092006896725, "loss": 2.6173, "step": 2699 }, { "epoch": 0.34557788301548703, "grad_norm": 0.56640625, "learning_rate": 0.0015217383818952723, "loss": 1.7474, "step": 2700 }, { "epoch": 0.34570587482401127, "grad_norm": 0.62890625, "learning_rate": 0.0015213846672246139, "loss": 1.6723, "step": 2701 }, { "epoch": 0.3458338666325355, "grad_norm": 0.75390625, "learning_rate": 0.0015210308629455408, "loss": 1.8196, "step": 2702 }, { "epoch": 0.34596185844105976, "grad_norm": 0.6328125, "learning_rate": 0.0015206769691188601, "loss": 1.8093, "step": 2703 }, { "epoch": 0.34608985024958405, "grad_norm": 0.67578125, "learning_rate": 0.001520322985805395, "loss": 1.6877, "step": 2704 }, { "epoch": 0.3462178420581083, "grad_norm": 0.74609375, "learning_rate": 0.0015199689130659822, "loss": 1.9527, "step": 2705 }, { "epoch": 0.34634583386663254, "grad_norm": 0.734375, "learning_rate": 0.0015196147509614756, "loss": 1.9702, "step": 2706 }, { "epoch": 0.3464738256751568, "grad_norm": 0.6953125, "learning_rate": 0.0015192604995527433, "loss": 1.997, "step": 2707 }, { "epoch": 0.346601817483681, "grad_norm": 0.6640625, "learning_rate": 0.001518906158900669, "loss": 1.9782, "step": 2708 }, { "epoch": 0.3467298092922053, "grad_norm": 0.60546875, "learning_rate": 0.0015185517290661526, "loss": 1.4331, "step": 2709 }, { "epoch": 0.34685780110072956, "grad_norm": 0.74609375, "learning_rate": 0.0015181972101101083, "loss": 1.6338, "step": 2710 }, { "epoch": 0.3469857929092538, "grad_norm": 0.6015625, "learning_rate": 0.0015178426020934653, "loss": 1.7964, "step": 2711 }, { "epoch": 0.34711378471777804, "grad_norm": 0.890625, "learning_rate": 0.0015174879050771698, "loss": 1.9825, "step": 2712 }, { "epoch": 0.34724177652630234, "grad_norm": 0.8203125, "learning_rate": 0.0015171331191221812, "loss": 1.7324, "step": 2713 }, { "epoch": 0.3473697683348266, "grad_norm": 0.6640625, "learning_rate": 0.0015167782442894757, "loss": 1.8582, "step": 2714 }, { "epoch": 0.3474977601433508, "grad_norm": 0.67578125, "learning_rate": 0.0015164232806400447, "loss": 2.2377, "step": 2715 }, { "epoch": 0.34762575195187506, "grad_norm": 0.703125, "learning_rate": 0.0015160682282348938, "loss": 1.9394, "step": 2716 }, { "epoch": 0.34775374376039936, "grad_norm": 0.703125, "learning_rate": 0.001515713087135045, "loss": 2.2358, "step": 2717 }, { "epoch": 0.3478817355689236, "grad_norm": 0.6796875, "learning_rate": 0.0015153578574015347, "loss": 2.3023, "step": 2718 }, { "epoch": 0.34800972737744784, "grad_norm": 0.59765625, "learning_rate": 0.0015150025390954151, "loss": 1.5012, "step": 2719 }, { "epoch": 0.3481377191859721, "grad_norm": 0.65234375, "learning_rate": 0.0015146471322777532, "loss": 1.8386, "step": 2720 }, { "epoch": 0.3482657109944963, "grad_norm": 0.625, "learning_rate": 0.0015142916370096322, "loss": 1.533, "step": 2721 }, { "epoch": 0.3483937028030206, "grad_norm": 0.640625, "learning_rate": 0.0015139360533521487, "loss": 1.5585, "step": 2722 }, { "epoch": 0.34852169461154486, "grad_norm": 0.64453125, "learning_rate": 0.0015135803813664157, "loss": 1.312, "step": 2723 }, { "epoch": 0.3486496864200691, "grad_norm": 0.6484375, "learning_rate": 0.001513224621113562, "loss": 2.0215, "step": 2724 }, { "epoch": 0.34877767822859335, "grad_norm": 0.6328125, "learning_rate": 0.0015128687726547296, "loss": 1.3335, "step": 2725 }, { "epoch": 0.34890567003711764, "grad_norm": 0.61328125, "learning_rate": 0.001512512836051078, "loss": 1.4538, "step": 2726 }, { "epoch": 0.3490336618456419, "grad_norm": 0.66015625, "learning_rate": 0.0015121568113637795, "loss": 1.5904, "step": 2727 }, { "epoch": 0.3491616536541661, "grad_norm": 0.67578125, "learning_rate": 0.001511800698654024, "loss": 1.5402, "step": 2728 }, { "epoch": 0.34928964546269037, "grad_norm": 0.625, "learning_rate": 0.0015114444979830144, "loss": 1.7386, "step": 2729 }, { "epoch": 0.34941763727121466, "grad_norm": 0.671875, "learning_rate": 0.0015110882094119696, "loss": 1.9228, "step": 2730 }, { "epoch": 0.3495456290797389, "grad_norm": 0.859375, "learning_rate": 0.0015107318330021236, "loss": 1.962, "step": 2731 }, { "epoch": 0.34967362088826315, "grad_norm": 0.58984375, "learning_rate": 0.001510375368814726, "loss": 1.2769, "step": 2732 }, { "epoch": 0.3498016126967874, "grad_norm": 0.6796875, "learning_rate": 0.0015100188169110402, "loss": 2.0172, "step": 2733 }, { "epoch": 0.3499296045053117, "grad_norm": 0.8671875, "learning_rate": 0.001509662177352346, "loss": 1.4616, "step": 2734 }, { "epoch": 0.35005759631383593, "grad_norm": 0.6953125, "learning_rate": 0.0015093054501999375, "loss": 1.592, "step": 2735 }, { "epoch": 0.35018558812236017, "grad_norm": 0.62890625, "learning_rate": 0.0015089486355151238, "loss": 1.8327, "step": 2736 }, { "epoch": 0.3503135799308844, "grad_norm": 0.68359375, "learning_rate": 0.0015085917333592298, "loss": 1.7719, "step": 2737 }, { "epoch": 0.35044157173940865, "grad_norm": 0.6796875, "learning_rate": 0.0015082347437935944, "loss": 1.824, "step": 2738 }, { "epoch": 0.35056956354793295, "grad_norm": 0.88671875, "learning_rate": 0.0015078776668795725, "loss": 2.6873, "step": 2739 }, { "epoch": 0.3506975553564572, "grad_norm": 0.8203125, "learning_rate": 0.001507520502678533, "loss": 2.5574, "step": 2740 }, { "epoch": 0.35082554716498143, "grad_norm": 0.828125, "learning_rate": 0.001507163251251861, "loss": 1.706, "step": 2741 }, { "epoch": 0.3509535389735057, "grad_norm": 0.6484375, "learning_rate": 0.0015068059126609557, "loss": 1.4211, "step": 2742 }, { "epoch": 0.35108153078202997, "grad_norm": 0.671875, "learning_rate": 0.0015064484869672316, "loss": 1.6856, "step": 2743 }, { "epoch": 0.3512095225905542, "grad_norm": 0.5859375, "learning_rate": 0.0015060909742321176, "loss": 1.3952, "step": 2744 }, { "epoch": 0.35133751439907845, "grad_norm": 0.6484375, "learning_rate": 0.0015057333745170588, "loss": 1.8818, "step": 2745 }, { "epoch": 0.3514655062076027, "grad_norm": 0.5703125, "learning_rate": 0.0015053756878835142, "loss": 1.6038, "step": 2746 }, { "epoch": 0.351593498016127, "grad_norm": 0.7890625, "learning_rate": 0.0015050179143929578, "loss": 1.694, "step": 2747 }, { "epoch": 0.35172148982465123, "grad_norm": 0.625, "learning_rate": 0.0015046600541068792, "loss": 1.8077, "step": 2748 }, { "epoch": 0.3518494816331755, "grad_norm": 0.59765625, "learning_rate": 0.0015043021070867825, "loss": 1.8925, "step": 2749 }, { "epoch": 0.3519774734416997, "grad_norm": 0.66796875, "learning_rate": 0.0015039440733941862, "loss": 1.3568, "step": 2750 }, { "epoch": 0.35210546525022396, "grad_norm": 0.77734375, "learning_rate": 0.0015035859530906246, "loss": 1.8097, "step": 2751 }, { "epoch": 0.35223345705874826, "grad_norm": 0.70703125, "learning_rate": 0.0015032277462376468, "loss": 1.8094, "step": 2752 }, { "epoch": 0.3523614488672725, "grad_norm": 1.640625, "learning_rate": 0.001502869452896815, "loss": 1.7377, "step": 2753 }, { "epoch": 0.35248944067579674, "grad_norm": 0.65234375, "learning_rate": 0.00150251107312971, "loss": 1.7214, "step": 2754 }, { "epoch": 0.352617432484321, "grad_norm": 0.6171875, "learning_rate": 0.0015021526069979231, "loss": 1.1805, "step": 2755 }, { "epoch": 0.3527454242928453, "grad_norm": 0.74609375, "learning_rate": 0.0015017940545630638, "loss": 1.9942, "step": 2756 }, { "epoch": 0.3528734161013695, "grad_norm": 0.73828125, "learning_rate": 0.0015014354158867547, "loss": 2.0538, "step": 2757 }, { "epoch": 0.35300140790989376, "grad_norm": 0.84375, "learning_rate": 0.0015010766910306338, "loss": 1.4594, "step": 2758 }, { "epoch": 0.353129399718418, "grad_norm": 0.640625, "learning_rate": 0.0015007178800563535, "loss": 1.7864, "step": 2759 }, { "epoch": 0.3532573915269423, "grad_norm": 0.62890625, "learning_rate": 0.0015003589830255813, "loss": 1.6187, "step": 2760 }, { "epoch": 0.35338538333546654, "grad_norm": 0.6953125, "learning_rate": 0.0015, "loss": 1.5381, "step": 2761 }, { "epoch": 0.3535133751439908, "grad_norm": 0.5625, "learning_rate": 0.0014996409310413061, "loss": 1.3154, "step": 2762 }, { "epoch": 0.353641366952515, "grad_norm": 0.67578125, "learning_rate": 0.0014992817762112122, "loss": 1.6695, "step": 2763 }, { "epoch": 0.3537693587610393, "grad_norm": 0.74609375, "learning_rate": 0.0014989225355714435, "loss": 2.2739, "step": 2764 }, { "epoch": 0.35389735056956356, "grad_norm": 0.66015625, "learning_rate": 0.0014985632091837427, "loss": 1.446, "step": 2765 }, { "epoch": 0.3540253423780878, "grad_norm": 0.6796875, "learning_rate": 0.0014982037971098651, "loss": 2.0922, "step": 2766 }, { "epoch": 0.35415333418661205, "grad_norm": 0.7578125, "learning_rate": 0.001497844299411582, "loss": 2.176, "step": 2767 }, { "epoch": 0.3542813259951363, "grad_norm": 0.62890625, "learning_rate": 0.0014974847161506786, "loss": 1.7577, "step": 2768 }, { "epoch": 0.3544093178036606, "grad_norm": 0.6171875, "learning_rate": 0.0014971250473889551, "loss": 1.7986, "step": 2769 }, { "epoch": 0.3545373096121848, "grad_norm": 0.578125, "learning_rate": 0.001496765293188226, "loss": 1.4225, "step": 2770 }, { "epoch": 0.35466530142070907, "grad_norm": 0.59375, "learning_rate": 0.001496405453610322, "loss": 1.4218, "step": 2771 }, { "epoch": 0.3547932932292333, "grad_norm": 0.71875, "learning_rate": 0.0014960455287170867, "loss": 2.0929, "step": 2772 }, { "epoch": 0.3549212850377576, "grad_norm": 0.5625, "learning_rate": 0.0014956855185703785, "loss": 1.6467, "step": 2773 }, { "epoch": 0.35504927684628185, "grad_norm": 0.7890625, "learning_rate": 0.0014953254232320723, "loss": 1.4599, "step": 2774 }, { "epoch": 0.3551772686548061, "grad_norm": 0.734375, "learning_rate": 0.001494965242764055, "loss": 2.088, "step": 2775 }, { "epoch": 0.35530526046333033, "grad_norm": 0.62890625, "learning_rate": 0.0014946049772282303, "loss": 1.8337, "step": 2776 }, { "epoch": 0.3554332522718546, "grad_norm": 0.90234375, "learning_rate": 0.0014942446266865152, "loss": 1.5227, "step": 2777 }, { "epoch": 0.35556124408037887, "grad_norm": 0.62109375, "learning_rate": 0.0014938841912008423, "loss": 1.5772, "step": 2778 }, { "epoch": 0.3556892358889031, "grad_norm": 0.68359375, "learning_rate": 0.0014935236708331582, "loss": 1.6489, "step": 2779 }, { "epoch": 0.35581722769742735, "grad_norm": 0.9765625, "learning_rate": 0.0014931630656454234, "loss": 1.93, "step": 2780 }, { "epoch": 0.3559452195059516, "grad_norm": 0.66796875, "learning_rate": 0.0014928023756996145, "loss": 2.2433, "step": 2781 }, { "epoch": 0.3560732113144759, "grad_norm": 0.66796875, "learning_rate": 0.0014924416010577218, "loss": 1.7999, "step": 2782 }, { "epoch": 0.35620120312300013, "grad_norm": 0.68359375, "learning_rate": 0.0014920807417817504, "loss": 1.908, "step": 2783 }, { "epoch": 0.3563291949315244, "grad_norm": 0.66015625, "learning_rate": 0.0014917197979337194, "loss": 2.1507, "step": 2784 }, { "epoch": 0.3564571867400486, "grad_norm": 0.6953125, "learning_rate": 0.001491358769575663, "loss": 1.4236, "step": 2785 }, { "epoch": 0.3565851785485729, "grad_norm": 0.64453125, "learning_rate": 0.0014909976567696302, "loss": 1.8716, "step": 2786 }, { "epoch": 0.35671317035709715, "grad_norm": 0.703125, "learning_rate": 0.0014906364595776832, "loss": 1.7736, "step": 2787 }, { "epoch": 0.3568411621656214, "grad_norm": 0.7421875, "learning_rate": 0.0014902751780619008, "loss": 1.8803, "step": 2788 }, { "epoch": 0.35696915397414564, "grad_norm": 0.61328125, "learning_rate": 0.0014899138122843741, "loss": 1.388, "step": 2789 }, { "epoch": 0.35709714578266993, "grad_norm": 0.68359375, "learning_rate": 0.0014895523623072102, "loss": 2.3306, "step": 2790 }, { "epoch": 0.3572251375911942, "grad_norm": 0.65625, "learning_rate": 0.00148919082819253, "loss": 1.6838, "step": 2791 }, { "epoch": 0.3573531293997184, "grad_norm": 0.79296875, "learning_rate": 0.0014888292100024686, "loss": 2.4202, "step": 2792 }, { "epoch": 0.35748112120824266, "grad_norm": 0.71484375, "learning_rate": 0.0014884675077991765, "loss": 1.8399, "step": 2793 }, { "epoch": 0.3576091130167669, "grad_norm": 0.62890625, "learning_rate": 0.001488105721644818, "loss": 2.1898, "step": 2794 }, { "epoch": 0.3577371048252912, "grad_norm": 0.482421875, "learning_rate": 0.001487743851601572, "loss": 1.0444, "step": 2795 }, { "epoch": 0.35786509663381544, "grad_norm": 0.6484375, "learning_rate": 0.0014873818977316313, "loss": 1.7961, "step": 2796 }, { "epoch": 0.3579930884423397, "grad_norm": 0.625, "learning_rate": 0.0014870198600972042, "loss": 1.4775, "step": 2797 }, { "epoch": 0.3581210802508639, "grad_norm": 0.64453125, "learning_rate": 0.0014866577387605117, "loss": 1.676, "step": 2798 }, { "epoch": 0.3582490720593882, "grad_norm": 0.65625, "learning_rate": 0.0014862955337837916, "loss": 1.8587, "step": 2799 }, { "epoch": 0.35837706386791246, "grad_norm": 0.80078125, "learning_rate": 0.0014859332452292935, "loss": 1.9806, "step": 2800 }, { "epoch": 0.3585050556764367, "grad_norm": 0.6328125, "learning_rate": 0.0014855708731592835, "loss": 1.9042, "step": 2801 }, { "epoch": 0.35863304748496094, "grad_norm": 0.6015625, "learning_rate": 0.0014852084176360403, "loss": 1.1844, "step": 2802 }, { "epoch": 0.35876103929348524, "grad_norm": 0.69921875, "learning_rate": 0.0014848458787218581, "loss": 1.8746, "step": 2803 }, { "epoch": 0.3588890311020095, "grad_norm": 0.765625, "learning_rate": 0.0014844832564790455, "loss": 1.6459, "step": 2804 }, { "epoch": 0.3590170229105337, "grad_norm": 0.72265625, "learning_rate": 0.0014841205509699242, "loss": 1.6936, "step": 2805 }, { "epoch": 0.35914501471905796, "grad_norm": 0.64453125, "learning_rate": 0.001483757762256832, "loss": 1.7737, "step": 2806 }, { "epoch": 0.35927300652758226, "grad_norm": 0.80078125, "learning_rate": 0.001483394890402119, "loss": 1.8089, "step": 2807 }, { "epoch": 0.3594009983361065, "grad_norm": 0.57421875, "learning_rate": 0.0014830319354681514, "loss": 1.3573, "step": 2808 }, { "epoch": 0.35952899014463074, "grad_norm": 0.84375, "learning_rate": 0.0014826688975173084, "loss": 1.9314, "step": 2809 }, { "epoch": 0.359656981953155, "grad_norm": 0.6640625, "learning_rate": 0.0014823057766119845, "loss": 1.8584, "step": 2810 }, { "epoch": 0.3597849737616792, "grad_norm": 0.796875, "learning_rate": 0.0014819425728145874, "loss": 1.916, "step": 2811 }, { "epoch": 0.3599129655702035, "grad_norm": 0.703125, "learning_rate": 0.0014815792861875398, "loss": 1.9193, "step": 2812 }, { "epoch": 0.36004095737872777, "grad_norm": 0.79296875, "learning_rate": 0.0014812159167932784, "loss": 1.492, "step": 2813 }, { "epoch": 0.360168949187252, "grad_norm": 0.671875, "learning_rate": 0.0014808524646942537, "loss": 1.7099, "step": 2814 }, { "epoch": 0.36029694099577625, "grad_norm": 0.6796875, "learning_rate": 0.0014804889299529318, "loss": 1.3755, "step": 2815 }, { "epoch": 0.36042493280430055, "grad_norm": 0.671875, "learning_rate": 0.0014801253126317912, "loss": 1.3531, "step": 2816 }, { "epoch": 0.3605529246128248, "grad_norm": 0.59375, "learning_rate": 0.0014797616127933259, "loss": 1.3387, "step": 2817 }, { "epoch": 0.36068091642134903, "grad_norm": 0.640625, "learning_rate": 0.001479397830500043, "loss": 1.8097, "step": 2818 }, { "epoch": 0.36080890822987327, "grad_norm": 0.73046875, "learning_rate": 0.0014790339658144651, "loss": 2.1758, "step": 2819 }, { "epoch": 0.36093690003839757, "grad_norm": 0.65625, "learning_rate": 0.001478670018799128, "loss": 1.6263, "step": 2820 }, { "epoch": 0.3610648918469218, "grad_norm": 0.6875, "learning_rate": 0.0014783059895165818, "loss": 1.3112, "step": 2821 }, { "epoch": 0.36119288365544605, "grad_norm": 0.625, "learning_rate": 0.001477941878029391, "loss": 1.6563, "step": 2822 }, { "epoch": 0.3613208754639703, "grad_norm": 0.76953125, "learning_rate": 0.0014775776844001339, "loss": 1.6563, "step": 2823 }, { "epoch": 0.36144886727249453, "grad_norm": 0.59375, "learning_rate": 0.001477213408691403, "loss": 1.5769, "step": 2824 }, { "epoch": 0.36157685908101883, "grad_norm": 0.6484375, "learning_rate": 0.0014768490509658053, "loss": 1.706, "step": 2825 }, { "epoch": 0.3617048508895431, "grad_norm": 0.7421875, "learning_rate": 0.0014764846112859614, "loss": 1.8487, "step": 2826 }, { "epoch": 0.3618328426980673, "grad_norm": 0.6796875, "learning_rate": 0.0014761200897145063, "loss": 1.7212, "step": 2827 }, { "epoch": 0.36196083450659156, "grad_norm": 0.6171875, "learning_rate": 0.0014757554863140884, "loss": 1.3699, "step": 2828 }, { "epoch": 0.36208882631511585, "grad_norm": 0.69140625, "learning_rate": 0.0014753908011473716, "loss": 1.4182, "step": 2829 }, { "epoch": 0.3622168181236401, "grad_norm": 0.51171875, "learning_rate": 0.0014750260342770328, "loss": 1.0392, "step": 2830 }, { "epoch": 0.36234480993216434, "grad_norm": 0.60546875, "learning_rate": 0.0014746611857657623, "loss": 1.8387, "step": 2831 }, { "epoch": 0.3624728017406886, "grad_norm": 0.6015625, "learning_rate": 0.0014742962556762657, "loss": 1.6684, "step": 2832 }, { "epoch": 0.3626007935492129, "grad_norm": 0.6953125, "learning_rate": 0.0014739312440712627, "loss": 1.6874, "step": 2833 }, { "epoch": 0.3627287853577371, "grad_norm": 0.6171875, "learning_rate": 0.0014735661510134857, "loss": 1.5708, "step": 2834 }, { "epoch": 0.36285677716626136, "grad_norm": 0.6171875, "learning_rate": 0.0014732009765656823, "loss": 1.3741, "step": 2835 }, { "epoch": 0.3629847689747856, "grad_norm": 0.7734375, "learning_rate": 0.0014728357207906132, "loss": 2.1666, "step": 2836 }, { "epoch": 0.3631127607833099, "grad_norm": 0.68359375, "learning_rate": 0.001472470383751054, "loss": 1.8469, "step": 2837 }, { "epoch": 0.36324075259183414, "grad_norm": 0.640625, "learning_rate": 0.0014721049655097935, "loss": 1.5226, "step": 2838 }, { "epoch": 0.3633687444003584, "grad_norm": 0.6015625, "learning_rate": 0.0014717394661296352, "loss": 1.5178, "step": 2839 }, { "epoch": 0.3634967362088826, "grad_norm": 0.6953125, "learning_rate": 0.0014713738856733955, "loss": 2.08, "step": 2840 }, { "epoch": 0.36362472801740686, "grad_norm": 0.5546875, "learning_rate": 0.001471008224203906, "loss": 1.4106, "step": 2841 }, { "epoch": 0.36375271982593116, "grad_norm": 0.85546875, "learning_rate": 0.001470642481784011, "loss": 1.6534, "step": 2842 }, { "epoch": 0.3638807116344554, "grad_norm": 0.76953125, "learning_rate": 0.0014702766584765692, "loss": 2.2105, "step": 2843 }, { "epoch": 0.36400870344297964, "grad_norm": 0.63671875, "learning_rate": 0.0014699107543444537, "loss": 1.7547, "step": 2844 }, { "epoch": 0.3641366952515039, "grad_norm": 0.7890625, "learning_rate": 0.001469544769450551, "loss": 2.2604, "step": 2845 }, { "epoch": 0.3642646870600282, "grad_norm": 0.56640625, "learning_rate": 0.0014691787038577614, "loss": 1.2612, "step": 2846 }, { "epoch": 0.3643926788685524, "grad_norm": 0.76953125, "learning_rate": 0.001468812557628999, "loss": 1.4258, "step": 2847 }, { "epoch": 0.36452067067707666, "grad_norm": 0.546875, "learning_rate": 0.001468446330827192, "loss": 1.2685, "step": 2848 }, { "epoch": 0.3646486624856009, "grad_norm": 0.84765625, "learning_rate": 0.0014680800235152831, "loss": 1.8224, "step": 2849 }, { "epoch": 0.3647766542941252, "grad_norm": 0.703125, "learning_rate": 0.0014677136357562274, "loss": 2.077, "step": 2850 }, { "epoch": 0.36490464610264944, "grad_norm": 0.61328125, "learning_rate": 0.001467347167612995, "loss": 1.6233, "step": 2851 }, { "epoch": 0.3650326379111737, "grad_norm": 0.64453125, "learning_rate": 0.0014669806191485695, "loss": 1.3085, "step": 2852 }, { "epoch": 0.3651606297196979, "grad_norm": 0.77734375, "learning_rate": 0.0014666139904259477, "loss": 1.4167, "step": 2853 }, { "epoch": 0.36528862152822217, "grad_norm": 0.70703125, "learning_rate": 0.0014662472815081407, "loss": 2.3168, "step": 2854 }, { "epoch": 0.36541661333674647, "grad_norm": 0.90625, "learning_rate": 0.0014658804924581738, "loss": 1.956, "step": 2855 }, { "epoch": 0.3655446051452707, "grad_norm": 0.6640625, "learning_rate": 0.0014655136233390857, "loss": 1.8016, "step": 2856 }, { "epoch": 0.36567259695379495, "grad_norm": 0.76171875, "learning_rate": 0.0014651466742139282, "loss": 1.9128, "step": 2857 }, { "epoch": 0.3658005887623192, "grad_norm": 0.9609375, "learning_rate": 0.0014647796451457682, "loss": 3.0111, "step": 2858 }, { "epoch": 0.3659285805708435, "grad_norm": 0.69140625, "learning_rate": 0.0014644125361976848, "loss": 1.6864, "step": 2859 }, { "epoch": 0.36605657237936773, "grad_norm": 0.71484375, "learning_rate": 0.0014640453474327724, "loss": 2.0294, "step": 2860 }, { "epoch": 0.36618456418789197, "grad_norm": 0.78125, "learning_rate": 0.0014636780789141382, "loss": 2.0022, "step": 2861 }, { "epoch": 0.3663125559964162, "grad_norm": 0.67578125, "learning_rate": 0.0014633107307049026, "loss": 1.7574, "step": 2862 }, { "epoch": 0.3664405478049405, "grad_norm": 0.72265625, "learning_rate": 0.0014629433028682014, "loss": 2.0888, "step": 2863 }, { "epoch": 0.36656853961346475, "grad_norm": 0.6640625, "learning_rate": 0.0014625757954671822, "loss": 1.6688, "step": 2864 }, { "epoch": 0.366696531421989, "grad_norm": 0.6875, "learning_rate": 0.001462208208565007, "loss": 1.8058, "step": 2865 }, { "epoch": 0.36682452323051323, "grad_norm": 0.7265625, "learning_rate": 0.001461840542224852, "loss": 2.2845, "step": 2866 }, { "epoch": 0.3669525150390375, "grad_norm": 0.640625, "learning_rate": 0.0014614727965099072, "loss": 1.5955, "step": 2867 }, { "epoch": 0.36708050684756177, "grad_norm": 0.76953125, "learning_rate": 0.0014611049714833746, "loss": 2.1679, "step": 2868 }, { "epoch": 0.367208498656086, "grad_norm": 0.78515625, "learning_rate": 0.0014607370672084716, "loss": 2.7993, "step": 2869 }, { "epoch": 0.36733649046461025, "grad_norm": 0.65625, "learning_rate": 0.0014603690837484279, "loss": 1.5055, "step": 2870 }, { "epoch": 0.3674644822731345, "grad_norm": 0.59375, "learning_rate": 0.0014600010211664877, "loss": 0.8357, "step": 2871 }, { "epoch": 0.3675924740816588, "grad_norm": 0.64453125, "learning_rate": 0.001459632879525909, "loss": 1.3974, "step": 2872 }, { "epoch": 0.36772046589018303, "grad_norm": 0.703125, "learning_rate": 0.0014592646588899624, "loss": 2.0441, "step": 2873 }, { "epoch": 0.3678484576987073, "grad_norm": 0.78515625, "learning_rate": 0.0014588963593219326, "loss": 1.48, "step": 2874 }, { "epoch": 0.3679764495072315, "grad_norm": 0.73046875, "learning_rate": 0.001458527980885118, "loss": 1.8019, "step": 2875 }, { "epoch": 0.3681044413157558, "grad_norm": 0.69140625, "learning_rate": 0.0014581595236428302, "loss": 1.6273, "step": 2876 }, { "epoch": 0.36823243312428006, "grad_norm": 0.9609375, "learning_rate": 0.001457790987658395, "loss": 1.6814, "step": 2877 }, { "epoch": 0.3683604249328043, "grad_norm": 0.5703125, "learning_rate": 0.001457422372995151, "loss": 1.3315, "step": 2878 }, { "epoch": 0.36848841674132854, "grad_norm": 0.6953125, "learning_rate": 0.0014570536797164507, "loss": 2.1558, "step": 2879 }, { "epoch": 0.36861640854985284, "grad_norm": 0.70703125, "learning_rate": 0.0014566849078856596, "loss": 1.4502, "step": 2880 }, { "epoch": 0.3687444003583771, "grad_norm": 0.7109375, "learning_rate": 0.001456316057566158, "loss": 1.873, "step": 2881 }, { "epoch": 0.3688723921669013, "grad_norm": 0.6328125, "learning_rate": 0.0014559471288213377, "loss": 1.4422, "step": 2882 }, { "epoch": 0.36900038397542556, "grad_norm": 0.56640625, "learning_rate": 0.0014555781217146062, "loss": 1.4231, "step": 2883 }, { "epoch": 0.3691283757839498, "grad_norm": 0.734375, "learning_rate": 0.0014552090363093826, "loss": 1.6406, "step": 2884 }, { "epoch": 0.3692563675924741, "grad_norm": 0.578125, "learning_rate": 0.0014548398726691005, "loss": 0.7916, "step": 2885 }, { "epoch": 0.36938435940099834, "grad_norm": 0.5859375, "learning_rate": 0.0014544706308572066, "loss": 1.3991, "step": 2886 }, { "epoch": 0.3695123512095226, "grad_norm": 0.5078125, "learning_rate": 0.001454101310937161, "loss": 1.1445, "step": 2887 }, { "epoch": 0.3696403430180468, "grad_norm": 0.5625, "learning_rate": 0.0014537319129724377, "loss": 1.302, "step": 2888 }, { "epoch": 0.3697683348265711, "grad_norm": 0.65625, "learning_rate": 0.0014533624370265232, "loss": 1.5098, "step": 2889 }, { "epoch": 0.36989632663509536, "grad_norm": 0.62890625, "learning_rate": 0.0014529928831629184, "loss": 1.7258, "step": 2890 }, { "epoch": 0.3700243184436196, "grad_norm": 0.734375, "learning_rate": 0.0014526232514451367, "loss": 2.0742, "step": 2891 }, { "epoch": 0.37015231025214385, "grad_norm": 0.71875, "learning_rate": 0.001452253541936706, "loss": 2.8149, "step": 2892 }, { "epoch": 0.37028030206066814, "grad_norm": 0.69140625, "learning_rate": 0.0014518837547011657, "loss": 2.0584, "step": 2893 }, { "epoch": 0.3704082938691924, "grad_norm": 0.6796875, "learning_rate": 0.001451513889802071, "loss": 1.9078, "step": 2894 }, { "epoch": 0.3705362856777166, "grad_norm": 0.79296875, "learning_rate": 0.0014511439473029884, "loss": 2.7068, "step": 2895 }, { "epoch": 0.37066427748624087, "grad_norm": 0.65234375, "learning_rate": 0.001450773927267499, "loss": 1.5382, "step": 2896 }, { "epoch": 0.3707922692947651, "grad_norm": 0.69140625, "learning_rate": 0.001450403829759196, "loss": 2.0546, "step": 2897 }, { "epoch": 0.3709202611032894, "grad_norm": 0.59765625, "learning_rate": 0.0014500336548416877, "loss": 1.9956, "step": 2898 }, { "epoch": 0.37104825291181365, "grad_norm": 0.76171875, "learning_rate": 0.0014496634025785938, "loss": 2.467, "step": 2899 }, { "epoch": 0.3711762447203379, "grad_norm": 0.92578125, "learning_rate": 0.0014492930730335488, "loss": 2.2398, "step": 2900 }, { "epoch": 0.37130423652886213, "grad_norm": 0.73046875, "learning_rate": 0.0014489226662701992, "loss": 2.3485, "step": 2901 }, { "epoch": 0.3714322283373864, "grad_norm": 0.55859375, "learning_rate": 0.001448552182352206, "loss": 1.0926, "step": 2902 }, { "epoch": 0.37156022014591067, "grad_norm": 0.8203125, "learning_rate": 0.0014481816213432427, "loss": 1.554, "step": 2903 }, { "epoch": 0.3716882119544349, "grad_norm": 0.64453125, "learning_rate": 0.0014478109833069954, "loss": 1.4934, "step": 2904 }, { "epoch": 0.37181620376295915, "grad_norm": 0.60546875, "learning_rate": 0.0014474402683071657, "loss": 1.3643, "step": 2905 }, { "epoch": 0.37194419557148345, "grad_norm": 0.5703125, "learning_rate": 0.001447069476407466, "loss": 1.323, "step": 2906 }, { "epoch": 0.3720721873800077, "grad_norm": 0.68359375, "learning_rate": 0.0014466986076716235, "loss": 1.9439, "step": 2907 }, { "epoch": 0.37220017918853193, "grad_norm": 0.71484375, "learning_rate": 0.0014463276621633774, "loss": 1.6974, "step": 2908 }, { "epoch": 0.3723281709970562, "grad_norm": 0.65625, "learning_rate": 0.001445956639946481, "loss": 1.4826, "step": 2909 }, { "epoch": 0.3724561628055804, "grad_norm": 0.74609375, "learning_rate": 0.0014455855410847006, "loss": 1.4693, "step": 2910 }, { "epoch": 0.3725841546141047, "grad_norm": 0.7734375, "learning_rate": 0.0014452143656418154, "loss": 1.7158, "step": 2911 }, { "epoch": 0.37271214642262895, "grad_norm": 0.66796875, "learning_rate": 0.001444843113681618, "loss": 1.3769, "step": 2912 }, { "epoch": 0.3728401382311532, "grad_norm": 0.78125, "learning_rate": 0.001444471785267914, "loss": 2.5381, "step": 2913 }, { "epoch": 0.37296813003967744, "grad_norm": 1.15625, "learning_rate": 0.0014441003804645223, "loss": 1.5985, "step": 2914 }, { "epoch": 0.37309612184820173, "grad_norm": 0.6640625, "learning_rate": 0.001443728899335275, "loss": 1.583, "step": 2915 }, { "epoch": 0.373224113656726, "grad_norm": 0.65625, "learning_rate": 0.0014433573419440164, "loss": 1.6741, "step": 2916 }, { "epoch": 0.3733521054652502, "grad_norm": 0.828125, "learning_rate": 0.0014429857083546054, "loss": 2.0903, "step": 2917 }, { "epoch": 0.37348009727377446, "grad_norm": 0.62890625, "learning_rate": 0.0014426139986309136, "loss": 1.37, "step": 2918 }, { "epoch": 0.37360808908229876, "grad_norm": 0.9921875, "learning_rate": 0.0014422422128368242, "loss": 1.8175, "step": 2919 }, { "epoch": 0.373736080890823, "grad_norm": 0.734375, "learning_rate": 0.0014418703510362355, "loss": 2.6955, "step": 2920 }, { "epoch": 0.37386407269934724, "grad_norm": 0.625, "learning_rate": 0.0014414984132930578, "loss": 1.3688, "step": 2921 }, { "epoch": 0.3739920645078715, "grad_norm": 0.609375, "learning_rate": 0.0014411263996712143, "loss": 1.7466, "step": 2922 }, { "epoch": 0.3741200563163958, "grad_norm": 0.671875, "learning_rate": 0.0014407543102346423, "loss": 1.7944, "step": 2923 }, { "epoch": 0.37424804812492, "grad_norm": 0.6796875, "learning_rate": 0.0014403821450472906, "loss": 2.0558, "step": 2924 }, { "epoch": 0.37437603993344426, "grad_norm": 0.5625, "learning_rate": 0.0014400099041731226, "loss": 1.7137, "step": 2925 }, { "epoch": 0.3745040317419685, "grad_norm": 0.6015625, "learning_rate": 0.001439637587676113, "loss": 1.8885, "step": 2926 }, { "epoch": 0.37463202355049274, "grad_norm": 0.6484375, "learning_rate": 0.0014392651956202512, "loss": 1.5503, "step": 2927 }, { "epoch": 0.37476001535901704, "grad_norm": 0.78515625, "learning_rate": 0.0014388927280695382, "loss": 2.5328, "step": 2928 }, { "epoch": 0.3748880071675413, "grad_norm": 0.7421875, "learning_rate": 0.0014385201850879894, "loss": 1.7202, "step": 2929 }, { "epoch": 0.3750159989760655, "grad_norm": 1.0546875, "learning_rate": 0.001438147566739632, "loss": 1.4654, "step": 2930 }, { "epoch": 0.37514399078458976, "grad_norm": 0.67578125, "learning_rate": 0.0014377748730885062, "loss": 1.861, "step": 2931 }, { "epoch": 0.37527198259311406, "grad_norm": 0.671875, "learning_rate": 0.0014374021041986653, "loss": 1.8985, "step": 2932 }, { "epoch": 0.3753999744016383, "grad_norm": 0.60546875, "learning_rate": 0.0014370292601341764, "loss": 1.3305, "step": 2933 }, { "epoch": 0.37552796621016254, "grad_norm": 0.703125, "learning_rate": 0.0014366563409591187, "loss": 2.4771, "step": 2934 }, { "epoch": 0.3756559580186868, "grad_norm": 0.515625, "learning_rate": 0.0014362833467375837, "loss": 0.8978, "step": 2935 }, { "epoch": 0.3757839498272111, "grad_norm": 0.62890625, "learning_rate": 0.0014359102775336773, "loss": 1.4376, "step": 2936 }, { "epoch": 0.3759119416357353, "grad_norm": 0.70703125, "learning_rate": 0.001435537133411517, "loss": 1.6999, "step": 2937 }, { "epoch": 0.37603993344425957, "grad_norm": 0.6328125, "learning_rate": 0.001435163914435234, "loss": 1.6372, "step": 2938 }, { "epoch": 0.3761679252527838, "grad_norm": 0.90234375, "learning_rate": 0.0014347906206689718, "loss": 1.9178, "step": 2939 }, { "epoch": 0.37629591706130805, "grad_norm": 0.64453125, "learning_rate": 0.0014344172521768871, "loss": 1.8674, "step": 2940 }, { "epoch": 0.37642390886983235, "grad_norm": 0.78125, "learning_rate": 0.0014340438090231491, "loss": 1.9482, "step": 2941 }, { "epoch": 0.3765519006783566, "grad_norm": 0.7109375, "learning_rate": 0.0014336702912719405, "loss": 1.5894, "step": 2942 }, { "epoch": 0.37667989248688083, "grad_norm": 0.6484375, "learning_rate": 0.001433296698987456, "loss": 1.8598, "step": 2943 }, { "epoch": 0.37680788429540507, "grad_norm": 0.5625, "learning_rate": 0.0014329230322339035, "loss": 1.3173, "step": 2944 }, { "epoch": 0.37693587610392937, "grad_norm": 0.63671875, "learning_rate": 0.0014325492910755042, "loss": 1.9664, "step": 2945 }, { "epoch": 0.3770638679124536, "grad_norm": 0.6953125, "learning_rate": 0.001432175475576491, "loss": 1.7357, "step": 2946 }, { "epoch": 0.37719185972097785, "grad_norm": 0.6953125, "learning_rate": 0.0014318015858011107, "loss": 1.349, "step": 2947 }, { "epoch": 0.3773198515295021, "grad_norm": 0.64453125, "learning_rate": 0.0014314276218136217, "loss": 2.145, "step": 2948 }, { "epoch": 0.3774478433380264, "grad_norm": 0.6640625, "learning_rate": 0.0014310535836782959, "loss": 1.2374, "step": 2949 }, { "epoch": 0.37757583514655063, "grad_norm": 1.0390625, "learning_rate": 0.0014306794714594182, "loss": 1.8016, "step": 2950 }, { "epoch": 0.3777038269550749, "grad_norm": 0.625, "learning_rate": 0.0014303052852212858, "loss": 1.659, "step": 2951 }, { "epoch": 0.3778318187635991, "grad_norm": 0.80859375, "learning_rate": 0.0014299310250282084, "loss": 1.0724, "step": 2952 }, { "epoch": 0.3779598105721234, "grad_norm": 0.62890625, "learning_rate": 0.001429556690944509, "loss": 1.6249, "step": 2953 }, { "epoch": 0.37808780238064765, "grad_norm": 0.60546875, "learning_rate": 0.0014291822830345224, "loss": 1.0761, "step": 2954 }, { "epoch": 0.3782157941891719, "grad_norm": 0.6796875, "learning_rate": 0.0014288078013625974, "loss": 1.9362, "step": 2955 }, { "epoch": 0.37834378599769614, "grad_norm": 0.671875, "learning_rate": 0.0014284332459930944, "loss": 1.3837, "step": 2956 }, { "epoch": 0.3784717778062204, "grad_norm": 0.7421875, "learning_rate": 0.0014280586169903868, "loss": 1.6261, "step": 2957 }, { "epoch": 0.3785997696147447, "grad_norm": 0.6640625, "learning_rate": 0.0014276839144188606, "loss": 2.0744, "step": 2958 }, { "epoch": 0.3787277614232689, "grad_norm": 0.640625, "learning_rate": 0.001427309138342915, "loss": 2.1608, "step": 2959 }, { "epoch": 0.37885575323179316, "grad_norm": 0.625, "learning_rate": 0.0014269342888269608, "loss": 1.1603, "step": 2960 }, { "epoch": 0.3789837450403174, "grad_norm": 0.8203125, "learning_rate": 0.001426559365935422, "loss": 2.6154, "step": 2961 }, { "epoch": 0.3791117368488417, "grad_norm": 0.61328125, "learning_rate": 0.0014261843697327354, "loss": 1.3291, "step": 2962 }, { "epoch": 0.37923972865736594, "grad_norm": 0.734375, "learning_rate": 0.0014258093002833504, "loss": 2.1215, "step": 2963 }, { "epoch": 0.3793677204658902, "grad_norm": 0.66796875, "learning_rate": 0.001425434157651728, "loss": 1.321, "step": 2964 }, { "epoch": 0.3794957122744144, "grad_norm": 0.67578125, "learning_rate": 0.0014250589419023433, "loss": 2.0127, "step": 2965 }, { "epoch": 0.3796237040829387, "grad_norm": 0.6796875, "learning_rate": 0.001424683653099683, "loss": 1.7933, "step": 2966 }, { "epoch": 0.37975169589146296, "grad_norm": 0.6875, "learning_rate": 0.0014243082913082466, "loss": 1.6117, "step": 2967 }, { "epoch": 0.3798796876999872, "grad_norm": 0.671875, "learning_rate": 0.001423932856592546, "loss": 1.567, "step": 2968 }, { "epoch": 0.38000767950851144, "grad_norm": 0.7109375, "learning_rate": 0.0014235573490171056, "loss": 1.6992, "step": 2969 }, { "epoch": 0.3801356713170357, "grad_norm": 0.8203125, "learning_rate": 0.001423181768646463, "loss": 2.1282, "step": 2970 }, { "epoch": 0.38026366312556, "grad_norm": 0.62109375, "learning_rate": 0.001422806115545167, "loss": 1.7544, "step": 2971 }, { "epoch": 0.3803916549340842, "grad_norm": 0.75390625, "learning_rate": 0.0014224303897777803, "loss": 1.7695, "step": 2972 }, { "epoch": 0.38051964674260846, "grad_norm": 0.7421875, "learning_rate": 0.0014220545914088777, "loss": 1.8036, "step": 2973 }, { "epoch": 0.3806476385511327, "grad_norm": 0.54296875, "learning_rate": 0.0014216787205030453, "loss": 1.162, "step": 2974 }, { "epoch": 0.380775630359657, "grad_norm": 0.70703125, "learning_rate": 0.0014213027771248833, "loss": 1.5009, "step": 2975 }, { "epoch": 0.38090362216818124, "grad_norm": 0.6171875, "learning_rate": 0.0014209267613390036, "loss": 1.5192, "step": 2976 }, { "epoch": 0.3810316139767055, "grad_norm": 0.625, "learning_rate": 0.0014205506732100303, "loss": 1.6141, "step": 2977 }, { "epoch": 0.3811596057852297, "grad_norm": 0.5859375, "learning_rate": 0.0014201745128026007, "loss": 1.2063, "step": 2978 }, { "epoch": 0.381287597593754, "grad_norm": 0.74609375, "learning_rate": 0.0014197982801813637, "loss": 1.96, "step": 2979 }, { "epoch": 0.38141558940227827, "grad_norm": 0.625, "learning_rate": 0.001419421975410981, "loss": 1.5785, "step": 2980 }, { "epoch": 0.3815435812108025, "grad_norm": 0.76953125, "learning_rate": 0.0014190455985561273, "loss": 1.7526, "step": 2981 }, { "epoch": 0.38167157301932675, "grad_norm": 0.84375, "learning_rate": 0.0014186691496814881, "loss": 1.6122, "step": 2982 }, { "epoch": 0.381799564827851, "grad_norm": 0.66015625, "learning_rate": 0.0014182926288517628, "loss": 1.6044, "step": 2983 }, { "epoch": 0.3819275566363753, "grad_norm": 0.80078125, "learning_rate": 0.0014179160361316627, "loss": 1.9817, "step": 2984 }, { "epoch": 0.38205554844489953, "grad_norm": 0.56640625, "learning_rate": 0.0014175393715859111, "loss": 1.5642, "step": 2985 }, { "epoch": 0.38218354025342377, "grad_norm": 0.6015625, "learning_rate": 0.0014171626352792442, "loss": 1.2387, "step": 2986 }, { "epoch": 0.382311532061948, "grad_norm": 0.8515625, "learning_rate": 0.00141678582727641, "loss": 1.3917, "step": 2987 }, { "epoch": 0.3824395238704723, "grad_norm": 0.64453125, "learning_rate": 0.0014164089476421686, "loss": 1.5371, "step": 2988 }, { "epoch": 0.38256751567899655, "grad_norm": 0.6953125, "learning_rate": 0.0014160319964412942, "loss": 1.9051, "step": 2989 }, { "epoch": 0.3826955074875208, "grad_norm": 0.82421875, "learning_rate": 0.0014156549737385706, "loss": 1.8493, "step": 2990 }, { "epoch": 0.38282349929604503, "grad_norm": 0.72265625, "learning_rate": 0.0014152778795987963, "loss": 1.4732, "step": 2991 }, { "epoch": 0.38295149110456933, "grad_norm": 0.75, "learning_rate": 0.0014149007140867803, "loss": 1.7962, "step": 2992 }, { "epoch": 0.38307948291309357, "grad_norm": 0.82421875, "learning_rate": 0.0014145234772673452, "loss": 1.7035, "step": 2993 }, { "epoch": 0.3832074747216178, "grad_norm": 0.671875, "learning_rate": 0.0014141461692053245, "loss": 1.876, "step": 2994 }, { "epoch": 0.38333546653014205, "grad_norm": 0.84765625, "learning_rate": 0.001413768789965566, "loss": 2.1518, "step": 2995 }, { "epoch": 0.38346345833866635, "grad_norm": 0.71484375, "learning_rate": 0.001413391339612927, "loss": 1.6498, "step": 2996 }, { "epoch": 0.3835914501471906, "grad_norm": 0.58203125, "learning_rate": 0.0014130138182122793, "loss": 1.7787, "step": 2997 }, { "epoch": 0.38371944195571484, "grad_norm": 0.7890625, "learning_rate": 0.001412636225828506, "loss": 2.2648, "step": 2998 }, { "epoch": 0.3838474337642391, "grad_norm": 0.61328125, "learning_rate": 0.001412258562526502, "loss": 1.281, "step": 2999 }, { "epoch": 0.3839754255727633, "grad_norm": 0.89453125, "learning_rate": 0.0014118808283711758, "loss": 1.8461, "step": 3000 }, { "epoch": 0.3841034173812876, "grad_norm": 0.73828125, "learning_rate": 0.001411503023427446, "loss": 1.7706, "step": 3001 }, { "epoch": 0.38423140918981186, "grad_norm": 0.66015625, "learning_rate": 0.0014111251477602455, "loss": 1.4304, "step": 3002 }, { "epoch": 0.3843594009983361, "grad_norm": 0.6328125, "learning_rate": 0.0014107472014345177, "loss": 1.6504, "step": 3003 }, { "epoch": 0.38448739280686034, "grad_norm": 0.55859375, "learning_rate": 0.0014103691845152192, "loss": 1.3872, "step": 3004 }, { "epoch": 0.38461538461538464, "grad_norm": 0.578125, "learning_rate": 0.001409991097067318, "loss": 1.3533, "step": 3005 }, { "epoch": 0.3847433764239089, "grad_norm": 0.6953125, "learning_rate": 0.001409612939155795, "loss": 2.1768, "step": 3006 }, { "epoch": 0.3848713682324331, "grad_norm": 0.69921875, "learning_rate": 0.0014092347108456424, "loss": 1.5256, "step": 3007 }, { "epoch": 0.38499936004095736, "grad_norm": 0.7578125, "learning_rate": 0.0014088564122018653, "loss": 2.3395, "step": 3008 }, { "epoch": 0.38512735184948166, "grad_norm": 0.62109375, "learning_rate": 0.00140847804328948, "loss": 1.9746, "step": 3009 }, { "epoch": 0.3852553436580059, "grad_norm": 0.70703125, "learning_rate": 0.0014080996041735151, "loss": 1.584, "step": 3010 }, { "epoch": 0.38538333546653014, "grad_norm": 0.83203125, "learning_rate": 0.0014077210949190123, "loss": 1.8887, "step": 3011 }, { "epoch": 0.3855113272750544, "grad_norm": 0.640625, "learning_rate": 0.001407342515591024, "loss": 1.5695, "step": 3012 }, { "epoch": 0.3856393190835786, "grad_norm": 0.62890625, "learning_rate": 0.0014069638662546158, "loss": 1.6608, "step": 3013 }, { "epoch": 0.3857673108921029, "grad_norm": 0.734375, "learning_rate": 0.0014065851469748638, "loss": 2.0418, "step": 3014 }, { "epoch": 0.38589530270062716, "grad_norm": 0.96484375, "learning_rate": 0.0014062063578168576, "loss": 1.4212, "step": 3015 }, { "epoch": 0.3860232945091514, "grad_norm": 0.65234375, "learning_rate": 0.0014058274988456984, "loss": 1.9445, "step": 3016 }, { "epoch": 0.38615128631767565, "grad_norm": 0.625, "learning_rate": 0.0014054485701264986, "loss": 1.466, "step": 3017 }, { "epoch": 0.38627927812619994, "grad_norm": 0.75390625, "learning_rate": 0.0014050695717243845, "loss": 2.076, "step": 3018 }, { "epoch": 0.3864072699347242, "grad_norm": 0.77734375, "learning_rate": 0.0014046905037044919, "loss": 2.3681, "step": 3019 }, { "epoch": 0.3865352617432484, "grad_norm": 0.62890625, "learning_rate": 0.0014043113661319703, "loss": 1.9839, "step": 3020 }, { "epoch": 0.38666325355177267, "grad_norm": 0.76171875, "learning_rate": 0.0014039321590719807, "loss": 1.9767, "step": 3021 }, { "epoch": 0.38679124536029696, "grad_norm": 0.6875, "learning_rate": 0.0014035528825896955, "loss": 1.2785, "step": 3022 }, { "epoch": 0.3869192371688212, "grad_norm": 0.62890625, "learning_rate": 0.0014031735367503, "loss": 1.5653, "step": 3023 }, { "epoch": 0.38704722897734545, "grad_norm": 0.64453125, "learning_rate": 0.001402794121618991, "loss": 1.5398, "step": 3024 }, { "epoch": 0.3871752207858697, "grad_norm": 0.6796875, "learning_rate": 0.0014024146372609769, "loss": 1.643, "step": 3025 }, { "epoch": 0.387303212594394, "grad_norm": 0.57421875, "learning_rate": 0.0014020350837414784, "loss": 1.8591, "step": 3026 }, { "epoch": 0.38743120440291823, "grad_norm": 0.72265625, "learning_rate": 0.0014016554611257276, "loss": 2.0616, "step": 3027 }, { "epoch": 0.38755919621144247, "grad_norm": 0.80859375, "learning_rate": 0.001401275769478969, "loss": 1.9311, "step": 3028 }, { "epoch": 0.3876871880199667, "grad_norm": 0.64453125, "learning_rate": 0.001400896008866459, "loss": 1.9572, "step": 3029 }, { "epoch": 0.38781517982849095, "grad_norm": 0.6015625, "learning_rate": 0.0014005161793534655, "loss": 1.3475, "step": 3030 }, { "epoch": 0.38794317163701525, "grad_norm": 0.60546875, "learning_rate": 0.0014001362810052683, "loss": 1.3152, "step": 3031 }, { "epoch": 0.3880711634455395, "grad_norm": 0.94921875, "learning_rate": 0.001399756313887159, "loss": 1.4649, "step": 3032 }, { "epoch": 0.38819915525406373, "grad_norm": 0.84375, "learning_rate": 0.0013993762780644412, "loss": 1.718, "step": 3033 }, { "epoch": 0.388327147062588, "grad_norm": 0.83203125, "learning_rate": 0.0013989961736024302, "loss": 1.7172, "step": 3034 }, { "epoch": 0.38845513887111227, "grad_norm": 0.59765625, "learning_rate": 0.0013986160005664532, "loss": 1.4262, "step": 3035 }, { "epoch": 0.3885831306796365, "grad_norm": 0.66015625, "learning_rate": 0.0013982357590218491, "loss": 1.585, "step": 3036 }, { "epoch": 0.38871112248816075, "grad_norm": 0.7578125, "learning_rate": 0.0013978554490339686, "loss": 1.7206, "step": 3037 }, { "epoch": 0.388839114296685, "grad_norm": 0.79296875, "learning_rate": 0.0013974750706681736, "loss": 2.1836, "step": 3038 }, { "epoch": 0.3889671061052093, "grad_norm": 0.82421875, "learning_rate": 0.0013970946239898392, "loss": 1.7603, "step": 3039 }, { "epoch": 0.38909509791373353, "grad_norm": 0.71875, "learning_rate": 0.001396714109064351, "loss": 1.8834, "step": 3040 }, { "epoch": 0.3892230897222578, "grad_norm": 0.83984375, "learning_rate": 0.0013963335259571062, "loss": 1.9161, "step": 3041 }, { "epoch": 0.389351081530782, "grad_norm": 0.81640625, "learning_rate": 0.0013959528747335148, "loss": 1.6412, "step": 3042 }, { "epoch": 0.38947907333930626, "grad_norm": 0.625, "learning_rate": 0.0013955721554589978, "loss": 1.5279, "step": 3043 }, { "epoch": 0.38960706514783056, "grad_norm": 0.6484375, "learning_rate": 0.0013951913681989874, "loss": 1.7993, "step": 3044 }, { "epoch": 0.3897350569563548, "grad_norm": 0.81640625, "learning_rate": 0.0013948105130189286, "loss": 2.1543, "step": 3045 }, { "epoch": 0.38986304876487904, "grad_norm": 1.1875, "learning_rate": 0.0013944295899842776, "loss": 1.6158, "step": 3046 }, { "epoch": 0.3899910405734033, "grad_norm": 0.640625, "learning_rate": 0.0013940485991605022, "loss": 1.7264, "step": 3047 }, { "epoch": 0.3901190323819276, "grad_norm": 0.578125, "learning_rate": 0.0013936675406130811, "loss": 1.6409, "step": 3048 }, { "epoch": 0.3902470241904518, "grad_norm": 0.60546875, "learning_rate": 0.0013932864144075064, "loss": 1.6642, "step": 3049 }, { "epoch": 0.39037501599897606, "grad_norm": 0.8203125, "learning_rate": 0.0013929052206092802, "loss": 2.2272, "step": 3050 }, { "epoch": 0.3905030078075003, "grad_norm": 0.8515625, "learning_rate": 0.0013925239592839174, "loss": 2.0149, "step": 3051 }, { "epoch": 0.3906309996160246, "grad_norm": 0.83203125, "learning_rate": 0.0013921426304969434, "loss": 1.4573, "step": 3052 }, { "epoch": 0.39075899142454884, "grad_norm": 1.046875, "learning_rate": 0.001391761234313896, "loss": 0.9883, "step": 3053 }, { "epoch": 0.3908869832330731, "grad_norm": 0.58203125, "learning_rate": 0.0013913797708003243, "loss": 1.3383, "step": 3054 }, { "epoch": 0.3910149750415973, "grad_norm": 0.79296875, "learning_rate": 0.0013909982400217885, "loss": 1.6537, "step": 3055 }, { "epoch": 0.39114296685012157, "grad_norm": 0.67578125, "learning_rate": 0.0013906166420438618, "loss": 1.5555, "step": 3056 }, { "epoch": 0.39127095865864586, "grad_norm": 0.68359375, "learning_rate": 0.0013902349769321272, "loss": 1.4126, "step": 3057 }, { "epoch": 0.3913989504671701, "grad_norm": 0.6875, "learning_rate": 0.0013898532447521803, "loss": 1.7207, "step": 3058 }, { "epoch": 0.39152694227569435, "grad_norm": 0.484375, "learning_rate": 0.0013894714455696283, "loss": 1.0625, "step": 3059 }, { "epoch": 0.3916549340842186, "grad_norm": 0.765625, "learning_rate": 0.001389089579450089, "loss": 1.6192, "step": 3060 }, { "epoch": 0.3917829258927429, "grad_norm": 0.68359375, "learning_rate": 0.0013887076464591927, "loss": 1.8028, "step": 3061 }, { "epoch": 0.3919109177012671, "grad_norm": 1.3671875, "learning_rate": 0.0013883256466625808, "loss": 2.1126, "step": 3062 }, { "epoch": 0.39203890950979137, "grad_norm": 0.71875, "learning_rate": 0.0013879435801259059, "loss": 1.549, "step": 3063 }, { "epoch": 0.3921669013183156, "grad_norm": 0.6484375, "learning_rate": 0.0013875614469148329, "loss": 1.2345, "step": 3064 }, { "epoch": 0.3922948931268399, "grad_norm": 0.84765625, "learning_rate": 0.0013871792470950366, "loss": 1.8556, "step": 3065 }, { "epoch": 0.39242288493536415, "grad_norm": 0.7265625, "learning_rate": 0.001386796980732205, "loss": 1.9797, "step": 3066 }, { "epoch": 0.3925508767438884, "grad_norm": 0.79296875, "learning_rate": 0.0013864146478920366, "loss": 1.9219, "step": 3067 }, { "epoch": 0.39267886855241263, "grad_norm": 0.60546875, "learning_rate": 0.0013860322486402416, "loss": 1.3075, "step": 3068 }, { "epoch": 0.3928068603609369, "grad_norm": 0.68359375, "learning_rate": 0.001385649783042541, "loss": 1.4742, "step": 3069 }, { "epoch": 0.39293485216946117, "grad_norm": 0.65625, "learning_rate": 0.0013852672511646682, "loss": 1.7269, "step": 3070 }, { "epoch": 0.3930628439779854, "grad_norm": 0.74609375, "learning_rate": 0.0013848846530723677, "loss": 1.5405, "step": 3071 }, { "epoch": 0.39319083578650965, "grad_norm": 0.7265625, "learning_rate": 0.0013845019888313946, "loss": 1.8432, "step": 3072 }, { "epoch": 0.3933188275950339, "grad_norm": 0.7265625, "learning_rate": 0.0013841192585075165, "loss": 1.5151, "step": 3073 }, { "epoch": 0.3934468194035582, "grad_norm": 0.59375, "learning_rate": 0.0013837364621665114, "loss": 1.1654, "step": 3074 }, { "epoch": 0.39357481121208243, "grad_norm": 0.6875, "learning_rate": 0.0013833535998741696, "loss": 1.2094, "step": 3075 }, { "epoch": 0.3937028030206067, "grad_norm": 1.1171875, "learning_rate": 0.0013829706716962915, "loss": 2.284, "step": 3076 }, { "epoch": 0.3938307948291309, "grad_norm": 0.625, "learning_rate": 0.00138258767769869, "loss": 1.673, "step": 3077 }, { "epoch": 0.3939587866376552, "grad_norm": 0.73046875, "learning_rate": 0.0013822046179471886, "loss": 1.4047, "step": 3078 }, { "epoch": 0.39408677844617945, "grad_norm": 0.65625, "learning_rate": 0.0013818214925076224, "loss": 2.181, "step": 3079 }, { "epoch": 0.3942147702547037, "grad_norm": 0.64453125, "learning_rate": 0.0013814383014458377, "loss": 1.772, "step": 3080 }, { "epoch": 0.39434276206322794, "grad_norm": 0.65625, "learning_rate": 0.0013810550448276923, "loss": 1.6046, "step": 3081 }, { "epoch": 0.39447075387175223, "grad_norm": 0.78125, "learning_rate": 0.0013806717227190554, "loss": 1.7823, "step": 3082 }, { "epoch": 0.3945987456802765, "grad_norm": 0.68359375, "learning_rate": 0.0013802883351858055, "loss": 2.2111, "step": 3083 }, { "epoch": 0.3947267374888007, "grad_norm": 0.6015625, "learning_rate": 0.001379904882293836, "loss": 1.6126, "step": 3084 }, { "epoch": 0.39485472929732496, "grad_norm": 0.58203125, "learning_rate": 0.0013795213641090483, "loss": 1.3325, "step": 3085 }, { "epoch": 0.3949827211058492, "grad_norm": 0.67578125, "learning_rate": 0.001379137780697357, "loss": 1.4418, "step": 3086 }, { "epoch": 0.3951107129143735, "grad_norm": 0.703125, "learning_rate": 0.0013787541321246862, "loss": 1.6134, "step": 3087 }, { "epoch": 0.39523870472289774, "grad_norm": 0.78515625, "learning_rate": 0.0013783704184569725, "loss": 1.845, "step": 3088 }, { "epoch": 0.395366696531422, "grad_norm": 0.625, "learning_rate": 0.0013779866397601636, "loss": 1.8878, "step": 3089 }, { "epoch": 0.3954946883399462, "grad_norm": 0.58984375, "learning_rate": 0.0013776027961002178, "loss": 1.6038, "step": 3090 }, { "epoch": 0.3956226801484705, "grad_norm": 0.67578125, "learning_rate": 0.0013772188875431053, "loss": 1.7348, "step": 3091 }, { "epoch": 0.39575067195699476, "grad_norm": 0.890625, "learning_rate": 0.0013768349141548063, "loss": 1.5436, "step": 3092 }, { "epoch": 0.395878663765519, "grad_norm": 0.703125, "learning_rate": 0.0013764508760013136, "loss": 1.7762, "step": 3093 }, { "epoch": 0.39600665557404324, "grad_norm": 0.56640625, "learning_rate": 0.0013760667731486295, "loss": 1.3375, "step": 3094 }, { "epoch": 0.39613464738256754, "grad_norm": 0.60546875, "learning_rate": 0.0013756826056627693, "loss": 1.3371, "step": 3095 }, { "epoch": 0.3962626391910918, "grad_norm": 0.61328125, "learning_rate": 0.001375298373609758, "loss": 1.8017, "step": 3096 }, { "epoch": 0.396390630999616, "grad_norm": 0.6484375, "learning_rate": 0.0013749140770556321, "loss": 1.2794, "step": 3097 }, { "epoch": 0.39651862280814026, "grad_norm": 0.62890625, "learning_rate": 0.001374529716066439, "loss": 1.604, "step": 3098 }, { "epoch": 0.39664661461666456, "grad_norm": 0.546875, "learning_rate": 0.0013741452907082372, "loss": 1.1431, "step": 3099 }, { "epoch": 0.3967746064251888, "grad_norm": 0.67578125, "learning_rate": 0.0013737608010470973, "loss": 1.9227, "step": 3100 }, { "epoch": 0.39690259823371304, "grad_norm": 0.625, "learning_rate": 0.001373376247149099, "loss": 1.6141, "step": 3101 }, { "epoch": 0.3970305900422373, "grad_norm": 0.765625, "learning_rate": 0.0013729916290803352, "loss": 1.6846, "step": 3102 }, { "epoch": 0.3971585818507615, "grad_norm": 0.5859375, "learning_rate": 0.0013726069469069083, "loss": 1.3175, "step": 3103 }, { "epoch": 0.3972865736592858, "grad_norm": 0.625, "learning_rate": 0.001372222200694932, "loss": 1.5514, "step": 3104 }, { "epoch": 0.39741456546781007, "grad_norm": 0.74609375, "learning_rate": 0.0013718373905105313, "loss": 1.9435, "step": 3105 }, { "epoch": 0.3975425572763343, "grad_norm": 0.52734375, "learning_rate": 0.0013714525164198418, "loss": 1.3087, "step": 3106 }, { "epoch": 0.39767054908485855, "grad_norm": 0.5703125, "learning_rate": 0.0013710675784890107, "loss": 1.4216, "step": 3107 }, { "epoch": 0.39779854089338285, "grad_norm": 0.82421875, "learning_rate": 0.001370682576784196, "loss": 1.8052, "step": 3108 }, { "epoch": 0.3979265327019071, "grad_norm": 0.6796875, "learning_rate": 0.001370297511371566, "loss": 1.1608, "step": 3109 }, { "epoch": 0.39805452451043133, "grad_norm": 0.58984375, "learning_rate": 0.0013699123823173007, "loss": 1.7608, "step": 3110 }, { "epoch": 0.39818251631895557, "grad_norm": 0.71875, "learning_rate": 0.0013695271896875909, "loss": 1.5223, "step": 3111 }, { "epoch": 0.39831050812747987, "grad_norm": 0.765625, "learning_rate": 0.0013691419335486379, "loss": 1.4166, "step": 3112 }, { "epoch": 0.3984384999360041, "grad_norm": 0.67578125, "learning_rate": 0.0013687566139666547, "loss": 1.4842, "step": 3113 }, { "epoch": 0.39856649174452835, "grad_norm": 0.7734375, "learning_rate": 0.0013683712310078643, "loss": 2.0265, "step": 3114 }, { "epoch": 0.3986944835530526, "grad_norm": 0.6640625, "learning_rate": 0.001367985784738501, "loss": 1.9539, "step": 3115 }, { "epoch": 0.39882247536157683, "grad_norm": 0.859375, "learning_rate": 0.00136760027522481, "loss": 2.2464, "step": 3116 }, { "epoch": 0.39895046717010113, "grad_norm": 1.0625, "learning_rate": 0.0013672147025330476, "loss": 1.5925, "step": 3117 }, { "epoch": 0.39907845897862537, "grad_norm": 0.671875, "learning_rate": 0.0013668290667294804, "loss": 1.6132, "step": 3118 }, { "epoch": 0.3992064507871496, "grad_norm": 0.65625, "learning_rate": 0.001366443367880387, "loss": 1.739, "step": 3119 }, { "epoch": 0.39933444259567386, "grad_norm": 0.6328125, "learning_rate": 0.0013660576060520548, "loss": 1.5918, "step": 3120 }, { "epoch": 0.39946243440419815, "grad_norm": 1.078125, "learning_rate": 0.001365671781310784, "loss": 2.024, "step": 3121 }, { "epoch": 0.3995904262127224, "grad_norm": 0.875, "learning_rate": 0.0013652858937228845, "loss": 1.1176, "step": 3122 }, { "epoch": 0.39971841802124664, "grad_norm": 0.5703125, "learning_rate": 0.0013648999433546773, "loss": 1.3741, "step": 3123 }, { "epoch": 0.3998464098297709, "grad_norm": 0.7265625, "learning_rate": 0.001364513930272495, "loss": 1.3982, "step": 3124 }, { "epoch": 0.3999744016382952, "grad_norm": 0.75390625, "learning_rate": 0.0013641278545426792, "loss": 1.8582, "step": 3125 }, { "epoch": 0.4001023934468194, "grad_norm": 0.703125, "learning_rate": 0.001363741716231584, "loss": 2.2079, "step": 3126 }, { "epoch": 0.40023038525534366, "grad_norm": 0.71484375, "learning_rate": 0.0013633555154055732, "loss": 1.4708, "step": 3127 }, { "epoch": 0.4003583770638679, "grad_norm": 0.6796875, "learning_rate": 0.0013629692521310215, "loss": 1.5478, "step": 3128 }, { "epoch": 0.40048636887239214, "grad_norm": 0.6953125, "learning_rate": 0.0013625829264743148, "loss": 1.714, "step": 3129 }, { "epoch": 0.40061436068091644, "grad_norm": 0.60546875, "learning_rate": 0.0013621965385018496, "loss": 1.4313, "step": 3130 }, { "epoch": 0.4007423524894407, "grad_norm": 0.5859375, "learning_rate": 0.0013618100882800323, "loss": 1.2326, "step": 3131 }, { "epoch": 0.4008703442979649, "grad_norm": 0.65625, "learning_rate": 0.0013614235758752813, "loss": 1.5607, "step": 3132 }, { "epoch": 0.40099833610648916, "grad_norm": 0.6328125, "learning_rate": 0.0013610370013540248, "loss": 1.3011, "step": 3133 }, { "epoch": 0.40112632791501346, "grad_norm": 0.703125, "learning_rate": 0.0013606503647827016, "loss": 1.3873, "step": 3134 }, { "epoch": 0.4012543197235377, "grad_norm": 0.50390625, "learning_rate": 0.0013602636662277622, "loss": 1.0389, "step": 3135 }, { "epoch": 0.40138231153206194, "grad_norm": 0.6171875, "learning_rate": 0.0013598769057556663, "loss": 1.3943, "step": 3136 }, { "epoch": 0.4015103033405862, "grad_norm": 0.64453125, "learning_rate": 0.0013594900834328855, "loss": 1.4725, "step": 3137 }, { "epoch": 0.4016382951491105, "grad_norm": 0.921875, "learning_rate": 0.0013591031993259008, "loss": 2.0203, "step": 3138 }, { "epoch": 0.4017662869576347, "grad_norm": 0.73046875, "learning_rate": 0.0013587162535012049, "loss": 1.5781, "step": 3139 }, { "epoch": 0.40189427876615896, "grad_norm": 0.91796875, "learning_rate": 0.001358329246025301, "loss": 2.157, "step": 3140 }, { "epoch": 0.4020222705746832, "grad_norm": 0.70703125, "learning_rate": 0.0013579421769647024, "loss": 2.5679, "step": 3141 }, { "epoch": 0.4021502623832075, "grad_norm": 0.76953125, "learning_rate": 0.0013575550463859332, "loss": 1.968, "step": 3142 }, { "epoch": 0.40227825419173174, "grad_norm": 0.6796875, "learning_rate": 0.001357167854355528, "loss": 1.5567, "step": 3143 }, { "epoch": 0.402406246000256, "grad_norm": 0.5390625, "learning_rate": 0.0013567806009400318, "loss": 0.9411, "step": 3144 }, { "epoch": 0.4025342378087802, "grad_norm": 0.87109375, "learning_rate": 0.001356393286206001, "loss": 2.3813, "step": 3145 }, { "epoch": 0.40266222961730447, "grad_norm": 0.6015625, "learning_rate": 0.0013560059102200013, "loss": 1.7677, "step": 3146 }, { "epoch": 0.40279022142582876, "grad_norm": 0.6640625, "learning_rate": 0.0013556184730486099, "loss": 1.4357, "step": 3147 }, { "epoch": 0.402918213234353, "grad_norm": 1.0078125, "learning_rate": 0.0013552309747584144, "loss": 1.5339, "step": 3148 }, { "epoch": 0.40304620504287725, "grad_norm": 0.6640625, "learning_rate": 0.001354843415416012, "loss": 1.5793, "step": 3149 }, { "epoch": 0.4031741968514015, "grad_norm": 0.66015625, "learning_rate": 0.0013544557950880113, "loss": 1.8711, "step": 3150 }, { "epoch": 0.4033021886599258, "grad_norm": 0.59765625, "learning_rate": 0.0013540681138410316, "loss": 1.6158, "step": 3151 }, { "epoch": 0.40343018046845003, "grad_norm": 0.5625, "learning_rate": 0.0013536803717417016, "loss": 1.5156, "step": 3152 }, { "epoch": 0.40355817227697427, "grad_norm": 0.6328125, "learning_rate": 0.0013532925688566615, "loss": 1.9058, "step": 3153 }, { "epoch": 0.4036861640854985, "grad_norm": 0.64453125, "learning_rate": 0.0013529047052525615, "loss": 2.0544, "step": 3154 }, { "epoch": 0.4038141558940228, "grad_norm": 0.70703125, "learning_rate": 0.0013525167809960617, "loss": 2.1876, "step": 3155 }, { "epoch": 0.40394214770254705, "grad_norm": 0.734375, "learning_rate": 0.0013521287961538338, "loss": 1.4873, "step": 3156 }, { "epoch": 0.4040701395110713, "grad_norm": 0.7734375, "learning_rate": 0.0013517407507925591, "loss": 1.5106, "step": 3157 }, { "epoch": 0.40419813131959553, "grad_norm": 0.6171875, "learning_rate": 0.0013513526449789293, "loss": 1.6863, "step": 3158 }, { "epoch": 0.4043261231281198, "grad_norm": 0.5859375, "learning_rate": 0.0013509644787796473, "loss": 1.256, "step": 3159 }, { "epoch": 0.40445411493664407, "grad_norm": 0.68359375, "learning_rate": 0.0013505762522614249, "loss": 1.9898, "step": 3160 }, { "epoch": 0.4045821067451683, "grad_norm": 0.62890625, "learning_rate": 0.0013501879654909854, "loss": 1.3081, "step": 3161 }, { "epoch": 0.40471009855369255, "grad_norm": 0.6484375, "learning_rate": 0.0013497996185350626, "loss": 1.3448, "step": 3162 }, { "epoch": 0.4048380903622168, "grad_norm": 0.64453125, "learning_rate": 0.0013494112114604, "loss": 2.2031, "step": 3163 }, { "epoch": 0.4049660821707411, "grad_norm": 0.72265625, "learning_rate": 0.0013490227443337514, "loss": 1.6086, "step": 3164 }, { "epoch": 0.40509407397926533, "grad_norm": 0.703125, "learning_rate": 0.0013486342172218816, "loss": 1.9751, "step": 3165 }, { "epoch": 0.4052220657877896, "grad_norm": 0.63671875, "learning_rate": 0.001348245630191565, "loss": 1.5674, "step": 3166 }, { "epoch": 0.4053500575963138, "grad_norm": 0.6328125, "learning_rate": 0.0013478569833095865, "loss": 1.3691, "step": 3167 }, { "epoch": 0.4054780494048381, "grad_norm": 0.51171875, "learning_rate": 0.0013474682766427416, "loss": 1.1611, "step": 3168 }, { "epoch": 0.40560604121336236, "grad_norm": 0.69140625, "learning_rate": 0.0013470795102578357, "loss": 1.3798, "step": 3169 }, { "epoch": 0.4057340330218866, "grad_norm": 0.59375, "learning_rate": 0.0013466906842216847, "loss": 1.5548, "step": 3170 }, { "epoch": 0.40586202483041084, "grad_norm": 0.59765625, "learning_rate": 0.0013463017986011148, "loss": 1.4671, "step": 3171 }, { "epoch": 0.4059900166389351, "grad_norm": 0.65234375, "learning_rate": 0.0013459128534629619, "loss": 1.8885, "step": 3172 }, { "epoch": 0.4061180084474594, "grad_norm": 0.6171875, "learning_rate": 0.0013455238488740728, "loss": 1.5053, "step": 3173 }, { "epoch": 0.4062460002559836, "grad_norm": 0.54296875, "learning_rate": 0.0013451347849013045, "loss": 0.9789, "step": 3174 }, { "epoch": 0.40637399206450786, "grad_norm": 0.6953125, "learning_rate": 0.0013447456616115237, "loss": 1.322, "step": 3175 }, { "epoch": 0.4065019838730321, "grad_norm": 0.65625, "learning_rate": 0.0013443564790716071, "loss": 1.5251, "step": 3176 }, { "epoch": 0.4066299756815564, "grad_norm": 0.74609375, "learning_rate": 0.0013439672373484431, "loss": 1.4047, "step": 3177 }, { "epoch": 0.40675796749008064, "grad_norm": 0.828125, "learning_rate": 0.001343577936508928, "loss": 1.9439, "step": 3178 }, { "epoch": 0.4068859592986049, "grad_norm": 0.54296875, "learning_rate": 0.0013431885766199702, "loss": 1.1518, "step": 3179 }, { "epoch": 0.4070139511071291, "grad_norm": 0.73046875, "learning_rate": 0.0013427991577484874, "loss": 1.745, "step": 3180 }, { "epoch": 0.4071419429156534, "grad_norm": 0.62890625, "learning_rate": 0.0013424096799614078, "loss": 1.4438, "step": 3181 }, { "epoch": 0.40726993472417766, "grad_norm": 0.6484375, "learning_rate": 0.0013420201433256688, "loss": 1.1982, "step": 3182 }, { "epoch": 0.4073979265327019, "grad_norm": 0.65625, "learning_rate": 0.0013416305479082191, "loss": 1.2943, "step": 3183 }, { "epoch": 0.40752591834122615, "grad_norm": 0.75, "learning_rate": 0.0013412408937760168, "loss": 1.6744, "step": 3184 }, { "epoch": 0.40765391014975044, "grad_norm": 0.65625, "learning_rate": 0.0013408511809960303, "loss": 1.5203, "step": 3185 }, { "epoch": 0.4077819019582747, "grad_norm": 0.69140625, "learning_rate": 0.0013404614096352385, "loss": 1.6436, "step": 3186 }, { "epoch": 0.4079098937667989, "grad_norm": 0.609375, "learning_rate": 0.001340071579760629, "loss": 1.3551, "step": 3187 }, { "epoch": 0.40803788557532317, "grad_norm": 0.7421875, "learning_rate": 0.0013396816914392016, "loss": 1.4585, "step": 3188 }, { "epoch": 0.4081658773838474, "grad_norm": 0.66796875, "learning_rate": 0.0013392917447379636, "loss": 1.5675, "step": 3189 }, { "epoch": 0.4082938691923717, "grad_norm": 0.6015625, "learning_rate": 0.0013389017397239346, "loss": 1.3445, "step": 3190 }, { "epoch": 0.40842186100089595, "grad_norm": 0.6640625, "learning_rate": 0.001338511676464143, "loss": 1.5531, "step": 3191 }, { "epoch": 0.4085498528094202, "grad_norm": 0.5546875, "learning_rate": 0.0013381215550256277, "loss": 1.0272, "step": 3192 }, { "epoch": 0.40867784461794443, "grad_norm": 0.671875, "learning_rate": 0.0013377313754754372, "loss": 1.6199, "step": 3193 }, { "epoch": 0.4088058364264687, "grad_norm": 0.6640625, "learning_rate": 0.0013373411378806302, "loss": 1.4394, "step": 3194 }, { "epoch": 0.40893382823499297, "grad_norm": 0.84765625, "learning_rate": 0.001336950842308275, "loss": 1.6912, "step": 3195 }, { "epoch": 0.4090618200435172, "grad_norm": 0.9375, "learning_rate": 0.001336560488825451, "loss": 1.6453, "step": 3196 }, { "epoch": 0.40918981185204145, "grad_norm": 0.71875, "learning_rate": 0.0013361700774992464, "loss": 1.6802, "step": 3197 }, { "epoch": 0.40931780366056575, "grad_norm": 0.96484375, "learning_rate": 0.0013357796083967597, "loss": 1.3206, "step": 3198 }, { "epoch": 0.40944579546909, "grad_norm": 0.75, "learning_rate": 0.0013353890815850993, "loss": 1.8972, "step": 3199 }, { "epoch": 0.40957378727761423, "grad_norm": 0.5546875, "learning_rate": 0.0013349984971313836, "loss": 1.5101, "step": 3200 }, { "epoch": 0.4097017790861385, "grad_norm": 0.66015625, "learning_rate": 0.0013346078551027408, "loss": 1.6004, "step": 3201 }, { "epoch": 0.4098297708946627, "grad_norm": 0.76953125, "learning_rate": 0.0013342171555663091, "loss": 2.0847, "step": 3202 }, { "epoch": 0.409957762703187, "grad_norm": 0.67578125, "learning_rate": 0.001333826398589237, "loss": 2.0405, "step": 3203 }, { "epoch": 0.41008575451171125, "grad_norm": 0.625, "learning_rate": 0.0013334355842386815, "loss": 1.6316, "step": 3204 }, { "epoch": 0.4102137463202355, "grad_norm": 0.80859375, "learning_rate": 0.0013330447125818115, "loss": 1.9201, "step": 3205 }, { "epoch": 0.41034173812875974, "grad_norm": 0.78125, "learning_rate": 0.0013326537836858038, "loss": 2.2348, "step": 3206 }, { "epoch": 0.41046972993728403, "grad_norm": 0.609375, "learning_rate": 0.001332262797617846, "loss": 1.029, "step": 3207 }, { "epoch": 0.4105977217458083, "grad_norm": 0.6796875, "learning_rate": 0.0013318717544451358, "loss": 1.7886, "step": 3208 }, { "epoch": 0.4107257135543325, "grad_norm": 0.6953125, "learning_rate": 0.0013314806542348801, "loss": 1.6665, "step": 3209 }, { "epoch": 0.41085370536285676, "grad_norm": 0.5625, "learning_rate": 0.0013310894970542957, "loss": 1.2825, "step": 3210 }, { "epoch": 0.41098169717138106, "grad_norm": 0.83203125, "learning_rate": 0.0013306982829706091, "loss": 2.0388, "step": 3211 }, { "epoch": 0.4111096889799053, "grad_norm": 1.125, "learning_rate": 0.0013303070120510574, "loss": 1.8298, "step": 3212 }, { "epoch": 0.41123768078842954, "grad_norm": 0.64453125, "learning_rate": 0.0013299156843628863, "loss": 1.5872, "step": 3213 }, { "epoch": 0.4113656725969538, "grad_norm": 1.03125, "learning_rate": 0.0013295242999733523, "loss": 1.8088, "step": 3214 }, { "epoch": 0.4114936644054781, "grad_norm": 0.765625, "learning_rate": 0.0013291328589497208, "loss": 2.1707, "step": 3215 }, { "epoch": 0.4116216562140023, "grad_norm": 0.7578125, "learning_rate": 0.0013287413613592674, "loss": 1.5454, "step": 3216 }, { "epoch": 0.41174964802252656, "grad_norm": 1.3359375, "learning_rate": 0.001328349807269277, "loss": 1.8875, "step": 3217 }, { "epoch": 0.4118776398310508, "grad_norm": 0.9140625, "learning_rate": 0.001327958196747045, "loss": 1.5711, "step": 3218 }, { "epoch": 0.41200563163957504, "grad_norm": 0.70703125, "learning_rate": 0.0013275665298598763, "loss": 1.8541, "step": 3219 }, { "epoch": 0.41213362344809934, "grad_norm": 0.74609375, "learning_rate": 0.0013271748066750843, "loss": 2.0757, "step": 3220 }, { "epoch": 0.4122616152566236, "grad_norm": 0.62890625, "learning_rate": 0.0013267830272599937, "loss": 1.7863, "step": 3221 }, { "epoch": 0.4123896070651478, "grad_norm": 0.640625, "learning_rate": 0.0013263911916819376, "loss": 1.4199, "step": 3222 }, { "epoch": 0.41251759887367206, "grad_norm": 0.7109375, "learning_rate": 0.0013259993000082598, "loss": 2.1541, "step": 3223 }, { "epoch": 0.41264559068219636, "grad_norm": 0.625, "learning_rate": 0.0013256073523063127, "loss": 1.6949, "step": 3224 }, { "epoch": 0.4127735824907206, "grad_norm": 0.55078125, "learning_rate": 0.0013252153486434596, "loss": 1.4258, "step": 3225 }, { "epoch": 0.41290157429924484, "grad_norm": 0.63671875, "learning_rate": 0.0013248232890870721, "loss": 1.4577, "step": 3226 }, { "epoch": 0.4130295661077691, "grad_norm": 0.80078125, "learning_rate": 0.0013244311737045324, "loss": 1.944, "step": 3227 }, { "epoch": 0.4131575579162934, "grad_norm": 0.6015625, "learning_rate": 0.0013240390025632315, "loss": 1.4302, "step": 3228 }, { "epoch": 0.4132855497248176, "grad_norm": 0.75, "learning_rate": 0.0013236467757305704, "loss": 1.8283, "step": 3229 }, { "epoch": 0.41341354153334187, "grad_norm": 0.703125, "learning_rate": 0.0013232544932739601, "loss": 1.659, "step": 3230 }, { "epoch": 0.4135415333418661, "grad_norm": 0.703125, "learning_rate": 0.0013228621552608202, "loss": 1.3654, "step": 3231 }, { "epoch": 0.41366952515039035, "grad_norm": 0.78125, "learning_rate": 0.0013224697617585809, "loss": 1.8382, "step": 3232 }, { "epoch": 0.41379751695891465, "grad_norm": 0.5625, "learning_rate": 0.0013220773128346805, "loss": 1.1239, "step": 3233 }, { "epoch": 0.4139255087674389, "grad_norm": 0.62109375, "learning_rate": 0.0013216848085565688, "loss": 1.76, "step": 3234 }, { "epoch": 0.41405350057596313, "grad_norm": 0.6796875, "learning_rate": 0.001321292248991703, "loss": 1.4854, "step": 3235 }, { "epoch": 0.41418149238448737, "grad_norm": 0.6875, "learning_rate": 0.0013208996342075517, "loss": 1.6808, "step": 3236 }, { "epoch": 0.41430948419301167, "grad_norm": 0.55859375, "learning_rate": 0.0013205069642715915, "loss": 1.4843, "step": 3237 }, { "epoch": 0.4144374760015359, "grad_norm": 0.65625, "learning_rate": 0.0013201142392513096, "loss": 1.3534, "step": 3238 }, { "epoch": 0.41456546781006015, "grad_norm": 0.65625, "learning_rate": 0.0013197214592142014, "loss": 1.8532, "step": 3239 }, { "epoch": 0.4146934596185844, "grad_norm": 0.74609375, "learning_rate": 0.0013193286242277735, "loss": 1.599, "step": 3240 }, { "epoch": 0.4148214514271087, "grad_norm": 0.61328125, "learning_rate": 0.0013189357343595404, "loss": 1.7203, "step": 3241 }, { "epoch": 0.41494944323563293, "grad_norm": 0.65625, "learning_rate": 0.0013185427896770267, "loss": 1.4103, "step": 3242 }, { "epoch": 0.4150774350441572, "grad_norm": 0.69921875, "learning_rate": 0.0013181497902477663, "loss": 1.7425, "step": 3243 }, { "epoch": 0.4152054268526814, "grad_norm": 0.63671875, "learning_rate": 0.0013177567361393025, "loss": 1.4384, "step": 3244 }, { "epoch": 0.41533341866120566, "grad_norm": 0.59765625, "learning_rate": 0.0013173636274191882, "loss": 1.2346, "step": 3245 }, { "epoch": 0.41546141046972995, "grad_norm": 0.59765625, "learning_rate": 0.001316970464154985, "loss": 1.413, "step": 3246 }, { "epoch": 0.4155894022782542, "grad_norm": 0.73046875, "learning_rate": 0.0013165772464142653, "loss": 1.8685, "step": 3247 }, { "epoch": 0.41571739408677844, "grad_norm": 0.8125, "learning_rate": 0.0013161839742646092, "loss": 2.1633, "step": 3248 }, { "epoch": 0.4158453858953027, "grad_norm": 0.84375, "learning_rate": 0.001315790647773607, "loss": 1.9804, "step": 3249 }, { "epoch": 0.415973377703827, "grad_norm": 0.83203125, "learning_rate": 0.0013153972670088582, "loss": 1.8009, "step": 3250 }, { "epoch": 0.4161013695123512, "grad_norm": 0.7734375, "learning_rate": 0.0013150038320379723, "loss": 1.7432, "step": 3251 }, { "epoch": 0.41622936132087546, "grad_norm": 0.7109375, "learning_rate": 0.0013146103429285667, "loss": 1.5213, "step": 3252 }, { "epoch": 0.4163573531293997, "grad_norm": 0.76171875, "learning_rate": 0.0013142167997482693, "loss": 1.4661, "step": 3253 }, { "epoch": 0.416485344937924, "grad_norm": 0.671875, "learning_rate": 0.0013138232025647167, "loss": 1.7972, "step": 3254 }, { "epoch": 0.41661333674644824, "grad_norm": 0.8046875, "learning_rate": 0.0013134295514455553, "loss": 1.2969, "step": 3255 }, { "epoch": 0.4167413285549725, "grad_norm": 0.609375, "learning_rate": 0.0013130358464584397, "loss": 1.4629, "step": 3256 }, { "epoch": 0.4168693203634967, "grad_norm": 0.8671875, "learning_rate": 0.0013126420876710353, "loss": 2.2046, "step": 3257 }, { "epoch": 0.416997312172021, "grad_norm": 0.609375, "learning_rate": 0.0013122482751510153, "loss": 1.4503, "step": 3258 }, { "epoch": 0.41712530398054526, "grad_norm": 0.66796875, "learning_rate": 0.0013118544089660634, "loss": 1.7137, "step": 3259 }, { "epoch": 0.4172532957890695, "grad_norm": 0.61328125, "learning_rate": 0.0013114604891838711, "loss": 1.4641, "step": 3260 }, { "epoch": 0.41738128759759374, "grad_norm": 0.69140625, "learning_rate": 0.0013110665158721406, "loss": 1.9427, "step": 3261 }, { "epoch": 0.417509279406118, "grad_norm": 0.72265625, "learning_rate": 0.0013106724890985823, "loss": 1.3499, "step": 3262 }, { "epoch": 0.4176372712146423, "grad_norm": 0.6796875, "learning_rate": 0.0013102784089309165, "loss": 1.3175, "step": 3263 }, { "epoch": 0.4177652630231665, "grad_norm": 0.68359375, "learning_rate": 0.0013098842754368718, "loss": 1.3827, "step": 3264 }, { "epoch": 0.41789325483169076, "grad_norm": 0.703125, "learning_rate": 0.0013094900886841865, "loss": 1.3803, "step": 3265 }, { "epoch": 0.418021246640215, "grad_norm": 0.79296875, "learning_rate": 0.0013090958487406079, "loss": 2.0383, "step": 3266 }, { "epoch": 0.4181492384487393, "grad_norm": 0.57421875, "learning_rate": 0.0013087015556738928, "loss": 1.2524, "step": 3267 }, { "epoch": 0.41827723025726354, "grad_norm": 0.69140625, "learning_rate": 0.0013083072095518068, "loss": 1.8613, "step": 3268 }, { "epoch": 0.4184052220657878, "grad_norm": 0.73828125, "learning_rate": 0.0013079128104421245, "loss": 1.8132, "step": 3269 }, { "epoch": 0.418533213874312, "grad_norm": 0.6328125, "learning_rate": 0.0013075183584126302, "loss": 1.6926, "step": 3270 }, { "epoch": 0.4186612056828363, "grad_norm": 0.6484375, "learning_rate": 0.0013071238535311163, "loss": 1.5211, "step": 3271 }, { "epoch": 0.41878919749136057, "grad_norm": 0.609375, "learning_rate": 0.0013067292958653853, "loss": 1.3111, "step": 3272 }, { "epoch": 0.4189171892998848, "grad_norm": 0.76953125, "learning_rate": 0.001306334685483248, "loss": 1.2039, "step": 3273 }, { "epoch": 0.41904518110840905, "grad_norm": 0.7109375, "learning_rate": 0.0013059400224525253, "loss": 1.5959, "step": 3274 }, { "epoch": 0.4191731729169333, "grad_norm": 0.6484375, "learning_rate": 0.0013055453068410454, "loss": 1.8884, "step": 3275 }, { "epoch": 0.4193011647254576, "grad_norm": 0.73046875, "learning_rate": 0.0013051505387166476, "loss": 1.2878, "step": 3276 }, { "epoch": 0.41942915653398183, "grad_norm": 1.0703125, "learning_rate": 0.0013047557181471783, "loss": 1.4471, "step": 3277 }, { "epoch": 0.41955714834250607, "grad_norm": 0.54296875, "learning_rate": 0.0013043608452004943, "loss": 1.0831, "step": 3278 }, { "epoch": 0.4196851401510303, "grad_norm": 0.6875, "learning_rate": 0.001303965919944461, "loss": 1.3966, "step": 3279 }, { "epoch": 0.4198131319595546, "grad_norm": 0.60546875, "learning_rate": 0.0013035709424469525, "loss": 1.1363, "step": 3280 }, { "epoch": 0.41994112376807885, "grad_norm": 0.60546875, "learning_rate": 0.0013031759127758522, "loss": 1.4959, "step": 3281 }, { "epoch": 0.4200691155766031, "grad_norm": 0.80859375, "learning_rate": 0.0013027808309990522, "loss": 1.9854, "step": 3282 }, { "epoch": 0.42019710738512733, "grad_norm": 0.8359375, "learning_rate": 0.0013023856971844544, "loss": 2.1157, "step": 3283 }, { "epoch": 0.42032509919365163, "grad_norm": 0.65625, "learning_rate": 0.0013019905113999678, "loss": 2.095, "step": 3284 }, { "epoch": 0.42045309100217587, "grad_norm": 0.84765625, "learning_rate": 0.0013015952737135124, "loss": 2.0794, "step": 3285 }, { "epoch": 0.4205810828107001, "grad_norm": 0.62890625, "learning_rate": 0.0013011999841930158, "loss": 1.5729, "step": 3286 }, { "epoch": 0.42070907461922435, "grad_norm": 0.62109375, "learning_rate": 0.0013008046429064152, "loss": 1.9995, "step": 3287 }, { "epoch": 0.42083706642774865, "grad_norm": 0.67578125, "learning_rate": 0.001300409249921656, "loss": 1.8075, "step": 3288 }, { "epoch": 0.4209650582362729, "grad_norm": 0.6640625, "learning_rate": 0.001300013805306693, "loss": 1.7081, "step": 3289 }, { "epoch": 0.42109305004479713, "grad_norm": 0.828125, "learning_rate": 0.00129961830912949, "loss": 1.9376, "step": 3290 }, { "epoch": 0.4212210418533214, "grad_norm": 0.9296875, "learning_rate": 0.0012992227614580195, "loss": 1.9304, "step": 3291 }, { "epoch": 0.4213490336618456, "grad_norm": 0.96875, "learning_rate": 0.0012988271623602628, "loss": 2.1461, "step": 3292 }, { "epoch": 0.4214770254703699, "grad_norm": 0.91796875, "learning_rate": 0.0012984315119042094, "loss": 1.7212, "step": 3293 }, { "epoch": 0.42160501727889416, "grad_norm": 0.72265625, "learning_rate": 0.001298035810157859, "loss": 1.5468, "step": 3294 }, { "epoch": 0.4217330090874184, "grad_norm": 0.65234375, "learning_rate": 0.001297640057189219, "loss": 1.3799, "step": 3295 }, { "epoch": 0.42186100089594264, "grad_norm": 0.8671875, "learning_rate": 0.001297244253066306, "loss": 1.5107, "step": 3296 }, { "epoch": 0.42198899270446694, "grad_norm": 0.73828125, "learning_rate": 0.0012968483978571452, "loss": 1.5036, "step": 3297 }, { "epoch": 0.4221169845129912, "grad_norm": 0.578125, "learning_rate": 0.0012964524916297714, "loss": 1.2885, "step": 3298 }, { "epoch": 0.4222449763215154, "grad_norm": 0.56640625, "learning_rate": 0.0012960565344522267, "loss": 1.2853, "step": 3299 }, { "epoch": 0.42237296813003966, "grad_norm": 0.92578125, "learning_rate": 0.0012956605263925631, "loss": 2.4334, "step": 3300 }, { "epoch": 0.42250095993856396, "grad_norm": 0.65234375, "learning_rate": 0.001295264467518841, "loss": 1.3032, "step": 3301 }, { "epoch": 0.4226289517470882, "grad_norm": 0.67578125, "learning_rate": 0.0012948683578991298, "loss": 1.5153, "step": 3302 }, { "epoch": 0.42275694355561244, "grad_norm": 0.6796875, "learning_rate": 0.001294472197601507, "loss": 1.792, "step": 3303 }, { "epoch": 0.4228849353641367, "grad_norm": 0.53515625, "learning_rate": 0.0012940759866940591, "loss": 1.1729, "step": 3304 }, { "epoch": 0.4230129271726609, "grad_norm": 0.58984375, "learning_rate": 0.0012936797252448822, "loss": 1.3599, "step": 3305 }, { "epoch": 0.4231409189811852, "grad_norm": 0.828125, "learning_rate": 0.0012932834133220791, "loss": 1.6, "step": 3306 }, { "epoch": 0.42326891078970946, "grad_norm": 0.58984375, "learning_rate": 0.0012928870509937632, "loss": 1.3691, "step": 3307 }, { "epoch": 0.4233969025982337, "grad_norm": 0.73828125, "learning_rate": 0.0012924906383280552, "loss": 2.5634, "step": 3308 }, { "epoch": 0.42352489440675795, "grad_norm": 0.75, "learning_rate": 0.0012920941753930858, "loss": 1.4948, "step": 3309 }, { "epoch": 0.42365288621528224, "grad_norm": 0.6875, "learning_rate": 0.0012916976622569932, "loss": 1.922, "step": 3310 }, { "epoch": 0.4237808780238065, "grad_norm": 0.66796875, "learning_rate": 0.0012913010989879243, "loss": 1.4629, "step": 3311 }, { "epoch": 0.4239088698323307, "grad_norm": 0.75390625, "learning_rate": 0.0012909044856540355, "loss": 1.5908, "step": 3312 }, { "epoch": 0.42403686164085497, "grad_norm": 0.83984375, "learning_rate": 0.0012905078223234908, "loss": 2.1848, "step": 3313 }, { "epoch": 0.42416485344937926, "grad_norm": 0.703125, "learning_rate": 0.0012901111090644637, "loss": 1.8984, "step": 3314 }, { "epoch": 0.4242928452579035, "grad_norm": 0.71484375, "learning_rate": 0.0012897143459451354, "loss": 1.7739, "step": 3315 }, { "epoch": 0.42442083706642775, "grad_norm": 0.54296875, "learning_rate": 0.001289317533033696, "loss": 1.4739, "step": 3316 }, { "epoch": 0.424548828874952, "grad_norm": 0.90234375, "learning_rate": 0.0012889206703983447, "loss": 1.3993, "step": 3317 }, { "epoch": 0.42467682068347623, "grad_norm": 0.61328125, "learning_rate": 0.0012885237581072883, "loss": 1.4416, "step": 3318 }, { "epoch": 0.4248048124920005, "grad_norm": 0.8125, "learning_rate": 0.001288126796228743, "loss": 2.4509, "step": 3319 }, { "epoch": 0.42493280430052477, "grad_norm": 0.66796875, "learning_rate": 0.0012877297848309331, "loss": 1.6117, "step": 3320 }, { "epoch": 0.425060796109049, "grad_norm": 0.609375, "learning_rate": 0.001287332723982091, "loss": 1.5292, "step": 3321 }, { "epoch": 0.42518878791757325, "grad_norm": 0.6953125, "learning_rate": 0.0012869356137504586, "loss": 1.409, "step": 3322 }, { "epoch": 0.42531677972609755, "grad_norm": 0.6875, "learning_rate": 0.0012865384542042856, "loss": 1.6209, "step": 3323 }, { "epoch": 0.4254447715346218, "grad_norm": 0.60546875, "learning_rate": 0.0012861412454118297, "loss": 1.7499, "step": 3324 }, { "epoch": 0.42557276334314603, "grad_norm": 0.71484375, "learning_rate": 0.0012857439874413587, "loss": 1.6339, "step": 3325 }, { "epoch": 0.4257007551516703, "grad_norm": 0.72265625, "learning_rate": 0.0012853466803611473, "loss": 2.2414, "step": 3326 }, { "epoch": 0.42582874696019457, "grad_norm": 0.796875, "learning_rate": 0.0012849493242394793, "loss": 2.5823, "step": 3327 }, { "epoch": 0.4259567387687188, "grad_norm": 0.6015625, "learning_rate": 0.0012845519191446463, "loss": 1.678, "step": 3328 }, { "epoch": 0.42608473057724305, "grad_norm": 0.671875, "learning_rate": 0.0012841544651449497, "loss": 1.9392, "step": 3329 }, { "epoch": 0.4262127223857673, "grad_norm": 0.671875, "learning_rate": 0.0012837569623086976, "loss": 1.5999, "step": 3330 }, { "epoch": 0.4263407141942916, "grad_norm": 0.80859375, "learning_rate": 0.0012833594107042076, "loss": 1.8916, "step": 3331 }, { "epoch": 0.42646870600281583, "grad_norm": 0.578125, "learning_rate": 0.0012829618103998059, "loss": 1.1979, "step": 3332 }, { "epoch": 0.4265966978113401, "grad_norm": 1.3046875, "learning_rate": 0.0012825641614638253, "loss": 1.1127, "step": 3333 }, { "epoch": 0.4267246896198643, "grad_norm": 0.75, "learning_rate": 0.0012821664639646095, "loss": 2.5159, "step": 3334 }, { "epoch": 0.42685268142838856, "grad_norm": 0.7734375, "learning_rate": 0.0012817687179705088, "loss": 2.1429, "step": 3335 }, { "epoch": 0.42698067323691286, "grad_norm": 0.69140625, "learning_rate": 0.0012813709235498822, "loss": 1.7954, "step": 3336 }, { "epoch": 0.4271086650454371, "grad_norm": 0.703125, "learning_rate": 0.0012809730807710974, "loss": 1.7712, "step": 3337 }, { "epoch": 0.42723665685396134, "grad_norm": 0.5859375, "learning_rate": 0.0012805751897025297, "loss": 1.4451, "step": 3338 }, { "epoch": 0.4273646486624856, "grad_norm": 0.58984375, "learning_rate": 0.0012801772504125635, "loss": 1.4498, "step": 3339 }, { "epoch": 0.4274926404710099, "grad_norm": 0.7421875, "learning_rate": 0.0012797792629695908, "loss": 1.6222, "step": 3340 }, { "epoch": 0.4276206322795341, "grad_norm": 0.640625, "learning_rate": 0.0012793812274420127, "loss": 1.067, "step": 3341 }, { "epoch": 0.42774862408805836, "grad_norm": 0.62109375, "learning_rate": 0.0012789831438982375, "loss": 1.3138, "step": 3342 }, { "epoch": 0.4278766158965826, "grad_norm": 0.8359375, "learning_rate": 0.001278585012406683, "loss": 1.7521, "step": 3343 }, { "epoch": 0.4280046077051069, "grad_norm": 0.59765625, "learning_rate": 0.0012781868330357738, "loss": 1.5049, "step": 3344 }, { "epoch": 0.42813259951363114, "grad_norm": 0.61328125, "learning_rate": 0.0012777886058539438, "loss": 1.4626, "step": 3345 }, { "epoch": 0.4282605913221554, "grad_norm": 0.65625, "learning_rate": 0.0012773903309296348, "loss": 1.4665, "step": 3346 }, { "epoch": 0.4283885831306796, "grad_norm": 0.58203125, "learning_rate": 0.001276992008331297, "loss": 1.376, "step": 3347 }, { "epoch": 0.42851657493920386, "grad_norm": 0.86328125, "learning_rate": 0.0012765936381273885, "loss": 2.123, "step": 3348 }, { "epoch": 0.42864456674772816, "grad_norm": 0.6015625, "learning_rate": 0.0012761952203863757, "loss": 1.2421, "step": 3349 }, { "epoch": 0.4287725585562524, "grad_norm": 0.62890625, "learning_rate": 0.0012757967551767328, "loss": 1.7098, "step": 3350 }, { "epoch": 0.42890055036477664, "grad_norm": 0.69140625, "learning_rate": 0.001275398242566943, "loss": 1.6417, "step": 3351 }, { "epoch": 0.4290285421733009, "grad_norm": 0.60546875, "learning_rate": 0.001274999682625497, "loss": 1.3395, "step": 3352 }, { "epoch": 0.4291565339818252, "grad_norm": 0.578125, "learning_rate": 0.0012746010754208933, "loss": 1.3381, "step": 3353 }, { "epoch": 0.4292845257903494, "grad_norm": 0.7109375, "learning_rate": 0.00127420242102164, "loss": 1.6025, "step": 3354 }, { "epoch": 0.42941251759887367, "grad_norm": 0.59765625, "learning_rate": 0.0012738037194962514, "loss": 1.6224, "step": 3355 }, { "epoch": 0.4295405094073979, "grad_norm": 1.2421875, "learning_rate": 0.0012734049709132514, "loss": 1.5415, "step": 3356 }, { "epoch": 0.4296685012159222, "grad_norm": 0.65625, "learning_rate": 0.001273006175341171, "loss": 1.7885, "step": 3357 }, { "epoch": 0.42979649302444645, "grad_norm": 0.6875, "learning_rate": 0.0012726073328485502, "loss": 1.3686, "step": 3358 }, { "epoch": 0.4299244848329707, "grad_norm": 0.609375, "learning_rate": 0.001272208443503936, "loss": 0.9184, "step": 3359 }, { "epoch": 0.43005247664149493, "grad_norm": 0.62109375, "learning_rate": 0.0012718095073758846, "loss": 1.7434, "step": 3360 }, { "epoch": 0.43018046845001917, "grad_norm": 0.65234375, "learning_rate": 0.0012714105245329591, "loss": 1.1876, "step": 3361 }, { "epoch": 0.43030846025854347, "grad_norm": 0.91015625, "learning_rate": 0.0012710114950437313, "loss": 1.4127, "step": 3362 }, { "epoch": 0.4304364520670677, "grad_norm": 0.8984375, "learning_rate": 0.001270612418976781, "loss": 2.0362, "step": 3363 }, { "epoch": 0.43056444387559195, "grad_norm": 0.72265625, "learning_rate": 0.0012702132964006957, "loss": 1.8393, "step": 3364 }, { "epoch": 0.4306924356841162, "grad_norm": 0.55078125, "learning_rate": 0.0012698141273840718, "loss": 1.1262, "step": 3365 }, { "epoch": 0.4308204274926405, "grad_norm": 0.59375, "learning_rate": 0.001269414911995512, "loss": 1.5473, "step": 3366 }, { "epoch": 0.43094841930116473, "grad_norm": 0.765625, "learning_rate": 0.0012690156503036287, "loss": 2.0267, "step": 3367 }, { "epoch": 0.431076411109689, "grad_norm": 0.55859375, "learning_rate": 0.0012686163423770408, "loss": 1.5672, "step": 3368 }, { "epoch": 0.4312044029182132, "grad_norm": 0.79296875, "learning_rate": 0.0012682169882843766, "loss": 1.6958, "step": 3369 }, { "epoch": 0.4313323947267375, "grad_norm": 0.94921875, "learning_rate": 0.001267817588094271, "loss": 1.9166, "step": 3370 }, { "epoch": 0.43146038653526175, "grad_norm": 0.5625, "learning_rate": 0.0012674181418753678, "loss": 1.5383, "step": 3371 }, { "epoch": 0.431588378343786, "grad_norm": 0.6484375, "learning_rate": 0.0012670186496963178, "loss": 1.6633, "step": 3372 }, { "epoch": 0.43171637015231024, "grad_norm": 0.81640625, "learning_rate": 0.0012666191116257807, "loss": 1.9452, "step": 3373 }, { "epoch": 0.43184436196083453, "grad_norm": 0.71484375, "learning_rate": 0.0012662195277324236, "loss": 1.6633, "step": 3374 }, { "epoch": 0.4319723537693588, "grad_norm": 0.63671875, "learning_rate": 0.0012658198980849212, "loss": 1.6215, "step": 3375 }, { "epoch": 0.432100345577883, "grad_norm": 0.68359375, "learning_rate": 0.0012654202227519564, "loss": 1.2936, "step": 3376 }, { "epoch": 0.43222833738640726, "grad_norm": 0.7109375, "learning_rate": 0.0012650205018022202, "loss": 1.5654, "step": 3377 }, { "epoch": 0.4323563291949315, "grad_norm": 0.63671875, "learning_rate": 0.0012646207353044106, "loss": 1.6376, "step": 3378 }, { "epoch": 0.4324843210034558, "grad_norm": 0.59765625, "learning_rate": 0.0012642209233272338, "loss": 1.2867, "step": 3379 }, { "epoch": 0.43261231281198004, "grad_norm": 0.56640625, "learning_rate": 0.0012638210659394052, "loss": 1.6056, "step": 3380 }, { "epoch": 0.4327403046205043, "grad_norm": 0.61328125, "learning_rate": 0.0012634211632096455, "loss": 1.2171, "step": 3381 }, { "epoch": 0.4328682964290285, "grad_norm": 0.60546875, "learning_rate": 0.0012630212152066854, "loss": 1.0596, "step": 3382 }, { "epoch": 0.4329962882375528, "grad_norm": 0.58984375, "learning_rate": 0.0012626212219992617, "loss": 1.6454, "step": 3383 }, { "epoch": 0.43312428004607706, "grad_norm": 0.78125, "learning_rate": 0.0012622211836561198, "loss": 1.7808, "step": 3384 }, { "epoch": 0.4332522718546013, "grad_norm": 0.64453125, "learning_rate": 0.0012618211002460133, "loss": 1.3427, "step": 3385 }, { "epoch": 0.43338026366312554, "grad_norm": 0.55859375, "learning_rate": 0.0012614209718377026, "loss": 1.2118, "step": 3386 }, { "epoch": 0.43350825547164984, "grad_norm": 0.640625, "learning_rate": 0.0012610207984999568, "loss": 1.7984, "step": 3387 }, { "epoch": 0.4336362472801741, "grad_norm": 0.83984375, "learning_rate": 0.0012606205803015514, "loss": 2.2219, "step": 3388 }, { "epoch": 0.4337642390886983, "grad_norm": 0.66796875, "learning_rate": 0.001260220317311271, "loss": 1.3326, "step": 3389 }, { "epoch": 0.43389223089722256, "grad_norm": 0.65625, "learning_rate": 0.0012598200095979067, "loss": 1.8403, "step": 3390 }, { "epoch": 0.4340202227057468, "grad_norm": 0.7421875, "learning_rate": 0.0012594196572302586, "loss": 1.0855, "step": 3391 }, { "epoch": 0.4341482145142711, "grad_norm": 0.6484375, "learning_rate": 0.001259019260277133, "loss": 1.4709, "step": 3392 }, { "epoch": 0.43427620632279534, "grad_norm": 0.59765625, "learning_rate": 0.0012586188188073455, "loss": 1.7369, "step": 3393 }, { "epoch": 0.4344041981313196, "grad_norm": 0.71484375, "learning_rate": 0.0012582183328897177, "loss": 1.7697, "step": 3394 }, { "epoch": 0.4345321899398438, "grad_norm": 0.69140625, "learning_rate": 0.0012578178025930797, "loss": 1.5611, "step": 3395 }, { "epoch": 0.4346601817483681, "grad_norm": 0.70703125, "learning_rate": 0.0012574172279862694, "loss": 2.0513, "step": 3396 }, { "epoch": 0.43478817355689237, "grad_norm": 0.6015625, "learning_rate": 0.0012570166091381318, "loss": 1.2961, "step": 3397 }, { "epoch": 0.4349161653654166, "grad_norm": 0.5625, "learning_rate": 0.0012566159461175205, "loss": 1.3058, "step": 3398 }, { "epoch": 0.43504415717394085, "grad_norm": 0.6875, "learning_rate": 0.0012562152389932945, "loss": 1.6982, "step": 3399 }, { "epoch": 0.43517214898246515, "grad_norm": 0.73828125, "learning_rate": 0.0012558144878343233, "loss": 2.0853, "step": 3400 }, { "epoch": 0.4353001407909894, "grad_norm": 0.68359375, "learning_rate": 0.0012554136927094817, "loss": 1.3152, "step": 3401 }, { "epoch": 0.43542813259951363, "grad_norm": 0.6953125, "learning_rate": 0.001255012853687653, "loss": 1.5895, "step": 3402 }, { "epoch": 0.43555612440803787, "grad_norm": 0.9921875, "learning_rate": 0.0012546119708377274, "loss": 1.8828, "step": 3403 }, { "epoch": 0.43568411621656217, "grad_norm": 0.765625, "learning_rate": 0.0012542110442286042, "loss": 1.761, "step": 3404 }, { "epoch": 0.4358121080250864, "grad_norm": 0.69140625, "learning_rate": 0.0012538100739291882, "loss": 2.2002, "step": 3405 }, { "epoch": 0.43594009983361065, "grad_norm": 0.6796875, "learning_rate": 0.001253409060008393, "loss": 1.0123, "step": 3406 }, { "epoch": 0.4360680916421349, "grad_norm": 0.62890625, "learning_rate": 0.001253008002535139, "loss": 1.2089, "step": 3407 }, { "epoch": 0.43619608345065913, "grad_norm": 0.56640625, "learning_rate": 0.0012526069015783552, "loss": 1.3318, "step": 3408 }, { "epoch": 0.43632407525918343, "grad_norm": 0.7734375, "learning_rate": 0.0012522057572069772, "loss": 1.8096, "step": 3409 }, { "epoch": 0.43645206706770767, "grad_norm": 0.65234375, "learning_rate": 0.0012518045694899474, "loss": 1.354, "step": 3410 }, { "epoch": 0.4365800588762319, "grad_norm": 0.7265625, "learning_rate": 0.001251403338496217, "loss": 1.5007, "step": 3411 }, { "epoch": 0.43670805068475615, "grad_norm": 0.6171875, "learning_rate": 0.001251002064294744, "loss": 1.9215, "step": 3412 }, { "epoch": 0.43683604249328045, "grad_norm": 0.79296875, "learning_rate": 0.0012506007469544936, "loss": 2.1161, "step": 3413 }, { "epoch": 0.4369640343018047, "grad_norm": 0.734375, "learning_rate": 0.0012501993865444388, "loss": 1.6174, "step": 3414 }, { "epoch": 0.43709202611032894, "grad_norm": 0.6328125, "learning_rate": 0.0012497979831335604, "loss": 0.9407, "step": 3415 }, { "epoch": 0.4372200179188532, "grad_norm": 0.7109375, "learning_rate": 0.0012493965367908453, "loss": 2.3396, "step": 3416 }, { "epoch": 0.4373480097273775, "grad_norm": 0.64453125, "learning_rate": 0.0012489950475852891, "loss": 1.4765, "step": 3417 }, { "epoch": 0.4374760015359017, "grad_norm": 0.71484375, "learning_rate": 0.0012485935155858942, "loss": 1.8933, "step": 3418 }, { "epoch": 0.43760399334442596, "grad_norm": 0.671875, "learning_rate": 0.0012481919408616699, "loss": 1.6328, "step": 3419 }, { "epoch": 0.4377319851529502, "grad_norm": 0.74609375, "learning_rate": 0.0012477903234816342, "loss": 2.0547, "step": 3420 }, { "epoch": 0.43785997696147444, "grad_norm": 0.9375, "learning_rate": 0.001247388663514811, "loss": 2.0868, "step": 3421 }, { "epoch": 0.43798796876999874, "grad_norm": 0.71484375, "learning_rate": 0.001246986961030232, "loss": 2.133, "step": 3422 }, { "epoch": 0.438115960578523, "grad_norm": 0.63671875, "learning_rate": 0.0012465852160969365, "loss": 1.7411, "step": 3423 }, { "epoch": 0.4382439523870472, "grad_norm": 0.7265625, "learning_rate": 0.0012461834287839707, "loss": 1.8637, "step": 3424 }, { "epoch": 0.43837194419557146, "grad_norm": 0.66796875, "learning_rate": 0.0012457815991603885, "loss": 1.3558, "step": 3425 }, { "epoch": 0.43849993600409576, "grad_norm": 0.6796875, "learning_rate": 0.0012453797272952505, "loss": 1.4696, "step": 3426 }, { "epoch": 0.43862792781262, "grad_norm": 0.578125, "learning_rate": 0.0012449778132576255, "loss": 1.3054, "step": 3427 }, { "epoch": 0.43875591962114424, "grad_norm": 0.5390625, "learning_rate": 0.0012445758571165884, "loss": 1.2032, "step": 3428 }, { "epoch": 0.4388839114296685, "grad_norm": 0.85546875, "learning_rate": 0.001244173858941222, "loss": 1.6725, "step": 3429 }, { "epoch": 0.4390119032381928, "grad_norm": 0.7265625, "learning_rate": 0.0012437718188006164, "loss": 1.4044, "step": 3430 }, { "epoch": 0.439139895046717, "grad_norm": 0.7265625, "learning_rate": 0.0012433697367638687, "loss": 1.5909, "step": 3431 }, { "epoch": 0.43926788685524126, "grad_norm": 1.015625, "learning_rate": 0.001242967612900083, "loss": 2.3239, "step": 3432 }, { "epoch": 0.4393958786637655, "grad_norm": 0.51953125, "learning_rate": 0.0012425654472783712, "loss": 0.9398, "step": 3433 }, { "epoch": 0.43952387047228975, "grad_norm": 0.70703125, "learning_rate": 0.0012421632399678513, "loss": 1.5226, "step": 3434 }, { "epoch": 0.43965186228081404, "grad_norm": 0.828125, "learning_rate": 0.0012417609910376497, "loss": 1.9862, "step": 3435 }, { "epoch": 0.4397798540893383, "grad_norm": 0.5390625, "learning_rate": 0.0012413587005568993, "loss": 0.8549, "step": 3436 }, { "epoch": 0.4399078458978625, "grad_norm": 0.59375, "learning_rate": 0.0012409563685947403, "loss": 1.5461, "step": 3437 }, { "epoch": 0.44003583770638677, "grad_norm": 0.76171875, "learning_rate": 0.0012405539952203197, "loss": 2.0183, "step": 3438 }, { "epoch": 0.44016382951491106, "grad_norm": 0.71875, "learning_rate": 0.0012401515805027922, "loss": 1.8735, "step": 3439 }, { "epoch": 0.4402918213234353, "grad_norm": 0.72265625, "learning_rate": 0.0012397491245113193, "loss": 1.5493, "step": 3440 }, { "epoch": 0.44041981313195955, "grad_norm": 0.68359375, "learning_rate": 0.0012393466273150694, "loss": 1.789, "step": 3441 }, { "epoch": 0.4405478049404838, "grad_norm": 0.6796875, "learning_rate": 0.0012389440889832182, "loss": 1.6762, "step": 3442 }, { "epoch": 0.4406757967490081, "grad_norm": 0.65625, "learning_rate": 0.0012385415095849486, "loss": 1.8088, "step": 3443 }, { "epoch": 0.44080378855753233, "grad_norm": 0.58984375, "learning_rate": 0.0012381388891894505, "loss": 1.1798, "step": 3444 }, { "epoch": 0.44093178036605657, "grad_norm": 0.86328125, "learning_rate": 0.00123773622786592, "loss": 1.8993, "step": 3445 }, { "epoch": 0.4410597721745808, "grad_norm": 0.64453125, "learning_rate": 0.0012373335256835618, "loss": 1.4883, "step": 3446 }, { "epoch": 0.4411877639831051, "grad_norm": 0.57421875, "learning_rate": 0.0012369307827115865, "loss": 1.2718, "step": 3447 }, { "epoch": 0.44131575579162935, "grad_norm": 0.76171875, "learning_rate": 0.0012365279990192123, "loss": 2.244, "step": 3448 }, { "epoch": 0.4414437476001536, "grad_norm": 0.80859375, "learning_rate": 0.0012361251746756637, "loss": 1.3261, "step": 3449 }, { "epoch": 0.44157173940867783, "grad_norm": 0.7109375, "learning_rate": 0.0012357223097501726, "loss": 1.6683, "step": 3450 }, { "epoch": 0.4416997312172021, "grad_norm": 0.71875, "learning_rate": 0.0012353194043119781, "loss": 1.5258, "step": 3451 }, { "epoch": 0.44182772302572637, "grad_norm": 0.6484375, "learning_rate": 0.001234916458430326, "loss": 1.6429, "step": 3452 }, { "epoch": 0.4419557148342506, "grad_norm": 0.65234375, "learning_rate": 0.0012345134721744692, "loss": 1.5034, "step": 3453 }, { "epoch": 0.44208370664277485, "grad_norm": 0.7109375, "learning_rate": 0.0012341104456136671, "loss": 2.0101, "step": 3454 }, { "epoch": 0.4422116984512991, "grad_norm": 0.6484375, "learning_rate": 0.001233707378817187, "loss": 1.659, "step": 3455 }, { "epoch": 0.4423396902598234, "grad_norm": 0.71875, "learning_rate": 0.0012333042718543012, "loss": 2.1481, "step": 3456 }, { "epoch": 0.44246768206834763, "grad_norm": 0.6015625, "learning_rate": 0.0012329011247942914, "loss": 1.4528, "step": 3457 }, { "epoch": 0.4425956738768719, "grad_norm": 0.89453125, "learning_rate": 0.0012324979377064445, "loss": 1.6377, "step": 3458 }, { "epoch": 0.4427236656853961, "grad_norm": 0.6875, "learning_rate": 0.0012320947106600549, "loss": 1.9401, "step": 3459 }, { "epoch": 0.4428516574939204, "grad_norm": 0.6328125, "learning_rate": 0.0012316914437244236, "loss": 1.4429, "step": 3460 }, { "epoch": 0.44297964930244466, "grad_norm": 0.78125, "learning_rate": 0.0012312881369688582, "loss": 1.6428, "step": 3461 }, { "epoch": 0.4431076411109689, "grad_norm": 0.609375, "learning_rate": 0.0012308847904626743, "loss": 1.2404, "step": 3462 }, { "epoch": 0.44323563291949314, "grad_norm": 0.62890625, "learning_rate": 0.0012304814042751926, "loss": 1.4913, "step": 3463 }, { "epoch": 0.4433636247280174, "grad_norm": 0.66796875, "learning_rate": 0.0012300779784757427, "loss": 1.3613, "step": 3464 }, { "epoch": 0.4434916165365417, "grad_norm": 0.53515625, "learning_rate": 0.0012296745131336589, "loss": 1.1595, "step": 3465 }, { "epoch": 0.4436196083450659, "grad_norm": 0.78125, "learning_rate": 0.0012292710083182837, "loss": 1.3368, "step": 3466 }, { "epoch": 0.44374760015359016, "grad_norm": 0.69140625, "learning_rate": 0.0012288674640989657, "loss": 1.6415, "step": 3467 }, { "epoch": 0.4438755919621144, "grad_norm": 0.67578125, "learning_rate": 0.0012284638805450606, "loss": 1.0915, "step": 3468 }, { "epoch": 0.4440035837706387, "grad_norm": 0.703125, "learning_rate": 0.0012280602577259313, "loss": 1.8176, "step": 3469 }, { "epoch": 0.44413157557916294, "grad_norm": 0.578125, "learning_rate": 0.001227656595710946, "loss": 1.3079, "step": 3470 }, { "epoch": 0.4442595673876872, "grad_norm": 0.82421875, "learning_rate": 0.0012272528945694815, "loss": 2.5854, "step": 3471 }, { "epoch": 0.4443875591962114, "grad_norm": 0.71875, "learning_rate": 0.0012268491543709194, "loss": 1.7278, "step": 3472 }, { "epoch": 0.4445155510047357, "grad_norm": 0.71875, "learning_rate": 0.0012264453751846503, "loss": 1.6548, "step": 3473 }, { "epoch": 0.44464354281325996, "grad_norm": 0.62890625, "learning_rate": 0.0012260415570800687, "loss": 1.6534, "step": 3474 }, { "epoch": 0.4447715346217842, "grad_norm": 0.640625, "learning_rate": 0.0012256377001265784, "loss": 1.5329, "step": 3475 }, { "epoch": 0.44489952643030845, "grad_norm": 0.609375, "learning_rate": 0.0012252338043935885, "loss": 1.2831, "step": 3476 }, { "epoch": 0.44502751823883274, "grad_norm": 0.640625, "learning_rate": 0.001224829869950515, "loss": 1.3505, "step": 3477 }, { "epoch": 0.445155510047357, "grad_norm": 0.78515625, "learning_rate": 0.0012244258968667802, "loss": 1.8848, "step": 3478 }, { "epoch": 0.4452835018558812, "grad_norm": 0.6953125, "learning_rate": 0.0012240218852118138, "loss": 1.4611, "step": 3479 }, { "epoch": 0.44541149366440547, "grad_norm": 0.61328125, "learning_rate": 0.0012236178350550518, "loss": 1.6044, "step": 3480 }, { "epoch": 0.4455394854729297, "grad_norm": 0.87109375, "learning_rate": 0.0012232137464659367, "loss": 1.357, "step": 3481 }, { "epoch": 0.445667477281454, "grad_norm": 0.61328125, "learning_rate": 0.001222809619513918, "loss": 1.166, "step": 3482 }, { "epoch": 0.44579546908997825, "grad_norm": 0.640625, "learning_rate": 0.0012224054542684505, "loss": 1.6669, "step": 3483 }, { "epoch": 0.4459234608985025, "grad_norm": 0.59765625, "learning_rate": 0.001222001250798998, "loss": 1.0813, "step": 3484 }, { "epoch": 0.44605145270702673, "grad_norm": 0.65625, "learning_rate": 0.001221597009175028, "loss": 1.5182, "step": 3485 }, { "epoch": 0.446179444515551, "grad_norm": 0.6796875, "learning_rate": 0.001221192729466017, "loss": 1.0802, "step": 3486 }, { "epoch": 0.44630743632407527, "grad_norm": 0.466796875, "learning_rate": 0.0012207884117414465, "loss": 1.0509, "step": 3487 }, { "epoch": 0.4464354281325995, "grad_norm": 0.6171875, "learning_rate": 0.0012203840560708053, "loss": 1.4823, "step": 3488 }, { "epoch": 0.44656341994112375, "grad_norm": 0.70703125, "learning_rate": 0.0012199796625235882, "loss": 1.7934, "step": 3489 }, { "epoch": 0.44669141174964805, "grad_norm": 0.66015625, "learning_rate": 0.001219575231169297, "loss": 1.4526, "step": 3490 }, { "epoch": 0.4468194035581723, "grad_norm": 0.63671875, "learning_rate": 0.0012191707620774397, "loss": 1.5027, "step": 3491 }, { "epoch": 0.44694739536669653, "grad_norm": 0.73828125, "learning_rate": 0.0012187662553175313, "loss": 1.7341, "step": 3492 }, { "epoch": 0.4470753871752208, "grad_norm": 0.80859375, "learning_rate": 0.0012183617109590922, "loss": 1.7456, "step": 3493 }, { "epoch": 0.447203378983745, "grad_norm": 0.66015625, "learning_rate": 0.0012179571290716503, "loss": 1.935, "step": 3494 }, { "epoch": 0.4473313707922693, "grad_norm": 0.66796875, "learning_rate": 0.0012175525097247394, "loss": 1.4964, "step": 3495 }, { "epoch": 0.44745936260079355, "grad_norm": 0.83984375, "learning_rate": 0.0012171478529879002, "loss": 1.9062, "step": 3496 }, { "epoch": 0.4475873544093178, "grad_norm": 0.77734375, "learning_rate": 0.001216743158930679, "loss": 1.205, "step": 3497 }, { "epoch": 0.44771534621784204, "grad_norm": 0.546875, "learning_rate": 0.0012163384276226293, "loss": 1.1237, "step": 3498 }, { "epoch": 0.44784333802636633, "grad_norm": 0.6875, "learning_rate": 0.001215933659133311, "loss": 1.6805, "step": 3499 }, { "epoch": 0.4479713298348906, "grad_norm": 0.76171875, "learning_rate": 0.0012155288535322898, "loss": 1.7234, "step": 3500 }, { "epoch": 0.4480993216434148, "grad_norm": 0.55859375, "learning_rate": 0.001215124010889138, "loss": 1.0521, "step": 3501 }, { "epoch": 0.44822731345193906, "grad_norm": 0.56640625, "learning_rate": 0.001214719131273435, "loss": 1.2816, "step": 3502 }, { "epoch": 0.44835530526046335, "grad_norm": 0.67578125, "learning_rate": 0.0012143142147547652, "loss": 1.6553, "step": 3503 }, { "epoch": 0.4484832970689876, "grad_norm": 0.8671875, "learning_rate": 0.0012139092614027206, "loss": 2.029, "step": 3504 }, { "epoch": 0.44861128887751184, "grad_norm": 0.72265625, "learning_rate": 0.001213504271286899, "loss": 1.294, "step": 3505 }, { "epoch": 0.4487392806860361, "grad_norm": 0.6015625, "learning_rate": 0.001213099244476904, "loss": 1.4037, "step": 3506 }, { "epoch": 0.4488672724945603, "grad_norm": 0.59765625, "learning_rate": 0.0012126941810423463, "loss": 1.3263, "step": 3507 }, { "epoch": 0.4489952643030846, "grad_norm": 0.703125, "learning_rate": 0.001212289081052843, "loss": 1.7489, "step": 3508 }, { "epoch": 0.44912325611160886, "grad_norm": 0.625, "learning_rate": 0.0012118839445780163, "loss": 1.7369, "step": 3509 }, { "epoch": 0.4492512479201331, "grad_norm": 0.6171875, "learning_rate": 0.0012114787716874962, "loss": 1.3274, "step": 3510 }, { "epoch": 0.44937923972865734, "grad_norm": 0.59375, "learning_rate": 0.0012110735624509183, "loss": 1.7466, "step": 3511 }, { "epoch": 0.44950723153718164, "grad_norm": 0.75, "learning_rate": 0.0012106683169379239, "loss": 1.9582, "step": 3512 }, { "epoch": 0.4496352233457059, "grad_norm": 0.8125, "learning_rate": 0.001210263035218161, "loss": 2.1898, "step": 3513 }, { "epoch": 0.4497632151542301, "grad_norm": 0.5546875, "learning_rate": 0.0012098577173612838, "loss": 1.1226, "step": 3514 }, { "epoch": 0.44989120696275436, "grad_norm": 0.77734375, "learning_rate": 0.0012094523634369537, "loss": 1.4075, "step": 3515 }, { "epoch": 0.45001919877127866, "grad_norm": 0.55078125, "learning_rate": 0.001209046973514836, "loss": 1.0474, "step": 3516 }, { "epoch": 0.4501471905798029, "grad_norm": 0.5625, "learning_rate": 0.0012086415476646047, "loss": 1.2965, "step": 3517 }, { "epoch": 0.45027518238832714, "grad_norm": 0.640625, "learning_rate": 0.0012082360859559378, "loss": 1.8216, "step": 3518 }, { "epoch": 0.4504031741968514, "grad_norm": 0.6484375, "learning_rate": 0.0012078305884585213, "loss": 1.8167, "step": 3519 }, { "epoch": 0.4505311660053757, "grad_norm": 0.55859375, "learning_rate": 0.0012074250552420458, "loss": 1.4403, "step": 3520 }, { "epoch": 0.4506591578138999, "grad_norm": 0.6796875, "learning_rate": 0.0012070194863762094, "loss": 1.5835, "step": 3521 }, { "epoch": 0.45078714962242417, "grad_norm": 0.90625, "learning_rate": 0.0012066138819307156, "loss": 1.6572, "step": 3522 }, { "epoch": 0.4509151414309484, "grad_norm": 0.703125, "learning_rate": 0.0012062082419752736, "loss": 2.1088, "step": 3523 }, { "epoch": 0.45104313323947265, "grad_norm": 0.6953125, "learning_rate": 0.0012058025665795996, "loss": 1.8605, "step": 3524 }, { "epoch": 0.45117112504799695, "grad_norm": 0.67578125, "learning_rate": 0.0012053968558134152, "loss": 1.293, "step": 3525 }, { "epoch": 0.4512991168565212, "grad_norm": 0.5703125, "learning_rate": 0.0012049911097464492, "loss": 1.1194, "step": 3526 }, { "epoch": 0.45142710866504543, "grad_norm": 0.63671875, "learning_rate": 0.0012045853284484345, "loss": 1.234, "step": 3527 }, { "epoch": 0.45155510047356967, "grad_norm": 0.5390625, "learning_rate": 0.0012041795119891124, "loss": 0.9583, "step": 3528 }, { "epoch": 0.45168309228209397, "grad_norm": 0.71875, "learning_rate": 0.001203773660438228, "loss": 1.2495, "step": 3529 }, { "epoch": 0.4518110840906182, "grad_norm": 0.73828125, "learning_rate": 0.0012033677738655336, "loss": 1.8333, "step": 3530 }, { "epoch": 0.45193907589914245, "grad_norm": 0.59765625, "learning_rate": 0.001202961852340788, "loss": 1.3328, "step": 3531 }, { "epoch": 0.4520670677076667, "grad_norm": 0.796875, "learning_rate": 0.0012025558959337548, "loss": 1.6723, "step": 3532 }, { "epoch": 0.452195059516191, "grad_norm": 0.59375, "learning_rate": 0.0012021499047142048, "loss": 1.6393, "step": 3533 }, { "epoch": 0.45232305132471523, "grad_norm": 0.703125, "learning_rate": 0.0012017438787519138, "loss": 1.009, "step": 3534 }, { "epoch": 0.45245104313323947, "grad_norm": 0.578125, "learning_rate": 0.0012013378181166638, "loss": 1.1971, "step": 3535 }, { "epoch": 0.4525790349417637, "grad_norm": 1.015625, "learning_rate": 0.0012009317228782433, "loss": 2.1447, "step": 3536 }, { "epoch": 0.45270702675028796, "grad_norm": 0.7109375, "learning_rate": 0.0012005255931064461, "loss": 1.7181, "step": 3537 }, { "epoch": 0.45283501855881225, "grad_norm": 0.65625, "learning_rate": 0.0012001194288710723, "loss": 1.555, "step": 3538 }, { "epoch": 0.4529630103673365, "grad_norm": 0.80078125, "learning_rate": 0.0011997132302419276, "loss": 1.7814, "step": 3539 }, { "epoch": 0.45309100217586074, "grad_norm": 0.76953125, "learning_rate": 0.001199306997288824, "loss": 1.8416, "step": 3540 }, { "epoch": 0.453218993984385, "grad_norm": 0.66796875, "learning_rate": 0.0011989007300815793, "loss": 1.5658, "step": 3541 }, { "epoch": 0.4533469857929093, "grad_norm": 0.5078125, "learning_rate": 0.001198494428690017, "loss": 1.267, "step": 3542 }, { "epoch": 0.4534749776014335, "grad_norm": 0.5390625, "learning_rate": 0.0011980880931839663, "loss": 1.3355, "step": 3543 }, { "epoch": 0.45360296940995776, "grad_norm": 0.67578125, "learning_rate": 0.0011976817236332633, "loss": 2.3463, "step": 3544 }, { "epoch": 0.453730961218482, "grad_norm": 0.546875, "learning_rate": 0.0011972753201077487, "loss": 1.0164, "step": 3545 }, { "epoch": 0.4538589530270063, "grad_norm": 0.7265625, "learning_rate": 0.0011968688826772694, "loss": 1.5796, "step": 3546 }, { "epoch": 0.45398694483553054, "grad_norm": 0.671875, "learning_rate": 0.0011964624114116785, "loss": 1.6686, "step": 3547 }, { "epoch": 0.4541149366440548, "grad_norm": 0.6640625, "learning_rate": 0.0011960559063808347, "loss": 1.5594, "step": 3548 }, { "epoch": 0.454242928452579, "grad_norm": 0.50390625, "learning_rate": 0.0011956493676546025, "loss": 1.0623, "step": 3549 }, { "epoch": 0.4543709202611033, "grad_norm": 0.7421875, "learning_rate": 0.0011952427953028522, "loss": 1.8125, "step": 3550 }, { "epoch": 0.45449891206962756, "grad_norm": 0.65625, "learning_rate": 0.0011948361893954595, "loss": 1.9313, "step": 3551 }, { "epoch": 0.4546269038781518, "grad_norm": 0.59765625, "learning_rate": 0.0011944295500023066, "loss": 1.2702, "step": 3552 }, { "epoch": 0.45475489568667604, "grad_norm": 0.5546875, "learning_rate": 0.0011940228771932808, "loss": 1.1651, "step": 3553 }, { "epoch": 0.4548828874952003, "grad_norm": 0.58203125, "learning_rate": 0.0011936161710382755, "loss": 1.4476, "step": 3554 }, { "epoch": 0.4550108793037246, "grad_norm": 0.70703125, "learning_rate": 0.0011932094316071905, "loss": 1.622, "step": 3555 }, { "epoch": 0.4551388711122488, "grad_norm": 1.203125, "learning_rate": 0.0011928026589699292, "loss": 2.0715, "step": 3556 }, { "epoch": 0.45526686292077306, "grad_norm": 0.69921875, "learning_rate": 0.0011923958531964032, "loss": 1.5558, "step": 3557 }, { "epoch": 0.4553948547292973, "grad_norm": 0.796875, "learning_rate": 0.0011919890143565278, "loss": 1.6278, "step": 3558 }, { "epoch": 0.4555228465378216, "grad_norm": 0.68359375, "learning_rate": 0.0011915821425202257, "loss": 1.6173, "step": 3559 }, { "epoch": 0.45565083834634584, "grad_norm": 0.64453125, "learning_rate": 0.0011911752377574238, "loss": 1.3943, "step": 3560 }, { "epoch": 0.4557788301548701, "grad_norm": 0.6328125, "learning_rate": 0.001190768300138056, "loss": 1.3418, "step": 3561 }, { "epoch": 0.4559068219633943, "grad_norm": 0.703125, "learning_rate": 0.00119036132973206, "loss": 1.6023, "step": 3562 }, { "epoch": 0.4560348137719186, "grad_norm": 0.57421875, "learning_rate": 0.0011899543266093813, "loss": 0.8315, "step": 3563 }, { "epoch": 0.45616280558044286, "grad_norm": 0.671875, "learning_rate": 0.0011895472908399693, "loss": 1.3758, "step": 3564 }, { "epoch": 0.4562907973889671, "grad_norm": 0.8125, "learning_rate": 0.0011891402224937803, "loss": 1.831, "step": 3565 }, { "epoch": 0.45641878919749135, "grad_norm": 0.51953125, "learning_rate": 0.0011887331216407756, "loss": 1.137, "step": 3566 }, { "epoch": 0.4565467810060156, "grad_norm": 0.578125, "learning_rate": 0.0011883259883509211, "loss": 1.1637, "step": 3567 }, { "epoch": 0.4566747728145399, "grad_norm": 0.671875, "learning_rate": 0.0011879188226941906, "loss": 1.4455, "step": 3568 }, { "epoch": 0.45680276462306413, "grad_norm": 0.6875, "learning_rate": 0.0011875116247405608, "loss": 1.8221, "step": 3569 }, { "epoch": 0.45693075643158837, "grad_norm": 0.640625, "learning_rate": 0.0011871043945600166, "loss": 1.6705, "step": 3570 }, { "epoch": 0.4570587482401126, "grad_norm": 0.66015625, "learning_rate": 0.001186697132222546, "loss": 1.1495, "step": 3571 }, { "epoch": 0.4571867400486369, "grad_norm": 0.73046875, "learning_rate": 0.0011862898377981445, "loss": 1.5596, "step": 3572 }, { "epoch": 0.45731473185716115, "grad_norm": 0.671875, "learning_rate": 0.0011858825113568118, "loss": 1.5285, "step": 3573 }, { "epoch": 0.4574427236656854, "grad_norm": 0.66015625, "learning_rate": 0.0011854751529685533, "loss": 1.53, "step": 3574 }, { "epoch": 0.45757071547420963, "grad_norm": 0.59765625, "learning_rate": 0.0011850677627033806, "loss": 1.114, "step": 3575 }, { "epoch": 0.45769870728273393, "grad_norm": 0.66796875, "learning_rate": 0.0011846603406313104, "loss": 1.3046, "step": 3576 }, { "epoch": 0.45782669909125817, "grad_norm": 0.58203125, "learning_rate": 0.0011842528868223645, "loss": 1.5211, "step": 3577 }, { "epoch": 0.4579546908997824, "grad_norm": 0.859375, "learning_rate": 0.0011838454013465706, "loss": 2.0366, "step": 3578 }, { "epoch": 0.45808268270830665, "grad_norm": 0.6015625, "learning_rate": 0.0011834378842739614, "loss": 0.8788, "step": 3579 }, { "epoch": 0.4582106745168309, "grad_norm": 0.72265625, "learning_rate": 0.0011830303356745754, "loss": 1.7071, "step": 3580 }, { "epoch": 0.4583386663253552, "grad_norm": 0.71875, "learning_rate": 0.0011826227556184573, "loss": 2.0921, "step": 3581 }, { "epoch": 0.45846665813387943, "grad_norm": 0.703125, "learning_rate": 0.0011822151441756553, "loss": 1.5676, "step": 3582 }, { "epoch": 0.4585946499424037, "grad_norm": 0.67578125, "learning_rate": 0.001181807501416224, "loss": 2.1792, "step": 3583 }, { "epoch": 0.4587226417509279, "grad_norm": 0.6328125, "learning_rate": 0.0011813998274102242, "loss": 1.1661, "step": 3584 }, { "epoch": 0.4588506335594522, "grad_norm": 0.92578125, "learning_rate": 0.0011809921222277208, "loss": 2.1099, "step": 3585 }, { "epoch": 0.45897862536797646, "grad_norm": 0.58984375, "learning_rate": 0.0011805843859387846, "loss": 1.3516, "step": 3586 }, { "epoch": 0.4591066171765007, "grad_norm": 0.7109375, "learning_rate": 0.0011801766186134916, "loss": 2.0213, "step": 3587 }, { "epoch": 0.45923460898502494, "grad_norm": 0.5546875, "learning_rate": 0.001179768820321924, "loss": 1.3172, "step": 3588 }, { "epoch": 0.45936260079354924, "grad_norm": 0.65625, "learning_rate": 0.001179360991134167, "loss": 1.6249, "step": 3589 }, { "epoch": 0.4594905926020735, "grad_norm": 0.66796875, "learning_rate": 0.0011789531311203146, "loss": 1.9561, "step": 3590 }, { "epoch": 0.4596185844105977, "grad_norm": 0.65625, "learning_rate": 0.0011785452403504624, "loss": 1.4122, "step": 3591 }, { "epoch": 0.45974657621912196, "grad_norm": 0.64453125, "learning_rate": 0.001178137318894714, "loss": 1.6869, "step": 3592 }, { "epoch": 0.45987456802764626, "grad_norm": 0.76171875, "learning_rate": 0.001177729366823177, "loss": 1.6937, "step": 3593 }, { "epoch": 0.4600025598361705, "grad_norm": 0.609375, "learning_rate": 0.0011773213842059646, "loss": 1.5811, "step": 3594 }, { "epoch": 0.46013055164469474, "grad_norm": 0.6796875, "learning_rate": 0.0011769133711131959, "loss": 1.527, "step": 3595 }, { "epoch": 0.460258543453219, "grad_norm": 0.71484375, "learning_rate": 0.0011765053276149935, "loss": 1.4385, "step": 3596 }, { "epoch": 0.4603865352617432, "grad_norm": 0.6328125, "learning_rate": 0.001176097253781487, "loss": 1.2286, "step": 3597 }, { "epoch": 0.4605145270702675, "grad_norm": 0.61328125, "learning_rate": 0.00117568914968281, "loss": 1.2018, "step": 3598 }, { "epoch": 0.46064251887879176, "grad_norm": 0.66015625, "learning_rate": 0.0011752810153891026, "loss": 1.8283, "step": 3599 }, { "epoch": 0.460770510687316, "grad_norm": 0.58203125, "learning_rate": 0.0011748728509705082, "loss": 1.2967, "step": 3600 }, { "epoch": 0.46089850249584025, "grad_norm": 1.078125, "learning_rate": 0.0011744646564971776, "loss": 2.1083, "step": 3601 }, { "epoch": 0.46102649430436454, "grad_norm": 0.75390625, "learning_rate": 0.001174056432039265, "loss": 1.7734, "step": 3602 }, { "epoch": 0.4611544861128888, "grad_norm": 0.69140625, "learning_rate": 0.0011736481776669307, "loss": 1.3838, "step": 3603 }, { "epoch": 0.461282477921413, "grad_norm": 0.7265625, "learning_rate": 0.0011732398934503393, "loss": 1.6607, "step": 3604 }, { "epoch": 0.46141046972993727, "grad_norm": 0.5703125, "learning_rate": 0.0011728315794596613, "loss": 1.2788, "step": 3605 }, { "epoch": 0.46153846153846156, "grad_norm": 0.6640625, "learning_rate": 0.0011724232357650727, "loss": 1.7325, "step": 3606 }, { "epoch": 0.4616664533469858, "grad_norm": 0.81640625, "learning_rate": 0.0011720148624367532, "loss": 1.4086, "step": 3607 }, { "epoch": 0.46179444515551005, "grad_norm": 0.6796875, "learning_rate": 0.001171606459544889, "loss": 1.1551, "step": 3608 }, { "epoch": 0.4619224369640343, "grad_norm": 0.578125, "learning_rate": 0.0011711980271596702, "loss": 1.4289, "step": 3609 }, { "epoch": 0.46205042877255853, "grad_norm": 1.0859375, "learning_rate": 0.001170789565351293, "loss": 1.2418, "step": 3610 }, { "epoch": 0.4621784205810828, "grad_norm": 0.63671875, "learning_rate": 0.0011703810741899576, "loss": 1.426, "step": 3611 }, { "epoch": 0.46230641238960707, "grad_norm": 0.64453125, "learning_rate": 0.0011699725537458708, "loss": 1.5447, "step": 3612 }, { "epoch": 0.4624344041981313, "grad_norm": 0.69921875, "learning_rate": 0.0011695640040892423, "loss": 2.0317, "step": 3613 }, { "epoch": 0.46256239600665555, "grad_norm": 0.61328125, "learning_rate": 0.0011691554252902887, "loss": 1.3231, "step": 3614 }, { "epoch": 0.46269038781517985, "grad_norm": 0.64453125, "learning_rate": 0.0011687468174192308, "loss": 1.7783, "step": 3615 }, { "epoch": 0.4628183796237041, "grad_norm": 0.640625, "learning_rate": 0.0011683381805462944, "loss": 1.8521, "step": 3616 }, { "epoch": 0.46294637143222833, "grad_norm": 0.72265625, "learning_rate": 0.001167929514741711, "loss": 1.8957, "step": 3617 }, { "epoch": 0.4630743632407526, "grad_norm": 0.9296875, "learning_rate": 0.001167520820075715, "loss": 2.1175, "step": 3618 }, { "epoch": 0.46320235504927687, "grad_norm": 0.71875, "learning_rate": 0.0011671120966185485, "loss": 1.5379, "step": 3619 }, { "epoch": 0.4633303468578011, "grad_norm": 0.546875, "learning_rate": 0.0011667033444404567, "loss": 1.349, "step": 3620 }, { "epoch": 0.46345833866632535, "grad_norm": 0.6484375, "learning_rate": 0.0011662945636116907, "loss": 1.2259, "step": 3621 }, { "epoch": 0.4635863304748496, "grad_norm": 0.55078125, "learning_rate": 0.0011658857542025055, "loss": 1.4475, "step": 3622 }, { "epoch": 0.46371432228337384, "grad_norm": 0.68359375, "learning_rate": 0.0011654769162831626, "loss": 1.4793, "step": 3623 }, { "epoch": 0.46384231409189813, "grad_norm": 0.69921875, "learning_rate": 0.0011650680499239263, "loss": 1.5742, "step": 3624 }, { "epoch": 0.4639703059004224, "grad_norm": 0.70703125, "learning_rate": 0.0011646591551950677, "loss": 1.482, "step": 3625 }, { "epoch": 0.4640982977089466, "grad_norm": 0.75, "learning_rate": 0.0011642502321668614, "loss": 1.889, "step": 3626 }, { "epoch": 0.46422628951747086, "grad_norm": 0.6015625, "learning_rate": 0.0011638412809095878, "loss": 0.8742, "step": 3627 }, { "epoch": 0.46435428132599516, "grad_norm": 0.6953125, "learning_rate": 0.0011634323014935322, "loss": 2.0855, "step": 3628 }, { "epoch": 0.4644822731345194, "grad_norm": 0.62890625, "learning_rate": 0.0011630232939889836, "loss": 1.8454, "step": 3629 }, { "epoch": 0.46461026494304364, "grad_norm": 0.88671875, "learning_rate": 0.0011626142584662369, "loss": 2.4242, "step": 3630 }, { "epoch": 0.4647382567515679, "grad_norm": 0.8046875, "learning_rate": 0.0011622051949955914, "loss": 1.8019, "step": 3631 }, { "epoch": 0.4648662485600922, "grad_norm": 0.56640625, "learning_rate": 0.0011617961036473519, "loss": 0.9078, "step": 3632 }, { "epoch": 0.4649942403686164, "grad_norm": 0.796875, "learning_rate": 0.0011613869844918265, "loss": 1.0928, "step": 3633 }, { "epoch": 0.46512223217714066, "grad_norm": 0.6875, "learning_rate": 0.0011609778375993296, "loss": 1.7201, "step": 3634 }, { "epoch": 0.4652502239856649, "grad_norm": 0.7109375, "learning_rate": 0.0011605686630401792, "loss": 1.729, "step": 3635 }, { "epoch": 0.4653782157941892, "grad_norm": 0.73046875, "learning_rate": 0.0011601594608846991, "loss": 1.2742, "step": 3636 }, { "epoch": 0.46550620760271344, "grad_norm": 0.87109375, "learning_rate": 0.001159750231203217, "loss": 2.0697, "step": 3637 }, { "epoch": 0.4656341994112377, "grad_norm": 0.62890625, "learning_rate": 0.0011593409740660655, "loss": 1.6996, "step": 3638 }, { "epoch": 0.4657621912197619, "grad_norm": 0.58984375, "learning_rate": 0.0011589316895435826, "loss": 1.1124, "step": 3639 }, { "epoch": 0.46589018302828616, "grad_norm": 0.640625, "learning_rate": 0.0011585223777061102, "loss": 1.3336, "step": 3640 }, { "epoch": 0.46601817483681046, "grad_norm": 0.7578125, "learning_rate": 0.0011581130386239952, "loss": 1.9892, "step": 3641 }, { "epoch": 0.4661461666453347, "grad_norm": 0.765625, "learning_rate": 0.001157703672367589, "loss": 1.4224, "step": 3642 }, { "epoch": 0.46627415845385894, "grad_norm": 0.640625, "learning_rate": 0.0011572942790072484, "loss": 1.6858, "step": 3643 }, { "epoch": 0.4664021502623832, "grad_norm": 0.6171875, "learning_rate": 0.0011568848586133336, "loss": 1.6775, "step": 3644 }, { "epoch": 0.4665301420709075, "grad_norm": 0.70703125, "learning_rate": 0.0011564754112562106, "loss": 1.1986, "step": 3645 }, { "epoch": 0.4666581338794317, "grad_norm": 0.7265625, "learning_rate": 0.0011560659370062492, "loss": 1.5059, "step": 3646 }, { "epoch": 0.46678612568795597, "grad_norm": 0.64453125, "learning_rate": 0.0011556564359338243, "loss": 1.4367, "step": 3647 }, { "epoch": 0.4669141174964802, "grad_norm": 0.5703125, "learning_rate": 0.0011552469081093158, "loss": 1.1936, "step": 3648 }, { "epoch": 0.4670421093050045, "grad_norm": 0.78515625, "learning_rate": 0.001154837353603107, "loss": 2.6349, "step": 3649 }, { "epoch": 0.46717010111352875, "grad_norm": 0.7265625, "learning_rate": 0.001154427772485587, "loss": 1.6054, "step": 3650 }, { "epoch": 0.467298092922053, "grad_norm": 0.6875, "learning_rate": 0.0011540181648271486, "loss": 1.7125, "step": 3651 }, { "epoch": 0.46742608473057723, "grad_norm": 0.6796875, "learning_rate": 0.0011536085306981898, "loss": 1.881, "step": 3652 }, { "epoch": 0.46755407653910147, "grad_norm": 0.64453125, "learning_rate": 0.0011531988701691124, "loss": 1.3769, "step": 3653 }, { "epoch": 0.46768206834762577, "grad_norm": 1.1484375, "learning_rate": 0.0011527891833103237, "loss": 1.8845, "step": 3654 }, { "epoch": 0.46781006015615, "grad_norm": 0.65625, "learning_rate": 0.001152379470192235, "loss": 1.2916, "step": 3655 }, { "epoch": 0.46793805196467425, "grad_norm": 0.65234375, "learning_rate": 0.0011519697308852621, "loss": 1.136, "step": 3656 }, { "epoch": 0.4680660437731985, "grad_norm": 0.6640625, "learning_rate": 0.001151559965459825, "loss": 1.356, "step": 3657 }, { "epoch": 0.4681940355817228, "grad_norm": 0.61328125, "learning_rate": 0.001151150173986349, "loss": 1.5294, "step": 3658 }, { "epoch": 0.46832202739024703, "grad_norm": 0.6640625, "learning_rate": 0.001150740356535263, "loss": 1.6881, "step": 3659 }, { "epoch": 0.4684500191987713, "grad_norm": 0.609375, "learning_rate": 0.0011503305131770013, "loss": 1.1755, "step": 3660 }, { "epoch": 0.4685780110072955, "grad_norm": 0.578125, "learning_rate": 0.0011499206439820019, "loss": 1.4778, "step": 3661 }, { "epoch": 0.4687060028158198, "grad_norm": 0.7890625, "learning_rate": 0.001149510749020707, "loss": 2.0204, "step": 3662 }, { "epoch": 0.46883399462434405, "grad_norm": 0.6015625, "learning_rate": 0.0011491008283635647, "loss": 1.7821, "step": 3663 }, { "epoch": 0.4689619864328683, "grad_norm": 0.7109375, "learning_rate": 0.0011486908820810251, "loss": 1.3973, "step": 3664 }, { "epoch": 0.46908997824139254, "grad_norm": 0.71484375, "learning_rate": 0.0011482809102435457, "loss": 1.6127, "step": 3665 }, { "epoch": 0.46921797004991683, "grad_norm": 1.4609375, "learning_rate": 0.0011478709129215856, "loss": 1.2009, "step": 3666 }, { "epoch": 0.4693459618584411, "grad_norm": 0.609375, "learning_rate": 0.0011474608901856102, "loss": 1.3941, "step": 3667 }, { "epoch": 0.4694739536669653, "grad_norm": 0.73828125, "learning_rate": 0.0011470508421060881, "loss": 1.9559, "step": 3668 }, { "epoch": 0.46960194547548956, "grad_norm": 0.72265625, "learning_rate": 0.001146640768753493, "loss": 1.7697, "step": 3669 }, { "epoch": 0.4697299372840138, "grad_norm": 0.5859375, "learning_rate": 0.0011462306701983025, "loss": 1.5683, "step": 3670 }, { "epoch": 0.4698579290925381, "grad_norm": 0.703125, "learning_rate": 0.0011458205465109989, "loss": 1.279, "step": 3671 }, { "epoch": 0.46998592090106234, "grad_norm": 0.66796875, "learning_rate": 0.0011454103977620684, "loss": 1.5225, "step": 3672 }, { "epoch": 0.4701139127095866, "grad_norm": 0.7578125, "learning_rate": 0.0011450002240220019, "loss": 1.936, "step": 3673 }, { "epoch": 0.4702419045181108, "grad_norm": 0.55078125, "learning_rate": 0.0011445900253612945, "loss": 1.0406, "step": 3674 }, { "epoch": 0.4703698963266351, "grad_norm": 0.66015625, "learning_rate": 0.0011441798018504445, "loss": 1.479, "step": 3675 }, { "epoch": 0.47049788813515936, "grad_norm": 0.6015625, "learning_rate": 0.001143769553559957, "loss": 1.1851, "step": 3676 }, { "epoch": 0.4706258799436836, "grad_norm": 0.75, "learning_rate": 0.001143359280560339, "loss": 1.4614, "step": 3677 }, { "epoch": 0.47075387175220784, "grad_norm": 0.80078125, "learning_rate": 0.0011429489829221024, "loss": 1.4948, "step": 3678 }, { "epoch": 0.47088186356073214, "grad_norm": 0.671875, "learning_rate": 0.0011425386607157642, "loss": 1.5806, "step": 3679 }, { "epoch": 0.4710098553692564, "grad_norm": 0.58203125, "learning_rate": 0.0011421283140118442, "loss": 1.2554, "step": 3680 }, { "epoch": 0.4711378471777806, "grad_norm": 0.72265625, "learning_rate": 0.0011417179428808676, "loss": 1.5608, "step": 3681 }, { "epoch": 0.47126583898630486, "grad_norm": 0.65625, "learning_rate": 0.0011413075473933627, "loss": 1.6617, "step": 3682 }, { "epoch": 0.4713938307948291, "grad_norm": 0.71484375, "learning_rate": 0.0011408971276198636, "loss": 1.4873, "step": 3683 }, { "epoch": 0.4715218226033534, "grad_norm": 0.609375, "learning_rate": 0.0011404866836309071, "loss": 1.064, "step": 3684 }, { "epoch": 0.47164981441187764, "grad_norm": 0.671875, "learning_rate": 0.0011400762154970346, "loss": 1.3172, "step": 3685 }, { "epoch": 0.4717778062204019, "grad_norm": 0.6640625, "learning_rate": 0.0011396657232887917, "loss": 1.3606, "step": 3686 }, { "epoch": 0.4719057980289261, "grad_norm": 0.55078125, "learning_rate": 0.0011392552070767282, "loss": 1.1649, "step": 3687 }, { "epoch": 0.4720337898374504, "grad_norm": 0.89453125, "learning_rate": 0.0011388446669313979, "loss": 1.631, "step": 3688 }, { "epoch": 0.47216178164597467, "grad_norm": 0.94921875, "learning_rate": 0.0011384341029233587, "loss": 3.2786, "step": 3689 }, { "epoch": 0.4722897734544989, "grad_norm": 0.68359375, "learning_rate": 0.0011380235151231732, "loss": 1.8516, "step": 3690 }, { "epoch": 0.47241776526302315, "grad_norm": 0.75, "learning_rate": 0.001137612903601407, "loss": 2.0856, "step": 3691 }, { "epoch": 0.47254575707154745, "grad_norm": 0.703125, "learning_rate": 0.0011372022684286308, "loss": 1.8496, "step": 3692 }, { "epoch": 0.4726737488800717, "grad_norm": 0.65234375, "learning_rate": 0.0011367916096754185, "loss": 1.7301, "step": 3693 }, { "epoch": 0.47280174068859593, "grad_norm": 0.71875, "learning_rate": 0.001136380927412349, "loss": 1.4864, "step": 3694 }, { "epoch": 0.47292973249712017, "grad_norm": 0.640625, "learning_rate": 0.0011359702217100041, "loss": 1.5395, "step": 3695 }, { "epoch": 0.4730577243056444, "grad_norm": 0.61328125, "learning_rate": 0.0011355594926389708, "loss": 1.2392, "step": 3696 }, { "epoch": 0.4731857161141687, "grad_norm": 0.81640625, "learning_rate": 0.0011351487402698388, "loss": 1.7836, "step": 3697 }, { "epoch": 0.47331370792269295, "grad_norm": 0.64453125, "learning_rate": 0.0011347379646732033, "loss": 1.3885, "step": 3698 }, { "epoch": 0.4734416997312172, "grad_norm": 0.734375, "learning_rate": 0.0011343271659196624, "loss": 2.0586, "step": 3699 }, { "epoch": 0.47356969153974143, "grad_norm": 0.62890625, "learning_rate": 0.0011339163440798187, "loss": 1.3707, "step": 3700 }, { "epoch": 0.47369768334826573, "grad_norm": 0.55859375, "learning_rate": 0.0011335054992242784, "loss": 1.274, "step": 3701 }, { "epoch": 0.47382567515678997, "grad_norm": 0.70703125, "learning_rate": 0.001133094631423652, "loss": 1.6793, "step": 3702 }, { "epoch": 0.4739536669653142, "grad_norm": 0.7265625, "learning_rate": 0.0011326837407485536, "loss": 2.303, "step": 3703 }, { "epoch": 0.47408165877383845, "grad_norm": 0.79296875, "learning_rate": 0.0011322728272696017, "loss": 2.1251, "step": 3704 }, { "epoch": 0.47420965058236275, "grad_norm": 0.609375, "learning_rate": 0.0011318618910574185, "loss": 1.1609, "step": 3705 }, { "epoch": 0.474337642390887, "grad_norm": 0.65625, "learning_rate": 0.00113145093218263, "loss": 1.651, "step": 3706 }, { "epoch": 0.47446563419941123, "grad_norm": 0.6484375, "learning_rate": 0.001131039950715866, "loss": 1.5094, "step": 3707 }, { "epoch": 0.4745936260079355, "grad_norm": 0.60546875, "learning_rate": 0.00113062894672776, "loss": 1.2045, "step": 3708 }, { "epoch": 0.4747216178164598, "grad_norm": 0.73828125, "learning_rate": 0.0011302179202889505, "loss": 1.9101, "step": 3709 }, { "epoch": 0.474849609624984, "grad_norm": 0.7109375, "learning_rate": 0.0011298068714700783, "loss": 1.7288, "step": 3710 }, { "epoch": 0.47497760143350826, "grad_norm": 1.1796875, "learning_rate": 0.0011293958003417892, "loss": 1.7245, "step": 3711 }, { "epoch": 0.4751055932420325, "grad_norm": 0.7421875, "learning_rate": 0.0011289847069747326, "loss": 1.8536, "step": 3712 }, { "epoch": 0.47523358505055674, "grad_norm": 0.71484375, "learning_rate": 0.001128573591439561, "loss": 1.5539, "step": 3713 }, { "epoch": 0.47536157685908104, "grad_norm": 0.625, "learning_rate": 0.0011281624538069322, "loss": 1.8019, "step": 3714 }, { "epoch": 0.4754895686676053, "grad_norm": 0.66796875, "learning_rate": 0.0011277512941475058, "loss": 1.6224, "step": 3715 }, { "epoch": 0.4756175604761295, "grad_norm": 1.3046875, "learning_rate": 0.0011273401125319472, "loss": 1.8874, "step": 3716 }, { "epoch": 0.47574555228465376, "grad_norm": 1.078125, "learning_rate": 0.0011269289090309238, "loss": 2.1388, "step": 3717 }, { "epoch": 0.47587354409317806, "grad_norm": 1.046875, "learning_rate": 0.0011265176837151082, "loss": 1.3203, "step": 3718 }, { "epoch": 0.4760015359017023, "grad_norm": 0.6484375, "learning_rate": 0.0011261064366551762, "loss": 1.6939, "step": 3719 }, { "epoch": 0.47612952771022654, "grad_norm": 0.61328125, "learning_rate": 0.0011256951679218064, "loss": 1.4087, "step": 3720 }, { "epoch": 0.4762575195187508, "grad_norm": 0.64453125, "learning_rate": 0.0011252838775856829, "loss": 1.8989, "step": 3721 }, { "epoch": 0.4763855113272751, "grad_norm": 0.60546875, "learning_rate": 0.0011248725657174922, "loss": 1.1949, "step": 3722 }, { "epoch": 0.4765135031357993, "grad_norm": 0.625, "learning_rate": 0.0011244612323879255, "loss": 1.3013, "step": 3723 }, { "epoch": 0.47664149494432356, "grad_norm": 0.609375, "learning_rate": 0.0011240498776676764, "loss": 1.7242, "step": 3724 }, { "epoch": 0.4767694867528478, "grad_norm": 0.56640625, "learning_rate": 0.001123638501627443, "loss": 1.3511, "step": 3725 }, { "epoch": 0.47689747856137205, "grad_norm": 1.046875, "learning_rate": 0.0011232271043379273, "loss": 1.1918, "step": 3726 }, { "epoch": 0.47702547036989634, "grad_norm": 0.66015625, "learning_rate": 0.0011228156858698343, "loss": 2.1072, "step": 3727 }, { "epoch": 0.4771534621784206, "grad_norm": 0.703125, "learning_rate": 0.0011224042462938731, "loss": 1.4676, "step": 3728 }, { "epoch": 0.4772814539869448, "grad_norm": 0.96484375, "learning_rate": 0.0011219927856807562, "loss": 2.0668, "step": 3729 }, { "epoch": 0.47740944579546907, "grad_norm": 0.55859375, "learning_rate": 0.0011215813041011997, "loss": 1.2234, "step": 3730 }, { "epoch": 0.47753743760399336, "grad_norm": 0.58203125, "learning_rate": 0.0011211698016259234, "loss": 1.5836, "step": 3731 }, { "epoch": 0.4776654294125176, "grad_norm": 0.7421875, "learning_rate": 0.0011207582783256506, "loss": 1.3937, "step": 3732 }, { "epoch": 0.47779342122104185, "grad_norm": 0.8046875, "learning_rate": 0.0011203467342711087, "loss": 1.961, "step": 3733 }, { "epoch": 0.4779214130295661, "grad_norm": 0.66015625, "learning_rate": 0.0011199351695330279, "loss": 1.5636, "step": 3734 }, { "epoch": 0.4780494048380904, "grad_norm": 0.73828125, "learning_rate": 0.0011195235841821422, "loss": 1.6219, "step": 3735 }, { "epoch": 0.4781773966466146, "grad_norm": 0.53515625, "learning_rate": 0.0011191119782891891, "loss": 1.0168, "step": 3736 }, { "epoch": 0.47830538845513887, "grad_norm": 0.6796875, "learning_rate": 0.0011187003519249105, "loss": 1.4966, "step": 3737 }, { "epoch": 0.4784333802636631, "grad_norm": 0.70703125, "learning_rate": 0.0011182887051600502, "loss": 1.5385, "step": 3738 }, { "epoch": 0.4785613720721874, "grad_norm": 0.8359375, "learning_rate": 0.0011178770380653566, "loss": 1.4995, "step": 3739 }, { "epoch": 0.47868936388071165, "grad_norm": 0.671875, "learning_rate": 0.001117465350711582, "loss": 1.6609, "step": 3740 }, { "epoch": 0.4788173556892359, "grad_norm": 0.63671875, "learning_rate": 0.0011170536431694804, "loss": 1.4636, "step": 3741 }, { "epoch": 0.47894534749776013, "grad_norm": 0.5859375, "learning_rate": 0.0011166419155098112, "loss": 1.2279, "step": 3742 }, { "epoch": 0.4790733393062844, "grad_norm": 0.69921875, "learning_rate": 0.0011162301678033367, "loss": 1.4081, "step": 3743 }, { "epoch": 0.47920133111480867, "grad_norm": 0.62109375, "learning_rate": 0.0011158184001208216, "loss": 2.1279, "step": 3744 }, { "epoch": 0.4793293229233329, "grad_norm": 0.6640625, "learning_rate": 0.0011154066125330358, "loss": 1.5094, "step": 3745 }, { "epoch": 0.47945731473185715, "grad_norm": 0.73828125, "learning_rate": 0.0011149948051107509, "loss": 1.4678, "step": 3746 }, { "epoch": 0.4795853065403814, "grad_norm": 0.9296875, "learning_rate": 0.001114582977924743, "loss": 2.1588, "step": 3747 }, { "epoch": 0.4797132983489057, "grad_norm": 0.84765625, "learning_rate": 0.001114171131045791, "loss": 1.4215, "step": 3748 }, { "epoch": 0.47984129015742993, "grad_norm": 0.91015625, "learning_rate": 0.0011137592645446782, "loss": 2.2296, "step": 3749 }, { "epoch": 0.4799692819659542, "grad_norm": 0.72265625, "learning_rate": 0.0011133473784921896, "loss": 1.5528, "step": 3750 }, { "epoch": 0.4800972737744784, "grad_norm": 0.7421875, "learning_rate": 0.0011129354729591157, "loss": 1.7146, "step": 3751 }, { "epoch": 0.4802252655830027, "grad_norm": 0.60546875, "learning_rate": 0.0011125235480162479, "loss": 1.2147, "step": 3752 }, { "epoch": 0.48035325739152696, "grad_norm": 0.67578125, "learning_rate": 0.0011121116037343826, "loss": 1.4508, "step": 3753 }, { "epoch": 0.4804812492000512, "grad_norm": 0.6796875, "learning_rate": 0.0011116996401843192, "loss": 1.6803, "step": 3754 }, { "epoch": 0.48060924100857544, "grad_norm": 0.578125, "learning_rate": 0.0011112876574368605, "loss": 1.3909, "step": 3755 }, { "epoch": 0.4807372328170997, "grad_norm": 0.66015625, "learning_rate": 0.0011108756555628125, "loss": 1.5239, "step": 3756 }, { "epoch": 0.480865224625624, "grad_norm": 0.57421875, "learning_rate": 0.0011104636346329837, "loss": 1.091, "step": 3757 }, { "epoch": 0.4809932164341482, "grad_norm": 0.86328125, "learning_rate": 0.0011100515947181876, "loss": 2.1914, "step": 3758 }, { "epoch": 0.48112120824267246, "grad_norm": 0.66015625, "learning_rate": 0.0011096395358892387, "loss": 1.6549, "step": 3759 }, { "epoch": 0.4812492000511967, "grad_norm": 0.796875, "learning_rate": 0.0011092274582169572, "loss": 2.5331, "step": 3760 }, { "epoch": 0.481377191859721, "grad_norm": 0.6171875, "learning_rate": 0.0011088153617721645, "loss": 1.6781, "step": 3761 }, { "epoch": 0.48150518366824524, "grad_norm": 0.72265625, "learning_rate": 0.0011084032466256867, "loss": 2.3348, "step": 3762 }, { "epoch": 0.4816331754767695, "grad_norm": 0.6640625, "learning_rate": 0.001107991112848352, "loss": 1.4134, "step": 3763 }, { "epoch": 0.4817611672852937, "grad_norm": 0.640625, "learning_rate": 0.0011075789605109925, "loss": 1.6416, "step": 3764 }, { "epoch": 0.481889159093818, "grad_norm": 0.69140625, "learning_rate": 0.001107166789684443, "loss": 1.6668, "step": 3765 }, { "epoch": 0.48201715090234226, "grad_norm": 0.55859375, "learning_rate": 0.001106754600439542, "loss": 1.4643, "step": 3766 }, { "epoch": 0.4821451427108665, "grad_norm": 0.85546875, "learning_rate": 0.001106342392847131, "loss": 1.3778, "step": 3767 }, { "epoch": 0.48227313451939074, "grad_norm": 0.50390625, "learning_rate": 0.0011059301669780542, "loss": 0.9742, "step": 3768 }, { "epoch": 0.482401126327915, "grad_norm": 0.70703125, "learning_rate": 0.0011055179229031598, "loss": 1.5225, "step": 3769 }, { "epoch": 0.4825291181364393, "grad_norm": 0.66015625, "learning_rate": 0.001105105660693298, "loss": 2.0289, "step": 3770 }, { "epoch": 0.4826571099449635, "grad_norm": 0.625, "learning_rate": 0.0011046933804193233, "loss": 2.0587, "step": 3771 }, { "epoch": 0.48278510175348777, "grad_norm": 0.70703125, "learning_rate": 0.0011042810821520923, "loss": 1.5032, "step": 3772 }, { "epoch": 0.482913093562012, "grad_norm": 0.671875, "learning_rate": 0.0011038687659624658, "loss": 1.4424, "step": 3773 }, { "epoch": 0.4830410853705363, "grad_norm": 0.671875, "learning_rate": 0.0011034564319213065, "loss": 1.6551, "step": 3774 }, { "epoch": 0.48316907717906055, "grad_norm": 0.59375, "learning_rate": 0.0011030440800994805, "loss": 1.1529, "step": 3775 }, { "epoch": 0.4832970689875848, "grad_norm": 0.63671875, "learning_rate": 0.0011026317105678578, "loss": 1.2561, "step": 3776 }, { "epoch": 0.48342506079610903, "grad_norm": 0.69140625, "learning_rate": 0.0011022193233973104, "loss": 1.5169, "step": 3777 }, { "epoch": 0.4835530526046333, "grad_norm": 0.65625, "learning_rate": 0.0011018069186587139, "loss": 1.5597, "step": 3778 }, { "epoch": 0.48368104441315757, "grad_norm": 0.5625, "learning_rate": 0.0011013944964229463, "loss": 0.9717, "step": 3779 }, { "epoch": 0.4838090362216818, "grad_norm": 0.70703125, "learning_rate": 0.00110098205676089, "loss": 1.6698, "step": 3780 }, { "epoch": 0.48393702803020605, "grad_norm": 0.77734375, "learning_rate": 0.001100569599743428, "loss": 1.61, "step": 3781 }, { "epoch": 0.48406501983873035, "grad_norm": 0.640625, "learning_rate": 0.0011001571254414492, "loss": 1.2466, "step": 3782 }, { "epoch": 0.4841930116472546, "grad_norm": 0.59375, "learning_rate": 0.001099744633925843, "loss": 1.3503, "step": 3783 }, { "epoch": 0.48432100345577883, "grad_norm": 0.6328125, "learning_rate": 0.0010993321252675034, "loss": 1.4472, "step": 3784 }, { "epoch": 0.4844489952643031, "grad_norm": 0.58984375, "learning_rate": 0.0010989195995373265, "loss": 1.1278, "step": 3785 }, { "epoch": 0.4845769870728273, "grad_norm": 0.6328125, "learning_rate": 0.001098507056806211, "loss": 1.2478, "step": 3786 }, { "epoch": 0.4847049788813516, "grad_norm": 0.7734375, "learning_rate": 0.00109809449714506, "loss": 2.1651, "step": 3787 }, { "epoch": 0.48483297068987585, "grad_norm": 0.72265625, "learning_rate": 0.0010976819206247777, "loss": 1.2023, "step": 3788 }, { "epoch": 0.4849609624984001, "grad_norm": 0.76953125, "learning_rate": 0.0010972693273162727, "loss": 2.0888, "step": 3789 }, { "epoch": 0.48508895430692434, "grad_norm": 0.65625, "learning_rate": 0.0010968567172904558, "loss": 1.1907, "step": 3790 }, { "epoch": 0.48521694611544863, "grad_norm": 0.67578125, "learning_rate": 0.0010964440906182404, "loss": 1.8563, "step": 3791 }, { "epoch": 0.4853449379239729, "grad_norm": 0.57421875, "learning_rate": 0.0010960314473705433, "loss": 1.1354, "step": 3792 }, { "epoch": 0.4854729297324971, "grad_norm": 0.58984375, "learning_rate": 0.0010956187876182838, "loss": 1.0686, "step": 3793 }, { "epoch": 0.48560092154102136, "grad_norm": 0.59765625, "learning_rate": 0.0010952061114323842, "loss": 1.4601, "step": 3794 }, { "epoch": 0.48572891334954565, "grad_norm": 0.65625, "learning_rate": 0.0010947934188837696, "loss": 1.312, "step": 3795 }, { "epoch": 0.4858569051580699, "grad_norm": 0.7578125, "learning_rate": 0.0010943807100433683, "loss": 1.1058, "step": 3796 }, { "epoch": 0.48598489696659414, "grad_norm": 1.015625, "learning_rate": 0.0010939679849821106, "loss": 2.0271, "step": 3797 }, { "epoch": 0.4861128887751184, "grad_norm": 0.6328125, "learning_rate": 0.0010935552437709298, "loss": 1.5473, "step": 3798 }, { "epoch": 0.4862408805836426, "grad_norm": 0.703125, "learning_rate": 0.0010931424864807624, "loss": 1.4009, "step": 3799 }, { "epoch": 0.4863688723921669, "grad_norm": 0.71484375, "learning_rate": 0.001092729713182548, "loss": 1.6989, "step": 3800 }, { "epoch": 0.48649686420069116, "grad_norm": 0.62109375, "learning_rate": 0.0010923169239472274, "loss": 1.2207, "step": 3801 }, { "epoch": 0.4866248560092154, "grad_norm": 0.66015625, "learning_rate": 0.0010919041188457458, "loss": 1.7204, "step": 3802 }, { "epoch": 0.48675284781773964, "grad_norm": 0.72265625, "learning_rate": 0.0010914912979490504, "loss": 1.7257, "step": 3803 }, { "epoch": 0.48688083962626394, "grad_norm": 0.63671875, "learning_rate": 0.0010910784613280904, "loss": 1.3248, "step": 3804 }, { "epoch": 0.4870088314347882, "grad_norm": 0.5390625, "learning_rate": 0.0010906656090538195, "loss": 1.2722, "step": 3805 }, { "epoch": 0.4871368232433124, "grad_norm": 0.79296875, "learning_rate": 0.0010902527411971927, "loss": 2.2446, "step": 3806 }, { "epoch": 0.48726481505183666, "grad_norm": 0.625, "learning_rate": 0.0010898398578291682, "loss": 1.5673, "step": 3807 }, { "epoch": 0.48739280686036096, "grad_norm": 0.7421875, "learning_rate": 0.0010894269590207063, "loss": 1.9282, "step": 3808 }, { "epoch": 0.4875207986688852, "grad_norm": 0.73828125, "learning_rate": 0.0010890140448427702, "loss": 1.8628, "step": 3809 }, { "epoch": 0.48764879047740944, "grad_norm": 0.69140625, "learning_rate": 0.001088601115366327, "loss": 1.7521, "step": 3810 }, { "epoch": 0.4877767822859337, "grad_norm": 0.65234375, "learning_rate": 0.0010881881706623442, "loss": 1.6367, "step": 3811 }, { "epoch": 0.4879047740944579, "grad_norm": 0.609375, "learning_rate": 0.0010877752108017934, "loss": 1.5187, "step": 3812 }, { "epoch": 0.4880327659029822, "grad_norm": 0.6484375, "learning_rate": 0.0010873622358556485, "loss": 1.6453, "step": 3813 }, { "epoch": 0.48816075771150647, "grad_norm": 0.703125, "learning_rate": 0.001086949245894886, "loss": 1.7572, "step": 3814 }, { "epoch": 0.4882887495200307, "grad_norm": 0.6640625, "learning_rate": 0.0010865362409904847, "loss": 1.2343, "step": 3815 }, { "epoch": 0.48841674132855495, "grad_norm": 0.546875, "learning_rate": 0.0010861232212134265, "loss": 1.3086, "step": 3816 }, { "epoch": 0.48854473313707925, "grad_norm": 0.65234375, "learning_rate": 0.0010857101866346953, "loss": 1.0165, "step": 3817 }, { "epoch": 0.4886727249456035, "grad_norm": 0.609375, "learning_rate": 0.0010852971373252781, "loss": 1.6116, "step": 3818 }, { "epoch": 0.48880071675412773, "grad_norm": 0.59765625, "learning_rate": 0.0010848840733561637, "loss": 0.9226, "step": 3819 }, { "epoch": 0.48892870856265197, "grad_norm": 0.63671875, "learning_rate": 0.0010844709947983443, "loss": 1.1341, "step": 3820 }, { "epoch": 0.48905670037117627, "grad_norm": 0.7734375, "learning_rate": 0.0010840579017228136, "loss": 2.2118, "step": 3821 }, { "epoch": 0.4891846921797005, "grad_norm": 0.703125, "learning_rate": 0.0010836447942005692, "loss": 1.125, "step": 3822 }, { "epoch": 0.48931268398822475, "grad_norm": 0.796875, "learning_rate": 0.0010832316723026092, "loss": 1.1425, "step": 3823 }, { "epoch": 0.489440675796749, "grad_norm": 0.71875, "learning_rate": 0.0010828185360999365, "loss": 1.3512, "step": 3824 }, { "epoch": 0.4895686676052733, "grad_norm": 0.578125, "learning_rate": 0.0010824053856635543, "loss": 1.1161, "step": 3825 }, { "epoch": 0.48969665941379753, "grad_norm": 0.69140625, "learning_rate": 0.0010819922210644694, "loss": 1.0964, "step": 3826 }, { "epoch": 0.48982465122232177, "grad_norm": 0.546875, "learning_rate": 0.0010815790423736914, "loss": 1.1234, "step": 3827 }, { "epoch": 0.489952643030846, "grad_norm": 0.66015625, "learning_rate": 0.001081165849662231, "loss": 1.4927, "step": 3828 }, { "epoch": 0.49008063483937025, "grad_norm": 0.734375, "learning_rate": 0.0010807526430011028, "loss": 2.1187, "step": 3829 }, { "epoch": 0.49020862664789455, "grad_norm": 0.63671875, "learning_rate": 0.0010803394224613223, "loss": 1.6346, "step": 3830 }, { "epoch": 0.4903366184564188, "grad_norm": 0.5625, "learning_rate": 0.001079926188113909, "loss": 0.8719, "step": 3831 }, { "epoch": 0.49046461026494304, "grad_norm": 0.578125, "learning_rate": 0.001079512940029883, "loss": 0.9436, "step": 3832 }, { "epoch": 0.4905926020734673, "grad_norm": 0.625, "learning_rate": 0.0010790996782802686, "loss": 1.5383, "step": 3833 }, { "epoch": 0.4907205938819916, "grad_norm": 0.59375, "learning_rate": 0.001078686402936091, "loss": 1.1728, "step": 3834 }, { "epoch": 0.4908485856905158, "grad_norm": 0.76953125, "learning_rate": 0.0010782731140683785, "loss": 1.7422, "step": 3835 }, { "epoch": 0.49097657749904006, "grad_norm": 1.0859375, "learning_rate": 0.0010778598117481611, "loss": 1.6897, "step": 3836 }, { "epoch": 0.4911045693075643, "grad_norm": 0.734375, "learning_rate": 0.001077446496046472, "loss": 1.429, "step": 3837 }, { "epoch": 0.4912325611160886, "grad_norm": 0.671875, "learning_rate": 0.0010770331670343458, "loss": 1.8426, "step": 3838 }, { "epoch": 0.49136055292461284, "grad_norm": 0.6328125, "learning_rate": 0.00107661982478282, "loss": 1.2715, "step": 3839 }, { "epoch": 0.4914885447331371, "grad_norm": 0.93359375, "learning_rate": 0.0010762064693629345, "loss": 1.1418, "step": 3840 }, { "epoch": 0.4916165365416613, "grad_norm": 0.63671875, "learning_rate": 0.0010757931008457307, "loss": 1.6154, "step": 3841 }, { "epoch": 0.49174452835018556, "grad_norm": 0.7578125, "learning_rate": 0.0010753797193022525, "loss": 2.0411, "step": 3842 }, { "epoch": 0.49187252015870986, "grad_norm": 0.5390625, "learning_rate": 0.0010749663248035468, "loss": 1.2643, "step": 3843 }, { "epoch": 0.4920005119672341, "grad_norm": 0.69140625, "learning_rate": 0.001074552917420662, "loss": 1.6071, "step": 3844 }, { "epoch": 0.49212850377575834, "grad_norm": 0.65625, "learning_rate": 0.0010741394972246484, "loss": 1.8795, "step": 3845 }, { "epoch": 0.4922564955842826, "grad_norm": 0.73046875, "learning_rate": 0.0010737260642865595, "loss": 1.1807, "step": 3846 }, { "epoch": 0.4923844873928069, "grad_norm": 0.62890625, "learning_rate": 0.0010733126186774503, "loss": 1.3961, "step": 3847 }, { "epoch": 0.4925124792013311, "grad_norm": 0.5, "learning_rate": 0.001072899160468378, "loss": 1.1709, "step": 3848 }, { "epoch": 0.49264047100985536, "grad_norm": 0.703125, "learning_rate": 0.001072485689730402, "loss": 1.5396, "step": 3849 }, { "epoch": 0.4927684628183796, "grad_norm": 0.71484375, "learning_rate": 0.0010720722065345844, "loss": 1.711, "step": 3850 }, { "epoch": 0.4928964546269039, "grad_norm": 0.66015625, "learning_rate": 0.0010716587109519888, "loss": 1.7543, "step": 3851 }, { "epoch": 0.49302444643542814, "grad_norm": 0.58203125, "learning_rate": 0.001071245203053681, "loss": 1.1527, "step": 3852 }, { "epoch": 0.4931524382439524, "grad_norm": 0.7734375, "learning_rate": 0.0010708316829107294, "loss": 1.7202, "step": 3853 }, { "epoch": 0.4932804300524766, "grad_norm": 0.60546875, "learning_rate": 0.0010704181505942032, "loss": 0.8129, "step": 3854 }, { "epoch": 0.4934084218610009, "grad_norm": 0.78125, "learning_rate": 0.0010700046061751759, "loss": 1.7783, "step": 3855 }, { "epoch": 0.49353641366952516, "grad_norm": 0.66796875, "learning_rate": 0.001069591049724721, "loss": 1.5954, "step": 3856 }, { "epoch": 0.4936644054780494, "grad_norm": 0.609375, "learning_rate": 0.001069177481313915, "loss": 1.1819, "step": 3857 }, { "epoch": 0.49379239728657365, "grad_norm": 0.640625, "learning_rate": 0.0010687639010138368, "loss": 1.2262, "step": 3858 }, { "epoch": 0.4939203890950979, "grad_norm": 0.490234375, "learning_rate": 0.0010683503088955663, "loss": 0.9776, "step": 3859 }, { "epoch": 0.4940483809036222, "grad_norm": 0.83984375, "learning_rate": 0.001067936705030186, "loss": 1.7527, "step": 3860 }, { "epoch": 0.49417637271214643, "grad_norm": 0.65625, "learning_rate": 0.001067523089488781, "loss": 1.5657, "step": 3861 }, { "epoch": 0.49430436452067067, "grad_norm": 0.609375, "learning_rate": 0.0010671094623424375, "loss": 1.6608, "step": 3862 }, { "epoch": 0.4944323563291949, "grad_norm": 0.7421875, "learning_rate": 0.0010666958236622442, "loss": 1.5407, "step": 3863 }, { "epoch": 0.4945603481377192, "grad_norm": 0.703125, "learning_rate": 0.0010662821735192914, "loss": 1.4158, "step": 3864 }, { "epoch": 0.49468833994624345, "grad_norm": 0.7734375, "learning_rate": 0.0010658685119846712, "loss": 1.3248, "step": 3865 }, { "epoch": 0.4948163317547677, "grad_norm": 0.765625, "learning_rate": 0.0010654548391294792, "loss": 1.7434, "step": 3866 }, { "epoch": 0.49494432356329193, "grad_norm": 0.7421875, "learning_rate": 0.0010650411550248108, "loss": 1.7785, "step": 3867 }, { "epoch": 0.49507231537181623, "grad_norm": 0.68359375, "learning_rate": 0.0010646274597417645, "loss": 1.4754, "step": 3868 }, { "epoch": 0.49520030718034047, "grad_norm": 0.703125, "learning_rate": 0.001064213753351441, "loss": 1.8486, "step": 3869 }, { "epoch": 0.4953282989888647, "grad_norm": 0.5859375, "learning_rate": 0.0010638000359249418, "loss": 1.2117, "step": 3870 }, { "epoch": 0.49545629079738895, "grad_norm": 0.71484375, "learning_rate": 0.001063386307533371, "loss": 1.9305, "step": 3871 }, { "epoch": 0.4955842826059132, "grad_norm": 0.60546875, "learning_rate": 0.0010629725682478354, "loss": 1.3997, "step": 3872 }, { "epoch": 0.4957122744144375, "grad_norm": 0.63671875, "learning_rate": 0.0010625588181394424, "loss": 0.9641, "step": 3873 }, { "epoch": 0.49584026622296173, "grad_norm": 0.64453125, "learning_rate": 0.001062145057279301, "loss": 1.5378, "step": 3874 }, { "epoch": 0.495968258031486, "grad_norm": 0.58984375, "learning_rate": 0.0010617312857385236, "loss": 1.5752, "step": 3875 }, { "epoch": 0.4960962498400102, "grad_norm": 0.72265625, "learning_rate": 0.0010613175035882228, "loss": 2.0252, "step": 3876 }, { "epoch": 0.4962242416485345, "grad_norm": 0.71484375, "learning_rate": 0.0010609037108995144, "loss": 1.1878, "step": 3877 }, { "epoch": 0.49635223345705876, "grad_norm": 0.6796875, "learning_rate": 0.0010604899077435149, "loss": 1.6707, "step": 3878 }, { "epoch": 0.496480225265583, "grad_norm": 0.546875, "learning_rate": 0.0010600760941913434, "loss": 1.0217, "step": 3879 }, { "epoch": 0.49660821707410724, "grad_norm": 0.67578125, "learning_rate": 0.0010596622703141208, "loss": 1.5655, "step": 3880 }, { "epoch": 0.49673620888263154, "grad_norm": 0.61328125, "learning_rate": 0.0010592484361829687, "loss": 1.174, "step": 3881 }, { "epoch": 0.4968642006911558, "grad_norm": 0.64453125, "learning_rate": 0.0010588345918690113, "loss": 1.5208, "step": 3882 }, { "epoch": 0.49699219249968, "grad_norm": 0.66796875, "learning_rate": 0.001058420737443375, "loss": 1.7663, "step": 3883 }, { "epoch": 0.49712018430820426, "grad_norm": 0.6328125, "learning_rate": 0.0010580068729771873, "loss": 1.5014, "step": 3884 }, { "epoch": 0.4972481761167285, "grad_norm": 0.5703125, "learning_rate": 0.001057592998541577, "loss": 0.9018, "step": 3885 }, { "epoch": 0.4973761679252528, "grad_norm": 0.65625, "learning_rate": 0.0010571791142076758, "loss": 1.6119, "step": 3886 }, { "epoch": 0.49750415973377704, "grad_norm": 0.6171875, "learning_rate": 0.0010567652200466156, "loss": 1.131, "step": 3887 }, { "epoch": 0.4976321515423013, "grad_norm": 0.69921875, "learning_rate": 0.0010563513161295313, "loss": 1.8132, "step": 3888 }, { "epoch": 0.4977601433508255, "grad_norm": 0.92578125, "learning_rate": 0.0010559374025275597, "loss": 1.2968, "step": 3889 }, { "epoch": 0.4978881351593498, "grad_norm": 0.765625, "learning_rate": 0.0010555234793118375, "loss": 1.7384, "step": 3890 }, { "epoch": 0.49801612696787406, "grad_norm": 0.55859375, "learning_rate": 0.0010551095465535045, "loss": 1.2981, "step": 3891 }, { "epoch": 0.4981441187763983, "grad_norm": 0.66796875, "learning_rate": 0.001054695604323702, "loss": 1.5123, "step": 3892 }, { "epoch": 0.49827211058492255, "grad_norm": 0.703125, "learning_rate": 0.0010542816526935722, "loss": 1.8546, "step": 3893 }, { "epoch": 0.49840010239344684, "grad_norm": 0.953125, "learning_rate": 0.0010538676917342597, "loss": 2.4513, "step": 3894 }, { "epoch": 0.4985280942019711, "grad_norm": 0.7421875, "learning_rate": 0.0010534537215169107, "loss": 1.8315, "step": 3895 }, { "epoch": 0.4986560860104953, "grad_norm": 0.74609375, "learning_rate": 0.001053039742112672, "loss": 1.6972, "step": 3896 }, { "epoch": 0.49878407781901957, "grad_norm": 0.6953125, "learning_rate": 0.0010526257535926936, "loss": 1.8721, "step": 3897 }, { "epoch": 0.49891206962754386, "grad_norm": 0.6796875, "learning_rate": 0.001052211756028125, "loss": 1.5659, "step": 3898 }, { "epoch": 0.4990400614360681, "grad_norm": 0.6171875, "learning_rate": 0.0010517977494901192, "loss": 1.3848, "step": 3899 }, { "epoch": 0.49916805324459235, "grad_norm": 0.69140625, "learning_rate": 0.0010513837340498297, "loss": 1.2277, "step": 3900 }, { "epoch": 0.4992960450531166, "grad_norm": 0.77734375, "learning_rate": 0.0010509697097784122, "loss": 1.5398, "step": 3901 }, { "epoch": 0.49942403686164083, "grad_norm": 0.6484375, "learning_rate": 0.001050555676747023, "loss": 1.6108, "step": 3902 }, { "epoch": 0.4995520286701651, "grad_norm": 0.69921875, "learning_rate": 0.00105014163502682, "loss": 1.4297, "step": 3903 }, { "epoch": 0.49968002047868937, "grad_norm": 0.6796875, "learning_rate": 0.0010497275846889638, "loss": 1.5052, "step": 3904 }, { "epoch": 0.4998080122872136, "grad_norm": 0.69921875, "learning_rate": 0.0010493135258046154, "loss": 1.0537, "step": 3905 }, { "epoch": 0.49993600409573785, "grad_norm": 0.7109375, "learning_rate": 0.0010488994584449374, "loss": 1.4475, "step": 3906 }, { "epoch": 0.5000639959042621, "grad_norm": 1.0078125, "learning_rate": 0.001048485382681094, "loss": 1.6393, "step": 3907 }, { "epoch": 0.5001919877127864, "grad_norm": 0.55859375, "learning_rate": 0.0010480712985842513, "loss": 0.9537, "step": 3908 }, { "epoch": 0.5003199795213107, "grad_norm": 0.703125, "learning_rate": 0.001047657206225576, "loss": 1.8019, "step": 3909 }, { "epoch": 0.5004479713298349, "grad_norm": 0.61328125, "learning_rate": 0.0010472431056762358, "loss": 1.7795, "step": 3910 }, { "epoch": 0.5005759631383592, "grad_norm": 0.8359375, "learning_rate": 0.001046828997007402, "loss": 1.2779, "step": 3911 }, { "epoch": 0.5007039549468834, "grad_norm": 0.734375, "learning_rate": 0.001046414880290245, "loss": 1.6997, "step": 3912 }, { "epoch": 0.5008319467554077, "grad_norm": 0.61328125, "learning_rate": 0.0010460007555959383, "loss": 1.6984, "step": 3913 }, { "epoch": 0.500959938563932, "grad_norm": 0.5703125, "learning_rate": 0.0010455866229956549, "loss": 1.6141, "step": 3914 }, { "epoch": 0.5010879303724561, "grad_norm": 0.66015625, "learning_rate": 0.0010451724825605707, "loss": 1.6885, "step": 3915 }, { "epoch": 0.5012159221809804, "grad_norm": 0.79296875, "learning_rate": 0.0010447583343618626, "loss": 1.7968, "step": 3916 }, { "epoch": 0.5013439139895046, "grad_norm": 0.640625, "learning_rate": 0.001044344178470709, "loss": 1.2313, "step": 3917 }, { "epoch": 0.5014719057980289, "grad_norm": 0.9140625, "learning_rate": 0.001043930014958288, "loss": 1.6876, "step": 3918 }, { "epoch": 0.5015998976065532, "grad_norm": 0.75, "learning_rate": 0.0010435158438957819, "loss": 1.9363, "step": 3919 }, { "epoch": 0.5017278894150774, "grad_norm": 0.6171875, "learning_rate": 0.0010431016653543715, "loss": 1.3795, "step": 3920 }, { "epoch": 0.5018558812236017, "grad_norm": 0.58984375, "learning_rate": 0.0010426874794052408, "loss": 1.4119, "step": 3921 }, { "epoch": 0.501983873032126, "grad_norm": 0.640625, "learning_rate": 0.001042273286119574, "loss": 1.4926, "step": 3922 }, { "epoch": 0.5021118648406502, "grad_norm": 0.77734375, "learning_rate": 0.0010418590855685572, "loss": 2.0563, "step": 3923 }, { "epoch": 0.5022398566491745, "grad_norm": 0.75, "learning_rate": 0.0010414448778233777, "loss": 2.3923, "step": 3924 }, { "epoch": 0.5023678484576987, "grad_norm": 0.64453125, "learning_rate": 0.0010410306629552231, "loss": 1.4498, "step": 3925 }, { "epoch": 0.502495840266223, "grad_norm": 0.625, "learning_rate": 0.0010406164410352834, "loss": 1.0972, "step": 3926 }, { "epoch": 0.5026238320747473, "grad_norm": 0.5234375, "learning_rate": 0.0010402022121347495, "loss": 0.9747, "step": 3927 }, { "epoch": 0.5027518238832714, "grad_norm": 0.5859375, "learning_rate": 0.001039787976324813, "loss": 1.2369, "step": 3928 }, { "epoch": 0.5028798156917957, "grad_norm": 0.671875, "learning_rate": 0.0010393737336766673, "loss": 1.7741, "step": 3929 }, { "epoch": 0.5030078075003199, "grad_norm": 0.6015625, "learning_rate": 0.0010389594842615067, "loss": 1.4652, "step": 3930 }, { "epoch": 0.5031357993088442, "grad_norm": 0.6328125, "learning_rate": 0.0010385452281505268, "loss": 1.6996, "step": 3931 }, { "epoch": 0.5032637911173685, "grad_norm": 0.54296875, "learning_rate": 0.0010381309654149235, "loss": 1.4017, "step": 3932 }, { "epoch": 0.5033917829258927, "grad_norm": 0.65625, "learning_rate": 0.0010377166961258954, "loss": 1.7911, "step": 3933 }, { "epoch": 0.503519774734417, "grad_norm": 0.640625, "learning_rate": 0.0010373024203546412, "loss": 1.293, "step": 3934 }, { "epoch": 0.5036477665429413, "grad_norm": 0.5859375, "learning_rate": 0.001036888138172361, "loss": 0.8172, "step": 3935 }, { "epoch": 0.5037757583514655, "grad_norm": 0.7265625, "learning_rate": 0.0010364738496502555, "loss": 1.3114, "step": 3936 }, { "epoch": 0.5039037501599898, "grad_norm": 0.6796875, "learning_rate": 0.0010360595548595272, "loss": 1.3741, "step": 3937 }, { "epoch": 0.504031741968514, "grad_norm": 0.73046875, "learning_rate": 0.0010356452538713794, "loss": 1.9196, "step": 3938 }, { "epoch": 0.5041597337770383, "grad_norm": 0.671875, "learning_rate": 0.0010352309467570164, "loss": 1.334, "step": 3939 }, { "epoch": 0.5042877255855626, "grad_norm": 0.6875, "learning_rate": 0.0010348166335876438, "loss": 1.4792, "step": 3940 }, { "epoch": 0.5044157173940867, "grad_norm": 0.4921875, "learning_rate": 0.0010344023144344672, "loss": 0.7615, "step": 3941 }, { "epoch": 0.504543709202611, "grad_norm": 0.66796875, "learning_rate": 0.0010339879893686954, "loss": 1.3423, "step": 3942 }, { "epoch": 0.5046717010111352, "grad_norm": 0.71875, "learning_rate": 0.0010335736584615356, "loss": 1.6923, "step": 3943 }, { "epoch": 0.5047996928196595, "grad_norm": 0.5, "learning_rate": 0.001033159321784198, "loss": 1.0903, "step": 3944 }, { "epoch": 0.5049276846281838, "grad_norm": 0.76171875, "learning_rate": 0.001032744979407893, "loss": 1.5427, "step": 3945 }, { "epoch": 0.505055676436708, "grad_norm": 0.5859375, "learning_rate": 0.0010323306314038324, "loss": 1.0237, "step": 3946 }, { "epoch": 0.5051836682452323, "grad_norm": 0.75, "learning_rate": 0.001031916277843228, "loss": 1.2268, "step": 3947 }, { "epoch": 0.5053116600537566, "grad_norm": 0.7265625, "learning_rate": 0.0010315019187972934, "loss": 1.4482, "step": 3948 }, { "epoch": 0.5054396518622808, "grad_norm": 0.65234375, "learning_rate": 0.0010310875543372425, "loss": 1.5664, "step": 3949 }, { "epoch": 0.5055676436708051, "grad_norm": 0.52734375, "learning_rate": 0.0010306731845342917, "loss": 0.8626, "step": 3950 }, { "epoch": 0.5056956354793293, "grad_norm": 0.89453125, "learning_rate": 0.0010302588094596558, "loss": 1.5475, "step": 3951 }, { "epoch": 0.5058236272878536, "grad_norm": 0.625, "learning_rate": 0.001029844429184553, "loss": 1.0782, "step": 3952 }, { "epoch": 0.5059516190963779, "grad_norm": 0.76171875, "learning_rate": 0.0010294300437802008, "loss": 1.7669, "step": 3953 }, { "epoch": 0.506079610904902, "grad_norm": 0.60546875, "learning_rate": 0.0010290156533178181, "loss": 1.0951, "step": 3954 }, { "epoch": 0.5062076027134264, "grad_norm": 0.65625, "learning_rate": 0.0010286012578686243, "loss": 1.14, "step": 3955 }, { "epoch": 0.5063355945219506, "grad_norm": 0.71875, "learning_rate": 0.0010281868575038404, "loss": 1.1679, "step": 3956 }, { "epoch": 0.5064635863304748, "grad_norm": 0.58984375, "learning_rate": 0.0010277724522946882, "loss": 1.5888, "step": 3957 }, { "epoch": 0.5065915781389991, "grad_norm": 0.8359375, "learning_rate": 0.001027358042312389, "loss": 2.2238, "step": 3958 }, { "epoch": 0.5067195699475233, "grad_norm": 0.640625, "learning_rate": 0.0010269436276281664, "loss": 1.2349, "step": 3959 }, { "epoch": 0.5068475617560476, "grad_norm": 0.65234375, "learning_rate": 0.001026529208313244, "loss": 1.5128, "step": 3960 }, { "epoch": 0.5069755535645719, "grad_norm": 0.73828125, "learning_rate": 0.0010261147844388473, "loss": 2.1263, "step": 3961 }, { "epoch": 0.5071035453730961, "grad_norm": 0.7421875, "learning_rate": 0.001025700356076201, "loss": 1.6227, "step": 3962 }, { "epoch": 0.5072315371816204, "grad_norm": 0.6640625, "learning_rate": 0.001025285923296531, "loss": 0.9586, "step": 3963 }, { "epoch": 0.5073595289901446, "grad_norm": 0.74609375, "learning_rate": 0.0010248714861710655, "loss": 1.9431, "step": 3964 }, { "epoch": 0.5074875207986689, "grad_norm": 0.6171875, "learning_rate": 0.0010244570447710314, "loss": 1.2959, "step": 3965 }, { "epoch": 0.5076155126071932, "grad_norm": 0.76953125, "learning_rate": 0.0010240425991676569, "loss": 1.4285, "step": 3966 }, { "epoch": 0.5077435044157174, "grad_norm": 0.66796875, "learning_rate": 0.0010236281494321719, "loss": 1.4839, "step": 3967 }, { "epoch": 0.5078714962242417, "grad_norm": 0.8203125, "learning_rate": 0.0010232136956358064, "loss": 1.3947, "step": 3968 }, { "epoch": 0.507999488032766, "grad_norm": 0.59375, "learning_rate": 0.0010227992378497904, "loss": 1.0504, "step": 3969 }, { "epoch": 0.5081274798412901, "grad_norm": 0.62890625, "learning_rate": 0.0010223847761453558, "loss": 1.2146, "step": 3970 }, { "epoch": 0.5082554716498144, "grad_norm": 0.7890625, "learning_rate": 0.0010219703105937335, "loss": 1.5681, "step": 3971 }, { "epoch": 0.5083834634583386, "grad_norm": 0.490234375, "learning_rate": 0.0010215558412661572, "loss": 0.6115, "step": 3972 }, { "epoch": 0.5085114552668629, "grad_norm": 0.59765625, "learning_rate": 0.0010211413682338602, "loss": 1.4061, "step": 3973 }, { "epoch": 0.5086394470753872, "grad_norm": 0.67578125, "learning_rate": 0.0010207268915680757, "loss": 1.5206, "step": 3974 }, { "epoch": 0.5087674388839114, "grad_norm": 0.734375, "learning_rate": 0.0010203124113400389, "loss": 1.5486, "step": 3975 }, { "epoch": 0.5088954306924357, "grad_norm": 0.66796875, "learning_rate": 0.001019897927620984, "loss": 1.6695, "step": 3976 }, { "epoch": 0.5090234225009599, "grad_norm": 0.578125, "learning_rate": 0.001019483440482148, "loss": 1.0495, "step": 3977 }, { "epoch": 0.5091514143094842, "grad_norm": 0.703125, "learning_rate": 0.0010190689499947664, "loss": 1.9144, "step": 3978 }, { "epoch": 0.5092794061180085, "grad_norm": 0.546875, "learning_rate": 0.0010186544562300766, "loss": 1.1555, "step": 3979 }, { "epoch": 0.5094073979265327, "grad_norm": 0.625, "learning_rate": 0.0010182399592593154, "loss": 1.2608, "step": 3980 }, { "epoch": 0.509535389735057, "grad_norm": 0.578125, "learning_rate": 0.0010178254591537218, "loss": 1.2789, "step": 3981 }, { "epoch": 0.5096633815435813, "grad_norm": 0.6640625, "learning_rate": 0.0010174109559845336, "loss": 1.7769, "step": 3982 }, { "epoch": 0.5097913733521054, "grad_norm": 0.59375, "learning_rate": 0.0010169964498229894, "loss": 1.3682, "step": 3983 }, { "epoch": 0.5099193651606297, "grad_norm": 0.77734375, "learning_rate": 0.0010165819407403306, "loss": 2.0932, "step": 3984 }, { "epoch": 0.5100473569691539, "grad_norm": 0.69921875, "learning_rate": 0.0010161674288077958, "loss": 1.3771, "step": 3985 }, { "epoch": 0.5101753487776782, "grad_norm": 0.76171875, "learning_rate": 0.0010157529140966263, "loss": 1.892, "step": 3986 }, { "epoch": 0.5103033405862025, "grad_norm": 0.59375, "learning_rate": 0.0010153383966780625, "loss": 1.4063, "step": 3987 }, { "epoch": 0.5104313323947267, "grad_norm": 0.67578125, "learning_rate": 0.0010149238766233467, "loss": 1.505, "step": 3988 }, { "epoch": 0.510559324203251, "grad_norm": 0.640625, "learning_rate": 0.0010145093540037202, "loss": 1.6485, "step": 3989 }, { "epoch": 0.5106873160117752, "grad_norm": 0.77734375, "learning_rate": 0.0010140948288904265, "loss": 1.9461, "step": 3990 }, { "epoch": 0.5108153078202995, "grad_norm": 0.640625, "learning_rate": 0.0010136803013547071, "loss": 1.2003, "step": 3991 }, { "epoch": 0.5109432996288238, "grad_norm": 0.625, "learning_rate": 0.0010132657714678063, "loss": 1.4739, "step": 3992 }, { "epoch": 0.511071291437348, "grad_norm": 0.65234375, "learning_rate": 0.0010128512393009672, "loss": 1.3088, "step": 3993 }, { "epoch": 0.5111992832458723, "grad_norm": 0.78125, "learning_rate": 0.001012436704925434, "loss": 1.3081, "step": 3994 }, { "epoch": 0.5113272750543966, "grad_norm": 0.6796875, "learning_rate": 0.0010120221684124515, "loss": 1.2492, "step": 3995 }, { "epoch": 0.5114552668629208, "grad_norm": 0.5625, "learning_rate": 0.0010116076298332644, "loss": 1.2274, "step": 3996 }, { "epoch": 0.511583258671445, "grad_norm": 0.76953125, "learning_rate": 0.001011193089259118, "loss": 1.5074, "step": 3997 }, { "epoch": 0.5117112504799692, "grad_norm": 0.6796875, "learning_rate": 0.001010778546761257, "loss": 1.9034, "step": 3998 }, { "epoch": 0.5118392422884935, "grad_norm": 0.625, "learning_rate": 0.0010103640024109282, "loss": 1.5582, "step": 3999 }, { "epoch": 0.5119672340970178, "grad_norm": 0.66015625, "learning_rate": 0.0010099494562793771, "loss": 1.3784, "step": 4000 }, { "epoch": 0.512095225905542, "grad_norm": 1.578125, "learning_rate": 0.001009534908437851, "loss": 1.387, "step": 4001 }, { "epoch": 0.5122232177140663, "grad_norm": 0.71875, "learning_rate": 0.0010091203589575961, "loss": 1.8561, "step": 4002 }, { "epoch": 0.5123512095225905, "grad_norm": 0.68359375, "learning_rate": 0.0010087058079098594, "loss": 1.3315, "step": 4003 }, { "epoch": 0.5124792013311148, "grad_norm": 0.80859375, "learning_rate": 0.0010082912553658885, "loss": 2.037, "step": 4004 }, { "epoch": 0.5126071931396391, "grad_norm": 0.671875, "learning_rate": 0.0010078767013969303, "loss": 1.45, "step": 4005 }, { "epoch": 0.5127351849481633, "grad_norm": 0.73046875, "learning_rate": 0.0010074621460742339, "loss": 1.8623, "step": 4006 }, { "epoch": 0.5128631767566876, "grad_norm": 0.58984375, "learning_rate": 0.0010070475894690463, "loss": 1.41, "step": 4007 }, { "epoch": 0.5129911685652119, "grad_norm": 0.65625, "learning_rate": 0.0010066330316526161, "loss": 1.1012, "step": 4008 }, { "epoch": 0.5131191603737361, "grad_norm": 0.498046875, "learning_rate": 0.0010062184726961918, "loss": 0.8734, "step": 4009 }, { "epoch": 0.5132471521822604, "grad_norm": 0.65234375, "learning_rate": 0.001005803912671022, "loss": 1.1599, "step": 4010 }, { "epoch": 0.5133751439907845, "grad_norm": 0.66796875, "learning_rate": 0.0010053893516483556, "loss": 1.6462, "step": 4011 }, { "epoch": 0.5135031357993088, "grad_norm": 0.66015625, "learning_rate": 0.0010049747896994422, "loss": 1.6703, "step": 4012 }, { "epoch": 0.5136311276078331, "grad_norm": 0.83203125, "learning_rate": 0.0010045602268955298, "loss": 1.5783, "step": 4013 }, { "epoch": 0.5137591194163573, "grad_norm": 0.75, "learning_rate": 0.0010041456633078688, "loss": 1.1486, "step": 4014 }, { "epoch": 0.5138871112248816, "grad_norm": 0.66796875, "learning_rate": 0.0010037310990077082, "loss": 1.2451, "step": 4015 }, { "epoch": 0.5140151030334058, "grad_norm": 0.70703125, "learning_rate": 0.0010033165340662977, "loss": 1.6737, "step": 4016 }, { "epoch": 0.5141430948419301, "grad_norm": 0.61328125, "learning_rate": 0.001002901968554887, "loss": 1.2031, "step": 4017 }, { "epoch": 0.5142710866504544, "grad_norm": 0.6171875, "learning_rate": 0.0010024874025447259, "loss": 1.3048, "step": 4018 }, { "epoch": 0.5143990784589786, "grad_norm": 0.58203125, "learning_rate": 0.0010020728361070643, "loss": 1.0953, "step": 4019 }, { "epoch": 0.5145270702675029, "grad_norm": 1.25, "learning_rate": 0.0010016582693131524, "loss": 1.4212, "step": 4020 }, { "epoch": 0.5146550620760272, "grad_norm": 0.640625, "learning_rate": 0.0010012437022342397, "loss": 1.1844, "step": 4021 }, { "epoch": 0.5147830538845514, "grad_norm": 0.68359375, "learning_rate": 0.0010008291349415765, "loss": 1.3277, "step": 4022 }, { "epoch": 0.5149110456930757, "grad_norm": 0.5703125, "learning_rate": 0.0010004145675064136, "loss": 1.5768, "step": 4023 }, { "epoch": 0.5150390375015999, "grad_norm": 0.6484375, "learning_rate": 0.001, "loss": 1.0423, "step": 4024 }, { "epoch": 0.5151670293101241, "grad_norm": 0.7265625, "learning_rate": 0.0009995854324935866, "loss": 1.7394, "step": 4025 }, { "epoch": 0.5152950211186484, "grad_norm": 0.92578125, "learning_rate": 0.0009991708650584233, "loss": 1.4498, "step": 4026 }, { "epoch": 0.5154230129271726, "grad_norm": 0.625, "learning_rate": 0.0009987562977657606, "loss": 1.1581, "step": 4027 }, { "epoch": 0.5155510047356969, "grad_norm": 0.61328125, "learning_rate": 0.0009983417306868479, "loss": 1.4062, "step": 4028 }, { "epoch": 0.5156789965442212, "grad_norm": 0.6484375, "learning_rate": 0.0009979271638929357, "loss": 1.3091, "step": 4029 }, { "epoch": 0.5158069883527454, "grad_norm": 0.765625, "learning_rate": 0.0009975125974552742, "loss": 2.0614, "step": 4030 }, { "epoch": 0.5159349801612697, "grad_norm": 0.6484375, "learning_rate": 0.0009970980314451135, "loss": 1.4728, "step": 4031 }, { "epoch": 0.5160629719697939, "grad_norm": 0.96875, "learning_rate": 0.0009966834659337024, "loss": 1.56, "step": 4032 }, { "epoch": 0.5161909637783182, "grad_norm": 0.72265625, "learning_rate": 0.0009962689009922919, "loss": 1.4569, "step": 4033 }, { "epoch": 0.5163189555868425, "grad_norm": 0.69140625, "learning_rate": 0.0009958543366921312, "loss": 1.4094, "step": 4034 }, { "epoch": 0.5164469473953667, "grad_norm": 0.68359375, "learning_rate": 0.00099543977310447, "loss": 1.3107, "step": 4035 }, { "epoch": 0.516574939203891, "grad_norm": 1.03125, "learning_rate": 0.0009950252103005583, "loss": 1.2362, "step": 4036 }, { "epoch": 0.5167029310124152, "grad_norm": 0.8046875, "learning_rate": 0.0009946106483516444, "loss": 2.0884, "step": 4037 }, { "epoch": 0.5168309228209395, "grad_norm": 0.578125, "learning_rate": 0.0009941960873289782, "loss": 1.0722, "step": 4038 }, { "epoch": 0.5169589146294638, "grad_norm": 0.78515625, "learning_rate": 0.000993781527303808, "loss": 1.1221, "step": 4039 }, { "epoch": 0.5170869064379879, "grad_norm": 0.6484375, "learning_rate": 0.0009933669683473841, "loss": 1.6631, "step": 4040 }, { "epoch": 0.5172148982465122, "grad_norm": 0.68359375, "learning_rate": 0.000992952410530954, "loss": 1.5704, "step": 4041 }, { "epoch": 0.5173428900550365, "grad_norm": 0.6484375, "learning_rate": 0.0009925378539257664, "loss": 1.3625, "step": 4042 }, { "epoch": 0.5174708818635607, "grad_norm": 0.63671875, "learning_rate": 0.0009921232986030695, "loss": 1.1639, "step": 4043 }, { "epoch": 0.517598873672085, "grad_norm": 0.64453125, "learning_rate": 0.000991708744634112, "loss": 0.9606, "step": 4044 }, { "epoch": 0.5177268654806092, "grad_norm": 0.59765625, "learning_rate": 0.0009912941920901409, "loss": 1.1759, "step": 4045 }, { "epoch": 0.5178548572891335, "grad_norm": 0.54296875, "learning_rate": 0.0009908796410424041, "loss": 0.8484, "step": 4046 }, { "epoch": 0.5179828490976578, "grad_norm": 0.53515625, "learning_rate": 0.000990465091562149, "loss": 1.1101, "step": 4047 }, { "epoch": 0.518110840906182, "grad_norm": 0.69140625, "learning_rate": 0.0009900505437206227, "loss": 1.5603, "step": 4048 }, { "epoch": 0.5182388327147063, "grad_norm": 0.55078125, "learning_rate": 0.0009896359975890719, "loss": 1.1434, "step": 4049 }, { "epoch": 0.5183668245232305, "grad_norm": 0.6640625, "learning_rate": 0.0009892214532387432, "loss": 1.5629, "step": 4050 }, { "epoch": 0.5184948163317548, "grad_norm": 0.65234375, "learning_rate": 0.0009888069107408823, "loss": 1.4095, "step": 4051 }, { "epoch": 0.5186228081402791, "grad_norm": 0.70703125, "learning_rate": 0.0009883923701667356, "loss": 2.0055, "step": 4052 }, { "epoch": 0.5187507999488032, "grad_norm": 0.734375, "learning_rate": 0.0009879778315875487, "loss": 1.8771, "step": 4053 }, { "epoch": 0.5188787917573275, "grad_norm": 0.69140625, "learning_rate": 0.0009875632950745662, "loss": 1.7181, "step": 4054 }, { "epoch": 0.5190067835658518, "grad_norm": 0.81640625, "learning_rate": 0.0009871487606990328, "loss": 1.7509, "step": 4055 }, { "epoch": 0.519134775374376, "grad_norm": 0.80859375, "learning_rate": 0.0009867342285321938, "loss": 2.2135, "step": 4056 }, { "epoch": 0.5192627671829003, "grad_norm": 0.59375, "learning_rate": 0.000986319698645293, "loss": 1.1546, "step": 4057 }, { "epoch": 0.5193907589914245, "grad_norm": 0.609375, "learning_rate": 0.000985905171109574, "loss": 1.4116, "step": 4058 }, { "epoch": 0.5195187507999488, "grad_norm": 0.65234375, "learning_rate": 0.00098549064599628, "loss": 1.2887, "step": 4059 }, { "epoch": 0.5196467426084731, "grad_norm": 0.65625, "learning_rate": 0.0009850761233766536, "loss": 1.1935, "step": 4060 }, { "epoch": 0.5197747344169973, "grad_norm": 0.60546875, "learning_rate": 0.0009846616033219377, "loss": 1.0899, "step": 4061 }, { "epoch": 0.5199027262255216, "grad_norm": 0.7421875, "learning_rate": 0.0009842470859033742, "loss": 1.478, "step": 4062 }, { "epoch": 0.5200307180340458, "grad_norm": 0.609375, "learning_rate": 0.0009838325711922045, "loss": 1.2456, "step": 4063 }, { "epoch": 0.5201587098425701, "grad_norm": 0.62109375, "learning_rate": 0.0009834180592596694, "loss": 1.2698, "step": 4064 }, { "epoch": 0.5202867016510944, "grad_norm": 0.6875, "learning_rate": 0.0009830035501770104, "loss": 1.4175, "step": 4065 }, { "epoch": 0.5204146934596185, "grad_norm": 0.59375, "learning_rate": 0.0009825890440154667, "loss": 1.4279, "step": 4066 }, { "epoch": 0.5205426852681428, "grad_norm": 0.6328125, "learning_rate": 0.0009821745408462787, "loss": 1.2622, "step": 4067 }, { "epoch": 0.5206706770766671, "grad_norm": 0.6953125, "learning_rate": 0.0009817600407406846, "loss": 1.2092, "step": 4068 }, { "epoch": 0.5207986688851913, "grad_norm": 0.62109375, "learning_rate": 0.0009813455437699236, "loss": 1.0571, "step": 4069 }, { "epoch": 0.5209266606937156, "grad_norm": 0.73828125, "learning_rate": 0.0009809310500052337, "loss": 1.2428, "step": 4070 }, { "epoch": 0.5210546525022398, "grad_norm": 0.74609375, "learning_rate": 0.0009805165595178523, "loss": 1.717, "step": 4071 }, { "epoch": 0.5211826443107641, "grad_norm": 0.85546875, "learning_rate": 0.000980102072379016, "loss": 1.9695, "step": 4072 }, { "epoch": 0.5213106361192884, "grad_norm": 0.66015625, "learning_rate": 0.0009796875886599614, "loss": 1.2984, "step": 4073 }, { "epoch": 0.5214386279278126, "grad_norm": 0.64453125, "learning_rate": 0.0009792731084319244, "loss": 1.1901, "step": 4074 }, { "epoch": 0.5215666197363369, "grad_norm": 0.77734375, "learning_rate": 0.0009788586317661403, "loss": 1.8083, "step": 4075 }, { "epoch": 0.5216946115448611, "grad_norm": 0.83984375, "learning_rate": 0.0009784441587338429, "loss": 2.1293, "step": 4076 }, { "epoch": 0.5218226033533854, "grad_norm": 0.7421875, "learning_rate": 0.0009780296894062667, "loss": 1.3798, "step": 4077 }, { "epoch": 0.5219505951619097, "grad_norm": 0.6953125, "learning_rate": 0.0009776152238546445, "loss": 1.4126, "step": 4078 }, { "epoch": 0.5220785869704339, "grad_norm": 0.59765625, "learning_rate": 0.0009772007621502094, "loss": 1.1239, "step": 4079 }, { "epoch": 0.5222065787789582, "grad_norm": 0.734375, "learning_rate": 0.0009767863043641939, "loss": 1.9069, "step": 4080 }, { "epoch": 0.5223345705874824, "grad_norm": 0.66015625, "learning_rate": 0.000976371850567828, "loss": 1.4066, "step": 4081 }, { "epoch": 0.5224625623960066, "grad_norm": 0.60546875, "learning_rate": 0.0009759574008323431, "loss": 1.3571, "step": 4082 }, { "epoch": 0.5225905542045309, "grad_norm": 0.6328125, "learning_rate": 0.000975542955228969, "loss": 1.6479, "step": 4083 }, { "epoch": 0.5227185460130551, "grad_norm": 0.58203125, "learning_rate": 0.0009751285138289348, "loss": 1.0956, "step": 4084 }, { "epoch": 0.5228465378215794, "grad_norm": 0.75390625, "learning_rate": 0.000974714076703469, "loss": 1.4513, "step": 4085 }, { "epoch": 0.5229745296301037, "grad_norm": 0.52734375, "learning_rate": 0.0009742996439237995, "loss": 1.028, "step": 4086 }, { "epoch": 0.5231025214386279, "grad_norm": 0.765625, "learning_rate": 0.000973885215561153, "loss": 1.6738, "step": 4087 }, { "epoch": 0.5232305132471522, "grad_norm": 0.6875, "learning_rate": 0.0009734707916867559, "loss": 1.2342, "step": 4088 }, { "epoch": 0.5233585050556764, "grad_norm": 0.68359375, "learning_rate": 0.0009730563723718339, "loss": 1.4473, "step": 4089 }, { "epoch": 0.5234864968642007, "grad_norm": 0.69921875, "learning_rate": 0.0009726419576876112, "loss": 1.8384, "step": 4090 }, { "epoch": 0.523614488672725, "grad_norm": 0.59375, "learning_rate": 0.0009722275477053121, "loss": 1.0617, "step": 4091 }, { "epoch": 0.5237424804812492, "grad_norm": 0.59765625, "learning_rate": 0.0009718131424961594, "loss": 1.205, "step": 4092 }, { "epoch": 0.5238704722897735, "grad_norm": 0.8125, "learning_rate": 0.0009713987421313758, "loss": 1.7948, "step": 4093 }, { "epoch": 0.5239984640982978, "grad_norm": 0.6796875, "learning_rate": 0.0009709843466821821, "loss": 1.6287, "step": 4094 }, { "epoch": 0.5241264559068219, "grad_norm": 0.84765625, "learning_rate": 0.0009705699562197993, "loss": 1.7061, "step": 4095 }, { "epoch": 0.5242544477153462, "grad_norm": 0.95703125, "learning_rate": 0.0009701555708154469, "loss": 1.9281, "step": 4096 }, { "epoch": 0.5243824395238704, "grad_norm": 1.03125, "learning_rate": 0.0009697411905403443, "loss": 2.1192, "step": 4097 }, { "epoch": 0.5245104313323947, "grad_norm": 0.8046875, "learning_rate": 0.0009693268154657088, "loss": 1.51, "step": 4098 }, { "epoch": 0.524638423140919, "grad_norm": 0.69140625, "learning_rate": 0.0009689124456627576, "loss": 1.4854, "step": 4099 }, { "epoch": 0.5247664149494432, "grad_norm": 0.77734375, "learning_rate": 0.0009684980812027068, "loss": 2.0303, "step": 4100 }, { "epoch": 0.5248944067579675, "grad_norm": 0.859375, "learning_rate": 0.0009680837221567721, "loss": 1.8658, "step": 4101 }, { "epoch": 0.5250223985664918, "grad_norm": 0.73828125, "learning_rate": 0.0009676693685961679, "loss": 0.8916, "step": 4102 }, { "epoch": 0.525150390375016, "grad_norm": 0.6484375, "learning_rate": 0.000967255020592107, "loss": 1.5684, "step": 4103 }, { "epoch": 0.5252783821835403, "grad_norm": 0.5, "learning_rate": 0.000966840678215802, "loss": 0.808, "step": 4104 }, { "epoch": 0.5254063739920645, "grad_norm": 0.5859375, "learning_rate": 0.0009664263415384644, "loss": 1.0635, "step": 4105 }, { "epoch": 0.5255343658005888, "grad_norm": 0.6796875, "learning_rate": 0.000966012010631305, "loss": 1.494, "step": 4106 }, { "epoch": 0.5256623576091131, "grad_norm": 0.546875, "learning_rate": 0.0009655976855655328, "loss": 1.0793, "step": 4107 }, { "epoch": 0.5257903494176372, "grad_norm": 0.64453125, "learning_rate": 0.0009651833664123566, "loss": 1.4969, "step": 4108 }, { "epoch": 0.5259183412261615, "grad_norm": 0.5703125, "learning_rate": 0.0009647690532429837, "loss": 0.8243, "step": 4109 }, { "epoch": 0.5260463330346857, "grad_norm": 0.64453125, "learning_rate": 0.0009643547461286206, "loss": 1.2972, "step": 4110 }, { "epoch": 0.52617432484321, "grad_norm": 0.6015625, "learning_rate": 0.0009639404451404728, "loss": 1.1914, "step": 4111 }, { "epoch": 0.5263023166517343, "grad_norm": 0.73828125, "learning_rate": 0.0009635261503497445, "loss": 1.3283, "step": 4112 }, { "epoch": 0.5264303084602585, "grad_norm": 0.70703125, "learning_rate": 0.0009631118618276391, "loss": 1.7427, "step": 4113 }, { "epoch": 0.5265583002687828, "grad_norm": 0.78125, "learning_rate": 0.0009626975796453586, "loss": 1.7884, "step": 4114 }, { "epoch": 0.5266862920773071, "grad_norm": 0.8359375, "learning_rate": 0.0009622833038741048, "loss": 2.08, "step": 4115 }, { "epoch": 0.5268142838858313, "grad_norm": 0.7734375, "learning_rate": 0.0009618690345850764, "loss": 1.613, "step": 4116 }, { "epoch": 0.5269422756943556, "grad_norm": 0.66015625, "learning_rate": 0.0009614547718494735, "loss": 1.2432, "step": 4117 }, { "epoch": 0.5270702675028798, "grad_norm": 0.6953125, "learning_rate": 0.0009610405157384933, "loss": 1.4507, "step": 4118 }, { "epoch": 0.5271982593114041, "grad_norm": 0.62109375, "learning_rate": 0.0009606262663233325, "loss": 1.0298, "step": 4119 }, { "epoch": 0.5273262511199284, "grad_norm": 0.62890625, "learning_rate": 0.0009602120236751872, "loss": 1.2197, "step": 4120 }, { "epoch": 0.5274542429284526, "grad_norm": 0.66796875, "learning_rate": 0.0009597977878652508, "loss": 1.0416, "step": 4121 }, { "epoch": 0.5275822347369769, "grad_norm": 0.6640625, "learning_rate": 0.0009593835589647167, "loss": 1.5423, "step": 4122 }, { "epoch": 0.527710226545501, "grad_norm": 0.84765625, "learning_rate": 0.0009589693370447768, "loss": 1.0568, "step": 4123 }, { "epoch": 0.5278382183540253, "grad_norm": 0.9375, "learning_rate": 0.0009585551221766228, "loss": 1.2903, "step": 4124 }, { "epoch": 0.5279662101625496, "grad_norm": 0.84375, "learning_rate": 0.000958140914431443, "loss": 1.519, "step": 4125 }, { "epoch": 0.5280942019710738, "grad_norm": 0.578125, "learning_rate": 0.0009577267138804262, "loss": 1.2483, "step": 4126 }, { "epoch": 0.5282221937795981, "grad_norm": 0.640625, "learning_rate": 0.0009573125205947593, "loss": 1.0377, "step": 4127 }, { "epoch": 0.5283501855881224, "grad_norm": 0.73046875, "learning_rate": 0.0009568983346456288, "loss": 1.6639, "step": 4128 }, { "epoch": 0.5284781773966466, "grad_norm": 0.54296875, "learning_rate": 0.0009564841561042186, "loss": 1.2322, "step": 4129 }, { "epoch": 0.5286061692051709, "grad_norm": 0.69921875, "learning_rate": 0.0009560699850417121, "loss": 1.4687, "step": 4130 }, { "epoch": 0.5287341610136951, "grad_norm": 0.62109375, "learning_rate": 0.0009556558215292915, "loss": 1.1696, "step": 4131 }, { "epoch": 0.5288621528222194, "grad_norm": 0.47265625, "learning_rate": 0.0009552416656381373, "loss": 0.6933, "step": 4132 }, { "epoch": 0.5289901446307437, "grad_norm": 0.59765625, "learning_rate": 0.0009548275174394294, "loss": 1.268, "step": 4133 }, { "epoch": 0.5291181364392679, "grad_norm": 0.73828125, "learning_rate": 0.0009544133770043453, "loss": 1.4249, "step": 4134 }, { "epoch": 0.5292461282477922, "grad_norm": 0.65234375, "learning_rate": 0.000953999244404062, "loss": 1.2348, "step": 4135 }, { "epoch": 0.5293741200563163, "grad_norm": 0.6328125, "learning_rate": 0.0009535851197097548, "loss": 1.3731, "step": 4136 }, { "epoch": 0.5295021118648406, "grad_norm": 0.7265625, "learning_rate": 0.0009531710029925984, "loss": 1.7501, "step": 4137 }, { "epoch": 0.5296301036733649, "grad_norm": 0.77734375, "learning_rate": 0.0009527568943237645, "loss": 1.2982, "step": 4138 }, { "epoch": 0.5297580954818891, "grad_norm": 0.546875, "learning_rate": 0.0009523427937744245, "loss": 1.1068, "step": 4139 }, { "epoch": 0.5298860872904134, "grad_norm": 0.8359375, "learning_rate": 0.0009519287014157488, "loss": 1.8894, "step": 4140 }, { "epoch": 0.5300140790989377, "grad_norm": 0.6328125, "learning_rate": 0.0009515146173189057, "loss": 1.3177, "step": 4141 }, { "epoch": 0.5301420709074619, "grad_norm": 0.625, "learning_rate": 0.0009511005415550629, "loss": 1.4394, "step": 4142 }, { "epoch": 0.5302700627159862, "grad_norm": 0.640625, "learning_rate": 0.0009506864741953848, "loss": 1.2318, "step": 4143 }, { "epoch": 0.5303980545245104, "grad_norm": 0.65234375, "learning_rate": 0.0009502724153110364, "loss": 1.5223, "step": 4144 }, { "epoch": 0.5305260463330347, "grad_norm": 0.59375, "learning_rate": 0.0009498583649731801, "loss": 1.4218, "step": 4145 }, { "epoch": 0.530654038141559, "grad_norm": 0.58984375, "learning_rate": 0.0009494443232529776, "loss": 0.9453, "step": 4146 }, { "epoch": 0.5307820299500832, "grad_norm": 0.6484375, "learning_rate": 0.0009490302902215882, "loss": 1.3953, "step": 4147 }, { "epoch": 0.5309100217586075, "grad_norm": 0.71875, "learning_rate": 0.0009486162659501703, "loss": 1.9138, "step": 4148 }, { "epoch": 0.5310380135671317, "grad_norm": 0.609375, "learning_rate": 0.0009482022505098808, "loss": 1.2434, "step": 4149 }, { "epoch": 0.531166005375656, "grad_norm": 0.76171875, "learning_rate": 0.000947788243971875, "loss": 1.9496, "step": 4150 }, { "epoch": 0.5312939971841802, "grad_norm": 0.6875, "learning_rate": 0.0009473742464073068, "loss": 1.8374, "step": 4151 }, { "epoch": 0.5314219889927044, "grad_norm": 0.75, "learning_rate": 0.000946960257887328, "loss": 1.9386, "step": 4152 }, { "epoch": 0.5315499808012287, "grad_norm": 0.859375, "learning_rate": 0.0009465462784830895, "loss": 1.1859, "step": 4153 }, { "epoch": 0.531677972609753, "grad_norm": 0.69140625, "learning_rate": 0.0009461323082657401, "loss": 1.4338, "step": 4154 }, { "epoch": 0.5318059644182772, "grad_norm": 0.734375, "learning_rate": 0.0009457183473064279, "loss": 1.4393, "step": 4155 }, { "epoch": 0.5319339562268015, "grad_norm": 0.74609375, "learning_rate": 0.0009453043956762983, "loss": 1.6619, "step": 4156 }, { "epoch": 0.5320619480353257, "grad_norm": 0.482421875, "learning_rate": 0.0009448904534464956, "loss": 0.7814, "step": 4157 }, { "epoch": 0.53218993984385, "grad_norm": 0.84765625, "learning_rate": 0.0009444765206881626, "loss": 1.6256, "step": 4158 }, { "epoch": 0.5323179316523743, "grad_norm": 0.4921875, "learning_rate": 0.0009440625974724408, "loss": 0.7259, "step": 4159 }, { "epoch": 0.5324459234608985, "grad_norm": 0.69921875, "learning_rate": 0.0009436486838704688, "loss": 2.1991, "step": 4160 }, { "epoch": 0.5325739152694228, "grad_norm": 0.58984375, "learning_rate": 0.0009432347799533845, "loss": 1.2727, "step": 4161 }, { "epoch": 0.532701907077947, "grad_norm": 0.671875, "learning_rate": 0.0009428208857923245, "loss": 1.1378, "step": 4162 }, { "epoch": 0.5328298988864713, "grad_norm": 0.60546875, "learning_rate": 0.000942407001458423, "loss": 0.9146, "step": 4163 }, { "epoch": 0.5329578906949956, "grad_norm": 0.69140625, "learning_rate": 0.0009419931270228133, "loss": 1.24, "step": 4164 }, { "epoch": 0.5330858825035197, "grad_norm": 0.5546875, "learning_rate": 0.0009415792625566253, "loss": 1.0764, "step": 4165 }, { "epoch": 0.533213874312044, "grad_norm": 0.8046875, "learning_rate": 0.0009411654081309888, "loss": 2.4241, "step": 4166 }, { "epoch": 0.5333418661205683, "grad_norm": 0.79296875, "learning_rate": 0.0009407515638170317, "loss": 2.3665, "step": 4167 }, { "epoch": 0.5334698579290925, "grad_norm": 0.60546875, "learning_rate": 0.0009403377296858797, "loss": 1.3332, "step": 4168 }, { "epoch": 0.5335978497376168, "grad_norm": 0.69140625, "learning_rate": 0.0009399239058086568, "loss": 1.4247, "step": 4169 }, { "epoch": 0.533725841546141, "grad_norm": 0.5625, "learning_rate": 0.0009395100922564852, "loss": 1.2996, "step": 4170 }, { "epoch": 0.5338538333546653, "grad_norm": 0.671875, "learning_rate": 0.0009390962891004858, "loss": 1.8806, "step": 4171 }, { "epoch": 0.5339818251631896, "grad_norm": 0.7109375, "learning_rate": 0.0009386824964117773, "loss": 1.5017, "step": 4172 }, { "epoch": 0.5341098169717138, "grad_norm": 0.640625, "learning_rate": 0.0009382687142614767, "loss": 1.2444, "step": 4173 }, { "epoch": 0.5342378087802381, "grad_norm": 0.640625, "learning_rate": 0.0009378549427206991, "loss": 1.5432, "step": 4174 }, { "epoch": 0.5343658005887624, "grad_norm": 0.7578125, "learning_rate": 0.0009374411818605579, "loss": 1.3327, "step": 4175 }, { "epoch": 0.5344937923972866, "grad_norm": 0.65625, "learning_rate": 0.0009370274317521644, "loss": 1.4451, "step": 4176 }, { "epoch": 0.5346217842058109, "grad_norm": 0.59375, "learning_rate": 0.0009366136924666288, "loss": 1.1007, "step": 4177 }, { "epoch": 0.534749776014335, "grad_norm": 0.83984375, "learning_rate": 0.0009361999640750585, "loss": 1.5631, "step": 4178 }, { "epoch": 0.5348777678228593, "grad_norm": 0.64453125, "learning_rate": 0.0009357862466485594, "loss": 1.5469, "step": 4179 }, { "epoch": 0.5350057596313836, "grad_norm": 0.62109375, "learning_rate": 0.0009353725402582356, "loss": 1.623, "step": 4180 }, { "epoch": 0.5351337514399078, "grad_norm": 0.90234375, "learning_rate": 0.0009349588449751898, "loss": 1.4546, "step": 4181 }, { "epoch": 0.5352617432484321, "grad_norm": 0.61328125, "learning_rate": 0.0009345451608705213, "loss": 1.1013, "step": 4182 }, { "epoch": 0.5353897350569563, "grad_norm": 0.64453125, "learning_rate": 0.000934131488015329, "loss": 1.2902, "step": 4183 }, { "epoch": 0.5355177268654806, "grad_norm": 0.6484375, "learning_rate": 0.0009337178264807088, "loss": 1.4758, "step": 4184 }, { "epoch": 0.5356457186740049, "grad_norm": 0.74609375, "learning_rate": 0.0009333041763377558, "loss": 1.4345, "step": 4185 }, { "epoch": 0.5357737104825291, "grad_norm": 0.84375, "learning_rate": 0.0009328905376575628, "loss": 1.9367, "step": 4186 }, { "epoch": 0.5359017022910534, "grad_norm": 0.7890625, "learning_rate": 0.0009324769105112192, "loss": 1.8046, "step": 4187 }, { "epoch": 0.5360296940995777, "grad_norm": 0.58984375, "learning_rate": 0.000932063294969814, "loss": 1.4072, "step": 4188 }, { "epoch": 0.5361576859081019, "grad_norm": 0.69140625, "learning_rate": 0.000931649691104434, "loss": 1.194, "step": 4189 }, { "epoch": 0.5362856777166262, "grad_norm": 0.5703125, "learning_rate": 0.0009312360989861637, "loss": 0.9666, "step": 4190 }, { "epoch": 0.5364136695251503, "grad_norm": 0.65625, "learning_rate": 0.0009308225186860852, "loss": 1.0125, "step": 4191 }, { "epoch": 0.5365416613336746, "grad_norm": 0.69140625, "learning_rate": 0.0009304089502752791, "loss": 1.2579, "step": 4192 }, { "epoch": 0.5366696531421989, "grad_norm": 0.65234375, "learning_rate": 0.0009299953938248242, "loss": 1.6354, "step": 4193 }, { "epoch": 0.5367976449507231, "grad_norm": 0.73046875, "learning_rate": 0.0009295818494057967, "loss": 1.4725, "step": 4194 }, { "epoch": 0.5369256367592474, "grad_norm": 0.63671875, "learning_rate": 0.000929168317089271, "loss": 1.2696, "step": 4195 }, { "epoch": 0.5370536285677716, "grad_norm": 0.640625, "learning_rate": 0.0009287547969463191, "loss": 1.3346, "step": 4196 }, { "epoch": 0.5371816203762959, "grad_norm": 0.671875, "learning_rate": 0.0009283412890480113, "loss": 1.5821, "step": 4197 }, { "epoch": 0.5373096121848202, "grad_norm": 0.65625, "learning_rate": 0.0009279277934654156, "loss": 1.2, "step": 4198 }, { "epoch": 0.5374376039933444, "grad_norm": 0.75, "learning_rate": 0.0009275143102695984, "loss": 1.4555, "step": 4199 }, { "epoch": 0.5375655958018687, "grad_norm": 0.69921875, "learning_rate": 0.0009271008395316223, "loss": 1.1416, "step": 4200 }, { "epoch": 0.537693587610393, "grad_norm": 1.5078125, "learning_rate": 0.0009266873813225499, "loss": 1.0973, "step": 4201 }, { "epoch": 0.5378215794189172, "grad_norm": 0.68359375, "learning_rate": 0.0009262739357134406, "loss": 1.1359, "step": 4202 }, { "epoch": 0.5379495712274415, "grad_norm": 0.66796875, "learning_rate": 0.0009258605027753515, "loss": 1.659, "step": 4203 }, { "epoch": 0.5380775630359657, "grad_norm": 0.6484375, "learning_rate": 0.0009254470825793384, "loss": 1.0914, "step": 4204 }, { "epoch": 0.53820555484449, "grad_norm": 0.58984375, "learning_rate": 0.0009250336751964535, "loss": 1.2921, "step": 4205 }, { "epoch": 0.5383335466530142, "grad_norm": 0.60546875, "learning_rate": 0.0009246202806977476, "loss": 1.1683, "step": 4206 }, { "epoch": 0.5384615384615384, "grad_norm": 0.83203125, "learning_rate": 0.0009242068991542694, "loss": 1.9805, "step": 4207 }, { "epoch": 0.5385895302700627, "grad_norm": 0.69140625, "learning_rate": 0.0009237935306370659, "loss": 1.3031, "step": 4208 }, { "epoch": 0.5387175220785869, "grad_norm": 0.59765625, "learning_rate": 0.0009233801752171801, "loss": 1.2648, "step": 4209 }, { "epoch": 0.5388455138871112, "grad_norm": 0.72265625, "learning_rate": 0.0009229668329656543, "loss": 1.4744, "step": 4210 }, { "epoch": 0.5389735056956355, "grad_norm": 0.6328125, "learning_rate": 0.0009225535039535282, "loss": 1.1753, "step": 4211 }, { "epoch": 0.5391014975041597, "grad_norm": 0.66015625, "learning_rate": 0.0009221401882518393, "loss": 1.1226, "step": 4212 }, { "epoch": 0.539229489312684, "grad_norm": 0.70703125, "learning_rate": 0.0009217268859316219, "loss": 1.5472, "step": 4213 }, { "epoch": 0.5393574811212083, "grad_norm": 0.73046875, "learning_rate": 0.0009213135970639092, "loss": 1.181, "step": 4214 }, { "epoch": 0.5394854729297325, "grad_norm": 0.62109375, "learning_rate": 0.0009209003217197315, "loss": 1.6803, "step": 4215 }, { "epoch": 0.5396134647382568, "grad_norm": 0.72265625, "learning_rate": 0.000920487059970117, "loss": 1.4109, "step": 4216 }, { "epoch": 0.539741456546781, "grad_norm": 0.59375, "learning_rate": 0.0009200738118860913, "loss": 1.1871, "step": 4217 }, { "epoch": 0.5398694483553053, "grad_norm": 0.6171875, "learning_rate": 0.0009196605775386777, "loss": 1.0023, "step": 4218 }, { "epoch": 0.5399974401638296, "grad_norm": 0.71484375, "learning_rate": 0.0009192473569988973, "loss": 1.3049, "step": 4219 }, { "epoch": 0.5401254319723537, "grad_norm": 0.6640625, "learning_rate": 0.0009188341503377689, "loss": 1.3564, "step": 4220 }, { "epoch": 0.540253423780878, "grad_norm": 0.73046875, "learning_rate": 0.0009184209576263092, "loss": 1.1808, "step": 4221 }, { "epoch": 0.5403814155894022, "grad_norm": 0.76953125, "learning_rate": 0.0009180077789355306, "loss": 1.9248, "step": 4222 }, { "epoch": 0.5405094073979265, "grad_norm": 0.70703125, "learning_rate": 0.000917594614336446, "loss": 1.7733, "step": 4223 }, { "epoch": 0.5406373992064508, "grad_norm": 0.69140625, "learning_rate": 0.0009171814639000637, "loss": 1.7673, "step": 4224 }, { "epoch": 0.540765391014975, "grad_norm": 0.6484375, "learning_rate": 0.0009167683276973906, "loss": 1.4037, "step": 4225 }, { "epoch": 0.5408933828234993, "grad_norm": 0.54296875, "learning_rate": 0.0009163552057994313, "loss": 0.7341, "step": 4226 }, { "epoch": 0.5410213746320236, "grad_norm": 0.6015625, "learning_rate": 0.0009159420982771864, "loss": 1.0691, "step": 4227 }, { "epoch": 0.5411493664405478, "grad_norm": 0.5859375, "learning_rate": 0.0009155290052016559, "loss": 1.263, "step": 4228 }, { "epoch": 0.5412773582490721, "grad_norm": 1.0390625, "learning_rate": 0.0009151159266438362, "loss": 2.0575, "step": 4229 }, { "epoch": 0.5414053500575963, "grad_norm": 0.6484375, "learning_rate": 0.0009147028626747222, "loss": 1.1804, "step": 4230 }, { "epoch": 0.5415333418661206, "grad_norm": 0.765625, "learning_rate": 0.0009142898133653048, "loss": 2.2662, "step": 4231 }, { "epoch": 0.5416613336746449, "grad_norm": 0.5703125, "learning_rate": 0.0009138767787865737, "loss": 1.3173, "step": 4232 }, { "epoch": 0.541789325483169, "grad_norm": 0.55859375, "learning_rate": 0.0009134637590095154, "loss": 1.2609, "step": 4233 }, { "epoch": 0.5419173172916933, "grad_norm": 0.7265625, "learning_rate": 0.0009130507541051142, "loss": 1.0688, "step": 4234 }, { "epoch": 0.5420453091002175, "grad_norm": 0.55078125, "learning_rate": 0.0009126377641443517, "loss": 1.471, "step": 4235 }, { "epoch": 0.5421733009087418, "grad_norm": 0.67578125, "learning_rate": 0.0009122247891982069, "loss": 1.4828, "step": 4236 }, { "epoch": 0.5423012927172661, "grad_norm": 0.65625, "learning_rate": 0.0009118118293376561, "loss": 1.2028, "step": 4237 }, { "epoch": 0.5424292845257903, "grad_norm": 0.640625, "learning_rate": 0.0009113988846336732, "loss": 1.2975, "step": 4238 }, { "epoch": 0.5425572763343146, "grad_norm": 0.6953125, "learning_rate": 0.0009109859551572297, "loss": 1.2243, "step": 4239 }, { "epoch": 0.5426852681428389, "grad_norm": 0.67578125, "learning_rate": 0.000910573040979294, "loss": 1.3711, "step": 4240 }, { "epoch": 0.5428132599513631, "grad_norm": 0.6875, "learning_rate": 0.0009101601421708319, "loss": 1.1919, "step": 4241 }, { "epoch": 0.5429412517598874, "grad_norm": 0.6015625, "learning_rate": 0.0009097472588028071, "loss": 1.1624, "step": 4242 }, { "epoch": 0.5430692435684116, "grad_norm": 0.59375, "learning_rate": 0.0009093343909461806, "loss": 1.2973, "step": 4243 }, { "epoch": 0.5431972353769359, "grad_norm": 0.62890625, "learning_rate": 0.0009089215386719097, "loss": 1.1945, "step": 4244 }, { "epoch": 0.5433252271854602, "grad_norm": 1.0234375, "learning_rate": 0.00090850870205095, "loss": 1.6441, "step": 4245 }, { "epoch": 0.5434532189939844, "grad_norm": 0.90625, "learning_rate": 0.0009080958811542542, "loss": 1.5184, "step": 4246 }, { "epoch": 0.5435812108025087, "grad_norm": 0.64453125, "learning_rate": 0.0009076830760527725, "loss": 1.4538, "step": 4247 }, { "epoch": 0.543709202611033, "grad_norm": 0.58984375, "learning_rate": 0.0009072702868174524, "loss": 1.172, "step": 4248 }, { "epoch": 0.5438371944195571, "grad_norm": 0.63671875, "learning_rate": 0.0009068575135192377, "loss": 1.2365, "step": 4249 }, { "epoch": 0.5439651862280814, "grad_norm": 0.70703125, "learning_rate": 0.0009064447562290704, "loss": 1.3475, "step": 4250 }, { "epoch": 0.5440931780366056, "grad_norm": 0.62890625, "learning_rate": 0.0009060320150178897, "loss": 1.0337, "step": 4251 }, { "epoch": 0.5442211698451299, "grad_norm": 0.6171875, "learning_rate": 0.000905619289956632, "loss": 1.1218, "step": 4252 }, { "epoch": 0.5443491616536542, "grad_norm": 0.77734375, "learning_rate": 0.0009052065811162304, "loss": 1.9926, "step": 4253 }, { "epoch": 0.5444771534621784, "grad_norm": 0.59765625, "learning_rate": 0.000904793888567616, "loss": 1.1876, "step": 4254 }, { "epoch": 0.5446051452707027, "grad_norm": 0.625, "learning_rate": 0.0009043812123817164, "loss": 1.3979, "step": 4255 }, { "epoch": 0.5447331370792269, "grad_norm": 0.66796875, "learning_rate": 0.0009039685526294568, "loss": 1.4995, "step": 4256 }, { "epoch": 0.5448611288877512, "grad_norm": 0.7421875, "learning_rate": 0.0009035559093817598, "loss": 1.4829, "step": 4257 }, { "epoch": 0.5449891206962755, "grad_norm": 0.6640625, "learning_rate": 0.0009031432827095444, "loss": 1.4966, "step": 4258 }, { "epoch": 0.5451171125047997, "grad_norm": 0.71484375, "learning_rate": 0.0009027306726837272, "loss": 1.926, "step": 4259 }, { "epoch": 0.545245104313324, "grad_norm": 0.66796875, "learning_rate": 0.0009023180793752223, "loss": 1.6835, "step": 4260 }, { "epoch": 0.5453730961218483, "grad_norm": 0.69140625, "learning_rate": 0.0009019055028549404, "loss": 1.1597, "step": 4261 }, { "epoch": 0.5455010879303724, "grad_norm": 0.67578125, "learning_rate": 0.0009014929431937891, "loss": 1.3332, "step": 4262 }, { "epoch": 0.5456290797388967, "grad_norm": 0.79296875, "learning_rate": 0.0009010804004626738, "loss": 1.682, "step": 4263 }, { "epoch": 0.5457570715474209, "grad_norm": 0.76171875, "learning_rate": 0.0009006678747324967, "loss": 1.3329, "step": 4264 }, { "epoch": 0.5458850633559452, "grad_norm": 0.6015625, "learning_rate": 0.0009002553660741574, "loss": 1.1675, "step": 4265 }, { "epoch": 0.5460130551644695, "grad_norm": 0.5703125, "learning_rate": 0.0008998428745585513, "loss": 1.0613, "step": 4266 }, { "epoch": 0.5461410469729937, "grad_norm": 0.8203125, "learning_rate": 0.0008994304002565722, "loss": 2.1313, "step": 4267 }, { "epoch": 0.546269038781518, "grad_norm": 0.6640625, "learning_rate": 0.0008990179432391103, "loss": 1.6178, "step": 4268 }, { "epoch": 0.5463970305900422, "grad_norm": 0.6484375, "learning_rate": 0.0008986055035770536, "loss": 1.51, "step": 4269 }, { "epoch": 0.5465250223985665, "grad_norm": 0.72265625, "learning_rate": 0.0008981930813412865, "loss": 1.6904, "step": 4270 }, { "epoch": 0.5466530142070908, "grad_norm": 0.6640625, "learning_rate": 0.0008977806766026899, "loss": 1.5791, "step": 4271 }, { "epoch": 0.546781006015615, "grad_norm": 0.82421875, "learning_rate": 0.0008973682894321425, "loss": 2.3671, "step": 4272 }, { "epoch": 0.5469089978241393, "grad_norm": 0.65625, "learning_rate": 0.0008969559199005195, "loss": 1.3767, "step": 4273 }, { "epoch": 0.5470369896326636, "grad_norm": 0.546875, "learning_rate": 0.000896543568078694, "loss": 1.0, "step": 4274 }, { "epoch": 0.5471649814411877, "grad_norm": 0.68359375, "learning_rate": 0.0008961312340375345, "loss": 1.4543, "step": 4275 }, { "epoch": 0.547292973249712, "grad_norm": 0.81640625, "learning_rate": 0.0008957189178479077, "loss": 2.1927, "step": 4276 }, { "epoch": 0.5474209650582362, "grad_norm": 0.578125, "learning_rate": 0.0008953066195806769, "loss": 1.4581, "step": 4277 }, { "epoch": 0.5475489568667605, "grad_norm": 0.58984375, "learning_rate": 0.000894894339306702, "loss": 1.5452, "step": 4278 }, { "epoch": 0.5476769486752848, "grad_norm": 0.515625, "learning_rate": 0.0008944820770968406, "loss": 1.1578, "step": 4279 }, { "epoch": 0.547804940483809, "grad_norm": 0.6640625, "learning_rate": 0.0008940698330219459, "loss": 1.5027, "step": 4280 }, { "epoch": 0.5479329322923333, "grad_norm": 0.671875, "learning_rate": 0.0008936576071528692, "loss": 1.6731, "step": 4281 }, { "epoch": 0.5480609241008575, "grad_norm": 0.5625, "learning_rate": 0.000893245399560458, "loss": 1.1323, "step": 4282 }, { "epoch": 0.5481889159093818, "grad_norm": 0.60546875, "learning_rate": 0.0008928332103155572, "loss": 1.0187, "step": 4283 }, { "epoch": 0.5483169077179061, "grad_norm": 0.59375, "learning_rate": 0.0008924210394890078, "loss": 1.3356, "step": 4284 }, { "epoch": 0.5484448995264303, "grad_norm": 0.7578125, "learning_rate": 0.0008920088871516481, "loss": 1.6728, "step": 4285 }, { "epoch": 0.5485728913349546, "grad_norm": 0.478515625, "learning_rate": 0.0008915967533743134, "loss": 1.1037, "step": 4286 }, { "epoch": 0.5487008831434789, "grad_norm": 0.703125, "learning_rate": 0.0008911846382278352, "loss": 1.4628, "step": 4287 }, { "epoch": 0.548828874952003, "grad_norm": 0.61328125, "learning_rate": 0.0008907725417830431, "loss": 1.3463, "step": 4288 }, { "epoch": 0.5489568667605274, "grad_norm": 0.59765625, "learning_rate": 0.0008903604641107614, "loss": 0.884, "step": 4289 }, { "epoch": 0.5490848585690515, "grad_norm": 0.6015625, "learning_rate": 0.0008899484052818126, "loss": 0.9491, "step": 4290 }, { "epoch": 0.5492128503775758, "grad_norm": 0.7734375, "learning_rate": 0.0008895363653670161, "loss": 2.0905, "step": 4291 }, { "epoch": 0.5493408421861001, "grad_norm": 0.6640625, "learning_rate": 0.0008891243444371879, "loss": 1.7515, "step": 4292 }, { "epoch": 0.5494688339946243, "grad_norm": 0.62890625, "learning_rate": 0.0008887123425631396, "loss": 1.1553, "step": 4293 }, { "epoch": 0.5495968258031486, "grad_norm": 0.71484375, "learning_rate": 0.0008883003598156809, "loss": 0.97, "step": 4294 }, { "epoch": 0.5497248176116728, "grad_norm": 0.74609375, "learning_rate": 0.0008878883962656175, "loss": 2.0329, "step": 4295 }, { "epoch": 0.5498528094201971, "grad_norm": 0.640625, "learning_rate": 0.0008874764519837526, "loss": 1.518, "step": 4296 }, { "epoch": 0.5499808012287214, "grad_norm": 0.5078125, "learning_rate": 0.0008870645270408848, "loss": 1.2498, "step": 4297 }, { "epoch": 0.5501087930372456, "grad_norm": 0.59765625, "learning_rate": 0.0008866526215078103, "loss": 1.0313, "step": 4298 }, { "epoch": 0.5502367848457699, "grad_norm": 0.60546875, "learning_rate": 0.000886240735455322, "loss": 1.4513, "step": 4299 }, { "epoch": 0.5503647766542942, "grad_norm": 0.7734375, "learning_rate": 0.0008858288689542089, "loss": 1.6362, "step": 4300 }, { "epoch": 0.5504927684628184, "grad_norm": 0.67578125, "learning_rate": 0.0008854170220752573, "loss": 1.2716, "step": 4301 }, { "epoch": 0.5506207602713427, "grad_norm": 0.7578125, "learning_rate": 0.0008850051948892494, "loss": 1.9504, "step": 4302 }, { "epoch": 0.5507487520798668, "grad_norm": 0.470703125, "learning_rate": 0.0008845933874669645, "loss": 0.8233, "step": 4303 }, { "epoch": 0.5508767438883911, "grad_norm": 0.6796875, "learning_rate": 0.0008841815998791784, "loss": 1.3901, "step": 4304 }, { "epoch": 0.5510047356969154, "grad_norm": 0.79296875, "learning_rate": 0.0008837698321966639, "loss": 1.0866, "step": 4305 }, { "epoch": 0.5511327275054396, "grad_norm": 0.546875, "learning_rate": 0.0008833580844901887, "loss": 0.9383, "step": 4306 }, { "epoch": 0.5512607193139639, "grad_norm": 0.6796875, "learning_rate": 0.0008829463568305196, "loss": 1.4528, "step": 4307 }, { "epoch": 0.5513887111224881, "grad_norm": 0.78515625, "learning_rate": 0.0008825346492884183, "loss": 1.3723, "step": 4308 }, { "epoch": 0.5515167029310124, "grad_norm": 0.63671875, "learning_rate": 0.0008821229619346432, "loss": 1.2352, "step": 4309 }, { "epoch": 0.5516446947395367, "grad_norm": 0.609375, "learning_rate": 0.0008817112948399502, "loss": 1.2808, "step": 4310 }, { "epoch": 0.5517726865480609, "grad_norm": 0.5546875, "learning_rate": 0.0008812996480750899, "loss": 0.986, "step": 4311 }, { "epoch": 0.5519006783565852, "grad_norm": 0.6328125, "learning_rate": 0.0008808880217108109, "loss": 1.3825, "step": 4312 }, { "epoch": 0.5520286701651095, "grad_norm": 0.5703125, "learning_rate": 0.0008804764158178577, "loss": 1.1918, "step": 4313 }, { "epoch": 0.5521566619736337, "grad_norm": 0.6015625, "learning_rate": 0.0008800648304669725, "loss": 1.1634, "step": 4314 }, { "epoch": 0.552284653782158, "grad_norm": 0.48046875, "learning_rate": 0.0008796532657288915, "loss": 0.7287, "step": 4315 }, { "epoch": 0.5524126455906821, "grad_norm": 0.56640625, "learning_rate": 0.0008792417216743494, "loss": 1.1964, "step": 4316 }, { "epoch": 0.5525406373992064, "grad_norm": 0.64453125, "learning_rate": 0.0008788301983740769, "loss": 1.2879, "step": 4317 }, { "epoch": 0.5526686292077307, "grad_norm": 0.87890625, "learning_rate": 0.0008784186958988006, "loss": 1.4235, "step": 4318 }, { "epoch": 0.5527966210162549, "grad_norm": 0.80859375, "learning_rate": 0.0008780072143192442, "loss": 1.7344, "step": 4319 }, { "epoch": 0.5529246128247792, "grad_norm": 0.67578125, "learning_rate": 0.0008775957537061272, "loss": 1.9381, "step": 4320 }, { "epoch": 0.5530526046333035, "grad_norm": 0.546875, "learning_rate": 0.0008771843141301658, "loss": 0.8636, "step": 4321 }, { "epoch": 0.5531805964418277, "grad_norm": 0.74609375, "learning_rate": 0.0008767728956620728, "loss": 1.3933, "step": 4322 }, { "epoch": 0.553308588250352, "grad_norm": 0.671875, "learning_rate": 0.0008763614983725572, "loss": 1.5717, "step": 4323 }, { "epoch": 0.5534365800588762, "grad_norm": 0.61328125, "learning_rate": 0.0008759501223323239, "loss": 1.3145, "step": 4324 }, { "epoch": 0.5535645718674005, "grad_norm": 0.703125, "learning_rate": 0.0008755387676120746, "loss": 1.3863, "step": 4325 }, { "epoch": 0.5536925636759248, "grad_norm": 0.734375, "learning_rate": 0.0008751274342825076, "loss": 1.9962, "step": 4326 }, { "epoch": 0.553820555484449, "grad_norm": 0.5625, "learning_rate": 0.0008747161224143174, "loss": 1.0187, "step": 4327 }, { "epoch": 0.5539485472929733, "grad_norm": 0.68359375, "learning_rate": 0.0008743048320781937, "loss": 1.4983, "step": 4328 }, { "epoch": 0.5540765391014975, "grad_norm": 0.6875, "learning_rate": 0.0008738935633448242, "loss": 1.8387, "step": 4329 }, { "epoch": 0.5542045309100218, "grad_norm": 0.5078125, "learning_rate": 0.0008734823162848918, "loss": 0.7988, "step": 4330 }, { "epoch": 0.554332522718546, "grad_norm": 0.625, "learning_rate": 0.0008730710909690761, "loss": 1.3431, "step": 4331 }, { "epoch": 0.5544605145270702, "grad_norm": 0.68359375, "learning_rate": 0.0008726598874680533, "loss": 1.7849, "step": 4332 }, { "epoch": 0.5545885063355945, "grad_norm": 0.52734375, "learning_rate": 0.0008722487058524943, "loss": 0.6922, "step": 4333 }, { "epoch": 0.5547164981441188, "grad_norm": 0.5546875, "learning_rate": 0.0008718375461930681, "loss": 1.0125, "step": 4334 }, { "epoch": 0.554844489952643, "grad_norm": 0.55859375, "learning_rate": 0.0008714264085604388, "loss": 1.0779, "step": 4335 }, { "epoch": 0.5549724817611673, "grad_norm": 0.66015625, "learning_rate": 0.0008710152930252677, "loss": 1.4712, "step": 4336 }, { "epoch": 0.5551004735696915, "grad_norm": 0.7109375, "learning_rate": 0.000870604199658211, "loss": 1.4511, "step": 4337 }, { "epoch": 0.5552284653782158, "grad_norm": 0.9375, "learning_rate": 0.000870193128529922, "loss": 1.413, "step": 4338 }, { "epoch": 0.5553564571867401, "grad_norm": 0.8359375, "learning_rate": 0.0008697820797110499, "loss": 1.914, "step": 4339 }, { "epoch": 0.5554844489952643, "grad_norm": 0.59765625, "learning_rate": 0.0008693710532722401, "loss": 0.9238, "step": 4340 }, { "epoch": 0.5556124408037886, "grad_norm": 0.68359375, "learning_rate": 0.0008689600492841345, "loss": 1.7752, "step": 4341 }, { "epoch": 0.5557404326123128, "grad_norm": 0.6640625, "learning_rate": 0.0008685490678173704, "loss": 1.708, "step": 4342 }, { "epoch": 0.5558684244208371, "grad_norm": 0.61328125, "learning_rate": 0.0008681381089425815, "loss": 1.1525, "step": 4343 }, { "epoch": 0.5559964162293614, "grad_norm": 0.796875, "learning_rate": 0.0008677271727303981, "loss": 1.8011, "step": 4344 }, { "epoch": 0.5561244080378855, "grad_norm": 0.87890625, "learning_rate": 0.0008673162592514465, "loss": 1.6868, "step": 4345 }, { "epoch": 0.5562523998464098, "grad_norm": 0.578125, "learning_rate": 0.0008669053685763482, "loss": 1.1865, "step": 4346 }, { "epoch": 0.5563803916549341, "grad_norm": 0.66015625, "learning_rate": 0.0008664945007757218, "loss": 1.8071, "step": 4347 }, { "epoch": 0.5565083834634583, "grad_norm": 0.72265625, "learning_rate": 0.0008660836559201815, "loss": 0.9648, "step": 4348 }, { "epoch": 0.5566363752719826, "grad_norm": 0.71875, "learning_rate": 0.0008656728340803382, "loss": 1.1396, "step": 4349 }, { "epoch": 0.5567643670805068, "grad_norm": 0.53515625, "learning_rate": 0.0008652620353267971, "loss": 0.838, "step": 4350 }, { "epoch": 0.5568923588890311, "grad_norm": 0.80078125, "learning_rate": 0.0008648512597301613, "loss": 1.5257, "step": 4351 }, { "epoch": 0.5570203506975554, "grad_norm": 0.71484375, "learning_rate": 0.0008644405073610296, "loss": 1.5273, "step": 4352 }, { "epoch": 0.5571483425060796, "grad_norm": 0.6484375, "learning_rate": 0.0008640297782899959, "loss": 1.331, "step": 4353 }, { "epoch": 0.5572763343146039, "grad_norm": 0.74609375, "learning_rate": 0.0008636190725876514, "loss": 1.6568, "step": 4354 }, { "epoch": 0.5574043261231281, "grad_norm": 0.6875, "learning_rate": 0.0008632083903245816, "loss": 1.4478, "step": 4355 }, { "epoch": 0.5575323179316524, "grad_norm": 0.68359375, "learning_rate": 0.0008627977315713693, "loss": 1.513, "step": 4356 }, { "epoch": 0.5576603097401767, "grad_norm": 0.6484375, "learning_rate": 0.000862387096398593, "loss": 1.4821, "step": 4357 }, { "epoch": 0.5577883015487008, "grad_norm": 0.8046875, "learning_rate": 0.0008619764848768271, "loss": 2.5756, "step": 4358 }, { "epoch": 0.5579162933572251, "grad_norm": 0.77734375, "learning_rate": 0.0008615658970766414, "loss": 1.5676, "step": 4359 }, { "epoch": 0.5580442851657494, "grad_norm": 0.64453125, "learning_rate": 0.0008611553330686024, "loss": 1.0942, "step": 4360 }, { "epoch": 0.5581722769742736, "grad_norm": 0.828125, "learning_rate": 0.0008607447929232722, "loss": 1.7723, "step": 4361 }, { "epoch": 0.5583002687827979, "grad_norm": 0.6484375, "learning_rate": 0.0008603342767112086, "loss": 1.1241, "step": 4362 }, { "epoch": 0.5584282605913221, "grad_norm": 0.70703125, "learning_rate": 0.0008599237845029657, "loss": 1.461, "step": 4363 }, { "epoch": 0.5585562523998464, "grad_norm": 0.66796875, "learning_rate": 0.0008595133163690931, "loss": 1.5682, "step": 4364 }, { "epoch": 0.5586842442083707, "grad_norm": 0.58984375, "learning_rate": 0.0008591028723801364, "loss": 1.2932, "step": 4365 }, { "epoch": 0.5588122360168949, "grad_norm": 0.79296875, "learning_rate": 0.000858692452606637, "loss": 1.8981, "step": 4366 }, { "epoch": 0.5589402278254192, "grad_norm": 0.56640625, "learning_rate": 0.0008582820571191328, "loss": 1.0333, "step": 4367 }, { "epoch": 0.5590682196339434, "grad_norm": 0.56640625, "learning_rate": 0.000857871685988156, "loss": 1.2186, "step": 4368 }, { "epoch": 0.5591962114424677, "grad_norm": 0.75, "learning_rate": 0.000857461339284236, "loss": 1.3539, "step": 4369 }, { "epoch": 0.559324203250992, "grad_norm": 0.6171875, "learning_rate": 0.0008570510170778974, "loss": 1.2119, "step": 4370 }, { "epoch": 0.5594521950595162, "grad_norm": 0.490234375, "learning_rate": 0.000856640719439661, "loss": 0.8532, "step": 4371 }, { "epoch": 0.5595801868680405, "grad_norm": 0.7578125, "learning_rate": 0.0008562304464400432, "loss": 1.4385, "step": 4372 }, { "epoch": 0.5597081786765647, "grad_norm": 1.0546875, "learning_rate": 0.0008558201981495554, "loss": 1.87, "step": 4373 }, { "epoch": 0.5598361704850889, "grad_norm": 0.8515625, "learning_rate": 0.0008554099746387058, "loss": 1.352, "step": 4374 }, { "epoch": 0.5599641622936132, "grad_norm": 0.609375, "learning_rate": 0.0008549997759779981, "loss": 1.0808, "step": 4375 }, { "epoch": 0.5600921541021374, "grad_norm": 0.6484375, "learning_rate": 0.0008545896022379319, "loss": 1.636, "step": 4376 }, { "epoch": 0.5602201459106617, "grad_norm": 0.76171875, "learning_rate": 0.0008541794534890014, "loss": 1.6119, "step": 4377 }, { "epoch": 0.560348137719186, "grad_norm": 0.6953125, "learning_rate": 0.0008537693298016977, "loss": 1.1804, "step": 4378 }, { "epoch": 0.5604761295277102, "grad_norm": 0.5546875, "learning_rate": 0.0008533592312465071, "loss": 1.0555, "step": 4379 }, { "epoch": 0.5606041213362345, "grad_norm": 0.58203125, "learning_rate": 0.0008529491578939122, "loss": 1.2443, "step": 4380 }, { "epoch": 0.5607321131447587, "grad_norm": 0.7109375, "learning_rate": 0.0008525391098143903, "loss": 1.3695, "step": 4381 }, { "epoch": 0.560860104953283, "grad_norm": 0.76171875, "learning_rate": 0.0008521290870784147, "loss": 1.5425, "step": 4382 }, { "epoch": 0.5609880967618073, "grad_norm": 0.62109375, "learning_rate": 0.0008517190897564546, "loss": 1.5795, "step": 4383 }, { "epoch": 0.5611160885703315, "grad_norm": 0.5625, "learning_rate": 0.0008513091179189748, "loss": 1.0565, "step": 4384 }, { "epoch": 0.5612440803788558, "grad_norm": 0.62890625, "learning_rate": 0.0008508991716364359, "loss": 1.1234, "step": 4385 }, { "epoch": 0.5613720721873801, "grad_norm": 0.6953125, "learning_rate": 0.0008504892509792931, "loss": 1.2966, "step": 4386 }, { "epoch": 0.5615000639959042, "grad_norm": 0.65625, "learning_rate": 0.0008500793560179984, "loss": 1.485, "step": 4387 }, { "epoch": 0.5616280558044285, "grad_norm": 0.765625, "learning_rate": 0.0008496694868229987, "loss": 1.4752, "step": 4388 }, { "epoch": 0.5617560476129527, "grad_norm": 0.83203125, "learning_rate": 0.000849259643464737, "loss": 1.3528, "step": 4389 }, { "epoch": 0.561884039421477, "grad_norm": 0.58984375, "learning_rate": 0.0008488498260136512, "loss": 1.287, "step": 4390 }, { "epoch": 0.5620120312300013, "grad_norm": 0.77734375, "learning_rate": 0.0008484400345401751, "loss": 1.7458, "step": 4391 }, { "epoch": 0.5621400230385255, "grad_norm": 0.6484375, "learning_rate": 0.0008480302691147381, "loss": 1.3392, "step": 4392 }, { "epoch": 0.5622680148470498, "grad_norm": 0.6171875, "learning_rate": 0.0008476205298077649, "loss": 1.1461, "step": 4393 }, { "epoch": 0.562396006655574, "grad_norm": 0.65234375, "learning_rate": 0.0008472108166896765, "loss": 1.1251, "step": 4394 }, { "epoch": 0.5625239984640983, "grad_norm": 0.62890625, "learning_rate": 0.0008468011298308879, "loss": 1.1454, "step": 4395 }, { "epoch": 0.5626519902726226, "grad_norm": 0.60546875, "learning_rate": 0.0008463914693018105, "loss": 0.8711, "step": 4396 }, { "epoch": 0.5627799820811468, "grad_norm": 0.77734375, "learning_rate": 0.0008459818351728514, "loss": 1.2614, "step": 4397 }, { "epoch": 0.5629079738896711, "grad_norm": 0.609375, "learning_rate": 0.0008455722275144134, "loss": 1.4918, "step": 4398 }, { "epoch": 0.5630359656981954, "grad_norm": 0.671875, "learning_rate": 0.0008451626463968933, "loss": 1.4231, "step": 4399 }, { "epoch": 0.5631639575067195, "grad_norm": 0.609375, "learning_rate": 0.0008447530918906845, "loss": 1.5839, "step": 4400 }, { "epoch": 0.5632919493152438, "grad_norm": 0.671875, "learning_rate": 0.0008443435640661756, "loss": 1.1573, "step": 4401 }, { "epoch": 0.563419941123768, "grad_norm": 0.78125, "learning_rate": 0.0008439340629937509, "loss": 1.8678, "step": 4402 }, { "epoch": 0.5635479329322923, "grad_norm": 0.59765625, "learning_rate": 0.0008435245887437896, "loss": 0.9931, "step": 4403 }, { "epoch": 0.5636759247408166, "grad_norm": 0.74609375, "learning_rate": 0.0008431151413866666, "loss": 1.3119, "step": 4404 }, { "epoch": 0.5638039165493408, "grad_norm": 0.5390625, "learning_rate": 0.0008427057209927517, "loss": 1.0753, "step": 4405 }, { "epoch": 0.5639319083578651, "grad_norm": 0.63671875, "learning_rate": 0.000842296327632411, "loss": 1.16, "step": 4406 }, { "epoch": 0.5640599001663894, "grad_norm": 0.984375, "learning_rate": 0.000841886961376005, "loss": 1.3551, "step": 4407 }, { "epoch": 0.5641878919749136, "grad_norm": 0.69921875, "learning_rate": 0.00084147762229389, "loss": 1.9919, "step": 4408 }, { "epoch": 0.5643158837834379, "grad_norm": 0.69140625, "learning_rate": 0.0008410683104564176, "loss": 1.0035, "step": 4409 }, { "epoch": 0.5644438755919621, "grad_norm": 0.5234375, "learning_rate": 0.0008406590259339345, "loss": 0.8481, "step": 4410 }, { "epoch": 0.5645718674004864, "grad_norm": 1.1953125, "learning_rate": 0.0008402497687967836, "loss": 1.5049, "step": 4411 }, { "epoch": 0.5646998592090107, "grad_norm": 0.59765625, "learning_rate": 0.0008398405391153012, "loss": 1.4768, "step": 4412 }, { "epoch": 0.5648278510175349, "grad_norm": 0.70703125, "learning_rate": 0.000839431336959821, "loss": 1.7466, "step": 4413 }, { "epoch": 0.5649558428260592, "grad_norm": 0.51953125, "learning_rate": 0.0008390221624006706, "loss": 0.657, "step": 4414 }, { "epoch": 0.5650838346345833, "grad_norm": 0.5859375, "learning_rate": 0.0008386130155081735, "loss": 0.9652, "step": 4415 }, { "epoch": 0.5652118264431076, "grad_norm": 0.71875, "learning_rate": 0.0008382038963526486, "loss": 1.2852, "step": 4416 }, { "epoch": 0.5653398182516319, "grad_norm": 0.6953125, "learning_rate": 0.0008377948050044086, "loss": 1.4829, "step": 4417 }, { "epoch": 0.5654678100601561, "grad_norm": 0.671875, "learning_rate": 0.0008373857415337634, "loss": 1.5093, "step": 4418 }, { "epoch": 0.5655958018686804, "grad_norm": 0.58203125, "learning_rate": 0.0008369767060110164, "loss": 1.2521, "step": 4419 }, { "epoch": 0.5657237936772047, "grad_norm": 0.69140625, "learning_rate": 0.0008365676985064684, "loss": 1.5782, "step": 4420 }, { "epoch": 0.5658517854857289, "grad_norm": 0.765625, "learning_rate": 0.0008361587190904123, "loss": 1.8736, "step": 4421 }, { "epoch": 0.5659797772942532, "grad_norm": 0.68359375, "learning_rate": 0.0008357497678331389, "loss": 1.6473, "step": 4422 }, { "epoch": 0.5661077691027774, "grad_norm": 0.6875, "learning_rate": 0.0008353408448049327, "loss": 1.3583, "step": 4423 }, { "epoch": 0.5662357609113017, "grad_norm": 0.65234375, "learning_rate": 0.0008349319500760738, "loss": 1.2999, "step": 4424 }, { "epoch": 0.566363752719826, "grad_norm": 0.6796875, "learning_rate": 0.0008345230837168378, "loss": 1.6895, "step": 4425 }, { "epoch": 0.5664917445283502, "grad_norm": 0.953125, "learning_rate": 0.0008341142457974944, "loss": 1.9241, "step": 4426 }, { "epoch": 0.5666197363368745, "grad_norm": 0.68359375, "learning_rate": 0.0008337054363883094, "loss": 1.1482, "step": 4427 }, { "epoch": 0.5667477281453986, "grad_norm": 0.640625, "learning_rate": 0.0008332966555595431, "loss": 1.5623, "step": 4428 }, { "epoch": 0.5668757199539229, "grad_norm": 0.7109375, "learning_rate": 0.0008328879033814516, "loss": 1.1108, "step": 4429 }, { "epoch": 0.5670037117624472, "grad_norm": 0.62109375, "learning_rate": 0.000832479179924285, "loss": 1.2429, "step": 4430 }, { "epoch": 0.5671317035709714, "grad_norm": 0.55859375, "learning_rate": 0.0008320704852582895, "loss": 1.0244, "step": 4431 }, { "epoch": 0.5672596953794957, "grad_norm": 0.63671875, "learning_rate": 0.0008316618194537054, "loss": 1.1599, "step": 4432 }, { "epoch": 0.56738768718802, "grad_norm": 0.78515625, "learning_rate": 0.0008312531825807695, "loss": 1.4514, "step": 4433 }, { "epoch": 0.5675156789965442, "grad_norm": 0.80078125, "learning_rate": 0.0008308445747097115, "loss": 1.5856, "step": 4434 }, { "epoch": 0.5676436708050685, "grad_norm": 0.72265625, "learning_rate": 0.0008304359959107578, "loss": 1.8614, "step": 4435 }, { "epoch": 0.5677716626135927, "grad_norm": 0.5390625, "learning_rate": 0.0008300274462541295, "loss": 1.1103, "step": 4436 }, { "epoch": 0.567899654422117, "grad_norm": 0.5859375, "learning_rate": 0.0008296189258100422, "loss": 0.9999, "step": 4437 }, { "epoch": 0.5680276462306413, "grad_norm": 0.66015625, "learning_rate": 0.0008292104346487075, "loss": 1.562, "step": 4438 }, { "epoch": 0.5681556380391655, "grad_norm": 0.640625, "learning_rate": 0.00082880197284033, "loss": 1.3044, "step": 4439 }, { "epoch": 0.5682836298476898, "grad_norm": 0.8515625, "learning_rate": 0.0008283935404551113, "loss": 1.7831, "step": 4440 }, { "epoch": 0.568411621656214, "grad_norm": 0.609375, "learning_rate": 0.0008279851375632468, "loss": 1.2123, "step": 4441 }, { "epoch": 0.5685396134647382, "grad_norm": 0.66796875, "learning_rate": 0.0008275767642349276, "loss": 1.3365, "step": 4442 }, { "epoch": 0.5686676052732625, "grad_norm": 0.68359375, "learning_rate": 0.0008271684205403386, "loss": 1.1675, "step": 4443 }, { "epoch": 0.5687955970817867, "grad_norm": 0.6015625, "learning_rate": 0.0008267601065496611, "loss": 1.3143, "step": 4444 }, { "epoch": 0.568923588890311, "grad_norm": 0.71875, "learning_rate": 0.0008263518223330697, "loss": 1.735, "step": 4445 }, { "epoch": 0.5690515806988353, "grad_norm": 0.59765625, "learning_rate": 0.0008259435679607351, "loss": 1.312, "step": 4446 }, { "epoch": 0.5691795725073595, "grad_norm": 0.640625, "learning_rate": 0.0008255353435028227, "loss": 1.4377, "step": 4447 }, { "epoch": 0.5693075643158838, "grad_norm": 0.61328125, "learning_rate": 0.0008251271490294918, "loss": 1.2474, "step": 4448 }, { "epoch": 0.569435556124408, "grad_norm": 0.69140625, "learning_rate": 0.0008247189846108978, "loss": 1.9107, "step": 4449 }, { "epoch": 0.5695635479329323, "grad_norm": 0.6796875, "learning_rate": 0.00082431085031719, "loss": 1.4296, "step": 4450 }, { "epoch": 0.5696915397414566, "grad_norm": 0.69921875, "learning_rate": 0.0008239027462185134, "loss": 1.5815, "step": 4451 }, { "epoch": 0.5698195315499808, "grad_norm": 0.62890625, "learning_rate": 0.0008234946723850067, "loss": 1.0631, "step": 4452 }, { "epoch": 0.5699475233585051, "grad_norm": 0.7734375, "learning_rate": 0.0008230866288868044, "loss": 1.3246, "step": 4453 }, { "epoch": 0.5700755151670293, "grad_norm": 0.55859375, "learning_rate": 0.0008226786157940352, "loss": 0.7762, "step": 4454 }, { "epoch": 0.5702035069755536, "grad_norm": 0.63671875, "learning_rate": 0.0008222706331768231, "loss": 1.1439, "step": 4455 }, { "epoch": 0.5703314987840779, "grad_norm": 0.57421875, "learning_rate": 0.0008218626811052865, "loss": 1.2006, "step": 4456 }, { "epoch": 0.570459490592602, "grad_norm": 0.67578125, "learning_rate": 0.0008214547596495377, "loss": 1.515, "step": 4457 }, { "epoch": 0.5705874824011263, "grad_norm": 0.6484375, "learning_rate": 0.0008210468688796857, "loss": 1.4144, "step": 4458 }, { "epoch": 0.5707154742096506, "grad_norm": 0.6640625, "learning_rate": 0.0008206390088658326, "loss": 1.4718, "step": 4459 }, { "epoch": 0.5708434660181748, "grad_norm": 0.61328125, "learning_rate": 0.0008202311796780766, "loss": 1.1659, "step": 4460 }, { "epoch": 0.5709714578266991, "grad_norm": 0.81640625, "learning_rate": 0.0008198233813865083, "loss": 1.9878, "step": 4461 }, { "epoch": 0.5710994496352233, "grad_norm": 0.77734375, "learning_rate": 0.0008194156140612156, "loss": 1.8078, "step": 4462 }, { "epoch": 0.5712274414437476, "grad_norm": 0.80078125, "learning_rate": 0.0008190078777722794, "loss": 1.7769, "step": 4463 }, { "epoch": 0.5713554332522719, "grad_norm": 0.7421875, "learning_rate": 0.0008186001725897761, "loss": 1.6706, "step": 4464 }, { "epoch": 0.5714834250607961, "grad_norm": 0.6796875, "learning_rate": 0.0008181924985837761, "loss": 1.6283, "step": 4465 }, { "epoch": 0.5716114168693204, "grad_norm": 0.640625, "learning_rate": 0.000817784855824345, "loss": 1.098, "step": 4466 }, { "epoch": 0.5717394086778446, "grad_norm": 0.6171875, "learning_rate": 0.0008173772443815429, "loss": 1.3043, "step": 4467 }, { "epoch": 0.5718674004863689, "grad_norm": 0.88671875, "learning_rate": 0.0008169696643254243, "loss": 1.5847, "step": 4468 }, { "epoch": 0.5719953922948932, "grad_norm": 0.625, "learning_rate": 0.0008165621157260387, "loss": 1.5264, "step": 4469 }, { "epoch": 0.5721233841034173, "grad_norm": 0.66796875, "learning_rate": 0.0008161545986534297, "loss": 1.3202, "step": 4470 }, { "epoch": 0.5722513759119416, "grad_norm": 0.58984375, "learning_rate": 0.0008157471131776356, "loss": 1.1569, "step": 4471 }, { "epoch": 0.5723793677204659, "grad_norm": 0.609375, "learning_rate": 0.0008153396593686897, "loss": 1.3465, "step": 4472 }, { "epoch": 0.5725073595289901, "grad_norm": 0.7578125, "learning_rate": 0.0008149322372966195, "loss": 1.4418, "step": 4473 }, { "epoch": 0.5726353513375144, "grad_norm": 0.73046875, "learning_rate": 0.0008145248470314468, "loss": 1.5695, "step": 4474 }, { "epoch": 0.5727633431460386, "grad_norm": 0.66015625, "learning_rate": 0.0008141174886431885, "loss": 1.3151, "step": 4475 }, { "epoch": 0.5728913349545629, "grad_norm": 0.6171875, "learning_rate": 0.0008137101622018556, "loss": 1.2453, "step": 4476 }, { "epoch": 0.5730193267630872, "grad_norm": 0.59375, "learning_rate": 0.0008133028677774539, "loss": 1.0484, "step": 4477 }, { "epoch": 0.5731473185716114, "grad_norm": 0.5078125, "learning_rate": 0.0008128956054399838, "loss": 0.6781, "step": 4478 }, { "epoch": 0.5732753103801357, "grad_norm": 0.72265625, "learning_rate": 0.0008124883752594393, "loss": 1.2821, "step": 4479 }, { "epoch": 0.57340330218866, "grad_norm": 0.7578125, "learning_rate": 0.0008120811773058097, "loss": 1.7967, "step": 4480 }, { "epoch": 0.5735312939971842, "grad_norm": 0.71484375, "learning_rate": 0.0008116740116490787, "loss": 1.5003, "step": 4481 }, { "epoch": 0.5736592858057085, "grad_norm": 0.73828125, "learning_rate": 0.0008112668783592249, "loss": 1.7481, "step": 4482 }, { "epoch": 0.5737872776142326, "grad_norm": 0.6484375, "learning_rate": 0.0008108597775062198, "loss": 1.7487, "step": 4483 }, { "epoch": 0.573915269422757, "grad_norm": 0.69140625, "learning_rate": 0.0008104527091600306, "loss": 1.5298, "step": 4484 }, { "epoch": 0.5740432612312812, "grad_norm": 0.59765625, "learning_rate": 0.0008100456733906189, "loss": 1.0491, "step": 4485 }, { "epoch": 0.5741712530398054, "grad_norm": 0.93359375, "learning_rate": 0.00080963867026794, "loss": 1.5736, "step": 4486 }, { "epoch": 0.5742992448483297, "grad_norm": 0.71484375, "learning_rate": 0.0008092316998619444, "loss": 1.8488, "step": 4487 }, { "epoch": 0.5744272366568539, "grad_norm": 0.71875, "learning_rate": 0.0008088247622425762, "loss": 1.6848, "step": 4488 }, { "epoch": 0.5745552284653782, "grad_norm": 0.7265625, "learning_rate": 0.0008084178574797743, "loss": 1.6393, "step": 4489 }, { "epoch": 0.5746832202739025, "grad_norm": 0.67578125, "learning_rate": 0.0008080109856434721, "loss": 1.6666, "step": 4490 }, { "epoch": 0.5748112120824267, "grad_norm": 0.6484375, "learning_rate": 0.0008076041468035971, "loss": 1.2944, "step": 4491 }, { "epoch": 0.574939203890951, "grad_norm": 0.78515625, "learning_rate": 0.0008071973410300709, "loss": 1.832, "step": 4492 }, { "epoch": 0.5750671956994753, "grad_norm": 0.6484375, "learning_rate": 0.0008067905683928099, "loss": 1.16, "step": 4493 }, { "epoch": 0.5751951875079995, "grad_norm": 0.78125, "learning_rate": 0.0008063838289617243, "loss": 1.7755, "step": 4494 }, { "epoch": 0.5753231793165238, "grad_norm": 0.7578125, "learning_rate": 0.0008059771228067197, "loss": 1.2727, "step": 4495 }, { "epoch": 0.575451171125048, "grad_norm": 0.63671875, "learning_rate": 0.0008055704499976936, "loss": 1.4275, "step": 4496 }, { "epoch": 0.5755791629335723, "grad_norm": 0.64453125, "learning_rate": 0.0008051638106045406, "loss": 1.3017, "step": 4497 }, { "epoch": 0.5757071547420965, "grad_norm": 0.61328125, "learning_rate": 0.0008047572046971479, "loss": 1.251, "step": 4498 }, { "epoch": 0.5758351465506207, "grad_norm": 0.59375, "learning_rate": 0.0008043506323453975, "loss": 1.2082, "step": 4499 }, { "epoch": 0.575963138359145, "grad_norm": 0.51953125, "learning_rate": 0.0008039440936191655, "loss": 0.8828, "step": 4500 }, { "epoch": 0.5760911301676692, "grad_norm": 0.671875, "learning_rate": 0.0008035375885883217, "loss": 1.2343, "step": 4501 }, { "epoch": 0.5762191219761935, "grad_norm": 0.578125, "learning_rate": 0.0008031311173227309, "loss": 0.8972, "step": 4502 }, { "epoch": 0.5763471137847178, "grad_norm": 0.83203125, "learning_rate": 0.0008027246798922513, "loss": 1.3374, "step": 4503 }, { "epoch": 0.576475105593242, "grad_norm": 0.58203125, "learning_rate": 0.0008023182763667369, "loss": 1.0575, "step": 4504 }, { "epoch": 0.5766030974017663, "grad_norm": 0.60546875, "learning_rate": 0.0008019119068160338, "loss": 1.202, "step": 4505 }, { "epoch": 0.5767310892102906, "grad_norm": 0.6640625, "learning_rate": 0.0008015055713099833, "loss": 1.2835, "step": 4506 }, { "epoch": 0.5768590810188148, "grad_norm": 0.55859375, "learning_rate": 0.0008010992699184208, "loss": 1.0384, "step": 4507 }, { "epoch": 0.5769870728273391, "grad_norm": 0.7734375, "learning_rate": 0.0008006930027111761, "loss": 1.6066, "step": 4508 }, { "epoch": 0.5771150646358633, "grad_norm": 0.72265625, "learning_rate": 0.0008002867697580727, "loss": 1.1585, "step": 4509 }, { "epoch": 0.5772430564443876, "grad_norm": 0.65234375, "learning_rate": 0.000799880571128928, "loss": 1.1551, "step": 4510 }, { "epoch": 0.5773710482529119, "grad_norm": 0.7109375, "learning_rate": 0.0007994744068935541, "loss": 1.3523, "step": 4511 }, { "epoch": 0.577499040061436, "grad_norm": 0.57421875, "learning_rate": 0.0007990682771217567, "loss": 1.4698, "step": 4512 }, { "epoch": 0.5776270318699603, "grad_norm": 0.62109375, "learning_rate": 0.0007986621818833364, "loss": 1.22, "step": 4513 }, { "epoch": 0.5777550236784845, "grad_norm": 0.65234375, "learning_rate": 0.0007982561212480863, "loss": 1.5497, "step": 4514 }, { "epoch": 0.5778830154870088, "grad_norm": 0.69921875, "learning_rate": 0.0007978500952857952, "loss": 1.1446, "step": 4515 }, { "epoch": 0.5780110072955331, "grad_norm": 0.67578125, "learning_rate": 0.000797444104066245, "loss": 1.9027, "step": 4516 }, { "epoch": 0.5781389991040573, "grad_norm": 0.77734375, "learning_rate": 0.0007970381476592125, "loss": 1.3971, "step": 4517 }, { "epoch": 0.5782669909125816, "grad_norm": 0.609375, "learning_rate": 0.0007966322261344665, "loss": 1.2502, "step": 4518 }, { "epoch": 0.5783949827211059, "grad_norm": 0.90625, "learning_rate": 0.0007962263395617724, "loss": 1.7286, "step": 4519 }, { "epoch": 0.5785229745296301, "grad_norm": 0.66015625, "learning_rate": 0.0007958204880108879, "loss": 1.2071, "step": 4520 }, { "epoch": 0.5786509663381544, "grad_norm": 0.671875, "learning_rate": 0.0007954146715515654, "loss": 1.4566, "step": 4521 }, { "epoch": 0.5787789581466786, "grad_norm": 0.671875, "learning_rate": 0.0007950088902535513, "loss": 1.4658, "step": 4522 }, { "epoch": 0.5789069499552029, "grad_norm": 0.73046875, "learning_rate": 0.0007946031441865848, "loss": 1.4755, "step": 4523 }, { "epoch": 0.5790349417637272, "grad_norm": 0.59375, "learning_rate": 0.0007941974334204008, "loss": 0.8621, "step": 4524 }, { "epoch": 0.5791629335722513, "grad_norm": 0.55078125, "learning_rate": 0.0007937917580247265, "loss": 1.0185, "step": 4525 }, { "epoch": 0.5792909253807756, "grad_norm": 0.6796875, "learning_rate": 0.0007933861180692849, "loss": 1.6932, "step": 4526 }, { "epoch": 0.5794189171892998, "grad_norm": 0.68359375, "learning_rate": 0.0007929805136237908, "loss": 1.27, "step": 4527 }, { "epoch": 0.5795469089978241, "grad_norm": 0.76171875, "learning_rate": 0.0007925749447579544, "loss": 1.5213, "step": 4528 }, { "epoch": 0.5796749008063484, "grad_norm": 0.66015625, "learning_rate": 0.000792169411541479, "loss": 1.3332, "step": 4529 }, { "epoch": 0.5798028926148726, "grad_norm": 0.51953125, "learning_rate": 0.0007917639140440622, "loss": 1.2427, "step": 4530 }, { "epoch": 0.5799308844233969, "grad_norm": 0.640625, "learning_rate": 0.0007913584523353957, "loss": 1.2353, "step": 4531 }, { "epoch": 0.5800588762319212, "grad_norm": 0.91796875, "learning_rate": 0.000790953026485164, "loss": 1.9407, "step": 4532 }, { "epoch": 0.5801868680404454, "grad_norm": 1.390625, "learning_rate": 0.0007905476365630465, "loss": 1.4423, "step": 4533 }, { "epoch": 0.5803148598489697, "grad_norm": 1.015625, "learning_rate": 0.000790142282638716, "loss": 1.1971, "step": 4534 }, { "epoch": 0.5804428516574939, "grad_norm": 0.6015625, "learning_rate": 0.0007897369647818394, "loss": 0.9681, "step": 4535 }, { "epoch": 0.5805708434660182, "grad_norm": 0.609375, "learning_rate": 0.0007893316830620765, "loss": 0.99, "step": 4536 }, { "epoch": 0.5806988352745425, "grad_norm": 0.64453125, "learning_rate": 0.0007889264375490818, "loss": 1.693, "step": 4537 }, { "epoch": 0.5808268270830667, "grad_norm": 0.59375, "learning_rate": 0.0007885212283125036, "loss": 1.1069, "step": 4538 }, { "epoch": 0.580954818891591, "grad_norm": 0.55859375, "learning_rate": 0.0007881160554219835, "loss": 1.0052, "step": 4539 }, { "epoch": 0.5810828107001151, "grad_norm": 0.67578125, "learning_rate": 0.0007877109189471575, "loss": 1.3596, "step": 4540 }, { "epoch": 0.5812108025086394, "grad_norm": 0.8671875, "learning_rate": 0.0007873058189576537, "loss": 2.118, "step": 4541 }, { "epoch": 0.5813387943171637, "grad_norm": 0.66796875, "learning_rate": 0.000786900755523096, "loss": 1.2682, "step": 4542 }, { "epoch": 0.5814667861256879, "grad_norm": 1.1796875, "learning_rate": 0.0007864957287131012, "loss": 2.0278, "step": 4543 }, { "epoch": 0.5815947779342122, "grad_norm": 0.72265625, "learning_rate": 0.0007860907385972796, "loss": 1.5356, "step": 4544 }, { "epoch": 0.5817227697427365, "grad_norm": 0.80859375, "learning_rate": 0.0007856857852452349, "loss": 1.3216, "step": 4545 }, { "epoch": 0.5818507615512607, "grad_norm": 0.64453125, "learning_rate": 0.0007852808687265653, "loss": 1.246, "step": 4546 }, { "epoch": 0.581978753359785, "grad_norm": 0.6953125, "learning_rate": 0.000784875989110862, "loss": 1.3242, "step": 4547 }, { "epoch": 0.5821067451683092, "grad_norm": 0.609375, "learning_rate": 0.0007844711464677106, "loss": 1.3714, "step": 4548 }, { "epoch": 0.5822347369768335, "grad_norm": 1.109375, "learning_rate": 0.0007840663408666893, "loss": 2.1842, "step": 4549 }, { "epoch": 0.5823627287853578, "grad_norm": 0.6328125, "learning_rate": 0.0007836615723773709, "loss": 1.0917, "step": 4550 }, { "epoch": 0.582490720593882, "grad_norm": 0.58203125, "learning_rate": 0.0007832568410693213, "loss": 0.6619, "step": 4551 }, { "epoch": 0.5826187124024063, "grad_norm": 0.88671875, "learning_rate": 0.0007828521470121001, "loss": 2.2204, "step": 4552 }, { "epoch": 0.5827467042109306, "grad_norm": 0.5546875, "learning_rate": 0.0007824474902752608, "loss": 0.9002, "step": 4553 }, { "epoch": 0.5828746960194547, "grad_norm": 0.671875, "learning_rate": 0.0007820428709283499, "loss": 1.4881, "step": 4554 }, { "epoch": 0.583002687827979, "grad_norm": 0.578125, "learning_rate": 0.0007816382890409079, "loss": 0.8103, "step": 4555 }, { "epoch": 0.5831306796365032, "grad_norm": 0.78515625, "learning_rate": 0.0007812337446824689, "loss": 1.372, "step": 4556 }, { "epoch": 0.5832586714450275, "grad_norm": 0.54296875, "learning_rate": 0.0007808292379225604, "loss": 1.2082, "step": 4557 }, { "epoch": 0.5833866632535518, "grad_norm": 0.5859375, "learning_rate": 0.0007804247688307031, "loss": 1.1683, "step": 4558 }, { "epoch": 0.583514655062076, "grad_norm": 0.64453125, "learning_rate": 0.000780020337476412, "loss": 0.9364, "step": 4559 }, { "epoch": 0.5836426468706003, "grad_norm": 0.54296875, "learning_rate": 0.0007796159439291949, "loss": 1.1179, "step": 4560 }, { "epoch": 0.5837706386791245, "grad_norm": 0.55078125, "learning_rate": 0.0007792115882585536, "loss": 0.9364, "step": 4561 }, { "epoch": 0.5838986304876488, "grad_norm": 0.92578125, "learning_rate": 0.0007788072705339834, "loss": 1.8486, "step": 4562 }, { "epoch": 0.5840266222961731, "grad_norm": 0.80078125, "learning_rate": 0.0007784029908249723, "loss": 1.9848, "step": 4563 }, { "epoch": 0.5841546141046973, "grad_norm": 0.6015625, "learning_rate": 0.0007779987492010021, "loss": 1.1959, "step": 4564 }, { "epoch": 0.5842826059132216, "grad_norm": 0.60546875, "learning_rate": 0.0007775945457315492, "loss": 1.0815, "step": 4565 }, { "epoch": 0.5844105977217459, "grad_norm": 0.6171875, "learning_rate": 0.0007771903804860825, "loss": 1.2552, "step": 4566 }, { "epoch": 0.58453858953027, "grad_norm": 0.54296875, "learning_rate": 0.0007767862535340635, "loss": 1.1754, "step": 4567 }, { "epoch": 0.5846665813387943, "grad_norm": 0.671875, "learning_rate": 0.0007763821649449483, "loss": 1.2125, "step": 4568 }, { "epoch": 0.5847945731473185, "grad_norm": 0.671875, "learning_rate": 0.0007759781147881863, "loss": 1.37, "step": 4569 }, { "epoch": 0.5849225649558428, "grad_norm": 0.6484375, "learning_rate": 0.00077557410313322, "loss": 1.2623, "step": 4570 }, { "epoch": 0.5850505567643671, "grad_norm": 0.71875, "learning_rate": 0.0007751701300494856, "loss": 1.487, "step": 4571 }, { "epoch": 0.5851785485728913, "grad_norm": 0.57421875, "learning_rate": 0.0007747661956064118, "loss": 0.8685, "step": 4572 }, { "epoch": 0.5853065403814156, "grad_norm": 0.765625, "learning_rate": 0.0007743622998734217, "loss": 1.3459, "step": 4573 }, { "epoch": 0.5854345321899398, "grad_norm": 0.546875, "learning_rate": 0.0007739584429199314, "loss": 1.0747, "step": 4574 }, { "epoch": 0.5855625239984641, "grad_norm": 0.65625, "learning_rate": 0.0007735546248153503, "loss": 1.208, "step": 4575 }, { "epoch": 0.5856905158069884, "grad_norm": 0.48828125, "learning_rate": 0.0007731508456290805, "loss": 1.0512, "step": 4576 }, { "epoch": 0.5858185076155126, "grad_norm": 0.6640625, "learning_rate": 0.0007727471054305188, "loss": 1.3662, "step": 4577 }, { "epoch": 0.5859464994240369, "grad_norm": 0.52734375, "learning_rate": 0.000772343404289054, "loss": 0.9988, "step": 4578 }, { "epoch": 0.5860744912325612, "grad_norm": 0.5703125, "learning_rate": 0.0007719397422740691, "loss": 0.8572, "step": 4579 }, { "epoch": 0.5862024830410854, "grad_norm": 0.828125, "learning_rate": 0.0007715361194549394, "loss": 1.4125, "step": 4580 }, { "epoch": 0.5863304748496097, "grad_norm": 0.6171875, "learning_rate": 0.0007711325359010344, "loss": 1.1526, "step": 4581 }, { "epoch": 0.5864584666581338, "grad_norm": 0.76953125, "learning_rate": 0.0007707289916817164, "loss": 1.7563, "step": 4582 }, { "epoch": 0.5865864584666581, "grad_norm": 0.81640625, "learning_rate": 0.0007703254868663412, "loss": 1.9295, "step": 4583 }, { "epoch": 0.5867144502751824, "grad_norm": 0.8984375, "learning_rate": 0.0007699220215242577, "loss": 1.5186, "step": 4584 }, { "epoch": 0.5868424420837066, "grad_norm": 0.609375, "learning_rate": 0.0007695185957248074, "loss": 1.263, "step": 4585 }, { "epoch": 0.5869704338922309, "grad_norm": 0.52734375, "learning_rate": 0.0007691152095373259, "loss": 0.9177, "step": 4586 }, { "epoch": 0.5870984257007551, "grad_norm": 0.625, "learning_rate": 0.0007687118630311417, "loss": 1.3851, "step": 4587 }, { "epoch": 0.5872264175092794, "grad_norm": 0.6171875, "learning_rate": 0.0007683085562755769, "loss": 1.2397, "step": 4588 }, { "epoch": 0.5873544093178037, "grad_norm": 0.76171875, "learning_rate": 0.0007679052893399454, "loss": 1.3353, "step": 4589 }, { "epoch": 0.5874824011263279, "grad_norm": 0.671875, "learning_rate": 0.0007675020622935557, "loss": 1.7398, "step": 4590 }, { "epoch": 0.5876103929348522, "grad_norm": 0.62890625, "learning_rate": 0.0007670988752057087, "loss": 1.2832, "step": 4591 }, { "epoch": 0.5877383847433765, "grad_norm": 0.6328125, "learning_rate": 0.0007666957281456989, "loss": 1.3132, "step": 4592 }, { "epoch": 0.5878663765519007, "grad_norm": 0.69140625, "learning_rate": 0.0007662926211828136, "loss": 1.7802, "step": 4593 }, { "epoch": 0.587994368360425, "grad_norm": 0.69140625, "learning_rate": 0.000765889554386333, "loss": 1.0574, "step": 4594 }, { "epoch": 0.5881223601689491, "grad_norm": 0.75390625, "learning_rate": 0.000765486527825531, "loss": 2.0212, "step": 4595 }, { "epoch": 0.5882503519774734, "grad_norm": 0.59765625, "learning_rate": 0.0007650835415696739, "loss": 1.1435, "step": 4596 }, { "epoch": 0.5883783437859977, "grad_norm": 0.58984375, "learning_rate": 0.000764680595688022, "loss": 1.0434, "step": 4597 }, { "epoch": 0.5885063355945219, "grad_norm": 0.63671875, "learning_rate": 0.0007642776902498275, "loss": 1.4035, "step": 4598 }, { "epoch": 0.5886343274030462, "grad_norm": 0.68359375, "learning_rate": 0.0007638748253243365, "loss": 1.237, "step": 4599 }, { "epoch": 0.5887623192115704, "grad_norm": 0.7265625, "learning_rate": 0.0007634720009807879, "loss": 1.3477, "step": 4600 }, { "epoch": 0.5888903110200947, "grad_norm": 0.6171875, "learning_rate": 0.0007630692172884137, "loss": 1.0663, "step": 4601 }, { "epoch": 0.589018302828619, "grad_norm": 0.6171875, "learning_rate": 0.0007626664743164382, "loss": 1.2084, "step": 4602 }, { "epoch": 0.5891462946371432, "grad_norm": 0.70703125, "learning_rate": 0.0007622637721340801, "loss": 1.2349, "step": 4603 }, { "epoch": 0.5892742864456675, "grad_norm": 0.67578125, "learning_rate": 0.0007618611108105497, "loss": 1.2576, "step": 4604 }, { "epoch": 0.5894022782541918, "grad_norm": 0.6328125, "learning_rate": 0.0007614584904150514, "loss": 1.5495, "step": 4605 }, { "epoch": 0.589530270062716, "grad_norm": 0.640625, "learning_rate": 0.000761055911016782, "loss": 1.0249, "step": 4606 }, { "epoch": 0.5896582618712403, "grad_norm": 0.6875, "learning_rate": 0.0007606533726849308, "loss": 1.4756, "step": 4607 }, { "epoch": 0.5897862536797644, "grad_norm": 0.58984375, "learning_rate": 0.0007602508754886809, "loss": 0.9497, "step": 4608 }, { "epoch": 0.5899142454882887, "grad_norm": 0.470703125, "learning_rate": 0.0007598484194972076, "loss": 0.8174, "step": 4609 }, { "epoch": 0.590042237296813, "grad_norm": 0.66015625, "learning_rate": 0.0007594460047796806, "loss": 1.2791, "step": 4610 }, { "epoch": 0.5901702291053372, "grad_norm": 0.7109375, "learning_rate": 0.00075904363140526, "loss": 1.8278, "step": 4611 }, { "epoch": 0.5902982209138615, "grad_norm": 0.5390625, "learning_rate": 0.0007586412994431009, "loss": 1.0455, "step": 4612 }, { "epoch": 0.5904262127223857, "grad_norm": 0.734375, "learning_rate": 0.0007582390089623505, "loss": 1.6248, "step": 4613 }, { "epoch": 0.59055420453091, "grad_norm": 0.8671875, "learning_rate": 0.0007578367600321489, "loss": 1.4511, "step": 4614 }, { "epoch": 0.5906821963394343, "grad_norm": 0.578125, "learning_rate": 0.0007574345527216293, "loss": 1.1509, "step": 4615 }, { "epoch": 0.5908101881479585, "grad_norm": 0.6640625, "learning_rate": 0.0007570323870999172, "loss": 1.0214, "step": 4616 }, { "epoch": 0.5909381799564828, "grad_norm": 0.625, "learning_rate": 0.0007566302632361313, "loss": 1.3928, "step": 4617 }, { "epoch": 0.5910661717650071, "grad_norm": 0.6171875, "learning_rate": 0.0007562281811993835, "loss": 1.1588, "step": 4618 }, { "epoch": 0.5911941635735313, "grad_norm": 0.7890625, "learning_rate": 0.0007558261410587781, "loss": 1.121, "step": 4619 }, { "epoch": 0.5913221553820556, "grad_norm": 0.56640625, "learning_rate": 0.0007554241428834118, "loss": 0.8999, "step": 4620 }, { "epoch": 0.5914501471905798, "grad_norm": 0.55078125, "learning_rate": 0.0007550221867423745, "loss": 0.95, "step": 4621 }, { "epoch": 0.591578138999104, "grad_norm": 0.765625, "learning_rate": 0.0007546202727047494, "loss": 1.5063, "step": 4622 }, { "epoch": 0.5917061308076283, "grad_norm": 0.77734375, "learning_rate": 0.0007542184008396116, "loss": 1.8455, "step": 4623 }, { "epoch": 0.5918341226161525, "grad_norm": 0.734375, "learning_rate": 0.0007538165712160298, "loss": 1.6098, "step": 4624 }, { "epoch": 0.5919621144246768, "grad_norm": 0.76953125, "learning_rate": 0.0007534147839030638, "loss": 1.454, "step": 4625 }, { "epoch": 0.5920901062332011, "grad_norm": 0.609375, "learning_rate": 0.0007530130389697681, "loss": 1.1808, "step": 4626 }, { "epoch": 0.5922180980417253, "grad_norm": 0.82421875, "learning_rate": 0.0007526113364851891, "loss": 1.485, "step": 4627 }, { "epoch": 0.5923460898502496, "grad_norm": 0.6484375, "learning_rate": 0.0007522096765183662, "loss": 1.4046, "step": 4628 }, { "epoch": 0.5924740816587738, "grad_norm": 0.8046875, "learning_rate": 0.0007518080591383302, "loss": 1.8756, "step": 4629 }, { "epoch": 0.5926020734672981, "grad_norm": 0.6640625, "learning_rate": 0.0007514064844141061, "loss": 1.8065, "step": 4630 }, { "epoch": 0.5927300652758224, "grad_norm": 0.63671875, "learning_rate": 0.000751004952414711, "loss": 1.4318, "step": 4631 }, { "epoch": 0.5928580570843466, "grad_norm": 0.8671875, "learning_rate": 0.0007506034632091549, "loss": 1.6261, "step": 4632 }, { "epoch": 0.5929860488928709, "grad_norm": 0.58984375, "learning_rate": 0.0007502020168664399, "loss": 1.6221, "step": 4633 }, { "epoch": 0.5931140407013951, "grad_norm": 0.703125, "learning_rate": 0.0007498006134555613, "loss": 1.2176, "step": 4634 }, { "epoch": 0.5932420325099194, "grad_norm": 0.6953125, "learning_rate": 0.0007493992530455067, "loss": 1.1739, "step": 4635 }, { "epoch": 0.5933700243184437, "grad_norm": 0.61328125, "learning_rate": 0.0007489979357052563, "loss": 1.2085, "step": 4636 }, { "epoch": 0.5934980161269678, "grad_norm": 0.68359375, "learning_rate": 0.0007485966615037833, "loss": 1.3527, "step": 4637 }, { "epoch": 0.5936260079354921, "grad_norm": 0.6015625, "learning_rate": 0.0007481954305100527, "loss": 1.0749, "step": 4638 }, { "epoch": 0.5937539997440164, "grad_norm": 0.56640625, "learning_rate": 0.000747794242793023, "loss": 1.3163, "step": 4639 }, { "epoch": 0.5938819915525406, "grad_norm": 0.78125, "learning_rate": 0.0007473930984216446, "loss": 1.5381, "step": 4640 }, { "epoch": 0.5940099833610649, "grad_norm": 0.58203125, "learning_rate": 0.0007469919974648609, "loss": 0.8487, "step": 4641 }, { "epoch": 0.5941379751695891, "grad_norm": 0.5625, "learning_rate": 0.0007465909399916073, "loss": 1.1553, "step": 4642 }, { "epoch": 0.5942659669781134, "grad_norm": 0.765625, "learning_rate": 0.000746189926070812, "loss": 1.677, "step": 4643 }, { "epoch": 0.5943939587866377, "grad_norm": 0.6328125, "learning_rate": 0.000745788955771396, "loss": 1.3615, "step": 4644 }, { "epoch": 0.5945219505951619, "grad_norm": 0.80078125, "learning_rate": 0.0007453880291622726, "loss": 2.0083, "step": 4645 }, { "epoch": 0.5946499424036862, "grad_norm": 0.62109375, "learning_rate": 0.0007449871463123477, "loss": 1.1726, "step": 4646 }, { "epoch": 0.5947779342122104, "grad_norm": 0.625, "learning_rate": 0.0007445863072905185, "loss": 0.8827, "step": 4647 }, { "epoch": 0.5949059260207347, "grad_norm": 0.640625, "learning_rate": 0.0007441855121656768, "loss": 1.1548, "step": 4648 }, { "epoch": 0.595033917829259, "grad_norm": 0.703125, "learning_rate": 0.0007437847610067052, "loss": 1.7901, "step": 4649 }, { "epoch": 0.5951619096377831, "grad_norm": 0.625, "learning_rate": 0.0007433840538824801, "loss": 1.0021, "step": 4650 }, { "epoch": 0.5952899014463074, "grad_norm": 0.7421875, "learning_rate": 0.0007429833908618682, "loss": 1.3999, "step": 4651 }, { "epoch": 0.5954178932548317, "grad_norm": 0.609375, "learning_rate": 0.0007425827720137308, "loss": 1.0789, "step": 4652 }, { "epoch": 0.5955458850633559, "grad_norm": 0.83203125, "learning_rate": 0.0007421821974069205, "loss": 1.7312, "step": 4653 }, { "epoch": 0.5956738768718802, "grad_norm": 0.671875, "learning_rate": 0.0007417816671102824, "loss": 1.2687, "step": 4654 }, { "epoch": 0.5958018686804044, "grad_norm": 0.78125, "learning_rate": 0.000741381181192655, "loss": 1.5126, "step": 4655 }, { "epoch": 0.5959298604889287, "grad_norm": 0.5546875, "learning_rate": 0.0007409807397228671, "loss": 1.0053, "step": 4656 }, { "epoch": 0.596057852297453, "grad_norm": 0.65625, "learning_rate": 0.0007405803427697416, "loss": 1.4998, "step": 4657 }, { "epoch": 0.5961858441059772, "grad_norm": 0.71484375, "learning_rate": 0.0007401799904020933, "loss": 1.8025, "step": 4658 }, { "epoch": 0.5963138359145015, "grad_norm": 0.703125, "learning_rate": 0.0007397796826887294, "loss": 1.0622, "step": 4659 }, { "epoch": 0.5964418277230257, "grad_norm": 0.6171875, "learning_rate": 0.0007393794196984488, "loss": 1.1864, "step": 4660 }, { "epoch": 0.59656981953155, "grad_norm": 0.578125, "learning_rate": 0.0007389792015000434, "loss": 1.4319, "step": 4661 }, { "epoch": 0.5966978113400743, "grad_norm": 0.6875, "learning_rate": 0.0007385790281622973, "loss": 1.3739, "step": 4662 }, { "epoch": 0.5968258031485985, "grad_norm": 0.58984375, "learning_rate": 0.0007381788997539868, "loss": 1.5197, "step": 4663 }, { "epoch": 0.5969537949571228, "grad_norm": 0.5859375, "learning_rate": 0.0007377788163438803, "loss": 1.3252, "step": 4664 }, { "epoch": 0.597081786765647, "grad_norm": 0.625, "learning_rate": 0.0007373787780007386, "loss": 1.2227, "step": 4665 }, { "epoch": 0.5972097785741712, "grad_norm": 0.68359375, "learning_rate": 0.0007369787847933149, "loss": 1.3285, "step": 4666 }, { "epoch": 0.5973377703826955, "grad_norm": 0.62890625, "learning_rate": 0.0007365788367903543, "loss": 1.4279, "step": 4667 }, { "epoch": 0.5974657621912197, "grad_norm": 0.58984375, "learning_rate": 0.0007361789340605952, "loss": 1.3899, "step": 4668 }, { "epoch": 0.597593753999744, "grad_norm": 0.609375, "learning_rate": 0.0007357790766727662, "loss": 1.389, "step": 4669 }, { "epoch": 0.5977217458082683, "grad_norm": 0.75, "learning_rate": 0.0007353792646955896, "loss": 1.1124, "step": 4670 }, { "epoch": 0.5978497376167925, "grad_norm": 0.609375, "learning_rate": 0.00073497949819778, "loss": 1.2151, "step": 4671 }, { "epoch": 0.5979777294253168, "grad_norm": 0.66796875, "learning_rate": 0.0007345797772480439, "loss": 1.1354, "step": 4672 }, { "epoch": 0.598105721233841, "grad_norm": 0.6171875, "learning_rate": 0.0007341801019150791, "loss": 1.2121, "step": 4673 }, { "epoch": 0.5982337130423653, "grad_norm": 0.5625, "learning_rate": 0.0007337804722675766, "loss": 0.9902, "step": 4674 }, { "epoch": 0.5983617048508896, "grad_norm": 0.65625, "learning_rate": 0.0007333808883742192, "loss": 1.476, "step": 4675 }, { "epoch": 0.5984896966594138, "grad_norm": 0.78125, "learning_rate": 0.0007329813503036822, "loss": 1.6588, "step": 4676 }, { "epoch": 0.5986176884679381, "grad_norm": 0.75, "learning_rate": 0.0007325818581246325, "loss": 2.3395, "step": 4677 }, { "epoch": 0.5987456802764624, "grad_norm": 0.80859375, "learning_rate": 0.0007321824119057292, "loss": 1.1687, "step": 4678 }, { "epoch": 0.5988736720849865, "grad_norm": 0.60546875, "learning_rate": 0.0007317830117156234, "loss": 1.2539, "step": 4679 }, { "epoch": 0.5990016638935108, "grad_norm": 0.87890625, "learning_rate": 0.0007313836576229591, "loss": 1.6219, "step": 4680 }, { "epoch": 0.599129655702035, "grad_norm": 0.8125, "learning_rate": 0.0007309843496963715, "loss": 1.3939, "step": 4681 }, { "epoch": 0.5992576475105593, "grad_norm": 0.58984375, "learning_rate": 0.0007305850880044881, "loss": 1.2223, "step": 4682 }, { "epoch": 0.5993856393190836, "grad_norm": 0.6640625, "learning_rate": 0.0007301858726159282, "loss": 1.7117, "step": 4683 }, { "epoch": 0.5995136311276078, "grad_norm": 0.6015625, "learning_rate": 0.0007297867035993041, "loss": 1.6053, "step": 4684 }, { "epoch": 0.5996416229361321, "grad_norm": 0.59375, "learning_rate": 0.0007293875810232194, "loss": 1.0262, "step": 4685 }, { "epoch": 0.5997696147446563, "grad_norm": 0.5859375, "learning_rate": 0.000728988504956269, "loss": 1.3633, "step": 4686 }, { "epoch": 0.5998976065531806, "grad_norm": 1.265625, "learning_rate": 0.0007285894754670413, "loss": 1.8994, "step": 4687 }, { "epoch": 0.6000255983617049, "grad_norm": 0.7890625, "learning_rate": 0.0007281904926241156, "loss": 1.1981, "step": 4688 }, { "epoch": 0.6001535901702291, "grad_norm": 0.640625, "learning_rate": 0.000727791556496064, "loss": 1.1131, "step": 4689 }, { "epoch": 0.6002815819787534, "grad_norm": 0.5390625, "learning_rate": 0.0007273926671514503, "loss": 0.9762, "step": 4690 }, { "epoch": 0.6004095737872777, "grad_norm": 0.68359375, "learning_rate": 0.0007269938246588293, "loss": 1.2084, "step": 4691 }, { "epoch": 0.6005375655958018, "grad_norm": 0.953125, "learning_rate": 0.0007265950290867489, "loss": 1.1265, "step": 4692 }, { "epoch": 0.6006655574043261, "grad_norm": 0.62890625, "learning_rate": 0.0007261962805037485, "loss": 1.2312, "step": 4693 }, { "epoch": 0.6007935492128503, "grad_norm": 0.83203125, "learning_rate": 0.0007257975789783604, "loss": 1.5783, "step": 4694 }, { "epoch": 0.6009215410213746, "grad_norm": 0.76953125, "learning_rate": 0.0007253989245791068, "loss": 1.6508, "step": 4695 }, { "epoch": 0.6010495328298989, "grad_norm": 0.671875, "learning_rate": 0.0007250003173745035, "loss": 1.4562, "step": 4696 }, { "epoch": 0.6011775246384231, "grad_norm": 0.66796875, "learning_rate": 0.0007246017574330572, "loss": 1.5864, "step": 4697 }, { "epoch": 0.6013055164469474, "grad_norm": 0.671875, "learning_rate": 0.0007242032448232671, "loss": 1.8514, "step": 4698 }, { "epoch": 0.6014335082554717, "grad_norm": 0.7890625, "learning_rate": 0.0007238047796136247, "loss": 1.5123, "step": 4699 }, { "epoch": 0.6015615000639959, "grad_norm": 0.609375, "learning_rate": 0.0007234063618726116, "loss": 1.167, "step": 4700 }, { "epoch": 0.6016894918725202, "grad_norm": 0.66796875, "learning_rate": 0.0007230079916687029, "loss": 1.2442, "step": 4701 }, { "epoch": 0.6018174836810444, "grad_norm": 0.5234375, "learning_rate": 0.0007226096690703651, "loss": 0.8747, "step": 4702 }, { "epoch": 0.6019454754895687, "grad_norm": 0.60546875, "learning_rate": 0.0007222113941460565, "loss": 1.2378, "step": 4703 }, { "epoch": 0.602073467298093, "grad_norm": 0.61328125, "learning_rate": 0.0007218131669642265, "loss": 1.0564, "step": 4704 }, { "epoch": 0.6022014591066172, "grad_norm": 0.53515625, "learning_rate": 0.0007214149875933172, "loss": 0.8584, "step": 4705 }, { "epoch": 0.6023294509151415, "grad_norm": 0.72265625, "learning_rate": 0.0007210168561017625, "loss": 1.7615, "step": 4706 }, { "epoch": 0.6024574427236656, "grad_norm": 0.54296875, "learning_rate": 0.0007206187725579873, "loss": 1.1488, "step": 4707 }, { "epoch": 0.6025854345321899, "grad_norm": 0.66796875, "learning_rate": 0.0007202207370304093, "loss": 1.3649, "step": 4708 }, { "epoch": 0.6027134263407142, "grad_norm": 0.6484375, "learning_rate": 0.0007198227495874368, "loss": 1.4799, "step": 4709 }, { "epoch": 0.6028414181492384, "grad_norm": 0.5859375, "learning_rate": 0.0007194248102974704, "loss": 1.4204, "step": 4710 }, { "epoch": 0.6029694099577627, "grad_norm": 0.71484375, "learning_rate": 0.0007190269192289027, "loss": 1.425, "step": 4711 }, { "epoch": 0.603097401766287, "grad_norm": 0.65625, "learning_rate": 0.0007186290764501182, "loss": 1.6658, "step": 4712 }, { "epoch": 0.6032253935748112, "grad_norm": 0.71484375, "learning_rate": 0.0007182312820294915, "loss": 1.2663, "step": 4713 }, { "epoch": 0.6033533853833355, "grad_norm": 0.55078125, "learning_rate": 0.0007178335360353907, "loss": 1.3327, "step": 4714 }, { "epoch": 0.6034813771918597, "grad_norm": 0.671875, "learning_rate": 0.0007174358385361745, "loss": 1.0643, "step": 4715 }, { "epoch": 0.603609369000384, "grad_norm": 0.56640625, "learning_rate": 0.0007170381896001947, "loss": 0.9233, "step": 4716 }, { "epoch": 0.6037373608089083, "grad_norm": 0.66796875, "learning_rate": 0.0007166405892957926, "loss": 1.702, "step": 4717 }, { "epoch": 0.6038653526174325, "grad_norm": 0.68359375, "learning_rate": 0.0007162430376913027, "loss": 1.157, "step": 4718 }, { "epoch": 0.6039933444259568, "grad_norm": 0.5703125, "learning_rate": 0.0007158455348550505, "loss": 1.1133, "step": 4719 }, { "epoch": 0.6041213362344809, "grad_norm": 0.640625, "learning_rate": 0.0007154480808553536, "loss": 1.2952, "step": 4720 }, { "epoch": 0.6042493280430052, "grad_norm": 0.65234375, "learning_rate": 0.0007150506757605211, "loss": 0.8525, "step": 4721 }, { "epoch": 0.6043773198515295, "grad_norm": 0.6484375, "learning_rate": 0.0007146533196388528, "loss": 1.5574, "step": 4722 }, { "epoch": 0.6045053116600537, "grad_norm": 0.6171875, "learning_rate": 0.0007142560125586412, "loss": 1.1473, "step": 4723 }, { "epoch": 0.604633303468578, "grad_norm": 0.79296875, "learning_rate": 0.0007138587545881701, "loss": 1.6452, "step": 4724 }, { "epoch": 0.6047612952771023, "grad_norm": 0.66796875, "learning_rate": 0.0007134615457957148, "loss": 1.2599, "step": 4725 }, { "epoch": 0.6048892870856265, "grad_norm": 0.80859375, "learning_rate": 0.0007130643862495416, "loss": 1.4838, "step": 4726 }, { "epoch": 0.6050172788941508, "grad_norm": 0.59765625, "learning_rate": 0.0007126672760179091, "loss": 1.0988, "step": 4727 }, { "epoch": 0.605145270702675, "grad_norm": 0.58984375, "learning_rate": 0.0007122702151690671, "loss": 1.23, "step": 4728 }, { "epoch": 0.6052732625111993, "grad_norm": 0.5703125, "learning_rate": 0.0007118732037712569, "loss": 1.2082, "step": 4729 }, { "epoch": 0.6054012543197236, "grad_norm": 0.60546875, "learning_rate": 0.0007114762418927121, "loss": 1.028, "step": 4730 }, { "epoch": 0.6055292461282478, "grad_norm": 1.03125, "learning_rate": 0.0007110793296016555, "loss": 1.515, "step": 4731 }, { "epoch": 0.6056572379367721, "grad_norm": 0.6484375, "learning_rate": 0.000710682466966304, "loss": 1.4934, "step": 4732 }, { "epoch": 0.6057852297452962, "grad_norm": 0.75390625, "learning_rate": 0.0007102856540548648, "loss": 2.0649, "step": 4733 }, { "epoch": 0.6059132215538205, "grad_norm": 0.6015625, "learning_rate": 0.0007098888909355368, "loss": 1.1106, "step": 4734 }, { "epoch": 0.6060412133623448, "grad_norm": 0.54296875, "learning_rate": 0.0007094921776765094, "loss": 0.9666, "step": 4735 }, { "epoch": 0.606169205170869, "grad_norm": 0.58203125, "learning_rate": 0.0007090955143459648, "loss": 1.327, "step": 4736 }, { "epoch": 0.6062971969793933, "grad_norm": 0.62109375, "learning_rate": 0.0007086989010120759, "loss": 1.2004, "step": 4737 }, { "epoch": 0.6064251887879176, "grad_norm": 0.67578125, "learning_rate": 0.0007083023377430069, "loss": 1.7891, "step": 4738 }, { "epoch": 0.6065531805964418, "grad_norm": 0.7421875, "learning_rate": 0.0007079058246069145, "loss": 1.8124, "step": 4739 }, { "epoch": 0.6066811724049661, "grad_norm": 0.765625, "learning_rate": 0.0007075093616719449, "loss": 1.3497, "step": 4740 }, { "epoch": 0.6068091642134903, "grad_norm": 0.74609375, "learning_rate": 0.0007071129490062372, "loss": 1.4755, "step": 4741 }, { "epoch": 0.6069371560220146, "grad_norm": 0.67578125, "learning_rate": 0.000706716586677921, "loss": 1.8759, "step": 4742 }, { "epoch": 0.6070651478305389, "grad_norm": 0.72265625, "learning_rate": 0.0007063202747551182, "loss": 1.9106, "step": 4743 }, { "epoch": 0.6071931396390631, "grad_norm": 0.87890625, "learning_rate": 0.0007059240133059408, "loss": 1.7444, "step": 4744 }, { "epoch": 0.6073211314475874, "grad_norm": 0.69921875, "learning_rate": 0.000705527802398493, "loss": 1.827, "step": 4745 }, { "epoch": 0.6074491232561116, "grad_norm": 0.50390625, "learning_rate": 0.0007051316421008702, "loss": 1.2123, "step": 4746 }, { "epoch": 0.6075771150646359, "grad_norm": 0.55078125, "learning_rate": 0.000704735532481159, "loss": 1.326, "step": 4747 }, { "epoch": 0.6077051068731602, "grad_norm": 0.5625, "learning_rate": 0.000704339473607437, "loss": 1.22, "step": 4748 }, { "epoch": 0.6078330986816843, "grad_norm": 0.6875, "learning_rate": 0.0007039434655477735, "loss": 1.554, "step": 4749 }, { "epoch": 0.6079610904902086, "grad_norm": 0.71484375, "learning_rate": 0.0007035475083702287, "loss": 1.3969, "step": 4750 }, { "epoch": 0.6080890822987329, "grad_norm": 0.91015625, "learning_rate": 0.0007031516021428545, "loss": 2.0431, "step": 4751 }, { "epoch": 0.6082170741072571, "grad_norm": 0.55859375, "learning_rate": 0.0007027557469336945, "loss": 1.1029, "step": 4752 }, { "epoch": 0.6083450659157814, "grad_norm": 0.66015625, "learning_rate": 0.0007023599428107814, "loss": 1.6144, "step": 4753 }, { "epoch": 0.6084730577243056, "grad_norm": 0.55078125, "learning_rate": 0.0007019641898421412, "loss": 1.083, "step": 4754 }, { "epoch": 0.6086010495328299, "grad_norm": 0.609375, "learning_rate": 0.0007015684880957905, "loss": 1.558, "step": 4755 }, { "epoch": 0.6087290413413542, "grad_norm": 0.56640625, "learning_rate": 0.0007011728376397378, "loss": 1.3267, "step": 4756 }, { "epoch": 0.6088570331498784, "grad_norm": 0.8125, "learning_rate": 0.0007007772385419807, "loss": 2.1946, "step": 4757 }, { "epoch": 0.6089850249584027, "grad_norm": 0.640625, "learning_rate": 0.00070038169087051, "loss": 1.242, "step": 4758 }, { "epoch": 0.6091130167669269, "grad_norm": 0.58203125, "learning_rate": 0.0006999861946933071, "loss": 1.0725, "step": 4759 }, { "epoch": 0.6092410085754512, "grad_norm": 0.61328125, "learning_rate": 0.0006995907500783443, "loss": 1.162, "step": 4760 }, { "epoch": 0.6093690003839755, "grad_norm": 0.60546875, "learning_rate": 0.0006991953570935853, "loss": 1.0569, "step": 4761 }, { "epoch": 0.6094969921924996, "grad_norm": 0.640625, "learning_rate": 0.0006988000158069845, "loss": 1.2618, "step": 4762 }, { "epoch": 0.6096249840010239, "grad_norm": 0.79296875, "learning_rate": 0.0006984047262864879, "loss": 1.5448, "step": 4763 }, { "epoch": 0.6097529758095482, "grad_norm": 0.8828125, "learning_rate": 0.0006980094886000324, "loss": 1.4866, "step": 4764 }, { "epoch": 0.6098809676180724, "grad_norm": 0.671875, "learning_rate": 0.0006976143028155459, "loss": 2.0439, "step": 4765 }, { "epoch": 0.6100089594265967, "grad_norm": 0.65625, "learning_rate": 0.0006972191690009477, "loss": 1.3675, "step": 4766 }, { "epoch": 0.6101369512351209, "grad_norm": 0.91015625, "learning_rate": 0.0006968240872241478, "loss": 1.6122, "step": 4767 }, { "epoch": 0.6102649430436452, "grad_norm": 0.69921875, "learning_rate": 0.0006964290575530475, "loss": 1.2167, "step": 4768 }, { "epoch": 0.6103929348521695, "grad_norm": 0.609375, "learning_rate": 0.0006960340800555392, "loss": 1.2139, "step": 4769 }, { "epoch": 0.6105209266606937, "grad_norm": 0.58203125, "learning_rate": 0.0006956391547995057, "loss": 0.7637, "step": 4770 }, { "epoch": 0.610648918469218, "grad_norm": 0.6875, "learning_rate": 0.0006952442818528219, "loss": 1.6221, "step": 4771 }, { "epoch": 0.6107769102777423, "grad_norm": 0.65625, "learning_rate": 0.0006948494612833526, "loss": 1.2529, "step": 4772 }, { "epoch": 0.6109049020862665, "grad_norm": 0.63671875, "learning_rate": 0.0006944546931589546, "loss": 1.1695, "step": 4773 }, { "epoch": 0.6110328938947908, "grad_norm": 0.85546875, "learning_rate": 0.0006940599775474752, "loss": 1.494, "step": 4774 }, { "epoch": 0.611160885703315, "grad_norm": 0.59375, "learning_rate": 0.000693665314516752, "loss": 1.3506, "step": 4775 }, { "epoch": 0.6112888775118392, "grad_norm": 0.546875, "learning_rate": 0.0006932707041346147, "loss": 1.0163, "step": 4776 }, { "epoch": 0.6114168693203635, "grad_norm": 0.828125, "learning_rate": 0.0006928761464688835, "loss": 1.7052, "step": 4777 }, { "epoch": 0.6115448611288877, "grad_norm": 0.578125, "learning_rate": 0.0006924816415873703, "loss": 0.9351, "step": 4778 }, { "epoch": 0.611672852937412, "grad_norm": 0.73828125, "learning_rate": 0.0006920871895578757, "loss": 1.4286, "step": 4779 }, { "epoch": 0.6118008447459362, "grad_norm": 0.80078125, "learning_rate": 0.0006916927904481934, "loss": 1.443, "step": 4780 }, { "epoch": 0.6119288365544605, "grad_norm": 0.6796875, "learning_rate": 0.0006912984443261072, "loss": 1.4222, "step": 4781 }, { "epoch": 0.6120568283629848, "grad_norm": 0.59375, "learning_rate": 0.000690904151259392, "loss": 1.2301, "step": 4782 }, { "epoch": 0.612184820171509, "grad_norm": 0.69140625, "learning_rate": 0.0006905099113158138, "loss": 1.3626, "step": 4783 }, { "epoch": 0.6123128119800333, "grad_norm": 0.625, "learning_rate": 0.0006901157245631284, "loss": 1.4507, "step": 4784 }, { "epoch": 0.6124408037885576, "grad_norm": 0.546875, "learning_rate": 0.0006897215910690836, "loss": 0.947, "step": 4785 }, { "epoch": 0.6125687955970818, "grad_norm": 0.61328125, "learning_rate": 0.0006893275109014174, "loss": 1.3153, "step": 4786 }, { "epoch": 0.6126967874056061, "grad_norm": 0.6953125, "learning_rate": 0.0006889334841278595, "loss": 1.4223, "step": 4787 }, { "epoch": 0.6128247792141303, "grad_norm": 0.6875, "learning_rate": 0.0006885395108161289, "loss": 1.5116, "step": 4788 }, { "epoch": 0.6129527710226546, "grad_norm": 0.64453125, "learning_rate": 0.0006881455910339369, "loss": 1.3433, "step": 4789 }, { "epoch": 0.6130807628311788, "grad_norm": 0.81640625, "learning_rate": 0.0006877517248489848, "loss": 2.3247, "step": 4790 }, { "epoch": 0.613208754639703, "grad_norm": 0.5078125, "learning_rate": 0.0006873579123289649, "loss": 0.8419, "step": 4791 }, { "epoch": 0.6133367464482273, "grad_norm": 0.8671875, "learning_rate": 0.0006869641535415605, "loss": 1.9199, "step": 4792 }, { "epoch": 0.6134647382567515, "grad_norm": 0.71875, "learning_rate": 0.0006865704485544451, "loss": 1.027, "step": 4793 }, { "epoch": 0.6135927300652758, "grad_norm": 0.6484375, "learning_rate": 0.0006861767974352833, "loss": 1.8671, "step": 4794 }, { "epoch": 0.6137207218738001, "grad_norm": 0.671875, "learning_rate": 0.0006857832002517307, "loss": 1.1517, "step": 4795 }, { "epoch": 0.6138487136823243, "grad_norm": 0.6875, "learning_rate": 0.0006853896570714335, "loss": 1.6693, "step": 4796 }, { "epoch": 0.6139767054908486, "grad_norm": 0.58984375, "learning_rate": 0.000684996167962028, "loss": 1.5, "step": 4797 }, { "epoch": 0.6141046972993729, "grad_norm": 0.796875, "learning_rate": 0.0006846027329911417, "loss": 1.4504, "step": 4798 }, { "epoch": 0.6142326891078971, "grad_norm": 0.73828125, "learning_rate": 0.0006842093522263929, "loss": 1.525, "step": 4799 }, { "epoch": 0.6143606809164214, "grad_norm": 0.6796875, "learning_rate": 0.0006838160257353911, "loss": 1.4458, "step": 4800 }, { "epoch": 0.6144886727249456, "grad_norm": 0.63671875, "learning_rate": 0.0006834227535857349, "loss": 1.2935, "step": 4801 }, { "epoch": 0.6146166645334699, "grad_norm": 0.6171875, "learning_rate": 0.0006830295358450149, "loss": 1.4761, "step": 4802 }, { "epoch": 0.6147446563419942, "grad_norm": 0.87109375, "learning_rate": 0.000682636372580812, "loss": 0.9584, "step": 4803 }, { "epoch": 0.6148726481505183, "grad_norm": 0.7265625, "learning_rate": 0.0006822432638606974, "loss": 1.3589, "step": 4804 }, { "epoch": 0.6150006399590426, "grad_norm": 0.7265625, "learning_rate": 0.0006818502097522339, "loss": 1.6068, "step": 4805 }, { "epoch": 0.6151286317675668, "grad_norm": 0.703125, "learning_rate": 0.0006814572103229735, "loss": 1.6604, "step": 4806 }, { "epoch": 0.6152566235760911, "grad_norm": 0.67578125, "learning_rate": 0.0006810642656404596, "loss": 1.8452, "step": 4807 }, { "epoch": 0.6153846153846154, "grad_norm": 0.625, "learning_rate": 0.0006806713757722265, "loss": 0.9694, "step": 4808 }, { "epoch": 0.6155126071931396, "grad_norm": 0.84375, "learning_rate": 0.0006802785407857986, "loss": 1.6006, "step": 4809 }, { "epoch": 0.6156405990016639, "grad_norm": 0.609375, "learning_rate": 0.0006798857607486908, "loss": 1.2345, "step": 4810 }, { "epoch": 0.6157685908101882, "grad_norm": 0.65234375, "learning_rate": 0.0006794930357284086, "loss": 1.3567, "step": 4811 }, { "epoch": 0.6158965826187124, "grad_norm": 0.85546875, "learning_rate": 0.0006791003657924484, "loss": 1.6533, "step": 4812 }, { "epoch": 0.6160245744272367, "grad_norm": 0.62890625, "learning_rate": 0.000678707751008297, "loss": 1.2787, "step": 4813 }, { "epoch": 0.6161525662357609, "grad_norm": 0.61328125, "learning_rate": 0.0006783151914434318, "loss": 0.876, "step": 4814 }, { "epoch": 0.6162805580442852, "grad_norm": 0.62109375, "learning_rate": 0.0006779226871653195, "loss": 1.4385, "step": 4815 }, { "epoch": 0.6164085498528095, "grad_norm": 0.56640625, "learning_rate": 0.0006775302382414194, "loss": 1.2716, "step": 4816 }, { "epoch": 0.6165365416613336, "grad_norm": 0.77734375, "learning_rate": 0.0006771378447391798, "loss": 1.6251, "step": 4817 }, { "epoch": 0.616664533469858, "grad_norm": 0.58984375, "learning_rate": 0.0006767455067260403, "loss": 1.2591, "step": 4818 }, { "epoch": 0.6167925252783821, "grad_norm": 0.625, "learning_rate": 0.0006763532242694298, "loss": 1.1345, "step": 4819 }, { "epoch": 0.6169205170869064, "grad_norm": 0.640625, "learning_rate": 0.0006759609974367689, "loss": 1.119, "step": 4820 }, { "epoch": 0.6170485088954307, "grad_norm": 0.5703125, "learning_rate": 0.0006755688262954677, "loss": 0.8338, "step": 4821 }, { "epoch": 0.6171765007039549, "grad_norm": 0.62109375, "learning_rate": 0.0006751767109129278, "loss": 1.5406, "step": 4822 }, { "epoch": 0.6173044925124792, "grad_norm": 0.73046875, "learning_rate": 0.0006747846513565406, "loss": 1.8614, "step": 4823 }, { "epoch": 0.6174324843210035, "grad_norm": 0.80859375, "learning_rate": 0.0006743926476936873, "loss": 1.2091, "step": 4824 }, { "epoch": 0.6175604761295277, "grad_norm": 0.6328125, "learning_rate": 0.0006740006999917405, "loss": 1.6475, "step": 4825 }, { "epoch": 0.617688467938052, "grad_norm": 0.72265625, "learning_rate": 0.0006736088083180625, "loss": 1.2553, "step": 4826 }, { "epoch": 0.6178164597465762, "grad_norm": 0.625, "learning_rate": 0.0006732169727400067, "loss": 0.8051, "step": 4827 }, { "epoch": 0.6179444515551005, "grad_norm": 0.6875, "learning_rate": 0.0006728251933249159, "loss": 1.1643, "step": 4828 }, { "epoch": 0.6180724433636248, "grad_norm": 0.59375, "learning_rate": 0.0006724334701401239, "loss": 1.4546, "step": 4829 }, { "epoch": 0.618200435172149, "grad_norm": 0.703125, "learning_rate": 0.0006720418032529547, "loss": 1.1787, "step": 4830 }, { "epoch": 0.6183284269806733, "grad_norm": 0.546875, "learning_rate": 0.0006716501927307231, "loss": 1.223, "step": 4831 }, { "epoch": 0.6184564187891974, "grad_norm": 0.60546875, "learning_rate": 0.0006712586386407328, "loss": 1.5472, "step": 4832 }, { "epoch": 0.6185844105977217, "grad_norm": 0.703125, "learning_rate": 0.0006708671410502793, "loss": 1.8289, "step": 4833 }, { "epoch": 0.618712402406246, "grad_norm": 0.7109375, "learning_rate": 0.0006704757000266477, "loss": 1.372, "step": 4834 }, { "epoch": 0.6188403942147702, "grad_norm": 0.58203125, "learning_rate": 0.0006700843156371136, "loss": 1.2011, "step": 4835 }, { "epoch": 0.6189683860232945, "grad_norm": 0.5546875, "learning_rate": 0.000669692987948943, "loss": 0.9845, "step": 4836 }, { "epoch": 0.6190963778318188, "grad_norm": 0.72265625, "learning_rate": 0.0006693017170293909, "loss": 1.6607, "step": 4837 }, { "epoch": 0.619224369640343, "grad_norm": 0.703125, "learning_rate": 0.0006689105029457045, "loss": 1.7245, "step": 4838 }, { "epoch": 0.6193523614488673, "grad_norm": 0.9140625, "learning_rate": 0.00066851934576512, "loss": 1.5095, "step": 4839 }, { "epoch": 0.6194803532573915, "grad_norm": 0.75390625, "learning_rate": 0.0006681282455548645, "loss": 1.9428, "step": 4840 }, { "epoch": 0.6196083450659158, "grad_norm": 0.7734375, "learning_rate": 0.0006677372023821542, "loss": 1.1136, "step": 4841 }, { "epoch": 0.6197363368744401, "grad_norm": 0.7421875, "learning_rate": 0.0006673462163141966, "loss": 1.6873, "step": 4842 }, { "epoch": 0.6198643286829643, "grad_norm": 0.71484375, "learning_rate": 0.0006669552874181888, "loss": 1.5206, "step": 4843 }, { "epoch": 0.6199923204914886, "grad_norm": 0.58203125, "learning_rate": 0.0006665644157613182, "loss": 0.9308, "step": 4844 }, { "epoch": 0.6201203123000127, "grad_norm": 0.80078125, "learning_rate": 0.0006661736014107635, "loss": 1.5269, "step": 4845 }, { "epoch": 0.620248304108537, "grad_norm": 1.1171875, "learning_rate": 0.0006657828444336911, "loss": 1.4726, "step": 4846 }, { "epoch": 0.6203762959170613, "grad_norm": 1.078125, "learning_rate": 0.0006653921448972594, "loss": 2.1705, "step": 4847 }, { "epoch": 0.6205042877255855, "grad_norm": 0.60546875, "learning_rate": 0.0006650015028686167, "loss": 1.2939, "step": 4848 }, { "epoch": 0.6206322795341098, "grad_norm": 0.609375, "learning_rate": 0.000664610918414901, "loss": 1.4533, "step": 4849 }, { "epoch": 0.6207602713426341, "grad_norm": 0.59375, "learning_rate": 0.0006642203916032406, "loss": 1.0645, "step": 4850 }, { "epoch": 0.6208882631511583, "grad_norm": 0.703125, "learning_rate": 0.0006638299225007537, "loss": 1.2943, "step": 4851 }, { "epoch": 0.6210162549596826, "grad_norm": 0.703125, "learning_rate": 0.0006634395111745489, "loss": 1.2249, "step": 4852 }, { "epoch": 0.6211442467682068, "grad_norm": 0.71484375, "learning_rate": 0.000663049157691725, "loss": 1.403, "step": 4853 }, { "epoch": 0.6212722385767311, "grad_norm": 0.6171875, "learning_rate": 0.0006626588621193701, "loss": 1.4503, "step": 4854 }, { "epoch": 0.6214002303852554, "grad_norm": 0.546875, "learning_rate": 0.000662268624524563, "loss": 0.8254, "step": 4855 }, { "epoch": 0.6215282221937796, "grad_norm": 0.60546875, "learning_rate": 0.0006618784449743722, "loss": 1.1634, "step": 4856 }, { "epoch": 0.6216562140023039, "grad_norm": 0.609375, "learning_rate": 0.0006614883235358569, "loss": 1.0928, "step": 4857 }, { "epoch": 0.6217842058108282, "grad_norm": 0.796875, "learning_rate": 0.0006610982602760657, "loss": 1.5334, "step": 4858 }, { "epoch": 0.6219121976193523, "grad_norm": 0.66015625, "learning_rate": 0.0006607082552620367, "loss": 1.4291, "step": 4859 }, { "epoch": 0.6220401894278766, "grad_norm": 0.57421875, "learning_rate": 0.0006603183085607987, "loss": 1.1244, "step": 4860 }, { "epoch": 0.6221681812364008, "grad_norm": 0.578125, "learning_rate": 0.0006599284202393708, "loss": 1.0621, "step": 4861 }, { "epoch": 0.6222961730449251, "grad_norm": 0.61328125, "learning_rate": 0.0006595385903647619, "loss": 0.977, "step": 4862 }, { "epoch": 0.6224241648534494, "grad_norm": 0.67578125, "learning_rate": 0.0006591488190039698, "loss": 1.1656, "step": 4863 }, { "epoch": 0.6225521566619736, "grad_norm": 0.6015625, "learning_rate": 0.0006587591062239834, "loss": 0.8212, "step": 4864 }, { "epoch": 0.6226801484704979, "grad_norm": 0.89453125, "learning_rate": 0.0006583694520917811, "loss": 2.024, "step": 4865 }, { "epoch": 0.6228081402790221, "grad_norm": 0.609375, "learning_rate": 0.0006579798566743314, "loss": 0.8808, "step": 4866 }, { "epoch": 0.6229361320875464, "grad_norm": 0.6015625, "learning_rate": 0.0006575903200385926, "loss": 1.2533, "step": 4867 }, { "epoch": 0.6230641238960707, "grad_norm": 0.57421875, "learning_rate": 0.0006572008422515127, "loss": 1.1203, "step": 4868 }, { "epoch": 0.6231921157045949, "grad_norm": 0.67578125, "learning_rate": 0.0006568114233800298, "loss": 1.3658, "step": 4869 }, { "epoch": 0.6233201075131192, "grad_norm": 0.734375, "learning_rate": 0.000656422063491072, "loss": 1.8813, "step": 4870 }, { "epoch": 0.6234480993216435, "grad_norm": 0.66796875, "learning_rate": 0.0006560327626515573, "loss": 1.0848, "step": 4871 }, { "epoch": 0.6235760911301677, "grad_norm": 0.58203125, "learning_rate": 0.0006556435209283929, "loss": 0.9864, "step": 4872 }, { "epoch": 0.623704082938692, "grad_norm": 0.5703125, "learning_rate": 0.0006552543383884765, "loss": 0.8598, "step": 4873 }, { "epoch": 0.6238320747472161, "grad_norm": 0.703125, "learning_rate": 0.0006548652150986955, "loss": 1.3318, "step": 4874 }, { "epoch": 0.6239600665557404, "grad_norm": 0.765625, "learning_rate": 0.000654476151125927, "loss": 1.0499, "step": 4875 }, { "epoch": 0.6240880583642647, "grad_norm": 0.703125, "learning_rate": 0.0006540871465370382, "loss": 1.574, "step": 4876 }, { "epoch": 0.6242160501727889, "grad_norm": 0.6171875, "learning_rate": 0.0006536982013988853, "loss": 1.0239, "step": 4877 }, { "epoch": 0.6243440419813132, "grad_norm": 0.78125, "learning_rate": 0.0006533093157783152, "loss": 1.2115, "step": 4878 }, { "epoch": 0.6244720337898374, "grad_norm": 0.578125, "learning_rate": 0.0006529204897421643, "loss": 1.1829, "step": 4879 }, { "epoch": 0.6246000255983617, "grad_norm": 0.7578125, "learning_rate": 0.0006525317233572588, "loss": 1.714, "step": 4880 }, { "epoch": 0.624728017406886, "grad_norm": 0.53515625, "learning_rate": 0.0006521430166904139, "loss": 1.2249, "step": 4881 }, { "epoch": 0.6248560092154102, "grad_norm": 0.69140625, "learning_rate": 0.0006517543698084351, "loss": 1.1305, "step": 4882 }, { "epoch": 0.6249840010239345, "grad_norm": 0.8359375, "learning_rate": 0.0006513657827781185, "loss": 1.7397, "step": 4883 }, { "epoch": 0.6251119928324588, "grad_norm": 0.7265625, "learning_rate": 0.0006509772556662488, "loss": 1.5089, "step": 4884 }, { "epoch": 0.625239984640983, "grad_norm": 0.63671875, "learning_rate": 0.0006505887885396003, "loss": 1.1685, "step": 4885 }, { "epoch": 0.6253679764495073, "grad_norm": 0.671875, "learning_rate": 0.0006502003814649375, "loss": 1.1113, "step": 4886 }, { "epoch": 0.6254959682580314, "grad_norm": 0.71484375, "learning_rate": 0.0006498120345090146, "loss": 1.6866, "step": 4887 }, { "epoch": 0.6256239600665557, "grad_norm": 0.5546875, "learning_rate": 0.0006494237477385753, "loss": 1.1366, "step": 4888 }, { "epoch": 0.62575195187508, "grad_norm": 0.83984375, "learning_rate": 0.0006490355212203532, "loss": 1.6263, "step": 4889 }, { "epoch": 0.6258799436836042, "grad_norm": 0.5546875, "learning_rate": 0.0006486473550210707, "loss": 0.8933, "step": 4890 }, { "epoch": 0.6260079354921285, "grad_norm": 0.6171875, "learning_rate": 0.000648259249207441, "loss": 1.4276, "step": 4891 }, { "epoch": 0.6261359273006527, "grad_norm": 0.71484375, "learning_rate": 0.0006478712038461662, "loss": 1.1693, "step": 4892 }, { "epoch": 0.626263919109177, "grad_norm": 0.734375, "learning_rate": 0.0006474832190039386, "loss": 1.3703, "step": 4893 }, { "epoch": 0.6263919109177013, "grad_norm": 0.609375, "learning_rate": 0.0006470952947474389, "loss": 1.4274, "step": 4894 }, { "epoch": 0.6265199027262255, "grad_norm": 0.51171875, "learning_rate": 0.0006467074311433385, "loss": 0.6944, "step": 4895 }, { "epoch": 0.6266478945347498, "grad_norm": 0.6484375, "learning_rate": 0.0006463196282582984, "loss": 1.5647, "step": 4896 }, { "epoch": 0.6267758863432741, "grad_norm": 0.63671875, "learning_rate": 0.0006459318861589684, "loss": 1.2763, "step": 4897 }, { "epoch": 0.6269038781517983, "grad_norm": 0.66015625, "learning_rate": 0.0006455442049119887, "loss": 1.4294, "step": 4898 }, { "epoch": 0.6270318699603226, "grad_norm": 0.5546875, "learning_rate": 0.0006451565845839882, "loss": 1.075, "step": 4899 }, { "epoch": 0.6271598617688467, "grad_norm": 0.58203125, "learning_rate": 0.0006447690252415858, "loss": 1.3863, "step": 4900 }, { "epoch": 0.627287853577371, "grad_norm": 0.55859375, "learning_rate": 0.0006443815269513899, "loss": 1.0408, "step": 4901 }, { "epoch": 0.6274158453858953, "grad_norm": 0.76953125, "learning_rate": 0.0006439940897799989, "loss": 1.0629, "step": 4902 }, { "epoch": 0.6275438371944195, "grad_norm": 0.69140625, "learning_rate": 0.0006436067137939994, "loss": 1.6001, "step": 4903 }, { "epoch": 0.6276718290029438, "grad_norm": 0.59765625, "learning_rate": 0.0006432193990599684, "loss": 1.3539, "step": 4904 }, { "epoch": 0.627799820811468, "grad_norm": 0.71875, "learning_rate": 0.0006428321456444722, "loss": 1.9321, "step": 4905 }, { "epoch": 0.6279278126199923, "grad_norm": 0.66796875, "learning_rate": 0.0006424449536140668, "loss": 1.6035, "step": 4906 }, { "epoch": 0.6280558044285166, "grad_norm": 0.76171875, "learning_rate": 0.0006420578230352978, "loss": 1.146, "step": 4907 }, { "epoch": 0.6281837962370408, "grad_norm": 0.6640625, "learning_rate": 0.0006416707539746991, "loss": 1.479, "step": 4908 }, { "epoch": 0.6283117880455651, "grad_norm": 0.69921875, "learning_rate": 0.000641283746498795, "loss": 1.6063, "step": 4909 }, { "epoch": 0.6284397798540894, "grad_norm": 0.84765625, "learning_rate": 0.0006408968006740994, "loss": 1.6608, "step": 4910 }, { "epoch": 0.6285677716626136, "grad_norm": 0.6875, "learning_rate": 0.000640509916567115, "loss": 1.7442, "step": 4911 }, { "epoch": 0.6286957634711379, "grad_norm": 0.57421875, "learning_rate": 0.0006401230942443339, "loss": 1.0778, "step": 4912 }, { "epoch": 0.6288237552796621, "grad_norm": 0.65234375, "learning_rate": 0.0006397363337722379, "loss": 1.3228, "step": 4913 }, { "epoch": 0.6289517470881864, "grad_norm": 0.81640625, "learning_rate": 0.0006393496352172982, "loss": 1.7837, "step": 4914 }, { "epoch": 0.6290797388967106, "grad_norm": 0.73828125, "learning_rate": 0.0006389629986459756, "loss": 1.001, "step": 4915 }, { "epoch": 0.6292077307052348, "grad_norm": 0.5390625, "learning_rate": 0.0006385764241247189, "loss": 0.7678, "step": 4916 }, { "epoch": 0.6293357225137591, "grad_norm": 0.70703125, "learning_rate": 0.0006381899117199678, "loss": 1.1762, "step": 4917 }, { "epoch": 0.6294637143222833, "grad_norm": 0.5859375, "learning_rate": 0.0006378034614981507, "loss": 1.016, "step": 4918 }, { "epoch": 0.6295917061308076, "grad_norm": 0.515625, "learning_rate": 0.0006374170735256853, "loss": 0.787, "step": 4919 }, { "epoch": 0.6297196979393319, "grad_norm": 0.59375, "learning_rate": 0.000637030747868979, "loss": 1.2652, "step": 4920 }, { "epoch": 0.6298476897478561, "grad_norm": 0.55078125, "learning_rate": 0.000636644484594427, "loss": 0.8987, "step": 4921 }, { "epoch": 0.6299756815563804, "grad_norm": 0.63671875, "learning_rate": 0.0006362582837684161, "loss": 1.0087, "step": 4922 }, { "epoch": 0.6301036733649047, "grad_norm": 0.609375, "learning_rate": 0.0006358721454573207, "loss": 1.07, "step": 4923 }, { "epoch": 0.6302316651734289, "grad_norm": 0.734375, "learning_rate": 0.0006354860697275053, "loss": 1.6581, "step": 4924 }, { "epoch": 0.6303596569819532, "grad_norm": 0.5546875, "learning_rate": 0.0006351000566453226, "loss": 1.2312, "step": 4925 }, { "epoch": 0.6304876487904774, "grad_norm": 0.50390625, "learning_rate": 0.0006347141062771158, "loss": 0.9688, "step": 4926 }, { "epoch": 0.6306156405990017, "grad_norm": 0.62890625, "learning_rate": 0.0006343282186892163, "loss": 1.2568, "step": 4927 }, { "epoch": 0.630743632407526, "grad_norm": 0.59375, "learning_rate": 0.0006339423939479452, "loss": 0.97, "step": 4928 }, { "epoch": 0.6308716242160501, "grad_norm": 0.6171875, "learning_rate": 0.0006335566321196135, "loss": 1.2551, "step": 4929 }, { "epoch": 0.6309996160245744, "grad_norm": 0.828125, "learning_rate": 0.0006331709332705195, "loss": 1.8014, "step": 4930 }, { "epoch": 0.6311276078330987, "grad_norm": 0.55078125, "learning_rate": 0.0006327852974669526, "loss": 0.8369, "step": 4931 }, { "epoch": 0.6312555996416229, "grad_norm": 0.52734375, "learning_rate": 0.00063239972477519, "loss": 0.8759, "step": 4932 }, { "epoch": 0.6313835914501472, "grad_norm": 0.59765625, "learning_rate": 0.0006320142152614994, "loss": 1.0278, "step": 4933 }, { "epoch": 0.6315115832586714, "grad_norm": 0.79296875, "learning_rate": 0.0006316287689921361, "loss": 1.5576, "step": 4934 }, { "epoch": 0.6316395750671957, "grad_norm": 0.6171875, "learning_rate": 0.0006312433860333456, "loss": 1.1302, "step": 4935 }, { "epoch": 0.63176756687572, "grad_norm": 0.66015625, "learning_rate": 0.000630858066451362, "loss": 1.0118, "step": 4936 }, { "epoch": 0.6318955586842442, "grad_norm": 0.7109375, "learning_rate": 0.0006304728103124094, "loss": 1.3935, "step": 4937 }, { "epoch": 0.6320235504927685, "grad_norm": 0.73828125, "learning_rate": 0.0006300876176826992, "loss": 1.6025, "step": 4938 }, { "epoch": 0.6321515423012927, "grad_norm": 0.57421875, "learning_rate": 0.0006297024886284342, "loss": 0.9382, "step": 4939 }, { "epoch": 0.632279534109817, "grad_norm": 0.6796875, "learning_rate": 0.0006293174232158041, "loss": 1.5135, "step": 4940 }, { "epoch": 0.6324075259183413, "grad_norm": 0.71484375, "learning_rate": 0.0006289324215109893, "loss": 1.7593, "step": 4941 }, { "epoch": 0.6325355177268654, "grad_norm": 0.72265625, "learning_rate": 0.0006285474835801586, "loss": 1.3229, "step": 4942 }, { "epoch": 0.6326635095353897, "grad_norm": 0.59375, "learning_rate": 0.000628162609489469, "loss": 1.0865, "step": 4943 }, { "epoch": 0.632791501343914, "grad_norm": 0.62109375, "learning_rate": 0.0006277777993050681, "loss": 1.1507, "step": 4944 }, { "epoch": 0.6329194931524382, "grad_norm": 0.58984375, "learning_rate": 0.0006273930530930918, "loss": 0.7403, "step": 4945 }, { "epoch": 0.6330474849609625, "grad_norm": 0.76171875, "learning_rate": 0.000627008370919665, "loss": 1.3479, "step": 4946 }, { "epoch": 0.6331754767694867, "grad_norm": 0.67578125, "learning_rate": 0.000626623752850901, "loss": 1.1847, "step": 4947 }, { "epoch": 0.633303468578011, "grad_norm": 0.75, "learning_rate": 0.0006262391989529031, "loss": 1.3102, "step": 4948 }, { "epoch": 0.6334314603865353, "grad_norm": 0.6796875, "learning_rate": 0.0006258547092917628, "loss": 1.1099, "step": 4949 }, { "epoch": 0.6335594521950595, "grad_norm": 0.640625, "learning_rate": 0.0006254702839335611, "loss": 1.0875, "step": 4950 }, { "epoch": 0.6336874440035838, "grad_norm": 0.7578125, "learning_rate": 0.0006250859229443684, "loss": 1.5115, "step": 4951 }, { "epoch": 0.633815435812108, "grad_norm": 0.7109375, "learning_rate": 0.0006247016263902423, "loss": 1.523, "step": 4952 }, { "epoch": 0.6339434276206323, "grad_norm": 0.84375, "learning_rate": 0.0006243173943372307, "loss": 0.9869, "step": 4953 }, { "epoch": 0.6340714194291566, "grad_norm": 0.88671875, "learning_rate": 0.0006239332268513704, "loss": 1.9566, "step": 4954 }, { "epoch": 0.6341994112376808, "grad_norm": 0.58984375, "learning_rate": 0.0006235491239986868, "loss": 1.5691, "step": 4955 }, { "epoch": 0.634327403046205, "grad_norm": 0.66015625, "learning_rate": 0.0006231650858451939, "loss": 1.2274, "step": 4956 }, { "epoch": 0.6344553948547293, "grad_norm": 0.61328125, "learning_rate": 0.0006227811124568949, "loss": 1.1999, "step": 4957 }, { "epoch": 0.6345833866632535, "grad_norm": 0.7578125, "learning_rate": 0.0006223972038997823, "loss": 1.3626, "step": 4958 }, { "epoch": 0.6347113784717778, "grad_norm": 0.7734375, "learning_rate": 0.0006220133602398365, "loss": 1.5445, "step": 4959 }, { "epoch": 0.634839370280302, "grad_norm": 0.61328125, "learning_rate": 0.0006216295815430278, "loss": 1.6048, "step": 4960 }, { "epoch": 0.6349673620888263, "grad_norm": 0.69921875, "learning_rate": 0.0006212458678753141, "loss": 1.5734, "step": 4961 }, { "epoch": 0.6350953538973506, "grad_norm": 0.61328125, "learning_rate": 0.0006208622193026434, "loss": 1.3465, "step": 4962 }, { "epoch": 0.6352233457058748, "grad_norm": 0.734375, "learning_rate": 0.0006204786358909516, "loss": 1.2818, "step": 4963 }, { "epoch": 0.6353513375143991, "grad_norm": 0.73828125, "learning_rate": 0.0006200951177061642, "loss": 1.1779, "step": 4964 }, { "epoch": 0.6354793293229233, "grad_norm": 0.59375, "learning_rate": 0.0006197116648141945, "loss": 1.5599, "step": 4965 }, { "epoch": 0.6356073211314476, "grad_norm": 0.69140625, "learning_rate": 0.000619328277280945, "loss": 1.5154, "step": 4966 }, { "epoch": 0.6357353129399719, "grad_norm": 0.6875, "learning_rate": 0.0006189449551723075, "loss": 1.5694, "step": 4967 }, { "epoch": 0.6358633047484961, "grad_norm": 0.5859375, "learning_rate": 0.0006185616985541624, "loss": 1.2526, "step": 4968 }, { "epoch": 0.6359912965570204, "grad_norm": 0.5546875, "learning_rate": 0.0006181785074923778, "loss": 0.8735, "step": 4969 }, { "epoch": 0.6361192883655447, "grad_norm": 0.70703125, "learning_rate": 0.0006177953820528117, "loss": 1.7162, "step": 4970 }, { "epoch": 0.6362472801740688, "grad_norm": 0.58203125, "learning_rate": 0.0006174123223013102, "loss": 0.9307, "step": 4971 }, { "epoch": 0.6363752719825931, "grad_norm": 1.03125, "learning_rate": 0.0006170293283037087, "loss": 1.7504, "step": 4972 }, { "epoch": 0.6365032637911173, "grad_norm": 0.66015625, "learning_rate": 0.0006166464001258307, "loss": 1.3911, "step": 4973 }, { "epoch": 0.6366312555996416, "grad_norm": 0.58984375, "learning_rate": 0.0006162635378334886, "loss": 0.9654, "step": 4974 }, { "epoch": 0.6367592474081659, "grad_norm": 0.6875, "learning_rate": 0.0006158807414924836, "loss": 1.3407, "step": 4975 }, { "epoch": 0.6368872392166901, "grad_norm": 0.578125, "learning_rate": 0.0006154980111686053, "loss": 1.2524, "step": 4976 }, { "epoch": 0.6370152310252144, "grad_norm": 0.68359375, "learning_rate": 0.0006151153469276326, "loss": 1.9194, "step": 4977 }, { "epoch": 0.6371432228337386, "grad_norm": 0.67578125, "learning_rate": 0.0006147327488353318, "loss": 1.3834, "step": 4978 }, { "epoch": 0.6372712146422629, "grad_norm": 0.6796875, "learning_rate": 0.0006143502169574591, "loss": 1.1503, "step": 4979 }, { "epoch": 0.6373992064507872, "grad_norm": 0.83203125, "learning_rate": 0.0006139677513597587, "loss": 1.4305, "step": 4980 }, { "epoch": 0.6375271982593114, "grad_norm": 0.78125, "learning_rate": 0.0006135853521079635, "loss": 2.6308, "step": 4981 }, { "epoch": 0.6376551900678357, "grad_norm": 0.7734375, "learning_rate": 0.0006132030192677952, "loss": 1.7531, "step": 4982 }, { "epoch": 0.63778318187636, "grad_norm": 0.69140625, "learning_rate": 0.0006128207529049634, "loss": 1.7162, "step": 4983 }, { "epoch": 0.6379111736848841, "grad_norm": 0.64453125, "learning_rate": 0.0006124385530851673, "loss": 2.0491, "step": 4984 }, { "epoch": 0.6380391654934084, "grad_norm": 0.75390625, "learning_rate": 0.0006120564198740939, "loss": 1.4225, "step": 4985 }, { "epoch": 0.6381671573019326, "grad_norm": 0.60546875, "learning_rate": 0.0006116743533374194, "loss": 1.1854, "step": 4986 }, { "epoch": 0.6382951491104569, "grad_norm": 0.63671875, "learning_rate": 0.0006112923535408073, "loss": 1.09, "step": 4987 }, { "epoch": 0.6384231409189812, "grad_norm": 0.7890625, "learning_rate": 0.0006109104205499111, "loss": 1.4868, "step": 4988 }, { "epoch": 0.6385511327275054, "grad_norm": 0.7265625, "learning_rate": 0.0006105285544303717, "loss": 1.78, "step": 4989 }, { "epoch": 0.6386791245360297, "grad_norm": 0.7578125, "learning_rate": 0.0006101467552478195, "loss": 1.5208, "step": 4990 }, { "epoch": 0.6388071163445539, "grad_norm": 0.6015625, "learning_rate": 0.0006097650230678731, "loss": 0.8292, "step": 4991 }, { "epoch": 0.6389351081530782, "grad_norm": 0.53515625, "learning_rate": 0.0006093833579561385, "loss": 1.2322, "step": 4992 }, { "epoch": 0.6390630999616025, "grad_norm": 0.58984375, "learning_rate": 0.0006090017599782116, "loss": 1.713, "step": 4993 }, { "epoch": 0.6391910917701267, "grad_norm": 0.59375, "learning_rate": 0.0006086202291996761, "loss": 1.4056, "step": 4994 }, { "epoch": 0.639319083578651, "grad_norm": 0.6171875, "learning_rate": 0.0006082387656861044, "loss": 1.4612, "step": 4995 }, { "epoch": 0.6394470753871753, "grad_norm": 0.7578125, "learning_rate": 0.0006078573695030568, "loss": 1.0407, "step": 4996 }, { "epoch": 0.6395750671956995, "grad_norm": 0.734375, "learning_rate": 0.0006074760407160828, "loss": 1.0215, "step": 4997 }, { "epoch": 0.6397030590042238, "grad_norm": 0.7421875, "learning_rate": 0.0006070947793907197, "loss": 1.4747, "step": 4998 }, { "epoch": 0.6398310508127479, "grad_norm": 0.82421875, "learning_rate": 0.0006067135855924937, "loss": 1.282, "step": 4999 }, { "epoch": 0.6399590426212722, "grad_norm": 0.80078125, "learning_rate": 0.0006063324593869189, "loss": 1.0313, "step": 5000 }, { "epoch": 0.6400870344297965, "grad_norm": 0.66796875, "learning_rate": 0.0006059514008394982, "loss": 1.2856, "step": 5001 }, { "epoch": 0.6402150262383207, "grad_norm": 0.63671875, "learning_rate": 0.0006055704100157225, "loss": 1.4022, "step": 5002 }, { "epoch": 0.640343018046845, "grad_norm": 0.60546875, "learning_rate": 0.0006051894869810713, "loss": 1.2325, "step": 5003 }, { "epoch": 0.6404710098553693, "grad_norm": 0.62890625, "learning_rate": 0.0006048086318010127, "loss": 0.7098, "step": 5004 }, { "epoch": 0.6405990016638935, "grad_norm": 0.75390625, "learning_rate": 0.0006044278445410025, "loss": 1.8001, "step": 5005 }, { "epoch": 0.6407269934724178, "grad_norm": 0.58203125, "learning_rate": 0.0006040471252664851, "loss": 1.133, "step": 5006 }, { "epoch": 0.640854985280942, "grad_norm": 0.65234375, "learning_rate": 0.0006036664740428937, "loss": 1.4818, "step": 5007 }, { "epoch": 0.6409829770894663, "grad_norm": 0.69140625, "learning_rate": 0.0006032858909356494, "loss": 1.7535, "step": 5008 }, { "epoch": 0.6411109688979906, "grad_norm": 0.6328125, "learning_rate": 0.0006029053760101611, "loss": 1.5021, "step": 5009 }, { "epoch": 0.6412389607065148, "grad_norm": 0.6328125, "learning_rate": 0.0006025249293318265, "loss": 1.033, "step": 5010 }, { "epoch": 0.6413669525150391, "grad_norm": 0.62890625, "learning_rate": 0.0006021445509660315, "loss": 1.0947, "step": 5011 }, { "epoch": 0.6414949443235632, "grad_norm": 0.64453125, "learning_rate": 0.0006017642409781508, "loss": 1.3554, "step": 5012 }, { "epoch": 0.6416229361320875, "grad_norm": 0.6328125, "learning_rate": 0.000601383999433547, "loss": 1.2178, "step": 5013 }, { "epoch": 0.6417509279406118, "grad_norm": 0.671875, "learning_rate": 0.00060100382639757, "loss": 1.8817, "step": 5014 }, { "epoch": 0.641878919749136, "grad_norm": 0.71875, "learning_rate": 0.0006006237219355591, "loss": 1.2399, "step": 5015 }, { "epoch": 0.6420069115576603, "grad_norm": 0.6328125, "learning_rate": 0.0006002436861128411, "loss": 1.2032, "step": 5016 }, { "epoch": 0.6421349033661846, "grad_norm": 0.64453125, "learning_rate": 0.000599863718994732, "loss": 1.0483, "step": 5017 }, { "epoch": 0.6422628951747088, "grad_norm": 0.671875, "learning_rate": 0.0005994838206465346, "loss": 1.7032, "step": 5018 }, { "epoch": 0.6423908869832331, "grad_norm": 0.54296875, "learning_rate": 0.0005991039911335409, "loss": 0.7853, "step": 5019 }, { "epoch": 0.6425188787917573, "grad_norm": 0.640625, "learning_rate": 0.000598724230521031, "loss": 1.3414, "step": 5020 }, { "epoch": 0.6426468706002816, "grad_norm": 0.65625, "learning_rate": 0.0005983445388742726, "loss": 1.7385, "step": 5021 }, { "epoch": 0.6427748624088059, "grad_norm": 0.64453125, "learning_rate": 0.0005979649162585219, "loss": 1.6314, "step": 5022 }, { "epoch": 0.6429028542173301, "grad_norm": 0.63671875, "learning_rate": 0.0005975853627390232, "loss": 1.4017, "step": 5023 }, { "epoch": 0.6430308460258544, "grad_norm": 0.609375, "learning_rate": 0.000597205878381009, "loss": 1.2564, "step": 5024 }, { "epoch": 0.6431588378343785, "grad_norm": 0.765625, "learning_rate": 0.0005968264632496998, "loss": 0.9936, "step": 5025 }, { "epoch": 0.6432868296429028, "grad_norm": 0.63671875, "learning_rate": 0.0005964471174103047, "loss": 1.5215, "step": 5026 }, { "epoch": 0.6434148214514271, "grad_norm": 0.58984375, "learning_rate": 0.0005960678409280195, "loss": 1.2219, "step": 5027 }, { "epoch": 0.6435428132599513, "grad_norm": 0.7578125, "learning_rate": 0.0005956886338680297, "loss": 1.0766, "step": 5028 }, { "epoch": 0.6436708050684756, "grad_norm": 0.8046875, "learning_rate": 0.0005953094962955081, "loss": 1.4005, "step": 5029 }, { "epoch": 0.6437987968769999, "grad_norm": 0.62109375, "learning_rate": 0.0005949304282756158, "loss": 1.2517, "step": 5030 }, { "epoch": 0.6439267886855241, "grad_norm": 0.62890625, "learning_rate": 0.0005945514298735013, "loss": 1.494, "step": 5031 }, { "epoch": 0.6440547804940484, "grad_norm": 0.62109375, "learning_rate": 0.0005941725011543018, "loss": 1.2447, "step": 5032 }, { "epoch": 0.6441827723025726, "grad_norm": 0.62109375, "learning_rate": 0.0005937936421831425, "loss": 1.1716, "step": 5033 }, { "epoch": 0.6443107641110969, "grad_norm": 0.59765625, "learning_rate": 0.0005934148530251362, "loss": 1.0756, "step": 5034 }, { "epoch": 0.6444387559196212, "grad_norm": 0.671875, "learning_rate": 0.0005930361337453847, "loss": 1.6194, "step": 5035 }, { "epoch": 0.6445667477281454, "grad_norm": 0.546875, "learning_rate": 0.000592657484408976, "loss": 1.1681, "step": 5036 }, { "epoch": 0.6446947395366697, "grad_norm": 0.58203125, "learning_rate": 0.0005922789050809878, "loss": 0.8457, "step": 5037 }, { "epoch": 0.6448227313451939, "grad_norm": 0.64453125, "learning_rate": 0.0005919003958264848, "loss": 1.2229, "step": 5038 }, { "epoch": 0.6449507231537182, "grad_norm": 0.6953125, "learning_rate": 0.0005915219567105205, "loss": 1.5029, "step": 5039 }, { "epoch": 0.6450787149622424, "grad_norm": 0.6640625, "learning_rate": 0.0005911435877981351, "loss": 1.2128, "step": 5040 }, { "epoch": 0.6452067067707666, "grad_norm": 0.75390625, "learning_rate": 0.0005907652891543576, "loss": 1.0483, "step": 5041 }, { "epoch": 0.6453346985792909, "grad_norm": 0.58203125, "learning_rate": 0.000590387060844205, "loss": 1.0554, "step": 5042 }, { "epoch": 0.6454626903878152, "grad_norm": 0.73828125, "learning_rate": 0.0005900089029326818, "loss": 1.4797, "step": 5043 }, { "epoch": 0.6455906821963394, "grad_norm": 0.6484375, "learning_rate": 0.000589630815484781, "loss": 1.5789, "step": 5044 }, { "epoch": 0.6457186740048637, "grad_norm": 0.609375, "learning_rate": 0.0005892527985654824, "loss": 1.0829, "step": 5045 }, { "epoch": 0.6458466658133879, "grad_norm": 0.7109375, "learning_rate": 0.0005888748522397547, "loss": 1.3243, "step": 5046 }, { "epoch": 0.6459746576219122, "grad_norm": 0.65234375, "learning_rate": 0.0005884969765725539, "loss": 1.1866, "step": 5047 }, { "epoch": 0.6461026494304365, "grad_norm": 0.71875, "learning_rate": 0.0005881191716288248, "loss": 1.751, "step": 5048 }, { "epoch": 0.6462306412389607, "grad_norm": 0.57421875, "learning_rate": 0.0005877414374734982, "loss": 1.1301, "step": 5049 }, { "epoch": 0.646358633047485, "grad_norm": 0.7265625, "learning_rate": 0.0005873637741714941, "loss": 1.8711, "step": 5050 }, { "epoch": 0.6464866248560092, "grad_norm": 0.51171875, "learning_rate": 0.0005869861817877207, "loss": 0.601, "step": 5051 }, { "epoch": 0.6466146166645335, "grad_norm": 0.546875, "learning_rate": 0.0005866086603870733, "loss": 1.0536, "step": 5052 }, { "epoch": 0.6467426084730578, "grad_norm": 0.66796875, "learning_rate": 0.0005862312100344345, "loss": 1.0077, "step": 5053 }, { "epoch": 0.6468706002815819, "grad_norm": 0.6953125, "learning_rate": 0.0005858538307946755, "loss": 0.9364, "step": 5054 }, { "epoch": 0.6469985920901062, "grad_norm": 0.7109375, "learning_rate": 0.000585476522732655, "loss": 1.417, "step": 5055 }, { "epoch": 0.6471265838986305, "grad_norm": 0.77734375, "learning_rate": 0.0005850992859132197, "loss": 1.094, "step": 5056 }, { "epoch": 0.6472545757071547, "grad_norm": 0.60546875, "learning_rate": 0.000584722120401204, "loss": 1.1563, "step": 5057 }, { "epoch": 0.647382567515679, "grad_norm": 0.7265625, "learning_rate": 0.0005843450262614293, "loss": 1.5279, "step": 5058 }, { "epoch": 0.6475105593242032, "grad_norm": 0.6484375, "learning_rate": 0.0005839680035587061, "loss": 1.5112, "step": 5059 }, { "epoch": 0.6476385511327275, "grad_norm": 0.72265625, "learning_rate": 0.000583591052357831, "loss": 1.487, "step": 5060 }, { "epoch": 0.6477665429412518, "grad_norm": 0.66015625, "learning_rate": 0.0005832141727235905, "loss": 1.3857, "step": 5061 }, { "epoch": 0.647894534749776, "grad_norm": 0.81640625, "learning_rate": 0.0005828373647207561, "loss": 1.8734, "step": 5062 }, { "epoch": 0.6480225265583003, "grad_norm": 0.64453125, "learning_rate": 0.000582460628414089, "loss": 1.3269, "step": 5063 }, { "epoch": 0.6481505183668245, "grad_norm": 0.60546875, "learning_rate": 0.0005820839638683373, "loss": 1.1869, "step": 5064 }, { "epoch": 0.6482785101753488, "grad_norm": 0.58203125, "learning_rate": 0.0005817073711482371, "loss": 1.14, "step": 5065 }, { "epoch": 0.6484065019838731, "grad_norm": 1.1015625, "learning_rate": 0.0005813308503185122, "loss": 2.0241, "step": 5066 }, { "epoch": 0.6485344937923972, "grad_norm": 0.7265625, "learning_rate": 0.0005809544014438732, "loss": 1.3701, "step": 5067 }, { "epoch": 0.6486624856009215, "grad_norm": 0.74609375, "learning_rate": 0.0005805780245890191, "loss": 1.3391, "step": 5068 }, { "epoch": 0.6487904774094458, "grad_norm": 0.72265625, "learning_rate": 0.0005802017198186366, "loss": 1.1065, "step": 5069 }, { "epoch": 0.64891846921797, "grad_norm": 0.50390625, "learning_rate": 0.0005798254871973996, "loss": 0.7584, "step": 5070 }, { "epoch": 0.6490464610264943, "grad_norm": 0.890625, "learning_rate": 0.0005794493267899699, "loss": 1.3103, "step": 5071 }, { "epoch": 0.6491744528350185, "grad_norm": 0.6796875, "learning_rate": 0.0005790732386609966, "loss": 1.5109, "step": 5072 }, { "epoch": 0.6493024446435428, "grad_norm": 0.70703125, "learning_rate": 0.0005786972228751169, "loss": 1.4972, "step": 5073 }, { "epoch": 0.6494304364520671, "grad_norm": 0.69140625, "learning_rate": 0.0005783212794969548, "loss": 1.6149, "step": 5074 }, { "epoch": 0.6495584282605913, "grad_norm": 0.75, "learning_rate": 0.0005779454085911229, "loss": 1.5918, "step": 5075 }, { "epoch": 0.6496864200691156, "grad_norm": 0.625, "learning_rate": 0.0005775696102222199, "loss": 1.296, "step": 5076 }, { "epoch": 0.6498144118776399, "grad_norm": 0.69921875, "learning_rate": 0.000577193884454833, "loss": 1.0357, "step": 5077 }, { "epoch": 0.6499424036861641, "grad_norm": 0.66015625, "learning_rate": 0.0005768182313535372, "loss": 1.042, "step": 5078 }, { "epoch": 0.6500703954946884, "grad_norm": 0.765625, "learning_rate": 0.0005764426509828948, "loss": 1.7256, "step": 5079 }, { "epoch": 0.6501983873032126, "grad_norm": 0.703125, "learning_rate": 0.0005760671434074541, "loss": 1.551, "step": 5080 }, { "epoch": 0.6503263791117369, "grad_norm": 0.67578125, "learning_rate": 0.0005756917086917532, "loss": 1.2646, "step": 5081 }, { "epoch": 0.6504543709202611, "grad_norm": 0.80859375, "learning_rate": 0.0005753163469003167, "loss": 1.3207, "step": 5082 }, { "epoch": 0.6505823627287853, "grad_norm": 0.6875, "learning_rate": 0.0005749410580976568, "loss": 1.1246, "step": 5083 }, { "epoch": 0.6507103545373096, "grad_norm": 0.5625, "learning_rate": 0.000574565842348272, "loss": 0.9948, "step": 5084 }, { "epoch": 0.6508383463458338, "grad_norm": 0.765625, "learning_rate": 0.00057419069971665, "loss": 1.2798, "step": 5085 }, { "epoch": 0.6509663381543581, "grad_norm": 0.6875, "learning_rate": 0.0005738156302672646, "loss": 1.9316, "step": 5086 }, { "epoch": 0.6510943299628824, "grad_norm": 0.55859375, "learning_rate": 0.000573440634064578, "loss": 1.0281, "step": 5087 }, { "epoch": 0.6512223217714066, "grad_norm": 0.65625, "learning_rate": 0.0005730657111730397, "loss": 1.3116, "step": 5088 }, { "epoch": 0.6513503135799309, "grad_norm": 0.515625, "learning_rate": 0.0005726908616570854, "loss": 0.9845, "step": 5089 }, { "epoch": 0.6514783053884552, "grad_norm": 0.8125, "learning_rate": 0.0005723160855811395, "loss": 0.8995, "step": 5090 }, { "epoch": 0.6516062971969794, "grad_norm": 0.796875, "learning_rate": 0.0005719413830096134, "loss": 1.545, "step": 5091 }, { "epoch": 0.6517342890055037, "grad_norm": 0.640625, "learning_rate": 0.0005715667540069059, "loss": 1.4223, "step": 5092 }, { "epoch": 0.6518622808140279, "grad_norm": 0.609375, "learning_rate": 0.0005711921986374026, "loss": 0.7426, "step": 5093 }, { "epoch": 0.6519902726225522, "grad_norm": 0.609375, "learning_rate": 0.0005708177169654776, "loss": 1.1203, "step": 5094 }, { "epoch": 0.6521182644310765, "grad_norm": 0.6328125, "learning_rate": 0.0005704433090554912, "loss": 1.3863, "step": 5095 }, { "epoch": 0.6522462562396006, "grad_norm": 0.6171875, "learning_rate": 0.0005700689749717915, "loss": 1.0289, "step": 5096 }, { "epoch": 0.6523742480481249, "grad_norm": 0.61328125, "learning_rate": 0.0005696947147787144, "loss": 1.4358, "step": 5097 }, { "epoch": 0.6525022398566491, "grad_norm": 0.68359375, "learning_rate": 0.000569320528540582, "loss": 1.5146, "step": 5098 }, { "epoch": 0.6526302316651734, "grad_norm": 0.65234375, "learning_rate": 0.0005689464163217042, "loss": 1.0689, "step": 5099 }, { "epoch": 0.6527582234736977, "grad_norm": 0.54296875, "learning_rate": 0.0005685723781863785, "loss": 1.0983, "step": 5100 }, { "epoch": 0.6528862152822219, "grad_norm": 0.640625, "learning_rate": 0.0005681984141988899, "loss": 1.6144, "step": 5101 }, { "epoch": 0.6530142070907462, "grad_norm": 0.5703125, "learning_rate": 0.0005678245244235093, "loss": 1.1565, "step": 5102 }, { "epoch": 0.6531421988992705, "grad_norm": 0.6484375, "learning_rate": 0.0005674507089244957, "loss": 1.36, "step": 5103 }, { "epoch": 0.6532701907077947, "grad_norm": 0.59765625, "learning_rate": 0.0005670769677660962, "loss": 1.5307, "step": 5104 }, { "epoch": 0.653398182516319, "grad_norm": 0.6875, "learning_rate": 0.0005667033010125442, "loss": 1.1854, "step": 5105 }, { "epoch": 0.6535261743248432, "grad_norm": 0.62109375, "learning_rate": 0.0005663297087280597, "loss": 1.4473, "step": 5106 }, { "epoch": 0.6536541661333675, "grad_norm": 0.68359375, "learning_rate": 0.0005659561909768509, "loss": 1.3884, "step": 5107 }, { "epoch": 0.6537821579418918, "grad_norm": 0.65234375, "learning_rate": 0.000565582747823113, "loss": 1.3487, "step": 5108 }, { "epoch": 0.653910149750416, "grad_norm": 0.75390625, "learning_rate": 0.0005652093793310282, "loss": 1.1562, "step": 5109 }, { "epoch": 0.6540381415589402, "grad_norm": 0.6640625, "learning_rate": 0.0005648360855647663, "loss": 1.027, "step": 5110 }, { "epoch": 0.6541661333674644, "grad_norm": 0.56640625, "learning_rate": 0.0005644628665884832, "loss": 1.0925, "step": 5111 }, { "epoch": 0.6542941251759887, "grad_norm": 0.71875, "learning_rate": 0.0005640897224663228, "loss": 1.8154, "step": 5112 }, { "epoch": 0.654422116984513, "grad_norm": 0.6796875, "learning_rate": 0.0005637166532624163, "loss": 1.3287, "step": 5113 }, { "epoch": 0.6545501087930372, "grad_norm": 0.671875, "learning_rate": 0.0005633436590408814, "loss": 1.4533, "step": 5114 }, { "epoch": 0.6546781006015615, "grad_norm": 0.796875, "learning_rate": 0.0005629707398658234, "loss": 1.4055, "step": 5115 }, { "epoch": 0.6548060924100858, "grad_norm": 0.60546875, "learning_rate": 0.0005625978958013344, "loss": 0.8711, "step": 5116 }, { "epoch": 0.65493408421861, "grad_norm": 0.6484375, "learning_rate": 0.000562225126911494, "loss": 1.5439, "step": 5117 }, { "epoch": 0.6550620760271343, "grad_norm": 0.58984375, "learning_rate": 0.0005618524332603681, "loss": 1.0745, "step": 5118 }, { "epoch": 0.6551900678356585, "grad_norm": 0.65625, "learning_rate": 0.0005614798149120108, "loss": 1.4413, "step": 5119 }, { "epoch": 0.6553180596441828, "grad_norm": 0.64453125, "learning_rate": 0.0005611072719304617, "loss": 1.6468, "step": 5120 }, { "epoch": 0.6554460514527071, "grad_norm": 0.63671875, "learning_rate": 0.0005607348043797491, "loss": 1.4039, "step": 5121 }, { "epoch": 0.6555740432612313, "grad_norm": 0.765625, "learning_rate": 0.0005603624123238871, "loss": 1.9516, "step": 5122 }, { "epoch": 0.6557020350697556, "grad_norm": 0.6875, "learning_rate": 0.0005599900958268781, "loss": 1.4599, "step": 5123 }, { "epoch": 0.6558300268782797, "grad_norm": 0.68359375, "learning_rate": 0.0005596178549527097, "loss": 1.6002, "step": 5124 }, { "epoch": 0.655958018686804, "grad_norm": 0.68359375, "learning_rate": 0.0005592456897653578, "loss": 1.4348, "step": 5125 }, { "epoch": 0.6560860104953283, "grad_norm": 0.6796875, "learning_rate": 0.0005588736003287854, "loss": 1.1642, "step": 5126 }, { "epoch": 0.6562140023038525, "grad_norm": 0.6875, "learning_rate": 0.000558501586706942, "loss": 1.7344, "step": 5127 }, { "epoch": 0.6563419941123768, "grad_norm": 0.6875, "learning_rate": 0.0005581296489637647, "loss": 1.266, "step": 5128 }, { "epoch": 0.6564699859209011, "grad_norm": 0.7421875, "learning_rate": 0.0005577577871631759, "loss": 1.6556, "step": 5129 }, { "epoch": 0.6565979777294253, "grad_norm": 0.6640625, "learning_rate": 0.0005573860013690867, "loss": 1.313, "step": 5130 }, { "epoch": 0.6567259695379496, "grad_norm": 0.64453125, "learning_rate": 0.0005570142916453945, "loss": 1.7377, "step": 5131 }, { "epoch": 0.6568539613464738, "grad_norm": 0.58203125, "learning_rate": 0.0005566426580559839, "loss": 1.3629, "step": 5132 }, { "epoch": 0.6569819531549981, "grad_norm": 0.6953125, "learning_rate": 0.0005562711006647257, "loss": 1.8414, "step": 5133 }, { "epoch": 0.6571099449635224, "grad_norm": 0.57421875, "learning_rate": 0.0005558996195354779, "loss": 1.0037, "step": 5134 }, { "epoch": 0.6572379367720466, "grad_norm": 0.67578125, "learning_rate": 0.0005555282147320862, "loss": 1.0892, "step": 5135 }, { "epoch": 0.6573659285805709, "grad_norm": 0.68359375, "learning_rate": 0.0005551568863183822, "loss": 1.6798, "step": 5136 }, { "epoch": 0.657493920389095, "grad_norm": 0.8125, "learning_rate": 0.0005547856343581847, "loss": 1.4363, "step": 5137 }, { "epoch": 0.6576219121976193, "grad_norm": 0.6484375, "learning_rate": 0.0005544144589152995, "loss": 1.3957, "step": 5138 }, { "epoch": 0.6577499040061436, "grad_norm": 0.65625, "learning_rate": 0.000554043360053519, "loss": 1.153, "step": 5139 }, { "epoch": 0.6578778958146678, "grad_norm": 0.7578125, "learning_rate": 0.0005536723378366227, "loss": 1.515, "step": 5140 }, { "epoch": 0.6580058876231921, "grad_norm": 0.5859375, "learning_rate": 0.000553301392328377, "loss": 0.9713, "step": 5141 }, { "epoch": 0.6581338794317164, "grad_norm": 0.6015625, "learning_rate": 0.0005529305235925341, "loss": 1.0887, "step": 5142 }, { "epoch": 0.6582618712402406, "grad_norm": 0.67578125, "learning_rate": 0.0005525597316928344, "loss": 1.1021, "step": 5143 }, { "epoch": 0.6583898630487649, "grad_norm": 0.57421875, "learning_rate": 0.0005521890166930044, "loss": 1.2916, "step": 5144 }, { "epoch": 0.6585178548572891, "grad_norm": 0.55859375, "learning_rate": 0.0005518183786567581, "loss": 0.8911, "step": 5145 }, { "epoch": 0.6586458466658134, "grad_norm": 0.58984375, "learning_rate": 0.0005514478176477944, "loss": 1.4778, "step": 5146 }, { "epoch": 0.6587738384743377, "grad_norm": 0.64453125, "learning_rate": 0.000551077333729801, "loss": 1.6903, "step": 5147 }, { "epoch": 0.6589018302828619, "grad_norm": 0.5859375, "learning_rate": 0.0005507069269664512, "loss": 1.115, "step": 5148 }, { "epoch": 0.6590298220913862, "grad_norm": 0.55859375, "learning_rate": 0.0005503365974214059, "loss": 1.1155, "step": 5149 }, { "epoch": 0.6591578138999105, "grad_norm": 0.6796875, "learning_rate": 0.0005499663451583125, "loss": 1.2685, "step": 5150 }, { "epoch": 0.6592858057084346, "grad_norm": 0.79296875, "learning_rate": 0.0005495961702408038, "loss": 1.5617, "step": 5151 }, { "epoch": 0.6594137975169589, "grad_norm": 0.6640625, "learning_rate": 0.0005492260727325011, "loss": 1.7788, "step": 5152 }, { "epoch": 0.6595417893254831, "grad_norm": 0.6640625, "learning_rate": 0.0005488560526970116, "loss": 1.0001, "step": 5153 }, { "epoch": 0.6596697811340074, "grad_norm": 0.76953125, "learning_rate": 0.0005484861101979294, "loss": 1.8185, "step": 5154 }, { "epoch": 0.6597977729425317, "grad_norm": 0.69921875, "learning_rate": 0.0005481162452988344, "loss": 1.4408, "step": 5155 }, { "epoch": 0.6599257647510559, "grad_norm": 0.83984375, "learning_rate": 0.0005477464580632944, "loss": 1.3478, "step": 5156 }, { "epoch": 0.6600537565595802, "grad_norm": 0.62109375, "learning_rate": 0.0005473767485548634, "loss": 1.3659, "step": 5157 }, { "epoch": 0.6601817483681044, "grad_norm": 0.6875, "learning_rate": 0.0005470071168370819, "loss": 1.5904, "step": 5158 }, { "epoch": 0.6603097401766287, "grad_norm": 0.67578125, "learning_rate": 0.0005466375629734767, "loss": 1.3782, "step": 5159 }, { "epoch": 0.660437731985153, "grad_norm": 0.6171875, "learning_rate": 0.0005462680870275624, "loss": 1.3528, "step": 5160 }, { "epoch": 0.6605657237936772, "grad_norm": 0.75, "learning_rate": 0.0005458986890628388, "loss": 1.6975, "step": 5161 }, { "epoch": 0.6606937156022015, "grad_norm": 0.703125, "learning_rate": 0.0005455293691427934, "loss": 1.4231, "step": 5162 }, { "epoch": 0.6608217074107258, "grad_norm": 0.6875, "learning_rate": 0.0005451601273308997, "loss": 1.2495, "step": 5163 }, { "epoch": 0.66094969921925, "grad_norm": 0.73828125, "learning_rate": 0.0005447909636906176, "loss": 2.2551, "step": 5164 }, { "epoch": 0.6610776910277743, "grad_norm": 0.7578125, "learning_rate": 0.0005444218782853941, "loss": 1.4076, "step": 5165 }, { "epoch": 0.6612056828362984, "grad_norm": 0.486328125, "learning_rate": 0.0005440528711786623, "loss": 0.7835, "step": 5166 }, { "epoch": 0.6613336746448227, "grad_norm": 0.60546875, "learning_rate": 0.0005436839424338425, "loss": 1.1845, "step": 5167 }, { "epoch": 0.661461666453347, "grad_norm": 0.7578125, "learning_rate": 0.0005433150921143406, "loss": 1.8271, "step": 5168 }, { "epoch": 0.6615896582618712, "grad_norm": 0.80078125, "learning_rate": 0.0005429463202835498, "loss": 1.5447, "step": 5169 }, { "epoch": 0.6617176500703955, "grad_norm": 0.625, "learning_rate": 0.0005425776270048492, "loss": 1.2679, "step": 5170 }, { "epoch": 0.6618456418789197, "grad_norm": 0.7421875, "learning_rate": 0.0005422090123416048, "loss": 1.4374, "step": 5171 }, { "epoch": 0.661973633687444, "grad_norm": 0.69140625, "learning_rate": 0.0005418404763571698, "loss": 1.0942, "step": 5172 }, { "epoch": 0.6621016254959683, "grad_norm": 0.69140625, "learning_rate": 0.0005414720191148822, "loss": 1.3071, "step": 5173 }, { "epoch": 0.6622296173044925, "grad_norm": 0.76171875, "learning_rate": 0.0005411036406780676, "loss": 1.7055, "step": 5174 }, { "epoch": 0.6623576091130168, "grad_norm": 0.67578125, "learning_rate": 0.0005407353411100378, "loss": 1.3506, "step": 5175 }, { "epoch": 0.6624856009215411, "grad_norm": 0.85546875, "learning_rate": 0.0005403671204740915, "loss": 1.8215, "step": 5176 }, { "epoch": 0.6626135927300653, "grad_norm": 0.6875, "learning_rate": 0.0005399989788335125, "loss": 1.555, "step": 5177 }, { "epoch": 0.6627415845385896, "grad_norm": 0.58203125, "learning_rate": 0.0005396309162515725, "loss": 1.108, "step": 5178 }, { "epoch": 0.6628695763471137, "grad_norm": 0.7578125, "learning_rate": 0.0005392629327915288, "loss": 1.2861, "step": 5179 }, { "epoch": 0.662997568155638, "grad_norm": 0.66015625, "learning_rate": 0.0005388950285166255, "loss": 1.3633, "step": 5180 }, { "epoch": 0.6631255599641623, "grad_norm": 1.0625, "learning_rate": 0.000538527203490093, "loss": 1.0638, "step": 5181 }, { "epoch": 0.6632535517726865, "grad_norm": 1.4375, "learning_rate": 0.0005381594577751476, "loss": 1.2699, "step": 5182 }, { "epoch": 0.6633815435812108, "grad_norm": 0.67578125, "learning_rate": 0.0005377917914349929, "loss": 1.4237, "step": 5183 }, { "epoch": 0.663509535389735, "grad_norm": 0.5859375, "learning_rate": 0.000537424204532818, "loss": 0.9474, "step": 5184 }, { "epoch": 0.6636375271982593, "grad_norm": 0.59375, "learning_rate": 0.000537056697131799, "loss": 1.3439, "step": 5185 }, { "epoch": 0.6637655190067836, "grad_norm": 0.85546875, "learning_rate": 0.0005366892692950974, "loss": 1.3281, "step": 5186 }, { "epoch": 0.6638935108153078, "grad_norm": 0.65234375, "learning_rate": 0.0005363219210858621, "loss": 1.4607, "step": 5187 }, { "epoch": 0.6640215026238321, "grad_norm": 0.80859375, "learning_rate": 0.0005359546525672275, "loss": 1.9073, "step": 5188 }, { "epoch": 0.6641494944323564, "grad_norm": 0.765625, "learning_rate": 0.0005355874638023155, "loss": 1.6473, "step": 5189 }, { "epoch": 0.6642774862408806, "grad_norm": 0.73828125, "learning_rate": 0.0005352203548542322, "loss": 1.6528, "step": 5190 }, { "epoch": 0.6644054780494049, "grad_norm": 0.62890625, "learning_rate": 0.000534853325786072, "loss": 1.56, "step": 5191 }, { "epoch": 0.664533469857929, "grad_norm": 0.8203125, "learning_rate": 0.0005344863766609147, "loss": 1.5443, "step": 5192 }, { "epoch": 0.6646614616664533, "grad_norm": 0.921875, "learning_rate": 0.000534119507541826, "loss": 1.5247, "step": 5193 }, { "epoch": 0.6647894534749776, "grad_norm": 0.70703125, "learning_rate": 0.0005337527184918594, "loss": 1.5099, "step": 5194 }, { "epoch": 0.6649174452835018, "grad_norm": 0.76953125, "learning_rate": 0.0005333860095740525, "loss": 1.9678, "step": 5195 }, { "epoch": 0.6650454370920261, "grad_norm": 0.68359375, "learning_rate": 0.0005330193808514307, "loss": 1.144, "step": 5196 }, { "epoch": 0.6651734289005503, "grad_norm": 0.7890625, "learning_rate": 0.0005326528323870048, "loss": 2.119, "step": 5197 }, { "epoch": 0.6653014207090746, "grad_norm": 0.62890625, "learning_rate": 0.0005322863642437727, "loss": 1.5036, "step": 5198 }, { "epoch": 0.6654294125175989, "grad_norm": 0.65234375, "learning_rate": 0.0005319199764847171, "loss": 1.2921, "step": 5199 }, { "epoch": 0.6655574043261231, "grad_norm": 0.70703125, "learning_rate": 0.000531553669172808, "loss": 1.6066, "step": 5200 }, { "epoch": 0.6656853961346474, "grad_norm": 0.58984375, "learning_rate": 0.0005311874423710013, "loss": 1.0669, "step": 5201 }, { "epoch": 0.6658133879431717, "grad_norm": 0.66796875, "learning_rate": 0.000530821296142239, "loss": 1.212, "step": 5202 }, { "epoch": 0.6659413797516959, "grad_norm": 0.66796875, "learning_rate": 0.0005304552305494492, "loss": 1.1703, "step": 5203 }, { "epoch": 0.6660693715602202, "grad_norm": 0.765625, "learning_rate": 0.0005300892456555463, "loss": 2.0756, "step": 5204 }, { "epoch": 0.6661973633687444, "grad_norm": 0.58984375, "learning_rate": 0.0005297233415234307, "loss": 1.316, "step": 5205 }, { "epoch": 0.6663253551772687, "grad_norm": 0.84375, "learning_rate": 0.000529357518215989, "loss": 1.6274, "step": 5206 }, { "epoch": 0.666453346985793, "grad_norm": 0.77734375, "learning_rate": 0.0005289917757960943, "loss": 1.808, "step": 5207 }, { "epoch": 0.6665813387943171, "grad_norm": 0.6328125, "learning_rate": 0.0005286261143266044, "loss": 1.1153, "step": 5208 }, { "epoch": 0.6667093306028414, "grad_norm": 0.69921875, "learning_rate": 0.0005282605338703649, "loss": 1.7069, "step": 5209 }, { "epoch": 0.6668373224113656, "grad_norm": 0.578125, "learning_rate": 0.0005278950344902064, "loss": 1.0566, "step": 5210 }, { "epoch": 0.6669653142198899, "grad_norm": 1.0234375, "learning_rate": 0.000527529616248946, "loss": 1.2708, "step": 5211 }, { "epoch": 0.6670933060284142, "grad_norm": 0.6484375, "learning_rate": 0.0005271642792093872, "loss": 1.2833, "step": 5212 }, { "epoch": 0.6672212978369384, "grad_norm": 0.6796875, "learning_rate": 0.0005267990234343182, "loss": 1.1994, "step": 5213 }, { "epoch": 0.6673492896454627, "grad_norm": 0.6484375, "learning_rate": 0.0005264338489865145, "loss": 1.3932, "step": 5214 }, { "epoch": 0.667477281453987, "grad_norm": 0.76171875, "learning_rate": 0.0005260687559287376, "loss": 1.6421, "step": 5215 }, { "epoch": 0.6676052732625112, "grad_norm": 0.7578125, "learning_rate": 0.0005257037443237342, "loss": 1.5255, "step": 5216 }, { "epoch": 0.6677332650710355, "grad_norm": 0.57421875, "learning_rate": 0.000525338814234238, "loss": 1.1002, "step": 5217 }, { "epoch": 0.6678612568795597, "grad_norm": 0.52734375, "learning_rate": 0.0005249739657229675, "loss": 1.1163, "step": 5218 }, { "epoch": 0.667989248688084, "grad_norm": 0.62109375, "learning_rate": 0.0005246091988526283, "loss": 1.0708, "step": 5219 }, { "epoch": 0.6681172404966083, "grad_norm": 0.65625, "learning_rate": 0.0005242445136859117, "loss": 1.4233, "step": 5220 }, { "epoch": 0.6682452323051324, "grad_norm": 0.59375, "learning_rate": 0.0005238799102854941, "loss": 1.5655, "step": 5221 }, { "epoch": 0.6683732241136567, "grad_norm": 0.6484375, "learning_rate": 0.0005235153887140389, "loss": 1.2544, "step": 5222 }, { "epoch": 0.668501215922181, "grad_norm": 0.6328125, "learning_rate": 0.0005231509490341949, "loss": 1.2307, "step": 5223 }, { "epoch": 0.6686292077307052, "grad_norm": 0.703125, "learning_rate": 0.0005227865913085972, "loss": 1.0569, "step": 5224 }, { "epoch": 0.6687571995392295, "grad_norm": 0.6328125, "learning_rate": 0.0005224223155998663, "loss": 1.3251, "step": 5225 }, { "epoch": 0.6688851913477537, "grad_norm": 0.4453125, "learning_rate": 0.0005220581219706091, "loss": 0.8304, "step": 5226 }, { "epoch": 0.669013183156278, "grad_norm": 0.6875, "learning_rate": 0.0005216940104834181, "loss": 1.2095, "step": 5227 }, { "epoch": 0.6691411749648023, "grad_norm": 0.6328125, "learning_rate": 0.0005213299812008718, "loss": 1.2238, "step": 5228 }, { "epoch": 0.6692691667733265, "grad_norm": 0.69140625, "learning_rate": 0.0005209660341855351, "loss": 1.3511, "step": 5229 }, { "epoch": 0.6693971585818508, "grad_norm": 0.7578125, "learning_rate": 0.000520602169499957, "loss": 2.3763, "step": 5230 }, { "epoch": 0.669525150390375, "grad_norm": 0.60546875, "learning_rate": 0.0005202383872066744, "loss": 1.2547, "step": 5231 }, { "epoch": 0.6696531421988993, "grad_norm": 0.62890625, "learning_rate": 0.0005198746873682088, "loss": 1.1886, "step": 5232 }, { "epoch": 0.6697811340074236, "grad_norm": 0.66796875, "learning_rate": 0.0005195110700470683, "loss": 1.4403, "step": 5233 }, { "epoch": 0.6699091258159477, "grad_norm": 0.69140625, "learning_rate": 0.0005191475353057464, "loss": 1.2551, "step": 5234 }, { "epoch": 0.670037117624472, "grad_norm": 0.515625, "learning_rate": 0.0005187840832067221, "loss": 0.8575, "step": 5235 }, { "epoch": 0.6701651094329963, "grad_norm": 0.470703125, "learning_rate": 0.0005184207138124605, "loss": 0.7625, "step": 5236 }, { "epoch": 0.6702931012415205, "grad_norm": 0.6796875, "learning_rate": 0.0005180574271854128, "loss": 1.1778, "step": 5237 }, { "epoch": 0.6704210930500448, "grad_norm": 0.64453125, "learning_rate": 0.0005176942233880157, "loss": 1.0791, "step": 5238 }, { "epoch": 0.670549084858569, "grad_norm": 0.6953125, "learning_rate": 0.0005173311024826916, "loss": 1.6703, "step": 5239 }, { "epoch": 0.6706770766670933, "grad_norm": 0.8515625, "learning_rate": 0.0005169680645318486, "loss": 1.8026, "step": 5240 }, { "epoch": 0.6708050684756176, "grad_norm": 0.68359375, "learning_rate": 0.000516605109597881, "loss": 1.1484, "step": 5241 }, { "epoch": 0.6709330602841418, "grad_norm": 0.7734375, "learning_rate": 0.0005162422377431681, "loss": 1.7545, "step": 5242 }, { "epoch": 0.6710610520926661, "grad_norm": 0.625, "learning_rate": 0.000515879449030076, "loss": 1.1391, "step": 5243 }, { "epoch": 0.6711890439011903, "grad_norm": 0.60546875, "learning_rate": 0.0005155167435209549, "loss": 1.1684, "step": 5244 }, { "epoch": 0.6713170357097146, "grad_norm": 0.6484375, "learning_rate": 0.000515154121278142, "loss": 0.9142, "step": 5245 }, { "epoch": 0.6714450275182389, "grad_norm": 0.68359375, "learning_rate": 0.0005147915823639599, "loss": 1.3487, "step": 5246 }, { "epoch": 0.671573019326763, "grad_norm": 0.6875, "learning_rate": 0.0005144291268407167, "loss": 1.4144, "step": 5247 }, { "epoch": 0.6717010111352874, "grad_norm": 0.5078125, "learning_rate": 0.0005140667547707064, "loss": 0.6348, "step": 5248 }, { "epoch": 0.6718290029438116, "grad_norm": 0.6015625, "learning_rate": 0.0005137044662162085, "loss": 1.0744, "step": 5249 }, { "epoch": 0.6719569947523358, "grad_norm": 0.63671875, "learning_rate": 0.000513342261239488, "loss": 1.3106, "step": 5250 }, { "epoch": 0.6720849865608601, "grad_norm": 0.70703125, "learning_rate": 0.0005129801399027963, "loss": 1.1352, "step": 5251 }, { "epoch": 0.6722129783693843, "grad_norm": 0.734375, "learning_rate": 0.0005126181022683689, "loss": 1.2877, "step": 5252 }, { "epoch": 0.6723409701779086, "grad_norm": 0.78125, "learning_rate": 0.0005122561483984282, "loss": 1.3454, "step": 5253 }, { "epoch": 0.6724689619864329, "grad_norm": 0.734375, "learning_rate": 0.0005118942783551818, "loss": 1.656, "step": 5254 }, { "epoch": 0.6725969537949571, "grad_norm": 0.703125, "learning_rate": 0.0005115324922008233, "loss": 1.6532, "step": 5255 }, { "epoch": 0.6727249456034814, "grad_norm": 0.65625, "learning_rate": 0.0005111707899975317, "loss": 1.2042, "step": 5256 }, { "epoch": 0.6728529374120056, "grad_norm": 0.546875, "learning_rate": 0.0005108091718074706, "loss": 0.9941, "step": 5257 }, { "epoch": 0.6729809292205299, "grad_norm": 0.71484375, "learning_rate": 0.0005104476376927901, "loss": 1.4217, "step": 5258 }, { "epoch": 0.6731089210290542, "grad_norm": 0.6875, "learning_rate": 0.000510086187715626, "loss": 1.8042, "step": 5259 }, { "epoch": 0.6732369128375784, "grad_norm": 0.578125, "learning_rate": 0.0005097248219380994, "loss": 0.9559, "step": 5260 }, { "epoch": 0.6733649046461027, "grad_norm": 0.6015625, "learning_rate": 0.0005093635404223167, "loss": 1.4691, "step": 5261 }, { "epoch": 0.673492896454627, "grad_norm": 0.6796875, "learning_rate": 0.00050900234323037, "loss": 1.5334, "step": 5262 }, { "epoch": 0.6736208882631511, "grad_norm": 0.640625, "learning_rate": 0.0005086412304243369, "loss": 1.0055, "step": 5263 }, { "epoch": 0.6737488800716754, "grad_norm": 0.64453125, "learning_rate": 0.0005082802020662807, "loss": 1.2939, "step": 5264 }, { "epoch": 0.6738768718801996, "grad_norm": 0.59765625, "learning_rate": 0.0005079192582182501, "loss": 1.1867, "step": 5265 }, { "epoch": 0.6740048636887239, "grad_norm": 0.73828125, "learning_rate": 0.0005075583989422783, "loss": 1.6785, "step": 5266 }, { "epoch": 0.6741328554972482, "grad_norm": 0.7734375, "learning_rate": 0.0005071976243003855, "loss": 1.6552, "step": 5267 }, { "epoch": 0.6742608473057724, "grad_norm": 0.6328125, "learning_rate": 0.0005068369343545767, "loss": 1.103, "step": 5268 }, { "epoch": 0.6743888391142967, "grad_norm": 0.703125, "learning_rate": 0.0005064763291668425, "loss": 1.381, "step": 5269 }, { "epoch": 0.6745168309228209, "grad_norm": 0.82421875, "learning_rate": 0.0005061158087991577, "loss": 1.8188, "step": 5270 }, { "epoch": 0.6746448227313452, "grad_norm": 0.62109375, "learning_rate": 0.0005057553733134845, "loss": 0.9878, "step": 5271 }, { "epoch": 0.6747728145398695, "grad_norm": 0.5625, "learning_rate": 0.0005053950227717697, "loss": 1.0572, "step": 5272 }, { "epoch": 0.6749008063483937, "grad_norm": 0.546875, "learning_rate": 0.0005050347572359452, "loss": 0.8947, "step": 5273 }, { "epoch": 0.675028798156918, "grad_norm": 0.640625, "learning_rate": 0.000504674576767928, "loss": 1.0357, "step": 5274 }, { "epoch": 0.6751567899654423, "grad_norm": 0.67578125, "learning_rate": 0.0005043144814296215, "loss": 1.5175, "step": 5275 }, { "epoch": 0.6752847817739664, "grad_norm": 0.5390625, "learning_rate": 0.0005039544712829136, "loss": 1.2048, "step": 5276 }, { "epoch": 0.6754127735824907, "grad_norm": 0.625, "learning_rate": 0.0005035945463896781, "loss": 1.3787, "step": 5277 }, { "epoch": 0.6755407653910149, "grad_norm": 0.734375, "learning_rate": 0.0005032347068117742, "loss": 1.9546, "step": 5278 }, { "epoch": 0.6756687571995392, "grad_norm": 0.7734375, "learning_rate": 0.0005028749526110454, "loss": 2.0544, "step": 5279 }, { "epoch": 0.6757967490080635, "grad_norm": 0.625, "learning_rate": 0.0005025152838493219, "loss": 1.3904, "step": 5280 }, { "epoch": 0.6759247408165877, "grad_norm": 1.1640625, "learning_rate": 0.0005021557005884183, "loss": 1.6347, "step": 5281 }, { "epoch": 0.676052732625112, "grad_norm": 0.734375, "learning_rate": 0.0005017962028901348, "loss": 1.8253, "step": 5282 }, { "epoch": 0.6761807244336362, "grad_norm": 0.77734375, "learning_rate": 0.0005014367908162574, "loss": 1.7767, "step": 5283 }, { "epoch": 0.6763087162421605, "grad_norm": 0.640625, "learning_rate": 0.0005010774644285564, "loss": 2.0672, "step": 5284 }, { "epoch": 0.6764367080506848, "grad_norm": 0.61328125, "learning_rate": 0.000500718223788788, "loss": 1.6539, "step": 5285 }, { "epoch": 0.676564699859209, "grad_norm": 0.53515625, "learning_rate": 0.0005003590689586937, "loss": 0.747, "step": 5286 }, { "epoch": 0.6766926916677333, "grad_norm": 0.494140625, "learning_rate": 0.0005000000000000002, "loss": 0.9819, "step": 5287 }, { "epoch": 0.6768206834762576, "grad_norm": 0.7421875, "learning_rate": 0.0004996410169744187, "loss": 1.7672, "step": 5288 }, { "epoch": 0.6769486752847818, "grad_norm": 0.6171875, "learning_rate": 0.0004992821199436468, "loss": 1.3313, "step": 5289 }, { "epoch": 0.677076667093306, "grad_norm": 0.5859375, "learning_rate": 0.0004989233089693664, "loss": 0.9475, "step": 5290 }, { "epoch": 0.6772046589018302, "grad_norm": 0.7890625, "learning_rate": 0.0004985645841132458, "loss": 1.4755, "step": 5291 }, { "epoch": 0.6773326507103545, "grad_norm": 0.55859375, "learning_rate": 0.0004982059454369366, "loss": 0.8909, "step": 5292 }, { "epoch": 0.6774606425188788, "grad_norm": 0.703125, "learning_rate": 0.0004978473930020767, "loss": 1.6385, "step": 5293 }, { "epoch": 0.677588634327403, "grad_norm": 0.6328125, "learning_rate": 0.0004974889268702901, "loss": 1.2777, "step": 5294 }, { "epoch": 0.6777166261359273, "grad_norm": 0.6953125, "learning_rate": 0.0004971305471031845, "loss": 1.3129, "step": 5295 }, { "epoch": 0.6778446179444515, "grad_norm": 0.75, "learning_rate": 0.0004967722537623537, "loss": 1.5128, "step": 5296 }, { "epoch": 0.6779726097529758, "grad_norm": 0.7109375, "learning_rate": 0.0004964140469093755, "loss": 1.4913, "step": 5297 }, { "epoch": 0.6781006015615001, "grad_norm": 0.65625, "learning_rate": 0.0004960559266058139, "loss": 1.3797, "step": 5298 }, { "epoch": 0.6782285933700243, "grad_norm": 0.50390625, "learning_rate": 0.0004956978929132177, "loss": 1.0408, "step": 5299 }, { "epoch": 0.6783565851785486, "grad_norm": 0.72265625, "learning_rate": 0.000495339945893121, "loss": 1.0762, "step": 5300 }, { "epoch": 0.6784845769870729, "grad_norm": 0.640625, "learning_rate": 0.0004949820856070424, "loss": 1.5949, "step": 5301 }, { "epoch": 0.6786125687955971, "grad_norm": 0.6875, "learning_rate": 0.000494624312116486, "loss": 1.2899, "step": 5302 }, { "epoch": 0.6787405606041214, "grad_norm": 0.62890625, "learning_rate": 0.0004942666254829413, "loss": 1.0439, "step": 5303 }, { "epoch": 0.6788685524126455, "grad_norm": 0.609375, "learning_rate": 0.0004939090257678824, "loss": 0.889, "step": 5304 }, { "epoch": 0.6789965442211698, "grad_norm": 0.640625, "learning_rate": 0.0004935515130327686, "loss": 1.2029, "step": 5305 }, { "epoch": 0.6791245360296941, "grad_norm": 0.59765625, "learning_rate": 0.0004931940873390443, "loss": 0.8974, "step": 5306 }, { "epoch": 0.6792525278382183, "grad_norm": 0.72265625, "learning_rate": 0.0004928367487481389, "loss": 1.4494, "step": 5307 }, { "epoch": 0.6793805196467426, "grad_norm": 0.73046875, "learning_rate": 0.0004924794973214668, "loss": 1.3243, "step": 5308 }, { "epoch": 0.6795085114552669, "grad_norm": 0.60546875, "learning_rate": 0.0004921223331204278, "loss": 1.2025, "step": 5309 }, { "epoch": 0.6796365032637911, "grad_norm": 0.71875, "learning_rate": 0.0004917652562064058, "loss": 1.483, "step": 5310 }, { "epoch": 0.6797644950723154, "grad_norm": 0.609375, "learning_rate": 0.0004914082666407705, "loss": 1.179, "step": 5311 }, { "epoch": 0.6798924868808396, "grad_norm": 0.63671875, "learning_rate": 0.0004910513644848761, "loss": 1.5026, "step": 5312 }, { "epoch": 0.6800204786893639, "grad_norm": 1.328125, "learning_rate": 0.000490694549800063, "loss": 1.9998, "step": 5313 }, { "epoch": 0.6801484704978882, "grad_norm": 0.482421875, "learning_rate": 0.0004903378226476544, "loss": 0.7294, "step": 5314 }, { "epoch": 0.6802764623064124, "grad_norm": 0.69140625, "learning_rate": 0.0004899811830889597, "loss": 1.2398, "step": 5315 }, { "epoch": 0.6804044541149367, "grad_norm": 0.62890625, "learning_rate": 0.000489624631185274, "loss": 1.1768, "step": 5316 }, { "epoch": 0.6805324459234608, "grad_norm": 0.81640625, "learning_rate": 0.0004892681669978761, "loss": 1.584, "step": 5317 }, { "epoch": 0.6806604377319851, "grad_norm": 0.703125, "learning_rate": 0.0004889117905880307, "loss": 1.8735, "step": 5318 }, { "epoch": 0.6807884295405094, "grad_norm": 0.7109375, "learning_rate": 0.0004885555020169858, "loss": 1.5507, "step": 5319 }, { "epoch": 0.6809164213490336, "grad_norm": 0.6953125, "learning_rate": 0.00048819930134597615, "loss": 1.3476, "step": 5320 }, { "epoch": 0.6810444131575579, "grad_norm": 0.76171875, "learning_rate": 0.00048784318863622037, "loss": 1.2752, "step": 5321 }, { "epoch": 0.6811724049660822, "grad_norm": 0.65234375, "learning_rate": 0.00048748716394892257, "loss": 1.447, "step": 5322 }, { "epoch": 0.6813003967746064, "grad_norm": 0.76953125, "learning_rate": 0.0004871312273452706, "loss": 0.9485, "step": 5323 }, { "epoch": 0.6814283885831307, "grad_norm": 0.734375, "learning_rate": 0.0004867753788864385, "loss": 1.8318, "step": 5324 }, { "epoch": 0.6815563803916549, "grad_norm": 0.64453125, "learning_rate": 0.0004864196186335844, "loss": 0.9194, "step": 5325 }, { "epoch": 0.6816843722001792, "grad_norm": 0.59765625, "learning_rate": 0.0004860639466478516, "loss": 0.9975, "step": 5326 }, { "epoch": 0.6818123640087035, "grad_norm": 0.515625, "learning_rate": 0.000485708362990368, "loss": 0.8249, "step": 5327 }, { "epoch": 0.6819403558172277, "grad_norm": 0.7421875, "learning_rate": 0.0004853528677222465, "loss": 1.4133, "step": 5328 }, { "epoch": 0.682068347625752, "grad_norm": 0.53515625, "learning_rate": 0.0004849974609045849, "loss": 0.9676, "step": 5329 }, { "epoch": 0.6821963394342762, "grad_norm": 0.75, "learning_rate": 0.0004846421425984652, "loss": 1.8357, "step": 5330 }, { "epoch": 0.6823243312428005, "grad_norm": 0.6796875, "learning_rate": 0.0004842869128649553, "loss": 1.7265, "step": 5331 }, { "epoch": 0.6824523230513247, "grad_norm": 0.52734375, "learning_rate": 0.0004839317717651063, "loss": 1.1953, "step": 5332 }, { "epoch": 0.6825803148598489, "grad_norm": 0.6015625, "learning_rate": 0.0004835767193599555, "loss": 0.9396, "step": 5333 }, { "epoch": 0.6827083066683732, "grad_norm": 0.59765625, "learning_rate": 0.0004832217557105243, "loss": 1.3267, "step": 5334 }, { "epoch": 0.6828362984768975, "grad_norm": 0.60546875, "learning_rate": 0.0004828668808778194, "loss": 1.5007, "step": 5335 }, { "epoch": 0.6829642902854217, "grad_norm": 0.61328125, "learning_rate": 0.00048251209492283087, "loss": 1.1638, "step": 5336 }, { "epoch": 0.683092282093946, "grad_norm": 0.55078125, "learning_rate": 0.000482157397906535, "loss": 0.9997, "step": 5337 }, { "epoch": 0.6832202739024702, "grad_norm": 0.65625, "learning_rate": 0.0004818027898898919, "loss": 1.2109, "step": 5338 }, { "epoch": 0.6833482657109945, "grad_norm": 0.69921875, "learning_rate": 0.00048144827093384723, "loss": 1.6108, "step": 5339 }, { "epoch": 0.6834762575195188, "grad_norm": 0.66796875, "learning_rate": 0.00048109384109933095, "loss": 1.0177, "step": 5340 }, { "epoch": 0.683604249328043, "grad_norm": 0.6796875, "learning_rate": 0.00048073950044725687, "loss": 1.537, "step": 5341 }, { "epoch": 0.6837322411365673, "grad_norm": 0.56640625, "learning_rate": 0.00048038524903852456, "loss": 0.9097, "step": 5342 }, { "epoch": 0.6838602329450915, "grad_norm": 0.69921875, "learning_rate": 0.0004800310869340178, "loss": 0.9789, "step": 5343 }, { "epoch": 0.6839882247536158, "grad_norm": 0.6953125, "learning_rate": 0.00047967701419460553, "loss": 0.8388, "step": 5344 }, { "epoch": 0.6841162165621401, "grad_norm": 0.58203125, "learning_rate": 0.00047932303088114, "loss": 1.1898, "step": 5345 }, { "epoch": 0.6842442083706642, "grad_norm": 0.6015625, "learning_rate": 0.00047896913705445957, "loss": 1.062, "step": 5346 }, { "epoch": 0.6843722001791885, "grad_norm": 0.5078125, "learning_rate": 0.0004786153327753865, "loss": 0.919, "step": 5347 }, { "epoch": 0.6845001919877128, "grad_norm": 0.6171875, "learning_rate": 0.0004782616181047278, "loss": 1.2271, "step": 5348 }, { "epoch": 0.684628183796237, "grad_norm": 0.5390625, "learning_rate": 0.00047790799310327514, "loss": 1.169, "step": 5349 }, { "epoch": 0.6847561756047613, "grad_norm": 0.8515625, "learning_rate": 0.00047755445783180474, "loss": 2.1309, "step": 5350 }, { "epoch": 0.6848841674132855, "grad_norm": 0.5390625, "learning_rate": 0.00047720101235107726, "loss": 1.1076, "step": 5351 }, { "epoch": 0.6850121592218098, "grad_norm": 0.66015625, "learning_rate": 0.00047684765672183807, "loss": 1.1818, "step": 5352 }, { "epoch": 0.6851401510303341, "grad_norm": 0.74609375, "learning_rate": 0.0004764943910048175, "loss": 1.4034, "step": 5353 }, { "epoch": 0.6852681428388583, "grad_norm": 0.58984375, "learning_rate": 0.0004761412152607292, "loss": 0.94, "step": 5354 }, { "epoch": 0.6853961346473826, "grad_norm": 0.6640625, "learning_rate": 0.0004757881295502725, "loss": 1.3163, "step": 5355 }, { "epoch": 0.6855241264559068, "grad_norm": 0.60546875, "learning_rate": 0.00047543513393413106, "loss": 1.171, "step": 5356 }, { "epoch": 0.6856521182644311, "grad_norm": 0.625, "learning_rate": 0.0004750822284729727, "loss": 0.9642, "step": 5357 }, { "epoch": 0.6857801100729554, "grad_norm": 0.61328125, "learning_rate": 0.00047472941322745043, "loss": 0.8478, "step": 5358 }, { "epoch": 0.6859081018814795, "grad_norm": 0.76171875, "learning_rate": 0.0004743766882582006, "loss": 1.8073, "step": 5359 }, { "epoch": 0.6860360936900038, "grad_norm": 0.78125, "learning_rate": 0.0004740240536258449, "loss": 1.1491, "step": 5360 }, { "epoch": 0.6861640854985281, "grad_norm": 0.609375, "learning_rate": 0.000473671509390989, "loss": 1.4852, "step": 5361 }, { "epoch": 0.6862920773070523, "grad_norm": 0.59765625, "learning_rate": 0.00047331905561422464, "loss": 1.1144, "step": 5362 }, { "epoch": 0.6864200691155766, "grad_norm": 0.828125, "learning_rate": 0.00047296669235612546, "loss": 1.2925, "step": 5363 }, { "epoch": 0.6865480609241008, "grad_norm": 0.6328125, "learning_rate": 0.00047261441967725107, "loss": 1.5892, "step": 5364 }, { "epoch": 0.6866760527326251, "grad_norm": 0.71875, "learning_rate": 0.0004722622376381455, "loss": 1.278, "step": 5365 }, { "epoch": 0.6868040445411494, "grad_norm": 0.57421875, "learning_rate": 0.00047191014629933704, "loss": 1.1191, "step": 5366 }, { "epoch": 0.6869320363496736, "grad_norm": 0.7265625, "learning_rate": 0.0004715581457213377, "loss": 1.6018, "step": 5367 }, { "epoch": 0.6870600281581979, "grad_norm": 0.62109375, "learning_rate": 0.00047120623596464496, "loss": 1.3193, "step": 5368 }, { "epoch": 0.6871880199667221, "grad_norm": 0.55859375, "learning_rate": 0.00047085441708974007, "loss": 0.8773, "step": 5369 }, { "epoch": 0.6873160117752464, "grad_norm": 0.5625, "learning_rate": 0.00047050268915708896, "loss": 1.143, "step": 5370 }, { "epoch": 0.6874440035837707, "grad_norm": 0.7421875, "learning_rate": 0.0004701510522271416, "loss": 1.4249, "step": 5371 }, { "epoch": 0.6875719953922949, "grad_norm": 0.70703125, "learning_rate": 0.00046979950636033254, "loss": 0.952, "step": 5372 }, { "epoch": 0.6876999872008192, "grad_norm": 0.71484375, "learning_rate": 0.0004694480516170807, "loss": 1.3743, "step": 5373 }, { "epoch": 0.6878279790093434, "grad_norm": 0.89453125, "learning_rate": 0.00046909668805778936, "loss": 1.7114, "step": 5374 }, { "epoch": 0.6879559708178676, "grad_norm": 0.609375, "learning_rate": 0.00046874541574284625, "loss": 1.1846, "step": 5375 }, { "epoch": 0.6880839626263919, "grad_norm": 0.71484375, "learning_rate": 0.00046839423473262257, "loss": 1.3133, "step": 5376 }, { "epoch": 0.6882119544349161, "grad_norm": 0.59765625, "learning_rate": 0.0004680431450874748, "loss": 1.178, "step": 5377 }, { "epoch": 0.6883399462434404, "grad_norm": 0.7890625, "learning_rate": 0.0004676921468677434, "loss": 1.357, "step": 5378 }, { "epoch": 0.6884679380519647, "grad_norm": 0.82421875, "learning_rate": 0.00046734124013375324, "loss": 1.2325, "step": 5379 }, { "epoch": 0.6885959298604889, "grad_norm": 0.6953125, "learning_rate": 0.0004669904249458136, "loss": 0.9748, "step": 5380 }, { "epoch": 0.6887239216690132, "grad_norm": 0.53125, "learning_rate": 0.00046663970136421684, "loss": 0.747, "step": 5381 }, { "epoch": 0.6888519134775375, "grad_norm": 0.69921875, "learning_rate": 0.0004662890694492412, "loss": 1.6421, "step": 5382 }, { "epoch": 0.6889799052860617, "grad_norm": 0.55859375, "learning_rate": 0.00046593852926114787, "loss": 0.8991, "step": 5383 }, { "epoch": 0.689107897094586, "grad_norm": 0.69921875, "learning_rate": 0.000465588080860184, "loss": 1.0704, "step": 5384 }, { "epoch": 0.6892358889031102, "grad_norm": 0.59765625, "learning_rate": 0.00046523772430657887, "loss": 1.2781, "step": 5385 }, { "epoch": 0.6893638807116345, "grad_norm": 0.60546875, "learning_rate": 0.00046488745966054713, "loss": 0.9693, "step": 5386 }, { "epoch": 0.6894918725201588, "grad_norm": 0.6015625, "learning_rate": 0.0004645372869822876, "loss": 1.0416, "step": 5387 }, { "epoch": 0.6896198643286829, "grad_norm": 0.6015625, "learning_rate": 0.0004641872063319833, "loss": 1.2591, "step": 5388 }, { "epoch": 0.6897478561372072, "grad_norm": 0.62109375, "learning_rate": 0.0004638372177698007, "loss": 1.2081, "step": 5389 }, { "epoch": 0.6898758479457314, "grad_norm": 0.71484375, "learning_rate": 0.00046348732135589144, "loss": 1.7203, "step": 5390 }, { "epoch": 0.6900038397542557, "grad_norm": 0.71875, "learning_rate": 0.0004631375171503907, "loss": 1.1161, "step": 5391 }, { "epoch": 0.69013183156278, "grad_norm": 0.5390625, "learning_rate": 0.0004627878052134181, "loss": 0.9612, "step": 5392 }, { "epoch": 0.6902598233713042, "grad_norm": 0.66796875, "learning_rate": 0.00046243818560507733, "loss": 0.9925, "step": 5393 }, { "epoch": 0.6903878151798285, "grad_norm": 0.6171875, "learning_rate": 0.0004620886583854562, "loss": 1.1552, "step": 5394 }, { "epoch": 0.6905158069883528, "grad_norm": 0.6953125, "learning_rate": 0.0004617392236146266, "loss": 1.5563, "step": 5395 }, { "epoch": 0.690643798796877, "grad_norm": 0.56640625, "learning_rate": 0.0004613898813526445, "loss": 1.1805, "step": 5396 }, { "epoch": 0.6907717906054013, "grad_norm": 0.578125, "learning_rate": 0.0004610406316595506, "loss": 1.2188, "step": 5397 }, { "epoch": 0.6908997824139255, "grad_norm": 0.65234375, "learning_rate": 0.0004606914745953682, "loss": 1.7372, "step": 5398 }, { "epoch": 0.6910277742224498, "grad_norm": 0.6015625, "learning_rate": 0.000460342410220106, "loss": 1.104, "step": 5399 }, { "epoch": 0.6911557660309741, "grad_norm": 0.75, "learning_rate": 0.00045999343859375643, "loss": 2.1693, "step": 5400 }, { "epoch": 0.6912837578394982, "grad_norm": 0.87109375, "learning_rate": 0.00045964455977629595, "loss": 1.703, "step": 5401 }, { "epoch": 0.6914117496480225, "grad_norm": 0.6484375, "learning_rate": 0.0004592957738276854, "loss": 0.9165, "step": 5402 }, { "epoch": 0.6915397414565467, "grad_norm": 0.609375, "learning_rate": 0.0004589470808078685, "loss": 0.8038, "step": 5403 }, { "epoch": 0.691667733265071, "grad_norm": 0.64453125, "learning_rate": 0.00045859848077677424, "loss": 0.9878, "step": 5404 }, { "epoch": 0.6917957250735953, "grad_norm": 0.67578125, "learning_rate": 0.0004582499737943151, "loss": 1.6525, "step": 5405 }, { "epoch": 0.6919237168821195, "grad_norm": 0.69921875, "learning_rate": 0.00045790155992038787, "loss": 1.3526, "step": 5406 }, { "epoch": 0.6920517086906438, "grad_norm": 0.65625, "learning_rate": 0.00045755323921487293, "loss": 1.5359, "step": 5407 }, { "epoch": 0.6921797004991681, "grad_norm": 0.6640625, "learning_rate": 0.00045720501173763487, "loss": 1.2661, "step": 5408 }, { "epoch": 0.6923076923076923, "grad_norm": 0.66015625, "learning_rate": 0.00045685687754852224, "loss": 1.3323, "step": 5409 }, { "epoch": 0.6924356841162166, "grad_norm": 0.59765625, "learning_rate": 0.00045650883670736756, "loss": 1.0182, "step": 5410 }, { "epoch": 0.6925636759247408, "grad_norm": 0.578125, "learning_rate": 0.00045616088927398767, "loss": 0.9394, "step": 5411 }, { "epoch": 0.6926916677332651, "grad_norm": 0.59765625, "learning_rate": 0.0004558130353081823, "loss": 1.037, "step": 5412 }, { "epoch": 0.6928196595417894, "grad_norm": 0.6484375, "learning_rate": 0.0004554652748697361, "loss": 1.2838, "step": 5413 }, { "epoch": 0.6929476513503136, "grad_norm": 0.73828125, "learning_rate": 0.0004551176080184173, "loss": 1.6451, "step": 5414 }, { "epoch": 0.6930756431588379, "grad_norm": 0.7265625, "learning_rate": 0.00045477003481397815, "loss": 1.3476, "step": 5415 }, { "epoch": 0.693203634967362, "grad_norm": 0.68359375, "learning_rate": 0.00045442255531615483, "loss": 1.2118, "step": 5416 }, { "epoch": 0.6933316267758863, "grad_norm": 0.63671875, "learning_rate": 0.00045407516958466724, "loss": 1.0358, "step": 5417 }, { "epoch": 0.6934596185844106, "grad_norm": 0.703125, "learning_rate": 0.0004537278776792192, "loss": 1.1848, "step": 5418 }, { "epoch": 0.6935876103929348, "grad_norm": 0.80078125, "learning_rate": 0.0004533806796594989, "loss": 1.558, "step": 5419 }, { "epoch": 0.6937156022014591, "grad_norm": 0.6953125, "learning_rate": 0.0004530335755851772, "loss": 1.1938, "step": 5420 }, { "epoch": 0.6938435940099834, "grad_norm": 0.68359375, "learning_rate": 0.0004526865655159099, "loss": 1.2026, "step": 5421 }, { "epoch": 0.6939715858185076, "grad_norm": 0.59375, "learning_rate": 0.00045233964951133643, "loss": 1.0856, "step": 5422 }, { "epoch": 0.6940995776270319, "grad_norm": 0.6953125, "learning_rate": 0.00045199282763107974, "loss": 1.0349, "step": 5423 }, { "epoch": 0.6942275694355561, "grad_norm": 0.6015625, "learning_rate": 0.00045164609993474737, "loss": 1.1385, "step": 5424 }, { "epoch": 0.6943555612440804, "grad_norm": 0.73046875, "learning_rate": 0.0004512994664819292, "loss": 0.9035, "step": 5425 }, { "epoch": 0.6944835530526047, "grad_norm": 0.65625, "learning_rate": 0.0004509529273322003, "loss": 1.3202, "step": 5426 }, { "epoch": 0.6946115448611289, "grad_norm": 0.61328125, "learning_rate": 0.0004506064825451189, "loss": 1.4018, "step": 5427 }, { "epoch": 0.6947395366696532, "grad_norm": 0.71484375, "learning_rate": 0.00045026013218022734, "loss": 1.5868, "step": 5428 }, { "epoch": 0.6948675284781773, "grad_norm": 0.53515625, "learning_rate": 0.0004499138762970515, "loss": 1.0554, "step": 5429 }, { "epoch": 0.6949955202867016, "grad_norm": 0.55078125, "learning_rate": 0.000449567714955101, "loss": 0.9893, "step": 5430 }, { "epoch": 0.6951235120952259, "grad_norm": 0.5390625, "learning_rate": 0.0004492216482138692, "loss": 0.9679, "step": 5431 }, { "epoch": 0.6952515039037501, "grad_norm": 0.58203125, "learning_rate": 0.00044887567613283353, "loss": 1.3626, "step": 5432 }, { "epoch": 0.6953794957122744, "grad_norm": 0.5859375, "learning_rate": 0.00044852979877145504, "loss": 1.0933, "step": 5433 }, { "epoch": 0.6955074875207987, "grad_norm": 0.640625, "learning_rate": 0.00044818401618917784, "loss": 1.3552, "step": 5434 }, { "epoch": 0.6956354793293229, "grad_norm": 0.6015625, "learning_rate": 0.0004478383284454305, "loss": 1.1603, "step": 5435 }, { "epoch": 0.6957634711378472, "grad_norm": 0.59765625, "learning_rate": 0.0004474927355996251, "loss": 0.8761, "step": 5436 }, { "epoch": 0.6958914629463714, "grad_norm": 0.71875, "learning_rate": 0.0004471472377111574, "loss": 1.0215, "step": 5437 }, { "epoch": 0.6960194547548957, "grad_norm": 0.53125, "learning_rate": 0.00044680183483940673, "loss": 0.7736, "step": 5438 }, { "epoch": 0.69614744656342, "grad_norm": 0.79296875, "learning_rate": 0.0004464565270437363, "loss": 1.7273, "step": 5439 }, { "epoch": 0.6962754383719442, "grad_norm": 0.671875, "learning_rate": 0.00044611131438349283, "loss": 1.4319, "step": 5440 }, { "epoch": 0.6964034301804685, "grad_norm": 0.734375, "learning_rate": 0.0004457661969180067, "loss": 1.7506, "step": 5441 }, { "epoch": 0.6965314219889926, "grad_norm": 0.80078125, "learning_rate": 0.0004454211747065923, "loss": 1.6409, "step": 5442 }, { "epoch": 0.696659413797517, "grad_norm": 0.71875, "learning_rate": 0.0004450762478085465, "loss": 1.1067, "step": 5443 }, { "epoch": 0.6967874056060412, "grad_norm": 0.76953125, "learning_rate": 0.00044473141628315104, "loss": 1.6947, "step": 5444 }, { "epoch": 0.6969153974145654, "grad_norm": 0.796875, "learning_rate": 0.0004443866801896709, "loss": 1.4037, "step": 5445 }, { "epoch": 0.6970433892230897, "grad_norm": 0.51953125, "learning_rate": 0.0004440420395873547, "loss": 1.0186, "step": 5446 }, { "epoch": 0.697171381031614, "grad_norm": 0.578125, "learning_rate": 0.00044369749453543393, "loss": 1.0325, "step": 5447 }, { "epoch": 0.6972993728401382, "grad_norm": 0.671875, "learning_rate": 0.00044335304509312466, "loss": 1.4335, "step": 5448 }, { "epoch": 0.6974273646486625, "grad_norm": 0.66796875, "learning_rate": 0.00044300869131962595, "loss": 1.4833, "step": 5449 }, { "epoch": 0.6975553564571867, "grad_norm": 0.74609375, "learning_rate": 0.00044266443327412085, "loss": 1.3632, "step": 5450 }, { "epoch": 0.697683348265711, "grad_norm": 0.64453125, "learning_rate": 0.0004423202710157754, "loss": 0.8878, "step": 5451 }, { "epoch": 0.6978113400742353, "grad_norm": 0.5859375, "learning_rate": 0.00044197620460373967, "loss": 0.8826, "step": 5452 }, { "epoch": 0.6979393318827595, "grad_norm": 0.69921875, "learning_rate": 0.0004416322340971469, "loss": 1.2079, "step": 5453 }, { "epoch": 0.6980673236912838, "grad_norm": 0.5703125, "learning_rate": 0.00044128835955511416, "loss": 1.0911, "step": 5454 }, { "epoch": 0.6981953154998081, "grad_norm": 0.6328125, "learning_rate": 0.0004409445810367421, "loss": 1.5344, "step": 5455 }, { "epoch": 0.6983233073083323, "grad_norm": 0.51171875, "learning_rate": 0.00044060089860111396, "loss": 0.8415, "step": 5456 }, { "epoch": 0.6984512991168565, "grad_norm": 0.6171875, "learning_rate": 0.0004402573123072975, "loss": 1.2915, "step": 5457 }, { "epoch": 0.6985792909253807, "grad_norm": 0.6015625, "learning_rate": 0.0004399138222143436, "loss": 1.1171, "step": 5458 }, { "epoch": 0.698707282733905, "grad_norm": 0.6328125, "learning_rate": 0.0004395704283812871, "loss": 1.1954, "step": 5459 }, { "epoch": 0.6988352745424293, "grad_norm": 0.7109375, "learning_rate": 0.0004392271308671445, "loss": 1.8424, "step": 5460 }, { "epoch": 0.6989632663509535, "grad_norm": 0.73046875, "learning_rate": 0.00043888392973091817, "loss": 1.7688, "step": 5461 }, { "epoch": 0.6990912581594778, "grad_norm": 0.63671875, "learning_rate": 0.00043854082503159264, "loss": 1.1134, "step": 5462 }, { "epoch": 0.699219249968002, "grad_norm": 0.57421875, "learning_rate": 0.00043819781682813576, "loss": 1.3505, "step": 5463 }, { "epoch": 0.6993472417765263, "grad_norm": 0.57421875, "learning_rate": 0.00043785490517949955, "loss": 0.8827, "step": 5464 }, { "epoch": 0.6994752335850506, "grad_norm": 0.57421875, "learning_rate": 0.00043751209014461826, "loss": 0.9663, "step": 5465 }, { "epoch": 0.6996032253935748, "grad_norm": 0.65625, "learning_rate": 0.00043716937178241036, "loss": 1.6808, "step": 5466 }, { "epoch": 0.6997312172020991, "grad_norm": 0.73046875, "learning_rate": 0.00043682675015177774, "loss": 1.4897, "step": 5467 }, { "epoch": 0.6998592090106234, "grad_norm": 0.60546875, "learning_rate": 0.0004364842253116059, "loss": 1.2283, "step": 5468 }, { "epoch": 0.6999872008191476, "grad_norm": 0.609375, "learning_rate": 0.00043614179732076244, "loss": 1.2354, "step": 5469 }, { "epoch": 0.7001151926276719, "grad_norm": 0.55078125, "learning_rate": 0.0004357994662380996, "loss": 1.0873, "step": 5470 }, { "epoch": 0.700243184436196, "grad_norm": 0.6171875, "learning_rate": 0.0004354572321224525, "loss": 0.9833, "step": 5471 }, { "epoch": 0.7003711762447203, "grad_norm": 0.7890625, "learning_rate": 0.00043511509503263955, "loss": 1.9097, "step": 5472 }, { "epoch": 0.7004991680532446, "grad_norm": 0.703125, "learning_rate": 0.0004347730550274628, "loss": 1.4709, "step": 5473 }, { "epoch": 0.7006271598617688, "grad_norm": 0.5078125, "learning_rate": 0.0004344311121657071, "loss": 0.7995, "step": 5474 }, { "epoch": 0.7007551516702931, "grad_norm": 0.75, "learning_rate": 0.000434089266506141, "loss": 1.3459, "step": 5475 }, { "epoch": 0.7008831434788173, "grad_norm": 0.6875, "learning_rate": 0.0004337475181075162, "loss": 1.302, "step": 5476 }, { "epoch": 0.7010111352873416, "grad_norm": 0.703125, "learning_rate": 0.00043340586702856823, "loss": 1.5572, "step": 5477 }, { "epoch": 0.7011391270958659, "grad_norm": 0.6953125, "learning_rate": 0.00043306431332801434, "loss": 1.4949, "step": 5478 }, { "epoch": 0.7012671189043901, "grad_norm": 0.77734375, "learning_rate": 0.0004327228570645566, "loss": 1.6552, "step": 5479 }, { "epoch": 0.7013951107129144, "grad_norm": 0.55078125, "learning_rate": 0.0004323814982968799, "loss": 1.0335, "step": 5480 }, { "epoch": 0.7015231025214387, "grad_norm": 0.59375, "learning_rate": 0.0004320402370836525, "loss": 1.18, "step": 5481 }, { "epoch": 0.7016510943299629, "grad_norm": 0.67578125, "learning_rate": 0.000431699073483525, "loss": 1.3708, "step": 5482 }, { "epoch": 0.7017790861384872, "grad_norm": 0.75390625, "learning_rate": 0.000431358007555132, "loss": 2.3493, "step": 5483 }, { "epoch": 0.7019070779470113, "grad_norm": 0.5078125, "learning_rate": 0.0004310170393570917, "loss": 0.8746, "step": 5484 }, { "epoch": 0.7020350697555356, "grad_norm": 0.66796875, "learning_rate": 0.0004306761689480049, "loss": 1.1622, "step": 5485 }, { "epoch": 0.7021630615640599, "grad_norm": 0.546875, "learning_rate": 0.0004303353963864559, "loss": 0.8956, "step": 5486 }, { "epoch": 0.7022910533725841, "grad_norm": 0.58984375, "learning_rate": 0.0004299947217310114, "loss": 1.2908, "step": 5487 }, { "epoch": 0.7024190451811084, "grad_norm": 0.77734375, "learning_rate": 0.00042965414504022215, "loss": 1.3897, "step": 5488 }, { "epoch": 0.7025470369896326, "grad_norm": 0.609375, "learning_rate": 0.0004293136663726217, "loss": 1.0237, "step": 5489 }, { "epoch": 0.7026750287981569, "grad_norm": 0.7109375, "learning_rate": 0.00042897328578672743, "loss": 1.3041, "step": 5490 }, { "epoch": 0.7028030206066812, "grad_norm": 0.6171875, "learning_rate": 0.00042863300334103837, "loss": 1.3134, "step": 5491 }, { "epoch": 0.7029310124152054, "grad_norm": 0.703125, "learning_rate": 0.00042829281909403797, "loss": 1.3934, "step": 5492 }, { "epoch": 0.7030590042237297, "grad_norm": 0.578125, "learning_rate": 0.00042795273310419237, "loss": 1.1175, "step": 5493 }, { "epoch": 0.703186996032254, "grad_norm": 0.734375, "learning_rate": 0.000427612745429951, "loss": 1.6743, "step": 5494 }, { "epoch": 0.7033149878407782, "grad_norm": 0.6796875, "learning_rate": 0.0004272728561297461, "loss": 1.2831, "step": 5495 }, { "epoch": 0.7034429796493025, "grad_norm": 0.50390625, "learning_rate": 0.0004269330652619933, "loss": 0.8272, "step": 5496 }, { "epoch": 0.7035709714578267, "grad_norm": 0.66015625, "learning_rate": 0.00042659337288509104, "loss": 1.3547, "step": 5497 }, { "epoch": 0.703698963266351, "grad_norm": 0.71875, "learning_rate": 0.00042625377905742103, "loss": 1.7376, "step": 5498 }, { "epoch": 0.7038269550748752, "grad_norm": 0.69140625, "learning_rate": 0.00042591428383734843, "loss": 1.3953, "step": 5499 }, { "epoch": 0.7039549468833994, "grad_norm": 0.4609375, "learning_rate": 0.0004255748872832201, "loss": 0.6651, "step": 5500 }, { "epoch": 0.7040829386919237, "grad_norm": 0.79296875, "learning_rate": 0.0004252355894533674, "loss": 2.0509, "step": 5501 }, { "epoch": 0.7042109305004479, "grad_norm": 0.60546875, "learning_rate": 0.00042489639040610395, "loss": 1.2592, "step": 5502 }, { "epoch": 0.7043389223089722, "grad_norm": 0.5546875, "learning_rate": 0.0004245572901997271, "loss": 0.9089, "step": 5503 }, { "epoch": 0.7044669141174965, "grad_norm": 0.53515625, "learning_rate": 0.0004242182888925161, "loss": 0.6267, "step": 5504 }, { "epoch": 0.7045949059260207, "grad_norm": 0.5625, "learning_rate": 0.00042387938654273364, "loss": 0.8006, "step": 5505 }, { "epoch": 0.704722897734545, "grad_norm": 0.69921875, "learning_rate": 0.00042354058320862644, "loss": 1.6736, "step": 5506 }, { "epoch": 0.7048508895430693, "grad_norm": 0.671875, "learning_rate": 0.0004232018789484228, "loss": 1.4074, "step": 5507 }, { "epoch": 0.7049788813515935, "grad_norm": 0.54296875, "learning_rate": 0.00042286327382033505, "loss": 1.0363, "step": 5508 }, { "epoch": 0.7051068731601178, "grad_norm": 0.5546875, "learning_rate": 0.0004225247678825573, "loss": 0.8589, "step": 5509 }, { "epoch": 0.705234864968642, "grad_norm": 0.75, "learning_rate": 0.0004221863611932677, "loss": 1.2043, "step": 5510 }, { "epoch": 0.7053628567771663, "grad_norm": 0.63671875, "learning_rate": 0.00042184805381062664, "loss": 1.2715, "step": 5511 }, { "epoch": 0.7054908485856906, "grad_norm": 0.88671875, "learning_rate": 0.0004215098457927783, "loss": 1.4949, "step": 5512 }, { "epoch": 0.7056188403942147, "grad_norm": 0.8046875, "learning_rate": 0.0004211717371978485, "loss": 1.8981, "step": 5513 }, { "epoch": 0.705746832202739, "grad_norm": 0.66015625, "learning_rate": 0.00042083372808394704, "loss": 1.4452, "step": 5514 }, { "epoch": 0.7058748240112632, "grad_norm": 0.625, "learning_rate": 0.00042049581850916616, "loss": 1.2156, "step": 5515 }, { "epoch": 0.7060028158197875, "grad_norm": 0.64453125, "learning_rate": 0.00042015800853158127, "loss": 1.713, "step": 5516 }, { "epoch": 0.7061308076283118, "grad_norm": 0.79296875, "learning_rate": 0.00041982029820925037, "loss": 1.4578, "step": 5517 }, { "epoch": 0.706258799436836, "grad_norm": 0.703125, "learning_rate": 0.00041948268760021444, "loss": 1.2448, "step": 5518 }, { "epoch": 0.7063867912453603, "grad_norm": 0.59765625, "learning_rate": 0.00041914517676249743, "loss": 0.9639, "step": 5519 }, { "epoch": 0.7065147830538846, "grad_norm": 0.515625, "learning_rate": 0.00041880776575410606, "loss": 0.7851, "step": 5520 }, { "epoch": 0.7066427748624088, "grad_norm": 0.7578125, "learning_rate": 0.0004184704546330302, "loss": 1.8938, "step": 5521 }, { "epoch": 0.7067707666709331, "grad_norm": 0.5703125, "learning_rate": 0.00041813324345724157, "loss": 1.0485, "step": 5522 }, { "epoch": 0.7068987584794573, "grad_norm": 0.859375, "learning_rate": 0.0004177961322846956, "loss": 1.401, "step": 5523 }, { "epoch": 0.7070267502879816, "grad_norm": 0.62109375, "learning_rate": 0.0004174591211733305, "loss": 1.2673, "step": 5524 }, { "epoch": 0.7071547420965059, "grad_norm": 0.765625, "learning_rate": 0.000417122210181067, "loss": 1.3863, "step": 5525 }, { "epoch": 0.70728273390503, "grad_norm": 0.6328125, "learning_rate": 0.00041678539936580906, "loss": 1.2057, "step": 5526 }, { "epoch": 0.7074107257135543, "grad_norm": 0.69921875, "learning_rate": 0.0004164486887854424, "loss": 1.603, "step": 5527 }, { "epoch": 0.7075387175220786, "grad_norm": 0.8515625, "learning_rate": 0.00041611207849783616, "loss": 1.5011, "step": 5528 }, { "epoch": 0.7076667093306028, "grad_norm": 0.67578125, "learning_rate": 0.0004157755685608431, "loss": 1.4429, "step": 5529 }, { "epoch": 0.7077947011391271, "grad_norm": 0.73828125, "learning_rate": 0.0004154391590322978, "loss": 1.4545, "step": 5530 }, { "epoch": 0.7079226929476513, "grad_norm": 0.6875, "learning_rate": 0.00041510284997001704, "loss": 1.037, "step": 5531 }, { "epoch": 0.7080506847561756, "grad_norm": 0.76171875, "learning_rate": 0.00041476664143180134, "loss": 1.2166, "step": 5532 }, { "epoch": 0.7081786765646999, "grad_norm": 0.66015625, "learning_rate": 0.00041443053347543345, "loss": 1.4178, "step": 5533 }, { "epoch": 0.7083066683732241, "grad_norm": 0.6875, "learning_rate": 0.0004140945261586796, "loss": 1.562, "step": 5534 }, { "epoch": 0.7084346601817484, "grad_norm": 0.6328125, "learning_rate": 0.0004137586195392872, "loss": 1.0014, "step": 5535 }, { "epoch": 0.7085626519902726, "grad_norm": 0.76953125, "learning_rate": 0.00041342281367498767, "loss": 1.0367, "step": 5536 }, { "epoch": 0.7086906437987969, "grad_norm": 0.5625, "learning_rate": 0.00041308710862349477, "loss": 0.9299, "step": 5537 }, { "epoch": 0.7088186356073212, "grad_norm": 0.77734375, "learning_rate": 0.00041275150444250463, "loss": 1.3789, "step": 5538 }, { "epoch": 0.7089466274158454, "grad_norm": 0.72265625, "learning_rate": 0.0004124160011896965, "loss": 1.6433, "step": 5539 }, { "epoch": 0.7090746192243697, "grad_norm": 0.64453125, "learning_rate": 0.0004120805989227319, "loss": 1.2369, "step": 5540 }, { "epoch": 0.709202611032894, "grad_norm": 0.6171875, "learning_rate": 0.00041174529769925526, "loss": 0.9528, "step": 5541 }, { "epoch": 0.7093306028414181, "grad_norm": 0.50390625, "learning_rate": 0.0004114100975768935, "loss": 1.0805, "step": 5542 }, { "epoch": 0.7094585946499424, "grad_norm": 0.67578125, "learning_rate": 0.0004110749986132566, "loss": 1.4812, "step": 5543 }, { "epoch": 0.7095865864584666, "grad_norm": 0.5234375, "learning_rate": 0.000410740000865936, "loss": 0.7638, "step": 5544 }, { "epoch": 0.7097145782669909, "grad_norm": 0.69140625, "learning_rate": 0.00041040510439250676, "loss": 1.4272, "step": 5545 }, { "epoch": 0.7098425700755152, "grad_norm": 0.54296875, "learning_rate": 0.00041007030925052645, "loss": 0.5393, "step": 5546 }, { "epoch": 0.7099705618840394, "grad_norm": 0.609375, "learning_rate": 0.00040973561549753493, "loss": 1.1129, "step": 5547 }, { "epoch": 0.7100985536925637, "grad_norm": 0.55859375, "learning_rate": 0.0004094010231910552, "loss": 1.0713, "step": 5548 }, { "epoch": 0.7102265455010879, "grad_norm": 0.5390625, "learning_rate": 0.00040906653238859147, "loss": 1.1098, "step": 5549 }, { "epoch": 0.7103545373096122, "grad_norm": 0.7734375, "learning_rate": 0.00040873214314763207, "loss": 1.4736, "step": 5550 }, { "epoch": 0.7104825291181365, "grad_norm": 0.671875, "learning_rate": 0.00040839785552564654, "loss": 1.3915, "step": 5551 }, { "epoch": 0.7106105209266607, "grad_norm": 0.68359375, "learning_rate": 0.00040806366958008887, "loss": 0.9844, "step": 5552 }, { "epoch": 0.710738512735185, "grad_norm": 0.6953125, "learning_rate": 0.0004077295853683933, "loss": 1.3981, "step": 5553 }, { "epoch": 0.7108665045437093, "grad_norm": 0.70703125, "learning_rate": 0.00040739560294797796, "loss": 1.2742, "step": 5554 }, { "epoch": 0.7109944963522334, "grad_norm": 1.2578125, "learning_rate": 0.00040706172237624304, "loss": 1.6626, "step": 5555 }, { "epoch": 0.7111224881607577, "grad_norm": 0.63671875, "learning_rate": 0.0004067279437105719, "loss": 1.3853, "step": 5556 }, { "epoch": 0.7112504799692819, "grad_norm": 0.51953125, "learning_rate": 0.00040639426700832883, "loss": 0.9761, "step": 5557 }, { "epoch": 0.7113784717778062, "grad_norm": 0.5078125, "learning_rate": 0.00040606069232686215, "loss": 0.7802, "step": 5558 }, { "epoch": 0.7115064635863305, "grad_norm": 0.625, "learning_rate": 0.000405727219723502, "loss": 1.2469, "step": 5559 }, { "epoch": 0.7116344553948547, "grad_norm": 0.98828125, "learning_rate": 0.0004053938492555611, "loss": 1.9396, "step": 5560 }, { "epoch": 0.711762447203379, "grad_norm": 0.486328125, "learning_rate": 0.0004050605809803345, "loss": 0.7736, "step": 5561 }, { "epoch": 0.7118904390119032, "grad_norm": 0.66796875, "learning_rate": 0.0004047274149550998, "loss": 1.3022, "step": 5562 }, { "epoch": 0.7120184308204275, "grad_norm": 0.65234375, "learning_rate": 0.0004043943512371171, "loss": 1.2667, "step": 5563 }, { "epoch": 0.7121464226289518, "grad_norm": 0.58984375, "learning_rate": 0.00040406138988362863, "loss": 0.6292, "step": 5564 }, { "epoch": 0.712274414437476, "grad_norm": 0.578125, "learning_rate": 0.0004037285309518596, "loss": 1.0511, "step": 5565 }, { "epoch": 0.7124024062460003, "grad_norm": 0.734375, "learning_rate": 0.00040339577449901654, "loss": 0.9983, "step": 5566 }, { "epoch": 0.7125303980545246, "grad_norm": 0.55078125, "learning_rate": 0.0004030631205822893, "loss": 1.0426, "step": 5567 }, { "epoch": 0.7126583898630487, "grad_norm": 0.65234375, "learning_rate": 0.0004027305692588501, "loss": 1.9369, "step": 5568 }, { "epoch": 0.712786381671573, "grad_norm": 0.6171875, "learning_rate": 0.00040239812058585304, "loss": 1.0793, "step": 5569 }, { "epoch": 0.7129143734800972, "grad_norm": 0.703125, "learning_rate": 0.00040206577462043524, "loss": 1.4634, "step": 5570 }, { "epoch": 0.7130423652886215, "grad_norm": 0.82421875, "learning_rate": 0.00040173353141971513, "loss": 1.2423, "step": 5571 }, { "epoch": 0.7131703570971458, "grad_norm": 0.5703125, "learning_rate": 0.00040140139104079443, "loss": 0.9228, "step": 5572 }, { "epoch": 0.71329834890567, "grad_norm": 0.74609375, "learning_rate": 0.00040106935354075634, "loss": 1.4238, "step": 5573 }, { "epoch": 0.7134263407141943, "grad_norm": 0.65625, "learning_rate": 0.00040073741897666805, "loss": 1.4804, "step": 5574 }, { "epoch": 0.7135543325227185, "grad_norm": 0.703125, "learning_rate": 0.00040040558740557687, "loss": 1.2927, "step": 5575 }, { "epoch": 0.7136823243312428, "grad_norm": 0.62109375, "learning_rate": 0.0004000738588845138, "loss": 1.177, "step": 5576 }, { "epoch": 0.7138103161397671, "grad_norm": 0.6640625, "learning_rate": 0.00039974223347049166, "loss": 1.1808, "step": 5577 }, { "epoch": 0.7139383079482913, "grad_norm": 0.5625, "learning_rate": 0.0003994107112205058, "loss": 1.0247, "step": 5578 }, { "epoch": 0.7140662997568156, "grad_norm": 1.21875, "learning_rate": 0.0003990792921915339, "loss": 1.1284, "step": 5579 }, { "epoch": 0.7141942915653399, "grad_norm": 0.953125, "learning_rate": 0.00039874797644053517, "loss": 2.0415, "step": 5580 }, { "epoch": 0.714322283373864, "grad_norm": 0.68359375, "learning_rate": 0.0003984167640244518, "loss": 1.0648, "step": 5581 }, { "epoch": 0.7144502751823884, "grad_norm": 0.65234375, "learning_rate": 0.0003980856550002081, "loss": 1.5028, "step": 5582 }, { "epoch": 0.7145782669909125, "grad_norm": 0.6875, "learning_rate": 0.0003977546494247105, "loss": 1.5033, "step": 5583 }, { "epoch": 0.7147062587994368, "grad_norm": 0.6875, "learning_rate": 0.00039742374735484776, "loss": 1.2614, "step": 5584 }, { "epoch": 0.7148342506079611, "grad_norm": 0.55859375, "learning_rate": 0.0003970929488474906, "loss": 1.0471, "step": 5585 }, { "epoch": 0.7149622424164853, "grad_norm": 0.71875, "learning_rate": 0.00039676225395949226, "loss": 1.5924, "step": 5586 }, { "epoch": 0.7150902342250096, "grad_norm": 0.72265625, "learning_rate": 0.00039643166274768826, "loss": 0.965, "step": 5587 }, { "epoch": 0.7152182260335338, "grad_norm": 0.71875, "learning_rate": 0.0003961011752688954, "loss": 0.9168, "step": 5588 }, { "epoch": 0.7153462178420581, "grad_norm": 0.64453125, "learning_rate": 0.0003957707915799137, "loss": 1.4505, "step": 5589 }, { "epoch": 0.7154742096505824, "grad_norm": 0.66796875, "learning_rate": 0.000395440511737525, "loss": 1.304, "step": 5590 }, { "epoch": 0.7156022014591066, "grad_norm": 0.59765625, "learning_rate": 0.0003951103357984931, "loss": 1.1661, "step": 5591 }, { "epoch": 0.7157301932676309, "grad_norm": 0.71484375, "learning_rate": 0.0003947802638195647, "loss": 1.6895, "step": 5592 }, { "epoch": 0.7158581850761552, "grad_norm": 0.5859375, "learning_rate": 0.00039445029585746696, "loss": 0.6784, "step": 5593 }, { "epoch": 0.7159861768846794, "grad_norm": 0.76171875, "learning_rate": 0.0003941204319689108, "loss": 0.6789, "step": 5594 }, { "epoch": 0.7161141686932037, "grad_norm": 0.703125, "learning_rate": 0.0003937906722105887, "loss": 1.7009, "step": 5595 }, { "epoch": 0.7162421605017278, "grad_norm": 0.625, "learning_rate": 0.00039346101663917523, "loss": 1.1002, "step": 5596 }, { "epoch": 0.7163701523102521, "grad_norm": 0.6484375, "learning_rate": 0.0003931314653113269, "loss": 1.4465, "step": 5597 }, { "epoch": 0.7164981441187764, "grad_norm": 0.59375, "learning_rate": 0.00039280201828368266, "loss": 0.8636, "step": 5598 }, { "epoch": 0.7166261359273006, "grad_norm": 0.6328125, "learning_rate": 0.0003924726756128632, "loss": 1.5058, "step": 5599 }, { "epoch": 0.7167541277358249, "grad_norm": 0.640625, "learning_rate": 0.0003921434373554713, "loss": 1.2285, "step": 5600 }, { "epoch": 0.7168821195443492, "grad_norm": 0.6015625, "learning_rate": 0.00039181430356809247, "loss": 1.156, "step": 5601 }, { "epoch": 0.7170101113528734, "grad_norm": 0.6171875, "learning_rate": 0.0003914852743072929, "loss": 0.9232, "step": 5602 }, { "epoch": 0.7171381031613977, "grad_norm": 0.67578125, "learning_rate": 0.0003911563496296219, "loss": 1.2116, "step": 5603 }, { "epoch": 0.7172660949699219, "grad_norm": 0.65234375, "learning_rate": 0.0003908275295916106, "loss": 1.0797, "step": 5604 }, { "epoch": 0.7173940867784462, "grad_norm": 0.77734375, "learning_rate": 0.00039049881424977196, "loss": 1.1183, "step": 5605 }, { "epoch": 0.7175220785869705, "grad_norm": 0.546875, "learning_rate": 0.0003901702036606011, "loss": 1.1559, "step": 5606 }, { "epoch": 0.7176500703954947, "grad_norm": 0.796875, "learning_rate": 0.0003898416978805751, "loss": 1.4551, "step": 5607 }, { "epoch": 0.717778062204019, "grad_norm": 0.66015625, "learning_rate": 0.000389513296966153, "loss": 1.5549, "step": 5608 }, { "epoch": 0.7179060540125431, "grad_norm": 0.71484375, "learning_rate": 0.00038918500097377586, "loss": 1.2257, "step": 5609 }, { "epoch": 0.7180340458210674, "grad_norm": 0.76171875, "learning_rate": 0.00038885680995986695, "loss": 1.2189, "step": 5610 }, { "epoch": 0.7181620376295917, "grad_norm": 0.59375, "learning_rate": 0.0003885287239808305, "loss": 1.1249, "step": 5611 }, { "epoch": 0.7182900294381159, "grad_norm": 0.81640625, "learning_rate": 0.0003882007430930539, "loss": 1.0363, "step": 5612 }, { "epoch": 0.7184180212466402, "grad_norm": 0.64453125, "learning_rate": 0.00038787286735290574, "loss": 1.0932, "step": 5613 }, { "epoch": 0.7185460130551645, "grad_norm": 0.61328125, "learning_rate": 0.00038754509681673745, "loss": 0.8398, "step": 5614 }, { "epoch": 0.7186740048636887, "grad_norm": 0.60546875, "learning_rate": 0.00038721743154088075, "loss": 0.6919, "step": 5615 }, { "epoch": 0.718801996672213, "grad_norm": 0.60546875, "learning_rate": 0.0003868898715816508, "loss": 0.7396, "step": 5616 }, { "epoch": 0.7189299884807372, "grad_norm": 0.58984375, "learning_rate": 0.00038656241699534387, "loss": 0.7098, "step": 5617 }, { "epoch": 0.7190579802892615, "grad_norm": 0.625, "learning_rate": 0.00038623506783823846, "loss": 1.3043, "step": 5618 }, { "epoch": 0.7191859720977858, "grad_norm": 0.71484375, "learning_rate": 0.0003859078241665949, "loss": 1.8243, "step": 5619 }, { "epoch": 0.71931396390631, "grad_norm": 0.65234375, "learning_rate": 0.00038558068603665517, "loss": 1.0821, "step": 5620 }, { "epoch": 0.7194419557148343, "grad_norm": 0.5859375, "learning_rate": 0.00038525365350464347, "loss": 1.0427, "step": 5621 }, { "epoch": 0.7195699475233585, "grad_norm": 0.640625, "learning_rate": 0.00038492672662676544, "loss": 1.3117, "step": 5622 }, { "epoch": 0.7196979393318828, "grad_norm": 0.76171875, "learning_rate": 0.0003845999054592092, "loss": 1.3874, "step": 5623 }, { "epoch": 0.719825931140407, "grad_norm": 0.75390625, "learning_rate": 0.00038427319005814357, "loss": 0.9301, "step": 5624 }, { "epoch": 0.7199539229489312, "grad_norm": 0.546875, "learning_rate": 0.0003839465804797203, "loss": 0.961, "step": 5625 }, { "epoch": 0.7200819147574555, "grad_norm": 0.74609375, "learning_rate": 0.00038362007678007247, "loss": 1.2867, "step": 5626 }, { "epoch": 0.7202099065659798, "grad_norm": 0.671875, "learning_rate": 0.000383293679015315, "loss": 1.0528, "step": 5627 }, { "epoch": 0.720337898374504, "grad_norm": 0.59765625, "learning_rate": 0.0003829673872415448, "loss": 1.0089, "step": 5628 }, { "epoch": 0.7204658901830283, "grad_norm": 0.7109375, "learning_rate": 0.00038264120151484027, "loss": 1.2575, "step": 5629 }, { "epoch": 0.7205938819915525, "grad_norm": 0.66015625, "learning_rate": 0.0003823151218912617, "loss": 1.0203, "step": 5630 }, { "epoch": 0.7207218738000768, "grad_norm": 0.59765625, "learning_rate": 0.00038198914842685127, "loss": 1.2928, "step": 5631 }, { "epoch": 0.7208498656086011, "grad_norm": 0.640625, "learning_rate": 0.0003816632811776332, "loss": 0.843, "step": 5632 }, { "epoch": 0.7209778574171253, "grad_norm": 0.55078125, "learning_rate": 0.0003813375201996121, "loss": 1.0477, "step": 5633 }, { "epoch": 0.7211058492256496, "grad_norm": 0.64453125, "learning_rate": 0.0003810118655487759, "loss": 1.2117, "step": 5634 }, { "epoch": 0.7212338410341738, "grad_norm": 0.73828125, "learning_rate": 0.0003806863172810936, "loss": 1.5938, "step": 5635 }, { "epoch": 0.7213618328426981, "grad_norm": 0.609375, "learning_rate": 0.0003803608754525161, "loss": 0.9806, "step": 5636 }, { "epoch": 0.7214898246512224, "grad_norm": 0.58984375, "learning_rate": 0.00038003554011897535, "loss": 1.084, "step": 5637 }, { "epoch": 0.7216178164597465, "grad_norm": 0.6484375, "learning_rate": 0.0003797103113363859, "loss": 0.8848, "step": 5638 }, { "epoch": 0.7217458082682708, "grad_norm": 0.74609375, "learning_rate": 0.00037938518916064335, "loss": 1.9432, "step": 5639 }, { "epoch": 0.7218738000767951, "grad_norm": 0.87890625, "learning_rate": 0.0003790601736476255, "loss": 1.8154, "step": 5640 }, { "epoch": 0.7220017918853193, "grad_norm": 0.64453125, "learning_rate": 0.0003787352648531913, "loss": 1.5037, "step": 5641 }, { "epoch": 0.7221297836938436, "grad_norm": 0.79296875, "learning_rate": 0.0003784104628331817, "loss": 1.5, "step": 5642 }, { "epoch": 0.7222577755023678, "grad_norm": 0.53515625, "learning_rate": 0.00037808576764341927, "loss": 1.1489, "step": 5643 }, { "epoch": 0.7223857673108921, "grad_norm": 0.68359375, "learning_rate": 0.0003777611793397081, "loss": 1.1102, "step": 5644 }, { "epoch": 0.7225137591194164, "grad_norm": 0.640625, "learning_rate": 0.0003774366979778342, "loss": 1.3801, "step": 5645 }, { "epoch": 0.7226417509279406, "grad_norm": 0.58984375, "learning_rate": 0.0003771123236135644, "loss": 1.151, "step": 5646 }, { "epoch": 0.7227697427364649, "grad_norm": 0.5234375, "learning_rate": 0.000376788056302648, "loss": 0.8192, "step": 5647 }, { "epoch": 0.7228977345449891, "grad_norm": 0.546875, "learning_rate": 0.00037646389610081557, "loss": 1.0703, "step": 5648 }, { "epoch": 0.7230257263535134, "grad_norm": 0.59765625, "learning_rate": 0.0003761398430637796, "loss": 0.9556, "step": 5649 }, { "epoch": 0.7231537181620377, "grad_norm": 0.6171875, "learning_rate": 0.000375815897247233, "loss": 1.0318, "step": 5650 }, { "epoch": 0.7232817099705618, "grad_norm": 0.52734375, "learning_rate": 0.000375492058706852, "loss": 1.0351, "step": 5651 }, { "epoch": 0.7234097017790861, "grad_norm": 0.703125, "learning_rate": 0.00037516832749829313, "loss": 1.4582, "step": 5652 }, { "epoch": 0.7235376935876104, "grad_norm": 0.96875, "learning_rate": 0.0003748447036771949, "loss": 2.0068, "step": 5653 }, { "epoch": 0.7236656853961346, "grad_norm": 0.70703125, "learning_rate": 0.00037452118729917785, "loss": 1.6049, "step": 5654 }, { "epoch": 0.7237936772046589, "grad_norm": 0.53515625, "learning_rate": 0.0003741977784198426, "loss": 1.0109, "step": 5655 }, { "epoch": 0.7239216690131831, "grad_norm": 0.78125, "learning_rate": 0.0003738744770947725, "loss": 1.4134, "step": 5656 }, { "epoch": 0.7240496608217074, "grad_norm": 0.55859375, "learning_rate": 0.00037355128337953237, "loss": 0.9463, "step": 5657 }, { "epoch": 0.7241776526302317, "grad_norm": 0.65625, "learning_rate": 0.00037322819732966853, "loss": 1.404, "step": 5658 }, { "epoch": 0.7243056444387559, "grad_norm": 0.515625, "learning_rate": 0.00037290521900070774, "loss": 0.7208, "step": 5659 }, { "epoch": 0.7244336362472802, "grad_norm": 0.54296875, "learning_rate": 0.00037258234844815964, "loss": 1.0128, "step": 5660 }, { "epoch": 0.7245616280558044, "grad_norm": 0.71484375, "learning_rate": 0.0003722595857275146, "loss": 1.3968, "step": 5661 }, { "epoch": 0.7246896198643287, "grad_norm": 0.74609375, "learning_rate": 0.0003719369308942446, "loss": 1.2977, "step": 5662 }, { "epoch": 0.724817611672853, "grad_norm": 0.71875, "learning_rate": 0.0003716143840038033, "loss": 1.3525, "step": 5663 }, { "epoch": 0.7249456034813772, "grad_norm": 0.51171875, "learning_rate": 0.00037129194511162556, "loss": 0.8546, "step": 5664 }, { "epoch": 0.7250735952899015, "grad_norm": 0.63671875, "learning_rate": 0.0003709696142731277, "loss": 1.3476, "step": 5665 }, { "epoch": 0.7252015870984257, "grad_norm": 0.54296875, "learning_rate": 0.0003706473915437074, "loss": 1.1322, "step": 5666 }, { "epoch": 0.7253295789069499, "grad_norm": 0.65234375, "learning_rate": 0.0003703252769787443, "loss": 1.1852, "step": 5667 }, { "epoch": 0.7254575707154742, "grad_norm": 0.61328125, "learning_rate": 0.0003700032706335984, "loss": 1.14, "step": 5668 }, { "epoch": 0.7255855625239984, "grad_norm": 0.53515625, "learning_rate": 0.0003696813725636119, "loss": 1.0576, "step": 5669 }, { "epoch": 0.7257135543325227, "grad_norm": 0.61328125, "learning_rate": 0.0003693595828241083, "loss": 1.3215, "step": 5670 }, { "epoch": 0.725841546141047, "grad_norm": 0.734375, "learning_rate": 0.00036903790147039286, "loss": 0.9585, "step": 5671 }, { "epoch": 0.7259695379495712, "grad_norm": 0.5703125, "learning_rate": 0.0003687163285577504, "loss": 0.9516, "step": 5672 }, { "epoch": 0.7260975297580955, "grad_norm": 0.490234375, "learning_rate": 0.0003683948641414497, "loss": 0.6807, "step": 5673 }, { "epoch": 0.7262255215666198, "grad_norm": 0.765625, "learning_rate": 0.00036807350827673934, "loss": 1.492, "step": 5674 }, { "epoch": 0.726353513375144, "grad_norm": 0.6328125, "learning_rate": 0.0003677522610188493, "loss": 1.5187, "step": 5675 }, { "epoch": 0.7264815051836683, "grad_norm": 0.67578125, "learning_rate": 0.0003674311224229917, "loss": 1.1831, "step": 5676 }, { "epoch": 0.7266094969921925, "grad_norm": 0.64453125, "learning_rate": 0.00036711009254435854, "loss": 1.7742, "step": 5677 }, { "epoch": 0.7267374888007168, "grad_norm": 0.671875, "learning_rate": 0.0003667891714381246, "loss": 1.4276, "step": 5678 }, { "epoch": 0.726865480609241, "grad_norm": 0.71484375, "learning_rate": 0.0003664683591594452, "loss": 1.7183, "step": 5679 }, { "epoch": 0.7269934724177652, "grad_norm": 0.6640625, "learning_rate": 0.0003661476557634575, "loss": 1.1381, "step": 5680 }, { "epoch": 0.7271214642262895, "grad_norm": 0.6328125, "learning_rate": 0.0003658270613052789, "loss": 1.2793, "step": 5681 }, { "epoch": 0.7272494560348137, "grad_norm": 0.73046875, "learning_rate": 0.0003655065758400091, "loss": 1.7224, "step": 5682 }, { "epoch": 0.727377447843338, "grad_norm": 0.62890625, "learning_rate": 0.0003651861994227288, "loss": 0.9903, "step": 5683 }, { "epoch": 0.7275054396518623, "grad_norm": 0.62109375, "learning_rate": 0.00036486593210849984, "loss": 1.4383, "step": 5684 }, { "epoch": 0.7276334314603865, "grad_norm": 0.65625, "learning_rate": 0.00036454577395236533, "loss": 0.9163, "step": 5685 }, { "epoch": 0.7277614232689108, "grad_norm": 0.5859375, "learning_rate": 0.00036422572500934957, "loss": 1.3647, "step": 5686 }, { "epoch": 0.7278894150774351, "grad_norm": 0.60546875, "learning_rate": 0.0003639057853344583, "loss": 1.4449, "step": 5687 }, { "epoch": 0.7280174068859593, "grad_norm": 0.640625, "learning_rate": 0.0003635859549826783, "loss": 1.4701, "step": 5688 }, { "epoch": 0.7281453986944836, "grad_norm": 0.69921875, "learning_rate": 0.00036326623400897794, "loss": 1.3598, "step": 5689 }, { "epoch": 0.7282733905030078, "grad_norm": 0.63671875, "learning_rate": 0.0003629466224683059, "loss": 1.362, "step": 5690 }, { "epoch": 0.7284013823115321, "grad_norm": 0.6484375, "learning_rate": 0.00036262712041559267, "loss": 0.8958, "step": 5691 }, { "epoch": 0.7285293741200564, "grad_norm": 0.640625, "learning_rate": 0.0003623077279057501, "loss": 1.0122, "step": 5692 }, { "epoch": 0.7286573659285805, "grad_norm": 0.7890625, "learning_rate": 0.0003619884449936708, "loss": 1.1048, "step": 5693 }, { "epoch": 0.7287853577371048, "grad_norm": 0.5625, "learning_rate": 0.00036166927173422926, "loss": 0.824, "step": 5694 }, { "epoch": 0.728913349545629, "grad_norm": 0.69140625, "learning_rate": 0.0003613502081822794, "loss": 1.3975, "step": 5695 }, { "epoch": 0.7290413413541533, "grad_norm": 0.68359375, "learning_rate": 0.0003610312543926585, "loss": 1.3874, "step": 5696 }, { "epoch": 0.7291693331626776, "grad_norm": 0.86328125, "learning_rate": 0.00036071241042018364, "loss": 1.6499, "step": 5697 }, { "epoch": 0.7292973249712018, "grad_norm": 0.79296875, "learning_rate": 0.0003603936763196538, "loss": 1.7507, "step": 5698 }, { "epoch": 0.7294253167797261, "grad_norm": 0.640625, "learning_rate": 0.00036007505214584766, "loss": 1.1812, "step": 5699 }, { "epoch": 0.7295533085882504, "grad_norm": 0.625, "learning_rate": 0.0003597565379535266, "loss": 1.1631, "step": 5700 }, { "epoch": 0.7296813003967746, "grad_norm": 0.64453125, "learning_rate": 0.0003594381337974322, "loss": 1.6458, "step": 5701 }, { "epoch": 0.7298092922052989, "grad_norm": 0.65625, "learning_rate": 0.0003591198397322878, "loss": 0.8592, "step": 5702 }, { "epoch": 0.7299372840138231, "grad_norm": 0.72265625, "learning_rate": 0.00035880165581279666, "loss": 1.1907, "step": 5703 }, { "epoch": 0.7300652758223474, "grad_norm": 0.5, "learning_rate": 0.0003584835820936444, "loss": 0.8947, "step": 5704 }, { "epoch": 0.7301932676308717, "grad_norm": 0.6796875, "learning_rate": 0.00035816561862949694, "loss": 1.3307, "step": 5705 }, { "epoch": 0.7303212594393959, "grad_norm": 0.6953125, "learning_rate": 0.0003578477654750014, "loss": 1.1936, "step": 5706 }, { "epoch": 0.7304492512479202, "grad_norm": 0.6015625, "learning_rate": 0.0003575300226847863, "loss": 0.8877, "step": 5707 }, { "epoch": 0.7305772430564443, "grad_norm": 0.67578125, "learning_rate": 0.0003572123903134606, "loss": 1.1707, "step": 5708 }, { "epoch": 0.7307052348649686, "grad_norm": 0.66796875, "learning_rate": 0.0003568948684156148, "loss": 1.3404, "step": 5709 }, { "epoch": 0.7308332266734929, "grad_norm": 0.66015625, "learning_rate": 0.0003565774570458201, "loss": 1.6692, "step": 5710 }, { "epoch": 0.7309612184820171, "grad_norm": 0.734375, "learning_rate": 0.0003562601562586291, "loss": 1.7411, "step": 5711 }, { "epoch": 0.7310892102905414, "grad_norm": 0.62890625, "learning_rate": 0.00035594296610857437, "loss": 1.5402, "step": 5712 }, { "epoch": 0.7312172020990657, "grad_norm": 0.671875, "learning_rate": 0.0003556258866501706, "loss": 1.6075, "step": 5713 }, { "epoch": 0.7313451939075899, "grad_norm": 0.75, "learning_rate": 0.0003553089179379131, "loss": 1.4542, "step": 5714 }, { "epoch": 0.7314731857161142, "grad_norm": 0.68359375, "learning_rate": 0.00035499206002627794, "loss": 1.2959, "step": 5715 }, { "epoch": 0.7316011775246384, "grad_norm": 0.64453125, "learning_rate": 0.00035467531296972266, "loss": 1.4088, "step": 5716 }, { "epoch": 0.7317291693331627, "grad_norm": 0.76171875, "learning_rate": 0.00035435867682268485, "loss": 1.2917, "step": 5717 }, { "epoch": 0.731857161141687, "grad_norm": 0.5625, "learning_rate": 0.00035404215163958343, "loss": 1.1359, "step": 5718 }, { "epoch": 0.7319851529502112, "grad_norm": 0.68359375, "learning_rate": 0.00035372573747481905, "loss": 1.2973, "step": 5719 }, { "epoch": 0.7321131447587355, "grad_norm": 0.71875, "learning_rate": 0.00035340943438277276, "loss": 1.4663, "step": 5720 }, { "epoch": 0.7322411365672596, "grad_norm": 0.6953125, "learning_rate": 0.0003530932424178055, "loss": 1.6634, "step": 5721 }, { "epoch": 0.7323691283757839, "grad_norm": 0.51171875, "learning_rate": 0.0003527771616342604, "loss": 0.9416, "step": 5722 }, { "epoch": 0.7324971201843082, "grad_norm": 0.7421875, "learning_rate": 0.0003524611920864612, "loss": 1.3514, "step": 5723 }, { "epoch": 0.7326251119928324, "grad_norm": 0.703125, "learning_rate": 0.0003521453338287127, "loss": 1.2258, "step": 5724 }, { "epoch": 0.7327531038013567, "grad_norm": 0.6484375, "learning_rate": 0.0003518295869152994, "loss": 1.2931, "step": 5725 }, { "epoch": 0.732881095609881, "grad_norm": 0.6015625, "learning_rate": 0.00035151395140048814, "loss": 1.3164, "step": 5726 }, { "epoch": 0.7330090874184052, "grad_norm": 0.5078125, "learning_rate": 0.00035119842733852567, "loss": 0.9159, "step": 5727 }, { "epoch": 0.7331370792269295, "grad_norm": 0.75, "learning_rate": 0.00035088301478364026, "loss": 1.1459, "step": 5728 }, { "epoch": 0.7332650710354537, "grad_norm": 0.6796875, "learning_rate": 0.0003505677137900405, "loss": 1.4833, "step": 5729 }, { "epoch": 0.733393062843978, "grad_norm": 0.6640625, "learning_rate": 0.000350252524411916, "loss": 1.3555, "step": 5730 }, { "epoch": 0.7335210546525023, "grad_norm": 0.70703125, "learning_rate": 0.0003499374467034372, "loss": 1.4596, "step": 5731 }, { "epoch": 0.7336490464610265, "grad_norm": 0.7890625, "learning_rate": 0.00034962248071875523, "loss": 1.531, "step": 5732 }, { "epoch": 0.7337770382695508, "grad_norm": 0.5625, "learning_rate": 0.0003493076265120025, "loss": 0.9429, "step": 5733 }, { "epoch": 0.733905030078075, "grad_norm": 0.474609375, "learning_rate": 0.0003489928841372912, "loss": 1.1085, "step": 5734 }, { "epoch": 0.7340330218865992, "grad_norm": 0.859375, "learning_rate": 0.0003486782536487152, "loss": 1.3353, "step": 5735 }, { "epoch": 0.7341610136951235, "grad_norm": 0.6484375, "learning_rate": 0.0003483637351003488, "loss": 1.4862, "step": 5736 }, { "epoch": 0.7342890055036477, "grad_norm": 0.68359375, "learning_rate": 0.00034804932854624713, "loss": 1.5332, "step": 5737 }, { "epoch": 0.734416997312172, "grad_norm": 0.71484375, "learning_rate": 0.00034773503404044646, "loss": 1.3282, "step": 5738 }, { "epoch": 0.7345449891206963, "grad_norm": 0.490234375, "learning_rate": 0.0003474208516369628, "loss": 0.9028, "step": 5739 }, { "epoch": 0.7346729809292205, "grad_norm": 0.80859375, "learning_rate": 0.0003471067813897932, "loss": 1.3195, "step": 5740 }, { "epoch": 0.7348009727377448, "grad_norm": 0.7421875, "learning_rate": 0.0003467928233529167, "loss": 1.0851, "step": 5741 }, { "epoch": 0.734928964546269, "grad_norm": 0.7734375, "learning_rate": 0.000346478977580292, "loss": 1.9963, "step": 5742 }, { "epoch": 0.7350569563547933, "grad_norm": 0.7421875, "learning_rate": 0.00034616524412585793, "loss": 1.3965, "step": 5743 }, { "epoch": 0.7351849481633176, "grad_norm": 0.7734375, "learning_rate": 0.0003458516230435349, "loss": 1.4576, "step": 5744 }, { "epoch": 0.7353129399718418, "grad_norm": 0.7421875, "learning_rate": 0.0003455381143872238, "loss": 1.5325, "step": 5745 }, { "epoch": 0.7354409317803661, "grad_norm": 0.5234375, "learning_rate": 0.00034522471821080624, "loss": 1.1844, "step": 5746 }, { "epoch": 0.7355689235888903, "grad_norm": 0.9375, "learning_rate": 0.00034491143456814475, "loss": 1.4208, "step": 5747 }, { "epoch": 0.7356969153974146, "grad_norm": 0.66796875, "learning_rate": 0.00034459826351308163, "loss": 1.2089, "step": 5748 }, { "epoch": 0.7358249072059388, "grad_norm": 0.6640625, "learning_rate": 0.00034428520509944063, "loss": 1.5339, "step": 5749 }, { "epoch": 0.735952899014463, "grad_norm": 0.6875, "learning_rate": 0.00034397225938102594, "loss": 1.158, "step": 5750 }, { "epoch": 0.7360808908229873, "grad_norm": 0.640625, "learning_rate": 0.0003436594264116224, "loss": 1.4649, "step": 5751 }, { "epoch": 0.7362088826315116, "grad_norm": 0.609375, "learning_rate": 0.00034334670624499534, "loss": 1.1591, "step": 5752 }, { "epoch": 0.7363368744400358, "grad_norm": 0.66796875, "learning_rate": 0.0003430340989348909, "loss": 1.155, "step": 5753 }, { "epoch": 0.7364648662485601, "grad_norm": 0.57421875, "learning_rate": 0.0003427216045350356, "loss": 1.2908, "step": 5754 }, { "epoch": 0.7365928580570843, "grad_norm": 0.640625, "learning_rate": 0.00034240922309913715, "loss": 1.1896, "step": 5755 }, { "epoch": 0.7367208498656086, "grad_norm": 0.6171875, "learning_rate": 0.00034209695468088254, "loss": 1.2353, "step": 5756 }, { "epoch": 0.7368488416741329, "grad_norm": 0.58203125, "learning_rate": 0.0003417847993339407, "loss": 1.3299, "step": 5757 }, { "epoch": 0.7369768334826571, "grad_norm": 0.64453125, "learning_rate": 0.00034147275711196036, "loss": 1.3543, "step": 5758 }, { "epoch": 0.7371048252911814, "grad_norm": 0.71484375, "learning_rate": 0.0003411608280685713, "loss": 1.5475, "step": 5759 }, { "epoch": 0.7372328170997057, "grad_norm": 0.765625, "learning_rate": 0.00034084901225738364, "loss": 1.2073, "step": 5760 }, { "epoch": 0.7373608089082299, "grad_norm": 0.54296875, "learning_rate": 0.0003405373097319875, "loss": 0.9131, "step": 5761 }, { "epoch": 0.7374888007167542, "grad_norm": 0.5703125, "learning_rate": 0.0003402257205459544, "loss": 0.8869, "step": 5762 }, { "epoch": 0.7376167925252783, "grad_norm": 0.5546875, "learning_rate": 0.0003399142447528354, "loss": 1.0147, "step": 5763 }, { "epoch": 0.7377447843338026, "grad_norm": 0.640625, "learning_rate": 0.00033960288240616386, "loss": 0.5954, "step": 5764 }, { "epoch": 0.7378727761423269, "grad_norm": 0.5859375, "learning_rate": 0.00033929163355945157, "loss": 0.8415, "step": 5765 }, { "epoch": 0.7380007679508511, "grad_norm": 0.69921875, "learning_rate": 0.0003389804982661918, "loss": 1.5789, "step": 5766 }, { "epoch": 0.7381287597593754, "grad_norm": 0.68359375, "learning_rate": 0.00033866947657985825, "loss": 1.4877, "step": 5767 }, { "epoch": 0.7382567515678996, "grad_norm": 0.6484375, "learning_rate": 0.0003383585685539051, "loss": 1.1663, "step": 5768 }, { "epoch": 0.7383847433764239, "grad_norm": 0.640625, "learning_rate": 0.00033804777424176733, "loss": 1.2081, "step": 5769 }, { "epoch": 0.7385127351849482, "grad_norm": 0.703125, "learning_rate": 0.00033773709369685925, "loss": 1.663, "step": 5770 }, { "epoch": 0.7386407269934724, "grad_norm": 0.6796875, "learning_rate": 0.00033742652697257657, "loss": 1.1859, "step": 5771 }, { "epoch": 0.7387687188019967, "grad_norm": 0.6640625, "learning_rate": 0.00033711607412229526, "loss": 1.5242, "step": 5772 }, { "epoch": 0.738896710610521, "grad_norm": 0.58984375, "learning_rate": 0.00033680573519937184, "loss": 1.3585, "step": 5773 }, { "epoch": 0.7390247024190452, "grad_norm": 0.66796875, "learning_rate": 0.0003364955102571431, "loss": 1.1994, "step": 5774 }, { "epoch": 0.7391526942275695, "grad_norm": 0.58203125, "learning_rate": 0.000336185399348926, "loss": 1.2169, "step": 5775 }, { "epoch": 0.7392806860360936, "grad_norm": 0.75390625, "learning_rate": 0.0003358754025280182, "loss": 1.469, "step": 5776 }, { "epoch": 0.739408677844618, "grad_norm": 1.59375, "learning_rate": 0.0003355655198476978, "loss": 1.4007, "step": 5777 }, { "epoch": 0.7395366696531422, "grad_norm": 0.7265625, "learning_rate": 0.0003352557513612234, "loss": 1.1553, "step": 5778 }, { "epoch": 0.7396646614616664, "grad_norm": 0.51953125, "learning_rate": 0.00033494609712183323, "loss": 0.8752, "step": 5779 }, { "epoch": 0.7397926532701907, "grad_norm": 0.87109375, "learning_rate": 0.00033463655718274653, "loss": 1.7366, "step": 5780 }, { "epoch": 0.7399206450787149, "grad_norm": 0.9375, "learning_rate": 0.0003343271315971629, "loss": 2.1159, "step": 5781 }, { "epoch": 0.7400486368872392, "grad_norm": 0.9453125, "learning_rate": 0.00033401782041826234, "loss": 1.4447, "step": 5782 }, { "epoch": 0.7401766286957635, "grad_norm": 0.72265625, "learning_rate": 0.0003337086236992044, "loss": 1.0371, "step": 5783 }, { "epoch": 0.7403046205042877, "grad_norm": 0.84375, "learning_rate": 0.0003333995414931299, "loss": 1.6133, "step": 5784 }, { "epoch": 0.740432612312812, "grad_norm": 0.5390625, "learning_rate": 0.0003330905738531597, "loss": 0.8074, "step": 5785 }, { "epoch": 0.7405606041213363, "grad_norm": 0.498046875, "learning_rate": 0.0003327817208323947, "loss": 0.8179, "step": 5786 }, { "epoch": 0.7406885959298605, "grad_norm": 0.6328125, "learning_rate": 0.00033247298248391654, "loss": 1.5871, "step": 5787 }, { "epoch": 0.7408165877383848, "grad_norm": 0.515625, "learning_rate": 0.00033216435886078676, "loss": 0.9563, "step": 5788 }, { "epoch": 0.740944579546909, "grad_norm": 0.58984375, "learning_rate": 0.00033185585001604734, "loss": 1.2488, "step": 5789 }, { "epoch": 0.7410725713554333, "grad_norm": 0.6484375, "learning_rate": 0.0003315474560027206, "loss": 1.415, "step": 5790 }, { "epoch": 0.7412005631639575, "grad_norm": 0.50390625, "learning_rate": 0.0003312391768738094, "loss": 0.6866, "step": 5791 }, { "epoch": 0.7413285549724817, "grad_norm": 0.67578125, "learning_rate": 0.0003309310126822959, "loss": 0.9356, "step": 5792 }, { "epoch": 0.741456546781006, "grad_norm": 0.7578125, "learning_rate": 0.00033062296348114316, "loss": 1.3268, "step": 5793 }, { "epoch": 0.7415845385895302, "grad_norm": 0.6640625, "learning_rate": 0.00033031502932329474, "loss": 1.3601, "step": 5794 }, { "epoch": 0.7417125303980545, "grad_norm": 0.609375, "learning_rate": 0.0003300072102616739, "loss": 1.3033, "step": 5795 }, { "epoch": 0.7418405222065788, "grad_norm": 0.52734375, "learning_rate": 0.00032969950634918456, "loss": 0.6575, "step": 5796 }, { "epoch": 0.741968514015103, "grad_norm": 0.83984375, "learning_rate": 0.00032939191763871047, "loss": 1.5261, "step": 5797 }, { "epoch": 0.7420965058236273, "grad_norm": 0.74609375, "learning_rate": 0.0003290844441831158, "loss": 1.8313, "step": 5798 }, { "epoch": 0.7422244976321516, "grad_norm": 0.59375, "learning_rate": 0.00032877708603524483, "loss": 0.889, "step": 5799 }, { "epoch": 0.7423524894406758, "grad_norm": 0.66796875, "learning_rate": 0.0003284698432479224, "loss": 0.9925, "step": 5800 }, { "epoch": 0.7424804812492001, "grad_norm": 0.6640625, "learning_rate": 0.0003281627158739525, "loss": 1.5795, "step": 5801 }, { "epoch": 0.7426084730577243, "grad_norm": 0.6640625, "learning_rate": 0.0003278557039661202, "loss": 1.3945, "step": 5802 }, { "epoch": 0.7427364648662486, "grad_norm": 0.63671875, "learning_rate": 0.00032754880757719066, "loss": 1.2491, "step": 5803 }, { "epoch": 0.7428644566747729, "grad_norm": 0.8203125, "learning_rate": 0.0003272420267599092, "loss": 1.5644, "step": 5804 }, { "epoch": 0.742992448483297, "grad_norm": 0.7265625, "learning_rate": 0.00032693536156700045, "loss": 1.4106, "step": 5805 }, { "epoch": 0.7431204402918213, "grad_norm": 0.70703125, "learning_rate": 0.00032662881205117, "loss": 1.4181, "step": 5806 }, { "epoch": 0.7432484321003455, "grad_norm": 0.69921875, "learning_rate": 0.0003263223782651036, "loss": 1.5786, "step": 5807 }, { "epoch": 0.7433764239088698, "grad_norm": 0.98828125, "learning_rate": 0.0003260160602614668, "loss": 1.6273, "step": 5808 }, { "epoch": 0.7435044157173941, "grad_norm": 0.73828125, "learning_rate": 0.0003257098580929052, "loss": 1.7547, "step": 5809 }, { "epoch": 0.7436324075259183, "grad_norm": 0.65625, "learning_rate": 0.0003254037718120446, "loss": 1.3642, "step": 5810 }, { "epoch": 0.7437603993344426, "grad_norm": 0.67578125, "learning_rate": 0.00032509780147149117, "loss": 1.6065, "step": 5811 }, { "epoch": 0.7438883911429669, "grad_norm": 0.94140625, "learning_rate": 0.00032479194712383054, "loss": 1.0392, "step": 5812 }, { "epoch": 0.7440163829514911, "grad_norm": 0.490234375, "learning_rate": 0.0003244862088216293, "loss": 0.8275, "step": 5813 }, { "epoch": 0.7441443747600154, "grad_norm": 0.59375, "learning_rate": 0.00032418058661743267, "loss": 0.931, "step": 5814 }, { "epoch": 0.7442723665685396, "grad_norm": 0.7109375, "learning_rate": 0.00032387508056376724, "loss": 1.4868, "step": 5815 }, { "epoch": 0.7444003583770639, "grad_norm": 0.76171875, "learning_rate": 0.00032356969071313925, "loss": 1.7398, "step": 5816 }, { "epoch": 0.7445283501855882, "grad_norm": 0.65625, "learning_rate": 0.0003232644171180349, "loss": 1.1891, "step": 5817 }, { "epoch": 0.7446563419941123, "grad_norm": 0.6015625, "learning_rate": 0.00032295925983092023, "loss": 0.9595, "step": 5818 }, { "epoch": 0.7447843338026366, "grad_norm": 0.63671875, "learning_rate": 0.00032265421890424165, "loss": 1.0173, "step": 5819 }, { "epoch": 0.7449123256111608, "grad_norm": 0.6796875, "learning_rate": 0.00032234929439042527, "loss": 1.3434, "step": 5820 }, { "epoch": 0.7450403174196851, "grad_norm": 0.66015625, "learning_rate": 0.0003220444863418773, "loss": 1.6793, "step": 5821 }, { "epoch": 0.7451683092282094, "grad_norm": 0.76953125, "learning_rate": 0.0003217397948109845, "loss": 1.4386, "step": 5822 }, { "epoch": 0.7452963010367336, "grad_norm": 0.67578125, "learning_rate": 0.00032143521985011214, "loss": 1.1843, "step": 5823 }, { "epoch": 0.7454242928452579, "grad_norm": 0.6796875, "learning_rate": 0.0003211307615116068, "loss": 1.5233, "step": 5824 }, { "epoch": 0.7455522846537822, "grad_norm": 0.5625, "learning_rate": 0.0003208264198477947, "loss": 1.0409, "step": 5825 }, { "epoch": 0.7456802764623064, "grad_norm": 0.625, "learning_rate": 0.000320522194910982, "loss": 1.0026, "step": 5826 }, { "epoch": 0.7458082682708307, "grad_norm": 0.6171875, "learning_rate": 0.0003202180867534541, "loss": 1.3006, "step": 5827 }, { "epoch": 0.7459362600793549, "grad_norm": 0.7265625, "learning_rate": 0.0003199140954274774, "loss": 1.2539, "step": 5828 }, { "epoch": 0.7460642518878792, "grad_norm": 0.66015625, "learning_rate": 0.00031961022098529756, "loss": 1.3711, "step": 5829 }, { "epoch": 0.7461922436964035, "grad_norm": 0.62890625, "learning_rate": 0.00031930646347914037, "loss": 1.1335, "step": 5830 }, { "epoch": 0.7463202355049277, "grad_norm": 0.6953125, "learning_rate": 0.00031900282296121163, "loss": 1.3259, "step": 5831 }, { "epoch": 0.746448227313452, "grad_norm": 0.8515625, "learning_rate": 0.0003186992994836967, "loss": 1.5586, "step": 5832 }, { "epoch": 0.7465762191219762, "grad_norm": 0.73828125, "learning_rate": 0.00031839589309876114, "loss": 1.6686, "step": 5833 }, { "epoch": 0.7467042109305004, "grad_norm": 0.58984375, "learning_rate": 0.0003180926038585502, "loss": 0.8806, "step": 5834 }, { "epoch": 0.7468322027390247, "grad_norm": 0.61328125, "learning_rate": 0.00031778943181518936, "loss": 1.1191, "step": 5835 }, { "epoch": 0.7469601945475489, "grad_norm": 0.73046875, "learning_rate": 0.000317486377020783, "loss": 1.8472, "step": 5836 }, { "epoch": 0.7470881863560732, "grad_norm": 0.75, "learning_rate": 0.0003171834395274165, "loss": 1.3216, "step": 5837 }, { "epoch": 0.7472161781645975, "grad_norm": 0.68359375, "learning_rate": 0.00031688061938715427, "loss": 1.2393, "step": 5838 }, { "epoch": 0.7473441699731217, "grad_norm": 0.53125, "learning_rate": 0.0003165779166520415, "loss": 0.8865, "step": 5839 }, { "epoch": 0.747472161781646, "grad_norm": 0.671875, "learning_rate": 0.0003162753313741015, "loss": 1.4656, "step": 5840 }, { "epoch": 0.7476001535901702, "grad_norm": 0.55859375, "learning_rate": 0.0003159728636053393, "loss": 1.0002, "step": 5841 }, { "epoch": 0.7477281453986945, "grad_norm": 0.73828125, "learning_rate": 0.00031567051339773865, "loss": 1.6096, "step": 5842 }, { "epoch": 0.7478561372072188, "grad_norm": 0.78515625, "learning_rate": 0.0003153682808032634, "loss": 0.9653, "step": 5843 }, { "epoch": 0.747984129015743, "grad_norm": 0.7265625, "learning_rate": 0.0003150661658738575, "loss": 1.4388, "step": 5844 }, { "epoch": 0.7481121208242673, "grad_norm": 0.73828125, "learning_rate": 0.0003147641686614434, "loss": 1.1085, "step": 5845 }, { "epoch": 0.7482401126327916, "grad_norm": 0.62890625, "learning_rate": 0.00031446228921792473, "loss": 1.8167, "step": 5846 }, { "epoch": 0.7483681044413157, "grad_norm": 0.6640625, "learning_rate": 0.0003141605275951844, "loss": 1.4167, "step": 5847 }, { "epoch": 0.74849609624984, "grad_norm": 0.6796875, "learning_rate": 0.0003138588838450853, "loss": 1.1688, "step": 5848 }, { "epoch": 0.7486240880583642, "grad_norm": 0.6171875, "learning_rate": 0.00031355735801946917, "loss": 0.8056, "step": 5849 }, { "epoch": 0.7487520798668885, "grad_norm": 0.703125, "learning_rate": 0.0003132559501701584, "loss": 0.8408, "step": 5850 }, { "epoch": 0.7488800716754128, "grad_norm": 0.55078125, "learning_rate": 0.0003129546603489548, "loss": 0.9023, "step": 5851 }, { "epoch": 0.749008063483937, "grad_norm": 0.6640625, "learning_rate": 0.00031265348860764, "loss": 1.3145, "step": 5852 }, { "epoch": 0.7491360552924613, "grad_norm": 0.921875, "learning_rate": 0.00031235243499797526, "loss": 1.8804, "step": 5853 }, { "epoch": 0.7492640471009855, "grad_norm": 0.7578125, "learning_rate": 0.00031205149957170143, "loss": 1.4838, "step": 5854 }, { "epoch": 0.7493920389095098, "grad_norm": 0.58203125, "learning_rate": 0.0003117506823805393, "loss": 1.2222, "step": 5855 }, { "epoch": 0.7495200307180341, "grad_norm": 0.68359375, "learning_rate": 0.00031144998347618893, "loss": 1.2151, "step": 5856 }, { "epoch": 0.7496480225265583, "grad_norm": 0.7265625, "learning_rate": 0.0003111494029103309, "loss": 1.5949, "step": 5857 }, { "epoch": 0.7497760143350826, "grad_norm": 0.51171875, "learning_rate": 0.000310848940734624, "loss": 0.9347, "step": 5858 }, { "epoch": 0.7499040061436069, "grad_norm": 0.49609375, "learning_rate": 0.00031054859700070794, "loss": 0.7799, "step": 5859 }, { "epoch": 0.750031997952131, "grad_norm": 0.64453125, "learning_rate": 0.0003102483717602017, "loss": 1.1463, "step": 5860 }, { "epoch": 0.7501599897606553, "grad_norm": 0.73828125, "learning_rate": 0.00030994826506470385, "loss": 1.6082, "step": 5861 }, { "epoch": 0.7502879815691795, "grad_norm": 0.953125, "learning_rate": 0.00030964827696579257, "loss": 1.8192, "step": 5862 }, { "epoch": 0.7504159733777038, "grad_norm": 0.58984375, "learning_rate": 0.00030934840751502566, "loss": 1.4278, "step": 5863 }, { "epoch": 0.7505439651862281, "grad_norm": 0.7578125, "learning_rate": 0.00030904865676394057, "loss": 1.5621, "step": 5864 }, { "epoch": 0.7506719569947523, "grad_norm": 0.6640625, "learning_rate": 0.0003087490247640544, "loss": 1.3564, "step": 5865 }, { "epoch": 0.7507999488032766, "grad_norm": 0.703125, "learning_rate": 0.000308449511566864, "loss": 1.61, "step": 5866 }, { "epoch": 0.7509279406118008, "grad_norm": 0.5546875, "learning_rate": 0.00030815011722384487, "loss": 0.9814, "step": 5867 }, { "epoch": 0.7510559324203251, "grad_norm": 1.5234375, "learning_rate": 0.0003078508417864533, "loss": 1.3305, "step": 5868 }, { "epoch": 0.7511839242288494, "grad_norm": 0.8515625, "learning_rate": 0.00030755168530612444, "loss": 1.43, "step": 5869 }, { "epoch": 0.7513119160373736, "grad_norm": 0.6875, "learning_rate": 0.00030725264783427363, "loss": 1.5289, "step": 5870 }, { "epoch": 0.7514399078458979, "grad_norm": 0.59765625, "learning_rate": 0.0003069537294222946, "loss": 0.7228, "step": 5871 }, { "epoch": 0.7515678996544222, "grad_norm": 0.67578125, "learning_rate": 0.00030665493012156176, "loss": 1.3002, "step": 5872 }, { "epoch": 0.7516958914629464, "grad_norm": 0.65625, "learning_rate": 0.00030635624998342836, "loss": 1.6737, "step": 5873 }, { "epoch": 0.7518238832714706, "grad_norm": 0.74609375, "learning_rate": 0.0003060576890592278, "loss": 1.0701, "step": 5874 }, { "epoch": 0.7519518750799948, "grad_norm": 0.63671875, "learning_rate": 0.00030575924740027226, "loss": 1.202, "step": 5875 }, { "epoch": 0.7520798668885191, "grad_norm": 0.6953125, "learning_rate": 0.00030546092505785407, "loss": 1.1294, "step": 5876 }, { "epoch": 0.7522078586970434, "grad_norm": 0.703125, "learning_rate": 0.0003051627220832446, "loss": 1.4866, "step": 5877 }, { "epoch": 0.7523358505055676, "grad_norm": 0.65234375, "learning_rate": 0.00030486463852769487, "loss": 1.3078, "step": 5878 }, { "epoch": 0.7524638423140919, "grad_norm": 0.59765625, "learning_rate": 0.0003045666744424358, "loss": 1.1349, "step": 5879 }, { "epoch": 0.7525918341226161, "grad_norm": 0.482421875, "learning_rate": 0.00030426882987867653, "loss": 0.8171, "step": 5880 }, { "epoch": 0.7527198259311404, "grad_norm": 0.63671875, "learning_rate": 0.00030397110488760706, "loss": 1.5926, "step": 5881 }, { "epoch": 0.7528478177396647, "grad_norm": 0.68359375, "learning_rate": 0.00030367349952039604, "loss": 1.418, "step": 5882 }, { "epoch": 0.7529758095481889, "grad_norm": 0.484375, "learning_rate": 0.0003033760138281918, "loss": 0.7875, "step": 5883 }, { "epoch": 0.7531038013567132, "grad_norm": 0.64453125, "learning_rate": 0.0003030786478621226, "loss": 1.1093, "step": 5884 }, { "epoch": 0.7532317931652375, "grad_norm": 0.6953125, "learning_rate": 0.00030278140167329436, "loss": 1.2292, "step": 5885 }, { "epoch": 0.7533597849737617, "grad_norm": 0.6953125, "learning_rate": 0.00030248427531279475, "loss": 1.4123, "step": 5886 }, { "epoch": 0.753487776782286, "grad_norm": 0.65625, "learning_rate": 0.0003021872688316896, "loss": 1.6505, "step": 5887 }, { "epoch": 0.7536157685908101, "grad_norm": 0.68359375, "learning_rate": 0.0003018903822810244, "loss": 1.4422, "step": 5888 }, { "epoch": 0.7537437603993344, "grad_norm": 0.66796875, "learning_rate": 0.00030159361571182333, "loss": 1.389, "step": 5889 }, { "epoch": 0.7538717522078587, "grad_norm": 0.6796875, "learning_rate": 0.00030129696917509087, "loss": 1.575, "step": 5890 }, { "epoch": 0.7539997440163829, "grad_norm": 0.55078125, "learning_rate": 0.0003010004427218106, "loss": 0.7569, "step": 5891 }, { "epoch": 0.7541277358249072, "grad_norm": 0.74609375, "learning_rate": 0.00030070403640294565, "loss": 1.5979, "step": 5892 }, { "epoch": 0.7542557276334314, "grad_norm": 0.7578125, "learning_rate": 0.00030040775026943755, "loss": 1.8735, "step": 5893 }, { "epoch": 0.7543837194419557, "grad_norm": 0.498046875, "learning_rate": 0.0003001115843722083, "loss": 0.7654, "step": 5894 }, { "epoch": 0.75451171125048, "grad_norm": 0.59765625, "learning_rate": 0.00029981553876215884, "loss": 1.4183, "step": 5895 }, { "epoch": 0.7546397030590042, "grad_norm": 0.6796875, "learning_rate": 0.0002995196134901694, "loss": 1.1844, "step": 5896 }, { "epoch": 0.7547676948675285, "grad_norm": 0.65625, "learning_rate": 0.0002992238086070994, "loss": 1.5381, "step": 5897 }, { "epoch": 0.7548956866760528, "grad_norm": 0.5703125, "learning_rate": 0.0002989281241637879, "loss": 0.838, "step": 5898 }, { "epoch": 0.755023678484577, "grad_norm": 0.6328125, "learning_rate": 0.000298632560211053, "loss": 1.2481, "step": 5899 }, { "epoch": 0.7551516702931013, "grad_norm": 0.5625, "learning_rate": 0.0002983371167996921, "loss": 0.9281, "step": 5900 }, { "epoch": 0.7552796621016254, "grad_norm": 0.640625, "learning_rate": 0.00029804179398048236, "loss": 1.2941, "step": 5901 }, { "epoch": 0.7554076539101497, "grad_norm": 0.66015625, "learning_rate": 0.00029774659180417906, "loss": 1.0686, "step": 5902 }, { "epoch": 0.755535645718674, "grad_norm": 0.70703125, "learning_rate": 0.00029745151032151785, "loss": 1.3726, "step": 5903 }, { "epoch": 0.7556636375271982, "grad_norm": 0.6328125, "learning_rate": 0.0002971565495832133, "loss": 1.0257, "step": 5904 }, { "epoch": 0.7557916293357225, "grad_norm": 0.83984375, "learning_rate": 0.0002968617096399592, "loss": 1.3615, "step": 5905 }, { "epoch": 0.7559196211442468, "grad_norm": 0.7109375, "learning_rate": 0.00029656699054242874, "loss": 1.4151, "step": 5906 }, { "epoch": 0.756047612952771, "grad_norm": 0.5859375, "learning_rate": 0.00029627239234127377, "loss": 0.9974, "step": 5907 }, { "epoch": 0.7561756047612953, "grad_norm": 0.56640625, "learning_rate": 0.0002959779150871256, "loss": 0.8485, "step": 5908 }, { "epoch": 0.7563035965698195, "grad_norm": 0.69140625, "learning_rate": 0.0002956835588305957, "loss": 1.5071, "step": 5909 }, { "epoch": 0.7564315883783438, "grad_norm": 0.6796875, "learning_rate": 0.0002953893236222739, "loss": 1.3791, "step": 5910 }, { "epoch": 0.7565595801868681, "grad_norm": 0.93359375, "learning_rate": 0.0002950952095127286, "loss": 1.7148, "step": 5911 }, { "epoch": 0.7566875719953923, "grad_norm": 0.53125, "learning_rate": 0.0002948012165525085, "loss": 1.2198, "step": 5912 }, { "epoch": 0.7568155638039166, "grad_norm": 0.5859375, "learning_rate": 0.0002945073447921409, "loss": 1.1494, "step": 5913 }, { "epoch": 0.7569435556124408, "grad_norm": 0.6875, "learning_rate": 0.00029421359428213266, "loss": 1.0119, "step": 5914 }, { "epoch": 0.757071547420965, "grad_norm": 0.734375, "learning_rate": 0.0002939199650729698, "loss": 1.6029, "step": 5915 }, { "epoch": 0.7571995392294893, "grad_norm": 0.66796875, "learning_rate": 0.00029362645721511647, "loss": 1.5707, "step": 5916 }, { "epoch": 0.7573275310380135, "grad_norm": 0.6875, "learning_rate": 0.00029333307075901736, "loss": 1.3455, "step": 5917 }, { "epoch": 0.7574555228465378, "grad_norm": 0.6015625, "learning_rate": 0.0002930398057550955, "loss": 1.0185, "step": 5918 }, { "epoch": 0.7575835146550621, "grad_norm": 0.7109375, "learning_rate": 0.0002927466622537532, "loss": 1.1452, "step": 5919 }, { "epoch": 0.7577115064635863, "grad_norm": 0.85546875, "learning_rate": 0.000292453640305372, "loss": 1.3773, "step": 5920 }, { "epoch": 0.7578394982721106, "grad_norm": 0.64453125, "learning_rate": 0.00029216073996031255, "loss": 1.7001, "step": 5921 }, { "epoch": 0.7579674900806348, "grad_norm": 0.640625, "learning_rate": 0.0002918679612689145, "loss": 1.3534, "step": 5922 }, { "epoch": 0.7580954818891591, "grad_norm": 0.69921875, "learning_rate": 0.00029157530428149684, "loss": 1.3839, "step": 5923 }, { "epoch": 0.7582234736976834, "grad_norm": 0.6484375, "learning_rate": 0.0002912827690483568, "loss": 1.2468, "step": 5924 }, { "epoch": 0.7583514655062076, "grad_norm": 0.70703125, "learning_rate": 0.0002909903556197717, "loss": 1.5415, "step": 5925 }, { "epoch": 0.7584794573147319, "grad_norm": 0.6015625, "learning_rate": 0.0002906980640459975, "loss": 1.5097, "step": 5926 }, { "epoch": 0.7586074491232561, "grad_norm": 0.8125, "learning_rate": 0.0002904058943772693, "loss": 0.8155, "step": 5927 }, { "epoch": 0.7587354409317804, "grad_norm": 0.6640625, "learning_rate": 0.00029011384666380145, "loss": 1.2594, "step": 5928 }, { "epoch": 0.7588634327403047, "grad_norm": 0.62109375, "learning_rate": 0.0002898219209557864, "loss": 1.0556, "step": 5929 }, { "epoch": 0.7589914245488288, "grad_norm": 0.8515625, "learning_rate": 0.0002895301173033964, "loss": 1.6612, "step": 5930 }, { "epoch": 0.7591194163573531, "grad_norm": 0.5078125, "learning_rate": 0.00028923843575678334, "loss": 0.7407, "step": 5931 }, { "epoch": 0.7592474081658774, "grad_norm": 0.5625, "learning_rate": 0.0002889468763660772, "loss": 0.8497, "step": 5932 }, { "epoch": 0.7593753999744016, "grad_norm": 0.640625, "learning_rate": 0.0002886554391813868, "loss": 1.3403, "step": 5933 }, { "epoch": 0.7595033917829259, "grad_norm": 0.4921875, "learning_rate": 0.0002883641242528006, "loss": 0.7408, "step": 5934 }, { "epoch": 0.7596313835914501, "grad_norm": 0.498046875, "learning_rate": 0.00028807293163038574, "loss": 0.7503, "step": 5935 }, { "epoch": 0.7597593753999744, "grad_norm": 0.6484375, "learning_rate": 0.0002877818613641884, "loss": 1.1935, "step": 5936 }, { "epoch": 0.7598873672084987, "grad_norm": 0.73046875, "learning_rate": 0.00028749091350423406, "loss": 1.4146, "step": 5937 }, { "epoch": 0.7600153590170229, "grad_norm": 0.63671875, "learning_rate": 0.0002872000881005262, "loss": 1.228, "step": 5938 }, { "epoch": 0.7601433508255472, "grad_norm": 0.74609375, "learning_rate": 0.00028690938520304824, "loss": 1.4191, "step": 5939 }, { "epoch": 0.7602713426340714, "grad_norm": 0.73046875, "learning_rate": 0.0002866188048617622, "loss": 1.7078, "step": 5940 }, { "epoch": 0.7603993344425957, "grad_norm": 0.64453125, "learning_rate": 0.00028632834712660884, "loss": 1.0708, "step": 5941 }, { "epoch": 0.76052732625112, "grad_norm": 0.578125, "learning_rate": 0.00028603801204750824, "loss": 0.8285, "step": 5942 }, { "epoch": 0.7606553180596441, "grad_norm": 0.625, "learning_rate": 0.00028574779967435916, "loss": 0.6676, "step": 5943 }, { "epoch": 0.7607833098681684, "grad_norm": 0.7109375, "learning_rate": 0.00028545771005703924, "loss": 1.6365, "step": 5944 }, { "epoch": 0.7609113016766927, "grad_norm": 0.5390625, "learning_rate": 0.0002851677432454052, "loss": 0.7824, "step": 5945 }, { "epoch": 0.7610392934852169, "grad_norm": 0.734375, "learning_rate": 0.0002848778992892928, "loss": 1.5546, "step": 5946 }, { "epoch": 0.7611672852937412, "grad_norm": 0.71484375, "learning_rate": 0.0002845881782385156, "loss": 1.9672, "step": 5947 }, { "epoch": 0.7612952771022654, "grad_norm": 0.83203125, "learning_rate": 0.00028429858014286744, "loss": 1.9262, "step": 5948 }, { "epoch": 0.7614232689107897, "grad_norm": 0.67578125, "learning_rate": 0.00028400910505212043, "loss": 1.183, "step": 5949 }, { "epoch": 0.761551260719314, "grad_norm": 0.703125, "learning_rate": 0.0002837197530160257, "loss": 1.7212, "step": 5950 }, { "epoch": 0.7616792525278382, "grad_norm": 0.765625, "learning_rate": 0.00028343052408431267, "loss": 1.3336, "step": 5951 }, { "epoch": 0.7618072443363625, "grad_norm": 0.5390625, "learning_rate": 0.0002831414183066903, "loss": 1.1047, "step": 5952 }, { "epoch": 0.7619352361448867, "grad_norm": 0.62109375, "learning_rate": 0.0002828524357328456, "loss": 1.3185, "step": 5953 }, { "epoch": 0.762063227953411, "grad_norm": 0.5625, "learning_rate": 0.000282563576412446, "loss": 0.9853, "step": 5954 }, { "epoch": 0.7621912197619353, "grad_norm": 0.83984375, "learning_rate": 0.00028227484039513575, "loss": 1.2432, "step": 5955 }, { "epoch": 0.7623192115704595, "grad_norm": 0.6484375, "learning_rate": 0.00028198622773053904, "loss": 1.1078, "step": 5956 }, { "epoch": 0.7624472033789838, "grad_norm": 0.63671875, "learning_rate": 0.00028169773846825884, "loss": 1.2796, "step": 5957 }, { "epoch": 0.762575195187508, "grad_norm": 0.59765625, "learning_rate": 0.0002814093726578765, "loss": 0.8792, "step": 5958 }, { "epoch": 0.7627031869960322, "grad_norm": 0.6171875, "learning_rate": 0.0002811211303489527, "loss": 1.1183, "step": 5959 }, { "epoch": 0.7628311788045565, "grad_norm": 0.65625, "learning_rate": 0.0002808330115910259, "loss": 1.2995, "step": 5960 }, { "epoch": 0.7629591706130807, "grad_norm": 0.6328125, "learning_rate": 0.0002805450164336145, "loss": 1.5348, "step": 5961 }, { "epoch": 0.763087162421605, "grad_norm": 0.66015625, "learning_rate": 0.00028025714492621503, "loss": 1.2319, "step": 5962 }, { "epoch": 0.7632151542301293, "grad_norm": 0.62890625, "learning_rate": 0.00027996939711830274, "loss": 1.1613, "step": 5963 }, { "epoch": 0.7633431460386535, "grad_norm": 0.87109375, "learning_rate": 0.00027968177305933193, "loss": 1.5064, "step": 5964 }, { "epoch": 0.7634711378471778, "grad_norm": 0.66796875, "learning_rate": 0.0002793942727987354, "loss": 1.5781, "step": 5965 }, { "epoch": 0.763599129655702, "grad_norm": 0.59765625, "learning_rate": 0.0002791068963859248, "loss": 1.2689, "step": 5966 }, { "epoch": 0.7637271214642263, "grad_norm": 0.82421875, "learning_rate": 0.00027881964387029034, "loss": 1.3662, "step": 5967 }, { "epoch": 0.7638551132727506, "grad_norm": 0.73046875, "learning_rate": 0.0002785325153012014, "loss": 1.601, "step": 5968 }, { "epoch": 0.7639831050812748, "grad_norm": 0.75, "learning_rate": 0.00027824551072800496, "loss": 1.6389, "step": 5969 }, { "epoch": 0.7641110968897991, "grad_norm": 0.6015625, "learning_rate": 0.00027795863020002777, "loss": 1.415, "step": 5970 }, { "epoch": 0.7642390886983234, "grad_norm": 1.015625, "learning_rate": 0.000277671873766575, "loss": 1.1059, "step": 5971 }, { "epoch": 0.7643670805068475, "grad_norm": 0.7734375, "learning_rate": 0.0002773852414769306, "loss": 1.081, "step": 5972 }, { "epoch": 0.7644950723153718, "grad_norm": 0.8125, "learning_rate": 0.0002770987333803564, "loss": 1.4324, "step": 5973 }, { "epoch": 0.764623064123896, "grad_norm": 0.58203125, "learning_rate": 0.0002768123495260938, "loss": 1.074, "step": 5974 }, { "epoch": 0.7647510559324203, "grad_norm": 0.6484375, "learning_rate": 0.00027652608996336236, "loss": 1.1266, "step": 5975 }, { "epoch": 0.7648790477409446, "grad_norm": 0.5703125, "learning_rate": 0.00027623995474136063, "loss": 0.9929, "step": 5976 }, { "epoch": 0.7650070395494688, "grad_norm": 0.73828125, "learning_rate": 0.00027595394390926553, "loss": 1.0579, "step": 5977 }, { "epoch": 0.7651350313579931, "grad_norm": 0.828125, "learning_rate": 0.0002756680575162327, "loss": 1.402, "step": 5978 }, { "epoch": 0.7652630231665174, "grad_norm": 0.58984375, "learning_rate": 0.00027538229561139616, "loss": 0.7966, "step": 5979 }, { "epoch": 0.7653910149750416, "grad_norm": 0.62890625, "learning_rate": 0.000275096658243869, "loss": 1.4967, "step": 5980 }, { "epoch": 0.7655190067835659, "grad_norm": 0.5859375, "learning_rate": 0.0002748111454627428, "loss": 0.9519, "step": 5981 }, { "epoch": 0.7656469985920901, "grad_norm": 0.56640625, "learning_rate": 0.0002745257573170868, "loss": 0.8974, "step": 5982 }, { "epoch": 0.7657749904006144, "grad_norm": 0.73046875, "learning_rate": 0.0002742404938559502, "loss": 1.3709, "step": 5983 }, { "epoch": 0.7659029822091387, "grad_norm": 0.76953125, "learning_rate": 0.0002739553551283599, "loss": 1.4114, "step": 5984 }, { "epoch": 0.7660309740176628, "grad_norm": 0.55078125, "learning_rate": 0.0002736703411833218, "loss": 0.9999, "step": 5985 }, { "epoch": 0.7661589658261871, "grad_norm": 0.64453125, "learning_rate": 0.00027338545206982, "loss": 1.0199, "step": 5986 }, { "epoch": 0.7662869576347113, "grad_norm": 0.703125, "learning_rate": 0.00027310068783681753, "loss": 1.8409, "step": 5987 }, { "epoch": 0.7664149494432356, "grad_norm": 0.8671875, "learning_rate": 0.0002728160485332556, "loss": 1.0862, "step": 5988 }, { "epoch": 0.7665429412517599, "grad_norm": 0.7734375, "learning_rate": 0.000272531534208054, "loss": 1.7566, "step": 5989 }, { "epoch": 0.7666709330602841, "grad_norm": 0.71875, "learning_rate": 0.0002722471449101117, "loss": 0.9934, "step": 5990 }, { "epoch": 0.7667989248688084, "grad_norm": 0.6015625, "learning_rate": 0.0002719628806883048, "loss": 0.8887, "step": 5991 }, { "epoch": 0.7669269166773327, "grad_norm": 0.56640625, "learning_rate": 0.000271678741591489, "loss": 1.1427, "step": 5992 }, { "epoch": 0.7670549084858569, "grad_norm": 0.58984375, "learning_rate": 0.00027139472766849837, "loss": 1.0461, "step": 5993 }, { "epoch": 0.7671829002943812, "grad_norm": 0.703125, "learning_rate": 0.00027111083896814547, "loss": 1.1651, "step": 5994 }, { "epoch": 0.7673108921029054, "grad_norm": 0.55078125, "learning_rate": 0.0002708270755392207, "loss": 0.9382, "step": 5995 }, { "epoch": 0.7674388839114297, "grad_norm": 0.73046875, "learning_rate": 0.0002705434374304937, "loss": 1.1578, "step": 5996 }, { "epoch": 0.767566875719954, "grad_norm": 0.64453125, "learning_rate": 0.00027025992469071215, "loss": 1.9144, "step": 5997 }, { "epoch": 0.7676948675284782, "grad_norm": 0.609375, "learning_rate": 0.00026997653736860215, "loss": 1.2737, "step": 5998 }, { "epoch": 0.7678228593370025, "grad_norm": 0.6328125, "learning_rate": 0.00026969327551286924, "loss": 1.3793, "step": 5999 }, { "epoch": 0.7679508511455266, "grad_norm": 0.61328125, "learning_rate": 0.0002694101391721957, "loss": 1.0844, "step": 6000 }, { "epoch": 0.7680788429540509, "grad_norm": 0.58203125, "learning_rate": 0.00026912712839524336, "loss": 1.0947, "step": 6001 }, { "epoch": 0.7682068347625752, "grad_norm": 0.5078125, "learning_rate": 0.00026884424323065227, "loss": 0.8226, "step": 6002 }, { "epoch": 0.7683348265710994, "grad_norm": 0.6015625, "learning_rate": 0.000268561483727041, "loss": 0.9825, "step": 6003 }, { "epoch": 0.7684628183796237, "grad_norm": 0.90625, "learning_rate": 0.000268278849933006, "loss": 1.6239, "step": 6004 }, { "epoch": 0.768590810188148, "grad_norm": 0.609375, "learning_rate": 0.00026799634189712265, "loss": 1.0631, "step": 6005 }, { "epoch": 0.7687188019966722, "grad_norm": 0.7109375, "learning_rate": 0.00026771395966794444, "loss": 1.3838, "step": 6006 }, { "epoch": 0.7688467938051965, "grad_norm": 0.73828125, "learning_rate": 0.00026743170329400343, "loss": 1.4818, "step": 6007 }, { "epoch": 0.7689747856137207, "grad_norm": 0.703125, "learning_rate": 0.00026714957282381003, "loss": 1.3851, "step": 6008 }, { "epoch": 0.769102777422245, "grad_norm": 0.61328125, "learning_rate": 0.0002668675683058528, "loss": 1.2861, "step": 6009 }, { "epoch": 0.7692307692307693, "grad_norm": 0.56640625, "learning_rate": 0.00026658568978859886, "loss": 0.8006, "step": 6010 }, { "epoch": 0.7693587610392935, "grad_norm": 0.734375, "learning_rate": 0.00026630393732049353, "loss": 1.4396, "step": 6011 }, { "epoch": 0.7694867528478178, "grad_norm": 0.76171875, "learning_rate": 0.00026602231094996097, "loss": 1.4374, "step": 6012 }, { "epoch": 0.7696147446563419, "grad_norm": 0.65234375, "learning_rate": 0.0002657408107254027, "loss": 1.33, "step": 6013 }, { "epoch": 0.7697427364648662, "grad_norm": 0.5703125, "learning_rate": 0.0002654594366951991, "loss": 0.9101, "step": 6014 }, { "epoch": 0.7698707282733905, "grad_norm": 0.66015625, "learning_rate": 0.00026517818890770917, "loss": 1.1259, "step": 6015 }, { "epoch": 0.7699987200819147, "grad_norm": 0.60546875, "learning_rate": 0.0002648970674112701, "loss": 1.1279, "step": 6016 }, { "epoch": 0.770126711890439, "grad_norm": 0.8125, "learning_rate": 0.00026461607225419635, "loss": 2.1696, "step": 6017 }, { "epoch": 0.7702547036989633, "grad_norm": 0.7578125, "learning_rate": 0.00026433520348478223, "loss": 1.5144, "step": 6018 }, { "epoch": 0.7703826955074875, "grad_norm": 0.8359375, "learning_rate": 0.0002640544611512993, "loss": 2.0164, "step": 6019 }, { "epoch": 0.7705106873160118, "grad_norm": 0.53515625, "learning_rate": 0.00026377384530199775, "loss": 0.786, "step": 6020 }, { "epoch": 0.770638679124536, "grad_norm": 0.5546875, "learning_rate": 0.00026349335598510604, "loss": 0.7691, "step": 6021 }, { "epoch": 0.7707666709330603, "grad_norm": 0.69921875, "learning_rate": 0.00026321299324883065, "loss": 1.4258, "step": 6022 }, { "epoch": 0.7708946627415846, "grad_norm": 0.609375, "learning_rate": 0.0002629327571413567, "loss": 1.2273, "step": 6023 }, { "epoch": 0.7710226545501088, "grad_norm": 0.8515625, "learning_rate": 0.0002626526477108471, "loss": 1.177, "step": 6024 }, { "epoch": 0.7711506463586331, "grad_norm": 0.59765625, "learning_rate": 0.0002623726650054434, "loss": 1.2603, "step": 6025 }, { "epoch": 0.7712786381671572, "grad_norm": 0.6640625, "learning_rate": 0.00026209280907326494, "loss": 1.0818, "step": 6026 }, { "epoch": 0.7714066299756815, "grad_norm": 0.6171875, "learning_rate": 0.00026181307996240964, "loss": 1.1065, "step": 6027 }, { "epoch": 0.7715346217842058, "grad_norm": 0.69921875, "learning_rate": 0.0002615334777209534, "loss": 1.291, "step": 6028 }, { "epoch": 0.77166261359273, "grad_norm": 0.6640625, "learning_rate": 0.0002612540023969504, "loss": 1.0551, "step": 6029 }, { "epoch": 0.7717906054012543, "grad_norm": 0.7421875, "learning_rate": 0.00026097465403843314, "loss": 1.0369, "step": 6030 }, { "epoch": 0.7719185972097786, "grad_norm": 0.55078125, "learning_rate": 0.000260695432693412, "loss": 0.8425, "step": 6031 }, { "epoch": 0.7720465890183028, "grad_norm": 0.68359375, "learning_rate": 0.0002604163384098759, "loss": 1.0374, "step": 6032 }, { "epoch": 0.7721745808268271, "grad_norm": 0.58203125, "learning_rate": 0.00026013737123579153, "loss": 0.9619, "step": 6033 }, { "epoch": 0.7723025726353513, "grad_norm": 0.72265625, "learning_rate": 0.0002598585312191043, "loss": 1.4319, "step": 6034 }, { "epoch": 0.7724305644438756, "grad_norm": 0.734375, "learning_rate": 0.00025957981840773683, "loss": 1.2935, "step": 6035 }, { "epoch": 0.7725585562523999, "grad_norm": 0.67578125, "learning_rate": 0.0002593012328495906, "loss": 1.5247, "step": 6036 }, { "epoch": 0.7726865480609241, "grad_norm": 0.70703125, "learning_rate": 0.0002590227745925453, "loss": 1.0064, "step": 6037 }, { "epoch": 0.7728145398694484, "grad_norm": 0.73046875, "learning_rate": 0.0002587444436844586, "loss": 1.6959, "step": 6038 }, { "epoch": 0.7729425316779726, "grad_norm": 0.75390625, "learning_rate": 0.00025846624017316565, "loss": 1.4747, "step": 6039 }, { "epoch": 0.7730705234864969, "grad_norm": 0.7734375, "learning_rate": 0.0002581881641064806, "loss": 1.6392, "step": 6040 }, { "epoch": 0.7731985152950211, "grad_norm": 0.53125, "learning_rate": 0.0002579102155321952, "loss": 1.0542, "step": 6041 }, { "epoch": 0.7733265071035453, "grad_norm": 0.55078125, "learning_rate": 0.00025763239449807954, "loss": 0.819, "step": 6042 }, { "epoch": 0.7734544989120696, "grad_norm": 0.59375, "learning_rate": 0.00025735470105188164, "loss": 1.1657, "step": 6043 }, { "epoch": 0.7735824907205939, "grad_norm": 0.64453125, "learning_rate": 0.0002570771352413275, "loss": 0.8697, "step": 6044 }, { "epoch": 0.7737104825291181, "grad_norm": 0.6640625, "learning_rate": 0.0002567996971141214, "loss": 1.5477, "step": 6045 }, { "epoch": 0.7738384743376424, "grad_norm": 0.59765625, "learning_rate": 0.0002565223867179457, "loss": 1.0697, "step": 6046 }, { "epoch": 0.7739664661461666, "grad_norm": 0.5546875, "learning_rate": 0.0002562452041004608, "loss": 1.0237, "step": 6047 }, { "epoch": 0.7740944579546909, "grad_norm": 0.6796875, "learning_rate": 0.0002559681493093046, "loss": 1.1713, "step": 6048 }, { "epoch": 0.7742224497632152, "grad_norm": 0.73046875, "learning_rate": 0.0002556912223920936, "loss": 1.5754, "step": 6049 }, { "epoch": 0.7743504415717394, "grad_norm": 0.77734375, "learning_rate": 0.0002554144233964224, "loss": 2.339, "step": 6050 }, { "epoch": 0.7744784333802637, "grad_norm": 0.69140625, "learning_rate": 0.00025513775236986314, "loss": 1.1741, "step": 6051 }, { "epoch": 0.774606425188788, "grad_norm": 0.55859375, "learning_rate": 0.0002548612093599664, "loss": 1.0942, "step": 6052 }, { "epoch": 0.7747344169973122, "grad_norm": 0.6640625, "learning_rate": 0.0002545847944142604, "loss": 1.1793, "step": 6053 }, { "epoch": 0.7748624088058365, "grad_norm": 0.59375, "learning_rate": 0.00025430850758025183, "loss": 1.0242, "step": 6054 }, { "epoch": 0.7749904006143606, "grad_norm": 0.6953125, "learning_rate": 0.0002540323489054248, "loss": 1.6064, "step": 6055 }, { "epoch": 0.7751183924228849, "grad_norm": 0.7109375, "learning_rate": 0.0002537563184372421, "loss": 0.983, "step": 6056 }, { "epoch": 0.7752463842314092, "grad_norm": 0.84765625, "learning_rate": 0.00025348041622314335, "loss": 1.5031, "step": 6057 }, { "epoch": 0.7753743760399334, "grad_norm": 0.6875, "learning_rate": 0.0002532046423105472, "loss": 1.8124, "step": 6058 }, { "epoch": 0.7755023678484577, "grad_norm": 0.70703125, "learning_rate": 0.0002529289967468499, "loss": 1.6467, "step": 6059 }, { "epoch": 0.7756303596569819, "grad_norm": 0.72265625, "learning_rate": 0.00025265347957942574, "loss": 1.4106, "step": 6060 }, { "epoch": 0.7757583514655062, "grad_norm": 0.6015625, "learning_rate": 0.00025237809085562645, "loss": 1.2299, "step": 6061 }, { "epoch": 0.7758863432740305, "grad_norm": 0.6171875, "learning_rate": 0.0002521028306227822, "loss": 1.4466, "step": 6062 }, { "epoch": 0.7760143350825547, "grad_norm": 0.5859375, "learning_rate": 0.0002518276989282009, "loss": 1.2504, "step": 6063 }, { "epoch": 0.776142326891079, "grad_norm": 0.6015625, "learning_rate": 0.0002515526958191685, "loss": 0.9863, "step": 6064 }, { "epoch": 0.7762703186996033, "grad_norm": 0.671875, "learning_rate": 0.0002512778213429486, "loss": 1.4729, "step": 6065 }, { "epoch": 0.7763983105081275, "grad_norm": 0.58984375, "learning_rate": 0.00025100307554678304, "loss": 1.2179, "step": 6066 }, { "epoch": 0.7765263023166518, "grad_norm": 0.73046875, "learning_rate": 0.00025072845847789126, "loss": 1.6835, "step": 6067 }, { "epoch": 0.776654294125176, "grad_norm": 0.625, "learning_rate": 0.0002504539701834706, "loss": 1.0867, "step": 6068 }, { "epoch": 0.7767822859337002, "grad_norm": 0.609375, "learning_rate": 0.0002501796107106966, "loss": 1.3428, "step": 6069 }, { "epoch": 0.7769102777422245, "grad_norm": 0.67578125, "learning_rate": 0.00024990538010672206, "loss": 0.9998, "step": 6070 }, { "epoch": 0.7770382695507487, "grad_norm": 0.65234375, "learning_rate": 0.0002496312784186778, "loss": 0.9903, "step": 6071 }, { "epoch": 0.777166261359273, "grad_norm": 0.7421875, "learning_rate": 0.0002493573056936731, "loss": 1.2281, "step": 6072 }, { "epoch": 0.7772942531677972, "grad_norm": 0.58203125, "learning_rate": 0.0002490834619787943, "loss": 1.2963, "step": 6073 }, { "epoch": 0.7774222449763215, "grad_norm": 0.73046875, "learning_rate": 0.00024880974732110627, "loss": 1.725, "step": 6074 }, { "epoch": 0.7775502367848458, "grad_norm": 0.60546875, "learning_rate": 0.0002485361617676506, "loss": 1.2637, "step": 6075 }, { "epoch": 0.77767822859337, "grad_norm": 0.7109375, "learning_rate": 0.00024826270536544803, "loss": 1.3388, "step": 6076 }, { "epoch": 0.7778062204018943, "grad_norm": 0.5625, "learning_rate": 0.00024798937816149626, "loss": 1.0382, "step": 6077 }, { "epoch": 0.7779342122104186, "grad_norm": 0.6484375, "learning_rate": 0.0002477161802027714, "loss": 1.4794, "step": 6078 }, { "epoch": 0.7780622040189428, "grad_norm": 0.671875, "learning_rate": 0.00024744311153622635, "loss": 1.851, "step": 6079 }, { "epoch": 0.7781901958274671, "grad_norm": 0.7265625, "learning_rate": 0.0002471701722087926, "loss": 2.0376, "step": 6080 }, { "epoch": 0.7783181876359913, "grad_norm": 0.578125, "learning_rate": 0.0002468973622673791, "loss": 1.0485, "step": 6081 }, { "epoch": 0.7784461794445156, "grad_norm": 0.671875, "learning_rate": 0.00024662468175887296, "loss": 1.2498, "step": 6082 }, { "epoch": 0.7785741712530398, "grad_norm": 0.60546875, "learning_rate": 0.0002463521307301388, "loss": 0.8574, "step": 6083 }, { "epoch": 0.778702163061564, "grad_norm": 0.6796875, "learning_rate": 0.0002460797092280186, "loss": 1.1963, "step": 6084 }, { "epoch": 0.7788301548700883, "grad_norm": 0.71484375, "learning_rate": 0.0002458074172993324, "loss": 1.1001, "step": 6085 }, { "epoch": 0.7789581466786125, "grad_norm": 0.6875, "learning_rate": 0.00024553525499087814, "loss": 1.6738, "step": 6086 }, { "epoch": 0.7790861384871368, "grad_norm": 0.85546875, "learning_rate": 0.0002452632223494313, "loss": 1.3709, "step": 6087 }, { "epoch": 0.7792141302956611, "grad_norm": 0.75, "learning_rate": 0.00024499131942174525, "loss": 1.2421, "step": 6088 }, { "epoch": 0.7793421221041853, "grad_norm": 0.609375, "learning_rate": 0.0002447195462545506, "loss": 1.6077, "step": 6089 }, { "epoch": 0.7794701139127096, "grad_norm": 0.59765625, "learning_rate": 0.00024444790289455633, "loss": 1.0279, "step": 6090 }, { "epoch": 0.7795981057212339, "grad_norm": 0.80078125, "learning_rate": 0.00024417638938844876, "loss": 1.0816, "step": 6091 }, { "epoch": 0.7797260975297581, "grad_norm": 0.609375, "learning_rate": 0.00024390500578289154, "loss": 1.0834, "step": 6092 }, { "epoch": 0.7798540893382824, "grad_norm": 0.71484375, "learning_rate": 0.00024363375212452655, "loss": 1.3494, "step": 6093 }, { "epoch": 0.7799820811468066, "grad_norm": 0.455078125, "learning_rate": 0.00024336262845997303, "loss": 0.7065, "step": 6094 }, { "epoch": 0.7801100729553309, "grad_norm": 0.50390625, "learning_rate": 0.00024309163483582808, "loss": 0.8325, "step": 6095 }, { "epoch": 0.7802380647638552, "grad_norm": 0.72265625, "learning_rate": 0.00024282077129866665, "loss": 1.3516, "step": 6096 }, { "epoch": 0.7803660565723793, "grad_norm": 0.6640625, "learning_rate": 0.0002425500378950405, "loss": 1.2968, "step": 6097 }, { "epoch": 0.7804940483809036, "grad_norm": 0.5859375, "learning_rate": 0.0002422794346714794, "loss": 1.1925, "step": 6098 }, { "epoch": 0.7806220401894278, "grad_norm": 0.546875, "learning_rate": 0.0002420089616744916, "loss": 1.0823, "step": 6099 }, { "epoch": 0.7807500319979521, "grad_norm": 0.73046875, "learning_rate": 0.00024173861895056227, "loss": 1.5705, "step": 6100 }, { "epoch": 0.7808780238064764, "grad_norm": 0.60546875, "learning_rate": 0.00024146840654615354, "loss": 1.1452, "step": 6101 }, { "epoch": 0.7810060156150006, "grad_norm": 0.65234375, "learning_rate": 0.0002411983245077062, "loss": 1.369, "step": 6102 }, { "epoch": 0.7811340074235249, "grad_norm": 0.5546875, "learning_rate": 0.00024092837288163804, "loss": 0.8354, "step": 6103 }, { "epoch": 0.7812619992320492, "grad_norm": 0.8515625, "learning_rate": 0.00024065855171434493, "loss": 1.6902, "step": 6104 }, { "epoch": 0.7813899910405734, "grad_norm": 0.546875, "learning_rate": 0.00024038886105220004, "loss": 0.7931, "step": 6105 }, { "epoch": 0.7815179828490977, "grad_norm": 0.69921875, "learning_rate": 0.00024011930094155353, "loss": 1.6558, "step": 6106 }, { "epoch": 0.7816459746576219, "grad_norm": 0.64453125, "learning_rate": 0.00023984987142873415, "loss": 1.0707, "step": 6107 }, { "epoch": 0.7817739664661462, "grad_norm": 0.64453125, "learning_rate": 0.0002395805725600476, "loss": 1.4335, "step": 6108 }, { "epoch": 0.7819019582746705, "grad_norm": 0.9296875, "learning_rate": 0.00023931140438177722, "loss": 1.21, "step": 6109 }, { "epoch": 0.7820299500831946, "grad_norm": 0.81640625, "learning_rate": 0.000239042366940184, "loss": 1.3239, "step": 6110 }, { "epoch": 0.7821579418917189, "grad_norm": 0.87890625, "learning_rate": 0.00023877346028150636, "loss": 1.6738, "step": 6111 }, { "epoch": 0.7822859337002431, "grad_norm": 0.69921875, "learning_rate": 0.0002385046844519604, "loss": 1.2386, "step": 6112 }, { "epoch": 0.7824139255087674, "grad_norm": 0.5703125, "learning_rate": 0.00023823603949773943, "loss": 0.9186, "step": 6113 }, { "epoch": 0.7825419173172917, "grad_norm": 0.6015625, "learning_rate": 0.00023796752546501477, "loss": 0.7933, "step": 6114 }, { "epoch": 0.7826699091258159, "grad_norm": 0.5703125, "learning_rate": 0.00023769914239993428, "loss": 1.1618, "step": 6115 }, { "epoch": 0.7827979009343402, "grad_norm": 0.68359375, "learning_rate": 0.00023743089034862443, "loss": 1.4325, "step": 6116 }, { "epoch": 0.7829258927428645, "grad_norm": 0.71484375, "learning_rate": 0.00023716276935718862, "loss": 1.4488, "step": 6117 }, { "epoch": 0.7830538845513887, "grad_norm": 0.75390625, "learning_rate": 0.0002368947794717079, "loss": 1.5559, "step": 6118 }, { "epoch": 0.783181876359913, "grad_norm": 0.734375, "learning_rate": 0.00023662692073824032, "loss": 1.7343, "step": 6119 }, { "epoch": 0.7833098681684372, "grad_norm": 0.703125, "learning_rate": 0.0002363591932028216, "loss": 1.2624, "step": 6120 }, { "epoch": 0.7834378599769615, "grad_norm": 0.765625, "learning_rate": 0.00023609159691146574, "loss": 1.1486, "step": 6121 }, { "epoch": 0.7835658517854858, "grad_norm": 0.58203125, "learning_rate": 0.0002358241319101635, "loss": 1.0536, "step": 6122 }, { "epoch": 0.78369384359401, "grad_norm": 0.75390625, "learning_rate": 0.0002355567982448825, "loss": 1.7611, "step": 6123 }, { "epoch": 0.7838218354025343, "grad_norm": 0.77734375, "learning_rate": 0.00023528959596156852, "loss": 1.7737, "step": 6124 }, { "epoch": 0.7839498272110585, "grad_norm": 0.6953125, "learning_rate": 0.00023502252510614486, "loss": 0.9668, "step": 6125 }, { "epoch": 0.7840778190195827, "grad_norm": 0.6484375, "learning_rate": 0.0002347555857245117, "loss": 1.5643, "step": 6126 }, { "epoch": 0.784205810828107, "grad_norm": 0.6484375, "learning_rate": 0.0002344887778625474, "loss": 1.1915, "step": 6127 }, { "epoch": 0.7843338026366312, "grad_norm": 0.74609375, "learning_rate": 0.00023422210156610657, "loss": 1.3524, "step": 6128 }, { "epoch": 0.7844617944451555, "grad_norm": 0.73828125, "learning_rate": 0.0002339555568810221, "loss": 1.1542, "step": 6129 }, { "epoch": 0.7845897862536798, "grad_norm": 0.59765625, "learning_rate": 0.00023368914385310413, "loss": 1.1752, "step": 6130 }, { "epoch": 0.784717778062204, "grad_norm": 0.60546875, "learning_rate": 0.00023342286252813992, "loss": 1.2485, "step": 6131 }, { "epoch": 0.7848457698707283, "grad_norm": 0.546875, "learning_rate": 0.00023315671295189434, "loss": 1.07, "step": 6132 }, { "epoch": 0.7849737616792525, "grad_norm": 0.71484375, "learning_rate": 0.00023289069517010952, "loss": 1.169, "step": 6133 }, { "epoch": 0.7851017534877768, "grad_norm": 0.75, "learning_rate": 0.00023262480922850483, "loss": 1.2255, "step": 6134 }, { "epoch": 0.7852297452963011, "grad_norm": 0.6328125, "learning_rate": 0.00023235905517277722, "loss": 0.8516, "step": 6135 }, { "epoch": 0.7853577371048253, "grad_norm": 0.66015625, "learning_rate": 0.00023209343304860108, "loss": 1.0404, "step": 6136 }, { "epoch": 0.7854857289133496, "grad_norm": 0.9453125, "learning_rate": 0.00023182794290162723, "loss": 2.2663, "step": 6137 }, { "epoch": 0.7856137207218739, "grad_norm": 0.69921875, "learning_rate": 0.0002315625847774848, "loss": 1.4601, "step": 6138 }, { "epoch": 0.785741712530398, "grad_norm": 0.84765625, "learning_rate": 0.0002312973587217798, "loss": 1.6266, "step": 6139 }, { "epoch": 0.7858697043389223, "grad_norm": 0.703125, "learning_rate": 0.0002310322647800962, "loss": 1.3143, "step": 6140 }, { "epoch": 0.7859976961474465, "grad_norm": 0.62109375, "learning_rate": 0.00023076730299799377, "loss": 1.3663, "step": 6141 }, { "epoch": 0.7861256879559708, "grad_norm": 0.53515625, "learning_rate": 0.00023050247342101094, "loss": 0.7454, "step": 6142 }, { "epoch": 0.7862536797644951, "grad_norm": 0.65625, "learning_rate": 0.0002302377760946628, "loss": 1.3628, "step": 6143 }, { "epoch": 0.7863816715730193, "grad_norm": 0.77734375, "learning_rate": 0.00022997321106444247, "loss": 1.6593, "step": 6144 }, { "epoch": 0.7865096633815436, "grad_norm": 0.671875, "learning_rate": 0.00022970877837581917, "loss": 1.2821, "step": 6145 }, { "epoch": 0.7866376551900678, "grad_norm": 0.5859375, "learning_rate": 0.00022944447807423995, "loss": 1.0893, "step": 6146 }, { "epoch": 0.7867656469985921, "grad_norm": 0.8359375, "learning_rate": 0.0002291803102051294, "loss": 1.6324, "step": 6147 }, { "epoch": 0.7868936388071164, "grad_norm": 0.63671875, "learning_rate": 0.00022891627481388878, "loss": 1.1119, "step": 6148 }, { "epoch": 0.7870216306156406, "grad_norm": 0.72265625, "learning_rate": 0.00022865237194589737, "loss": 2.2316, "step": 6149 }, { "epoch": 0.7871496224241649, "grad_norm": 0.5625, "learning_rate": 0.00022838860164651044, "loss": 0.9043, "step": 6150 }, { "epoch": 0.7872776142326892, "grad_norm": 0.74609375, "learning_rate": 0.00022812496396106154, "loss": 1.317, "step": 6151 }, { "epoch": 0.7874056060412133, "grad_norm": 0.6015625, "learning_rate": 0.000227861458934861, "loss": 1.0483, "step": 6152 }, { "epoch": 0.7875335978497376, "grad_norm": 0.5078125, "learning_rate": 0.0002275980866131966, "loss": 0.7755, "step": 6153 }, { "epoch": 0.7876615896582618, "grad_norm": 0.59375, "learning_rate": 0.0002273348470413329, "loss": 1.1493, "step": 6154 }, { "epoch": 0.7877895814667861, "grad_norm": 0.6328125, "learning_rate": 0.00022707174026451217, "loss": 1.2476, "step": 6155 }, { "epoch": 0.7879175732753104, "grad_norm": 0.77734375, "learning_rate": 0.00022680876632795343, "loss": 1.2417, "step": 6156 }, { "epoch": 0.7880455650838346, "grad_norm": 0.59765625, "learning_rate": 0.00022654592527685303, "loss": 1.2617, "step": 6157 }, { "epoch": 0.7881735568923589, "grad_norm": 0.51953125, "learning_rate": 0.00022628321715638478, "loss": 0.8614, "step": 6158 }, { "epoch": 0.7883015487008831, "grad_norm": 0.71484375, "learning_rate": 0.0002260206420116987, "loss": 1.1341, "step": 6159 }, { "epoch": 0.7884295405094074, "grad_norm": 0.578125, "learning_rate": 0.0002257581998879229, "loss": 1.0782, "step": 6160 }, { "epoch": 0.7885575323179317, "grad_norm": 0.78515625, "learning_rate": 0.0002254958908301624, "loss": 1.5362, "step": 6161 }, { "epoch": 0.7886855241264559, "grad_norm": 0.58203125, "learning_rate": 0.00022523371488349954, "loss": 1.0229, "step": 6162 }, { "epoch": 0.7888135159349802, "grad_norm": 0.60546875, "learning_rate": 0.00022497167209299275, "loss": 1.1013, "step": 6163 }, { "epoch": 0.7889415077435045, "grad_norm": 0.6875, "learning_rate": 0.00022470976250367882, "loss": 1.3694, "step": 6164 }, { "epoch": 0.7890694995520287, "grad_norm": 0.671875, "learning_rate": 0.00022444798616057116, "loss": 1.712, "step": 6165 }, { "epoch": 0.789197491360553, "grad_norm": 0.75390625, "learning_rate": 0.0002241863431086598, "loss": 1.6403, "step": 6166 }, { "epoch": 0.7893254831690771, "grad_norm": 0.734375, "learning_rate": 0.00022392483339291348, "loss": 1.2938, "step": 6167 }, { "epoch": 0.7894534749776014, "grad_norm": 0.90234375, "learning_rate": 0.00022366345705827572, "loss": 1.2758, "step": 6168 }, { "epoch": 0.7895814667861257, "grad_norm": 0.58984375, "learning_rate": 0.00022340221414966888, "loss": 1.361, "step": 6169 }, { "epoch": 0.7897094585946499, "grad_norm": 0.73046875, "learning_rate": 0.00022314110471199168, "loss": 1.152, "step": 6170 }, { "epoch": 0.7898374504031742, "grad_norm": 0.625, "learning_rate": 0.0002228801287901202, "loss": 1.3651, "step": 6171 }, { "epoch": 0.7899654422116984, "grad_norm": 0.71875, "learning_rate": 0.00022261928642890684, "loss": 1.7125, "step": 6172 }, { "epoch": 0.7900934340202227, "grad_norm": 0.6796875, "learning_rate": 0.00022235857767318202, "loss": 1.3596, "step": 6173 }, { "epoch": 0.790221425828747, "grad_norm": 0.5546875, "learning_rate": 0.0002220980025677526, "loss": 0.8571, "step": 6174 }, { "epoch": 0.7903494176372712, "grad_norm": 0.859375, "learning_rate": 0.00022183756115740273, "loss": 2.0979, "step": 6175 }, { "epoch": 0.7904774094457955, "grad_norm": 0.70703125, "learning_rate": 0.0002215772534868934, "loss": 1.4947, "step": 6176 }, { "epoch": 0.7906054012543198, "grad_norm": 0.75390625, "learning_rate": 0.00022131707960096293, "loss": 1.573, "step": 6177 }, { "epoch": 0.790733393062844, "grad_norm": 0.57421875, "learning_rate": 0.0002210570395443261, "loss": 1.5354, "step": 6178 }, { "epoch": 0.7908613848713683, "grad_norm": 0.72265625, "learning_rate": 0.00022079713336167528, "loss": 1.2702, "step": 6179 }, { "epoch": 0.7909893766798924, "grad_norm": 0.6796875, "learning_rate": 0.00022053736109767976, "loss": 1.5319, "step": 6180 }, { "epoch": 0.7911173684884167, "grad_norm": 0.6953125, "learning_rate": 0.0002202777227969851, "loss": 1.2561, "step": 6181 }, { "epoch": 0.791245360296941, "grad_norm": 0.59375, "learning_rate": 0.00022001821850421444, "loss": 0.7871, "step": 6182 }, { "epoch": 0.7913733521054652, "grad_norm": 0.61328125, "learning_rate": 0.0002197588482639681, "loss": 0.8407, "step": 6183 }, { "epoch": 0.7915013439139895, "grad_norm": 0.609375, "learning_rate": 0.00021949961212082303, "loss": 1.2543, "step": 6184 }, { "epoch": 0.7916293357225137, "grad_norm": 0.55859375, "learning_rate": 0.0002192405101193329, "loss": 0.9715, "step": 6185 }, { "epoch": 0.791757327531038, "grad_norm": 0.59375, "learning_rate": 0.00021898154230402868, "loss": 1.1489, "step": 6186 }, { "epoch": 0.7918853193395623, "grad_norm": 0.6328125, "learning_rate": 0.00021872270871941825, "loss": 1.1173, "step": 6187 }, { "epoch": 0.7920133111480865, "grad_norm": 0.66796875, "learning_rate": 0.0002184640094099861, "loss": 1.9079, "step": 6188 }, { "epoch": 0.7921413029566108, "grad_norm": 0.7578125, "learning_rate": 0.00021820544442019475, "loss": 1.3149, "step": 6189 }, { "epoch": 0.7922692947651351, "grad_norm": 0.546875, "learning_rate": 0.0002179470137944819, "loss": 1.0584, "step": 6190 }, { "epoch": 0.7923972865736593, "grad_norm": 0.68359375, "learning_rate": 0.00021768871757726327, "loss": 1.063, "step": 6191 }, { "epoch": 0.7925252783821836, "grad_norm": 0.72265625, "learning_rate": 0.00021743055581293147, "loss": 1.6331, "step": 6192 }, { "epoch": 0.7926532701907077, "grad_norm": 0.69921875, "learning_rate": 0.00021717252854585589, "loss": 1.3583, "step": 6193 }, { "epoch": 0.792781261999232, "grad_norm": 0.6796875, "learning_rate": 0.00021691463582038217, "loss": 1.4645, "step": 6194 }, { "epoch": 0.7929092538077563, "grad_norm": 0.61328125, "learning_rate": 0.00021665687768083363, "loss": 1.1755, "step": 6195 }, { "epoch": 0.7930372456162805, "grad_norm": 0.73046875, "learning_rate": 0.0002163992541715102, "loss": 1.961, "step": 6196 }, { "epoch": 0.7931652374248048, "grad_norm": 0.6015625, "learning_rate": 0.00021614176533668873, "loss": 0.839, "step": 6197 }, { "epoch": 0.7932932292333291, "grad_norm": 0.67578125, "learning_rate": 0.00021588441122062275, "loss": 1.1729, "step": 6198 }, { "epoch": 0.7934212210418533, "grad_norm": 0.64453125, "learning_rate": 0.00021562719186754288, "loss": 1.2731, "step": 6199 }, { "epoch": 0.7935492128503776, "grad_norm": 0.55078125, "learning_rate": 0.0002153701073216563, "loss": 1.1369, "step": 6200 }, { "epoch": 0.7936772046589018, "grad_norm": 0.50390625, "learning_rate": 0.0002151131576271471, "loss": 0.8436, "step": 6201 }, { "epoch": 0.7938051964674261, "grad_norm": 0.64453125, "learning_rate": 0.0002148563428281768, "loss": 1.3356, "step": 6202 }, { "epoch": 0.7939331882759504, "grad_norm": 0.53515625, "learning_rate": 0.00021459966296888234, "loss": 0.9569, "step": 6203 }, { "epoch": 0.7940611800844746, "grad_norm": 0.6171875, "learning_rate": 0.0002143431180933788, "loss": 1.0976, "step": 6204 }, { "epoch": 0.7941891718929989, "grad_norm": 0.64453125, "learning_rate": 0.0002140867082457575, "loss": 1.3126, "step": 6205 }, { "epoch": 0.794317163701523, "grad_norm": 0.65625, "learning_rate": 0.0002138304334700869, "loss": 0.7462, "step": 6206 }, { "epoch": 0.7944451555100474, "grad_norm": 0.83203125, "learning_rate": 0.00021357429381041148, "loss": 1.1364, "step": 6207 }, { "epoch": 0.7945731473185716, "grad_norm": 0.48828125, "learning_rate": 0.00021331828931075324, "loss": 1.0402, "step": 6208 }, { "epoch": 0.7947011391270958, "grad_norm": 0.53125, "learning_rate": 0.00021306242001511066, "loss": 0.9949, "step": 6209 }, { "epoch": 0.7948291309356201, "grad_norm": 0.734375, "learning_rate": 0.0002128066859674591, "loss": 1.4255, "step": 6210 }, { "epoch": 0.7949571227441444, "grad_norm": 0.462890625, "learning_rate": 0.00021255108721175066, "loss": 0.9923, "step": 6211 }, { "epoch": 0.7950851145526686, "grad_norm": 0.56640625, "learning_rate": 0.00021229562379191392, "loss": 0.9512, "step": 6212 }, { "epoch": 0.7952131063611929, "grad_norm": 0.61328125, "learning_rate": 0.00021204029575185458, "loss": 1.2956, "step": 6213 }, { "epoch": 0.7953410981697171, "grad_norm": 0.59375, "learning_rate": 0.00021178510313545485, "loss": 0.8675, "step": 6214 }, { "epoch": 0.7954690899782414, "grad_norm": 0.7265625, "learning_rate": 0.00021153004598657412, "loss": 1.4963, "step": 6215 }, { "epoch": 0.7955970817867657, "grad_norm": 0.5390625, "learning_rate": 0.00021127512434904728, "loss": 0.7326, "step": 6216 }, { "epoch": 0.7957250735952899, "grad_norm": 0.7421875, "learning_rate": 0.0002110203382666873, "loss": 0.7626, "step": 6217 }, { "epoch": 0.7958530654038142, "grad_norm": 0.671875, "learning_rate": 0.0002107656877832832, "loss": 1.0359, "step": 6218 }, { "epoch": 0.7959810572123384, "grad_norm": 0.921875, "learning_rate": 0.0002105111729426007, "loss": 1.3033, "step": 6219 }, { "epoch": 0.7961090490208627, "grad_norm": 0.546875, "learning_rate": 0.00021025679378838246, "loss": 1.1477, "step": 6220 }, { "epoch": 0.796237040829387, "grad_norm": 0.74609375, "learning_rate": 0.00021000255036434767, "loss": 1.701, "step": 6221 }, { "epoch": 0.7963650326379111, "grad_norm": 0.57421875, "learning_rate": 0.00020974844271419202, "loss": 1.0098, "step": 6222 }, { "epoch": 0.7964930244464354, "grad_norm": 0.578125, "learning_rate": 0.00020949447088158813, "loss": 1.1743, "step": 6223 }, { "epoch": 0.7966210162549597, "grad_norm": 0.63671875, "learning_rate": 0.0002092406349101855, "loss": 0.9741, "step": 6224 }, { "epoch": 0.7967490080634839, "grad_norm": 0.64453125, "learning_rate": 0.00020898693484360932, "loss": 1.5345, "step": 6225 }, { "epoch": 0.7968769998720082, "grad_norm": 0.734375, "learning_rate": 0.0002087333707254625, "loss": 1.1976, "step": 6226 }, { "epoch": 0.7970049916805324, "grad_norm": 0.70703125, "learning_rate": 0.0002084799425993239, "loss": 1.3495, "step": 6227 }, { "epoch": 0.7971329834890567, "grad_norm": 0.54296875, "learning_rate": 0.00020822665050874967, "loss": 0.9547, "step": 6228 }, { "epoch": 0.797260975297581, "grad_norm": 0.80859375, "learning_rate": 0.00020797349449727166, "loss": 1.5635, "step": 6229 }, { "epoch": 0.7973889671061052, "grad_norm": 0.6328125, "learning_rate": 0.00020772047460839905, "loss": 1.361, "step": 6230 }, { "epoch": 0.7975169589146295, "grad_norm": 0.65234375, "learning_rate": 0.0002074675908856174, "loss": 1.4314, "step": 6231 }, { "epoch": 0.7976449507231537, "grad_norm": 0.7109375, "learning_rate": 0.00020721484337238893, "loss": 1.4621, "step": 6232 }, { "epoch": 0.797772942531678, "grad_norm": 0.82421875, "learning_rate": 0.00020696223211215238, "loss": 1.4921, "step": 6233 }, { "epoch": 0.7979009343402023, "grad_norm": 0.5625, "learning_rate": 0.00020670975714832309, "loss": 0.6698, "step": 6234 }, { "epoch": 0.7980289261487264, "grad_norm": 0.58984375, "learning_rate": 0.0002064574185242929, "loss": 1.2611, "step": 6235 }, { "epoch": 0.7981569179572507, "grad_norm": 0.5546875, "learning_rate": 0.00020620521628343048, "loss": 0.9187, "step": 6236 }, { "epoch": 0.798284909765775, "grad_norm": 0.6953125, "learning_rate": 0.00020595315046908092, "loss": 0.9244, "step": 6237 }, { "epoch": 0.7984129015742992, "grad_norm": 0.69921875, "learning_rate": 0.00020570122112456546, "loss": 1.201, "step": 6238 }, { "epoch": 0.7985408933828235, "grad_norm": 0.69140625, "learning_rate": 0.00020544942829318246, "loss": 1.0484, "step": 6239 }, { "epoch": 0.7986688851913477, "grad_norm": 0.7734375, "learning_rate": 0.00020519777201820667, "loss": 1.8598, "step": 6240 }, { "epoch": 0.798796876999872, "grad_norm": 0.76953125, "learning_rate": 0.0002049462523428891, "loss": 1.3357, "step": 6241 }, { "epoch": 0.7989248688083963, "grad_norm": 0.87109375, "learning_rate": 0.00020469486931045766, "loss": 1.7176, "step": 6242 }, { "epoch": 0.7990528606169205, "grad_norm": 0.66796875, "learning_rate": 0.00020444362296411666, "loss": 1.4896, "step": 6243 }, { "epoch": 0.7991808524254448, "grad_norm": 0.609375, "learning_rate": 0.0002041925133470467, "loss": 1.2462, "step": 6244 }, { "epoch": 0.799308844233969, "grad_norm": 0.74609375, "learning_rate": 0.00020394154050240509, "loss": 1.3411, "step": 6245 }, { "epoch": 0.7994368360424933, "grad_norm": 0.6015625, "learning_rate": 0.00020369070447332583, "loss": 1.0707, "step": 6246 }, { "epoch": 0.7995648278510176, "grad_norm": 0.65625, "learning_rate": 0.00020344000530291874, "loss": 1.0125, "step": 6247 }, { "epoch": 0.7996928196595418, "grad_norm": 0.75, "learning_rate": 0.00020318944303427067, "loss": 1.7938, "step": 6248 }, { "epoch": 0.799820811468066, "grad_norm": 0.76171875, "learning_rate": 0.00020293901771044486, "loss": 1.9149, "step": 6249 }, { "epoch": 0.7999488032765903, "grad_norm": 0.765625, "learning_rate": 0.00020268872937448114, "loss": 1.3927, "step": 6250 }, { "epoch": 0.8000767950851145, "grad_norm": 0.5703125, "learning_rate": 0.0002024385780693956, "loss": 1.2099, "step": 6251 }, { "epoch": 0.8002047868936388, "grad_norm": 0.796875, "learning_rate": 0.00020218856383818042, "loss": 0.8402, "step": 6252 }, { "epoch": 0.800332778702163, "grad_norm": 0.59765625, "learning_rate": 0.00020193868672380478, "loss": 1.235, "step": 6253 }, { "epoch": 0.8004607705106873, "grad_norm": 0.52734375, "learning_rate": 0.00020168894676921424, "loss": 0.7111, "step": 6254 }, { "epoch": 0.8005887623192116, "grad_norm": 0.77734375, "learning_rate": 0.00020143934401733055, "loss": 1.1244, "step": 6255 }, { "epoch": 0.8007167541277358, "grad_norm": 0.7421875, "learning_rate": 0.00020118987851105207, "loss": 1.8556, "step": 6256 }, { "epoch": 0.8008447459362601, "grad_norm": 0.75, "learning_rate": 0.00020094055029325352, "loss": 1.4815, "step": 6257 }, { "epoch": 0.8009727377447843, "grad_norm": 0.68359375, "learning_rate": 0.00020069135940678585, "loss": 1.5262, "step": 6258 }, { "epoch": 0.8011007295533086, "grad_norm": 0.73828125, "learning_rate": 0.000200442305894477, "loss": 1.5229, "step": 6259 }, { "epoch": 0.8012287213618329, "grad_norm": 1.3125, "learning_rate": 0.00020019338979913027, "loss": 1.1487, "step": 6260 }, { "epoch": 0.8013567131703571, "grad_norm": 0.6015625, "learning_rate": 0.00019994461116352603, "loss": 1.1138, "step": 6261 }, { "epoch": 0.8014847049788814, "grad_norm": 0.55859375, "learning_rate": 0.00019969597003042117, "loss": 1.1266, "step": 6262 }, { "epoch": 0.8016126967874057, "grad_norm": 0.625, "learning_rate": 0.00019944746644254862, "loss": 1.4253, "step": 6263 }, { "epoch": 0.8017406885959298, "grad_norm": 0.78515625, "learning_rate": 0.000199199100442618, "loss": 1.4457, "step": 6264 }, { "epoch": 0.8018686804044541, "grad_norm": 0.671875, "learning_rate": 0.00019895087207331418, "loss": 1.1049, "step": 6265 }, { "epoch": 0.8019966722129783, "grad_norm": 0.8984375, "learning_rate": 0.00019870278137730014, "loss": 1.9353, "step": 6266 }, { "epoch": 0.8021246640215026, "grad_norm": 0.6796875, "learning_rate": 0.00019845482839721395, "loss": 1.601, "step": 6267 }, { "epoch": 0.8022526558300269, "grad_norm": 0.59765625, "learning_rate": 0.0001982070131756707, "loss": 1.221, "step": 6268 }, { "epoch": 0.8023806476385511, "grad_norm": 0.578125, "learning_rate": 0.00019795933575526072, "loss": 1.02, "step": 6269 }, { "epoch": 0.8025086394470754, "grad_norm": 0.515625, "learning_rate": 0.00019771179617855195, "loss": 0.8447, "step": 6270 }, { "epoch": 0.8026366312555996, "grad_norm": 0.5625, "learning_rate": 0.00019746439448808784, "loss": 1.0898, "step": 6271 }, { "epoch": 0.8027646230641239, "grad_norm": 0.703125, "learning_rate": 0.00019721713072638858, "loss": 1.1416, "step": 6272 }, { "epoch": 0.8028926148726482, "grad_norm": 0.765625, "learning_rate": 0.00019697000493595053, "loss": 1.4194, "step": 6273 }, { "epoch": 0.8030206066811724, "grad_norm": 0.62109375, "learning_rate": 0.0001967230171592459, "loss": 1.3946, "step": 6274 }, { "epoch": 0.8031485984896967, "grad_norm": 0.62890625, "learning_rate": 0.00019647616743872375, "loss": 1.312, "step": 6275 }, { "epoch": 0.803276590298221, "grad_norm": 0.609375, "learning_rate": 0.00019622945581680918, "loss": 1.0904, "step": 6276 }, { "epoch": 0.8034045821067451, "grad_norm": 0.6640625, "learning_rate": 0.00019598288233590368, "loss": 1.0389, "step": 6277 }, { "epoch": 0.8035325739152694, "grad_norm": 1.1484375, "learning_rate": 0.00019573644703838467, "loss": 1.8005, "step": 6278 }, { "epoch": 0.8036605657237936, "grad_norm": 0.6875, "learning_rate": 0.00019549014996660642, "loss": 1.5391, "step": 6279 }, { "epoch": 0.8037885575323179, "grad_norm": 0.7265625, "learning_rate": 0.0001952439911628987, "loss": 1.8258, "step": 6280 }, { "epoch": 0.8039165493408422, "grad_norm": 0.6796875, "learning_rate": 0.00019499797066956814, "loss": 1.3678, "step": 6281 }, { "epoch": 0.8040445411493664, "grad_norm": 0.6640625, "learning_rate": 0.00019475208852889759, "loss": 1.1926, "step": 6282 }, { "epoch": 0.8041725329578907, "grad_norm": 0.71484375, "learning_rate": 0.00019450634478314523, "loss": 1.1444, "step": 6283 }, { "epoch": 0.804300524766415, "grad_norm": 0.61328125, "learning_rate": 0.0001942607394745466, "loss": 0.9583, "step": 6284 }, { "epoch": 0.8044285165749392, "grad_norm": 0.734375, "learning_rate": 0.0001940152726453127, "loss": 1.5784, "step": 6285 }, { "epoch": 0.8045565083834635, "grad_norm": 0.69140625, "learning_rate": 0.0001937699443376314, "loss": 1.0851, "step": 6286 }, { "epoch": 0.8046845001919877, "grad_norm": 0.61328125, "learning_rate": 0.00019352475459366548, "loss": 1.4298, "step": 6287 }, { "epoch": 0.804812492000512, "grad_norm": 0.76171875, "learning_rate": 0.00019327970345555566, "loss": 1.3014, "step": 6288 }, { "epoch": 0.8049404838090363, "grad_norm": 0.64453125, "learning_rate": 0.00019303479096541764, "loss": 1.2683, "step": 6289 }, { "epoch": 0.8050684756175605, "grad_norm": 0.5, "learning_rate": 0.00019279001716534383, "loss": 0.6699, "step": 6290 }, { "epoch": 0.8051964674260847, "grad_norm": 0.51953125, "learning_rate": 0.00019254538209740213, "loss": 0.7029, "step": 6291 }, { "epoch": 0.8053244592346089, "grad_norm": 0.671875, "learning_rate": 0.00019230088580363724, "loss": 1.1885, "step": 6292 }, { "epoch": 0.8054524510431332, "grad_norm": 0.71484375, "learning_rate": 0.0001920565283260699, "loss": 1.7332, "step": 6293 }, { "epoch": 0.8055804428516575, "grad_norm": 0.85546875, "learning_rate": 0.00019181230970669684, "loss": 1.7608, "step": 6294 }, { "epoch": 0.8057084346601817, "grad_norm": 0.71875, "learning_rate": 0.00019156822998749124, "loss": 1.644, "step": 6295 }, { "epoch": 0.805836426468706, "grad_norm": 0.71484375, "learning_rate": 0.00019132428921040168, "loss": 1.7265, "step": 6296 }, { "epoch": 0.8059644182772303, "grad_norm": 0.96484375, "learning_rate": 0.00019108048741735362, "loss": 1.5577, "step": 6297 }, { "epoch": 0.8060924100857545, "grad_norm": 0.78125, "learning_rate": 0.00019083682465024832, "loss": 1.5605, "step": 6298 }, { "epoch": 0.8062204018942788, "grad_norm": 0.73828125, "learning_rate": 0.00019059330095096318, "loss": 1.7285, "step": 6299 }, { "epoch": 0.806348393702803, "grad_norm": 0.640625, "learning_rate": 0.00019034991636135158, "loss": 1.5219, "step": 6300 }, { "epoch": 0.8064763855113273, "grad_norm": 0.62890625, "learning_rate": 0.00019010667092324342, "loss": 1.1018, "step": 6301 }, { "epoch": 0.8066043773198516, "grad_norm": 0.6328125, "learning_rate": 0.000189863564678444, "loss": 1.2427, "step": 6302 }, { "epoch": 0.8067323691283758, "grad_norm": 0.66015625, "learning_rate": 0.0001896205976687354, "loss": 1.3295, "step": 6303 }, { "epoch": 0.8068603609369001, "grad_norm": 0.609375, "learning_rate": 0.0001893777699358755, "loss": 1.2346, "step": 6304 }, { "epoch": 0.8069883527454242, "grad_norm": 0.64453125, "learning_rate": 0.00018913508152159774, "loss": 1.3036, "step": 6305 }, { "epoch": 0.8071163445539485, "grad_norm": 0.74609375, "learning_rate": 0.00018889253246761229, "loss": 1.2253, "step": 6306 }, { "epoch": 0.8072443363624728, "grad_norm": 0.59375, "learning_rate": 0.00018865012281560524, "loss": 1.0526, "step": 6307 }, { "epoch": 0.807372328170997, "grad_norm": 0.71484375, "learning_rate": 0.0001884078526072387, "loss": 1.4635, "step": 6308 }, { "epoch": 0.8075003199795213, "grad_norm": 0.65625, "learning_rate": 0.00018816572188415026, "loss": 1.3347, "step": 6309 }, { "epoch": 0.8076283117880456, "grad_norm": 0.5703125, "learning_rate": 0.0001879237306879542, "loss": 1.2139, "step": 6310 }, { "epoch": 0.8077563035965698, "grad_norm": 0.66796875, "learning_rate": 0.00018768187906024103, "loss": 1.2885, "step": 6311 }, { "epoch": 0.8078842954050941, "grad_norm": 0.62890625, "learning_rate": 0.00018744016704257683, "loss": 1.2732, "step": 6312 }, { "epoch": 0.8080122872136183, "grad_norm": 0.75, "learning_rate": 0.00018719859467650313, "loss": 1.674, "step": 6313 }, { "epoch": 0.8081402790221426, "grad_norm": 0.609375, "learning_rate": 0.00018695716200353861, "loss": 1.1499, "step": 6314 }, { "epoch": 0.8082682708306669, "grad_norm": 0.6953125, "learning_rate": 0.0001867158690651771, "loss": 1.6537, "step": 6315 }, { "epoch": 0.8083962626391911, "grad_norm": 0.62890625, "learning_rate": 0.000186474715902889, "loss": 1.3217, "step": 6316 }, { "epoch": 0.8085242544477154, "grad_norm": 0.578125, "learning_rate": 0.00018623370255812033, "loss": 1.1562, "step": 6317 }, { "epoch": 0.8086522462562395, "grad_norm": 0.765625, "learning_rate": 0.00018599282907229286, "loss": 1.5663, "step": 6318 }, { "epoch": 0.8087802380647638, "grad_norm": 0.6796875, "learning_rate": 0.0001857520954868047, "loss": 1.3423, "step": 6319 }, { "epoch": 0.8089082298732881, "grad_norm": 0.79296875, "learning_rate": 0.00018551150184303002, "loss": 1.202, "step": 6320 }, { "epoch": 0.8090362216818123, "grad_norm": 0.6484375, "learning_rate": 0.00018527104818231855, "loss": 1.0599, "step": 6321 }, { "epoch": 0.8091642134903366, "grad_norm": 0.6015625, "learning_rate": 0.00018503073454599617, "loss": 0.8826, "step": 6322 }, { "epoch": 0.8092922052988609, "grad_norm": 0.61328125, "learning_rate": 0.00018479056097536485, "loss": 1.3705, "step": 6323 }, { "epoch": 0.8094201971073851, "grad_norm": 0.52734375, "learning_rate": 0.00018455052751170209, "loss": 0.9006, "step": 6324 }, { "epoch": 0.8095481889159094, "grad_norm": 0.5625, "learning_rate": 0.0001843106341962617, "loss": 1.1084, "step": 6325 }, { "epoch": 0.8096761807244336, "grad_norm": 0.60546875, "learning_rate": 0.00018407088107027348, "loss": 1.2032, "step": 6326 }, { "epoch": 0.8098041725329579, "grad_norm": 0.67578125, "learning_rate": 0.00018383126817494234, "loss": 1.4822, "step": 6327 }, { "epoch": 0.8099321643414822, "grad_norm": 0.52734375, "learning_rate": 0.0001835917955514499, "loss": 0.9276, "step": 6328 }, { "epoch": 0.8100601561500064, "grad_norm": 0.59765625, "learning_rate": 0.00018335246324095345, "loss": 1.2978, "step": 6329 }, { "epoch": 0.8101881479585307, "grad_norm": 0.63671875, "learning_rate": 0.00018311327128458631, "loss": 1.5541, "step": 6330 }, { "epoch": 0.8103161397670549, "grad_norm": 0.5625, "learning_rate": 0.00018287421972345696, "loss": 1.0109, "step": 6331 }, { "epoch": 0.8104441315755792, "grad_norm": 0.66796875, "learning_rate": 0.00018263530859865073, "loss": 1.334, "step": 6332 }, { "epoch": 0.8105721233841034, "grad_norm": 0.5, "learning_rate": 0.000182396537951228, "loss": 0.8277, "step": 6333 }, { "epoch": 0.8107001151926276, "grad_norm": 0.7265625, "learning_rate": 0.00018215790782222585, "loss": 2.1084, "step": 6334 }, { "epoch": 0.8108281070011519, "grad_norm": 0.64453125, "learning_rate": 0.0001819194182526568, "loss": 1.0032, "step": 6335 }, { "epoch": 0.8109560988096762, "grad_norm": 0.7109375, "learning_rate": 0.0001816810692835087, "loss": 1.3164, "step": 6336 }, { "epoch": 0.8110840906182004, "grad_norm": 0.66796875, "learning_rate": 0.0001814428609557458, "loss": 1.1235, "step": 6337 }, { "epoch": 0.8112120824267247, "grad_norm": 0.578125, "learning_rate": 0.00018120479331030815, "loss": 0.7769, "step": 6338 }, { "epoch": 0.8113400742352489, "grad_norm": 0.5703125, "learning_rate": 0.00018096686638811177, "loss": 0.7047, "step": 6339 }, { "epoch": 0.8114680660437732, "grad_norm": 0.67578125, "learning_rate": 0.00018072908023004775, "loss": 1.4465, "step": 6340 }, { "epoch": 0.8115960578522975, "grad_norm": 0.62890625, "learning_rate": 0.00018049143487698372, "loss": 1.0473, "step": 6341 }, { "epoch": 0.8117240496608217, "grad_norm": 0.62109375, "learning_rate": 0.000180253930369763, "loss": 1.1467, "step": 6342 }, { "epoch": 0.811852041469346, "grad_norm": 0.76953125, "learning_rate": 0.00018001656674920453, "loss": 1.1201, "step": 6343 }, { "epoch": 0.8119800332778702, "grad_norm": 0.57421875, "learning_rate": 0.00017977934405610308, "loss": 1.0409, "step": 6344 }, { "epoch": 0.8121080250863945, "grad_norm": 0.609375, "learning_rate": 0.00017954226233122929, "loss": 1.0965, "step": 6345 }, { "epoch": 0.8122360168949188, "grad_norm": 1.890625, "learning_rate": 0.0001793053216153294, "loss": 1.2649, "step": 6346 }, { "epoch": 0.8123640087034429, "grad_norm": 0.62890625, "learning_rate": 0.0001790685219491256, "loss": 1.3704, "step": 6347 }, { "epoch": 0.8124920005119672, "grad_norm": 0.57421875, "learning_rate": 0.0001788318633733159, "loss": 1.3152, "step": 6348 }, { "epoch": 0.8126199923204915, "grad_norm": 0.72265625, "learning_rate": 0.00017859534592857353, "loss": 0.7551, "step": 6349 }, { "epoch": 0.8127479841290157, "grad_norm": 0.74609375, "learning_rate": 0.00017835896965554798, "loss": 1.388, "step": 6350 }, { "epoch": 0.81287597593754, "grad_norm": 0.68359375, "learning_rate": 0.0001781227345948645, "loss": 1.4418, "step": 6351 }, { "epoch": 0.8130039677460642, "grad_norm": 0.78125, "learning_rate": 0.00017788664078712414, "loss": 1.8132, "step": 6352 }, { "epoch": 0.8131319595545885, "grad_norm": 0.640625, "learning_rate": 0.00017765068827290276, "loss": 0.7896, "step": 6353 }, { "epoch": 0.8132599513631128, "grad_norm": 0.56640625, "learning_rate": 0.00017741487709275317, "loss": 0.9079, "step": 6354 }, { "epoch": 0.813387943171637, "grad_norm": 0.71875, "learning_rate": 0.00017717920728720283, "loss": 1.3667, "step": 6355 }, { "epoch": 0.8135159349801613, "grad_norm": 0.703125, "learning_rate": 0.0001769436788967561, "loss": 1.5344, "step": 6356 }, { "epoch": 0.8136439267886856, "grad_norm": 0.609375, "learning_rate": 0.0001767082919618922, "loss": 1.2828, "step": 6357 }, { "epoch": 0.8137719185972098, "grad_norm": 0.640625, "learning_rate": 0.00017647304652306596, "loss": 1.4403, "step": 6358 }, { "epoch": 0.8138999104057341, "grad_norm": 0.61328125, "learning_rate": 0.00017623794262070804, "loss": 1.3553, "step": 6359 }, { "epoch": 0.8140279022142582, "grad_norm": 0.59375, "learning_rate": 0.0001760029802952251, "loss": 0.7889, "step": 6360 }, { "epoch": 0.8141558940227825, "grad_norm": 0.67578125, "learning_rate": 0.00017576815958699932, "loss": 1.7405, "step": 6361 }, { "epoch": 0.8142838858313068, "grad_norm": 0.6328125, "learning_rate": 0.00017553348053638807, "loss": 0.9437, "step": 6362 }, { "epoch": 0.814411877639831, "grad_norm": 0.57421875, "learning_rate": 0.00017529894318372497, "loss": 1.1158, "step": 6363 }, { "epoch": 0.8145398694483553, "grad_norm": 0.8125, "learning_rate": 0.0001750645475693191, "loss": 1.3908, "step": 6364 }, { "epoch": 0.8146678612568795, "grad_norm": 0.51171875, "learning_rate": 0.00017483029373345504, "loss": 1.1871, "step": 6365 }, { "epoch": 0.8147958530654038, "grad_norm": 0.70703125, "learning_rate": 0.00017459618171639324, "loss": 1.4117, "step": 6366 }, { "epoch": 0.8149238448739281, "grad_norm": 0.73046875, "learning_rate": 0.00017436221155836952, "loss": 1.5437, "step": 6367 }, { "epoch": 0.8150518366824523, "grad_norm": 0.60546875, "learning_rate": 0.00017412838329959558, "loss": 1.2214, "step": 6368 }, { "epoch": 0.8151798284909766, "grad_norm": 0.7109375, "learning_rate": 0.00017389469698025851, "loss": 1.3425, "step": 6369 }, { "epoch": 0.8153078202995009, "grad_norm": 0.466796875, "learning_rate": 0.0001736611526405214, "loss": 0.9628, "step": 6370 }, { "epoch": 0.8154358121080251, "grad_norm": 0.6796875, "learning_rate": 0.00017342775032052205, "loss": 1.3132, "step": 6371 }, { "epoch": 0.8155638039165494, "grad_norm": 0.53515625, "learning_rate": 0.0001731944900603748, "loss": 0.8973, "step": 6372 }, { "epoch": 0.8156917957250736, "grad_norm": 0.6796875, "learning_rate": 0.00017296137190016915, "loss": 1.5198, "step": 6373 }, { "epoch": 0.8158197875335979, "grad_norm": 0.6015625, "learning_rate": 0.00017272839587997046, "loss": 0.9456, "step": 6374 }, { "epoch": 0.8159477793421221, "grad_norm": 0.7109375, "learning_rate": 0.00017249556203981898, "loss": 1.0797, "step": 6375 }, { "epoch": 0.8160757711506463, "grad_norm": 0.5859375, "learning_rate": 0.0001722628704197312, "loss": 1.2326, "step": 6376 }, { "epoch": 0.8162037629591706, "grad_norm": 0.65234375, "learning_rate": 0.00017203032105969895, "loss": 1.5767, "step": 6377 }, { "epoch": 0.8163317547676948, "grad_norm": 0.50390625, "learning_rate": 0.00017179791399968947, "loss": 0.6635, "step": 6378 }, { "epoch": 0.8164597465762191, "grad_norm": 0.6484375, "learning_rate": 0.00017156564927964634, "loss": 1.0486, "step": 6379 }, { "epoch": 0.8165877383847434, "grad_norm": 0.68359375, "learning_rate": 0.00017133352693948724, "loss": 1.2695, "step": 6380 }, { "epoch": 0.8167157301932676, "grad_norm": 0.70703125, "learning_rate": 0.00017110154701910642, "loss": 1.2817, "step": 6381 }, { "epoch": 0.8168437220017919, "grad_norm": 0.59375, "learning_rate": 0.00017086970955837344, "loss": 1.0703, "step": 6382 }, { "epoch": 0.8169717138103162, "grad_norm": 0.7734375, "learning_rate": 0.00017063801459713356, "loss": 1.6211, "step": 6383 }, { "epoch": 0.8170997056188404, "grad_norm": 0.59765625, "learning_rate": 0.00017040646217520683, "loss": 1.2753, "step": 6384 }, { "epoch": 0.8172276974273647, "grad_norm": 0.84375, "learning_rate": 0.00017017505233238962, "loss": 1.6021, "step": 6385 }, { "epoch": 0.8173556892358889, "grad_norm": 0.6796875, "learning_rate": 0.00016994378510845333, "loss": 1.1946, "step": 6386 }, { "epoch": 0.8174836810444132, "grad_norm": 0.71484375, "learning_rate": 0.00016971266054314493, "loss": 1.462, "step": 6387 }, { "epoch": 0.8176116728529375, "grad_norm": 0.58203125, "learning_rate": 0.00016948167867618713, "loss": 0.7741, "step": 6388 }, { "epoch": 0.8177396646614616, "grad_norm": 0.63671875, "learning_rate": 0.00016925083954727772, "loss": 1.4196, "step": 6389 }, { "epoch": 0.8178676564699859, "grad_norm": 0.6953125, "learning_rate": 0.00016902014319609027, "loss": 1.2514, "step": 6390 }, { "epoch": 0.8179956482785101, "grad_norm": 0.55859375, "learning_rate": 0.00016878958966227365, "loss": 1.0158, "step": 6391 }, { "epoch": 0.8181236400870344, "grad_norm": 0.90625, "learning_rate": 0.00016855917898545258, "loss": 1.8463, "step": 6392 }, { "epoch": 0.8182516318955587, "grad_norm": 0.482421875, "learning_rate": 0.0001683289112052262, "loss": 0.9649, "step": 6393 }, { "epoch": 0.8183796237040829, "grad_norm": 0.62890625, "learning_rate": 0.00016809878636117004, "loss": 1.2049, "step": 6394 }, { "epoch": 0.8185076155126072, "grad_norm": 0.59375, "learning_rate": 0.00016786880449283492, "loss": 1.022, "step": 6395 }, { "epoch": 0.8186356073211315, "grad_norm": 0.66796875, "learning_rate": 0.00016763896563974713, "loss": 1.3466, "step": 6396 }, { "epoch": 0.8187635991296557, "grad_norm": 0.75390625, "learning_rate": 0.0001674092698414077, "loss": 0.9601, "step": 6397 }, { "epoch": 0.81889159093818, "grad_norm": 0.58984375, "learning_rate": 0.00016717971713729374, "loss": 0.9671, "step": 6398 }, { "epoch": 0.8190195827467042, "grad_norm": 0.71875, "learning_rate": 0.0001669503075668578, "loss": 1.6433, "step": 6399 }, { "epoch": 0.8191475745552285, "grad_norm": 0.609375, "learning_rate": 0.00016672104116952747, "loss": 1.2212, "step": 6400 }, { "epoch": 0.8192755663637528, "grad_norm": 0.61328125, "learning_rate": 0.00016649191798470607, "loss": 1.2913, "step": 6401 }, { "epoch": 0.819403558172277, "grad_norm": 0.69140625, "learning_rate": 0.00016626293805177195, "loss": 1.5849, "step": 6402 }, { "epoch": 0.8195315499808012, "grad_norm": 0.7265625, "learning_rate": 0.00016603410141007914, "loss": 1.4943, "step": 6403 }, { "epoch": 0.8196595417893254, "grad_norm": 0.62890625, "learning_rate": 0.00016580540809895682, "loss": 1.0715, "step": 6404 }, { "epoch": 0.8197875335978497, "grad_norm": 0.61328125, "learning_rate": 0.00016557685815771006, "loss": 1.0771, "step": 6405 }, { "epoch": 0.819915525406374, "grad_norm": 0.578125, "learning_rate": 0.00016534845162561817, "loss": 1.0131, "step": 6406 }, { "epoch": 0.8200435172148982, "grad_norm": 0.5859375, "learning_rate": 0.000165120188541937, "loss": 1.1404, "step": 6407 }, { "epoch": 0.8201715090234225, "grad_norm": 0.5390625, "learning_rate": 0.00016489206894589715, "loss": 1.1012, "step": 6408 }, { "epoch": 0.8202995008319468, "grad_norm": 0.70703125, "learning_rate": 0.0001646640928767047, "loss": 1.4369, "step": 6409 }, { "epoch": 0.820427492640471, "grad_norm": 0.64453125, "learning_rate": 0.00016443626037354088, "loss": 1.2064, "step": 6410 }, { "epoch": 0.8205554844489953, "grad_norm": 0.65234375, "learning_rate": 0.00016420857147556267, "loss": 1.4007, "step": 6411 }, { "epoch": 0.8206834762575195, "grad_norm": 0.6015625, "learning_rate": 0.00016398102622190193, "loss": 1.169, "step": 6412 }, { "epoch": 0.8208114680660438, "grad_norm": 0.67578125, "learning_rate": 0.000163753624651666, "loss": 1.7751, "step": 6413 }, { "epoch": 0.8209394598745681, "grad_norm": 0.54296875, "learning_rate": 0.00016352636680393786, "loss": 0.9779, "step": 6414 }, { "epoch": 0.8210674516830923, "grad_norm": 0.8515625, "learning_rate": 0.0001632992527177749, "loss": 1.4454, "step": 6415 }, { "epoch": 0.8211954434916166, "grad_norm": 0.828125, "learning_rate": 0.00016307228243221062, "loss": 1.2393, "step": 6416 }, { "epoch": 0.8213234353001407, "grad_norm": 0.74609375, "learning_rate": 0.00016284545598625356, "loss": 2.0656, "step": 6417 }, { "epoch": 0.821451427108665, "grad_norm": 0.56640625, "learning_rate": 0.00016261877341888754, "loss": 1.1972, "step": 6418 }, { "epoch": 0.8215794189171893, "grad_norm": 0.68359375, "learning_rate": 0.00016239223476907183, "loss": 1.5271, "step": 6419 }, { "epoch": 0.8217074107257135, "grad_norm": 0.67578125, "learning_rate": 0.0001621658400757403, "loss": 1.345, "step": 6420 }, { "epoch": 0.8218354025342378, "grad_norm": 0.65234375, "learning_rate": 0.00016193958937780285, "loss": 1.4861, "step": 6421 }, { "epoch": 0.8219633943427621, "grad_norm": 0.75, "learning_rate": 0.00016171348271414433, "loss": 1.9997, "step": 6422 }, { "epoch": 0.8220913861512863, "grad_norm": 0.62109375, "learning_rate": 0.00016148752012362465, "loss": 1.0531, "step": 6423 }, { "epoch": 0.8222193779598106, "grad_norm": 0.62109375, "learning_rate": 0.00016126170164507947, "loss": 1.3409, "step": 6424 }, { "epoch": 0.8223473697683348, "grad_norm": 0.69921875, "learning_rate": 0.00016103602731731915, "loss": 1.6066, "step": 6425 }, { "epoch": 0.8224753615768591, "grad_norm": 0.58203125, "learning_rate": 0.00016081049717912944, "loss": 1.0628, "step": 6426 }, { "epoch": 0.8226033533853834, "grad_norm": 0.67578125, "learning_rate": 0.00016058511126927177, "loss": 1.2483, "step": 6427 }, { "epoch": 0.8227313451939076, "grad_norm": 0.65625, "learning_rate": 0.00016035986962648175, "loss": 1.3517, "step": 6428 }, { "epoch": 0.8228593370024319, "grad_norm": 1.703125, "learning_rate": 0.00016013477228947104, "loss": 1.4293, "step": 6429 }, { "epoch": 0.8229873288109562, "grad_norm": 0.72265625, "learning_rate": 0.00015990981929692638, "loss": 1.8429, "step": 6430 }, { "epoch": 0.8231153206194803, "grad_norm": 0.66015625, "learning_rate": 0.0001596850106875095, "loss": 1.739, "step": 6431 }, { "epoch": 0.8232433124280046, "grad_norm": 0.703125, "learning_rate": 0.00015946034649985753, "loss": 1.0612, "step": 6432 }, { "epoch": 0.8233713042365288, "grad_norm": 0.73828125, "learning_rate": 0.00015923582677258253, "loss": 1.122, "step": 6433 }, { "epoch": 0.8234992960450531, "grad_norm": 0.609375, "learning_rate": 0.0001590114515442719, "loss": 1.3149, "step": 6434 }, { "epoch": 0.8236272878535774, "grad_norm": 0.78515625, "learning_rate": 0.00015878722085348805, "loss": 1.3431, "step": 6435 }, { "epoch": 0.8237552796621016, "grad_norm": 0.7265625, "learning_rate": 0.00015856313473876915, "loss": 1.4255, "step": 6436 }, { "epoch": 0.8238832714706259, "grad_norm": 0.8671875, "learning_rate": 0.00015833919323862735, "loss": 2.2158, "step": 6437 }, { "epoch": 0.8240112632791501, "grad_norm": 0.5078125, "learning_rate": 0.00015811539639155092, "loss": 0.6549, "step": 6438 }, { "epoch": 0.8241392550876744, "grad_norm": 0.486328125, "learning_rate": 0.00015789174423600305, "loss": 0.5742, "step": 6439 }, { "epoch": 0.8242672468961987, "grad_norm": 0.53515625, "learning_rate": 0.00015766823681042185, "loss": 0.8712, "step": 6440 }, { "epoch": 0.8243952387047229, "grad_norm": 0.58203125, "learning_rate": 0.00015744487415322107, "loss": 0.952, "step": 6441 }, { "epoch": 0.8245232305132472, "grad_norm": 0.7578125, "learning_rate": 0.00015722165630278873, "loss": 1.8648, "step": 6442 }, { "epoch": 0.8246512223217715, "grad_norm": 0.68359375, "learning_rate": 0.0001569985832974885, "loss": 1.5168, "step": 6443 }, { "epoch": 0.8247792141302956, "grad_norm": 0.62109375, "learning_rate": 0.0001567756551756594, "loss": 1.1286, "step": 6444 }, { "epoch": 0.8249072059388199, "grad_norm": 0.73828125, "learning_rate": 0.00015655287197561497, "loss": 2.0485, "step": 6445 }, { "epoch": 0.8250351977473441, "grad_norm": 0.7734375, "learning_rate": 0.00015633023373564414, "loss": 1.6017, "step": 6446 }, { "epoch": 0.8251631895558684, "grad_norm": 0.66796875, "learning_rate": 0.0001561077404940111, "loss": 1.0998, "step": 6447 }, { "epoch": 0.8252911813643927, "grad_norm": 0.84375, "learning_rate": 0.00015588539228895482, "loss": 1.6422, "step": 6448 }, { "epoch": 0.8254191731729169, "grad_norm": 0.68359375, "learning_rate": 0.00015566318915868937, "loss": 1.9149, "step": 6449 }, { "epoch": 0.8255471649814412, "grad_norm": 0.66796875, "learning_rate": 0.00015544113114140425, "loss": 1.1797, "step": 6450 }, { "epoch": 0.8256751567899654, "grad_norm": 0.59375, "learning_rate": 0.0001552192182752633, "loss": 1.2452, "step": 6451 }, { "epoch": 0.8258031485984897, "grad_norm": 0.55859375, "learning_rate": 0.0001549974505984061, "loss": 0.7402, "step": 6452 }, { "epoch": 0.825931140407014, "grad_norm": 0.6640625, "learning_rate": 0.00015477582814894686, "loss": 1.2192, "step": 6453 }, { "epoch": 0.8260591322155382, "grad_norm": 0.6171875, "learning_rate": 0.0001545543509649754, "loss": 1.261, "step": 6454 }, { "epoch": 0.8261871240240625, "grad_norm": 0.451171875, "learning_rate": 0.00015433301908455543, "loss": 0.5256, "step": 6455 }, { "epoch": 0.8263151158325868, "grad_norm": 0.70703125, "learning_rate": 0.00015411183254572714, "loss": 1.3297, "step": 6456 }, { "epoch": 0.826443107641111, "grad_norm": 0.7109375, "learning_rate": 0.00015389079138650465, "loss": 1.7213, "step": 6457 }, { "epoch": 0.8265710994496352, "grad_norm": 0.67578125, "learning_rate": 0.00015366989564487787, "loss": 1.3736, "step": 6458 }, { "epoch": 0.8266990912581594, "grad_norm": 2.46875, "learning_rate": 0.00015344914535881082, "loss": 1.5278, "step": 6459 }, { "epoch": 0.8268270830666837, "grad_norm": 0.65234375, "learning_rate": 0.00015322854056624313, "loss": 1.0237, "step": 6460 }, { "epoch": 0.826955074875208, "grad_norm": 0.75390625, "learning_rate": 0.00015300808130508947, "loss": 1.8613, "step": 6461 }, { "epoch": 0.8270830666837322, "grad_norm": 0.73046875, "learning_rate": 0.00015278776761323932, "loss": 1.1849, "step": 6462 }, { "epoch": 0.8272110584922565, "grad_norm": 0.671875, "learning_rate": 0.00015256759952855738, "loss": 1.4194, "step": 6463 }, { "epoch": 0.8273390503007807, "grad_norm": 0.82421875, "learning_rate": 0.00015234757708888248, "loss": 2.0083, "step": 6464 }, { "epoch": 0.827467042109305, "grad_norm": 0.6171875, "learning_rate": 0.00015212770033202949, "loss": 1.2209, "step": 6465 }, { "epoch": 0.8275950339178293, "grad_norm": 0.72265625, "learning_rate": 0.00015190796929578777, "loss": 1.0074, "step": 6466 }, { "epoch": 0.8277230257263535, "grad_norm": 0.546875, "learning_rate": 0.00015168838401792152, "loss": 1.1444, "step": 6467 }, { "epoch": 0.8278510175348778, "grad_norm": 0.83984375, "learning_rate": 0.0001514689445361701, "loss": 2.1477, "step": 6468 }, { "epoch": 0.8279790093434021, "grad_norm": 0.69921875, "learning_rate": 0.00015124965088824792, "loss": 1.3546, "step": 6469 }, { "epoch": 0.8281070011519263, "grad_norm": 0.67578125, "learning_rate": 0.00015103050311184385, "loss": 1.1014, "step": 6470 }, { "epoch": 0.8282349929604506, "grad_norm": 0.73046875, "learning_rate": 0.0001508115012446223, "loss": 0.7142, "step": 6471 }, { "epoch": 0.8283629847689747, "grad_norm": 0.7578125, "learning_rate": 0.00015059264532422224, "loss": 1.6988, "step": 6472 }, { "epoch": 0.828490976577499, "grad_norm": 0.61328125, "learning_rate": 0.00015037393538825728, "loss": 1.3576, "step": 6473 }, { "epoch": 0.8286189683860233, "grad_norm": 0.546875, "learning_rate": 0.00015015537147431657, "loss": 0.9949, "step": 6474 }, { "epoch": 0.8287469601945475, "grad_norm": 0.72265625, "learning_rate": 0.00014993695361996374, "loss": 1.876, "step": 6475 }, { "epoch": 0.8288749520030718, "grad_norm": 0.5859375, "learning_rate": 0.00014971868186273784, "loss": 1.2106, "step": 6476 }, { "epoch": 0.829002943811596, "grad_norm": 0.53125, "learning_rate": 0.00014950055624015146, "loss": 0.7975, "step": 6477 }, { "epoch": 0.8291309356201203, "grad_norm": 0.5625, "learning_rate": 0.00014928257678969393, "loss": 0.9946, "step": 6478 }, { "epoch": 0.8292589274286446, "grad_norm": 0.52734375, "learning_rate": 0.00014906474354882827, "loss": 0.7791, "step": 6479 }, { "epoch": 0.8293869192371688, "grad_norm": 0.53515625, "learning_rate": 0.00014884705655499297, "loss": 0.8972, "step": 6480 }, { "epoch": 0.8295149110456931, "grad_norm": 0.59375, "learning_rate": 0.00014862951584560037, "loss": 0.9461, "step": 6481 }, { "epoch": 0.8296429028542174, "grad_norm": 0.65625, "learning_rate": 0.00014841212145803883, "loss": 1.4915, "step": 6482 }, { "epoch": 0.8297708946627416, "grad_norm": 0.5546875, "learning_rate": 0.00014819487342967097, "loss": 0.6069, "step": 6483 }, { "epoch": 0.8298988864712659, "grad_norm": 0.65625, "learning_rate": 0.00014797777179783456, "loss": 1.5834, "step": 6484 }, { "epoch": 0.83002687827979, "grad_norm": 0.92578125, "learning_rate": 0.0001477608165998421, "loss": 1.7548, "step": 6485 }, { "epoch": 0.8301548700883143, "grad_norm": 0.515625, "learning_rate": 0.0001475440078729804, "loss": 0.9615, "step": 6486 }, { "epoch": 0.8302828618968386, "grad_norm": 0.5859375, "learning_rate": 0.0001473273456545119, "loss": 0.8091, "step": 6487 }, { "epoch": 0.8304108537053628, "grad_norm": 0.5390625, "learning_rate": 0.00014711082998167336, "loss": 0.9273, "step": 6488 }, { "epoch": 0.8305388455138871, "grad_norm": 0.609375, "learning_rate": 0.0001468944608916767, "loss": 1.007, "step": 6489 }, { "epoch": 0.8306668373224113, "grad_norm": 0.734375, "learning_rate": 0.00014667823842170834, "loss": 1.6675, "step": 6490 }, { "epoch": 0.8307948291309356, "grad_norm": 0.64453125, "learning_rate": 0.00014646216260892964, "loss": 1.2441, "step": 6491 }, { "epoch": 0.8309228209394599, "grad_norm": 0.58203125, "learning_rate": 0.00014624623349047673, "loss": 0.9493, "step": 6492 }, { "epoch": 0.8310508127479841, "grad_norm": 0.63671875, "learning_rate": 0.00014603045110346059, "loss": 1.2221, "step": 6493 }, { "epoch": 0.8311788045565084, "grad_norm": 0.63671875, "learning_rate": 0.000145814815484967, "loss": 1.1692, "step": 6494 }, { "epoch": 0.8313067963650327, "grad_norm": 0.6484375, "learning_rate": 0.0001455993266720562, "loss": 1.2411, "step": 6495 }, { "epoch": 0.8314347881735569, "grad_norm": 0.76171875, "learning_rate": 0.0001453839847017635, "loss": 1.3437, "step": 6496 }, { "epoch": 0.8315627799820812, "grad_norm": 0.62109375, "learning_rate": 0.00014516878961109902, "loss": 1.4947, "step": 6497 }, { "epoch": 0.8316907717906054, "grad_norm": 0.65625, "learning_rate": 0.00014495374143704766, "loss": 0.9976, "step": 6498 }, { "epoch": 0.8318187635991297, "grad_norm": 0.67578125, "learning_rate": 0.00014473884021656857, "loss": 1.7582, "step": 6499 }, { "epoch": 0.831946755407654, "grad_norm": 0.67578125, "learning_rate": 0.00014452408598659595, "loss": 1.5204, "step": 6500 }, { "epoch": 0.8320747472161781, "grad_norm": 0.734375, "learning_rate": 0.0001443094787840392, "loss": 1.1701, "step": 6501 }, { "epoch": 0.8322027390247024, "grad_norm": 0.63671875, "learning_rate": 0.000144095018645782, "loss": 1.3076, "step": 6502 }, { "epoch": 0.8323307308332267, "grad_norm": 0.5625, "learning_rate": 0.00014388070560868282, "loss": 0.9604, "step": 6503 }, { "epoch": 0.8324587226417509, "grad_norm": 0.71875, "learning_rate": 0.00014366653970957456, "loss": 1.2016, "step": 6504 }, { "epoch": 0.8325867144502752, "grad_norm": 0.59765625, "learning_rate": 0.00014345252098526518, "loss": 1.0339, "step": 6505 }, { "epoch": 0.8327147062587994, "grad_norm": 0.75390625, "learning_rate": 0.0001432386494725374, "loss": 1.6986, "step": 6506 }, { "epoch": 0.8328426980673237, "grad_norm": 0.5390625, "learning_rate": 0.0001430249252081487, "loss": 1.1759, "step": 6507 }, { "epoch": 0.832970689875848, "grad_norm": 0.71875, "learning_rate": 0.00014281134822883058, "loss": 1.1509, "step": 6508 }, { "epoch": 0.8330986816843722, "grad_norm": 0.57421875, "learning_rate": 0.0001425979185712899, "loss": 0.8378, "step": 6509 }, { "epoch": 0.8332266734928965, "grad_norm": 0.69140625, "learning_rate": 0.0001423846362722081, "loss": 1.46, "step": 6510 }, { "epoch": 0.8333546653014207, "grad_norm": 0.671875, "learning_rate": 0.00014217150136824119, "loss": 1.1725, "step": 6511 }, { "epoch": 0.833482657109945, "grad_norm": 0.64453125, "learning_rate": 0.00014195851389601976, "loss": 1.4198, "step": 6512 }, { "epoch": 0.8336106489184693, "grad_norm": 0.63671875, "learning_rate": 0.0001417456738921492, "loss": 1.0739, "step": 6513 }, { "epoch": 0.8337386407269934, "grad_norm": 0.6328125, "learning_rate": 0.00014153298139320969, "loss": 1.4247, "step": 6514 }, { "epoch": 0.8338666325355177, "grad_norm": 0.62890625, "learning_rate": 0.00014132043643575565, "loss": 1.112, "step": 6515 }, { "epoch": 0.833994624344042, "grad_norm": 0.64453125, "learning_rate": 0.00014110803905631664, "loss": 1.1553, "step": 6516 }, { "epoch": 0.8341226161525662, "grad_norm": 0.55078125, "learning_rate": 0.00014089578929139636, "loss": 0.9801, "step": 6517 }, { "epoch": 0.8342506079610905, "grad_norm": 0.796875, "learning_rate": 0.00014068368717747338, "loss": 1.8055, "step": 6518 }, { "epoch": 0.8343785997696147, "grad_norm": 0.7421875, "learning_rate": 0.00014047173275100088, "loss": 1.7322, "step": 6519 }, { "epoch": 0.834506591578139, "grad_norm": 0.73046875, "learning_rate": 0.0001402599260484071, "loss": 1.3155, "step": 6520 }, { "epoch": 0.8346345833866633, "grad_norm": 0.71875, "learning_rate": 0.00014004826710609377, "loss": 1.4031, "step": 6521 }, { "epoch": 0.8347625751951875, "grad_norm": 0.72265625, "learning_rate": 0.00013983675596043833, "loss": 1.4333, "step": 6522 }, { "epoch": 0.8348905670037118, "grad_norm": 0.53125, "learning_rate": 0.00013962539264779207, "loss": 0.8931, "step": 6523 }, { "epoch": 0.835018558812236, "grad_norm": 0.75390625, "learning_rate": 0.0001394141772044817, "loss": 1.7468, "step": 6524 }, { "epoch": 0.8351465506207603, "grad_norm": 0.69921875, "learning_rate": 0.00013920310966680793, "loss": 1.2484, "step": 6525 }, { "epoch": 0.8352745424292846, "grad_norm": 0.5390625, "learning_rate": 0.0001389921900710458, "loss": 1.1519, "step": 6526 }, { "epoch": 0.8354025342378087, "grad_norm": 0.53515625, "learning_rate": 0.00013878141845344538, "loss": 0.8406, "step": 6527 }, { "epoch": 0.835530526046333, "grad_norm": 0.5703125, "learning_rate": 0.0001385707948502314, "loss": 1.0931, "step": 6528 }, { "epoch": 0.8356585178548573, "grad_norm": 0.765625, "learning_rate": 0.00013836031929760295, "loss": 1.6377, "step": 6529 }, { "epoch": 0.8357865096633815, "grad_norm": 0.546875, "learning_rate": 0.00013814999183173326, "loss": 0.8193, "step": 6530 }, { "epoch": 0.8359145014719058, "grad_norm": 0.58203125, "learning_rate": 0.00013793981248877085, "loss": 1.1256, "step": 6531 }, { "epoch": 0.83604249328043, "grad_norm": 0.80078125, "learning_rate": 0.00013772978130483838, "loss": 1.6735, "step": 6532 }, { "epoch": 0.8361704850889543, "grad_norm": 0.5546875, "learning_rate": 0.00013751989831603306, "loss": 0.8772, "step": 6533 }, { "epoch": 0.8362984768974786, "grad_norm": 0.7578125, "learning_rate": 0.00013731016355842684, "loss": 1.5017, "step": 6534 }, { "epoch": 0.8364264687060028, "grad_norm": 0.70703125, "learning_rate": 0.00013710057706806588, "loss": 1.5434, "step": 6535 }, { "epoch": 0.8365544605145271, "grad_norm": 0.61328125, "learning_rate": 0.00013689113888097116, "loss": 1.2617, "step": 6536 }, { "epoch": 0.8366824523230513, "grad_norm": 0.65625, "learning_rate": 0.00013668184903313786, "loss": 1.5469, "step": 6537 }, { "epoch": 0.8368104441315756, "grad_norm": 0.828125, "learning_rate": 0.0001364727075605362, "loss": 1.4573, "step": 6538 }, { "epoch": 0.8369384359400999, "grad_norm": 0.6328125, "learning_rate": 0.00013626371449911002, "loss": 1.0843, "step": 6539 }, { "epoch": 0.837066427748624, "grad_norm": 0.60546875, "learning_rate": 0.00013605486988477845, "loss": 1.2193, "step": 6540 }, { "epoch": 0.8371944195571484, "grad_norm": 0.765625, "learning_rate": 0.00013584617375343478, "loss": 1.7929, "step": 6541 }, { "epoch": 0.8373224113656726, "grad_norm": 1.5078125, "learning_rate": 0.00013563762614094698, "loss": 1.819, "step": 6542 }, { "epoch": 0.8374504031741968, "grad_norm": 0.890625, "learning_rate": 0.00013542922708315708, "loss": 1.8808, "step": 6543 }, { "epoch": 0.8375783949827211, "grad_norm": 0.68359375, "learning_rate": 0.00013522097661588197, "loss": 1.7176, "step": 6544 }, { "epoch": 0.8377063867912453, "grad_norm": 0.640625, "learning_rate": 0.00013501287477491253, "loss": 1.2785, "step": 6545 }, { "epoch": 0.8378343785997696, "grad_norm": 0.69140625, "learning_rate": 0.00013480492159601508, "loss": 1.4505, "step": 6546 }, { "epoch": 0.8379623704082939, "grad_norm": 0.55078125, "learning_rate": 0.00013459711711492962, "loss": 0.8364, "step": 6547 }, { "epoch": 0.8380903622168181, "grad_norm": 0.640625, "learning_rate": 0.00013438946136737028, "loss": 1.2178, "step": 6548 }, { "epoch": 0.8382183540253424, "grad_norm": 0.65234375, "learning_rate": 0.00013418195438902637, "loss": 1.4079, "step": 6549 }, { "epoch": 0.8383463458338666, "grad_norm": 0.65234375, "learning_rate": 0.0001339745962155613, "loss": 1.341, "step": 6550 }, { "epoch": 0.8384743376423909, "grad_norm": 0.56640625, "learning_rate": 0.00013376738688261315, "loss": 1.1028, "step": 6551 }, { "epoch": 0.8386023294509152, "grad_norm": 0.6953125, "learning_rate": 0.00013356032642579375, "loss": 1.1919, "step": 6552 }, { "epoch": 0.8387303212594394, "grad_norm": 0.7265625, "learning_rate": 0.00013335341488068998, "loss": 1.6525, "step": 6553 }, { "epoch": 0.8388583130679637, "grad_norm": 0.78125, "learning_rate": 0.00013314665228286303, "loss": 1.9218, "step": 6554 }, { "epoch": 0.838986304876488, "grad_norm": 0.66796875, "learning_rate": 0.00013294003866784842, "loss": 1.4924, "step": 6555 }, { "epoch": 0.8391142966850121, "grad_norm": 0.72265625, "learning_rate": 0.00013273357407115593, "loss": 1.8948, "step": 6556 }, { "epoch": 0.8392422884935364, "grad_norm": 0.5703125, "learning_rate": 0.00013252725852826996, "loss": 0.987, "step": 6557 }, { "epoch": 0.8393702803020606, "grad_norm": 0.640625, "learning_rate": 0.00013232109207464914, "loss": 1.0167, "step": 6558 }, { "epoch": 0.8394982721105849, "grad_norm": 0.59375, "learning_rate": 0.00013211507474572649, "loss": 0.9898, "step": 6559 }, { "epoch": 0.8396262639191092, "grad_norm": 0.609375, "learning_rate": 0.00013190920657690975, "loss": 1.2531, "step": 6560 }, { "epoch": 0.8397542557276334, "grad_norm": 0.5703125, "learning_rate": 0.0001317034876035802, "loss": 0.9133, "step": 6561 }, { "epoch": 0.8398822475361577, "grad_norm": 0.73046875, "learning_rate": 0.0001314979178610942, "loss": 1.8721, "step": 6562 }, { "epoch": 0.8400102393446819, "grad_norm": 0.58203125, "learning_rate": 0.0001312924973847822, "loss": 1.2701, "step": 6563 }, { "epoch": 0.8401382311532062, "grad_norm": 0.625, "learning_rate": 0.00013108722620994918, "loss": 1.514, "step": 6564 }, { "epoch": 0.8402662229617305, "grad_norm": 0.546875, "learning_rate": 0.00013088210437187431, "loss": 0.8356, "step": 6565 }, { "epoch": 0.8403942147702547, "grad_norm": 0.64453125, "learning_rate": 0.00013067713190581098, "loss": 1.4904, "step": 6566 }, { "epoch": 0.840522206578779, "grad_norm": 0.62890625, "learning_rate": 0.00013047230884698701, "loss": 1.2747, "step": 6567 }, { "epoch": 0.8406501983873033, "grad_norm": 0.70703125, "learning_rate": 0.00013026763523060436, "loss": 1.2733, "step": 6568 }, { "epoch": 0.8407781901958274, "grad_norm": 0.5703125, "learning_rate": 0.00013006311109184032, "loss": 1.0725, "step": 6569 }, { "epoch": 0.8409061820043517, "grad_norm": 0.56640625, "learning_rate": 0.0001298587364658449, "loss": 0.9015, "step": 6570 }, { "epoch": 0.8410341738128759, "grad_norm": 0.6171875, "learning_rate": 0.00012965451138774342, "loss": 1.1163, "step": 6571 }, { "epoch": 0.8411621656214002, "grad_norm": 0.609375, "learning_rate": 0.00012945043589263538, "loss": 1.1832, "step": 6572 }, { "epoch": 0.8412901574299245, "grad_norm": 0.56640625, "learning_rate": 0.00012924651001559463, "loss": 0.7039, "step": 6573 }, { "epoch": 0.8414181492384487, "grad_norm": 0.6796875, "learning_rate": 0.00012904273379166854, "loss": 1.4701, "step": 6574 }, { "epoch": 0.841546141046973, "grad_norm": 0.63671875, "learning_rate": 0.00012883910725587988, "loss": 1.4424, "step": 6575 }, { "epoch": 0.8416741328554973, "grad_norm": 0.65625, "learning_rate": 0.0001286356304432249, "loss": 1.3802, "step": 6576 }, { "epoch": 0.8418021246640215, "grad_norm": 0.82421875, "learning_rate": 0.0001284323033886745, "loss": 1.3473, "step": 6577 }, { "epoch": 0.8419301164725458, "grad_norm": 0.75390625, "learning_rate": 0.00012822912612717386, "loss": 1.8812, "step": 6578 }, { "epoch": 0.84205810828107, "grad_norm": 0.73046875, "learning_rate": 0.00012802609869364201, "loss": 1.4463, "step": 6579 }, { "epoch": 0.8421861000895943, "grad_norm": 0.59765625, "learning_rate": 0.00012782322112297274, "loss": 1.0496, "step": 6580 }, { "epoch": 0.8423140918981186, "grad_norm": 0.53515625, "learning_rate": 0.0001276204934500338, "loss": 0.8084, "step": 6581 }, { "epoch": 0.8424420837066428, "grad_norm": 0.67578125, "learning_rate": 0.00012741791570966744, "loss": 1.505, "step": 6582 }, { "epoch": 0.842570075515167, "grad_norm": 0.8203125, "learning_rate": 0.00012721548793668937, "loss": 1.6454, "step": 6583 }, { "epoch": 0.8426980673236912, "grad_norm": 0.60546875, "learning_rate": 0.0001270132101658905, "loss": 0.766, "step": 6584 }, { "epoch": 0.8428260591322155, "grad_norm": 0.796875, "learning_rate": 0.00012681108243203543, "loss": 1.6209, "step": 6585 }, { "epoch": 0.8429540509407398, "grad_norm": 0.6640625, "learning_rate": 0.00012660910476986322, "loss": 1.3398, "step": 6586 }, { "epoch": 0.843082042749264, "grad_norm": 0.71484375, "learning_rate": 0.00012640727721408696, "loss": 1.5558, "step": 6587 }, { "epoch": 0.8432100345577883, "grad_norm": 0.56640625, "learning_rate": 0.00012620559979939383, "loss": 0.7033, "step": 6588 }, { "epoch": 0.8433380263663126, "grad_norm": 0.6640625, "learning_rate": 0.00012600407256044543, "loss": 1.4392, "step": 6589 }, { "epoch": 0.8434660181748368, "grad_norm": 0.640625, "learning_rate": 0.00012580269553187762, "loss": 1.3851, "step": 6590 }, { "epoch": 0.8435940099833611, "grad_norm": 0.7421875, "learning_rate": 0.00012560146874830014, "loss": 1.2076, "step": 6591 }, { "epoch": 0.8437220017918853, "grad_norm": 0.75390625, "learning_rate": 0.00012540039224429722, "loss": 1.2536, "step": 6592 }, { "epoch": 0.8438499936004096, "grad_norm": 0.72265625, "learning_rate": 0.000125199466054427, "loss": 1.4267, "step": 6593 }, { "epoch": 0.8439779854089339, "grad_norm": 0.77734375, "learning_rate": 0.00012499869021322197, "loss": 1.8403, "step": 6594 }, { "epoch": 0.8441059772174581, "grad_norm": 0.859375, "learning_rate": 0.00012479806475518894, "loss": 1.6941, "step": 6595 }, { "epoch": 0.8442339690259824, "grad_norm": 0.5625, "learning_rate": 0.0001245975897148083, "loss": 0.9271, "step": 6596 }, { "epoch": 0.8443619608345065, "grad_norm": 0.6015625, "learning_rate": 0.00012439726512653492, "loss": 0.946, "step": 6597 }, { "epoch": 0.8444899526430308, "grad_norm": 0.6328125, "learning_rate": 0.00012419709102479804, "loss": 1.0246, "step": 6598 }, { "epoch": 0.8446179444515551, "grad_norm": 0.72265625, "learning_rate": 0.00012399706744400075, "loss": 1.5801, "step": 6599 }, { "epoch": 0.8447459362600793, "grad_norm": 0.69921875, "learning_rate": 0.00012379719441852033, "loss": 1.1873, "step": 6600 }, { "epoch": 0.8448739280686036, "grad_norm": 0.74609375, "learning_rate": 0.00012359747198270832, "loss": 1.2704, "step": 6601 }, { "epoch": 0.8450019198771279, "grad_norm": 0.6796875, "learning_rate": 0.00012339790017089003, "loss": 1.8863, "step": 6602 }, { "epoch": 0.8451299116856521, "grad_norm": 0.76171875, "learning_rate": 0.00012319847901736536, "loss": 1.8426, "step": 6603 }, { "epoch": 0.8452579034941764, "grad_norm": 0.6875, "learning_rate": 0.0001229992085564081, "loss": 1.5247, "step": 6604 }, { "epoch": 0.8453858953027006, "grad_norm": 0.6796875, "learning_rate": 0.0001228000888222659, "loss": 1.286, "step": 6605 }, { "epoch": 0.8455138871112249, "grad_norm": 0.609375, "learning_rate": 0.00012260111984916067, "loss": 1.0678, "step": 6606 }, { "epoch": 0.8456418789197492, "grad_norm": 0.5703125, "learning_rate": 0.00012240230167128864, "loss": 0.9746, "step": 6607 }, { "epoch": 0.8457698707282734, "grad_norm": 0.578125, "learning_rate": 0.00012220363432281978, "loss": 0.8899, "step": 6608 }, { "epoch": 0.8458978625367977, "grad_norm": 0.53515625, "learning_rate": 0.0001220051178378987, "loss": 0.8067, "step": 6609 }, { "epoch": 0.8460258543453218, "grad_norm": 0.63671875, "learning_rate": 0.00012180675225064308, "loss": 1.0965, "step": 6610 }, { "epoch": 0.8461538461538461, "grad_norm": 0.578125, "learning_rate": 0.00012160853759514568, "loss": 0.9568, "step": 6611 }, { "epoch": 0.8462818379623704, "grad_norm": 0.58984375, "learning_rate": 0.00012141047390547266, "loss": 1.4542, "step": 6612 }, { "epoch": 0.8464098297708946, "grad_norm": 0.69140625, "learning_rate": 0.00012121256121566471, "loss": 1.5142, "step": 6613 }, { "epoch": 0.8465378215794189, "grad_norm": 0.65234375, "learning_rate": 0.00012101479955973616, "loss": 1.0691, "step": 6614 }, { "epoch": 0.8466658133879432, "grad_norm": 0.91015625, "learning_rate": 0.0001208171889716756, "loss": 1.2965, "step": 6615 }, { "epoch": 0.8467938051964674, "grad_norm": 0.6328125, "learning_rate": 0.00012061972948544576, "loss": 0.9922, "step": 6616 }, { "epoch": 0.8469217970049917, "grad_norm": 0.62890625, "learning_rate": 0.00012042242113498303, "loss": 1.1313, "step": 6617 }, { "epoch": 0.8470497888135159, "grad_norm": 0.625, "learning_rate": 0.00012022526395419842, "loss": 1.345, "step": 6618 }, { "epoch": 0.8471777806220402, "grad_norm": 0.6328125, "learning_rate": 0.00012002825797697603, "loss": 1.2863, "step": 6619 }, { "epoch": 0.8473057724305645, "grad_norm": 0.765625, "learning_rate": 0.00011983140323717501, "loss": 1.7877, "step": 6620 }, { "epoch": 0.8474337642390887, "grad_norm": 0.73828125, "learning_rate": 0.00011963469976862773, "loss": 1.242, "step": 6621 }, { "epoch": 0.847561756047613, "grad_norm": 0.5546875, "learning_rate": 0.00011943814760514105, "loss": 1.0596, "step": 6622 }, { "epoch": 0.8476897478561372, "grad_norm": 0.625, "learning_rate": 0.0001192417467804956, "loss": 1.6958, "step": 6623 }, { "epoch": 0.8478177396646615, "grad_norm": 0.62890625, "learning_rate": 0.00011904549732844605, "loss": 1.1549, "step": 6624 }, { "epoch": 0.8479457314731857, "grad_norm": 0.6640625, "learning_rate": 0.00011884939928272109, "loss": 1.4674, "step": 6625 }, { "epoch": 0.8480737232817099, "grad_norm": 0.55078125, "learning_rate": 0.00011865345267702354, "loss": 1.0368, "step": 6626 }, { "epoch": 0.8482017150902342, "grad_norm": 0.60546875, "learning_rate": 0.00011845765754502957, "loss": 1.2033, "step": 6627 }, { "epoch": 0.8483297068987585, "grad_norm": 0.5546875, "learning_rate": 0.00011826201392038993, "loss": 1.028, "step": 6628 }, { "epoch": 0.8484576987072827, "grad_norm": 0.57421875, "learning_rate": 0.00011806652183672917, "loss": 1.0857, "step": 6629 }, { "epoch": 0.848585690515807, "grad_norm": 0.671875, "learning_rate": 0.00011787118132764585, "loss": 0.9778, "step": 6630 }, { "epoch": 0.8487136823243312, "grad_norm": 0.72265625, "learning_rate": 0.00011767599242671245, "loss": 1.5107, "step": 6631 }, { "epoch": 0.8488416741328555, "grad_norm": 0.62109375, "learning_rate": 0.00011748095516747515, "loss": 1.2956, "step": 6632 }, { "epoch": 0.8489696659413798, "grad_norm": 0.53515625, "learning_rate": 0.00011728606958345422, "loss": 0.9219, "step": 6633 }, { "epoch": 0.849097657749904, "grad_norm": 0.75, "learning_rate": 0.00011709133570814423, "loss": 0.8827, "step": 6634 }, { "epoch": 0.8492256495584283, "grad_norm": 0.68359375, "learning_rate": 0.00011689675357501306, "loss": 1.2553, "step": 6635 }, { "epoch": 0.8493536413669525, "grad_norm": 0.6953125, "learning_rate": 0.00011670232321750296, "loss": 1.1499, "step": 6636 }, { "epoch": 0.8494816331754768, "grad_norm": 0.6171875, "learning_rate": 0.00011650804466902987, "loss": 1.0334, "step": 6637 }, { "epoch": 0.849609624984001, "grad_norm": 0.82421875, "learning_rate": 0.00011631391796298385, "loss": 1.2121, "step": 6638 }, { "epoch": 0.8497376167925252, "grad_norm": 0.6640625, "learning_rate": 0.00011611994313272856, "loss": 1.2727, "step": 6639 }, { "epoch": 0.8498656086010495, "grad_norm": 0.7734375, "learning_rate": 0.00011592612021160199, "loss": 1.8812, "step": 6640 }, { "epoch": 0.8499936004095738, "grad_norm": 0.50390625, "learning_rate": 0.00011573244923291525, "loss": 0.7377, "step": 6641 }, { "epoch": 0.850121592218098, "grad_norm": 0.55859375, "learning_rate": 0.00011553893022995432, "loss": 1.5403, "step": 6642 }, { "epoch": 0.8502495840266223, "grad_norm": 0.59375, "learning_rate": 0.00011534556323597823, "loss": 0.9521, "step": 6643 }, { "epoch": 0.8503775758351465, "grad_norm": 0.7890625, "learning_rate": 0.00011515234828422072, "loss": 1.6086, "step": 6644 }, { "epoch": 0.8505055676436708, "grad_norm": 0.8125, "learning_rate": 0.00011495928540788814, "loss": 1.6568, "step": 6645 }, { "epoch": 0.8506335594521951, "grad_norm": 0.8046875, "learning_rate": 0.00011476637464016215, "loss": 2.1636, "step": 6646 }, { "epoch": 0.8507615512607193, "grad_norm": 0.81640625, "learning_rate": 0.00011457361601419736, "loss": 1.4908, "step": 6647 }, { "epoch": 0.8508895430692436, "grad_norm": 0.84765625, "learning_rate": 0.00011438100956312247, "loss": 1.1851, "step": 6648 }, { "epoch": 0.8510175348777679, "grad_norm": 0.62109375, "learning_rate": 0.00011418855532004025, "loss": 1.4554, "step": 6649 }, { "epoch": 0.8511455266862921, "grad_norm": 0.7109375, "learning_rate": 0.00011399625331802676, "loss": 1.5176, "step": 6650 }, { "epoch": 0.8512735184948164, "grad_norm": 0.6875, "learning_rate": 0.00011380410359013226, "loss": 1.6087, "step": 6651 }, { "epoch": 0.8514015103033405, "grad_norm": 0.6171875, "learning_rate": 0.00011361210616938089, "loss": 1.2715, "step": 6652 }, { "epoch": 0.8515295021118648, "grad_norm": 0.703125, "learning_rate": 0.00011342026108877069, "loss": 1.2774, "step": 6653 }, { "epoch": 0.8516574939203891, "grad_norm": 0.5390625, "learning_rate": 0.00011322856838127294, "loss": 0.9748, "step": 6654 }, { "epoch": 0.8517854857289133, "grad_norm": 0.498046875, "learning_rate": 0.0001130370280798334, "loss": 0.6351, "step": 6655 }, { "epoch": 0.8519134775374376, "grad_norm": 0.6875, "learning_rate": 0.00011284564021737131, "loss": 1.1854, "step": 6656 }, { "epoch": 0.8520414693459618, "grad_norm": 0.640625, "learning_rate": 0.00011265440482677991, "loss": 1.0922, "step": 6657 }, { "epoch": 0.8521694611544861, "grad_norm": 0.66796875, "learning_rate": 0.0001124633219409259, "loss": 1.1796, "step": 6658 }, { "epoch": 0.8522974529630104, "grad_norm": 0.65234375, "learning_rate": 0.00011227239159265012, "loss": 1.0794, "step": 6659 }, { "epoch": 0.8524254447715346, "grad_norm": 0.56640625, "learning_rate": 0.000112081613814767, "loss": 0.9938, "step": 6660 }, { "epoch": 0.8525534365800589, "grad_norm": 0.5234375, "learning_rate": 0.00011189098864006486, "loss": 0.9382, "step": 6661 }, { "epoch": 0.8526814283885832, "grad_norm": 0.75, "learning_rate": 0.00011170051610130582, "loss": 1.672, "step": 6662 }, { "epoch": 0.8528094201971074, "grad_norm": 0.5390625, "learning_rate": 0.00011151019623122538, "loss": 0.8439, "step": 6663 }, { "epoch": 0.8529374120056317, "grad_norm": 0.65234375, "learning_rate": 0.00011132002906253314, "loss": 1.1408, "step": 6664 }, { "epoch": 0.8530654038141559, "grad_norm": 0.6796875, "learning_rate": 0.00011113001462791261, "loss": 1.8365, "step": 6665 }, { "epoch": 0.8531933956226802, "grad_norm": 0.640625, "learning_rate": 0.00011094015296002102, "loss": 1.2838, "step": 6666 }, { "epoch": 0.8533213874312044, "grad_norm": 0.7421875, "learning_rate": 0.00011075044409148849, "loss": 1.5028, "step": 6667 }, { "epoch": 0.8534493792397286, "grad_norm": 0.52734375, "learning_rate": 0.00011056088805492015, "loss": 0.9667, "step": 6668 }, { "epoch": 0.8535773710482529, "grad_norm": 0.8515625, "learning_rate": 0.00011037148488289416, "loss": 1.3947, "step": 6669 }, { "epoch": 0.8537053628567771, "grad_norm": 0.59375, "learning_rate": 0.0001101822346079625, "loss": 1.1058, "step": 6670 }, { "epoch": 0.8538333546653014, "grad_norm": 0.82421875, "learning_rate": 0.00010999313726265104, "loss": 0.9749, "step": 6671 }, { "epoch": 0.8539613464738257, "grad_norm": 0.56640625, "learning_rate": 0.00010980419287945898, "loss": 0.743, "step": 6672 }, { "epoch": 0.8540893382823499, "grad_norm": 0.62890625, "learning_rate": 0.00010961540149085957, "loss": 1.5729, "step": 6673 }, { "epoch": 0.8542173300908742, "grad_norm": 0.6015625, "learning_rate": 0.00010942676312929967, "loss": 0.8338, "step": 6674 }, { "epoch": 0.8543453218993985, "grad_norm": 0.62109375, "learning_rate": 0.00010923827782720009, "loss": 1.2516, "step": 6675 }, { "epoch": 0.8544733137079227, "grad_norm": 0.765625, "learning_rate": 0.00010904994561695458, "loss": 1.7335, "step": 6676 }, { "epoch": 0.854601305516447, "grad_norm": 0.59375, "learning_rate": 0.00010886176653093149, "loss": 0.8554, "step": 6677 }, { "epoch": 0.8547292973249712, "grad_norm": 0.68359375, "learning_rate": 0.00010867374060147217, "loss": 1.6133, "step": 6678 }, { "epoch": 0.8548572891334955, "grad_norm": 0.6796875, "learning_rate": 0.0001084858678608922, "loss": 1.4035, "step": 6679 }, { "epoch": 0.8549852809420198, "grad_norm": 0.56640625, "learning_rate": 0.00010829814834148044, "loss": 1.0416, "step": 6680 }, { "epoch": 0.8551132727505439, "grad_norm": 0.65625, "learning_rate": 0.00010811058207549951, "loss": 1.4787, "step": 6681 }, { "epoch": 0.8552412645590682, "grad_norm": 0.5625, "learning_rate": 0.00010792316909518573, "loss": 1.1867, "step": 6682 }, { "epoch": 0.8553692563675924, "grad_norm": 0.875, "learning_rate": 0.00010773590943274902, "loss": 1.2604, "step": 6683 }, { "epoch": 0.8554972481761167, "grad_norm": 0.98046875, "learning_rate": 0.0001075488031203733, "loss": 1.5274, "step": 6684 }, { "epoch": 0.855625239984641, "grad_norm": 0.60546875, "learning_rate": 0.00010736185019021538, "loss": 1.3379, "step": 6685 }, { "epoch": 0.8557532317931652, "grad_norm": 0.671875, "learning_rate": 0.00010717505067440637, "loss": 1.4411, "step": 6686 }, { "epoch": 0.8558812236016895, "grad_norm": 0.55859375, "learning_rate": 0.00010698840460505066, "loss": 1.2169, "step": 6687 }, { "epoch": 0.8560092154102138, "grad_norm": 0.79296875, "learning_rate": 0.00010680191201422684, "loss": 1.2435, "step": 6688 }, { "epoch": 0.856137207218738, "grad_norm": 0.875, "learning_rate": 0.00010661557293398604, "loss": 1.5361, "step": 6689 }, { "epoch": 0.8562651990272623, "grad_norm": 0.828125, "learning_rate": 0.00010642938739635388, "loss": 1.7479, "step": 6690 }, { "epoch": 0.8563931908357865, "grad_norm": 0.67578125, "learning_rate": 0.00010624335543332964, "loss": 1.2252, "step": 6691 }, { "epoch": 0.8565211826443108, "grad_norm": 0.6875, "learning_rate": 0.00010605747707688573, "loss": 1.7671, "step": 6692 }, { "epoch": 0.8566491744528351, "grad_norm": 0.7578125, "learning_rate": 0.00010587175235896863, "loss": 1.6284, "step": 6693 }, { "epoch": 0.8567771662613592, "grad_norm": 0.6171875, "learning_rate": 0.00010568618131149765, "loss": 1.057, "step": 6694 }, { "epoch": 0.8569051580698835, "grad_norm": 0.6640625, "learning_rate": 0.00010550076396636644, "loss": 1.2736, "step": 6695 }, { "epoch": 0.8570331498784077, "grad_norm": 0.5859375, "learning_rate": 0.00010531550035544202, "loss": 1.0757, "step": 6696 }, { "epoch": 0.857161141686932, "grad_norm": 0.671875, "learning_rate": 0.00010513039051056505, "loss": 1.1896, "step": 6697 }, { "epoch": 0.8572891334954563, "grad_norm": 0.6171875, "learning_rate": 0.00010494543446354932, "loss": 1.0847, "step": 6698 }, { "epoch": 0.8574171253039805, "grad_norm": 0.640625, "learning_rate": 0.0001047606322461827, "loss": 1.3578, "step": 6699 }, { "epoch": 0.8575451171125048, "grad_norm": 0.76953125, "learning_rate": 0.0001045759838902266, "loss": 1.4909, "step": 6700 }, { "epoch": 0.8576731089210291, "grad_norm": 0.63671875, "learning_rate": 0.00010439148942741561, "loss": 1.1146, "step": 6701 }, { "epoch": 0.8578011007295533, "grad_norm": 0.5703125, "learning_rate": 0.00010420714888945815, "loss": 0.8912, "step": 6702 }, { "epoch": 0.8579290925380776, "grad_norm": 0.60546875, "learning_rate": 0.00010402296230803621, "loss": 1.0122, "step": 6703 }, { "epoch": 0.8580570843466018, "grad_norm": 0.62890625, "learning_rate": 0.00010383892971480524, "loss": 1.2944, "step": 6704 }, { "epoch": 0.8581850761551261, "grad_norm": 0.69921875, "learning_rate": 0.00010365505114139418, "loss": 1.5418, "step": 6705 }, { "epoch": 0.8583130679636504, "grad_norm": 0.83984375, "learning_rate": 0.00010347132661940573, "loss": 1.1468, "step": 6706 }, { "epoch": 0.8584410597721746, "grad_norm": 0.609375, "learning_rate": 0.00010328775618041564, "loss": 0.8764, "step": 6707 }, { "epoch": 0.8585690515806988, "grad_norm": 0.62109375, "learning_rate": 0.00010310433985597356, "loss": 1.2299, "step": 6708 }, { "epoch": 0.858697043389223, "grad_norm": 0.62890625, "learning_rate": 0.00010292107767760262, "loss": 1.2679, "step": 6709 }, { "epoch": 0.8588250351977473, "grad_norm": 0.625, "learning_rate": 0.0001027379696767995, "loss": 1.8233, "step": 6710 }, { "epoch": 0.8589530270062716, "grad_norm": 0.59375, "learning_rate": 0.00010255501588503391, "loss": 1.0516, "step": 6711 }, { "epoch": 0.8590810188147958, "grad_norm": 0.578125, "learning_rate": 0.00010237221633374983, "loss": 0.9168, "step": 6712 }, { "epoch": 0.8592090106233201, "grad_norm": 0.65234375, "learning_rate": 0.00010218957105436377, "loss": 1.0642, "step": 6713 }, { "epoch": 0.8593370024318444, "grad_norm": 0.6640625, "learning_rate": 0.00010200708007826698, "loss": 1.4872, "step": 6714 }, { "epoch": 0.8594649942403686, "grad_norm": 0.62890625, "learning_rate": 0.00010182474343682346, "loss": 1.0391, "step": 6715 }, { "epoch": 0.8595929860488929, "grad_norm": 0.58984375, "learning_rate": 0.00010164256116137017, "loss": 1.0359, "step": 6716 }, { "epoch": 0.8597209778574171, "grad_norm": 0.75390625, "learning_rate": 0.00010146053328321847, "loss": 1.7371, "step": 6717 }, { "epoch": 0.8598489696659414, "grad_norm": 0.5234375, "learning_rate": 0.00010127865983365259, "loss": 0.8946, "step": 6718 }, { "epoch": 0.8599769614744657, "grad_norm": 0.6328125, "learning_rate": 0.00010109694084393084, "loss": 1.1656, "step": 6719 }, { "epoch": 0.8601049532829899, "grad_norm": 0.7109375, "learning_rate": 0.00010091537634528403, "loss": 1.6508, "step": 6720 }, { "epoch": 0.8602329450915142, "grad_norm": 0.58984375, "learning_rate": 0.0001007339663689173, "loss": 1.1272, "step": 6721 }, { "epoch": 0.8603609369000383, "grad_norm": 0.671875, "learning_rate": 0.00010055271094600871, "loss": 1.1252, "step": 6722 }, { "epoch": 0.8604889287085626, "grad_norm": 0.71484375, "learning_rate": 0.00010037161010771013, "loss": 1.8741, "step": 6723 }, { "epoch": 0.8606169205170869, "grad_norm": 0.69140625, "learning_rate": 0.00010019066388514663, "loss": 1.241, "step": 6724 }, { "epoch": 0.8607449123256111, "grad_norm": 0.625, "learning_rate": 0.00010000987230941671, "loss": 1.1193, "step": 6725 }, { "epoch": 0.8608729041341354, "grad_norm": 0.671875, "learning_rate": 9.982923541159238e-05, "loss": 1.2572, "step": 6726 }, { "epoch": 0.8610008959426597, "grad_norm": 0.50390625, "learning_rate": 9.964875322271893e-05, "loss": 0.9744, "step": 6727 }, { "epoch": 0.8611288877511839, "grad_norm": 0.6875, "learning_rate": 9.946842577381543e-05, "loss": 1.5349, "step": 6728 }, { "epoch": 0.8612568795597082, "grad_norm": 0.578125, "learning_rate": 9.928825309587374e-05, "loss": 0.9812, "step": 6729 }, { "epoch": 0.8613848713682324, "grad_norm": 0.69921875, "learning_rate": 9.910823521985957e-05, "loss": 1.1783, "step": 6730 }, { "epoch": 0.8615128631767567, "grad_norm": 0.61328125, "learning_rate": 9.89283721767118e-05, "loss": 1.3829, "step": 6731 }, { "epoch": 0.861640854985281, "grad_norm": 1.1875, "learning_rate": 9.874866399734305e-05, "loss": 1.3069, "step": 6732 }, { "epoch": 0.8617688467938052, "grad_norm": 0.6796875, "learning_rate": 9.856911071263918e-05, "loss": 1.6364, "step": 6733 }, { "epoch": 0.8618968386023295, "grad_norm": 0.7578125, "learning_rate": 9.838971235345894e-05, "loss": 1.7389, "step": 6734 }, { "epoch": 0.8620248304108538, "grad_norm": 0.59375, "learning_rate": 9.821046895063479e-05, "loss": 1.1525, "step": 6735 }, { "epoch": 0.862152822219378, "grad_norm": 0.7421875, "learning_rate": 9.803138053497319e-05, "loss": 1.1265, "step": 6736 }, { "epoch": 0.8622808140279022, "grad_norm": 0.80859375, "learning_rate": 9.785244713725316e-05, "loss": 1.54, "step": 6737 }, { "epoch": 0.8624088058364264, "grad_norm": 0.74609375, "learning_rate": 9.767366878822703e-05, "loss": 1.3344, "step": 6738 }, { "epoch": 0.8625367976449507, "grad_norm": 0.56640625, "learning_rate": 9.749504551862087e-05, "loss": 0.8843, "step": 6739 }, { "epoch": 0.862664789453475, "grad_norm": 0.625, "learning_rate": 9.731657735913424e-05, "loss": 1.0735, "step": 6740 }, { "epoch": 0.8627927812619992, "grad_norm": 0.578125, "learning_rate": 9.71382643404397e-05, "loss": 1.0285, "step": 6741 }, { "epoch": 0.8629207730705235, "grad_norm": 0.67578125, "learning_rate": 9.696010649318288e-05, "loss": 1.3336, "step": 6742 }, { "epoch": 0.8630487648790477, "grad_norm": 0.75390625, "learning_rate": 9.678210384798347e-05, "loss": 1.1844, "step": 6743 }, { "epoch": 0.863176756687572, "grad_norm": 0.62109375, "learning_rate": 9.660425643543403e-05, "loss": 0.8923, "step": 6744 }, { "epoch": 0.8633047484960963, "grad_norm": 0.7265625, "learning_rate": 9.642656428610042e-05, "loss": 1.4477, "step": 6745 }, { "epoch": 0.8634327403046205, "grad_norm": 0.53515625, "learning_rate": 9.6249027430522e-05, "loss": 0.7757, "step": 6746 }, { "epoch": 0.8635607321131448, "grad_norm": 0.62890625, "learning_rate": 9.607164589921136e-05, "loss": 1.2902, "step": 6747 }, { "epoch": 0.8636887239216691, "grad_norm": 1.265625, "learning_rate": 9.589441972265445e-05, "loss": 1.1742, "step": 6748 }, { "epoch": 0.8638167157301933, "grad_norm": 0.625, "learning_rate": 9.571734893131034e-05, "loss": 0.848, "step": 6749 }, { "epoch": 0.8639447075387175, "grad_norm": 0.6484375, "learning_rate": 9.554043355561182e-05, "loss": 1.0286, "step": 6750 }, { "epoch": 0.8640726993472417, "grad_norm": 0.64453125, "learning_rate": 9.536367362596421e-05, "loss": 1.1007, "step": 6751 }, { "epoch": 0.864200691155766, "grad_norm": 0.671875, "learning_rate": 9.518706917274688e-05, "loss": 1.2003, "step": 6752 }, { "epoch": 0.8643286829642903, "grad_norm": 0.6640625, "learning_rate": 9.501062022631201e-05, "loss": 1.4875, "step": 6753 }, { "epoch": 0.8644566747728145, "grad_norm": 0.65234375, "learning_rate": 9.483432681698522e-05, "loss": 1.4858, "step": 6754 }, { "epoch": 0.8645846665813388, "grad_norm": 0.69140625, "learning_rate": 9.465818897506573e-05, "loss": 1.1671, "step": 6755 }, { "epoch": 0.864712658389863, "grad_norm": 0.5, "learning_rate": 9.448220673082519e-05, "loss": 0.7775, "step": 6756 }, { "epoch": 0.8648406501983873, "grad_norm": 0.6640625, "learning_rate": 9.430638011450931e-05, "loss": 1.441, "step": 6757 }, { "epoch": 0.8649686420069116, "grad_norm": 0.71484375, "learning_rate": 9.413070915633636e-05, "loss": 1.618, "step": 6758 }, { "epoch": 0.8650966338154358, "grad_norm": 0.5859375, "learning_rate": 9.395519388649909e-05, "loss": 1.3146, "step": 6759 }, { "epoch": 0.8652246256239601, "grad_norm": 0.76171875, "learning_rate": 9.37798343351618e-05, "loss": 1.2451, "step": 6760 }, { "epoch": 0.8653526174324844, "grad_norm": 0.734375, "learning_rate": 9.360463053246315e-05, "loss": 1.412, "step": 6761 }, { "epoch": 0.8654806092410086, "grad_norm": 0.6796875, "learning_rate": 9.342958250851486e-05, "loss": 1.4375, "step": 6762 }, { "epoch": 0.8656086010495329, "grad_norm": 0.6484375, "learning_rate": 9.325469029340195e-05, "loss": 1.3324, "step": 6763 }, { "epoch": 0.865736592858057, "grad_norm": 0.58984375, "learning_rate": 9.307995391718193e-05, "loss": 0.9888, "step": 6764 }, { "epoch": 0.8658645846665813, "grad_norm": 0.62109375, "learning_rate": 9.290537340988647e-05, "loss": 1.3549, "step": 6765 }, { "epoch": 0.8659925764751056, "grad_norm": 0.7109375, "learning_rate": 9.273094880151988e-05, "loss": 1.5572, "step": 6766 }, { "epoch": 0.8661205682836298, "grad_norm": 0.6796875, "learning_rate": 9.255668012206008e-05, "loss": 1.4883, "step": 6767 }, { "epoch": 0.8662485600921541, "grad_norm": 0.6171875, "learning_rate": 9.238256740145778e-05, "loss": 1.348, "step": 6768 }, { "epoch": 0.8663765519006783, "grad_norm": 0.62890625, "learning_rate": 9.220861066963715e-05, "loss": 1.3124, "step": 6769 }, { "epoch": 0.8665045437092026, "grad_norm": 0.578125, "learning_rate": 9.203480995649549e-05, "loss": 1.0503, "step": 6770 }, { "epoch": 0.8666325355177269, "grad_norm": 0.59375, "learning_rate": 9.186116529190325e-05, "loss": 1.0971, "step": 6771 }, { "epoch": 0.8667605273262511, "grad_norm": 0.6640625, "learning_rate": 9.168767670570432e-05, "loss": 1.2438, "step": 6772 }, { "epoch": 0.8668885191347754, "grad_norm": 0.6484375, "learning_rate": 9.151434422771498e-05, "loss": 1.0615, "step": 6773 }, { "epoch": 0.8670165109432997, "grad_norm": 0.70703125, "learning_rate": 9.134116788772562e-05, "loss": 1.907, "step": 6774 }, { "epoch": 0.8671445027518239, "grad_norm": 0.66796875, "learning_rate": 9.116814771549941e-05, "loss": 1.3275, "step": 6775 }, { "epoch": 0.8672724945603482, "grad_norm": 0.578125, "learning_rate": 9.099528374077249e-05, "loss": 0.9385, "step": 6776 }, { "epoch": 0.8674004863688723, "grad_norm": 0.62890625, "learning_rate": 9.08225759932546e-05, "loss": 1.3804, "step": 6777 }, { "epoch": 0.8675284781773966, "grad_norm": 0.7421875, "learning_rate": 9.065002450262817e-05, "loss": 1.279, "step": 6778 }, { "epoch": 0.8676564699859209, "grad_norm": 0.67578125, "learning_rate": 9.047762929854885e-05, "loss": 1.5356, "step": 6779 }, { "epoch": 0.8677844617944451, "grad_norm": 0.70703125, "learning_rate": 9.030539041064568e-05, "loss": 1.0181, "step": 6780 }, { "epoch": 0.8679124536029694, "grad_norm": 0.92578125, "learning_rate": 9.013330786852092e-05, "loss": 1.6626, "step": 6781 }, { "epoch": 0.8680404454114936, "grad_norm": 0.5703125, "learning_rate": 8.99613817017495e-05, "loss": 1.2216, "step": 6782 }, { "epoch": 0.8681684372200179, "grad_norm": 0.83203125, "learning_rate": 8.978961193987989e-05, "loss": 0.8813, "step": 6783 }, { "epoch": 0.8682964290285422, "grad_norm": 0.62109375, "learning_rate": 8.961799861243324e-05, "loss": 0.9819, "step": 6784 }, { "epoch": 0.8684244208370664, "grad_norm": 0.625, "learning_rate": 8.944654174890432e-05, "loss": 1.1613, "step": 6785 }, { "epoch": 0.8685524126455907, "grad_norm": 0.609375, "learning_rate": 8.927524137876097e-05, "loss": 1.0209, "step": 6786 }, { "epoch": 0.868680404454115, "grad_norm": 0.61328125, "learning_rate": 8.910409753144344e-05, "loss": 1.5691, "step": 6787 }, { "epoch": 0.8688083962626392, "grad_norm": 0.828125, "learning_rate": 8.893311023636574e-05, "loss": 0.8361, "step": 6788 }, { "epoch": 0.8689363880711635, "grad_norm": 0.64453125, "learning_rate": 8.876227952291493e-05, "loss": 1.1491, "step": 6789 }, { "epoch": 0.8690643798796877, "grad_norm": 0.6953125, "learning_rate": 8.859160542045108e-05, "loss": 1.8159, "step": 6790 }, { "epoch": 0.869192371688212, "grad_norm": 0.640625, "learning_rate": 8.842108795830717e-05, "loss": 1.3219, "step": 6791 }, { "epoch": 0.8693203634967362, "grad_norm": 0.578125, "learning_rate": 8.825072716578942e-05, "loss": 0.8719, "step": 6792 }, { "epoch": 0.8694483553052604, "grad_norm": 0.53125, "learning_rate": 8.808052307217707e-05, "loss": 0.982, "step": 6793 }, { "epoch": 0.8695763471137847, "grad_norm": 0.75, "learning_rate": 8.791047570672273e-05, "loss": 1.556, "step": 6794 }, { "epoch": 0.8697043389223089, "grad_norm": 0.640625, "learning_rate": 8.774058509865146e-05, "loss": 1.4801, "step": 6795 }, { "epoch": 0.8698323307308332, "grad_norm": 0.59765625, "learning_rate": 8.75708512771617e-05, "loss": 1.0036, "step": 6796 }, { "epoch": 0.8699603225393575, "grad_norm": 0.69921875, "learning_rate": 8.740127427142519e-05, "loss": 2.2034, "step": 6797 }, { "epoch": 0.8700883143478817, "grad_norm": 0.73828125, "learning_rate": 8.723185411058631e-05, "loss": 1.6616, "step": 6798 }, { "epoch": 0.870216306156406, "grad_norm": 0.69140625, "learning_rate": 8.7062590823763e-05, "loss": 1.2769, "step": 6799 }, { "epoch": 0.8703442979649303, "grad_norm": 0.69140625, "learning_rate": 8.689348444004541e-05, "loss": 1.3433, "step": 6800 }, { "epoch": 0.8704722897734545, "grad_norm": 0.6015625, "learning_rate": 8.672453498849753e-05, "loss": 1.3435, "step": 6801 }, { "epoch": 0.8706002815819788, "grad_norm": 0.58203125, "learning_rate": 8.65557424981559e-05, "loss": 1.109, "step": 6802 }, { "epoch": 0.870728273390503, "grad_norm": 0.63671875, "learning_rate": 8.638710699803043e-05, "loss": 0.8641, "step": 6803 }, { "epoch": 0.8708562651990273, "grad_norm": 0.73046875, "learning_rate": 8.621862851710371e-05, "loss": 1.6044, "step": 6804 }, { "epoch": 0.8709842570075516, "grad_norm": 0.6796875, "learning_rate": 8.605030708433148e-05, "loss": 1.6729, "step": 6805 }, { "epoch": 0.8711122488160757, "grad_norm": 0.7578125, "learning_rate": 8.58821427286427e-05, "loss": 1.4672, "step": 6806 }, { "epoch": 0.8712402406246, "grad_norm": 0.54296875, "learning_rate": 8.571413547893902e-05, "loss": 0.8732, "step": 6807 }, { "epoch": 0.8713682324331243, "grad_norm": 0.6640625, "learning_rate": 8.554628536409536e-05, "loss": 1.8729, "step": 6808 }, { "epoch": 0.8714962242416485, "grad_norm": 0.70703125, "learning_rate": 8.53785924129592e-05, "loss": 1.4049, "step": 6809 }, { "epoch": 0.8716242160501728, "grad_norm": 0.703125, "learning_rate": 8.521105665435147e-05, "loss": 1.4966, "step": 6810 }, { "epoch": 0.871752207858697, "grad_norm": 0.625, "learning_rate": 8.504367811706581e-05, "loss": 1.4253, "step": 6811 }, { "epoch": 0.8718801996672213, "grad_norm": 0.62890625, "learning_rate": 8.48764568298691e-05, "loss": 1.0728, "step": 6812 }, { "epoch": 0.8720081914757456, "grad_norm": 0.65625, "learning_rate": 8.470939282150092e-05, "loss": 1.1335, "step": 6813 }, { "epoch": 0.8721361832842698, "grad_norm": 0.8125, "learning_rate": 8.454248612067394e-05, "loss": 1.3313, "step": 6814 }, { "epoch": 0.8722641750927941, "grad_norm": 0.671875, "learning_rate": 8.43757367560738e-05, "loss": 1.3326, "step": 6815 }, { "epoch": 0.8723921669013183, "grad_norm": 0.8515625, "learning_rate": 8.42091447563591e-05, "loss": 1.263, "step": 6816 }, { "epoch": 0.8725201587098426, "grad_norm": 0.7265625, "learning_rate": 8.404271015016151e-05, "loss": 1.4037, "step": 6817 }, { "epoch": 0.8726481505183669, "grad_norm": 0.65625, "learning_rate": 8.387643296608515e-05, "loss": 1.4081, "step": 6818 }, { "epoch": 0.872776142326891, "grad_norm": 0.54296875, "learning_rate": 8.371031323270773e-05, "loss": 0.7403, "step": 6819 }, { "epoch": 0.8729041341354153, "grad_norm": 0.60546875, "learning_rate": 8.354435097857949e-05, "loss": 1.2833, "step": 6820 }, { "epoch": 0.8730321259439396, "grad_norm": 0.625, "learning_rate": 8.337854623222407e-05, "loss": 1.2002, "step": 6821 }, { "epoch": 0.8731601177524638, "grad_norm": 0.796875, "learning_rate": 8.321289902213724e-05, "loss": 1.2939, "step": 6822 }, { "epoch": 0.8732881095609881, "grad_norm": 0.6171875, "learning_rate": 8.304740937678835e-05, "loss": 1.1573, "step": 6823 }, { "epoch": 0.8734161013695123, "grad_norm": 0.78125, "learning_rate": 8.28820773246195e-05, "loss": 1.5487, "step": 6824 }, { "epoch": 0.8735440931780366, "grad_norm": 0.53515625, "learning_rate": 8.271690289404565e-05, "loss": 0.9573, "step": 6825 }, { "epoch": 0.8736720849865609, "grad_norm": 0.671875, "learning_rate": 8.255188611345476e-05, "loss": 1.2494, "step": 6826 }, { "epoch": 0.8738000767950851, "grad_norm": 1.203125, "learning_rate": 8.23870270112076e-05, "loss": 1.1847, "step": 6827 }, { "epoch": 0.8739280686036094, "grad_norm": 0.67578125, "learning_rate": 8.222232561563791e-05, "loss": 1.2507, "step": 6828 }, { "epoch": 0.8740560604121336, "grad_norm": 0.640625, "learning_rate": 8.20577819550522e-05, "loss": 1.2206, "step": 6829 }, { "epoch": 0.8741840522206579, "grad_norm": 0.68359375, "learning_rate": 8.189339605773039e-05, "loss": 1.4335, "step": 6830 }, { "epoch": 0.8743120440291822, "grad_norm": 0.6484375, "learning_rate": 8.17291679519242e-05, "loss": 1.3523, "step": 6831 }, { "epoch": 0.8744400358377064, "grad_norm": 0.56640625, "learning_rate": 8.156509766585928e-05, "loss": 1.6037, "step": 6832 }, { "epoch": 0.8745680276462307, "grad_norm": 0.91796875, "learning_rate": 8.140118522773376e-05, "loss": 1.5793, "step": 6833 }, { "epoch": 0.874696019454755, "grad_norm": 0.6640625, "learning_rate": 8.123743066571854e-05, "loss": 1.5232, "step": 6834 }, { "epoch": 0.8748240112632791, "grad_norm": 0.734375, "learning_rate": 8.107383400795753e-05, "loss": 1.518, "step": 6835 }, { "epoch": 0.8749520030718034, "grad_norm": 0.6953125, "learning_rate": 8.09103952825676e-05, "loss": 1.8769, "step": 6836 }, { "epoch": 0.8750799948803276, "grad_norm": 0.640625, "learning_rate": 8.074711451763816e-05, "loss": 1.0603, "step": 6837 }, { "epoch": 0.8752079866888519, "grad_norm": 0.61328125, "learning_rate": 8.058399174123176e-05, "loss": 1.1959, "step": 6838 }, { "epoch": 0.8753359784973762, "grad_norm": 0.69140625, "learning_rate": 8.042102698138399e-05, "loss": 1.1805, "step": 6839 }, { "epoch": 0.8754639703059004, "grad_norm": 0.625, "learning_rate": 8.02582202661023e-05, "loss": 1.1124, "step": 6840 }, { "epoch": 0.8755919621144247, "grad_norm": 0.70703125, "learning_rate": 8.009557162336823e-05, "loss": 1.2078, "step": 6841 }, { "epoch": 0.8757199539229489, "grad_norm": 0.5390625, "learning_rate": 7.993308108113528e-05, "loss": 0.9173, "step": 6842 }, { "epoch": 0.8758479457314732, "grad_norm": 0.69921875, "learning_rate": 7.977074866733036e-05, "loss": 1.8103, "step": 6843 }, { "epoch": 0.8759759375399975, "grad_norm": 0.6796875, "learning_rate": 7.960857440985269e-05, "loss": 1.6248, "step": 6844 }, { "epoch": 0.8761039293485217, "grad_norm": 0.640625, "learning_rate": 7.944655833657466e-05, "loss": 1.1769, "step": 6845 }, { "epoch": 0.876231921157046, "grad_norm": 0.6953125, "learning_rate": 7.928470047534119e-05, "loss": 1.5234, "step": 6846 }, { "epoch": 0.8763599129655703, "grad_norm": 0.703125, "learning_rate": 7.912300085397051e-05, "loss": 1.2149, "step": 6847 }, { "epoch": 0.8764879047740944, "grad_norm": 0.59765625, "learning_rate": 7.896145950025302e-05, "loss": 1.0309, "step": 6848 }, { "epoch": 0.8766158965826187, "grad_norm": 0.74609375, "learning_rate": 7.880007644195242e-05, "loss": 1.346, "step": 6849 }, { "epoch": 0.8767438883911429, "grad_norm": 0.6640625, "learning_rate": 7.863885170680484e-05, "loss": 1.5738, "step": 6850 }, { "epoch": 0.8768718801996672, "grad_norm": 0.625, "learning_rate": 7.847778532251949e-05, "loss": 1.5638, "step": 6851 }, { "epoch": 0.8769998720081915, "grad_norm": 0.66015625, "learning_rate": 7.831687731677839e-05, "loss": 1.3526, "step": 6852 }, { "epoch": 0.8771278638167157, "grad_norm": 0.5234375, "learning_rate": 7.81561277172359e-05, "loss": 0.7733, "step": 6853 }, { "epoch": 0.87725585562524, "grad_norm": 0.515625, "learning_rate": 7.799553655151947e-05, "loss": 0.846, "step": 6854 }, { "epoch": 0.8773838474337642, "grad_norm": 0.65625, "learning_rate": 7.783510384722936e-05, "loss": 1.1026, "step": 6855 }, { "epoch": 0.8775118392422885, "grad_norm": 0.6015625, "learning_rate": 7.767482963193883e-05, "loss": 1.0971, "step": 6856 }, { "epoch": 0.8776398310508128, "grad_norm": 0.5703125, "learning_rate": 7.751471393319287e-05, "loss": 0.8213, "step": 6857 }, { "epoch": 0.877767822859337, "grad_norm": 0.625, "learning_rate": 7.735475677851067e-05, "loss": 0.9826, "step": 6858 }, { "epoch": 0.8778958146678613, "grad_norm": 0.640625, "learning_rate": 7.719495819538325e-05, "loss": 1.3639, "step": 6859 }, { "epoch": 0.8780238064763856, "grad_norm": 0.59765625, "learning_rate": 7.703531821127441e-05, "loss": 0.8795, "step": 6860 }, { "epoch": 0.8781517982849097, "grad_norm": 0.66796875, "learning_rate": 7.687583685362131e-05, "loss": 1.5645, "step": 6861 }, { "epoch": 0.878279790093434, "grad_norm": 0.62109375, "learning_rate": 7.671651414983294e-05, "loss": 1.4325, "step": 6862 }, { "epoch": 0.8784077819019582, "grad_norm": 0.5625, "learning_rate": 7.655735012729159e-05, "loss": 0.7625, "step": 6863 }, { "epoch": 0.8785357737104825, "grad_norm": 0.81640625, "learning_rate": 7.639834481335228e-05, "loss": 1.4458, "step": 6864 }, { "epoch": 0.8786637655190068, "grad_norm": 0.5625, "learning_rate": 7.623949823534282e-05, "loss": 0.9235, "step": 6865 }, { "epoch": 0.878791757327531, "grad_norm": 0.6015625, "learning_rate": 7.608081042056325e-05, "loss": 1.3002, "step": 6866 }, { "epoch": 0.8789197491360553, "grad_norm": 0.58984375, "learning_rate": 7.592228139628676e-05, "loss": 1.2002, "step": 6867 }, { "epoch": 0.8790477409445795, "grad_norm": 0.59375, "learning_rate": 7.576391118975912e-05, "loss": 1.2567, "step": 6868 }, { "epoch": 0.8791757327531038, "grad_norm": 0.65625, "learning_rate": 7.560569982819876e-05, "loss": 1.4303, "step": 6869 }, { "epoch": 0.8793037245616281, "grad_norm": 0.72265625, "learning_rate": 7.544764733879705e-05, "loss": 1.3712, "step": 6870 }, { "epoch": 0.8794317163701523, "grad_norm": 0.6875, "learning_rate": 7.528975374871782e-05, "loss": 1.4614, "step": 6871 }, { "epoch": 0.8795597081786766, "grad_norm": 0.671875, "learning_rate": 7.51320190850976e-05, "loss": 1.177, "step": 6872 }, { "epoch": 0.8796876999872009, "grad_norm": 0.65625, "learning_rate": 7.497444337504556e-05, "loss": 1.388, "step": 6873 }, { "epoch": 0.879815691795725, "grad_norm": 0.640625, "learning_rate": 7.481702664564405e-05, "loss": 1.1488, "step": 6874 }, { "epoch": 0.8799436836042493, "grad_norm": 0.578125, "learning_rate": 7.46597689239471e-05, "loss": 0.7559, "step": 6875 }, { "epoch": 0.8800716754127735, "grad_norm": 0.66015625, "learning_rate": 7.450267023698231e-05, "loss": 1.4581, "step": 6876 }, { "epoch": 0.8801996672212978, "grad_norm": 0.65625, "learning_rate": 7.434573061174966e-05, "loss": 1.4389, "step": 6877 }, { "epoch": 0.8803276590298221, "grad_norm": 1.0390625, "learning_rate": 7.418895007522187e-05, "loss": 1.9565, "step": 6878 }, { "epoch": 0.8804556508383463, "grad_norm": 0.49609375, "learning_rate": 7.403232865434384e-05, "loss": 0.7029, "step": 6879 }, { "epoch": 0.8805836426468706, "grad_norm": 0.83984375, "learning_rate": 7.387586637603361e-05, "loss": 1.7824, "step": 6880 }, { "epoch": 0.8807116344553949, "grad_norm": 0.79296875, "learning_rate": 7.37195632671821e-05, "loss": 1.734, "step": 6881 }, { "epoch": 0.8808396262639191, "grad_norm": 0.73046875, "learning_rate": 7.356341935465227e-05, "loss": 1.5655, "step": 6882 }, { "epoch": 0.8809676180724434, "grad_norm": 0.8671875, "learning_rate": 7.340743466528022e-05, "loss": 1.3738, "step": 6883 }, { "epoch": 0.8810956098809676, "grad_norm": 0.6171875, "learning_rate": 7.325160922587403e-05, "loss": 1.0439, "step": 6884 }, { "epoch": 0.8812236016894919, "grad_norm": 0.609375, "learning_rate": 7.30959430632151e-05, "loss": 1.0681, "step": 6885 }, { "epoch": 0.8813515934980162, "grad_norm": 0.51171875, "learning_rate": 7.294043620405721e-05, "loss": 0.7244, "step": 6886 }, { "epoch": 0.8814795853065404, "grad_norm": 1.421875, "learning_rate": 7.278508867512679e-05, "loss": 1.4743, "step": 6887 }, { "epoch": 0.8816075771150647, "grad_norm": 0.7578125, "learning_rate": 7.262990050312258e-05, "loss": 1.4483, "step": 6888 }, { "epoch": 0.8817355689235888, "grad_norm": 0.78515625, "learning_rate": 7.247487171471622e-05, "loss": 2.3103, "step": 6889 }, { "epoch": 0.8818635607321131, "grad_norm": 0.84765625, "learning_rate": 7.232000233655212e-05, "loss": 1.3367, "step": 6890 }, { "epoch": 0.8819915525406374, "grad_norm": 0.90625, "learning_rate": 7.216529239524705e-05, "loss": 1.6603, "step": 6891 }, { "epoch": 0.8821195443491616, "grad_norm": 0.76171875, "learning_rate": 7.20107419173902e-05, "loss": 1.3032, "step": 6892 }, { "epoch": 0.8822475361576859, "grad_norm": 0.6484375, "learning_rate": 7.18563509295439e-05, "loss": 1.3874, "step": 6893 }, { "epoch": 0.8823755279662102, "grad_norm": 0.66796875, "learning_rate": 7.170211945824256e-05, "loss": 1.2631, "step": 6894 }, { "epoch": 0.8825035197747344, "grad_norm": 0.72265625, "learning_rate": 7.154804752999344e-05, "loss": 1.359, "step": 6895 }, { "epoch": 0.8826315115832587, "grad_norm": 0.55859375, "learning_rate": 7.139413517127635e-05, "loss": 1.2823, "step": 6896 }, { "epoch": 0.8827595033917829, "grad_norm": 0.6875, "learning_rate": 7.124038240854347e-05, "loss": 1.3112, "step": 6897 }, { "epoch": 0.8828874952003072, "grad_norm": 0.671875, "learning_rate": 7.108678926821966e-05, "loss": 1.2209, "step": 6898 }, { "epoch": 0.8830154870088315, "grad_norm": 0.66796875, "learning_rate": 7.093335577670257e-05, "loss": 1.3642, "step": 6899 }, { "epoch": 0.8831434788173557, "grad_norm": 0.93359375, "learning_rate": 7.078008196036223e-05, "loss": 1.6066, "step": 6900 }, { "epoch": 0.88327147062588, "grad_norm": 0.73828125, "learning_rate": 7.062696784554123e-05, "loss": 1.0818, "step": 6901 }, { "epoch": 0.8833994624344041, "grad_norm": 0.66796875, "learning_rate": 7.047401345855442e-05, "loss": 1.3091, "step": 6902 }, { "epoch": 0.8835274542429284, "grad_norm": 0.96484375, "learning_rate": 7.032121882568975e-05, "loss": 1.5825, "step": 6903 }, { "epoch": 0.8836554460514527, "grad_norm": 0.51171875, "learning_rate": 7.016858397320758e-05, "loss": 1.1899, "step": 6904 }, { "epoch": 0.8837834378599769, "grad_norm": 0.5859375, "learning_rate": 7.001610892734067e-05, "loss": 1.051, "step": 6905 }, { "epoch": 0.8839114296685012, "grad_norm": 0.765625, "learning_rate": 6.986379371429408e-05, "loss": 1.1902, "step": 6906 }, { "epoch": 0.8840394214770255, "grad_norm": 0.62109375, "learning_rate": 6.971163836024574e-05, "loss": 1.148, "step": 6907 }, { "epoch": 0.8841674132855497, "grad_norm": 0.765625, "learning_rate": 6.955964289134597e-05, "loss": 1.6229, "step": 6908 }, { "epoch": 0.884295405094074, "grad_norm": 0.66796875, "learning_rate": 6.9407807333718e-05, "loss": 1.0453, "step": 6909 }, { "epoch": 0.8844233969025982, "grad_norm": 0.625, "learning_rate": 6.92561317134568e-05, "loss": 1.2309, "step": 6910 }, { "epoch": 0.8845513887111225, "grad_norm": 0.73046875, "learning_rate": 6.910461605663043e-05, "loss": 1.5018, "step": 6911 }, { "epoch": 0.8846793805196468, "grad_norm": 0.734375, "learning_rate": 6.895326038927941e-05, "loss": 1.2526, "step": 6912 }, { "epoch": 0.884807372328171, "grad_norm": 0.8203125, "learning_rate": 6.880206473741646e-05, "loss": 1.8287, "step": 6913 }, { "epoch": 0.8849353641366953, "grad_norm": 0.7109375, "learning_rate": 6.865102912702725e-05, "loss": 1.4217, "step": 6914 }, { "epoch": 0.8850633559452195, "grad_norm": 0.703125, "learning_rate": 6.850015358406959e-05, "loss": 1.4995, "step": 6915 }, { "epoch": 0.8851913477537438, "grad_norm": 0.65625, "learning_rate": 6.834943813447392e-05, "loss": 1.7748, "step": 6916 }, { "epoch": 0.885319339562268, "grad_norm": 0.6328125, "learning_rate": 6.81988828041431e-05, "loss": 1.1814, "step": 6917 }, { "epoch": 0.8854473313707922, "grad_norm": 0.640625, "learning_rate": 6.804848761895266e-05, "loss": 1.0471, "step": 6918 }, { "epoch": 0.8855753231793165, "grad_norm": 0.61328125, "learning_rate": 6.789825260475014e-05, "loss": 1.0389, "step": 6919 }, { "epoch": 0.8857033149878408, "grad_norm": 0.59765625, "learning_rate": 6.774817778735598e-05, "loss": 1.0675, "step": 6920 }, { "epoch": 0.885831306796365, "grad_norm": 0.6015625, "learning_rate": 6.759826319256312e-05, "loss": 1.0725, "step": 6921 }, { "epoch": 0.8859592986048893, "grad_norm": 0.64453125, "learning_rate": 6.744850884613673e-05, "loss": 1.4555, "step": 6922 }, { "epoch": 0.8860872904134135, "grad_norm": 0.7265625, "learning_rate": 6.729891477381467e-05, "loss": 1.3694, "step": 6923 }, { "epoch": 0.8862152822219378, "grad_norm": 0.5390625, "learning_rate": 6.714948100130669e-05, "loss": 0.8964, "step": 6924 }, { "epoch": 0.8863432740304621, "grad_norm": 0.7734375, "learning_rate": 6.700020755429558e-05, "loss": 1.2581, "step": 6925 }, { "epoch": 0.8864712658389863, "grad_norm": 0.72265625, "learning_rate": 6.685109445843673e-05, "loss": 2.0216, "step": 6926 }, { "epoch": 0.8865992576475106, "grad_norm": 0.7890625, "learning_rate": 6.670214173935751e-05, "loss": 1.502, "step": 6927 }, { "epoch": 0.8867272494560348, "grad_norm": 0.60546875, "learning_rate": 6.655334942265755e-05, "loss": 1.1758, "step": 6928 }, { "epoch": 0.8868552412645591, "grad_norm": 0.87109375, "learning_rate": 6.640471753390954e-05, "loss": 1.3899, "step": 6929 }, { "epoch": 0.8869832330730834, "grad_norm": 0.6875, "learning_rate": 6.62562460986581e-05, "loss": 1.5035, "step": 6930 }, { "epoch": 0.8871112248816075, "grad_norm": 0.51171875, "learning_rate": 6.610793514242074e-05, "loss": 0.5682, "step": 6931 }, { "epoch": 0.8872392166901318, "grad_norm": 0.69921875, "learning_rate": 6.595978469068675e-05, "loss": 1.6245, "step": 6932 }, { "epoch": 0.8873672084986561, "grad_norm": 0.53125, "learning_rate": 6.58117947689183e-05, "loss": 0.9848, "step": 6933 }, { "epoch": 0.8874952003071803, "grad_norm": 0.62109375, "learning_rate": 6.566396540254993e-05, "loss": 1.093, "step": 6934 }, { "epoch": 0.8876231921157046, "grad_norm": 0.5703125, "learning_rate": 6.551629661698843e-05, "loss": 1.1916, "step": 6935 }, { "epoch": 0.8877511839242288, "grad_norm": 0.55078125, "learning_rate": 6.536878843761319e-05, "loss": 1.0193, "step": 6936 }, { "epoch": 0.8878791757327531, "grad_norm": 0.546875, "learning_rate": 6.522144088977577e-05, "loss": 1.0361, "step": 6937 }, { "epoch": 0.8880071675412774, "grad_norm": 0.62890625, "learning_rate": 6.507425399880041e-05, "loss": 1.2596, "step": 6938 }, { "epoch": 0.8881351593498016, "grad_norm": 0.671875, "learning_rate": 6.492722778998328e-05, "loss": 1.1934, "step": 6939 }, { "epoch": 0.8882631511583259, "grad_norm": 0.6640625, "learning_rate": 6.478036228859363e-05, "loss": 1.657, "step": 6940 }, { "epoch": 0.8883911429668501, "grad_norm": 0.64453125, "learning_rate": 6.463365751987226e-05, "loss": 1.3214, "step": 6941 }, { "epoch": 0.8885191347753744, "grad_norm": 0.55078125, "learning_rate": 6.448711350903291e-05, "loss": 0.8892, "step": 6942 }, { "epoch": 0.8886471265838987, "grad_norm": 0.54296875, "learning_rate": 6.434073028126163e-05, "loss": 0.6864, "step": 6943 }, { "epoch": 0.8887751183924228, "grad_norm": 0.890625, "learning_rate": 6.419450786171666e-05, "loss": 1.9425, "step": 6944 }, { "epoch": 0.8889031102009471, "grad_norm": 0.5, "learning_rate": 6.404844627552885e-05, "loss": 0.8474, "step": 6945 }, { "epoch": 0.8890311020094714, "grad_norm": 0.609375, "learning_rate": 6.390254554780107e-05, "loss": 1.3625, "step": 6946 }, { "epoch": 0.8891590938179956, "grad_norm": 0.6484375, "learning_rate": 6.375680570360876e-05, "loss": 1.1235, "step": 6947 }, { "epoch": 0.8892870856265199, "grad_norm": 0.70703125, "learning_rate": 6.361122676799957e-05, "loss": 1.2492, "step": 6948 }, { "epoch": 0.8894150774350441, "grad_norm": 0.6171875, "learning_rate": 6.346580876599394e-05, "loss": 1.1474, "step": 6949 }, { "epoch": 0.8895430692435684, "grad_norm": 0.87109375, "learning_rate": 6.33205517225841e-05, "loss": 1.3313, "step": 6950 }, { "epoch": 0.8896710610520927, "grad_norm": 0.73046875, "learning_rate": 6.317545566273475e-05, "loss": 1.3677, "step": 6951 }, { "epoch": 0.8897990528606169, "grad_norm": 0.52734375, "learning_rate": 6.303052061138314e-05, "loss": 0.7653, "step": 6952 }, { "epoch": 0.8899270446691412, "grad_norm": 0.7734375, "learning_rate": 6.28857465934387e-05, "loss": 1.5897, "step": 6953 }, { "epoch": 0.8900550364776655, "grad_norm": 0.546875, "learning_rate": 6.27411336337832e-05, "loss": 1.2967, "step": 6954 }, { "epoch": 0.8901830282861897, "grad_norm": 0.73828125, "learning_rate": 6.259668175727063e-05, "loss": 1.4949, "step": 6955 }, { "epoch": 0.890311020094714, "grad_norm": 0.61328125, "learning_rate": 6.245239098872746e-05, "loss": 0.9481, "step": 6956 }, { "epoch": 0.8904390119032382, "grad_norm": 0.71875, "learning_rate": 6.230826135295231e-05, "loss": 1.4716, "step": 6957 }, { "epoch": 0.8905670037117625, "grad_norm": 0.6171875, "learning_rate": 6.216429287471626e-05, "loss": 1.3941, "step": 6958 }, { "epoch": 0.8906949955202867, "grad_norm": 0.7734375, "learning_rate": 6.202048557876272e-05, "loss": 1.6401, "step": 6959 }, { "epoch": 0.8908229873288109, "grad_norm": 0.65234375, "learning_rate": 6.187683948980715e-05, "loss": 1.2172, "step": 6960 }, { "epoch": 0.8909509791373352, "grad_norm": 0.6796875, "learning_rate": 6.17333546325376e-05, "loss": 1.3049, "step": 6961 }, { "epoch": 0.8910789709458594, "grad_norm": 0.578125, "learning_rate": 6.159003103161432e-05, "loss": 0.8279, "step": 6962 }, { "epoch": 0.8912069627543837, "grad_norm": 0.62109375, "learning_rate": 6.144686871166949e-05, "loss": 1.3914, "step": 6963 }, { "epoch": 0.891334954562908, "grad_norm": 0.7734375, "learning_rate": 6.130386769730811e-05, "loss": 1.5065, "step": 6964 }, { "epoch": 0.8914629463714322, "grad_norm": 0.73828125, "learning_rate": 6.11610280131072e-05, "loss": 1.2295, "step": 6965 }, { "epoch": 0.8915909381799565, "grad_norm": 0.89453125, "learning_rate": 6.101834968361608e-05, "loss": 1.2158, "step": 6966 }, { "epoch": 0.8917189299884808, "grad_norm": 0.78125, "learning_rate": 6.0875832733356394e-05, "loss": 1.4647, "step": 6967 }, { "epoch": 0.891846921797005, "grad_norm": 0.57421875, "learning_rate": 6.0733477186821764e-05, "loss": 1.0427, "step": 6968 }, { "epoch": 0.8919749136055293, "grad_norm": 0.640625, "learning_rate": 6.05912830684785e-05, "loss": 1.2329, "step": 6969 }, { "epoch": 0.8921029054140535, "grad_norm": 0.703125, "learning_rate": 6.044925040276483e-05, "loss": 1.4745, "step": 6970 }, { "epoch": 0.8922308972225778, "grad_norm": 0.6875, "learning_rate": 6.0307379214091684e-05, "loss": 1.4897, "step": 6971 }, { "epoch": 0.892358889031102, "grad_norm": 0.58984375, "learning_rate": 6.016566952684155e-05, "loss": 1.0351, "step": 6972 }, { "epoch": 0.8924868808396262, "grad_norm": 0.59375, "learning_rate": 6.002412136536972e-05, "loss": 0.8937, "step": 6973 }, { "epoch": 0.8926148726481505, "grad_norm": 0.62109375, "learning_rate": 5.9882734754003523e-05, "loss": 1.5508, "step": 6974 }, { "epoch": 0.8927428644566747, "grad_norm": 0.6484375, "learning_rate": 5.974150971704251e-05, "loss": 1.0885, "step": 6975 }, { "epoch": 0.892870856265199, "grad_norm": 0.671875, "learning_rate": 5.96004462787586e-05, "loss": 1.5074, "step": 6976 }, { "epoch": 0.8929988480737233, "grad_norm": 0.7421875, "learning_rate": 5.945954446339574e-05, "loss": 1.4553, "step": 6977 }, { "epoch": 0.8931268398822475, "grad_norm": 0.66796875, "learning_rate": 5.9318804295170114e-05, "loss": 1.755, "step": 6978 }, { "epoch": 0.8932548316907718, "grad_norm": 0.6640625, "learning_rate": 5.917822579827037e-05, "loss": 1.1967, "step": 6979 }, { "epoch": 0.8933828234992961, "grad_norm": 0.68359375, "learning_rate": 5.9037808996857064e-05, "loss": 1.1038, "step": 6980 }, { "epoch": 0.8935108153078203, "grad_norm": 0.609375, "learning_rate": 5.889755391506313e-05, "loss": 1.0043, "step": 6981 }, { "epoch": 0.8936388071163446, "grad_norm": 0.96484375, "learning_rate": 5.875746057699383e-05, "loss": 1.336, "step": 6982 }, { "epoch": 0.8937667989248688, "grad_norm": 0.6328125, "learning_rate": 5.861752900672623e-05, "loss": 1.3724, "step": 6983 }, { "epoch": 0.8938947907333931, "grad_norm": 0.7421875, "learning_rate": 5.847775922831011e-05, "loss": 1.2278, "step": 6984 }, { "epoch": 0.8940227825419174, "grad_norm": 0.64453125, "learning_rate": 5.8338151265767134e-05, "loss": 0.889, "step": 6985 }, { "epoch": 0.8941507743504415, "grad_norm": 0.87109375, "learning_rate": 5.819870514309089e-05, "loss": 2.2294, "step": 6986 }, { "epoch": 0.8942787661589658, "grad_norm": 0.6171875, "learning_rate": 5.805942088424776e-05, "loss": 1.078, "step": 6987 }, { "epoch": 0.89440675796749, "grad_norm": 0.76171875, "learning_rate": 5.7920298513175815e-05, "loss": 1.723, "step": 6988 }, { "epoch": 0.8945347497760143, "grad_norm": 0.546875, "learning_rate": 5.7781338053785826e-05, "loss": 0.9356, "step": 6989 }, { "epoch": 0.8946627415845386, "grad_norm": 0.7890625, "learning_rate": 5.76425395299599e-05, "loss": 1.3908, "step": 6990 }, { "epoch": 0.8947907333930628, "grad_norm": 0.69921875, "learning_rate": 5.750390296555319e-05, "loss": 1.3265, "step": 6991 }, { "epoch": 0.8949187252015871, "grad_norm": 0.6953125, "learning_rate": 5.736542838439252e-05, "loss": 1.7221, "step": 6992 }, { "epoch": 0.8950467170101114, "grad_norm": 0.734375, "learning_rate": 5.7227115810276956e-05, "loss": 1.2575, "step": 6993 }, { "epoch": 0.8951747088186356, "grad_norm": 0.671875, "learning_rate": 5.708896526697782e-05, "loss": 1.4329, "step": 6994 }, { "epoch": 0.8953027006271599, "grad_norm": 0.6875, "learning_rate": 5.695097677823846e-05, "loss": 1.0308, "step": 6995 }, { "epoch": 0.8954306924356841, "grad_norm": 0.7265625, "learning_rate": 5.681315036777457e-05, "loss": 1.1795, "step": 6996 }, { "epoch": 0.8955586842442084, "grad_norm": 0.83203125, "learning_rate": 5.667548605927364e-05, "loss": 1.2797, "step": 6997 }, { "epoch": 0.8956866760527327, "grad_norm": 0.63671875, "learning_rate": 5.653798387639586e-05, "loss": 1.1739, "step": 6998 }, { "epoch": 0.8958146678612569, "grad_norm": 0.578125, "learning_rate": 5.640064384277288e-05, "loss": 0.9278, "step": 6999 }, { "epoch": 0.8959426596697811, "grad_norm": 0.76171875, "learning_rate": 5.6263465982008933e-05, "loss": 2.1875, "step": 7000 }, { "epoch": 0.8960706514783053, "grad_norm": 0.56640625, "learning_rate": 5.6126450317680265e-05, "loss": 1.1753, "step": 7001 }, { "epoch": 0.8961986432868296, "grad_norm": 0.56640625, "learning_rate": 5.5989596873335156e-05, "loss": 0.9284, "step": 7002 }, { "epoch": 0.8963266350953539, "grad_norm": 0.73046875, "learning_rate": 5.5852905672494235e-05, "loss": 2.0911, "step": 7003 }, { "epoch": 0.8964546269038781, "grad_norm": 0.58984375, "learning_rate": 5.5716376738649933e-05, "loss": 1.0411, "step": 7004 }, { "epoch": 0.8965826187124024, "grad_norm": 0.703125, "learning_rate": 5.5580010095267145e-05, "loss": 1.6312, "step": 7005 }, { "epoch": 0.8967106105209267, "grad_norm": 0.6875, "learning_rate": 5.5443805765782565e-05, "loss": 1.6877, "step": 7006 }, { "epoch": 0.8968386023294509, "grad_norm": 0.75390625, "learning_rate": 5.530776377360536e-05, "loss": 1.4825, "step": 7007 }, { "epoch": 0.8969665941379752, "grad_norm": 0.58984375, "learning_rate": 5.517188414211616e-05, "loss": 1.0574, "step": 7008 }, { "epoch": 0.8970945859464994, "grad_norm": 0.6953125, "learning_rate": 5.5036166894668285e-05, "loss": 1.708, "step": 7009 }, { "epoch": 0.8972225777550237, "grad_norm": 0.57421875, "learning_rate": 5.4900612054586964e-05, "loss": 0.6501, "step": 7010 }, { "epoch": 0.897350569563548, "grad_norm": 0.59765625, "learning_rate": 5.476521964516956e-05, "loss": 0.7071, "step": 7011 }, { "epoch": 0.8974785613720722, "grad_norm": 0.73828125, "learning_rate": 5.462998968968525e-05, "loss": 1.2115, "step": 7012 }, { "epoch": 0.8976065531805965, "grad_norm": 0.71484375, "learning_rate": 5.449492221137564e-05, "loss": 1.5498, "step": 7013 }, { "epoch": 0.8977345449891206, "grad_norm": 0.69921875, "learning_rate": 5.4360017233454296e-05, "loss": 1.2326, "step": 7014 }, { "epoch": 0.8978625367976449, "grad_norm": 0.69921875, "learning_rate": 5.422527477910666e-05, "loss": 2.2636, "step": 7015 }, { "epoch": 0.8979905286061692, "grad_norm": 0.80859375, "learning_rate": 5.409069487149054e-05, "loss": 1.2158, "step": 7016 }, { "epoch": 0.8981185204146934, "grad_norm": 0.65625, "learning_rate": 5.395627753373567e-05, "loss": 0.8902, "step": 7017 }, { "epoch": 0.8982465122232177, "grad_norm": 0.5859375, "learning_rate": 5.382202278894388e-05, "loss": 0.8741, "step": 7018 }, { "epoch": 0.898374504031742, "grad_norm": 0.69140625, "learning_rate": 5.368793066018896e-05, "loss": 1.3564, "step": 7019 }, { "epoch": 0.8985024958402662, "grad_norm": 1.1484375, "learning_rate": 5.35540011705169e-05, "loss": 1.0164, "step": 7020 }, { "epoch": 0.8986304876487905, "grad_norm": 0.71875, "learning_rate": 5.3420234342945514e-05, "loss": 1.6733, "step": 7021 }, { "epoch": 0.8987584794573147, "grad_norm": 0.63671875, "learning_rate": 5.328663020046487e-05, "loss": 1.175, "step": 7022 }, { "epoch": 0.898886471265839, "grad_norm": 0.54296875, "learning_rate": 5.315318876603714e-05, "loss": 0.989, "step": 7023 }, { "epoch": 0.8990144630743633, "grad_norm": 0.61328125, "learning_rate": 5.30199100625961e-05, "loss": 1.0515, "step": 7024 }, { "epoch": 0.8991424548828875, "grad_norm": 0.76171875, "learning_rate": 5.2886794113048195e-05, "loss": 1.4224, "step": 7025 }, { "epoch": 0.8992704466914118, "grad_norm": 0.63671875, "learning_rate": 5.275384094027125e-05, "loss": 1.1901, "step": 7026 }, { "epoch": 0.8993984384999361, "grad_norm": 0.62109375, "learning_rate": 5.2621050567115744e-05, "loss": 1.2595, "step": 7027 }, { "epoch": 0.8995264303084602, "grad_norm": 0.57421875, "learning_rate": 5.248842301640355e-05, "loss": 0.7152, "step": 7028 }, { "epoch": 0.8996544221169845, "grad_norm": 0.69921875, "learning_rate": 5.235595831092921e-05, "loss": 2.0291, "step": 7029 }, { "epoch": 0.8997824139255087, "grad_norm": 0.78125, "learning_rate": 5.222365647345861e-05, "loss": 1.2634, "step": 7030 }, { "epoch": 0.899910405734033, "grad_norm": 0.7890625, "learning_rate": 5.209151752673003e-05, "loss": 1.2086, "step": 7031 }, { "epoch": 0.9000383975425573, "grad_norm": 0.486328125, "learning_rate": 5.195954149345383e-05, "loss": 0.8486, "step": 7032 }, { "epoch": 0.9001663893510815, "grad_norm": 0.6328125, "learning_rate": 5.182772839631222e-05, "loss": 1.3288, "step": 7033 }, { "epoch": 0.9002943811596058, "grad_norm": 0.6328125, "learning_rate": 5.169607825795919e-05, "loss": 1.0289, "step": 7034 }, { "epoch": 0.90042237296813, "grad_norm": 0.85546875, "learning_rate": 5.156459110102119e-05, "loss": 1.2715, "step": 7035 }, { "epoch": 0.9005503647766543, "grad_norm": 0.462890625, "learning_rate": 5.143326694809636e-05, "loss": 0.5315, "step": 7036 }, { "epoch": 0.9006783565851786, "grad_norm": 0.78125, "learning_rate": 5.1302105821754765e-05, "loss": 1.9082, "step": 7037 }, { "epoch": 0.9008063483937028, "grad_norm": 0.482421875, "learning_rate": 5.117110774453881e-05, "loss": 0.7989, "step": 7038 }, { "epoch": 0.9009343402022271, "grad_norm": 0.578125, "learning_rate": 5.104027273896239e-05, "loss": 0.9675, "step": 7039 }, { "epoch": 0.9010623320107514, "grad_norm": 0.77734375, "learning_rate": 5.090960082751184e-05, "loss": 1.6182, "step": 7040 }, { "epoch": 0.9011903238192756, "grad_norm": 0.6484375, "learning_rate": 5.077909203264508e-05, "loss": 1.2184, "step": 7041 }, { "epoch": 0.9013183156277998, "grad_norm": 0.6640625, "learning_rate": 5.0648746376792396e-05, "loss": 1.8113, "step": 7042 }, { "epoch": 0.901446307436324, "grad_norm": 0.6484375, "learning_rate": 5.051856388235543e-05, "loss": 1.2986, "step": 7043 }, { "epoch": 0.9015742992448483, "grad_norm": 0.671875, "learning_rate": 5.0388544571708494e-05, "loss": 1.4754, "step": 7044 }, { "epoch": 0.9017022910533726, "grad_norm": 0.66796875, "learning_rate": 5.025868846719728e-05, "loss": 1.117, "step": 7045 }, { "epoch": 0.9018302828618968, "grad_norm": 0.77734375, "learning_rate": 5.012899559114004e-05, "loss": 2.2323, "step": 7046 }, { "epoch": 0.9019582746704211, "grad_norm": 0.83203125, "learning_rate": 4.999946596582594e-05, "loss": 1.4446, "step": 7047 }, { "epoch": 0.9020862664789453, "grad_norm": 0.58203125, "learning_rate": 4.987009961351729e-05, "loss": 1.1018, "step": 7048 }, { "epoch": 0.9022142582874696, "grad_norm": 0.77734375, "learning_rate": 4.974089655644776e-05, "loss": 1.6634, "step": 7049 }, { "epoch": 0.9023422500959939, "grad_norm": 0.78515625, "learning_rate": 4.961185681682279e-05, "loss": 2.1095, "step": 7050 }, { "epoch": 0.9024702419045181, "grad_norm": 0.52734375, "learning_rate": 4.94829804168202e-05, "loss": 0.5936, "step": 7051 }, { "epoch": 0.9025982337130424, "grad_norm": 1.0078125, "learning_rate": 4.935426737858917e-05, "loss": 2.0331, "step": 7052 }, { "epoch": 0.9027262255215667, "grad_norm": 0.58984375, "learning_rate": 4.9225717724251326e-05, "loss": 0.9377, "step": 7053 }, { "epoch": 0.9028542173300909, "grad_norm": 0.75, "learning_rate": 4.9097331475900006e-05, "loss": 1.5176, "step": 7054 }, { "epoch": 0.9029822091386152, "grad_norm": 0.62109375, "learning_rate": 4.8969108655600445e-05, "loss": 1.2552, "step": 7055 }, { "epoch": 0.9031102009471393, "grad_norm": 0.63671875, "learning_rate": 4.884104928538979e-05, "loss": 0.9139, "step": 7056 }, { "epoch": 0.9032381927556636, "grad_norm": 0.578125, "learning_rate": 4.871315338727711e-05, "loss": 1.2179, "step": 7057 }, { "epoch": 0.9033661845641879, "grad_norm": 0.71484375, "learning_rate": 4.858542098324348e-05, "loss": 1.3109, "step": 7058 }, { "epoch": 0.9034941763727121, "grad_norm": 0.69140625, "learning_rate": 4.845785209524156e-05, "loss": 1.3907, "step": 7059 }, { "epoch": 0.9036221681812364, "grad_norm": 0.54296875, "learning_rate": 4.8330446745196486e-05, "loss": 1.2605, "step": 7060 }, { "epoch": 0.9037501599897606, "grad_norm": 0.6953125, "learning_rate": 4.8203204955004634e-05, "loss": 1.7747, "step": 7061 }, { "epoch": 0.9038781517982849, "grad_norm": 0.578125, "learning_rate": 4.807612674653472e-05, "loss": 1.0932, "step": 7062 }, { "epoch": 0.9040061436068092, "grad_norm": 0.7421875, "learning_rate": 4.794921214162717e-05, "loss": 1.9243, "step": 7063 }, { "epoch": 0.9041341354153334, "grad_norm": 0.9921875, "learning_rate": 4.782246116209443e-05, "loss": 1.9346, "step": 7064 }, { "epoch": 0.9042621272238577, "grad_norm": 0.71484375, "learning_rate": 4.7695873829720405e-05, "loss": 1.2539, "step": 7065 }, { "epoch": 0.904390119032382, "grad_norm": 0.609375, "learning_rate": 4.7569450166261355e-05, "loss": 0.9817, "step": 7066 }, { "epoch": 0.9045181108409062, "grad_norm": 0.62890625, "learning_rate": 4.744319019344534e-05, "loss": 1.3898, "step": 7067 }, { "epoch": 0.9046461026494305, "grad_norm": 0.70703125, "learning_rate": 4.731709393297212e-05, "loss": 1.4314, "step": 7068 }, { "epoch": 0.9047740944579546, "grad_norm": 0.68359375, "learning_rate": 4.719116140651336e-05, "loss": 1.1836, "step": 7069 }, { "epoch": 0.9049020862664789, "grad_norm": 0.65234375, "learning_rate": 4.706539263571241e-05, "loss": 1.3145, "step": 7070 }, { "epoch": 0.9050300780750032, "grad_norm": 0.61328125, "learning_rate": 4.693978764218499e-05, "loss": 1.1618, "step": 7071 }, { "epoch": 0.9051580698835274, "grad_norm": 0.5078125, "learning_rate": 4.681434644751825e-05, "loss": 0.8274, "step": 7072 }, { "epoch": 0.9052860616920517, "grad_norm": 0.625, "learning_rate": 4.6689069073271415e-05, "loss": 1.8399, "step": 7073 }, { "epoch": 0.9054140535005759, "grad_norm": 0.7109375, "learning_rate": 4.656395554097514e-05, "loss": 1.227, "step": 7074 }, { "epoch": 0.9055420453091002, "grad_norm": 0.5625, "learning_rate": 4.643900587213246e-05, "loss": 1.4569, "step": 7075 }, { "epoch": 0.9056700371176245, "grad_norm": 0.65234375, "learning_rate": 4.631422008821784e-05, "loss": 1.3689, "step": 7076 }, { "epoch": 0.9057980289261487, "grad_norm": 0.6484375, "learning_rate": 4.618959821067792e-05, "loss": 1.3101, "step": 7077 }, { "epoch": 0.905926020734673, "grad_norm": 0.59765625, "learning_rate": 4.606514026093078e-05, "loss": 1.1557, "step": 7078 }, { "epoch": 0.9060540125431973, "grad_norm": 0.75390625, "learning_rate": 4.5940846260366766e-05, "loss": 1.8513, "step": 7079 }, { "epoch": 0.9061820043517215, "grad_norm": 0.55859375, "learning_rate": 4.5816716230347556e-05, "loss": 1.1859, "step": 7080 }, { "epoch": 0.9063099961602458, "grad_norm": 0.8671875, "learning_rate": 4.569275019220709e-05, "loss": 1.654, "step": 7081 }, { "epoch": 0.90643798796877, "grad_norm": 0.56640625, "learning_rate": 4.556894816725099e-05, "loss": 0.7372, "step": 7082 }, { "epoch": 0.9065659797772943, "grad_norm": 0.68359375, "learning_rate": 4.544531017675646e-05, "loss": 1.0335, "step": 7083 }, { "epoch": 0.9066939715858185, "grad_norm": 0.61328125, "learning_rate": 4.532183624197283e-05, "loss": 1.2064, "step": 7084 }, { "epoch": 0.9068219633943427, "grad_norm": 0.5546875, "learning_rate": 4.519852638412103e-05, "loss": 1.0515, "step": 7085 }, { "epoch": 0.906949955202867, "grad_norm": 0.69921875, "learning_rate": 4.507538062439409e-05, "loss": 1.5891, "step": 7086 }, { "epoch": 0.9070779470113912, "grad_norm": 0.73828125, "learning_rate": 4.49523989839562e-05, "loss": 1.8384, "step": 7087 }, { "epoch": 0.9072059388199155, "grad_norm": 0.6953125, "learning_rate": 4.4829581483944004e-05, "loss": 1.6506, "step": 7088 }, { "epoch": 0.9073339306284398, "grad_norm": 0.69140625, "learning_rate": 4.470692814546573e-05, "loss": 1.0895, "step": 7089 }, { "epoch": 0.907461922436964, "grad_norm": 0.62890625, "learning_rate": 4.4584438989601186e-05, "loss": 1.0166, "step": 7090 }, { "epoch": 0.9075899142454883, "grad_norm": 0.67578125, "learning_rate": 4.44621140374023e-05, "loss": 1.4948, "step": 7091 }, { "epoch": 0.9077179060540126, "grad_norm": 0.71875, "learning_rate": 4.433995330989216e-05, "loss": 1.5399, "step": 7092 }, { "epoch": 0.9078458978625368, "grad_norm": 0.6953125, "learning_rate": 4.421795682806662e-05, "loss": 1.7591, "step": 7093 }, { "epoch": 0.9079738896710611, "grad_norm": 0.53125, "learning_rate": 4.409612461289247e-05, "loss": 1.1147, "step": 7094 }, { "epoch": 0.9081018814795853, "grad_norm": 0.6171875, "learning_rate": 4.397445668530886e-05, "loss": 1.1263, "step": 7095 }, { "epoch": 0.9082298732881096, "grad_norm": 0.6015625, "learning_rate": 4.385295306622594e-05, "loss": 0.877, "step": 7096 }, { "epoch": 0.9083578650966339, "grad_norm": 0.78515625, "learning_rate": 4.373161377652624e-05, "loss": 0.9216, "step": 7097 }, { "epoch": 0.908485856905158, "grad_norm": 0.6640625, "learning_rate": 4.3610438837064056e-05, "loss": 1.7136, "step": 7098 }, { "epoch": 0.9086138487136823, "grad_norm": 0.5859375, "learning_rate": 4.34894282686652e-05, "loss": 1.0171, "step": 7099 }, { "epoch": 0.9087418405222066, "grad_norm": 0.7265625, "learning_rate": 4.3368582092127126e-05, "loss": 1.5052, "step": 7100 }, { "epoch": 0.9088698323307308, "grad_norm": 0.6484375, "learning_rate": 4.324790032821935e-05, "loss": 1.5504, "step": 7101 }, { "epoch": 0.9089978241392551, "grad_norm": 0.7265625, "learning_rate": 4.312738299768293e-05, "loss": 1.3666, "step": 7102 }, { "epoch": 0.9091258159477793, "grad_norm": 0.703125, "learning_rate": 4.300703012123075e-05, "loss": 1.3106, "step": 7103 }, { "epoch": 0.9092538077563036, "grad_norm": 0.765625, "learning_rate": 4.2886841719547485e-05, "loss": 1.3246, "step": 7104 }, { "epoch": 0.9093817995648279, "grad_norm": 0.59375, "learning_rate": 4.2766817813289396e-05, "loss": 1.3433, "step": 7105 }, { "epoch": 0.9095097913733521, "grad_norm": 0.63671875, "learning_rate": 4.2646958423084526e-05, "loss": 1.127, "step": 7106 }, { "epoch": 0.9096377831818764, "grad_norm": 0.66015625, "learning_rate": 4.252726356953274e-05, "loss": 1.4461, "step": 7107 }, { "epoch": 0.9097657749904006, "grad_norm": 1.1171875, "learning_rate": 4.240773327320557e-05, "loss": 1.9467, "step": 7108 }, { "epoch": 0.9098937667989249, "grad_norm": 0.58203125, "learning_rate": 4.228836755464593e-05, "loss": 0.6659, "step": 7109 }, { "epoch": 0.9100217586074492, "grad_norm": 0.60546875, "learning_rate": 4.2169166434369076e-05, "loss": 1.1636, "step": 7110 }, { "epoch": 0.9101497504159733, "grad_norm": 0.52734375, "learning_rate": 4.205012993286139e-05, "loss": 0.9667, "step": 7111 }, { "epoch": 0.9102777422244976, "grad_norm": 0.6328125, "learning_rate": 4.1931258070581515e-05, "loss": 1.0606, "step": 7112 }, { "epoch": 0.9104057340330219, "grad_norm": 0.6796875, "learning_rate": 4.181255086795954e-05, "loss": 1.3421, "step": 7113 }, { "epoch": 0.9105337258415461, "grad_norm": 0.65625, "learning_rate": 4.169400834539683e-05, "loss": 1.3568, "step": 7114 }, { "epoch": 0.9106617176500704, "grad_norm": 0.72265625, "learning_rate": 4.157563052326696e-05, "loss": 1.5347, "step": 7115 }, { "epoch": 0.9107897094585946, "grad_norm": 0.77734375, "learning_rate": 4.145741742191533e-05, "loss": 1.4018, "step": 7116 }, { "epoch": 0.9109177012671189, "grad_norm": 0.78515625, "learning_rate": 4.1339369061658694e-05, "loss": 1.422, "step": 7117 }, { "epoch": 0.9110456930756432, "grad_norm": 0.62890625, "learning_rate": 4.122148546278548e-05, "loss": 1.0924, "step": 7118 }, { "epoch": 0.9111736848841674, "grad_norm": 0.640625, "learning_rate": 4.1103766645555927e-05, "loss": 1.0768, "step": 7119 }, { "epoch": 0.9113016766926917, "grad_norm": 0.60546875, "learning_rate": 4.098621263020197e-05, "loss": 1.5358, "step": 7120 }, { "epoch": 0.9114296685012159, "grad_norm": 0.59765625, "learning_rate": 4.0868823436927106e-05, "loss": 0.8316, "step": 7121 }, { "epoch": 0.9115576603097402, "grad_norm": 0.6484375, "learning_rate": 4.0751599085906646e-05, "loss": 1.0937, "step": 7122 }, { "epoch": 0.9116856521182645, "grad_norm": 0.5546875, "learning_rate": 4.063453959728747e-05, "loss": 1.0212, "step": 7123 }, { "epoch": 0.9118136439267887, "grad_norm": 0.5625, "learning_rate": 4.051764499118804e-05, "loss": 1.0111, "step": 7124 }, { "epoch": 0.911941635735313, "grad_norm": 0.66796875, "learning_rate": 4.0400915287698734e-05, "loss": 1.2817, "step": 7125 }, { "epoch": 0.9120696275438372, "grad_norm": 0.6015625, "learning_rate": 4.0284350506881395e-05, "loss": 1.0092, "step": 7126 }, { "epoch": 0.9121976193523614, "grad_norm": 0.625, "learning_rate": 4.0167950668769546e-05, "loss": 1.295, "step": 7127 }, { "epoch": 0.9123256111608857, "grad_norm": 0.78515625, "learning_rate": 4.005171579336853e-05, "loss": 0.9912, "step": 7128 }, { "epoch": 0.9124536029694099, "grad_norm": 0.70703125, "learning_rate": 3.9935645900654904e-05, "loss": 1.0945, "step": 7129 }, { "epoch": 0.9125815947779342, "grad_norm": 1.203125, "learning_rate": 3.981974101057761e-05, "loss": 1.066, "step": 7130 }, { "epoch": 0.9127095865864585, "grad_norm": 0.640625, "learning_rate": 3.9704001143056276e-05, "loss": 1.3389, "step": 7131 }, { "epoch": 0.9128375783949827, "grad_norm": 0.67578125, "learning_rate": 3.9588426317982985e-05, "loss": 1.5236, "step": 7132 }, { "epoch": 0.912965570203507, "grad_norm": 0.5703125, "learning_rate": 3.947301655522106e-05, "loss": 0.9985, "step": 7133 }, { "epoch": 0.9130935620120312, "grad_norm": 0.7890625, "learning_rate": 3.935777187460565e-05, "loss": 1.6553, "step": 7134 }, { "epoch": 0.9132215538205555, "grad_norm": 0.7734375, "learning_rate": 3.924269229594335e-05, "loss": 1.2191, "step": 7135 }, { "epoch": 0.9133495456290798, "grad_norm": 0.62890625, "learning_rate": 3.912777783901234e-05, "loss": 1.0344, "step": 7136 }, { "epoch": 0.913477537437604, "grad_norm": 0.6484375, "learning_rate": 3.901302852356259e-05, "loss": 1.1649, "step": 7137 }, { "epoch": 0.9136055292461283, "grad_norm": 0.57421875, "learning_rate": 3.8898444369315554e-05, "loss": 0.9198, "step": 7138 }, { "epoch": 0.9137335210546526, "grad_norm": 0.828125, "learning_rate": 3.878402539596482e-05, "loss": 1.6909, "step": 7139 }, { "epoch": 0.9138615128631767, "grad_norm": 0.91015625, "learning_rate": 3.866977162317464e-05, "loss": 1.7527, "step": 7140 }, { "epoch": 0.913989504671701, "grad_norm": 0.490234375, "learning_rate": 3.855568307058155e-05, "loss": 0.8144, "step": 7141 }, { "epoch": 0.9141174964802252, "grad_norm": 0.640625, "learning_rate": 3.8441759757793516e-05, "loss": 1.291, "step": 7142 }, { "epoch": 0.9142454882887495, "grad_norm": 0.62109375, "learning_rate": 3.8328001704390104e-05, "loss": 1.0755, "step": 7143 }, { "epoch": 0.9143734800972738, "grad_norm": 0.6796875, "learning_rate": 3.821440892992267e-05, "loss": 1.5769, "step": 7144 }, { "epoch": 0.914501471905798, "grad_norm": 0.63671875, "learning_rate": 3.8100981453913716e-05, "loss": 1.4656, "step": 7145 }, { "epoch": 0.9146294637143223, "grad_norm": 0.66796875, "learning_rate": 3.798771929585754e-05, "loss": 0.9973, "step": 7146 }, { "epoch": 0.9147574555228465, "grad_norm": 0.6640625, "learning_rate": 3.787462247522033e-05, "loss": 1.833, "step": 7147 }, { "epoch": 0.9148854473313708, "grad_norm": 0.78515625, "learning_rate": 3.776169101143945e-05, "loss": 1.9395, "step": 7148 }, { "epoch": 0.9150134391398951, "grad_norm": 0.58984375, "learning_rate": 3.7648924923924135e-05, "loss": 1.0631, "step": 7149 }, { "epoch": 0.9151414309484193, "grad_norm": 0.53125, "learning_rate": 3.753632423205489e-05, "loss": 0.8048, "step": 7150 }, { "epoch": 0.9152694227569436, "grad_norm": 0.6484375, "learning_rate": 3.742388895518411e-05, "loss": 0.9815, "step": 7151 }, { "epoch": 0.9153974145654679, "grad_norm": 0.58984375, "learning_rate": 3.7311619112635584e-05, "loss": 0.9152, "step": 7152 }, { "epoch": 0.915525406373992, "grad_norm": 0.58984375, "learning_rate": 3.7199514723704754e-05, "loss": 1.2145, "step": 7153 }, { "epoch": 0.9156533981825163, "grad_norm": 0.6328125, "learning_rate": 3.708757580765842e-05, "loss": 0.8519, "step": 7154 }, { "epoch": 0.9157813899910405, "grad_norm": 0.62890625, "learning_rate": 3.6975802383735325e-05, "loss": 0.9585, "step": 7155 }, { "epoch": 0.9159093817995648, "grad_norm": 0.60546875, "learning_rate": 3.686419447114531e-05, "loss": 1.2383, "step": 7156 }, { "epoch": 0.9160373736080891, "grad_norm": 0.63671875, "learning_rate": 3.675275208907025e-05, "loss": 1.0011, "step": 7157 }, { "epoch": 0.9161653654166133, "grad_norm": 0.62890625, "learning_rate": 3.664147525666306e-05, "loss": 1.2651, "step": 7158 }, { "epoch": 0.9162933572251376, "grad_norm": 0.61328125, "learning_rate": 3.653036399304865e-05, "loss": 1.1003, "step": 7159 }, { "epoch": 0.9164213490336618, "grad_norm": 0.61328125, "learning_rate": 3.64194183173232e-05, "loss": 1.2451, "step": 7160 }, { "epoch": 0.9165493408421861, "grad_norm": 0.57421875, "learning_rate": 3.630863824855468e-05, "loss": 1.0277, "step": 7161 }, { "epoch": 0.9166773326507104, "grad_norm": 0.79296875, "learning_rate": 3.61980238057823e-05, "loss": 1.8214, "step": 7162 }, { "epoch": 0.9168053244592346, "grad_norm": 0.703125, "learning_rate": 3.608757500801696e-05, "loss": 1.7388, "step": 7163 }, { "epoch": 0.9169333162677589, "grad_norm": 0.65625, "learning_rate": 3.5977291874241035e-05, "loss": 1.2028, "step": 7164 }, { "epoch": 0.9170613080762832, "grad_norm": 0.69921875, "learning_rate": 3.58671744234087e-05, "loss": 0.8566, "step": 7165 }, { "epoch": 0.9171892998848074, "grad_norm": 0.7109375, "learning_rate": 3.575722267444526e-05, "loss": 1.4664, "step": 7166 }, { "epoch": 0.9173172916933316, "grad_norm": 0.671875, "learning_rate": 3.564743664624759e-05, "loss": 1.3674, "step": 7167 }, { "epoch": 0.9174452835018558, "grad_norm": 0.671875, "learning_rate": 3.553781635768438e-05, "loss": 1.0362, "step": 7168 }, { "epoch": 0.9175732753103801, "grad_norm": 0.77734375, "learning_rate": 3.542836182759546e-05, "loss": 1.8225, "step": 7169 }, { "epoch": 0.9177012671189044, "grad_norm": 0.90234375, "learning_rate": 3.531907307479265e-05, "loss": 2.0865, "step": 7170 }, { "epoch": 0.9178292589274286, "grad_norm": 0.59375, "learning_rate": 3.520995011805883e-05, "loss": 1.1705, "step": 7171 }, { "epoch": 0.9179572507359529, "grad_norm": 0.58984375, "learning_rate": 3.5100992976148436e-05, "loss": 1.2688, "step": 7172 }, { "epoch": 0.9180852425444771, "grad_norm": 0.625, "learning_rate": 3.499220166778783e-05, "loss": 1.2349, "step": 7173 }, { "epoch": 0.9182132343530014, "grad_norm": 0.75390625, "learning_rate": 3.488357621167426e-05, "loss": 2.2854, "step": 7174 }, { "epoch": 0.9183412261615257, "grad_norm": 0.54296875, "learning_rate": 3.4775116626477036e-05, "loss": 0.7769, "step": 7175 }, { "epoch": 0.9184692179700499, "grad_norm": 0.5390625, "learning_rate": 3.466682293083645e-05, "loss": 1.1432, "step": 7176 }, { "epoch": 0.9185972097785742, "grad_norm": 0.494140625, "learning_rate": 3.4558695143364736e-05, "loss": 0.6112, "step": 7177 }, { "epoch": 0.9187252015870985, "grad_norm": 0.8828125, "learning_rate": 3.4450733282645254e-05, "loss": 2.0639, "step": 7178 }, { "epoch": 0.9188531933956227, "grad_norm": 0.65234375, "learning_rate": 3.4342937367233154e-05, "loss": 1.1411, "step": 7179 }, { "epoch": 0.918981185204147, "grad_norm": 0.63671875, "learning_rate": 3.423530741565461e-05, "loss": 1.2709, "step": 7180 }, { "epoch": 0.9191091770126711, "grad_norm": 0.7421875, "learning_rate": 3.412784344640796e-05, "loss": 1.353, "step": 7181 }, { "epoch": 0.9192371688211954, "grad_norm": 0.7109375, "learning_rate": 3.4020545477962403e-05, "loss": 1.7763, "step": 7182 }, { "epoch": 0.9193651606297197, "grad_norm": 0.66015625, "learning_rate": 3.3913413528758876e-05, "loss": 0.9806, "step": 7183 }, { "epoch": 0.9194931524382439, "grad_norm": 0.69921875, "learning_rate": 3.3806447617209636e-05, "loss": 1.5415, "step": 7184 }, { "epoch": 0.9196211442467682, "grad_norm": 0.69921875, "learning_rate": 3.369964776169876e-05, "loss": 1.8714, "step": 7185 }, { "epoch": 0.9197491360552925, "grad_norm": 0.7734375, "learning_rate": 3.359301398058146e-05, "loss": 1.3952, "step": 7186 }, { "epoch": 0.9198771278638167, "grad_norm": 0.5390625, "learning_rate": 3.3486546292184284e-05, "loss": 0.7393, "step": 7187 }, { "epoch": 0.920005119672341, "grad_norm": 0.7890625, "learning_rate": 3.338024471480583e-05, "loss": 1.7557, "step": 7188 }, { "epoch": 0.9201331114808652, "grad_norm": 0.6640625, "learning_rate": 3.327410926671526e-05, "loss": 1.5267, "step": 7189 }, { "epoch": 0.9202611032893895, "grad_norm": 0.6484375, "learning_rate": 3.3168139966153974e-05, "loss": 1.113, "step": 7190 }, { "epoch": 0.9203890950979138, "grad_norm": 0.70703125, "learning_rate": 3.306233683133441e-05, "loss": 1.3718, "step": 7191 }, { "epoch": 0.920517086906438, "grad_norm": 0.7578125, "learning_rate": 3.295669988044059e-05, "loss": 1.4191, "step": 7192 }, { "epoch": 0.9206450787149623, "grad_norm": 0.71875, "learning_rate": 3.285122913162797e-05, "loss": 1.4476, "step": 7193 }, { "epoch": 0.9207730705234864, "grad_norm": 0.53125, "learning_rate": 3.274592460302339e-05, "loss": 0.8771, "step": 7194 }, { "epoch": 0.9209010623320107, "grad_norm": 0.609375, "learning_rate": 3.264078631272505e-05, "loss": 1.2699, "step": 7195 }, { "epoch": 0.921029054140535, "grad_norm": 0.77734375, "learning_rate": 3.253581427880281e-05, "loss": 1.4665, "step": 7196 }, { "epoch": 0.9211570459490592, "grad_norm": 0.76171875, "learning_rate": 3.243100851929792e-05, "loss": 1.8622, "step": 7197 }, { "epoch": 0.9212850377575835, "grad_norm": 0.8125, "learning_rate": 3.2326369052222635e-05, "loss": 1.002, "step": 7198 }, { "epoch": 0.9214130295661078, "grad_norm": 0.71484375, "learning_rate": 3.2221895895561013e-05, "loss": 1.6169, "step": 7199 }, { "epoch": 0.921541021374632, "grad_norm": 0.734375, "learning_rate": 3.2117589067268696e-05, "loss": 1.7447, "step": 7200 }, { "epoch": 0.9216690131831563, "grad_norm": 0.6171875, "learning_rate": 3.201344858527233e-05, "loss": 1.1782, "step": 7201 }, { "epoch": 0.9217970049916805, "grad_norm": 0.6875, "learning_rate": 3.190947446747017e-05, "loss": 1.745, "step": 7202 }, { "epoch": 0.9219249968002048, "grad_norm": 0.6015625, "learning_rate": 3.18056667317318e-05, "loss": 1.0928, "step": 7203 }, { "epoch": 0.9220529886087291, "grad_norm": 0.63671875, "learning_rate": 3.170202539589839e-05, "loss": 1.2122, "step": 7204 }, { "epoch": 0.9221809804172533, "grad_norm": 0.61328125, "learning_rate": 3.159855047778226e-05, "loss": 1.0202, "step": 7205 }, { "epoch": 0.9223089722257776, "grad_norm": 0.65234375, "learning_rate": 3.149524199516729e-05, "loss": 1.2713, "step": 7206 }, { "epoch": 0.9224369640343018, "grad_norm": 0.7265625, "learning_rate": 3.1392099965808716e-05, "loss": 1.431, "step": 7207 }, { "epoch": 0.922564955842826, "grad_norm": 0.84765625, "learning_rate": 3.128912440743325e-05, "loss": 1.7244, "step": 7208 }, { "epoch": 0.9226929476513503, "grad_norm": 0.5703125, "learning_rate": 3.118631533773886e-05, "loss": 1.4054, "step": 7209 }, { "epoch": 0.9228209394598745, "grad_norm": 0.6015625, "learning_rate": 3.108367277439505e-05, "loss": 1.1894, "step": 7210 }, { "epoch": 0.9229489312683988, "grad_norm": 0.80859375, "learning_rate": 3.098119673504241e-05, "loss": 2.2067, "step": 7211 }, { "epoch": 0.9230769230769231, "grad_norm": 0.482421875, "learning_rate": 3.0878887237293154e-05, "loss": 0.6942, "step": 7212 }, { "epoch": 0.9232049148854473, "grad_norm": 0.62109375, "learning_rate": 3.0776744298730895e-05, "loss": 1.5675, "step": 7213 }, { "epoch": 0.9233329066939716, "grad_norm": 0.70703125, "learning_rate": 3.06747679369106e-05, "loss": 1.175, "step": 7214 }, { "epoch": 0.9234608985024958, "grad_norm": 0.69140625, "learning_rate": 3.0572958169358456e-05, "loss": 1.5389, "step": 7215 }, { "epoch": 0.9235888903110201, "grad_norm": 0.69140625, "learning_rate": 3.0471315013572143e-05, "loss": 1.9574, "step": 7216 }, { "epoch": 0.9237168821195444, "grad_norm": 0.57421875, "learning_rate": 3.0369838487020796e-05, "loss": 0.9763, "step": 7217 }, { "epoch": 0.9238448739280686, "grad_norm": 0.451171875, "learning_rate": 3.0268528607144685e-05, "loss": 0.6109, "step": 7218 }, { "epoch": 0.9239728657365929, "grad_norm": 0.76953125, "learning_rate": 3.016738539135566e-05, "loss": 1.5177, "step": 7219 }, { "epoch": 0.9241008575451171, "grad_norm": 0.52734375, "learning_rate": 3.00664088570366e-05, "loss": 1.0007, "step": 7220 }, { "epoch": 0.9242288493536414, "grad_norm": 0.9296875, "learning_rate": 2.9965599021542278e-05, "loss": 1.9076, "step": 7221 }, { "epoch": 0.9243568411621657, "grad_norm": 0.68359375, "learning_rate": 2.9864955902198177e-05, "loss": 1.1823, "step": 7222 }, { "epoch": 0.9244848329706898, "grad_norm": 0.5390625, "learning_rate": 2.976447951630179e-05, "loss": 0.9145, "step": 7223 }, { "epoch": 0.9246128247792141, "grad_norm": 0.6875, "learning_rate": 2.9664169881121305e-05, "loss": 0.8746, "step": 7224 }, { "epoch": 0.9247408165877384, "grad_norm": 0.4921875, "learning_rate": 2.956402701389671e-05, "loss": 0.8362, "step": 7225 }, { "epoch": 0.9248688083962626, "grad_norm": 0.78125, "learning_rate": 2.9464050931839236e-05, "loss": 1.098, "step": 7226 }, { "epoch": 0.9249968002047869, "grad_norm": 0.6171875, "learning_rate": 2.936424165213125e-05, "loss": 1.0328, "step": 7227 }, { "epoch": 0.9251247920133111, "grad_norm": 0.796875, "learning_rate": 2.9264599191926588e-05, "loss": 1.4256, "step": 7228 }, { "epoch": 0.9252527838218354, "grad_norm": 0.63671875, "learning_rate": 2.916512356835066e-05, "loss": 0.73, "step": 7229 }, { "epoch": 0.9253807756303597, "grad_norm": 0.54296875, "learning_rate": 2.9065814798499678e-05, "loss": 1.2118, "step": 7230 }, { "epoch": 0.9255087674388839, "grad_norm": 0.59765625, "learning_rate": 2.896667289944166e-05, "loss": 1.2119, "step": 7231 }, { "epoch": 0.9256367592474082, "grad_norm": 0.67578125, "learning_rate": 2.886769788821575e-05, "loss": 1.5002, "step": 7232 }, { "epoch": 0.9257647510559324, "grad_norm": 0.6171875, "learning_rate": 2.876888978183234e-05, "loss": 1.0247, "step": 7233 }, { "epoch": 0.9258927428644567, "grad_norm": 0.7734375, "learning_rate": 2.8670248597273075e-05, "loss": 1.9285, "step": 7234 }, { "epoch": 0.926020734672981, "grad_norm": 0.7109375, "learning_rate": 2.8571774351491275e-05, "loss": 1.2041, "step": 7235 }, { "epoch": 0.9261487264815051, "grad_norm": 0.6015625, "learning_rate": 2.8473467061411184e-05, "loss": 1.2101, "step": 7236 }, { "epoch": 0.9262767182900294, "grad_norm": 0.625, "learning_rate": 2.837532674392862e-05, "loss": 1.1438, "step": 7237 }, { "epoch": 0.9264047100985537, "grad_norm": 0.640625, "learning_rate": 2.8277353415910422e-05, "loss": 1.1685, "step": 7238 }, { "epoch": 0.9265327019070779, "grad_norm": 0.498046875, "learning_rate": 2.8179547094195124e-05, "loss": 0.9125, "step": 7239 }, { "epoch": 0.9266606937156022, "grad_norm": 0.7109375, "learning_rate": 2.8081907795592164e-05, "loss": 1.4172, "step": 7240 }, { "epoch": 0.9267886855241264, "grad_norm": 0.74609375, "learning_rate": 2.798443553688257e-05, "loss": 1.3335, "step": 7241 }, { "epoch": 0.9269166773326507, "grad_norm": 0.69140625, "learning_rate": 2.7887130334818376e-05, "loss": 1.7512, "step": 7242 }, { "epoch": 0.927044669141175, "grad_norm": 0.625, "learning_rate": 2.7789992206123104e-05, "loss": 1.2921, "step": 7243 }, { "epoch": 0.9271726609496992, "grad_norm": 0.765625, "learning_rate": 2.7693021167491507e-05, "loss": 1.2562, "step": 7244 }, { "epoch": 0.9273006527582235, "grad_norm": 0.71875, "learning_rate": 2.7596217235589917e-05, "loss": 1.1057, "step": 7245 }, { "epoch": 0.9274286445667477, "grad_norm": 0.6796875, "learning_rate": 2.749958042705514e-05, "loss": 1.1457, "step": 7246 }, { "epoch": 0.927556636375272, "grad_norm": 0.65625, "learning_rate": 2.740311075849611e-05, "loss": 1.0665, "step": 7247 }, { "epoch": 0.9276846281837963, "grad_norm": 0.6875, "learning_rate": 2.7306808246492676e-05, "loss": 1.6068, "step": 7248 }, { "epoch": 0.9278126199923205, "grad_norm": 0.59375, "learning_rate": 2.7210672907595935e-05, "loss": 0.9735, "step": 7249 }, { "epoch": 0.9279406118008448, "grad_norm": 0.59765625, "learning_rate": 2.7114704758328334e-05, "loss": 0.8537, "step": 7250 }, { "epoch": 0.928068603609369, "grad_norm": 0.765625, "learning_rate": 2.701890381518357e-05, "loss": 1.1325, "step": 7251 }, { "epoch": 0.9281965954178932, "grad_norm": 0.7578125, "learning_rate": 2.6923270094626474e-05, "loss": 1.3095, "step": 7252 }, { "epoch": 0.9283245872264175, "grad_norm": 0.578125, "learning_rate": 2.6827803613093448e-05, "loss": 0.7333, "step": 7253 }, { "epoch": 0.9284525790349417, "grad_norm": 0.58203125, "learning_rate": 2.6732504386991928e-05, "loss": 1.2308, "step": 7254 }, { "epoch": 0.928580570843466, "grad_norm": 0.50390625, "learning_rate": 2.663737243270048e-05, "loss": 0.7533, "step": 7255 }, { "epoch": 0.9287085626519903, "grad_norm": 0.80078125, "learning_rate": 2.654240776656913e-05, "loss": 1.5704, "step": 7256 }, { "epoch": 0.9288365544605145, "grad_norm": 0.5546875, "learning_rate": 2.6447610404919054e-05, "loss": 0.9897, "step": 7257 }, { "epoch": 0.9289645462690388, "grad_norm": 0.5390625, "learning_rate": 2.6352980364042877e-05, "loss": 1.0538, "step": 7258 }, { "epoch": 0.9290925380775631, "grad_norm": 0.64453125, "learning_rate": 2.625851766020426e-05, "loss": 1.3485, "step": 7259 }, { "epoch": 0.9292205298860873, "grad_norm": 0.58984375, "learning_rate": 2.6164222309637996e-05, "loss": 1.1208, "step": 7260 }, { "epoch": 0.9293485216946116, "grad_norm": 0.87890625, "learning_rate": 2.607009432855034e-05, "loss": 1.5964, "step": 7261 }, { "epoch": 0.9294765135031358, "grad_norm": 0.625, "learning_rate": 2.59761337331188e-05, "loss": 1.0107, "step": 7262 }, { "epoch": 0.9296045053116601, "grad_norm": 0.51953125, "learning_rate": 2.588234053949212e-05, "loss": 1.0516, "step": 7263 }, { "epoch": 0.9297324971201844, "grad_norm": 0.55859375, "learning_rate": 2.5788714763789965e-05, "loss": 1.1149, "step": 7264 }, { "epoch": 0.9298604889287085, "grad_norm": 0.6796875, "learning_rate": 2.5695256422103464e-05, "loss": 0.9339, "step": 7265 }, { "epoch": 0.9299884807372328, "grad_norm": 0.921875, "learning_rate": 2.5601965530494987e-05, "loss": 1.6543, "step": 7266 }, { "epoch": 0.930116472545757, "grad_norm": 0.63671875, "learning_rate": 2.5508842104998265e-05, "loss": 0.9969, "step": 7267 }, { "epoch": 0.9302444643542813, "grad_norm": 0.671875, "learning_rate": 2.541588616161772e-05, "loss": 1.4386, "step": 7268 }, { "epoch": 0.9303724561628056, "grad_norm": 0.62890625, "learning_rate": 2.532309771632968e-05, "loss": 1.3202, "step": 7269 }, { "epoch": 0.9305004479713298, "grad_norm": 0.546875, "learning_rate": 2.523047678508106e-05, "loss": 0.8114, "step": 7270 }, { "epoch": 0.9306284397798541, "grad_norm": 0.828125, "learning_rate": 2.5138023383790454e-05, "loss": 1.8844, "step": 7271 }, { "epoch": 0.9307564315883784, "grad_norm": 0.6328125, "learning_rate": 2.504573752834738e-05, "loss": 1.7221, "step": 7272 }, { "epoch": 0.9308844233969026, "grad_norm": 0.70703125, "learning_rate": 2.4953619234612812e-05, "loss": 1.0227, "step": 7273 }, { "epoch": 0.9310124152054269, "grad_norm": 0.6171875, "learning_rate": 2.4861668518418535e-05, "loss": 1.0563, "step": 7274 }, { "epoch": 0.9311404070139511, "grad_norm": 0.53515625, "learning_rate": 2.4769885395568015e-05, "loss": 1.0697, "step": 7275 }, { "epoch": 0.9312683988224754, "grad_norm": 0.7578125, "learning_rate": 2.4678269881835636e-05, "loss": 1.2335, "step": 7276 }, { "epoch": 0.9313963906309997, "grad_norm": 0.69921875, "learning_rate": 2.45868219929668e-05, "loss": 1.7821, "step": 7277 }, { "epoch": 0.9315243824395238, "grad_norm": 0.78125, "learning_rate": 2.4495541744678497e-05, "loss": 1.5068, "step": 7278 }, { "epoch": 0.9316523742480481, "grad_norm": 0.85546875, "learning_rate": 2.4404429152658724e-05, "loss": 1.8967, "step": 7279 }, { "epoch": 0.9317803660565723, "grad_norm": 0.73046875, "learning_rate": 2.431348423256652e-05, "loss": 1.7277, "step": 7280 }, { "epoch": 0.9319083578650966, "grad_norm": 0.65234375, "learning_rate": 2.4222707000032374e-05, "loss": 1.261, "step": 7281 }, { "epoch": 0.9320363496736209, "grad_norm": 0.71875, "learning_rate": 2.41320974706577e-05, "loss": 1.3236, "step": 7282 }, { "epoch": 0.9321643414821451, "grad_norm": 0.75, "learning_rate": 2.404165566001537e-05, "loss": 1.6904, "step": 7283 }, { "epoch": 0.9322923332906694, "grad_norm": 0.58203125, "learning_rate": 2.3951381583649178e-05, "loss": 1.1405, "step": 7284 }, { "epoch": 0.9324203250991937, "grad_norm": 0.75390625, "learning_rate": 2.3861275257074265e-05, "loss": 1.5847, "step": 7285 }, { "epoch": 0.9325483169077179, "grad_norm": 0.72265625, "learning_rate": 2.3771336695776802e-05, "loss": 1.9625, "step": 7286 }, { "epoch": 0.9326763087162422, "grad_norm": 0.73828125, "learning_rate": 2.3681565915214086e-05, "loss": 1.105, "step": 7287 }, { "epoch": 0.9328043005247664, "grad_norm": 0.5625, "learning_rate": 2.3591962930814894e-05, "loss": 1.0677, "step": 7288 }, { "epoch": 0.9329322923332907, "grad_norm": 0.609375, "learning_rate": 2.3502527757978896e-05, "loss": 1.3722, "step": 7289 }, { "epoch": 0.933060284141815, "grad_norm": 0.65625, "learning_rate": 2.341326041207692e-05, "loss": 1.2119, "step": 7290 }, { "epoch": 0.9331882759503392, "grad_norm": 0.546875, "learning_rate": 2.3324160908451018e-05, "loss": 1.0004, "step": 7291 }, { "epoch": 0.9333162677588634, "grad_norm": 0.5859375, "learning_rate": 2.3235229262414392e-05, "loss": 0.9334, "step": 7292 }, { "epoch": 0.9334442595673876, "grad_norm": 0.5703125, "learning_rate": 2.3146465489251367e-05, "loss": 1.0628, "step": 7293 }, { "epoch": 0.9335722513759119, "grad_norm": 0.61328125, "learning_rate": 2.3057869604217407e-05, "loss": 1.3144, "step": 7294 }, { "epoch": 0.9337002431844362, "grad_norm": 0.78125, "learning_rate": 2.296944162253922e-05, "loss": 1.5947, "step": 7295 }, { "epoch": 0.9338282349929604, "grad_norm": 0.7421875, "learning_rate": 2.2881181559414655e-05, "loss": 1.5796, "step": 7296 }, { "epoch": 0.9339562268014847, "grad_norm": 0.60546875, "learning_rate": 2.2793089430012458e-05, "loss": 1.0058, "step": 7297 }, { "epoch": 0.934084218610009, "grad_norm": 0.6328125, "learning_rate": 2.270516524947297e-05, "loss": 1.4262, "step": 7298 }, { "epoch": 0.9342122104185332, "grad_norm": 0.609375, "learning_rate": 2.26174090329071e-05, "loss": 1.121, "step": 7299 }, { "epoch": 0.9343402022270575, "grad_norm": 0.62109375, "learning_rate": 2.2529820795397226e-05, "loss": 1.0257, "step": 7300 }, { "epoch": 0.9344681940355817, "grad_norm": 0.6484375, "learning_rate": 2.2442400551996868e-05, "loss": 0.9734, "step": 7301 }, { "epoch": 0.934596185844106, "grad_norm": 0.71484375, "learning_rate": 2.2355148317730668e-05, "loss": 1.4949, "step": 7302 }, { "epoch": 0.9347241776526303, "grad_norm": 0.65234375, "learning_rate": 2.22680641075943e-05, "loss": 1.3556, "step": 7303 }, { "epoch": 0.9348521694611545, "grad_norm": 0.59375, "learning_rate": 2.2181147936554567e-05, "loss": 1.427, "step": 7304 }, { "epoch": 0.9349801612696788, "grad_norm": 0.62109375, "learning_rate": 2.2094399819549303e-05, "loss": 1.1091, "step": 7305 }, { "epoch": 0.9351081530782029, "grad_norm": 0.65234375, "learning_rate": 2.20078197714878e-05, "loss": 1.0525, "step": 7306 }, { "epoch": 0.9352361448867272, "grad_norm": 0.59765625, "learning_rate": 2.1921407807250272e-05, "loss": 0.9384, "step": 7307 }, { "epoch": 0.9353641366952515, "grad_norm": 0.63671875, "learning_rate": 2.183516394168783e-05, "loss": 1.1675, "step": 7308 }, { "epoch": 0.9354921285037757, "grad_norm": 0.75, "learning_rate": 2.1749088189622845e-05, "loss": 1.7883, "step": 7309 }, { "epoch": 0.9356201203123, "grad_norm": 0.640625, "learning_rate": 2.166318056584904e-05, "loss": 1.2717, "step": 7310 }, { "epoch": 0.9357481121208243, "grad_norm": 0.640625, "learning_rate": 2.1577441085130934e-05, "loss": 1.1216, "step": 7311 }, { "epoch": 0.9358761039293485, "grad_norm": 0.78125, "learning_rate": 2.1491869762204297e-05, "loss": 1.3331, "step": 7312 }, { "epoch": 0.9360040957378728, "grad_norm": 0.6640625, "learning_rate": 2.140646661177581e-05, "loss": 1.4323, "step": 7313 }, { "epoch": 0.936132087546397, "grad_norm": 0.5234375, "learning_rate": 2.132123164852351e-05, "loss": 0.8994, "step": 7314 }, { "epoch": 0.9362600793549213, "grad_norm": 0.890625, "learning_rate": 2.1236164887096342e-05, "loss": 1.9351, "step": 7315 }, { "epoch": 0.9363880711634456, "grad_norm": 0.71875, "learning_rate": 2.1151266342114506e-05, "loss": 1.6955, "step": 7316 }, { "epoch": 0.9365160629719698, "grad_norm": 0.6953125, "learning_rate": 2.1066536028169105e-05, "loss": 1.6772, "step": 7317 }, { "epoch": 0.9366440547804941, "grad_norm": 0.7578125, "learning_rate": 2.0981973959822377e-05, "loss": 1.3778, "step": 7318 }, { "epoch": 0.9367720465890182, "grad_norm": 0.55078125, "learning_rate": 2.0897580151607808e-05, "loss": 1.0524, "step": 7319 }, { "epoch": 0.9369000383975425, "grad_norm": 0.609375, "learning_rate": 2.08133546180298e-05, "loss": 1.3963, "step": 7320 }, { "epoch": 0.9370280302060668, "grad_norm": 0.72265625, "learning_rate": 2.0729297373563993e-05, "loss": 1.6554, "step": 7321 }, { "epoch": 0.937156022014591, "grad_norm": 0.6640625, "learning_rate": 2.064540843265672e-05, "loss": 1.1962, "step": 7322 }, { "epoch": 0.9372840138231153, "grad_norm": 0.5546875, "learning_rate": 2.0561687809725782e-05, "loss": 1.0622, "step": 7323 }, { "epoch": 0.9374120056316396, "grad_norm": 0.7109375, "learning_rate": 2.047813551916e-05, "loss": 1.5639, "step": 7324 }, { "epoch": 0.9375399974401638, "grad_norm": 0.55859375, "learning_rate": 2.0394751575319116e-05, "loss": 1.258, "step": 7325 }, { "epoch": 0.9376679892486881, "grad_norm": 0.7578125, "learning_rate": 2.0311535992533993e-05, "loss": 1.4774, "step": 7326 }, { "epoch": 0.9377959810572123, "grad_norm": 0.5859375, "learning_rate": 2.0228488785106634e-05, "loss": 1.0753, "step": 7327 }, { "epoch": 0.9379239728657366, "grad_norm": 0.7734375, "learning_rate": 2.0145609967309963e-05, "loss": 1.2936, "step": 7328 }, { "epoch": 0.9380519646742609, "grad_norm": 0.8359375, "learning_rate": 2.0062899553388357e-05, "loss": 1.3096, "step": 7329 }, { "epoch": 0.9381799564827851, "grad_norm": 0.70703125, "learning_rate": 1.998035755755656e-05, "loss": 1.4639, "step": 7330 }, { "epoch": 0.9383079482913094, "grad_norm": 0.70703125, "learning_rate": 1.989798399400089e-05, "loss": 1.472, "step": 7331 }, { "epoch": 0.9384359400998337, "grad_norm": 0.6484375, "learning_rate": 1.9815778876878577e-05, "loss": 1.1644, "step": 7332 }, { "epoch": 0.9385639319083579, "grad_norm": 0.6796875, "learning_rate": 1.9733742220317873e-05, "loss": 1.2643, "step": 7333 }, { "epoch": 0.9386919237168821, "grad_norm": 0.6875, "learning_rate": 1.9651874038418283e-05, "loss": 1.1888, "step": 7334 }, { "epoch": 0.9388199155254063, "grad_norm": 0.67578125, "learning_rate": 1.9570174345249992e-05, "loss": 1.6116, "step": 7335 }, { "epoch": 0.9389479073339306, "grad_norm": 0.66015625, "learning_rate": 1.9488643154854434e-05, "loss": 0.9107, "step": 7336 }, { "epoch": 0.9390758991424549, "grad_norm": 0.59375, "learning_rate": 1.9407280481244183e-05, "loss": 1.271, "step": 7337 }, { "epoch": 0.9392038909509791, "grad_norm": 0.7109375, "learning_rate": 1.9326086338402605e-05, "loss": 1.63, "step": 7338 }, { "epoch": 0.9393318827595034, "grad_norm": 0.63671875, "learning_rate": 1.9245060740284316e-05, "loss": 0.8548, "step": 7339 }, { "epoch": 0.9394598745680276, "grad_norm": 0.625, "learning_rate": 1.916420370081484e-05, "loss": 1.5296, "step": 7340 }, { "epoch": 0.9395878663765519, "grad_norm": 0.546875, "learning_rate": 1.9083515233890736e-05, "loss": 1.0537, "step": 7341 }, { "epoch": 0.9397158581850762, "grad_norm": 0.74609375, "learning_rate": 1.9002995353379683e-05, "loss": 1.3953, "step": 7342 }, { "epoch": 0.9398438499936004, "grad_norm": 0.5546875, "learning_rate": 1.89226440731205e-05, "loss": 1.1903, "step": 7343 }, { "epoch": 0.9399718418021247, "grad_norm": 0.7265625, "learning_rate": 1.8842461406922473e-05, "loss": 0.9246, "step": 7344 }, { "epoch": 0.940099833610649, "grad_norm": 0.72265625, "learning_rate": 1.876244736856658e-05, "loss": 1.4625, "step": 7345 }, { "epoch": 0.9402278254191732, "grad_norm": 0.6328125, "learning_rate": 1.868260197180438e-05, "loss": 1.2463, "step": 7346 }, { "epoch": 0.9403558172276975, "grad_norm": 0.80859375, "learning_rate": 1.860292523035878e-05, "loss": 1.2823, "step": 7347 }, { "epoch": 0.9404838090362216, "grad_norm": 0.54296875, "learning_rate": 1.852341715792327e-05, "loss": 0.837, "step": 7348 }, { "epoch": 0.9406118008447459, "grad_norm": 0.6640625, "learning_rate": 1.8444077768162815e-05, "loss": 1.3919, "step": 7349 }, { "epoch": 0.9407397926532702, "grad_norm": 0.57421875, "learning_rate": 1.8364907074713056e-05, "loss": 0.9697, "step": 7350 }, { "epoch": 0.9408677844617944, "grad_norm": 0.65234375, "learning_rate": 1.828590509118089e-05, "loss": 1.4825, "step": 7351 }, { "epoch": 0.9409957762703187, "grad_norm": 0.6328125, "learning_rate": 1.8207071831144006e-05, "loss": 1.1794, "step": 7352 }, { "epoch": 0.9411237680788429, "grad_norm": 0.474609375, "learning_rate": 1.8128407308151128e-05, "loss": 0.7091, "step": 7353 }, { "epoch": 0.9412517598873672, "grad_norm": 0.58984375, "learning_rate": 1.8049911535722106e-05, "loss": 0.8835, "step": 7354 }, { "epoch": 0.9413797516958915, "grad_norm": 0.60546875, "learning_rate": 1.7971584527347594e-05, "loss": 1.2695, "step": 7355 }, { "epoch": 0.9415077435044157, "grad_norm": 0.58984375, "learning_rate": 1.7893426296489712e-05, "loss": 0.9409, "step": 7356 }, { "epoch": 0.94163573531294, "grad_norm": 0.6171875, "learning_rate": 1.7815436856580714e-05, "loss": 1.3932, "step": 7357 }, { "epoch": 0.9417637271214643, "grad_norm": 0.80859375, "learning_rate": 1.7737616221024653e-05, "loss": 1.8897, "step": 7358 }, { "epoch": 0.9418917189299885, "grad_norm": 0.5546875, "learning_rate": 1.7659964403196284e-05, "loss": 0.8283, "step": 7359 }, { "epoch": 0.9420197107385128, "grad_norm": 0.6640625, "learning_rate": 1.7582481416441144e-05, "loss": 1.6753, "step": 7360 }, { "epoch": 0.942147702547037, "grad_norm": 0.5546875, "learning_rate": 1.750516727407614e-05, "loss": 0.8635, "step": 7361 }, { "epoch": 0.9422756943555612, "grad_norm": 0.9140625, "learning_rate": 1.7428021989388866e-05, "loss": 1.3136, "step": 7362 }, { "epoch": 0.9424036861640855, "grad_norm": 0.75, "learning_rate": 1.7351045575638048e-05, "loss": 1.3276, "step": 7363 }, { "epoch": 0.9425316779726097, "grad_norm": 0.7734375, "learning_rate": 1.7274238046053214e-05, "loss": 1.709, "step": 7364 }, { "epoch": 0.942659669781134, "grad_norm": 0.984375, "learning_rate": 1.719759941383514e-05, "loss": 1.2921, "step": 7365 }, { "epoch": 0.9427876615896582, "grad_norm": 0.66015625, "learning_rate": 1.712112969215529e-05, "loss": 1.4301, "step": 7366 }, { "epoch": 0.9429156533981825, "grad_norm": 0.6796875, "learning_rate": 1.704482889415637e-05, "loss": 1.4726, "step": 7367 }, { "epoch": 0.9430436452067068, "grad_norm": 0.515625, "learning_rate": 1.6968697032951673e-05, "loss": 0.9482, "step": 7368 }, { "epoch": 0.943171637015231, "grad_norm": 0.53515625, "learning_rate": 1.6892734121625954e-05, "loss": 0.857, "step": 7369 }, { "epoch": 0.9432996288237553, "grad_norm": 0.55078125, "learning_rate": 1.681694017323454e-05, "loss": 0.8758, "step": 7370 }, { "epoch": 0.9434276206322796, "grad_norm": 0.9921875, "learning_rate": 1.6741315200803796e-05, "loss": 1.6523, "step": 7371 }, { "epoch": 0.9435556124408038, "grad_norm": 0.5546875, "learning_rate": 1.6665859217331104e-05, "loss": 0.6927, "step": 7372 }, { "epoch": 0.9436836042493281, "grad_norm": 1.28125, "learning_rate": 1.6590572235784862e-05, "loss": 0.9053, "step": 7373 }, { "epoch": 0.9438115960578523, "grad_norm": 0.67578125, "learning_rate": 1.6515454269104613e-05, "loss": 1.1728, "step": 7374 }, { "epoch": 0.9439395878663766, "grad_norm": 0.5859375, "learning_rate": 1.6440505330200138e-05, "loss": 1.245, "step": 7375 }, { "epoch": 0.9440675796749008, "grad_norm": 0.671875, "learning_rate": 1.6365725431952915e-05, "loss": 1.2265, "step": 7376 }, { "epoch": 0.944195571483425, "grad_norm": 0.75, "learning_rate": 1.6291114587214883e-05, "loss": 1.226, "step": 7377 }, { "epoch": 0.9443235632919493, "grad_norm": 0.71484375, "learning_rate": 1.621667280880945e-05, "loss": 1.9332, "step": 7378 }, { "epoch": 0.9444515551004735, "grad_norm": 0.7265625, "learning_rate": 1.6142400109530274e-05, "loss": 1.3824, "step": 7379 }, { "epoch": 0.9445795469089978, "grad_norm": 0.6875, "learning_rate": 1.6068296502142587e-05, "loss": 1.3149, "step": 7380 }, { "epoch": 0.9447075387175221, "grad_norm": 0.6328125, "learning_rate": 1.59943619993822e-05, "loss": 1.1857, "step": 7381 }, { "epoch": 0.9448355305260463, "grad_norm": 0.54296875, "learning_rate": 1.592059661395584e-05, "loss": 0.9296, "step": 7382 }, { "epoch": 0.9449635223345706, "grad_norm": 0.66015625, "learning_rate": 1.5847000358541473e-05, "loss": 1.3669, "step": 7383 }, { "epoch": 0.9450915141430949, "grad_norm": 0.5546875, "learning_rate": 1.5773573245787766e-05, "loss": 1.0403, "step": 7384 }, { "epoch": 0.9452195059516191, "grad_norm": 0.68359375, "learning_rate": 1.570031528831428e-05, "loss": 1.3391, "step": 7385 }, { "epoch": 0.9453474977601434, "grad_norm": 0.8359375, "learning_rate": 1.5627226498711623e-05, "loss": 1.3486, "step": 7386 }, { "epoch": 0.9454754895686676, "grad_norm": 0.7578125, "learning_rate": 1.5554306889541513e-05, "loss": 1.1609, "step": 7387 }, { "epoch": 0.9456034813771919, "grad_norm": 0.609375, "learning_rate": 1.5481556473335933e-05, "loss": 1.0777, "step": 7388 }, { "epoch": 0.9457314731857162, "grad_norm": 0.66015625, "learning_rate": 1.5408975262598546e-05, "loss": 1.4696, "step": 7389 }, { "epoch": 0.9458594649942403, "grad_norm": 0.6875, "learning_rate": 1.5336563269803372e-05, "loss": 1.1545, "step": 7390 }, { "epoch": 0.9459874568027646, "grad_norm": 0.59375, "learning_rate": 1.52643205073959e-05, "loss": 1.0167, "step": 7391 }, { "epoch": 0.9461154486112888, "grad_norm": 0.7578125, "learning_rate": 1.519224698779198e-05, "loss": 1.4186, "step": 7392 }, { "epoch": 0.9462434404198131, "grad_norm": 0.58984375, "learning_rate": 1.51203427233787e-05, "loss": 1.2135, "step": 7393 }, { "epoch": 0.9463714322283374, "grad_norm": 0.7890625, "learning_rate": 1.5048607726513951e-05, "loss": 1.2881, "step": 7394 }, { "epoch": 0.9464994240368616, "grad_norm": 0.6953125, "learning_rate": 1.4977042009526431e-05, "loss": 1.4367, "step": 7395 }, { "epoch": 0.9466274158453859, "grad_norm": 1.015625, "learning_rate": 1.4905645584716188e-05, "loss": 1.3082, "step": 7396 }, { "epoch": 0.9467554076539102, "grad_norm": 0.7265625, "learning_rate": 1.4834418464353627e-05, "loss": 1.0316, "step": 7397 }, { "epoch": 0.9468833994624344, "grad_norm": 0.65234375, "learning_rate": 1.4763360660680292e-05, "loss": 1.3126, "step": 7398 }, { "epoch": 0.9470113912709587, "grad_norm": 0.57421875, "learning_rate": 1.4692472185908635e-05, "loss": 0.9804, "step": 7399 }, { "epoch": 0.9471393830794829, "grad_norm": 0.74609375, "learning_rate": 1.462175305222202e-05, "loss": 1.5542, "step": 7400 }, { "epoch": 0.9472673748880072, "grad_norm": 0.78125, "learning_rate": 1.4551203271774726e-05, "loss": 1.4889, "step": 7401 }, { "epoch": 0.9473953666965315, "grad_norm": 0.69921875, "learning_rate": 1.4480822856691722e-05, "loss": 1.1303, "step": 7402 }, { "epoch": 0.9475233585050556, "grad_norm": 0.71484375, "learning_rate": 1.4410611819069219e-05, "loss": 0.4942, "step": 7403 }, { "epoch": 0.9476513503135799, "grad_norm": 0.68359375, "learning_rate": 1.43405701709739e-05, "loss": 1.6651, "step": 7404 }, { "epoch": 0.9477793421221042, "grad_norm": 0.625, "learning_rate": 1.4270697924443799e-05, "loss": 1.2995, "step": 7405 }, { "epoch": 0.9479073339306284, "grad_norm": 0.76953125, "learning_rate": 1.4200995091487424e-05, "loss": 1.0481, "step": 7406 }, { "epoch": 0.9480353257391527, "grad_norm": 0.62890625, "learning_rate": 1.4131461684084412e-05, "loss": 1.2847, "step": 7407 }, { "epoch": 0.9481633175476769, "grad_norm": 0.49609375, "learning_rate": 1.4062097714185207e-05, "loss": 1.0193, "step": 7408 }, { "epoch": 0.9482913093562012, "grad_norm": 0.609375, "learning_rate": 1.3992903193711159e-05, "loss": 1.0927, "step": 7409 }, { "epoch": 0.9484193011647255, "grad_norm": 0.64453125, "learning_rate": 1.3923878134554313e-05, "loss": 1.1384, "step": 7410 }, { "epoch": 0.9485472929732497, "grad_norm": 0.70703125, "learning_rate": 1.3855022548577845e-05, "loss": 1.4291, "step": 7411 }, { "epoch": 0.948675284781774, "grad_norm": 0.86328125, "learning_rate": 1.3786336447615844e-05, "loss": 1.344, "step": 7412 }, { "epoch": 0.9488032765902982, "grad_norm": 0.65234375, "learning_rate": 1.371781984347298e-05, "loss": 1.3381, "step": 7413 }, { "epoch": 0.9489312683988225, "grad_norm": 0.53515625, "learning_rate": 1.364947274792483e-05, "loss": 1.1402, "step": 7414 }, { "epoch": 0.9490592602073468, "grad_norm": 0.60546875, "learning_rate": 1.3581295172718222e-05, "loss": 0.7361, "step": 7415 }, { "epoch": 0.949187252015871, "grad_norm": 0.71484375, "learning_rate": 1.351328712957034e-05, "loss": 1.0652, "step": 7416 }, { "epoch": 0.9493152438243952, "grad_norm": 0.53125, "learning_rate": 1.344544863016961e-05, "loss": 0.9984, "step": 7417 }, { "epoch": 0.9494432356329195, "grad_norm": 0.65625, "learning_rate": 1.3377779686175151e-05, "loss": 1.1771, "step": 7418 }, { "epoch": 0.9495712274414437, "grad_norm": 0.59765625, "learning_rate": 1.3310280309216993e-05, "loss": 1.1088, "step": 7419 }, { "epoch": 0.949699219249968, "grad_norm": 0.6875, "learning_rate": 1.3242950510895967e-05, "loss": 1.6848, "step": 7420 }, { "epoch": 0.9498272110584922, "grad_norm": 0.57421875, "learning_rate": 1.3175790302783709e-05, "loss": 0.849, "step": 7421 }, { "epoch": 0.9499552028670165, "grad_norm": 0.734375, "learning_rate": 1.3108799696422979e-05, "loss": 1.6725, "step": 7422 }, { "epoch": 0.9500831946755408, "grad_norm": 0.5859375, "learning_rate": 1.304197870332713e-05, "loss": 0.9146, "step": 7423 }, { "epoch": 0.950211186484065, "grad_norm": 0.5546875, "learning_rate": 1.2975327334980302e-05, "loss": 1.0658, "step": 7424 }, { "epoch": 0.9503391782925893, "grad_norm": 0.62890625, "learning_rate": 1.290884560283767e-05, "loss": 1.2802, "step": 7425 }, { "epoch": 0.9504671701011135, "grad_norm": 0.66015625, "learning_rate": 1.2842533518325317e-05, "loss": 1.38, "step": 7426 }, { "epoch": 0.9505951619096378, "grad_norm": 0.67578125, "learning_rate": 1.27763910928399e-05, "loss": 1.5536, "step": 7427 }, { "epoch": 0.9507231537181621, "grad_norm": 0.75390625, "learning_rate": 1.2710418337749108e-05, "loss": 1.3478, "step": 7428 }, { "epoch": 0.9508511455266863, "grad_norm": 0.6875, "learning_rate": 1.2644615264391535e-05, "loss": 1.4781, "step": 7429 }, { "epoch": 0.9509791373352106, "grad_norm": 0.59375, "learning_rate": 1.2578981884076469e-05, "loss": 1.089, "step": 7430 }, { "epoch": 0.9511071291437349, "grad_norm": 0.734375, "learning_rate": 1.2513518208083997e-05, "loss": 1.6568, "step": 7431 }, { "epoch": 0.951235120952259, "grad_norm": 0.70703125, "learning_rate": 1.2448224247665118e-05, "loss": 1.2663, "step": 7432 }, { "epoch": 0.9513631127607833, "grad_norm": 0.8046875, "learning_rate": 1.2383100014041748e-05, "loss": 1.4813, "step": 7433 }, { "epoch": 0.9514911045693075, "grad_norm": 0.46875, "learning_rate": 1.2318145518406376e-05, "loss": 0.6611, "step": 7434 }, { "epoch": 0.9516190963778318, "grad_norm": 0.69140625, "learning_rate": 1.2253360771922739e-05, "loss": 1.6714, "step": 7435 }, { "epoch": 0.9517470881863561, "grad_norm": 0.890625, "learning_rate": 1.218874578572493e-05, "loss": 1.8971, "step": 7436 }, { "epoch": 0.9518750799948803, "grad_norm": 0.703125, "learning_rate": 1.2124300570918179e-05, "loss": 0.9184, "step": 7437 }, { "epoch": 0.9520030718034046, "grad_norm": 0.68359375, "learning_rate": 1.2060025138578402e-05, "loss": 1.44, "step": 7438 }, { "epoch": 0.9521310636119288, "grad_norm": 0.6484375, "learning_rate": 1.1995919499752427e-05, "loss": 1.1189, "step": 7439 }, { "epoch": 0.9522590554204531, "grad_norm": 0.7265625, "learning_rate": 1.1931983665457668e-05, "loss": 1.3124, "step": 7440 }, { "epoch": 0.9523870472289774, "grad_norm": 0.69921875, "learning_rate": 1.1868217646682888e-05, "loss": 1.2806, "step": 7441 }, { "epoch": 0.9525150390375016, "grad_norm": 0.80859375, "learning_rate": 1.1804621454386987e-05, "loss": 1.3877, "step": 7442 }, { "epoch": 0.9526430308460259, "grad_norm": 0.9296875, "learning_rate": 1.1741195099500113e-05, "loss": 1.3672, "step": 7443 }, { "epoch": 0.9527710226545502, "grad_norm": 0.6171875, "learning_rate": 1.167793859292321e-05, "loss": 1.005, "step": 7444 }, { "epoch": 0.9528990144630743, "grad_norm": 0.64453125, "learning_rate": 1.1614851945527804e-05, "loss": 1.4338, "step": 7445 }, { "epoch": 0.9530270062715986, "grad_norm": 0.4765625, "learning_rate": 1.1551935168156336e-05, "loss": 0.6883, "step": 7446 }, { "epoch": 0.9531549980801228, "grad_norm": 0.52734375, "learning_rate": 1.1489188271622154e-05, "loss": 0.8097, "step": 7447 }, { "epoch": 0.9532829898886471, "grad_norm": 0.73046875, "learning_rate": 1.1426611266709408e-05, "loss": 1.5806, "step": 7448 }, { "epoch": 0.9534109816971714, "grad_norm": 0.55859375, "learning_rate": 1.136420416417272e-05, "loss": 1.115, "step": 7449 }, { "epoch": 0.9535389735056956, "grad_norm": 0.671875, "learning_rate": 1.1301966974738065e-05, "loss": 1.4782, "step": 7450 }, { "epoch": 0.9536669653142199, "grad_norm": 0.61328125, "learning_rate": 1.1239899709101775e-05, "loss": 1.2902, "step": 7451 }, { "epoch": 0.9537949571227441, "grad_norm": 0.6171875, "learning_rate": 1.1178002377930984e-05, "loss": 0.9425, "step": 7452 }, { "epoch": 0.9539229489312684, "grad_norm": 0.6875, "learning_rate": 1.1116274991864073e-05, "loss": 1.3138, "step": 7453 }, { "epoch": 0.9540509407397927, "grad_norm": 0.72265625, "learning_rate": 1.1054717561509553e-05, "loss": 1.771, "step": 7454 }, { "epoch": 0.9541789325483169, "grad_norm": 0.59765625, "learning_rate": 1.0993330097447296e-05, "loss": 1.1265, "step": 7455 }, { "epoch": 0.9543069243568412, "grad_norm": 0.7265625, "learning_rate": 1.093211261022764e-05, "loss": 1.0729, "step": 7456 }, { "epoch": 0.9544349161653655, "grad_norm": 0.71484375, "learning_rate": 1.0871065110371724e-05, "loss": 1.2602, "step": 7457 }, { "epoch": 0.9545629079738897, "grad_norm": 0.609375, "learning_rate": 1.0810187608371824e-05, "loss": 1.1589, "step": 7458 }, { "epoch": 0.954690899782414, "grad_norm": 0.60546875, "learning_rate": 1.0749480114690458e-05, "loss": 1.0745, "step": 7459 }, { "epoch": 0.9548188915909381, "grad_norm": 0.60546875, "learning_rate": 1.0688942639761278e-05, "loss": 1.3958, "step": 7460 }, { "epoch": 0.9549468833994624, "grad_norm": 0.58203125, "learning_rate": 1.0628575193988633e-05, "loss": 1.0572, "step": 7461 }, { "epoch": 0.9550748752079867, "grad_norm": 0.65234375, "learning_rate": 1.0568377787747664e-05, "loss": 1.4697, "step": 7462 }, { "epoch": 0.9552028670165109, "grad_norm": 0.71484375, "learning_rate": 1.0508350431384206e-05, "loss": 1.1572, "step": 7463 }, { "epoch": 0.9553308588250352, "grad_norm": 0.63671875, "learning_rate": 1.044849313521501e-05, "loss": 1.2422, "step": 7464 }, { "epoch": 0.9554588506335594, "grad_norm": 0.65625, "learning_rate": 1.038880590952762e-05, "loss": 1.2621, "step": 7465 }, { "epoch": 0.9555868424420837, "grad_norm": 0.73828125, "learning_rate": 1.0329288764580058e-05, "loss": 1.5431, "step": 7466 }, { "epoch": 0.955714834250608, "grad_norm": 0.76953125, "learning_rate": 1.026994171060136e-05, "loss": 0.9013, "step": 7467 }, { "epoch": 0.9558428260591322, "grad_norm": 0.625, "learning_rate": 1.0210764757791369e-05, "loss": 1.171, "step": 7468 }, { "epoch": 0.9559708178676565, "grad_norm": 0.703125, "learning_rate": 1.0151757916320504e-05, "loss": 1.003, "step": 7469 }, { "epoch": 0.9560988096761808, "grad_norm": 0.67578125, "learning_rate": 1.0092921196330096e-05, "loss": 1.1393, "step": 7470 }, { "epoch": 0.956226801484705, "grad_norm": 0.6796875, "learning_rate": 1.0034254607932169e-05, "loss": 1.2497, "step": 7471 }, { "epoch": 0.9563547932932293, "grad_norm": 0.55078125, "learning_rate": 9.975758161209547e-06, "loss": 0.8546, "step": 7472 }, { "epoch": 0.9564827851017534, "grad_norm": 0.62109375, "learning_rate": 9.917431866215742e-06, "loss": 1.0306, "step": 7473 }, { "epoch": 0.9566107769102777, "grad_norm": 0.5859375, "learning_rate": 9.85927573297518e-06, "loss": 1.2612, "step": 7474 }, { "epoch": 0.956738768718802, "grad_norm": 0.703125, "learning_rate": 9.801289771482979e-06, "loss": 1.4966, "step": 7475 }, { "epoch": 0.9568667605273262, "grad_norm": 0.62890625, "learning_rate": 9.743473991704721e-06, "loss": 1.4056, "step": 7476 }, { "epoch": 0.9569947523358505, "grad_norm": 0.734375, "learning_rate": 9.685828403577125e-06, "loss": 1.8793, "step": 7477 }, { "epoch": 0.9571227441443748, "grad_norm": 0.75390625, "learning_rate": 9.628353017007595e-06, "loss": 1.4277, "step": 7478 }, { "epoch": 0.957250735952899, "grad_norm": 0.734375, "learning_rate": 9.571047841874126e-06, "loss": 1.3427, "step": 7479 }, { "epoch": 0.9573787277614233, "grad_norm": 0.59375, "learning_rate": 9.513912888025612e-06, "loss": 0.7426, "step": 7480 }, { "epoch": 0.9575067195699475, "grad_norm": 0.80859375, "learning_rate": 9.456948165281421e-06, "loss": 2.5019, "step": 7481 }, { "epoch": 0.9576347113784718, "grad_norm": 0.56640625, "learning_rate": 9.400153683432167e-06, "loss": 0.903, "step": 7482 }, { "epoch": 0.9577627031869961, "grad_norm": 0.74609375, "learning_rate": 9.343529452238597e-06, "loss": 1.8025, "step": 7483 }, { "epoch": 0.9578906949955203, "grad_norm": 0.71484375, "learning_rate": 9.287075481432705e-06, "loss": 1.797, "step": 7484 }, { "epoch": 0.9580186868040446, "grad_norm": 0.6953125, "learning_rate": 9.230791780717063e-06, "loss": 1.0188, "step": 7485 }, { "epoch": 0.9581466786125687, "grad_norm": 0.57421875, "learning_rate": 9.17467835976471e-06, "loss": 1.282, "step": 7486 }, { "epoch": 0.958274670421093, "grad_norm": 0.5859375, "learning_rate": 9.118735228219821e-06, "loss": 1.1854, "step": 7487 }, { "epoch": 0.9584026622296173, "grad_norm": 0.62890625, "learning_rate": 9.062962395697149e-06, "loss": 1.2838, "step": 7488 }, { "epoch": 0.9585306540381415, "grad_norm": 0.640625, "learning_rate": 9.00735987178214e-06, "loss": 1.1681, "step": 7489 }, { "epoch": 0.9586586458466658, "grad_norm": 0.8359375, "learning_rate": 8.951927666030813e-06, "loss": 2.0416, "step": 7490 }, { "epoch": 0.9587866376551901, "grad_norm": 0.796875, "learning_rate": 8.896665787970326e-06, "loss": 2.0226, "step": 7491 }, { "epoch": 0.9589146294637143, "grad_norm": 0.6484375, "learning_rate": 8.841574247098194e-06, "loss": 1.2248, "step": 7492 }, { "epoch": 0.9590426212722386, "grad_norm": 0.67578125, "learning_rate": 8.786653052882842e-06, "loss": 1.4571, "step": 7493 }, { "epoch": 0.9591706130807628, "grad_norm": 0.66796875, "learning_rate": 8.731902214763387e-06, "loss": 1.0433, "step": 7494 }, { "epoch": 0.9592986048892871, "grad_norm": 0.6953125, "learning_rate": 8.677321742149524e-06, "loss": 1.2215, "step": 7495 }, { "epoch": 0.9594265966978114, "grad_norm": 0.65625, "learning_rate": 8.622911644421972e-06, "loss": 1.0617, "step": 7496 }, { "epoch": 0.9595545885063356, "grad_norm": 0.609375, "learning_rate": 8.568671930931915e-06, "loss": 1.2573, "step": 7497 }, { "epoch": 0.9596825803148599, "grad_norm": 0.5390625, "learning_rate": 8.514602611001343e-06, "loss": 1.0874, "step": 7498 }, { "epoch": 0.959810572123384, "grad_norm": 0.6328125, "learning_rate": 8.460703693922933e-06, "loss": 1.0882, "step": 7499 }, { "epoch": 0.9599385639319084, "grad_norm": 0.85546875, "learning_rate": 8.406975188959943e-06, "loss": 2.3958, "step": 7500 }, { "epoch": 0.9600665557404326, "grad_norm": 0.75, "learning_rate": 8.353417105346762e-06, "loss": 1.4024, "step": 7501 }, { "epoch": 0.9601945475489568, "grad_norm": 0.703125, "learning_rate": 8.300029452288027e-06, "loss": 1.9261, "step": 7502 }, { "epoch": 0.9603225393574811, "grad_norm": 0.6015625, "learning_rate": 8.2468122389594e-06, "loss": 0.9134, "step": 7503 }, { "epoch": 0.9604505311660054, "grad_norm": 0.765625, "learning_rate": 8.193765474506899e-06, "loss": 1.4316, "step": 7504 }, { "epoch": 0.9605785229745296, "grad_norm": 0.70703125, "learning_rate": 8.140889168047671e-06, "loss": 1.4356, "step": 7505 }, { "epoch": 0.9607065147830539, "grad_norm": 0.57421875, "learning_rate": 8.088183328669341e-06, "loss": 1.0342, "step": 7506 }, { "epoch": 0.9608345065915781, "grad_norm": 0.53125, "learning_rate": 8.035647965430215e-06, "loss": 1.2967, "step": 7507 }, { "epoch": 0.9609624984001024, "grad_norm": 0.66796875, "learning_rate": 7.983283087359406e-06, "loss": 1.4675, "step": 7508 }, { "epoch": 0.9610904902086267, "grad_norm": 0.640625, "learning_rate": 7.931088703456602e-06, "loss": 1.3239, "step": 7509 }, { "epoch": 0.9612184820171509, "grad_norm": 0.703125, "learning_rate": 7.879064822692294e-06, "loss": 1.1895, "step": 7510 }, { "epoch": 0.9613464738256752, "grad_norm": 0.796875, "learning_rate": 7.827211454007666e-06, "loss": 2.0297, "step": 7511 }, { "epoch": 0.9614744656341994, "grad_norm": 0.56640625, "learning_rate": 7.775528606314365e-06, "loss": 1.1003, "step": 7512 }, { "epoch": 0.9616024574427237, "grad_norm": 0.65625, "learning_rate": 7.724016288495173e-06, "loss": 0.8079, "step": 7513 }, { "epoch": 0.961730449251248, "grad_norm": 0.65234375, "learning_rate": 7.672674509403232e-06, "loss": 1.1731, "step": 7514 }, { "epoch": 0.9618584410597721, "grad_norm": 0.490234375, "learning_rate": 7.621503277862485e-06, "loss": 0.8523, "step": 7515 }, { "epoch": 0.9619864328682964, "grad_norm": 0.71875, "learning_rate": 7.570502602667451e-06, "loss": 1.0342, "step": 7516 }, { "epoch": 0.9621144246768207, "grad_norm": 0.83984375, "learning_rate": 7.5196724925833405e-06, "loss": 1.5056, "step": 7517 }, { "epoch": 0.9622424164853449, "grad_norm": 0.73046875, "learning_rate": 7.469012956346388e-06, "loss": 1.2152, "step": 7518 }, { "epoch": 0.9623704082938692, "grad_norm": 0.62890625, "learning_rate": 7.418524002663296e-06, "loss": 1.3152, "step": 7519 }, { "epoch": 0.9624984001023934, "grad_norm": 0.5703125, "learning_rate": 7.368205640211012e-06, "loss": 0.9833, "step": 7520 }, { "epoch": 0.9626263919109177, "grad_norm": 0.6015625, "learning_rate": 7.31805787763784e-06, "loss": 1.009, "step": 7521 }, { "epoch": 0.962754383719442, "grad_norm": 0.5625, "learning_rate": 7.268080723562553e-06, "loss": 1.1865, "step": 7522 }, { "epoch": 0.9628823755279662, "grad_norm": 0.7421875, "learning_rate": 7.218274186574281e-06, "loss": 1.3723, "step": 7523 }, { "epoch": 0.9630103673364905, "grad_norm": 0.625, "learning_rate": 7.168638275233397e-06, "loss": 1.4144, "step": 7524 }, { "epoch": 0.9631383591450147, "grad_norm": 0.86328125, "learning_rate": 7.119172998070411e-06, "loss": 1.1453, "step": 7525 }, { "epoch": 0.963266350953539, "grad_norm": 0.625, "learning_rate": 7.069878363586746e-06, "loss": 1.4703, "step": 7526 }, { "epoch": 0.9633943427620633, "grad_norm": 0.6484375, "learning_rate": 7.020754380254624e-06, "loss": 1.1549, "step": 7527 }, { "epoch": 0.9635223345705874, "grad_norm": 0.65625, "learning_rate": 6.971801056516624e-06, "loss": 1.2478, "step": 7528 }, { "epoch": 0.9636503263791117, "grad_norm": 0.54296875, "learning_rate": 6.923018400786241e-06, "loss": 0.7673, "step": 7529 }, { "epoch": 0.963778318187636, "grad_norm": 0.828125, "learning_rate": 6.8744064214476545e-06, "loss": 1.5444, "step": 7530 }, { "epoch": 0.9639063099961602, "grad_norm": 0.6484375, "learning_rate": 6.8259651268555154e-06, "loss": 1.1685, "step": 7531 }, { "epoch": 0.9640343018046845, "grad_norm": 0.66015625, "learning_rate": 6.777694525335276e-06, "loss": 1.1252, "step": 7532 }, { "epoch": 0.9641622936132087, "grad_norm": 0.66015625, "learning_rate": 6.729594625183078e-06, "loss": 0.9325, "step": 7533 }, { "epoch": 0.964290285421733, "grad_norm": 0.68359375, "learning_rate": 6.6816654346655296e-06, "loss": 1.3637, "step": 7534 }, { "epoch": 0.9644182772302573, "grad_norm": 0.62109375, "learning_rate": 6.633906962020153e-06, "loss": 1.158, "step": 7535 }, { "epoch": 0.9645462690387815, "grad_norm": 0.64453125, "learning_rate": 6.586319215454939e-06, "loss": 1.2123, "step": 7536 }, { "epoch": 0.9646742608473058, "grad_norm": 0.640625, "learning_rate": 6.538902203148789e-06, "loss": 1.4784, "step": 7537 }, { "epoch": 0.96480225265583, "grad_norm": 0.71484375, "learning_rate": 6.491655933250962e-06, "loss": 1.1367, "step": 7538 }, { "epoch": 0.9649302444643543, "grad_norm": 0.6171875, "learning_rate": 6.4445804138814065e-06, "loss": 1.3062, "step": 7539 }, { "epoch": 0.9650582362728786, "grad_norm": 0.64453125, "learning_rate": 6.3976756531308745e-06, "loss": 1.4394, "step": 7540 }, { "epoch": 0.9651862280814028, "grad_norm": 0.64453125, "learning_rate": 6.350941659060805e-06, "loss": 1.8225, "step": 7541 }, { "epoch": 0.965314219889927, "grad_norm": 0.76953125, "learning_rate": 6.304378439703106e-06, "loss": 1.1458, "step": 7542 }, { "epoch": 0.9654422116984513, "grad_norm": 0.6640625, "learning_rate": 6.257986003060489e-06, "loss": 1.2515, "step": 7543 }, { "epoch": 0.9655702035069755, "grad_norm": 0.609375, "learning_rate": 6.211764357106131e-06, "loss": 1.1473, "step": 7544 }, { "epoch": 0.9656981953154998, "grad_norm": 0.69140625, "learning_rate": 6.16571350978401e-06, "loss": 1.4845, "step": 7545 }, { "epoch": 0.965826187124024, "grad_norm": 0.765625, "learning_rate": 6.119833469008906e-06, "loss": 1.0006, "step": 7546 }, { "epoch": 0.9659541789325483, "grad_norm": 0.72265625, "learning_rate": 6.074124242665735e-06, "loss": 1.511, "step": 7547 }, { "epoch": 0.9660821707410726, "grad_norm": 0.671875, "learning_rate": 6.028585838610656e-06, "loss": 1.1538, "step": 7548 }, { "epoch": 0.9662101625495968, "grad_norm": 0.60546875, "learning_rate": 5.983218264669965e-06, "loss": 1.3544, "step": 7549 }, { "epoch": 0.9663381543581211, "grad_norm": 0.69921875, "learning_rate": 5.938021528640869e-06, "loss": 2.0538, "step": 7550 }, { "epoch": 0.9664661461666454, "grad_norm": 0.80859375, "learning_rate": 5.892995638291155e-06, "loss": 1.7865, "step": 7551 }, { "epoch": 0.9665941379751696, "grad_norm": 0.546875, "learning_rate": 5.848140601359408e-06, "loss": 0.8754, "step": 7552 }, { "epoch": 0.9667221297836939, "grad_norm": 0.53515625, "learning_rate": 5.8034564255544654e-06, "loss": 1.0543, "step": 7553 }, { "epoch": 0.9668501215922181, "grad_norm": 0.6953125, "learning_rate": 5.758943118556181e-06, "loss": 1.474, "step": 7554 }, { "epoch": 0.9669781134007424, "grad_norm": 0.71484375, "learning_rate": 5.714600688014882e-06, "loss": 1.2033, "step": 7555 }, { "epoch": 0.9671061052092667, "grad_norm": 0.66015625, "learning_rate": 5.67042914155147e-06, "loss": 1.1382, "step": 7556 }, { "epoch": 0.9672340970177908, "grad_norm": 0.74609375, "learning_rate": 5.62642848675754e-06, "loss": 1.4167, "step": 7557 }, { "epoch": 0.9673620888263151, "grad_norm": 0.7421875, "learning_rate": 5.582598731195376e-06, "loss": 1.4513, "step": 7558 }, { "epoch": 0.9674900806348393, "grad_norm": 0.58984375, "learning_rate": 5.538939882397842e-06, "loss": 1.0617, "step": 7559 }, { "epoch": 0.9676180724433636, "grad_norm": 0.6171875, "learning_rate": 5.4954519478682685e-06, "loss": 1.3555, "step": 7560 }, { "epoch": 0.9677460642518879, "grad_norm": 0.61328125, "learning_rate": 5.452134935080899e-06, "loss": 1.2107, "step": 7561 }, { "epoch": 0.9678740560604121, "grad_norm": 0.7734375, "learning_rate": 5.408988851480445e-06, "loss": 1.4609, "step": 7562 }, { "epoch": 0.9680020478689364, "grad_norm": 0.53515625, "learning_rate": 5.366013704482309e-06, "loss": 0.8075, "step": 7563 }, { "epoch": 0.9681300396774607, "grad_norm": 0.9375, "learning_rate": 5.3232095014723595e-06, "loss": 1.8681, "step": 7564 }, { "epoch": 0.9682580314859849, "grad_norm": 0.67578125, "learning_rate": 5.280576249807268e-06, "loss": 1.6119, "step": 7565 }, { "epoch": 0.9683860232945092, "grad_norm": 0.69140625, "learning_rate": 5.238113956814283e-06, "loss": 1.4864, "step": 7566 }, { "epoch": 0.9685140151030334, "grad_norm": 0.63671875, "learning_rate": 5.195822629791125e-06, "loss": 1.4004, "step": 7567 }, { "epoch": 0.9686420069115577, "grad_norm": 0.6796875, "learning_rate": 5.153702276006311e-06, "loss": 1.2327, "step": 7568 }, { "epoch": 0.968769998720082, "grad_norm": 0.703125, "learning_rate": 5.111752902698941e-06, "loss": 1.2283, "step": 7569 }, { "epoch": 0.9688979905286061, "grad_norm": 0.62890625, "learning_rate": 5.06997451707858e-06, "loss": 1.386, "step": 7570 }, { "epoch": 0.9690259823371304, "grad_norm": 0.640625, "learning_rate": 5.028367126325595e-06, "loss": 0.9887, "step": 7571 }, { "epoch": 0.9691539741456546, "grad_norm": 0.62109375, "learning_rate": 4.986930737590933e-06, "loss": 1.3942, "step": 7572 }, { "epoch": 0.9692819659541789, "grad_norm": 0.73828125, "learning_rate": 4.9456653579961204e-06, "loss": 1.2143, "step": 7573 }, { "epoch": 0.9694099577627032, "grad_norm": 0.796875, "learning_rate": 4.90457099463315e-06, "loss": 1.3556, "step": 7574 }, { "epoch": 0.9695379495712274, "grad_norm": 0.64453125, "learning_rate": 4.863647654564929e-06, "loss": 1.3877, "step": 7575 }, { "epoch": 0.9696659413797517, "grad_norm": 0.447265625, "learning_rate": 4.822895344824607e-06, "loss": 0.5977, "step": 7576 }, { "epoch": 0.969793933188276, "grad_norm": 0.55859375, "learning_rate": 4.782314072416361e-06, "loss": 0.9316, "step": 7577 }, { "epoch": 0.9699219249968002, "grad_norm": 0.73828125, "learning_rate": 4.741903844314499e-06, "loss": 2.2112, "step": 7578 }, { "epoch": 0.9700499168053245, "grad_norm": 0.77734375, "learning_rate": 4.701664667464245e-06, "loss": 1.6217, "step": 7579 }, { "epoch": 0.9701779086138487, "grad_norm": 0.46484375, "learning_rate": 4.66159654878151e-06, "loss": 0.6803, "step": 7580 }, { "epoch": 0.970305900422373, "grad_norm": 0.59765625, "learning_rate": 4.621699495152454e-06, "loss": 0.9801, "step": 7581 }, { "epoch": 0.9704338922308973, "grad_norm": 0.62109375, "learning_rate": 4.581973513434145e-06, "loss": 0.8224, "step": 7582 }, { "epoch": 0.9705618840394215, "grad_norm": 0.60546875, "learning_rate": 4.542418610454124e-06, "loss": 0.8478, "step": 7583 }, { "epoch": 0.9706898758479457, "grad_norm": 0.59765625, "learning_rate": 4.503034793010508e-06, "loss": 0.8711, "step": 7584 }, { "epoch": 0.9708178676564699, "grad_norm": 0.69140625, "learning_rate": 4.463822067871992e-06, "loss": 1.5242, "step": 7585 }, { "epoch": 0.9709458594649942, "grad_norm": 0.5859375, "learning_rate": 4.424780441778076e-06, "loss": 0.8084, "step": 7586 }, { "epoch": 0.9710738512735185, "grad_norm": 0.7890625, "learning_rate": 4.385909921438503e-06, "loss": 1.9207, "step": 7587 }, { "epoch": 0.9712018430820427, "grad_norm": 0.58203125, "learning_rate": 4.347210513534039e-06, "loss": 0.9794, "step": 7588 }, { "epoch": 0.971329834890567, "grad_norm": 0.73046875, "learning_rate": 4.308682224715588e-06, "loss": 1.5718, "step": 7589 }, { "epoch": 0.9714578266990913, "grad_norm": 0.875, "learning_rate": 4.2703250616049625e-06, "loss": 1.1393, "step": 7590 }, { "epoch": 0.9715858185076155, "grad_norm": 0.79296875, "learning_rate": 4.232139030794447e-06, "loss": 1.324, "step": 7591 }, { "epoch": 0.9717138103161398, "grad_norm": 0.609375, "learning_rate": 4.194124138847011e-06, "loss": 1.1397, "step": 7592 }, { "epoch": 0.971841802124664, "grad_norm": 0.66015625, "learning_rate": 4.156280392295986e-06, "loss": 1.2762, "step": 7593 }, { "epoch": 0.9719697939331883, "grad_norm": 0.65625, "learning_rate": 4.118607797645502e-06, "loss": 1.0449, "step": 7594 }, { "epoch": 0.9720977857417126, "grad_norm": 0.8984375, "learning_rate": 4.081106361370157e-06, "loss": 1.7073, "step": 7595 }, { "epoch": 0.9722257775502368, "grad_norm": 0.6875, "learning_rate": 4.043776089915241e-06, "loss": 1.1832, "step": 7596 }, { "epoch": 0.972353769358761, "grad_norm": 0.69921875, "learning_rate": 4.006616989696621e-06, "loss": 1.3325, "step": 7597 }, { "epoch": 0.9724817611672852, "grad_norm": 0.7421875, "learning_rate": 3.969629067100522e-06, "loss": 1.5695, "step": 7598 }, { "epoch": 0.9726097529758095, "grad_norm": 0.6171875, "learning_rate": 3.932812328484081e-06, "loss": 1.1152, "step": 7599 }, { "epoch": 0.9727377447843338, "grad_norm": 0.69140625, "learning_rate": 3.896166780174792e-06, "loss": 1.1303, "step": 7600 }, { "epoch": 0.972865736592858, "grad_norm": 0.5390625, "learning_rate": 3.85969242847084e-06, "loss": 1.0784, "step": 7601 }, { "epoch": 0.9729937284013823, "grad_norm": 0.6328125, "learning_rate": 3.8233892796407655e-06, "loss": 1.1425, "step": 7602 }, { "epoch": 0.9731217202099066, "grad_norm": 0.625, "learning_rate": 3.787257339924133e-06, "loss": 1.5591, "step": 7603 }, { "epoch": 0.9732497120184308, "grad_norm": 0.59375, "learning_rate": 3.7512966155305307e-06, "loss": 1.3087, "step": 7604 }, { "epoch": 0.9733777038269551, "grad_norm": 0.625, "learning_rate": 3.715507112640459e-06, "loss": 1.5297, "step": 7605 }, { "epoch": 0.9735056956354793, "grad_norm": 0.84375, "learning_rate": 3.679888837404999e-06, "loss": 1.2543, "step": 7606 }, { "epoch": 0.9736336874440036, "grad_norm": 0.6875, "learning_rate": 3.6444417959456965e-06, "loss": 1.3066, "step": 7607 }, { "epoch": 0.9737616792525279, "grad_norm": 0.60546875, "learning_rate": 3.6091659943546796e-06, "loss": 1.2266, "step": 7608 }, { "epoch": 0.9738896710610521, "grad_norm": 0.65234375, "learning_rate": 3.5740614386947647e-06, "loss": 1.2107, "step": 7609 }, { "epoch": 0.9740176628695764, "grad_norm": 0.79296875, "learning_rate": 3.5391281349991257e-06, "loss": 1.4656, "step": 7610 }, { "epoch": 0.9741456546781005, "grad_norm": 0.56640625, "learning_rate": 3.5043660892716265e-06, "loss": 1.0906, "step": 7611 }, { "epoch": 0.9742736464866248, "grad_norm": 0.65234375, "learning_rate": 3.469775307486711e-06, "loss": 1.2089, "step": 7612 }, { "epoch": 0.9744016382951491, "grad_norm": 0.64453125, "learning_rate": 3.435355795589401e-06, "loss": 1.0741, "step": 7613 }, { "epoch": 0.9745296301036733, "grad_norm": 0.59375, "learning_rate": 3.401107559495187e-06, "loss": 0.9924, "step": 7614 }, { "epoch": 0.9746576219121976, "grad_norm": 0.578125, "learning_rate": 3.367030605090249e-06, "loss": 1.1738, "step": 7615 }, { "epoch": 0.9747856137207219, "grad_norm": 0.62109375, "learning_rate": 3.3331249382312357e-06, "loss": 1.1016, "step": 7616 }, { "epoch": 0.9749136055292461, "grad_norm": 0.59765625, "learning_rate": 3.299390564745264e-06, "loss": 0.8735, "step": 7617 }, { "epoch": 0.9750415973377704, "grad_norm": 0.67578125, "learning_rate": 3.265827490430362e-06, "loss": 1.2482, "step": 7618 }, { "epoch": 0.9751695891462946, "grad_norm": 0.68359375, "learning_rate": 3.232435721054805e-06, "loss": 1.1521, "step": 7619 }, { "epoch": 0.9752975809548189, "grad_norm": 0.69140625, "learning_rate": 3.1992152623574465e-06, "loss": 1.1189, "step": 7620 }, { "epoch": 0.9754255727633432, "grad_norm": 0.6484375, "learning_rate": 3.166166120047831e-06, "loss": 1.3359, "step": 7621 }, { "epoch": 0.9755535645718674, "grad_norm": 0.60546875, "learning_rate": 3.133288299805859e-06, "loss": 1.242, "step": 7622 }, { "epoch": 0.9756815563803917, "grad_norm": 0.7265625, "learning_rate": 3.1005818072822324e-06, "loss": 1.5072, "step": 7623 }, { "epoch": 0.9758095481889159, "grad_norm": 0.62890625, "learning_rate": 3.068046648098122e-06, "loss": 1.1459, "step": 7624 }, { "epoch": 0.9759375399974402, "grad_norm": 0.64453125, "learning_rate": 3.0356828278451653e-06, "loss": 0.814, "step": 7625 }, { "epoch": 0.9760655318059644, "grad_norm": 0.60546875, "learning_rate": 3.0034903520856916e-06, "loss": 1.0374, "step": 7626 }, { "epoch": 0.9761935236144886, "grad_norm": 0.80859375, "learning_rate": 2.971469226352275e-06, "loss": 1.3171, "step": 7627 }, { "epoch": 0.9763215154230129, "grad_norm": 0.625, "learning_rate": 2.9396194561485125e-06, "loss": 1.0373, "step": 7628 }, { "epoch": 0.9764495072315372, "grad_norm": 0.65234375, "learning_rate": 2.907941046948248e-06, "loss": 1.21, "step": 7629 }, { "epoch": 0.9765774990400614, "grad_norm": 0.76171875, "learning_rate": 2.8764340041957936e-06, "loss": 1.5873, "step": 7630 }, { "epoch": 0.9767054908485857, "grad_norm": 0.64453125, "learning_rate": 2.845098333306262e-06, "loss": 1.2388, "step": 7631 }, { "epoch": 0.9768334826571099, "grad_norm": 0.69140625, "learning_rate": 2.8139340396652336e-06, "loss": 1.0252, "step": 7632 }, { "epoch": 0.9769614744656342, "grad_norm": 0.69140625, "learning_rate": 2.7829411286287574e-06, "loss": 1.8253, "step": 7633 }, { "epoch": 0.9770894662741585, "grad_norm": 0.671875, "learning_rate": 2.752119605523351e-06, "loss": 1.2156, "step": 7634 }, { "epoch": 0.9772174580826827, "grad_norm": 0.73046875, "learning_rate": 2.7214694756464433e-06, "loss": 1.7647, "step": 7635 }, { "epoch": 0.977345449891207, "grad_norm": 0.73828125, "learning_rate": 2.690990744265487e-06, "loss": 1.158, "step": 7636 }, { "epoch": 0.9774734416997313, "grad_norm": 0.59375, "learning_rate": 2.6606834166188475e-06, "loss": 1.2144, "step": 7637 }, { "epoch": 0.9776014335082555, "grad_norm": 0.78515625, "learning_rate": 2.6305474979154697e-06, "loss": 0.916, "step": 7638 }, { "epoch": 0.9777294253167798, "grad_norm": 0.72265625, "learning_rate": 2.600582993334544e-06, "loss": 1.4087, "step": 7639 }, { "epoch": 0.9778574171253039, "grad_norm": 0.53515625, "learning_rate": 2.57078990802595e-06, "loss": 0.919, "step": 7640 }, { "epoch": 0.9779854089338282, "grad_norm": 0.75, "learning_rate": 2.541168247110148e-06, "loss": 1.4588, "step": 7641 }, { "epoch": 0.9781134007423525, "grad_norm": 0.8203125, "learning_rate": 2.511718015678177e-06, "loss": 2.0011, "step": 7642 }, { "epoch": 0.9782413925508767, "grad_norm": 0.58984375, "learning_rate": 2.4824392187914323e-06, "loss": 0.886, "step": 7643 }, { "epoch": 0.978369384359401, "grad_norm": 0.5859375, "learning_rate": 2.453331861482e-06, "loss": 1.1801, "step": 7644 }, { "epoch": 0.9784973761679252, "grad_norm": 0.73046875, "learning_rate": 2.4243959487524335e-06, "loss": 1.9849, "step": 7645 }, { "epoch": 0.9786253679764495, "grad_norm": 0.74609375, "learning_rate": 2.3956314855758665e-06, "loss": 1.471, "step": 7646 }, { "epoch": 0.9787533597849738, "grad_norm": 0.77734375, "learning_rate": 2.3670384768958997e-06, "loss": 1.4344, "step": 7647 }, { "epoch": 0.978881351593498, "grad_norm": 0.7578125, "learning_rate": 2.3386169276267134e-06, "loss": 1.5816, "step": 7648 }, { "epoch": 0.9790093434020223, "grad_norm": 0.6640625, "learning_rate": 2.3103668426530668e-06, "loss": 1.2392, "step": 7649 }, { "epoch": 0.9791373352105466, "grad_norm": 0.5, "learning_rate": 2.282288226830187e-06, "loss": 0.9088, "step": 7650 }, { "epoch": 0.9792653270190708, "grad_norm": 0.9296875, "learning_rate": 2.254381084983659e-06, "loss": 2.5579, "step": 7651 }, { "epoch": 0.9793933188275951, "grad_norm": 0.57421875, "learning_rate": 2.2266454219100894e-06, "loss": 0.8467, "step": 7652 }, { "epoch": 0.9795213106361192, "grad_norm": 0.62890625, "learning_rate": 2.199081242376e-06, "loss": 1.029, "step": 7653 }, { "epoch": 0.9796493024446435, "grad_norm": 0.7734375, "learning_rate": 2.171688551118933e-06, "loss": 1.5759, "step": 7654 }, { "epoch": 0.9797772942531678, "grad_norm": 0.66015625, "learning_rate": 2.1444673528466794e-06, "loss": 1.3067, "step": 7655 }, { "epoch": 0.979905286061692, "grad_norm": 0.7734375, "learning_rate": 2.1174176522377184e-06, "loss": 1.496, "step": 7656 }, { "epoch": 0.9800332778702163, "grad_norm": 0.75390625, "learning_rate": 2.090539453940998e-06, "loss": 1.2532, "step": 7657 }, { "epoch": 0.9801612696787405, "grad_norm": 0.58984375, "learning_rate": 2.0638327625758237e-06, "loss": 0.8665, "step": 7658 }, { "epoch": 0.9802892614872648, "grad_norm": 0.77734375, "learning_rate": 2.0372975827321895e-06, "loss": 1.1649, "step": 7659 }, { "epoch": 0.9804172532957891, "grad_norm": 0.70703125, "learning_rate": 2.0109339189707807e-06, "loss": 1.1436, "step": 7660 }, { "epoch": 0.9805452451043133, "grad_norm": 0.66796875, "learning_rate": 1.9847417758225294e-06, "loss": 1.4215, "step": 7661 }, { "epoch": 0.9806732369128376, "grad_norm": 0.59765625, "learning_rate": 1.958721157788834e-06, "loss": 1.1823, "step": 7662 }, { "epoch": 0.9808012287213619, "grad_norm": 0.58203125, "learning_rate": 1.9328720693420065e-06, "loss": 1.0413, "step": 7663 }, { "epoch": 0.9809292205298861, "grad_norm": 0.71875, "learning_rate": 1.907194514924493e-06, "loss": 1.5131, "step": 7664 }, { "epoch": 0.9810572123384104, "grad_norm": 0.6875, "learning_rate": 1.8816884989494298e-06, "loss": 1.3872, "step": 7665 }, { "epoch": 0.9811852041469346, "grad_norm": 0.62890625, "learning_rate": 1.856354025800422e-06, "loss": 1.5878, "step": 7666 }, { "epoch": 0.9813131959554589, "grad_norm": 0.796875, "learning_rate": 1.8311910998315417e-06, "loss": 1.1224, "step": 7667 }, { "epoch": 0.9814411877639831, "grad_norm": 0.59375, "learning_rate": 1.806199725367552e-06, "loss": 1.0522, "step": 7668 }, { "epoch": 0.9815691795725073, "grad_norm": 0.59765625, "learning_rate": 1.781379906703573e-06, "loss": 0.942, "step": 7669 }, { "epoch": 0.9816971713810316, "grad_norm": 0.6328125, "learning_rate": 1.7567316481054142e-06, "loss": 1.2008, "step": 7670 }, { "epoch": 0.9818251631895558, "grad_norm": 0.6484375, "learning_rate": 1.7322549538091315e-06, "loss": 1.5894, "step": 7671 }, { "epoch": 0.9819531549980801, "grad_norm": 0.68359375, "learning_rate": 1.7079498280213602e-06, "loss": 1.4512, "step": 7672 }, { "epoch": 0.9820811468066044, "grad_norm": 0.65625, "learning_rate": 1.6838162749196472e-06, "loss": 1.3655, "step": 7673 }, { "epoch": 0.9822091386151286, "grad_norm": 0.66015625, "learning_rate": 1.659854298651453e-06, "loss": 1.5504, "step": 7674 }, { "epoch": 0.9823371304236529, "grad_norm": 0.828125, "learning_rate": 1.636063903335039e-06, "loss": 1.672, "step": 7675 }, { "epoch": 0.9824651222321772, "grad_norm": 0.8828125, "learning_rate": 1.6124450930593558e-06, "loss": 2.3579, "step": 7676 }, { "epoch": 0.9825931140407014, "grad_norm": 0.640625, "learning_rate": 1.5889978718836019e-06, "loss": 1.1299, "step": 7677 }, { "epoch": 0.9827211058492257, "grad_norm": 0.55078125, "learning_rate": 1.565722243837553e-06, "loss": 0.8458, "step": 7678 }, { "epoch": 0.9828490976577499, "grad_norm": 0.78515625, "learning_rate": 1.542618212921454e-06, "loss": 1.0402, "step": 7679 }, { "epoch": 0.9829770894662742, "grad_norm": 0.6875, "learning_rate": 1.5196857831061284e-06, "loss": 1.3139, "step": 7680 }, { "epoch": 0.9831050812747985, "grad_norm": 0.67578125, "learning_rate": 1.4969249583328682e-06, "loss": 1.4203, "step": 7681 }, { "epoch": 0.9832330730833226, "grad_norm": 1.1015625, "learning_rate": 1.474335742513655e-06, "loss": 1.2201, "step": 7682 }, { "epoch": 0.9833610648918469, "grad_norm": 0.734375, "learning_rate": 1.4519181395306059e-06, "loss": 1.3624, "step": 7683 }, { "epoch": 0.9834890567003711, "grad_norm": 0.77734375, "learning_rate": 1.4296721532366385e-06, "loss": 1.3955, "step": 7684 }, { "epoch": 0.9836170485088954, "grad_norm": 0.56640625, "learning_rate": 1.4075977874550282e-06, "loss": 0.723, "step": 7685 }, { "epoch": 0.9837450403174197, "grad_norm": 0.671875, "learning_rate": 1.3856950459796292e-06, "loss": 1.215, "step": 7686 }, { "epoch": 0.9838730321259439, "grad_norm": 0.81640625, "learning_rate": 1.3639639325748743e-06, "loss": 1.7912, "step": 7687 }, { "epoch": 0.9840010239344682, "grad_norm": 0.76953125, "learning_rate": 1.342404450975554e-06, "loss": 1.465, "step": 7688 }, { "epoch": 0.9841290157429925, "grad_norm": 0.68359375, "learning_rate": 1.3210166048870375e-06, "loss": 1.5159, "step": 7689 }, { "epoch": 0.9842570075515167, "grad_norm": 0.59375, "learning_rate": 1.2998003979850516e-06, "loss": 1.5011, "step": 7690 }, { "epoch": 0.984384999360041, "grad_norm": 0.5234375, "learning_rate": 1.2787558339161232e-06, "loss": 0.748, "step": 7691 }, { "epoch": 0.9845129911685652, "grad_norm": 0.5625, "learning_rate": 1.2578829162970262e-06, "loss": 1.0139, "step": 7692 }, { "epoch": 0.9846409829770895, "grad_norm": 0.640625, "learning_rate": 1.2371816487150022e-06, "loss": 1.0715, "step": 7693 }, { "epoch": 0.9847689747856138, "grad_norm": 0.61328125, "learning_rate": 1.216652034728094e-06, "loss": 1.0503, "step": 7694 }, { "epoch": 0.984896966594138, "grad_norm": 0.77734375, "learning_rate": 1.1962940778644794e-06, "loss": 1.0798, "step": 7695 }, { "epoch": 0.9850249584026622, "grad_norm": 0.5625, "learning_rate": 1.176107781623026e-06, "loss": 1.108, "step": 7696 }, { "epoch": 0.9851529502111864, "grad_norm": 0.70703125, "learning_rate": 1.1560931494732918e-06, "loss": 1.4611, "step": 7697 }, { "epoch": 0.9852809420197107, "grad_norm": 0.58984375, "learning_rate": 1.136250184854859e-06, "loss": 1.3569, "step": 7698 }, { "epoch": 0.985408933828235, "grad_norm": 0.765625, "learning_rate": 1.1165788911781105e-06, "loss": 1.629, "step": 7699 }, { "epoch": 0.9855369256367592, "grad_norm": 0.7578125, "learning_rate": 1.0970792718240086e-06, "loss": 1.2674, "step": 7700 }, { "epoch": 0.9856649174452835, "grad_norm": 0.5546875, "learning_rate": 1.0777513301437613e-06, "loss": 0.7952, "step": 7701 }, { "epoch": 0.9857929092538078, "grad_norm": 0.703125, "learning_rate": 1.0585950694591561e-06, "loss": 1.1816, "step": 7702 }, { "epoch": 0.985920901062332, "grad_norm": 0.72265625, "learning_rate": 1.0396104930626705e-06, "loss": 1.4056, "step": 7703 }, { "epoch": 0.9860488928708563, "grad_norm": 0.8359375, "learning_rate": 1.0207976042169165e-06, "loss": 1.2592, "step": 7704 }, { "epoch": 0.9861768846793805, "grad_norm": 0.703125, "learning_rate": 1.0021564061554189e-06, "loss": 1.5596, "step": 7705 }, { "epoch": 0.9863048764879048, "grad_norm": 0.72265625, "learning_rate": 9.836869020817263e-07, "loss": 1.1633, "step": 7706 }, { "epoch": 0.9864328682964291, "grad_norm": 0.71875, "learning_rate": 9.653890951701882e-07, "loss": 1.0803, "step": 7707 }, { "epoch": 0.9865608601049533, "grad_norm": 1.0078125, "learning_rate": 9.472629885657335e-07, "loss": 1.5093, "step": 7708 }, { "epoch": 0.9866888519134775, "grad_norm": 0.6015625, "learning_rate": 9.29308585383537e-07, "loss": 0.9156, "step": 7709 }, { "epoch": 0.9868168437220018, "grad_norm": 0.70703125, "learning_rate": 9.115258887093525e-07, "loss": 1.6845, "step": 7710 }, { "epoch": 0.986944835530526, "grad_norm": 0.609375, "learning_rate": 8.939149015992909e-07, "loss": 0.9045, "step": 7711 }, { "epoch": 0.9870728273390503, "grad_norm": 0.74609375, "learning_rate": 8.764756270803753e-07, "loss": 1.1633, "step": 7712 }, { "epoch": 0.9872008191475745, "grad_norm": 0.609375, "learning_rate": 8.592080681495418e-07, "loss": 1.0892, "step": 7713 }, { "epoch": 0.9873288109560988, "grad_norm": 0.5, "learning_rate": 8.421122277746385e-07, "loss": 0.8027, "step": 7714 }, { "epoch": 0.9874568027646231, "grad_norm": 0.703125, "learning_rate": 8.251881088938706e-07, "loss": 1.3451, "step": 7715 }, { "epoch": 0.9875847945731473, "grad_norm": 0.70703125, "learning_rate": 8.084357144159116e-07, "loss": 1.3319, "step": 7716 }, { "epoch": 0.9877127863816716, "grad_norm": 0.7421875, "learning_rate": 7.918550472199026e-07, "loss": 1.203, "step": 7717 }, { "epoch": 0.9878407781901958, "grad_norm": 0.5546875, "learning_rate": 7.754461101554533e-07, "loss": 1.029, "step": 7718 }, { "epoch": 0.9879687699987201, "grad_norm": 0.53125, "learning_rate": 7.592089060427521e-07, "loss": 0.9089, "step": 7719 }, { "epoch": 0.9880967618072444, "grad_norm": 0.53515625, "learning_rate": 7.431434376725665e-07, "loss": 1.0938, "step": 7720 }, { "epoch": 0.9882247536157686, "grad_norm": 0.62109375, "learning_rate": 7.272497078057994e-07, "loss": 1.1817, "step": 7721 }, { "epoch": 0.9883527454242929, "grad_norm": 0.609375, "learning_rate": 7.115277191741543e-07, "loss": 0.9498, "step": 7722 }, { "epoch": 0.9884807372328172, "grad_norm": 0.76171875, "learning_rate": 6.95977474479692e-07, "loss": 1.6569, "step": 7723 }, { "epoch": 0.9886087290413413, "grad_norm": 0.6015625, "learning_rate": 6.805989763949416e-07, "loss": 0.6282, "step": 7724 }, { "epoch": 0.9887367208498656, "grad_norm": 0.67578125, "learning_rate": 6.653922275630109e-07, "loss": 1.7301, "step": 7725 }, { "epoch": 0.9888647126583898, "grad_norm": 0.85546875, "learning_rate": 6.503572305973648e-07, "loss": 1.6303, "step": 7726 }, { "epoch": 0.9889927044669141, "grad_norm": 0.8984375, "learning_rate": 6.354939880820476e-07, "loss": 2.0322, "step": 7727 }, { "epoch": 0.9891206962754384, "grad_norm": 0.6328125, "learning_rate": 6.208025025714603e-07, "loss": 1.3237, "step": 7728 }, { "epoch": 0.9892486880839626, "grad_norm": 0.75390625, "learning_rate": 6.062827765906942e-07, "loss": 1.4071, "step": 7729 }, { "epoch": 0.9893766798924869, "grad_norm": 0.56640625, "learning_rate": 5.919348126351976e-07, "loss": 0.8781, "step": 7730 }, { "epoch": 0.9895046717010111, "grad_norm": 0.6640625, "learning_rate": 5.777586131707757e-07, "loss": 1.3403, "step": 7731 }, { "epoch": 0.9896326635095354, "grad_norm": 0.71484375, "learning_rate": 5.637541806340352e-07, "loss": 1.5385, "step": 7732 }, { "epoch": 0.9897606553180597, "grad_norm": 0.73046875, "learning_rate": 5.499215174317174e-07, "loss": 1.7873, "step": 7733 }, { "epoch": 0.9898886471265839, "grad_norm": 0.875, "learning_rate": 5.362606259411429e-07, "loss": 1.147, "step": 7734 }, { "epoch": 0.9900166389351082, "grad_norm": 0.65234375, "learning_rate": 5.227715085103224e-07, "loss": 1.2078, "step": 7735 }, { "epoch": 0.9901446307436325, "grad_norm": 0.671875, "learning_rate": 5.094541674574016e-07, "loss": 1.6193, "step": 7736 }, { "epoch": 0.9902726225521566, "grad_norm": 0.9296875, "learning_rate": 4.963086050713273e-07, "loss": 1.3844, "step": 7737 }, { "epoch": 0.9904006143606809, "grad_norm": 0.61328125, "learning_rate": 4.833348236114032e-07, "loss": 0.877, "step": 7738 }, { "epoch": 0.9905286061692051, "grad_norm": 0.70703125, "learning_rate": 4.7053282530717946e-07, "loss": 1.6303, "step": 7739 }, { "epoch": 0.9906565979777294, "grad_norm": 0.73828125, "learning_rate": 4.579026123591179e-07, "loss": 1.504, "step": 7740 }, { "epoch": 0.9907845897862537, "grad_norm": 0.67578125, "learning_rate": 4.4544418693770464e-07, "loss": 1.5675, "step": 7741 }, { "epoch": 0.9909125815947779, "grad_norm": 0.6484375, "learning_rate": 4.331575511843378e-07, "loss": 1.6593, "step": 7742 }, { "epoch": 0.9910405734033022, "grad_norm": 0.66796875, "learning_rate": 4.210427072105505e-07, "loss": 1.5741, "step": 7743 }, { "epoch": 0.9911685652118264, "grad_norm": 0.53515625, "learning_rate": 4.090996570984551e-07, "loss": 0.8151, "step": 7744 }, { "epoch": 0.9912965570203507, "grad_norm": 0.5546875, "learning_rate": 3.973284029007429e-07, "loss": 0.9477, "step": 7745 }, { "epoch": 0.991424548828875, "grad_norm": 0.95703125, "learning_rate": 3.857289466404623e-07, "loss": 1.8974, "step": 7746 }, { "epoch": 0.9915525406373992, "grad_norm": 0.62109375, "learning_rate": 3.743012903112408e-07, "loss": 1.2364, "step": 7747 }, { "epoch": 0.9916805324459235, "grad_norm": 0.64453125, "learning_rate": 3.630454358769519e-07, "loss": 1.2693, "step": 7748 }, { "epoch": 0.9918085242544478, "grad_norm": 0.6171875, "learning_rate": 3.519613852721593e-07, "loss": 1.0317, "step": 7749 }, { "epoch": 0.991936516062972, "grad_norm": 0.609375, "learning_rate": 3.4104914040178346e-07, "loss": 1.1233, "step": 7750 }, { "epoch": 0.9920645078714962, "grad_norm": 0.8125, "learning_rate": 3.3030870314143534e-07, "loss": 1.7189, "step": 7751 }, { "epoch": 0.9921924996800204, "grad_norm": 0.6875, "learning_rate": 3.197400753369717e-07, "loss": 1.2214, "step": 7752 }, { "epoch": 0.9923204914885447, "grad_norm": 0.609375, "learning_rate": 3.0934325880460634e-07, "loss": 1.2246, "step": 7753 }, { "epoch": 0.992448483297069, "grad_norm": 0.703125, "learning_rate": 2.991182553314653e-07, "loss": 1.1971, "step": 7754 }, { "epoch": 0.9925764751055932, "grad_norm": 0.5546875, "learning_rate": 2.8906506667469856e-07, "loss": 1.147, "step": 7755 }, { "epoch": 0.9927044669141175, "grad_norm": 0.6640625, "learning_rate": 2.791836945621462e-07, "loss": 1.5582, "step": 7756 }, { "epoch": 0.9928324587226417, "grad_norm": 0.625, "learning_rate": 2.6947414069222743e-07, "loss": 1.1727, "step": 7757 }, { "epoch": 0.992960450531166, "grad_norm": 0.67578125, "learning_rate": 2.5993640673338534e-07, "loss": 1.5483, "step": 7758 }, { "epoch": 0.9930884423396903, "grad_norm": 0.6796875, "learning_rate": 2.505704943251974e-07, "loss": 1.3965, "step": 7759 }, { "epoch": 0.9932164341482145, "grad_norm": 0.5546875, "learning_rate": 2.413764050770428e-07, "loss": 1.0938, "step": 7760 }, { "epoch": 0.9933444259567388, "grad_norm": 0.61328125, "learning_rate": 2.3235414056921312e-07, "loss": 1.2474, "step": 7761 }, { "epoch": 0.9934724177652631, "grad_norm": 0.55078125, "learning_rate": 2.2350370235235672e-07, "loss": 0.69, "step": 7762 }, { "epoch": 0.9936004095737873, "grad_norm": 0.69921875, "learning_rate": 2.1482509194747923e-07, "loss": 1.1803, "step": 7763 }, { "epoch": 0.9937284013823116, "grad_norm": 0.671875, "learning_rate": 2.0631831084616526e-07, "loss": 1.0686, "step": 7764 }, { "epoch": 0.9938563931908357, "grad_norm": 0.71875, "learning_rate": 1.979833605104675e-07, "loss": 1.6951, "step": 7765 }, { "epoch": 0.99398438499936, "grad_norm": 0.6875, "learning_rate": 1.8982024237290672e-07, "loss": 1.3374, "step": 7766 }, { "epoch": 0.9941123768078843, "grad_norm": 0.6796875, "learning_rate": 1.8182895783636076e-07, "loss": 1.4796, "step": 7767 }, { "epoch": 0.9942403686164085, "grad_norm": 0.71875, "learning_rate": 1.7400950827439754e-07, "loss": 1.5825, "step": 7768 }, { "epoch": 0.9943683604249328, "grad_norm": 0.6328125, "learning_rate": 1.66361895030831e-07, "loss": 1.0009, "step": 7769 }, { "epoch": 0.994496352233457, "grad_norm": 0.58203125, "learning_rate": 1.5888611941994313e-07, "loss": 0.8854, "step": 7770 }, { "epoch": 0.9946243440419813, "grad_norm": 0.46484375, "learning_rate": 1.515821827267061e-07, "loss": 0.5965, "step": 7771 }, { "epoch": 0.9947523358505056, "grad_norm": 0.72265625, "learning_rate": 1.4445008620644905e-07, "loss": 1.3185, "step": 7772 }, { "epoch": 0.9948803276590298, "grad_norm": 0.640625, "learning_rate": 1.3748983108474723e-07, "loss": 1.0678, "step": 7773 }, { "epoch": 0.9950083194675541, "grad_norm": 0.59375, "learning_rate": 1.307014185579769e-07, "loss": 1.1014, "step": 7774 }, { "epoch": 0.9951363112760784, "grad_norm": 0.6015625, "learning_rate": 1.2408484979276047e-07, "loss": 1.1888, "step": 7775 }, { "epoch": 0.9952643030846026, "grad_norm": 0.6171875, "learning_rate": 1.1764012592641038e-07, "loss": 1.3292, "step": 7776 }, { "epoch": 0.9953922948931269, "grad_norm": 0.74609375, "learning_rate": 1.113672480663741e-07, "loss": 1.3634, "step": 7777 }, { "epoch": 0.995520286701651, "grad_norm": 0.64453125, "learning_rate": 1.0526621729090025e-07, "loss": 1.1016, "step": 7778 }, { "epoch": 0.9956482785101753, "grad_norm": 0.65625, "learning_rate": 9.933703464837241e-08, "loss": 1.0018, "step": 7779 }, { "epoch": 0.9957762703186996, "grad_norm": 0.6484375, "learning_rate": 9.357970115808633e-08, "loss": 1.1054, "step": 7780 }, { "epoch": 0.9959042621272238, "grad_norm": 0.6328125, "learning_rate": 8.799421780925077e-08, "loss": 1.1555, "step": 7781 }, { "epoch": 0.9960322539357481, "grad_norm": 0.65625, "learning_rate": 8.258058556187553e-08, "loss": 1.4344, "step": 7782 }, { "epoch": 0.9961602457442724, "grad_norm": 0.70703125, "learning_rate": 7.733880534654958e-08, "loss": 1.5572, "step": 7783 }, { "epoch": 0.9962882375527966, "grad_norm": 0.71484375, "learning_rate": 7.226887806399685e-08, "loss": 1.4662, "step": 7784 }, { "epoch": 0.9964162293613209, "grad_norm": 0.71484375, "learning_rate": 6.737080458574241e-08, "loss": 1.5294, "step": 7785 }, { "epoch": 0.9965442211698451, "grad_norm": 0.4921875, "learning_rate": 6.264458575333532e-08, "loss": 0.6154, "step": 7786 }, { "epoch": 0.9966722129783694, "grad_norm": 0.76171875, "learning_rate": 5.809022237923678e-08, "loss": 1.5279, "step": 7787 }, { "epoch": 0.9968002047868937, "grad_norm": 0.84765625, "learning_rate": 5.370771524615403e-08, "loss": 0.9083, "step": 7788 }, { "epoch": 0.9969281965954179, "grad_norm": 0.59375, "learning_rate": 4.94970651073734e-08, "loss": 0.8643, "step": 7789 }, { "epoch": 0.9970561884039422, "grad_norm": 0.53515625, "learning_rate": 4.545827268642722e-08, "loss": 1.049, "step": 7790 }, { "epoch": 0.9971841802124664, "grad_norm": 0.62109375, "learning_rate": 4.159133867742693e-08, "loss": 1.2817, "step": 7791 }, { "epoch": 0.9973121720209907, "grad_norm": 0.67578125, "learning_rate": 3.7896263745063056e-08, "loss": 1.9225, "step": 7792 }, { "epoch": 0.997440163829515, "grad_norm": 0.60546875, "learning_rate": 3.4373048524383164e-08, "loss": 1.3978, "step": 7793 }, { "epoch": 0.9975681556380391, "grad_norm": 0.6015625, "learning_rate": 3.10216936209029e-08, "loss": 0.9134, "step": 7794 }, { "epoch": 0.9976961474465634, "grad_norm": 0.89453125, "learning_rate": 2.7842199610605967e-08, "loss": 1.1647, "step": 7795 }, { "epoch": 0.9978241392550877, "grad_norm": 0.68359375, "learning_rate": 2.4834567039944133e-08, "loss": 1.2581, "step": 7796 }, { "epoch": 0.9979521310636119, "grad_norm": 0.5546875, "learning_rate": 2.1998796425837243e-08, "loss": 1.0091, "step": 7797 }, { "epoch": 0.9980801228721362, "grad_norm": 0.451171875, "learning_rate": 1.9334888255562177e-08, "loss": 0.5595, "step": 7798 }, { "epoch": 0.9982081146806604, "grad_norm": 0.828125, "learning_rate": 1.6842842987085937e-08, "loss": 1.862, "step": 7799 }, { "epoch": 0.9983361064891847, "grad_norm": 0.609375, "learning_rate": 1.4522661048621544e-08, "loss": 1.0089, "step": 7800 }, { "epoch": 0.998464098297709, "grad_norm": 0.80859375, "learning_rate": 1.2374342839072128e-08, "loss": 1.3608, "step": 7801 }, { "epoch": 0.9985920901062332, "grad_norm": 0.61328125, "learning_rate": 1.0397888727364801e-08, "loss": 0.9756, "step": 7802 }, { "epoch": 0.9987200819147575, "grad_norm": 0.61328125, "learning_rate": 8.593299053449854e-09, "loss": 1.2206, "step": 7803 }, { "epoch": 0.9988480737232817, "grad_norm": 0.51953125, "learning_rate": 6.960574127412578e-09, "loss": 0.9246, "step": 7804 }, { "epoch": 0.998976065531806, "grad_norm": 0.69921875, "learning_rate": 5.499714229917352e-09, "loss": 1.182, "step": 7805 }, { "epoch": 0.9991040573403303, "grad_norm": 0.6171875, "learning_rate": 4.210719611874581e-09, "loss": 1.1942, "step": 7806 }, { "epoch": 0.9992320491488544, "grad_norm": 0.4609375, "learning_rate": 3.0935904948847792e-09, "loss": 0.8916, "step": 7807 }, { "epoch": 0.9993600409573787, "grad_norm": 0.51171875, "learning_rate": 2.1483270710165315e-09, "loss": 0.7018, "step": 7808 }, { "epoch": 0.999488032765903, "grad_norm": 0.765625, "learning_rate": 1.3749295026954656e-09, "loss": 1.4432, "step": 7809 }, { "epoch": 0.9996160245744272, "grad_norm": 0.63671875, "learning_rate": 7.733979228152777e-10, "loss": 1.2541, "step": 7810 }, { "epoch": 0.9997440163829515, "grad_norm": 0.6796875, "learning_rate": 3.437324347377313e-10, "loss": 1.3286, "step": 7811 }, { "epoch": 0.9998720081914757, "grad_norm": 0.6171875, "learning_rate": 8.593311240367995e-11, "loss": 0.7655, "step": 7812 }, { "epoch": 1.0, "grad_norm": 0.59765625, "learning_rate": 0.0, "loss": 0.9618, "step": 7813 } ], "logging_steps": 1, "max_steps": 7813, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7044895314634342e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }