{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.6, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00128, "grad_norm": 0.6006259322166443, "learning_rate": 5e-06, "loss": 1.1793, "step": 1 }, { "epoch": 0.00256, "grad_norm": 0.6071895956993103, "learning_rate": 1e-05, "loss": 1.2464, "step": 2 }, { "epoch": 0.00384, "grad_norm": 0.584945797920227, "learning_rate": 1.5e-05, "loss": 1.2501, "step": 3 }, { "epoch": 0.00512, "grad_norm": 0.6185263395309448, "learning_rate": 2e-05, "loss": 1.285, "step": 4 }, { "epoch": 0.0064, "grad_norm": 0.6382588744163513, "learning_rate": 2.5e-05, "loss": 1.2345, "step": 5 }, { "epoch": 0.00768, "grad_norm": 0.6335554122924805, "learning_rate": 3e-05, "loss": 1.2112, "step": 6 }, { "epoch": 0.00896, "grad_norm": 0.619415283203125, "learning_rate": 3.5e-05, "loss": 1.2691, "step": 7 }, { "epoch": 0.01024, "grad_norm": 0.6417765617370605, "learning_rate": 4e-05, "loss": 1.2458, "step": 8 }, { "epoch": 0.01152, "grad_norm": 0.5834643244743347, "learning_rate": 4.5e-05, "loss": 1.119, "step": 9 }, { "epoch": 0.0128, "grad_norm": 0.5374929308891296, "learning_rate": 5e-05, "loss": 1.0867, "step": 10 }, { "epoch": 0.01408, "grad_norm": 0.5630415678024292, "learning_rate": 4.9997999199679874e-05, "loss": 1.1491, "step": 11 }, { "epoch": 0.01536, "grad_norm": 0.5184459090232849, "learning_rate": 4.9995998399359746e-05, "loss": 1.1131, "step": 12 }, { "epoch": 0.01664, "grad_norm": 0.5558237433433533, "learning_rate": 4.999399759903962e-05, "loss": 1.1601, "step": 13 }, { "epoch": 0.01792, "grad_norm": 0.5303581953048706, "learning_rate": 4.999199679871949e-05, "loss": 1.0869, "step": 14 }, { "epoch": 0.0192, "grad_norm": 0.570815920829773, "learning_rate": 4.998999599839936e-05, "loss": 1.074, "step": 15 }, { "epoch": 0.02048, "grad_norm": 0.5760554075241089, "learning_rate": 4.9987995198079234e-05, "loss": 
1.1381, "step": 16 }, { "epoch": 0.02176, "grad_norm": 0.565275251865387, "learning_rate": 4.9985994397759105e-05, "loss": 1.1634, "step": 17 }, { "epoch": 0.02304, "grad_norm": 0.5491355061531067, "learning_rate": 4.998399359743898e-05, "loss": 1.0564, "step": 18 }, { "epoch": 0.02432, "grad_norm": 0.5034635663032532, "learning_rate": 4.998199279711885e-05, "loss": 0.9781, "step": 19 }, { "epoch": 0.0256, "grad_norm": 0.5160910487174988, "learning_rate": 4.997999199679872e-05, "loss": 1.0313, "step": 20 }, { "epoch": 0.02688, "grad_norm": 0.5402336716651917, "learning_rate": 4.99779911964786e-05, "loss": 1.1054, "step": 21 }, { "epoch": 0.02816, "grad_norm": 0.4777592420578003, "learning_rate": 4.9975990396158465e-05, "loss": 0.9927, "step": 22 }, { "epoch": 0.02944, "grad_norm": 0.4950423240661621, "learning_rate": 4.9973989595838337e-05, "loss": 1.0234, "step": 23 }, { "epoch": 0.03072, "grad_norm": 0.4756661057472229, "learning_rate": 4.997198879551821e-05, "loss": 0.9862, "step": 24 }, { "epoch": 0.032, "grad_norm": 0.47625282406806946, "learning_rate": 4.996998799519808e-05, "loss": 0.9818, "step": 25 }, { "epoch": 0.03328, "grad_norm": 0.4371047914028168, "learning_rate": 4.996798719487795e-05, "loss": 0.9961, "step": 26 }, { "epoch": 0.03456, "grad_norm": 0.4368005692958832, "learning_rate": 4.9965986394557824e-05, "loss": 0.9859, "step": 27 }, { "epoch": 0.03584, "grad_norm": 0.4649926424026489, "learning_rate": 4.99639855942377e-05, "loss": 1.0016, "step": 28 }, { "epoch": 0.03712, "grad_norm": 0.45246022939682007, "learning_rate": 4.9961984793917574e-05, "loss": 1.016, "step": 29 }, { "epoch": 0.0384, "grad_norm": 0.41020557284355164, "learning_rate": 4.995998399359744e-05, "loss": 0.9839, "step": 30 }, { "epoch": 0.03968, "grad_norm": 0.39636656641960144, "learning_rate": 4.995798319327731e-05, "loss": 0.9041, "step": 31 }, { "epoch": 0.04096, "grad_norm": 0.4260682165622711, "learning_rate": 4.995598239295718e-05, "loss": 1.0016, "step": 32 }, { 
"epoch": 0.04224, "grad_norm": 0.4026011526584625, "learning_rate": 4.9953981592637055e-05, "loss": 0.8832, "step": 33 }, { "epoch": 0.04352, "grad_norm": 0.3997635841369629, "learning_rate": 4.995198079231693e-05, "loss": 0.968, "step": 34 }, { "epoch": 0.0448, "grad_norm": 0.37634411454200745, "learning_rate": 4.9949979991996806e-05, "loss": 0.9207, "step": 35 }, { "epoch": 0.04608, "grad_norm": 0.3642372786998749, "learning_rate": 4.994797919167668e-05, "loss": 0.8979, "step": 36 }, { "epoch": 0.04736, "grad_norm": 0.3647741675376892, "learning_rate": 4.994597839135655e-05, "loss": 0.8897, "step": 37 }, { "epoch": 0.04864, "grad_norm": 0.3974230885505676, "learning_rate": 4.9943977591036414e-05, "loss": 0.9336, "step": 38 }, { "epoch": 0.04992, "grad_norm": 0.3915899693965912, "learning_rate": 4.9941976790716286e-05, "loss": 0.9408, "step": 39 }, { "epoch": 0.0512, "grad_norm": 0.40685927867889404, "learning_rate": 4.993997599039616e-05, "loss": 0.9166, "step": 40 }, { "epoch": 0.05248, "grad_norm": 0.35324081778526306, "learning_rate": 4.993797519007603e-05, "loss": 0.8683, "step": 41 }, { "epoch": 0.05376, "grad_norm": 0.36212658882141113, "learning_rate": 4.993597438975591e-05, "loss": 0.8828, "step": 42 }, { "epoch": 0.05504, "grad_norm": 0.38610896468162537, "learning_rate": 4.993397358943578e-05, "loss": 0.9092, "step": 43 }, { "epoch": 0.05632, "grad_norm": 0.36929619312286377, "learning_rate": 4.993197278911565e-05, "loss": 0.9132, "step": 44 }, { "epoch": 0.0576, "grad_norm": 0.3837580680847168, "learning_rate": 4.9929971988795524e-05, "loss": 0.9606, "step": 45 }, { "epoch": 0.05888, "grad_norm": 0.36300909519195557, "learning_rate": 4.992797118847539e-05, "loss": 0.8827, "step": 46 }, { "epoch": 0.06016, "grad_norm": 0.38039928674697876, "learning_rate": 4.992597038815526e-05, "loss": 0.8996, "step": 47 }, { "epoch": 0.06144, "grad_norm": 0.3811199367046356, "learning_rate": 4.992396958783513e-05, "loss": 0.9114, "step": 48 }, { "epoch": 0.06272, 
"grad_norm": 0.36827829480171204, "learning_rate": 4.992196878751501e-05, "loss": 0.8199, "step": 49 }, { "epoch": 0.064, "grad_norm": 0.35475796461105347, "learning_rate": 4.9919967987194883e-05, "loss": 0.8741, "step": 50 }, { "epoch": 0.06528, "grad_norm": 0.3906758725643158, "learning_rate": 4.9917967186874755e-05, "loss": 0.9037, "step": 51 }, { "epoch": 0.06656, "grad_norm": 0.3880329430103302, "learning_rate": 4.991596638655463e-05, "loss": 0.9474, "step": 52 }, { "epoch": 0.06784, "grad_norm": 0.38061219453811646, "learning_rate": 4.99139655862345e-05, "loss": 0.894, "step": 53 }, { "epoch": 0.06912, "grad_norm": 0.39929622411727905, "learning_rate": 4.9911964785914364e-05, "loss": 0.9322, "step": 54 }, { "epoch": 0.0704, "grad_norm": 0.38368162512779236, "learning_rate": 4.9909963985594236e-05, "loss": 0.9286, "step": 55 }, { "epoch": 0.07168, "grad_norm": 0.3783194124698639, "learning_rate": 4.9907963185274115e-05, "loss": 0.9337, "step": 56 }, { "epoch": 0.07296, "grad_norm": 0.3646315932273865, "learning_rate": 4.9905962384953986e-05, "loss": 0.8836, "step": 57 }, { "epoch": 0.07424, "grad_norm": 0.3632262349128723, "learning_rate": 4.990396158463386e-05, "loss": 0.8382, "step": 58 }, { "epoch": 0.07552, "grad_norm": 0.41220006346702576, "learning_rate": 4.990196078431373e-05, "loss": 0.9255, "step": 59 }, { "epoch": 0.0768, "grad_norm": 0.39080944657325745, "learning_rate": 4.98999599839936e-05, "loss": 0.8664, "step": 60 }, { "epoch": 0.07808, "grad_norm": 0.37954285740852356, "learning_rate": 4.9897959183673474e-05, "loss": 0.8716, "step": 61 }, { "epoch": 0.07936, "grad_norm": 0.40623852610588074, "learning_rate": 4.989595838335334e-05, "loss": 0.9211, "step": 62 }, { "epoch": 0.08064, "grad_norm": 0.3674965500831604, "learning_rate": 4.989395758303322e-05, "loss": 0.8427, "step": 63 }, { "epoch": 0.08192, "grad_norm": 0.3909313380718231, "learning_rate": 4.989195678271309e-05, "loss": 0.8804, "step": 64 }, { "epoch": 0.0832, "grad_norm": 
0.397886723279953, "learning_rate": 4.988995598239296e-05, "loss": 0.8772, "step": 65 }, { "epoch": 0.08448, "grad_norm": 0.38570407032966614, "learning_rate": 4.988795518207283e-05, "loss": 0.8593, "step": 66 }, { "epoch": 0.08576, "grad_norm": 0.4147164225578308, "learning_rate": 4.9885954381752705e-05, "loss": 0.9182, "step": 67 }, { "epoch": 0.08704, "grad_norm": 0.3853755593299866, "learning_rate": 4.988395358143258e-05, "loss": 0.8583, "step": 68 }, { "epoch": 0.08832, "grad_norm": 0.3911557197570801, "learning_rate": 4.988195278111245e-05, "loss": 0.8066, "step": 69 }, { "epoch": 0.0896, "grad_norm": 0.3869986832141876, "learning_rate": 4.9879951980792314e-05, "loss": 0.8863, "step": 70 }, { "epoch": 0.09088, "grad_norm": 0.425576388835907, "learning_rate": 4.987795118047219e-05, "loss": 0.9285, "step": 71 }, { "epoch": 0.09216, "grad_norm": 0.4300587773323059, "learning_rate": 4.9875950380152064e-05, "loss": 0.8604, "step": 72 }, { "epoch": 0.09344, "grad_norm": 0.4201204478740692, "learning_rate": 4.9873949579831936e-05, "loss": 0.8818, "step": 73 }, { "epoch": 0.09472, "grad_norm": 0.41632699966430664, "learning_rate": 4.987194877951181e-05, "loss": 0.8951, "step": 74 }, { "epoch": 0.096, "grad_norm": 0.398713082075119, "learning_rate": 4.986994797919168e-05, "loss": 0.8571, "step": 75 }, { "epoch": 0.09728, "grad_norm": 0.40266332030296326, "learning_rate": 4.986794717887155e-05, "loss": 0.8546, "step": 76 }, { "epoch": 0.09856, "grad_norm": 0.41864725947380066, "learning_rate": 4.9865946378551424e-05, "loss": 0.8228, "step": 77 }, { "epoch": 0.09984, "grad_norm": 0.3853597044944763, "learning_rate": 4.9863945578231295e-05, "loss": 0.8282, "step": 78 }, { "epoch": 0.10112, "grad_norm": 0.44597557187080383, "learning_rate": 4.986194477791117e-05, "loss": 0.9163, "step": 79 }, { "epoch": 0.1024, "grad_norm": 0.4048195481300354, "learning_rate": 4.985994397759104e-05, "loss": 0.8456, "step": 80 }, { "epoch": 0.10368, "grad_norm": 0.4152766466140747, 
"learning_rate": 4.985794317727091e-05, "loss": 0.8562, "step": 81 }, { "epoch": 0.10496, "grad_norm": 0.4202408492565155, "learning_rate": 4.985594237695078e-05, "loss": 0.8878, "step": 82 }, { "epoch": 0.10624, "grad_norm": 0.4306046962738037, "learning_rate": 4.9853941576630655e-05, "loss": 0.8816, "step": 83 }, { "epoch": 0.10752, "grad_norm": 0.4268186688423157, "learning_rate": 4.9851940776310527e-05, "loss": 0.7838, "step": 84 }, { "epoch": 0.1088, "grad_norm": 0.4262542128562927, "learning_rate": 4.98499399759904e-05, "loss": 0.812, "step": 85 }, { "epoch": 0.11008, "grad_norm": 0.44477730989456177, "learning_rate": 4.984793917567027e-05, "loss": 0.8684, "step": 86 }, { "epoch": 0.11136, "grad_norm": 0.4398692548274994, "learning_rate": 4.984593837535014e-05, "loss": 0.8871, "step": 87 }, { "epoch": 0.11264, "grad_norm": 0.4244236350059509, "learning_rate": 4.9843937575030014e-05, "loss": 0.8801, "step": 88 }, { "epoch": 0.11392, "grad_norm": 0.47705894708633423, "learning_rate": 4.9841936774709886e-05, "loss": 0.8609, "step": 89 }, { "epoch": 0.1152, "grad_norm": 0.44810983538627625, "learning_rate": 4.983993597438976e-05, "loss": 0.8376, "step": 90 }, { "epoch": 0.11648, "grad_norm": 0.41785943508148193, "learning_rate": 4.983793517406963e-05, "loss": 0.8113, "step": 91 }, { "epoch": 0.11776, "grad_norm": 0.44845548272132874, "learning_rate": 4.98359343737495e-05, "loss": 0.8438, "step": 92 }, { "epoch": 0.11904, "grad_norm": 0.4400695562362671, "learning_rate": 4.983393357342937e-05, "loss": 0.8242, "step": 93 }, { "epoch": 0.12032, "grad_norm": 0.4483565092086792, "learning_rate": 4.9831932773109245e-05, "loss": 0.8877, "step": 94 }, { "epoch": 0.1216, "grad_norm": 0.46313560009002686, "learning_rate": 4.982993197278912e-05, "loss": 0.8162, "step": 95 }, { "epoch": 0.12288, "grad_norm": 0.4211414158344269, "learning_rate": 4.982793117246899e-05, "loss": 0.807, "step": 96 }, { "epoch": 0.12416, "grad_norm": 0.44126561284065247, "learning_rate": 
4.982593037214886e-05, "loss": 0.8403, "step": 97 }, { "epoch": 0.12544, "grad_norm": 0.4854923486709595, "learning_rate": 4.982392957182873e-05, "loss": 0.8869, "step": 98 }, { "epoch": 0.12672, "grad_norm": 0.5096859335899353, "learning_rate": 4.982192877150861e-05, "loss": 0.9123, "step": 99 }, { "epoch": 0.128, "grad_norm": 0.4502103328704834, "learning_rate": 4.9819927971188476e-05, "loss": 0.834, "step": 100 }, { "epoch": 0.12928, "grad_norm": 0.4698023796081543, "learning_rate": 4.981792717086835e-05, "loss": 0.8685, "step": 101 }, { "epoch": 0.13056, "grad_norm": 0.464958131313324, "learning_rate": 4.981592637054822e-05, "loss": 0.8724, "step": 102 }, { "epoch": 0.13184, "grad_norm": 0.44018036127090454, "learning_rate": 4.981392557022809e-05, "loss": 0.8166, "step": 103 }, { "epoch": 0.13312, "grad_norm": 0.4710625112056732, "learning_rate": 4.9811924769907964e-05, "loss": 0.9041, "step": 104 }, { "epoch": 0.1344, "grad_norm": 0.45761746168136597, "learning_rate": 4.9809923969587836e-05, "loss": 0.8414, "step": 105 }, { "epoch": 0.13568, "grad_norm": 0.43286818265914917, "learning_rate": 4.9807923169267714e-05, "loss": 0.827, "step": 106 }, { "epoch": 0.13696, "grad_norm": 0.4683651030063629, "learning_rate": 4.9805922368947586e-05, "loss": 0.8831, "step": 107 }, { "epoch": 0.13824, "grad_norm": 0.4688320457935333, "learning_rate": 4.980392156862745e-05, "loss": 0.8693, "step": 108 }, { "epoch": 0.13952, "grad_norm": 0.4783925712108612, "learning_rate": 4.980192076830732e-05, "loss": 0.8086, "step": 109 }, { "epoch": 0.1408, "grad_norm": 0.463559091091156, "learning_rate": 4.9799919967987195e-05, "loss": 0.8274, "step": 110 }, { "epoch": 0.14208, "grad_norm": 0.5109454989433289, "learning_rate": 4.979791916766707e-05, "loss": 0.9021, "step": 111 }, { "epoch": 0.14336, "grad_norm": 0.44130197167396545, "learning_rate": 4.979591836734694e-05, "loss": 0.8358, "step": 112 }, { "epoch": 0.14464, "grad_norm": 0.45340853929519653, "learning_rate": 
4.979391756702682e-05, "loss": 0.8054, "step": 113 }, { "epoch": 0.14592, "grad_norm": 0.496158629655838, "learning_rate": 4.979191676670669e-05, "loss": 0.8812, "step": 114 }, { "epoch": 0.1472, "grad_norm": 0.4708779454231262, "learning_rate": 4.978991596638656e-05, "loss": 0.7753, "step": 115 }, { "epoch": 0.14848, "grad_norm": 0.5166846513748169, "learning_rate": 4.9787915166066426e-05, "loss": 0.9508, "step": 116 }, { "epoch": 0.14976, "grad_norm": 0.4957675635814667, "learning_rate": 4.97859143657463e-05, "loss": 0.8593, "step": 117 }, { "epoch": 0.15104, "grad_norm": 0.4897170066833496, "learning_rate": 4.978391356542617e-05, "loss": 0.8419, "step": 118 }, { "epoch": 0.15232, "grad_norm": 0.4425666630268097, "learning_rate": 4.978191276510604e-05, "loss": 0.8184, "step": 119 }, { "epoch": 0.1536, "grad_norm": 0.4843791723251343, "learning_rate": 4.977991196478592e-05, "loss": 0.8335, "step": 120 }, { "epoch": 0.15488, "grad_norm": 0.4781690239906311, "learning_rate": 4.977791116446579e-05, "loss": 0.8835, "step": 121 }, { "epoch": 0.15616, "grad_norm": 0.5279680490493774, "learning_rate": 4.9775910364145664e-05, "loss": 0.8627, "step": 122 }, { "epoch": 0.15744, "grad_norm": 0.525348961353302, "learning_rate": 4.9773909563825536e-05, "loss": 0.9173, "step": 123 }, { "epoch": 0.15872, "grad_norm": 0.4671289324760437, "learning_rate": 4.97719087635054e-05, "loss": 0.8189, "step": 124 }, { "epoch": 0.16, "grad_norm": 0.48094815015792847, "learning_rate": 4.976990796318527e-05, "loss": 0.8514, "step": 125 }, { "epoch": 0.16128, "grad_norm": 0.49200230836868286, "learning_rate": 4.9767907162865145e-05, "loss": 0.8669, "step": 126 }, { "epoch": 0.16256, "grad_norm": 0.45223578810691833, "learning_rate": 4.976590636254502e-05, "loss": 0.811, "step": 127 }, { "epoch": 0.16384, "grad_norm": 0.48548707365989685, "learning_rate": 4.9763905562224895e-05, "loss": 0.8733, "step": 128 }, { "epoch": 0.16512, "grad_norm": 0.4744742214679718, "learning_rate": 
4.976190476190477e-05, "loss": 0.82, "step": 129 }, { "epoch": 0.1664, "grad_norm": 0.47322678565979004, "learning_rate": 4.975990396158464e-05, "loss": 0.8076, "step": 130 }, { "epoch": 0.16768, "grad_norm": 0.5006943345069885, "learning_rate": 4.975790316126451e-05, "loss": 0.9173, "step": 131 }, { "epoch": 0.16896, "grad_norm": 0.5181525945663452, "learning_rate": 4.9755902360944376e-05, "loss": 0.8946, "step": 132 }, { "epoch": 0.17024, "grad_norm": 0.4706552028656006, "learning_rate": 4.975390156062425e-05, "loss": 0.8297, "step": 133 }, { "epoch": 0.17152, "grad_norm": 0.5205573439598083, "learning_rate": 4.9751900760304126e-05, "loss": 0.9313, "step": 134 }, { "epoch": 0.1728, "grad_norm": 0.5010756850242615, "learning_rate": 4.9749899959984e-05, "loss": 0.7662, "step": 135 }, { "epoch": 0.17408, "grad_norm": 0.5203558206558228, "learning_rate": 4.974789915966387e-05, "loss": 0.8708, "step": 136 }, { "epoch": 0.17536, "grad_norm": 0.5031847357749939, "learning_rate": 4.974589835934374e-05, "loss": 0.8258, "step": 137 }, { "epoch": 0.17664, "grad_norm": 0.5000547170639038, "learning_rate": 4.9743897559023614e-05, "loss": 0.804, "step": 138 }, { "epoch": 0.17792, "grad_norm": 0.5364178419113159, "learning_rate": 4.9741896758703485e-05, "loss": 0.8398, "step": 139 }, { "epoch": 0.1792, "grad_norm": 0.4716164171695709, "learning_rate": 4.973989595838335e-05, "loss": 0.8043, "step": 140 }, { "epoch": 0.18048, "grad_norm": 0.48213207721710205, "learning_rate": 4.973789515806323e-05, "loss": 0.803, "step": 141 }, { "epoch": 0.18176, "grad_norm": 0.49524998664855957, "learning_rate": 4.97358943577431e-05, "loss": 0.8401, "step": 142 }, { "epoch": 0.18304, "grad_norm": 0.5219356417655945, "learning_rate": 4.973389355742297e-05, "loss": 0.8474, "step": 143 }, { "epoch": 0.18432, "grad_norm": 0.5039879679679871, "learning_rate": 4.9731892757102845e-05, "loss": 0.8219, "step": 144 }, { "epoch": 0.1856, "grad_norm": 0.48044779896736145, "learning_rate": 
4.9729891956782717e-05, "loss": 0.8212, "step": 145 }, { "epoch": 0.18688, "grad_norm": 0.5111274123191833, "learning_rate": 4.972789115646259e-05, "loss": 0.8266, "step": 146 }, { "epoch": 0.18816, "grad_norm": 0.5483076572418213, "learning_rate": 4.972589035614246e-05, "loss": 0.8127, "step": 147 }, { "epoch": 0.18944, "grad_norm": 0.4989505708217621, "learning_rate": 4.972388955582233e-05, "loss": 0.8285, "step": 148 }, { "epoch": 0.19072, "grad_norm": 0.5818595886230469, "learning_rate": 4.9721888755502204e-05, "loss": 0.8504, "step": 149 }, { "epoch": 0.192, "grad_norm": 0.530348539352417, "learning_rate": 4.9719887955182076e-05, "loss": 0.8316, "step": 150 }, { "epoch": 0.19328, "grad_norm": 0.50677090883255, "learning_rate": 4.971788715486195e-05, "loss": 0.8048, "step": 151 }, { "epoch": 0.19456, "grad_norm": 0.5426594614982605, "learning_rate": 4.971588635454182e-05, "loss": 0.8929, "step": 152 }, { "epoch": 0.19584, "grad_norm": 0.5290346145629883, "learning_rate": 4.971388555422169e-05, "loss": 0.8454, "step": 153 }, { "epoch": 0.19712, "grad_norm": 0.49939247965812683, "learning_rate": 4.971188475390156e-05, "loss": 0.8309, "step": 154 }, { "epoch": 0.1984, "grad_norm": 0.527169406414032, "learning_rate": 4.9709883953581435e-05, "loss": 0.8509, "step": 155 }, { "epoch": 0.19968, "grad_norm": 0.525046169757843, "learning_rate": 4.970788315326131e-05, "loss": 0.8518, "step": 156 }, { "epoch": 0.20096, "grad_norm": 0.5176852345466614, "learning_rate": 4.970588235294118e-05, "loss": 0.8062, "step": 157 }, { "epoch": 0.20224, "grad_norm": 0.5134345889091492, "learning_rate": 4.970388155262105e-05, "loss": 0.8723, "step": 158 }, { "epoch": 0.20352, "grad_norm": 0.5024152994155884, "learning_rate": 4.970188075230092e-05, "loss": 0.8053, "step": 159 }, { "epoch": 0.2048, "grad_norm": 0.5135634541511536, "learning_rate": 4.9699879951980794e-05, "loss": 0.7935, "step": 160 }, { "epoch": 0.20608, "grad_norm": 0.5161725878715515, "learning_rate": 
4.9697879151660666e-05, "loss": 0.808, "step": 161 }, { "epoch": 0.20736, "grad_norm": 0.5259711146354675, "learning_rate": 4.969587835134054e-05, "loss": 0.8209, "step": 162 }, { "epoch": 0.20864, "grad_norm": 0.4903233051300049, "learning_rate": 4.969387755102041e-05, "loss": 0.793, "step": 163 }, { "epoch": 0.20992, "grad_norm": 0.5051981806755066, "learning_rate": 4.969187675070028e-05, "loss": 0.7971, "step": 164 }, { "epoch": 0.2112, "grad_norm": 0.49914005398750305, "learning_rate": 4.9689875950380154e-05, "loss": 0.8292, "step": 165 }, { "epoch": 0.21248, "grad_norm": 0.5912098288536072, "learning_rate": 4.9687875150060026e-05, "loss": 0.8573, "step": 166 }, { "epoch": 0.21376, "grad_norm": 0.5313730239868164, "learning_rate": 4.96858743497399e-05, "loss": 0.8106, "step": 167 }, { "epoch": 0.21504, "grad_norm": 0.5088040232658386, "learning_rate": 4.968387354941977e-05, "loss": 0.8159, "step": 168 }, { "epoch": 0.21632, "grad_norm": 0.5306310057640076, "learning_rate": 4.968187274909965e-05, "loss": 0.7539, "step": 169 }, { "epoch": 0.2176, "grad_norm": 0.4969288110733032, "learning_rate": 4.967987194877951e-05, "loss": 0.7364, "step": 170 }, { "epoch": 0.21888, "grad_norm": 0.5466715097427368, "learning_rate": 4.9677871148459385e-05, "loss": 0.7734, "step": 171 }, { "epoch": 0.22016, "grad_norm": 0.5333448052406311, "learning_rate": 4.967587034813926e-05, "loss": 0.7622, "step": 172 }, { "epoch": 0.22144, "grad_norm": 0.49693411588668823, "learning_rate": 4.967386954781913e-05, "loss": 0.7967, "step": 173 }, { "epoch": 0.22272, "grad_norm": 0.5356098413467407, "learning_rate": 4.9671868747499e-05, "loss": 0.8457, "step": 174 }, { "epoch": 0.224, "grad_norm": 0.5146982073783875, "learning_rate": 4.966986794717887e-05, "loss": 0.8258, "step": 175 }, { "epoch": 0.22528, "grad_norm": 0.5248112082481384, "learning_rate": 4.966786714685875e-05, "loss": 0.7929, "step": 176 }, { "epoch": 0.22656, "grad_norm": 0.5330566763877869, "learning_rate": 
4.966586634653862e-05, "loss": 0.7916, "step": 177 }, { "epoch": 0.22784, "grad_norm": 0.5528807044029236, "learning_rate": 4.966386554621849e-05, "loss": 0.8199, "step": 178 }, { "epoch": 0.22912, "grad_norm": 0.5558779835700989, "learning_rate": 4.966186474589836e-05, "loss": 0.7826, "step": 179 }, { "epoch": 0.2304, "grad_norm": 0.5845450758934021, "learning_rate": 4.965986394557823e-05, "loss": 0.8208, "step": 180 }, { "epoch": 0.23168, "grad_norm": 0.565697431564331, "learning_rate": 4.9657863145258103e-05, "loss": 0.8423, "step": 181 }, { "epoch": 0.23296, "grad_norm": 0.49734389781951904, "learning_rate": 4.9655862344937975e-05, "loss": 0.7908, "step": 182 }, { "epoch": 0.23424, "grad_norm": 0.5148658156394958, "learning_rate": 4.965386154461785e-05, "loss": 0.7928, "step": 183 }, { "epoch": 0.23552, "grad_norm": 0.5373660922050476, "learning_rate": 4.9651860744297726e-05, "loss": 0.794, "step": 184 }, { "epoch": 0.2368, "grad_norm": 0.5560426712036133, "learning_rate": 4.96498599439776e-05, "loss": 0.8215, "step": 185 }, { "epoch": 0.23808, "grad_norm": 0.5648142695426941, "learning_rate": 4.964785914365746e-05, "loss": 0.8003, "step": 186 }, { "epoch": 0.23936, "grad_norm": 0.6051068902015686, "learning_rate": 4.9645858343337335e-05, "loss": 0.8386, "step": 187 }, { "epoch": 0.24064, "grad_norm": 0.5387214422225952, "learning_rate": 4.9643857543017206e-05, "loss": 0.8217, "step": 188 }, { "epoch": 0.24192, "grad_norm": 0.5685967803001404, "learning_rate": 4.964185674269708e-05, "loss": 0.8346, "step": 189 }, { "epoch": 0.2432, "grad_norm": 0.5596044659614563, "learning_rate": 4.963985594237695e-05, "loss": 0.8023, "step": 190 }, { "epoch": 0.24448, "grad_norm": 0.556212842464447, "learning_rate": 4.963785514205683e-05, "loss": 0.8171, "step": 191 }, { "epoch": 0.24576, "grad_norm": 0.5799182653427124, "learning_rate": 4.96358543417367e-05, "loss": 0.7663, "step": 192 }, { "epoch": 0.24704, "grad_norm": 0.5294367074966431, "learning_rate": 
4.963385354141657e-05, "loss": 0.7845, "step": 193 }, { "epoch": 0.24832, "grad_norm": 0.5038023591041565, "learning_rate": 4.963185274109644e-05, "loss": 0.7471, "step": 194 }, { "epoch": 0.2496, "grad_norm": 0.5687416195869446, "learning_rate": 4.962985194077631e-05, "loss": 0.8603, "step": 195 }, { "epoch": 0.25088, "grad_norm": 0.5455114245414734, "learning_rate": 4.962785114045618e-05, "loss": 0.8024, "step": 196 }, { "epoch": 0.25216, "grad_norm": 0.5598348379135132, "learning_rate": 4.962585034013605e-05, "loss": 0.7709, "step": 197 }, { "epoch": 0.25344, "grad_norm": 0.5542442798614502, "learning_rate": 4.962384953981593e-05, "loss": 0.7776, "step": 198 }, { "epoch": 0.25472, "grad_norm": 0.5675541162490845, "learning_rate": 4.9621848739495804e-05, "loss": 0.8026, "step": 199 }, { "epoch": 0.256, "grad_norm": 0.5367075204849243, "learning_rate": 4.9619847939175676e-05, "loss": 0.8048, "step": 200 }, { "epoch": 0.25728, "grad_norm": 0.5945519804954529, "learning_rate": 4.961784713885555e-05, "loss": 0.8014, "step": 201 }, { "epoch": 0.25856, "grad_norm": 0.5422332882881165, "learning_rate": 4.961584633853541e-05, "loss": 0.8761, "step": 202 }, { "epoch": 0.25984, "grad_norm": 0.5434099435806274, "learning_rate": 4.9613845538215284e-05, "loss": 0.793, "step": 203 }, { "epoch": 0.26112, "grad_norm": 0.49900439381599426, "learning_rate": 4.9611844737895156e-05, "loss": 0.7663, "step": 204 }, { "epoch": 0.2624, "grad_norm": 0.5360503792762756, "learning_rate": 4.9609843937575035e-05, "loss": 0.8549, "step": 205 }, { "epoch": 0.26368, "grad_norm": 0.5519281625747681, "learning_rate": 4.960784313725491e-05, "loss": 0.8166, "step": 206 }, { "epoch": 0.26496, "grad_norm": 0.5933863520622253, "learning_rate": 4.960584233693478e-05, "loss": 0.7875, "step": 207 }, { "epoch": 0.26624, "grad_norm": 0.5276301503181458, "learning_rate": 4.960384153661465e-05, "loss": 0.798, "step": 208 }, { "epoch": 0.26752, "grad_norm": 0.5891780853271484, "learning_rate": 
4.960184073629452e-05, "loss": 0.7636, "step": 209 }, { "epoch": 0.2688, "grad_norm": 0.6080418825149536, "learning_rate": 4.959983993597439e-05, "loss": 0.8056, "step": 210 }, { "epoch": 0.27008, "grad_norm": 0.5800920724868774, "learning_rate": 4.959783913565426e-05, "loss": 0.7786, "step": 211 }, { "epoch": 0.27136, "grad_norm": 0.544090747833252, "learning_rate": 4.959583833533414e-05, "loss": 0.807, "step": 212 }, { "epoch": 0.27264, "grad_norm": 0.5422959923744202, "learning_rate": 4.959383753501401e-05, "loss": 0.8109, "step": 213 }, { "epoch": 0.27392, "grad_norm": 0.5759320855140686, "learning_rate": 4.959183673469388e-05, "loss": 0.8202, "step": 214 }, { "epoch": 0.2752, "grad_norm": 0.6158757209777832, "learning_rate": 4.958983593437375e-05, "loss": 0.8622, "step": 215 }, { "epoch": 0.27648, "grad_norm": 0.5751588344573975, "learning_rate": 4.9587835134053625e-05, "loss": 0.8078, "step": 216 }, { "epoch": 0.27776, "grad_norm": 0.5633991956710815, "learning_rate": 4.95858343337335e-05, "loss": 0.8264, "step": 217 }, { "epoch": 0.27904, "grad_norm": 0.5534703135490417, "learning_rate": 4.958383353341336e-05, "loss": 0.8046, "step": 218 }, { "epoch": 0.28032, "grad_norm": 0.5665048956871033, "learning_rate": 4.958183273309324e-05, "loss": 0.8078, "step": 219 }, { "epoch": 0.2816, "grad_norm": 0.5370583534240723, "learning_rate": 4.957983193277311e-05, "loss": 0.7598, "step": 220 }, { "epoch": 0.28288, "grad_norm": 0.5629434585571289, "learning_rate": 4.9577831132452985e-05, "loss": 0.8066, "step": 221 }, { "epoch": 0.28416, "grad_norm": 0.5847271680831909, "learning_rate": 4.9575830332132856e-05, "loss": 0.8446, "step": 222 }, { "epoch": 0.28544, "grad_norm": 0.5916556119918823, "learning_rate": 4.957382953181273e-05, "loss": 0.802, "step": 223 }, { "epoch": 0.28672, "grad_norm": 0.6037752628326416, "learning_rate": 4.95718287314926e-05, "loss": 0.8271, "step": 224 }, { "epoch": 0.288, "grad_norm": 0.5692646503448486, "learning_rate": 4.956982793117247e-05, 
"loss": 0.8059, "step": 225 }, { "epoch": 0.28928, "grad_norm": 0.516044020652771, "learning_rate": 4.9567827130852344e-05, "loss": 0.7265, "step": 226 }, { "epoch": 0.29056, "grad_norm": 0.5782244801521301, "learning_rate": 4.9565826330532216e-05, "loss": 0.7515, "step": 227 }, { "epoch": 0.29184, "grad_norm": 0.5291789174079895, "learning_rate": 4.956382553021209e-05, "loss": 0.7896, "step": 228 }, { "epoch": 0.29312, "grad_norm": 0.5594845414161682, "learning_rate": 4.956182472989196e-05, "loss": 0.8484, "step": 229 }, { "epoch": 0.2944, "grad_norm": 0.5874782800674438, "learning_rate": 4.955982392957183e-05, "loss": 0.8865, "step": 230 }, { "epoch": 0.29568, "grad_norm": 0.5848100781440735, "learning_rate": 4.95578231292517e-05, "loss": 0.8274, "step": 231 }, { "epoch": 0.29696, "grad_norm": 0.5242554545402527, "learning_rate": 4.9555822328931575e-05, "loss": 0.7192, "step": 232 }, { "epoch": 0.29824, "grad_norm": 0.5743463039398193, "learning_rate": 4.955382152861145e-05, "loss": 0.82, "step": 233 }, { "epoch": 0.29952, "grad_norm": 0.5418503880500793, "learning_rate": 4.955182072829132e-05, "loss": 0.7837, "step": 234 }, { "epoch": 0.3008, "grad_norm": 0.5967192053794861, "learning_rate": 4.954981992797119e-05, "loss": 0.7851, "step": 235 }, { "epoch": 0.30208, "grad_norm": 0.6640393137931824, "learning_rate": 4.954781912765106e-05, "loss": 0.868, "step": 236 }, { "epoch": 0.30336, "grad_norm": 0.5320351123809814, "learning_rate": 4.9545818327330934e-05, "loss": 0.7881, "step": 237 }, { "epoch": 0.30464, "grad_norm": 0.5583452582359314, "learning_rate": 4.9543817527010806e-05, "loss": 0.7926, "step": 238 }, { "epoch": 0.30592, "grad_norm": 0.5930384993553162, "learning_rate": 4.954181672669068e-05, "loss": 0.8088, "step": 239 }, { "epoch": 0.3072, "grad_norm": 0.5465940833091736, "learning_rate": 4.953981592637055e-05, "loss": 0.8136, "step": 240 }, { "epoch": 0.30848, "grad_norm": 0.5826738476753235, "learning_rate": 4.953781512605042e-05, "loss": 0.8333, 
"step": 241 }, { "epoch": 0.30976, "grad_norm": 0.5693926811218262, "learning_rate": 4.9535814325730293e-05, "loss": 0.8225, "step": 242 }, { "epoch": 0.31104, "grad_norm": 0.5517392158508301, "learning_rate": 4.9533813525410165e-05, "loss": 0.797, "step": 243 }, { "epoch": 0.31232, "grad_norm": 0.5277729034423828, "learning_rate": 4.953181272509004e-05, "loss": 0.74, "step": 244 }, { "epoch": 0.3136, "grad_norm": 0.5529365539550781, "learning_rate": 4.952981192476991e-05, "loss": 0.785, "step": 245 }, { "epoch": 0.31488, "grad_norm": 0.5595861673355103, "learning_rate": 4.952781112444978e-05, "loss": 0.8319, "step": 246 }, { "epoch": 0.31616, "grad_norm": 0.5934503674507141, "learning_rate": 4.952581032412966e-05, "loss": 0.7452, "step": 247 }, { "epoch": 0.31744, "grad_norm": 0.5756224989891052, "learning_rate": 4.9523809523809525e-05, "loss": 0.7893, "step": 248 }, { "epoch": 0.31872, "grad_norm": 0.6283906698226929, "learning_rate": 4.9521808723489396e-05, "loss": 0.9012, "step": 249 }, { "epoch": 0.32, "grad_norm": 0.5654857158660889, "learning_rate": 4.951980792316927e-05, "loss": 0.7181, "step": 250 }, { "epoch": 0.32128, "grad_norm": 0.5287903547286987, "learning_rate": 4.951780712284914e-05, "loss": 0.7838, "step": 251 }, { "epoch": 0.32256, "grad_norm": 0.5523903369903564, "learning_rate": 4.951580632252901e-05, "loss": 0.8112, "step": 252 }, { "epoch": 0.32384, "grad_norm": 0.5311181545257568, "learning_rate": 4.9513805522208884e-05, "loss": 0.7635, "step": 253 }, { "epoch": 0.32512, "grad_norm": 0.5373832583427429, "learning_rate": 4.951180472188876e-05, "loss": 0.8084, "step": 254 }, { "epoch": 0.3264, "grad_norm": 0.506324052810669, "learning_rate": 4.9509803921568634e-05, "loss": 0.7529, "step": 255 }, { "epoch": 0.32768, "grad_norm": 0.5614925622940063, "learning_rate": 4.95078031212485e-05, "loss": 0.7682, "step": 256 }, { "epoch": 0.32896, "grad_norm": 0.5953945517539978, "learning_rate": 4.950580232092837e-05, "loss": 0.7948, "step": 257 }, { 
"epoch": 0.33024, "grad_norm": 0.5029836297035217, "learning_rate": 4.950380152060824e-05, "loss": 0.7703, "step": 258 }, { "epoch": 0.33152, "grad_norm": 0.591628909111023, "learning_rate": 4.9501800720288115e-05, "loss": 0.8775, "step": 259 }, { "epoch": 0.3328, "grad_norm": 0.5612605214118958, "learning_rate": 4.949979991996799e-05, "loss": 0.8241, "step": 260 }, { "epoch": 0.33408, "grad_norm": 0.5402380228042603, "learning_rate": 4.9497799119647866e-05, "loss": 0.7018, "step": 261 }, { "epoch": 0.33536, "grad_norm": 0.5295984148979187, "learning_rate": 4.949579831932774e-05, "loss": 0.8048, "step": 262 }, { "epoch": 0.33664, "grad_norm": 0.5453588962554932, "learning_rate": 4.949379751900761e-05, "loss": 0.8198, "step": 263 }, { "epoch": 0.33792, "grad_norm": 0.5718904137611389, "learning_rate": 4.9491796718687474e-05, "loss": 0.744, "step": 264 }, { "epoch": 0.3392, "grad_norm": 0.5623178482055664, "learning_rate": 4.9489795918367346e-05, "loss": 0.7693, "step": 265 }, { "epoch": 0.34048, "grad_norm": 0.5621166229248047, "learning_rate": 4.948779511804722e-05, "loss": 0.7858, "step": 266 }, { "epoch": 0.34176, "grad_norm": 0.5423117876052856, "learning_rate": 4.948579431772709e-05, "loss": 0.7858, "step": 267 }, { "epoch": 0.34304, "grad_norm": 0.5965234637260437, "learning_rate": 4.948379351740697e-05, "loss": 0.7983, "step": 268 }, { "epoch": 0.34432, "grad_norm": 0.5860886573791504, "learning_rate": 4.948179271708684e-05, "loss": 0.7928, "step": 269 }, { "epoch": 0.3456, "grad_norm": 0.5337466597557068, "learning_rate": 4.947979191676671e-05, "loss": 0.792, "step": 270 }, { "epoch": 0.34688, "grad_norm": 0.6088325381278992, "learning_rate": 4.9477791116446584e-05, "loss": 0.8432, "step": 271 }, { "epoch": 0.34816, "grad_norm": 0.5539262890815735, "learning_rate": 4.947579031612645e-05, "loss": 0.8019, "step": 272 }, { "epoch": 0.34944, "grad_norm": 0.5926856398582458, "learning_rate": 4.947378951580632e-05, "loss": 0.7869, "step": 273 }, { "epoch": 
0.35072, "grad_norm": 0.5703017711639404, "learning_rate": 4.947178871548619e-05, "loss": 0.8028, "step": 274 }, { "epoch": 0.352, "grad_norm": 0.5751794576644897, "learning_rate": 4.946978791516607e-05, "loss": 0.8149, "step": 275 }, { "epoch": 0.35328, "grad_norm": 0.5824299454689026, "learning_rate": 4.9467787114845943e-05, "loss": 0.7349, "step": 276 }, { "epoch": 0.35456, "grad_norm": 0.5836261510848999, "learning_rate": 4.9465786314525815e-05, "loss": 0.8029, "step": 277 }, { "epoch": 0.35584, "grad_norm": 0.6032565236091614, "learning_rate": 4.946378551420569e-05, "loss": 0.806, "step": 278 }, { "epoch": 0.35712, "grad_norm": 0.5672261118888855, "learning_rate": 4.946178471388556e-05, "loss": 0.8044, "step": 279 }, { "epoch": 0.3584, "grad_norm": 0.5680912733078003, "learning_rate": 4.9459783913565424e-05, "loss": 0.7874, "step": 280 }, { "epoch": 0.35968, "grad_norm": 0.5770596861839294, "learning_rate": 4.9457783113245296e-05, "loss": 0.7395, "step": 281 }, { "epoch": 0.36096, "grad_norm": 0.5634730458259583, "learning_rate": 4.9455782312925175e-05, "loss": 0.7117, "step": 282 }, { "epoch": 0.36224, "grad_norm": 0.6248441934585571, "learning_rate": 4.9453781512605046e-05, "loss": 0.7941, "step": 283 }, { "epoch": 0.36352, "grad_norm": 0.5985219478607178, "learning_rate": 4.945178071228492e-05, "loss": 0.7826, "step": 284 }, { "epoch": 0.3648, "grad_norm": 0.5826301574707031, "learning_rate": 4.944977991196479e-05, "loss": 0.7999, "step": 285 }, { "epoch": 0.36608, "grad_norm": 0.5907866954803467, "learning_rate": 4.944777911164466e-05, "loss": 0.7725, "step": 286 }, { "epoch": 0.36736, "grad_norm": 0.6153370141983032, "learning_rate": 4.9445778311324534e-05, "loss": 0.8651, "step": 287 }, { "epoch": 0.36864, "grad_norm": 0.5640325546264648, "learning_rate": 4.94437775110044e-05, "loss": 0.7517, "step": 288 }, { "epoch": 0.36992, "grad_norm": 0.5785256028175354, "learning_rate": 4.944177671068427e-05, "loss": 0.8121, "step": 289 }, { "epoch": 0.3712, 
"grad_norm": 0.5664944052696228, "learning_rate": 4.943977591036415e-05, "loss": 0.7345, "step": 290 }, { "epoch": 0.37248, "grad_norm": 0.5458950996398926, "learning_rate": 4.943777511004402e-05, "loss": 0.73, "step": 291 }, { "epoch": 0.37376, "grad_norm": 0.5893869996070862, "learning_rate": 4.943577430972389e-05, "loss": 0.7927, "step": 292 }, { "epoch": 0.37504, "grad_norm": 0.6073876619338989, "learning_rate": 4.9433773509403765e-05, "loss": 0.7971, "step": 293 }, { "epoch": 0.37632, "grad_norm": 0.5632811784744263, "learning_rate": 4.943177270908364e-05, "loss": 0.7442, "step": 294 }, { "epoch": 0.3776, "grad_norm": 0.5832781791687012, "learning_rate": 4.942977190876351e-05, "loss": 0.8098, "step": 295 }, { "epoch": 0.37888, "grad_norm": 0.5741657018661499, "learning_rate": 4.9427771108443374e-05, "loss": 0.7586, "step": 296 }, { "epoch": 0.38016, "grad_norm": 0.6392624974250793, "learning_rate": 4.942577030812325e-05, "loss": 0.7998, "step": 297 }, { "epoch": 0.38144, "grad_norm": 0.5990936160087585, "learning_rate": 4.9423769507803124e-05, "loss": 0.8197, "step": 298 }, { "epoch": 0.38272, "grad_norm": 0.5655567049980164, "learning_rate": 4.9421768707482996e-05, "loss": 0.7936, "step": 299 }, { "epoch": 0.384, "grad_norm": 0.5669339895248413, "learning_rate": 4.941976790716287e-05, "loss": 0.7923, "step": 300 }, { "epoch": 0.38528, "grad_norm": 0.5685401558876038, "learning_rate": 4.941776710684274e-05, "loss": 0.8257, "step": 301 }, { "epoch": 0.38656, "grad_norm": 0.581586480140686, "learning_rate": 4.941576630652261e-05, "loss": 0.7951, "step": 302 }, { "epoch": 0.38784, "grad_norm": 0.5897729992866516, "learning_rate": 4.9413765506202484e-05, "loss": 0.7391, "step": 303 }, { "epoch": 0.38912, "grad_norm": 0.5444718599319458, "learning_rate": 4.9411764705882355e-05, "loss": 0.7317, "step": 304 }, { "epoch": 0.3904, "grad_norm": 0.5707020163536072, "learning_rate": 4.940976390556223e-05, "loss": 0.7453, "step": 305 }, { "epoch": 0.39168, "grad_norm": 
0.5731601119041443, "learning_rate": 4.94077631052421e-05, "loss": 0.7614, "step": 306 }, { "epoch": 0.39296, "grad_norm": 0.5668581128120422, "learning_rate": 4.940576230492197e-05, "loss": 0.7696, "step": 307 }, { "epoch": 0.39424, "grad_norm": 0.6144934892654419, "learning_rate": 4.940376150460184e-05, "loss": 0.7765, "step": 308 }, { "epoch": 0.39552, "grad_norm": 0.6028556823730469, "learning_rate": 4.9401760704281715e-05, "loss": 0.8292, "step": 309 }, { "epoch": 0.3968, "grad_norm": 0.5614394545555115, "learning_rate": 4.9399759903961587e-05, "loss": 0.7867, "step": 310 }, { "epoch": 0.39808, "grad_norm": 0.6021682620048523, "learning_rate": 4.939775910364146e-05, "loss": 0.7805, "step": 311 }, { "epoch": 0.39936, "grad_norm": 0.6178930401802063, "learning_rate": 4.939575830332133e-05, "loss": 0.846, "step": 312 }, { "epoch": 0.40064, "grad_norm": 0.6026805639266968, "learning_rate": 4.93937575030012e-05, "loss": 0.7695, "step": 313 }, { "epoch": 0.40192, "grad_norm": 0.6167937517166138, "learning_rate": 4.9391756702681074e-05, "loss": 0.7939, "step": 314 }, { "epoch": 0.4032, "grad_norm": 0.5787027478218079, "learning_rate": 4.9389755902360946e-05, "loss": 0.7763, "step": 315 }, { "epoch": 0.40448, "grad_norm": 0.5984156131744385, "learning_rate": 4.938775510204082e-05, "loss": 0.8166, "step": 316 }, { "epoch": 0.40576, "grad_norm": 0.6511149406433105, "learning_rate": 4.938575430172069e-05, "loss": 0.8453, "step": 317 }, { "epoch": 0.40704, "grad_norm": 0.6076487898826599, "learning_rate": 4.938375350140057e-05, "loss": 0.7335, "step": 318 }, { "epoch": 0.40832, "grad_norm": 0.5762828588485718, "learning_rate": 4.938175270108043e-05, "loss": 0.7503, "step": 319 }, { "epoch": 0.4096, "grad_norm": 0.6130267381668091, "learning_rate": 4.9379751900760305e-05, "loss": 0.7707, "step": 320 }, { "epoch": 0.41088, "grad_norm": 0.6319014430046082, "learning_rate": 4.937775110044018e-05, "loss": 0.7817, "step": 321 }, { "epoch": 0.41216, "grad_norm": 
0.5863409638404846, "learning_rate": 4.937575030012005e-05, "loss": 0.8031, "step": 322 }, { "epoch": 0.41344, "grad_norm": 0.5963563323020935, "learning_rate": 4.937374949979992e-05, "loss": 0.8168, "step": 323 }, { "epoch": 0.41472, "grad_norm": 0.5877537131309509, "learning_rate": 4.937174869947979e-05, "loss": 0.8302, "step": 324 }, { "epoch": 0.416, "grad_norm": 0.5984660387039185, "learning_rate": 4.936974789915967e-05, "loss": 0.8747, "step": 325 }, { "epoch": 0.41728, "grad_norm": 0.5737277269363403, "learning_rate": 4.936774709883954e-05, "loss": 0.8451, "step": 326 }, { "epoch": 0.41856, "grad_norm": 0.5724412798881531, "learning_rate": 4.936574629851941e-05, "loss": 0.8066, "step": 327 }, { "epoch": 0.41984, "grad_norm": 0.5816782116889954, "learning_rate": 4.936374549819928e-05, "loss": 0.756, "step": 328 }, { "epoch": 0.42112, "grad_norm": 0.592029333114624, "learning_rate": 4.936174469787915e-05, "loss": 0.7409, "step": 329 }, { "epoch": 0.4224, "grad_norm": 0.5857312083244324, "learning_rate": 4.9359743897559024e-05, "loss": 0.7277, "step": 330 }, { "epoch": 0.42368, "grad_norm": 0.6745790839195251, "learning_rate": 4.9357743097238896e-05, "loss": 0.8739, "step": 331 }, { "epoch": 0.42496, "grad_norm": 0.5804886221885681, "learning_rate": 4.9355742296918774e-05, "loss": 0.7704, "step": 332 }, { "epoch": 0.42624, "grad_norm": 0.6009431481361389, "learning_rate": 4.9353741496598646e-05, "loss": 0.7817, "step": 333 }, { "epoch": 0.42752, "grad_norm": 0.5622819066047668, "learning_rate": 4.935174069627852e-05, "loss": 0.7781, "step": 334 }, { "epoch": 0.4288, "grad_norm": 0.6032870411872864, "learning_rate": 4.934973989595838e-05, "loss": 0.8291, "step": 335 }, { "epoch": 0.43008, "grad_norm": 0.6172161102294922, "learning_rate": 4.9347739095638255e-05, "loss": 0.8107, "step": 336 }, { "epoch": 0.43136, "grad_norm": 0.5511362552642822, "learning_rate": 4.934573829531813e-05, "loss": 0.7283, "step": 337 }, { "epoch": 0.43264, "grad_norm": 
0.5732323527336121, "learning_rate": 4.9343737494998e-05, "loss": 0.694, "step": 338 }, { "epoch": 0.43392, "grad_norm": 0.5522581934928894, "learning_rate": 4.934173669467788e-05, "loss": 0.8047, "step": 339 }, { "epoch": 0.4352, "grad_norm": 0.5704146027565002, "learning_rate": 4.933973589435775e-05, "loss": 0.789, "step": 340 }, { "epoch": 0.43648, "grad_norm": 0.564039409160614, "learning_rate": 4.933773509403762e-05, "loss": 0.764, "step": 341 }, { "epoch": 0.43776, "grad_norm": 0.610349714756012, "learning_rate": 4.933573429371749e-05, "loss": 0.7716, "step": 342 }, { "epoch": 0.43904, "grad_norm": 0.5944207310676575, "learning_rate": 4.933373349339736e-05, "loss": 0.7811, "step": 343 }, { "epoch": 0.44032, "grad_norm": 0.5716763734817505, "learning_rate": 4.933173269307723e-05, "loss": 0.7904, "step": 344 }, { "epoch": 0.4416, "grad_norm": 0.6018866896629333, "learning_rate": 4.93297318927571e-05, "loss": 0.783, "step": 345 }, { "epoch": 0.44288, "grad_norm": 0.5952413082122803, "learning_rate": 4.932773109243698e-05, "loss": 0.8399, "step": 346 }, { "epoch": 0.44416, "grad_norm": 0.5757237672805786, "learning_rate": 4.932573029211685e-05, "loss": 0.829, "step": 347 }, { "epoch": 0.44544, "grad_norm": 0.6060802340507507, "learning_rate": 4.9323729491796724e-05, "loss": 0.8134, "step": 348 }, { "epoch": 0.44672, "grad_norm": 0.6192966103553772, "learning_rate": 4.9321728691476596e-05, "loss": 0.8245, "step": 349 }, { "epoch": 0.448, "grad_norm": 0.5844107270240784, "learning_rate": 4.931972789115647e-05, "loss": 0.7463, "step": 350 }, { "epoch": 0.44928, "grad_norm": 0.6216725707054138, "learning_rate": 4.931772709083633e-05, "loss": 0.7797, "step": 351 }, { "epoch": 0.45056, "grad_norm": 0.6098612546920776, "learning_rate": 4.9315726290516205e-05, "loss": 0.7743, "step": 352 }, { "epoch": 0.45184, "grad_norm": 0.5867882966995239, "learning_rate": 4.931372549019608e-05, "loss": 0.7186, "step": 353 }, { "epoch": 0.45312, "grad_norm": 0.5984062552452087, 
"learning_rate": 4.9311724689875955e-05, "loss": 0.7579, "step": 354 }, { "epoch": 0.4544, "grad_norm": 0.6255276203155518, "learning_rate": 4.930972388955583e-05, "loss": 0.8468, "step": 355 }, { "epoch": 0.45568, "grad_norm": 0.5381520986557007, "learning_rate": 4.93077230892357e-05, "loss": 0.7261, "step": 356 }, { "epoch": 0.45696, "grad_norm": 0.5565890073776245, "learning_rate": 4.930572228891557e-05, "loss": 0.7827, "step": 357 }, { "epoch": 0.45824, "grad_norm": 0.6231935620307922, "learning_rate": 4.930372148859544e-05, "loss": 0.8469, "step": 358 }, { "epoch": 0.45952, "grad_norm": 0.6231086850166321, "learning_rate": 4.930172068827531e-05, "loss": 0.8246, "step": 359 }, { "epoch": 0.4608, "grad_norm": 0.5462002158164978, "learning_rate": 4.9299719887955186e-05, "loss": 0.7182, "step": 360 }, { "epoch": 0.46208, "grad_norm": 0.5747554302215576, "learning_rate": 4.929771908763506e-05, "loss": 0.7594, "step": 361 }, { "epoch": 0.46336, "grad_norm": 0.5723777413368225, "learning_rate": 4.929571828731493e-05, "loss": 0.7787, "step": 362 }, { "epoch": 0.46464, "grad_norm": 0.6416032910346985, "learning_rate": 4.92937174869948e-05, "loss": 0.8315, "step": 363 }, { "epoch": 0.46592, "grad_norm": 0.603192925453186, "learning_rate": 4.9291716686674674e-05, "loss": 0.7719, "step": 364 }, { "epoch": 0.4672, "grad_norm": 0.6231751441955566, "learning_rate": 4.9289715886354545e-05, "loss": 0.8031, "step": 365 }, { "epoch": 0.46848, "grad_norm": 0.6328988671302795, "learning_rate": 4.928771508603442e-05, "loss": 0.8228, "step": 366 }, { "epoch": 0.46976, "grad_norm": 0.5893206000328064, "learning_rate": 4.928571428571429e-05, "loss": 0.7112, "step": 367 }, { "epoch": 0.47104, "grad_norm": 0.5830476880073547, "learning_rate": 4.928371348539416e-05, "loss": 0.7569, "step": 368 }, { "epoch": 0.47232, "grad_norm": 0.5955057740211487, "learning_rate": 4.928171268507403e-05, "loss": 0.7747, "step": 369 }, { "epoch": 0.4736, "grad_norm": 0.5977997183799744, "learning_rate": 
4.9279711884753905e-05, "loss": 0.7904, "step": 370 }, { "epoch": 0.47488, "grad_norm": 0.5674768090248108, "learning_rate": 4.9277711084433777e-05, "loss": 0.752, "step": 371 }, { "epoch": 0.47616, "grad_norm": 0.6169261932373047, "learning_rate": 4.927571028411365e-05, "loss": 0.7574, "step": 372 }, { "epoch": 0.47744, "grad_norm": 0.618199348449707, "learning_rate": 4.927370948379352e-05, "loss": 0.7908, "step": 373 }, { "epoch": 0.47872, "grad_norm": 0.5316541194915771, "learning_rate": 4.927170868347339e-05, "loss": 0.8064, "step": 374 }, { "epoch": 0.48, "grad_norm": 0.5974490642547607, "learning_rate": 4.9269707883153264e-05, "loss": 0.7936, "step": 375 }, { "epoch": 0.48128, "grad_norm": 0.6208757758140564, "learning_rate": 4.9267707082833136e-05, "loss": 0.7862, "step": 376 }, { "epoch": 0.48256, "grad_norm": 0.6077625155448914, "learning_rate": 4.926570628251301e-05, "loss": 0.7473, "step": 377 }, { "epoch": 0.48384, "grad_norm": 0.591814398765564, "learning_rate": 4.926370548219288e-05, "loss": 0.7455, "step": 378 }, { "epoch": 0.48512, "grad_norm": 0.6128320097923279, "learning_rate": 4.926170468187275e-05, "loss": 0.8214, "step": 379 }, { "epoch": 0.4864, "grad_norm": 0.5914111733436584, "learning_rate": 4.925970388155262e-05, "loss": 0.8374, "step": 380 }, { "epoch": 0.48768, "grad_norm": 0.5888047218322754, "learning_rate": 4.9257703081232495e-05, "loss": 0.7639, "step": 381 }, { "epoch": 0.48896, "grad_norm": 0.5357518196105957, "learning_rate": 4.925570228091237e-05, "loss": 0.7629, "step": 382 }, { "epoch": 0.49024, "grad_norm": 0.5791406631469727, "learning_rate": 4.925370148059224e-05, "loss": 0.77, "step": 383 }, { "epoch": 0.49152, "grad_norm": 0.5658066272735596, "learning_rate": 4.925170068027211e-05, "loss": 0.8229, "step": 384 }, { "epoch": 0.4928, "grad_norm": 0.5416905283927917, "learning_rate": 4.924969987995198e-05, "loss": 0.7255, "step": 385 }, { "epoch": 0.49408, "grad_norm": 0.5073053240776062, "learning_rate": 
4.9247699079631854e-05, "loss": 0.7436, "step": 386 }, { "epoch": 0.49536, "grad_norm": 0.5704997777938843, "learning_rate": 4.9245698279311726e-05, "loss": 0.8392, "step": 387 }, { "epoch": 0.49664, "grad_norm": 0.6032180786132812, "learning_rate": 4.9243697478991605e-05, "loss": 0.7793, "step": 388 }, { "epoch": 0.49792, "grad_norm": 0.6236636638641357, "learning_rate": 4.924169667867147e-05, "loss": 0.8586, "step": 389 }, { "epoch": 0.4992, "grad_norm": 0.5844170451164246, "learning_rate": 4.923969587835134e-05, "loss": 0.7596, "step": 390 }, { "epoch": 0.50048, "grad_norm": 0.5753904581069946, "learning_rate": 4.9237695078031214e-05, "loss": 0.7724, "step": 391 }, { "epoch": 0.50176, "grad_norm": 0.5851483941078186, "learning_rate": 4.9235694277711086e-05, "loss": 0.809, "step": 392 }, { "epoch": 0.50304, "grad_norm": 0.6062473058700562, "learning_rate": 4.923369347739096e-05, "loss": 0.7379, "step": 393 }, { "epoch": 0.50432, "grad_norm": 0.5940774083137512, "learning_rate": 4.923169267707083e-05, "loss": 0.7636, "step": 394 }, { "epoch": 0.5056, "grad_norm": 0.6108981370925903, "learning_rate": 4.922969187675071e-05, "loss": 0.7407, "step": 395 }, { "epoch": 0.50688, "grad_norm": 0.5814557671546936, "learning_rate": 4.922769107643058e-05, "loss": 0.7893, "step": 396 }, { "epoch": 0.50816, "grad_norm": 0.5524982810020447, "learning_rate": 4.9225690276110445e-05, "loss": 0.7972, "step": 397 }, { "epoch": 0.50944, "grad_norm": 0.6131865382194519, "learning_rate": 4.922368947579032e-05, "loss": 0.8582, "step": 398 }, { "epoch": 0.51072, "grad_norm": 0.5640206336975098, "learning_rate": 4.922168867547019e-05, "loss": 0.7221, "step": 399 }, { "epoch": 0.512, "grad_norm": 0.5649920701980591, "learning_rate": 4.921968787515006e-05, "loss": 0.7759, "step": 400 }, { "epoch": 0.51328, "grad_norm": 0.6427083611488342, "learning_rate": 4.921768707482993e-05, "loss": 0.8388, "step": 401 }, { "epoch": 0.51456, "grad_norm": 0.6196634769439697, "learning_rate": 
4.9215686274509804e-05, "loss": 0.8383, "step": 402 }, { "epoch": 0.51584, "grad_norm": 0.5725811123847961, "learning_rate": 4.921368547418968e-05, "loss": 0.7602, "step": 403 }, { "epoch": 0.51712, "grad_norm": 0.6151264309883118, "learning_rate": 4.9211684673869555e-05, "loss": 0.7759, "step": 404 }, { "epoch": 0.5184, "grad_norm": 0.6025890707969666, "learning_rate": 4.920968387354942e-05, "loss": 0.7741, "step": 405 }, { "epoch": 0.51968, "grad_norm": 0.5948590636253357, "learning_rate": 4.920768307322929e-05, "loss": 0.7644, "step": 406 }, { "epoch": 0.52096, "grad_norm": 0.5557575821876526, "learning_rate": 4.9205682272909163e-05, "loss": 0.7398, "step": 407 }, { "epoch": 0.52224, "grad_norm": 0.6094503402709961, "learning_rate": 4.9203681472589035e-05, "loss": 0.7671, "step": 408 }, { "epoch": 0.52352, "grad_norm": 0.6672384738922119, "learning_rate": 4.920168067226891e-05, "loss": 0.7938, "step": 409 }, { "epoch": 0.5248, "grad_norm": 0.6114287376403809, "learning_rate": 4.9199679871948786e-05, "loss": 0.8331, "step": 410 }, { "epoch": 0.52608, "grad_norm": 0.5579826831817627, "learning_rate": 4.919767907162866e-05, "loss": 0.7256, "step": 411 }, { "epoch": 0.52736, "grad_norm": 0.5519280433654785, "learning_rate": 4.919567827130853e-05, "loss": 0.755, "step": 412 }, { "epoch": 0.52864, "grad_norm": 0.5924770832061768, "learning_rate": 4.9193677470988395e-05, "loss": 0.7783, "step": 413 }, { "epoch": 0.52992, "grad_norm": 0.5979952812194824, "learning_rate": 4.9191676670668266e-05, "loss": 0.7305, "step": 414 }, { "epoch": 0.5312, "grad_norm": 0.6058392524719238, "learning_rate": 4.918967587034814e-05, "loss": 0.7754, "step": 415 }, { "epoch": 0.53248, "grad_norm": 0.5810478925704956, "learning_rate": 4.918767507002801e-05, "loss": 0.7785, "step": 416 }, { "epoch": 0.53376, "grad_norm": 0.5745630860328674, "learning_rate": 4.918567426970789e-05, "loss": 0.7649, "step": 417 }, { "epoch": 0.53504, "grad_norm": 0.6739091277122498, "learning_rate": 
4.918367346938776e-05, "loss": 0.8086, "step": 418 }, { "epoch": 0.53632, "grad_norm": 0.6409255266189575, "learning_rate": 4.918167266906763e-05, "loss": 0.7439, "step": 419 }, { "epoch": 0.5376, "grad_norm": 0.6018539667129517, "learning_rate": 4.9179671868747504e-05, "loss": 0.8367, "step": 420 }, { "epoch": 0.53888, "grad_norm": 0.6245967745780945, "learning_rate": 4.917767106842737e-05, "loss": 0.8294, "step": 421 }, { "epoch": 0.54016, "grad_norm": 0.5872459411621094, "learning_rate": 4.917567026810724e-05, "loss": 0.7508, "step": 422 }, { "epoch": 0.54144, "grad_norm": 0.6180544495582581, "learning_rate": 4.917366946778711e-05, "loss": 0.8241, "step": 423 }, { "epoch": 0.54272, "grad_norm": 0.56467604637146, "learning_rate": 4.917166866746699e-05, "loss": 0.7355, "step": 424 }, { "epoch": 0.544, "grad_norm": 0.5801830887794495, "learning_rate": 4.9169667867146864e-05, "loss": 0.7786, "step": 425 }, { "epoch": 0.54528, "grad_norm": 0.5825265049934387, "learning_rate": 4.9167667066826735e-05, "loss": 0.7876, "step": 426 }, { "epoch": 0.54656, "grad_norm": 0.5895298719406128, "learning_rate": 4.916566626650661e-05, "loss": 0.8246, "step": 427 }, { "epoch": 0.54784, "grad_norm": 0.5841552019119263, "learning_rate": 4.916366546618648e-05, "loss": 0.7721, "step": 428 }, { "epoch": 0.54912, "grad_norm": 0.6116175651550293, "learning_rate": 4.9161664665866344e-05, "loss": 0.7855, "step": 429 }, { "epoch": 0.5504, "grad_norm": 0.6221590042114258, "learning_rate": 4.9159663865546216e-05, "loss": 0.7947, "step": 430 }, { "epoch": 0.55168, "grad_norm": 0.5872395634651184, "learning_rate": 4.9157663065226095e-05, "loss": 0.7433, "step": 431 }, { "epoch": 0.55296, "grad_norm": 0.5529791712760925, "learning_rate": 4.9155662264905967e-05, "loss": 0.7531, "step": 432 }, { "epoch": 0.55424, "grad_norm": 0.5675722360610962, "learning_rate": 4.915366146458584e-05, "loss": 0.7426, "step": 433 }, { "epoch": 0.55552, "grad_norm": 0.5776454210281372, "learning_rate": 
4.915166066426571e-05, "loss": 0.8128, "step": 434 }, { "epoch": 0.5568, "grad_norm": 0.5539517402648926, "learning_rate": 4.914965986394558e-05, "loss": 0.806, "step": 435 }, { "epoch": 0.55808, "grad_norm": 0.612989068031311, "learning_rate": 4.9147659063625454e-05, "loss": 0.7703, "step": 436 }, { "epoch": 0.55936, "grad_norm": 0.6443326473236084, "learning_rate": 4.914565826330532e-05, "loss": 0.7351, "step": 437 }, { "epoch": 0.56064, "grad_norm": 0.5801988244056702, "learning_rate": 4.91436574629852e-05, "loss": 0.792, "step": 438 }, { "epoch": 0.56192, "grad_norm": 0.5797520875930786, "learning_rate": 4.914165666266507e-05, "loss": 0.7547, "step": 439 }, { "epoch": 0.5632, "grad_norm": 0.6442052721977234, "learning_rate": 4.913965586234494e-05, "loss": 0.7916, "step": 440 }, { "epoch": 0.56448, "grad_norm": 0.5808482766151428, "learning_rate": 4.913765506202481e-05, "loss": 0.7912, "step": 441 }, { "epoch": 0.56576, "grad_norm": 0.6839514970779419, "learning_rate": 4.9135654261704685e-05, "loss": 0.8279, "step": 442 }, { "epoch": 0.56704, "grad_norm": 0.6102442145347595, "learning_rate": 4.913365346138456e-05, "loss": 0.8261, "step": 443 }, { "epoch": 0.56832, "grad_norm": 0.5921707153320312, "learning_rate": 4.913165266106443e-05, "loss": 0.7572, "step": 444 }, { "epoch": 0.5696, "grad_norm": 0.6068132519721985, "learning_rate": 4.91296518607443e-05, "loss": 0.6949, "step": 445 }, { "epoch": 0.57088, "grad_norm": 0.5876120924949646, "learning_rate": 4.912765106042417e-05, "loss": 0.7906, "step": 446 }, { "epoch": 0.57216, "grad_norm": 0.587684154510498, "learning_rate": 4.9125650260104044e-05, "loss": 0.7745, "step": 447 }, { "epoch": 0.57344, "grad_norm": 0.6146616339683533, "learning_rate": 4.9123649459783916e-05, "loss": 0.7909, "step": 448 }, { "epoch": 0.57472, "grad_norm": 0.5915645360946655, "learning_rate": 4.912164865946379e-05, "loss": 0.7361, "step": 449 }, { "epoch": 0.576, "grad_norm": 0.5974079966545105, "learning_rate": 4.911964785914366e-05, 
"loss": 0.7355, "step": 450 }, { "epoch": 0.57728, "grad_norm": 0.6230568885803223, "learning_rate": 4.911764705882353e-05, "loss": 0.7886, "step": 451 }, { "epoch": 0.57856, "grad_norm": 0.604843020439148, "learning_rate": 4.9115646258503404e-05, "loss": 0.7817, "step": 452 }, { "epoch": 0.57984, "grad_norm": 0.6246700882911682, "learning_rate": 4.9113645458183276e-05, "loss": 0.7952, "step": 453 }, { "epoch": 0.58112, "grad_norm": 0.5888630747795105, "learning_rate": 4.911164465786315e-05, "loss": 0.7482, "step": 454 }, { "epoch": 0.5824, "grad_norm": 0.5762295722961426, "learning_rate": 4.910964385754302e-05, "loss": 0.7301, "step": 455 }, { "epoch": 0.58368, "grad_norm": 0.6081997156143188, "learning_rate": 4.910764305722289e-05, "loss": 0.7295, "step": 456 }, { "epoch": 0.58496, "grad_norm": 0.6235604286193848, "learning_rate": 4.910564225690276e-05, "loss": 0.8245, "step": 457 }, { "epoch": 0.58624, "grad_norm": 0.6132957339286804, "learning_rate": 4.9103641456582635e-05, "loss": 0.7289, "step": 458 }, { "epoch": 0.58752, "grad_norm": 0.6033244132995605, "learning_rate": 4.910164065626251e-05, "loss": 0.8088, "step": 459 }, { "epoch": 0.5888, "grad_norm": 0.6521779894828796, "learning_rate": 4.909963985594238e-05, "loss": 0.8683, "step": 460 }, { "epoch": 0.59008, "grad_norm": 0.6089750528335571, "learning_rate": 4.909763905562225e-05, "loss": 0.7418, "step": 461 }, { "epoch": 0.59136, "grad_norm": 0.6081743836402893, "learning_rate": 4.909563825530212e-05, "loss": 0.7847, "step": 462 }, { "epoch": 0.59264, "grad_norm": 0.5884706377983093, "learning_rate": 4.9093637454981994e-05, "loss": 0.7429, "step": 463 }, { "epoch": 0.59392, "grad_norm": 0.5694007873535156, "learning_rate": 4.9091636654661866e-05, "loss": 0.7418, "step": 464 }, { "epoch": 0.5952, "grad_norm": 0.5716164112091064, "learning_rate": 4.908963585434174e-05, "loss": 0.7683, "step": 465 }, { "epoch": 0.59648, "grad_norm": 0.5955536365509033, "learning_rate": 4.9087635054021617e-05, "loss": 
0.7523, "step": 466 }, { "epoch": 0.59776, "grad_norm": 0.5757789611816406, "learning_rate": 4.908563425370148e-05, "loss": 0.7261, "step": 467 }, { "epoch": 0.59904, "grad_norm": 0.5787334442138672, "learning_rate": 4.9083633453381353e-05, "loss": 0.7195, "step": 468 }, { "epoch": 0.60032, "grad_norm": 0.568333625793457, "learning_rate": 4.9081632653061225e-05, "loss": 0.7462, "step": 469 }, { "epoch": 0.6016, "grad_norm": 0.5744010806083679, "learning_rate": 4.90796318527411e-05, "loss": 0.8024, "step": 470 }, { "epoch": 0.60288, "grad_norm": 0.6531276702880859, "learning_rate": 4.907763105242097e-05, "loss": 0.8175, "step": 471 }, { "epoch": 0.60416, "grad_norm": 0.6218701004981995, "learning_rate": 4.907563025210084e-05, "loss": 0.7496, "step": 472 }, { "epoch": 0.60544, "grad_norm": 0.6153817772865295, "learning_rate": 4.907362945178072e-05, "loss": 0.8135, "step": 473 }, { "epoch": 0.60672, "grad_norm": 0.6718766689300537, "learning_rate": 4.907162865146059e-05, "loss": 0.8049, "step": 474 }, { "epoch": 0.608, "grad_norm": 0.6055019497871399, "learning_rate": 4.9069627851140456e-05, "loss": 0.7722, "step": 475 }, { "epoch": 0.60928, "grad_norm": 0.5902823805809021, "learning_rate": 4.906762705082033e-05, "loss": 0.7665, "step": 476 }, { "epoch": 0.61056, "grad_norm": 0.5945876836776733, "learning_rate": 4.90656262505002e-05, "loss": 0.7589, "step": 477 }, { "epoch": 0.61184, "grad_norm": 0.6314772963523865, "learning_rate": 4.906362545018007e-05, "loss": 0.8462, "step": 478 }, { "epoch": 0.61312, "grad_norm": 0.6117784380912781, "learning_rate": 4.9061624649859944e-05, "loss": 0.7623, "step": 479 }, { "epoch": 0.6144, "grad_norm": 0.5828944444656372, "learning_rate": 4.905962384953982e-05, "loss": 0.752, "step": 480 }, { "epoch": 0.61568, "grad_norm": 0.6389551162719727, "learning_rate": 4.9057623049219694e-05, "loss": 0.7396, "step": 481 }, { "epoch": 0.61696, "grad_norm": 0.5840926170349121, "learning_rate": 4.9055622248899566e-05, "loss": 0.7754, "step": 
482 }, { "epoch": 0.61824, "grad_norm": 0.6001207232475281, "learning_rate": 4.905362144857943e-05, "loss": 0.7678, "step": 483 }, { "epoch": 0.61952, "grad_norm": 0.5531839728355408, "learning_rate": 4.90516206482593e-05, "loss": 0.7174, "step": 484 }, { "epoch": 0.6208, "grad_norm": 0.6437347531318665, "learning_rate": 4.9049619847939175e-05, "loss": 0.7857, "step": 485 }, { "epoch": 0.62208, "grad_norm": 0.5961335301399231, "learning_rate": 4.904761904761905e-05, "loss": 0.754, "step": 486 }, { "epoch": 0.62336, "grad_norm": 0.5557673573493958, "learning_rate": 4.9045618247298926e-05, "loss": 0.7418, "step": 487 }, { "epoch": 0.62464, "grad_norm": 0.6520072221755981, "learning_rate": 4.90436174469788e-05, "loss": 0.8426, "step": 488 }, { "epoch": 0.62592, "grad_norm": 0.5974238514900208, "learning_rate": 4.904161664665867e-05, "loss": 0.7502, "step": 489 }, { "epoch": 0.6272, "grad_norm": 0.6083403825759888, "learning_rate": 4.903961584633854e-05, "loss": 0.7499, "step": 490 }, { "epoch": 0.62848, "grad_norm": 0.5932098031044006, "learning_rate": 4.9037615046018406e-05, "loss": 0.7344, "step": 491 }, { "epoch": 0.62976, "grad_norm": 0.6413688659667969, "learning_rate": 4.903561424569828e-05, "loss": 0.7552, "step": 492 }, { "epoch": 0.63104, "grad_norm": 0.6206446290016174, "learning_rate": 4.903361344537815e-05, "loss": 0.7687, "step": 493 }, { "epoch": 0.63232, "grad_norm": 0.5776639580726624, "learning_rate": 4.903161264505803e-05, "loss": 0.8371, "step": 494 }, { "epoch": 0.6336, "grad_norm": 0.6182857155799866, "learning_rate": 4.90296118447379e-05, "loss": 0.7361, "step": 495 }, { "epoch": 0.63488, "grad_norm": 0.6016165018081665, "learning_rate": 4.902761104441777e-05, "loss": 0.8338, "step": 496 }, { "epoch": 0.63616, "grad_norm": 0.6279016733169556, "learning_rate": 4.9025610244097644e-05, "loss": 0.756, "step": 497 }, { "epoch": 0.63744, "grad_norm": 0.6500204205513, "learning_rate": 4.9023609443777516e-05, "loss": 0.8069, "step": 498 }, { "epoch": 
0.63872, "grad_norm": 0.6177827715873718, "learning_rate": 4.902160864345738e-05, "loss": 0.7365, "step": 499 }, { "epoch": 0.64, "grad_norm": 0.5693172216415405, "learning_rate": 4.901960784313725e-05, "loss": 0.6754, "step": 500 }, { "epoch": 0.64128, "grad_norm": 0.5654707551002502, "learning_rate": 4.901760704281713e-05, "loss": 0.7525, "step": 501 }, { "epoch": 0.64256, "grad_norm": 0.648030161857605, "learning_rate": 4.9015606242497e-05, "loss": 0.764, "step": 502 }, { "epoch": 0.64384, "grad_norm": 0.6066122651100159, "learning_rate": 4.9013605442176875e-05, "loss": 0.7487, "step": 503 }, { "epoch": 0.64512, "grad_norm": 0.6264938712120056, "learning_rate": 4.901160464185675e-05, "loss": 0.8013, "step": 504 }, { "epoch": 0.6464, "grad_norm": 0.60635906457901, "learning_rate": 4.900960384153662e-05, "loss": 0.7421, "step": 505 }, { "epoch": 0.64768, "grad_norm": 0.5976872444152832, "learning_rate": 4.900760304121649e-05, "loss": 0.733, "step": 506 }, { "epoch": 0.64896, "grad_norm": 0.5609980225563049, "learning_rate": 4.9005602240896356e-05, "loss": 0.7141, "step": 507 }, { "epoch": 0.65024, "grad_norm": 0.6109974980354309, "learning_rate": 4.9003601440576234e-05, "loss": 0.7301, "step": 508 }, { "epoch": 0.65152, "grad_norm": 0.6050900220870972, "learning_rate": 4.9001600640256106e-05, "loss": 0.7644, "step": 509 }, { "epoch": 0.6528, "grad_norm": 0.571101188659668, "learning_rate": 4.899959983993598e-05, "loss": 0.7736, "step": 510 }, { "epoch": 0.65408, "grad_norm": 0.5456352829933167, "learning_rate": 4.899759903961585e-05, "loss": 0.7738, "step": 511 }, { "epoch": 0.65536, "grad_norm": 0.5890512466430664, "learning_rate": 4.899559823929572e-05, "loss": 0.6723, "step": 512 }, { "epoch": 0.65664, "grad_norm": 0.5800885558128357, "learning_rate": 4.8993597438975594e-05, "loss": 0.7153, "step": 513 }, { "epoch": 0.65792, "grad_norm": 0.6010210514068604, "learning_rate": 4.8991596638655466e-05, "loss": 0.7784, "step": 514 }, { "epoch": 0.6592, "grad_norm": 
0.6430292129516602, "learning_rate": 4.898959583833533e-05, "loss": 0.7992, "step": 515 }, { "epoch": 0.66048, "grad_norm": 0.5662366151809692, "learning_rate": 4.898759503801521e-05, "loss": 0.7449, "step": 516 }, { "epoch": 0.66176, "grad_norm": 0.6085848212242126, "learning_rate": 4.898559423769508e-05, "loss": 0.7972, "step": 517 }, { "epoch": 0.66304, "grad_norm": 0.5684623122215271, "learning_rate": 4.898359343737495e-05, "loss": 0.7667, "step": 518 }, { "epoch": 0.66432, "grad_norm": 0.5847243666648865, "learning_rate": 4.8981592637054825e-05, "loss": 0.7233, "step": 519 }, { "epoch": 0.6656, "grad_norm": 0.6326200366020203, "learning_rate": 4.89795918367347e-05, "loss": 0.7656, "step": 520 }, { "epoch": 0.66688, "grad_norm": 0.6132087707519531, "learning_rate": 4.897759103641457e-05, "loss": 0.7842, "step": 521 }, { "epoch": 0.66816, "grad_norm": 0.5856083631515503, "learning_rate": 4.897559023609444e-05, "loss": 0.7745, "step": 522 }, { "epoch": 0.66944, "grad_norm": 0.6147796511650085, "learning_rate": 4.897358943577431e-05, "loss": 0.7964, "step": 523 }, { "epoch": 0.67072, "grad_norm": 0.6051590442657471, "learning_rate": 4.8971588635454184e-05, "loss": 0.7573, "step": 524 }, { "epoch": 0.672, "grad_norm": 0.6189625263214111, "learning_rate": 4.8969587835134056e-05, "loss": 0.8481, "step": 525 }, { "epoch": 0.67328, "grad_norm": 0.5812535881996155, "learning_rate": 4.896758703481393e-05, "loss": 0.8213, "step": 526 }, { "epoch": 0.67456, "grad_norm": 0.5988802909851074, "learning_rate": 4.89655862344938e-05, "loss": 0.6807, "step": 527 }, { "epoch": 0.67584, "grad_norm": 0.5860095620155334, "learning_rate": 4.896358543417367e-05, "loss": 0.7816, "step": 528 }, { "epoch": 0.67712, "grad_norm": 0.5878039598464966, "learning_rate": 4.8961584633853543e-05, "loss": 0.7771, "step": 529 }, { "epoch": 0.6784, "grad_norm": 0.6504335403442383, "learning_rate": 4.8959583833533415e-05, "loss": 0.82, "step": 530 }, { "epoch": 0.67968, "grad_norm": 
0.5590863227844238, "learning_rate": 4.895758303321329e-05, "loss": 0.7686, "step": 531 }, { "epoch": 0.68096, "grad_norm": 0.6558895707130432, "learning_rate": 4.895558223289316e-05, "loss": 0.8116, "step": 532 }, { "epoch": 0.68224, "grad_norm": 0.6584180593490601, "learning_rate": 4.895358143257303e-05, "loss": 0.7509, "step": 533 }, { "epoch": 0.68352, "grad_norm": 0.5821325778961182, "learning_rate": 4.89515806322529e-05, "loss": 0.7289, "step": 534 }, { "epoch": 0.6848, "grad_norm": 0.6342353820800781, "learning_rate": 4.8949579831932775e-05, "loss": 0.7905, "step": 535 }, { "epoch": 0.68608, "grad_norm": 0.595000684261322, "learning_rate": 4.8947579031612646e-05, "loss": 0.7344, "step": 536 }, { "epoch": 0.68736, "grad_norm": 0.6255506277084351, "learning_rate": 4.894557823129252e-05, "loss": 0.7994, "step": 537 }, { "epoch": 0.68864, "grad_norm": 0.6021307110786438, "learning_rate": 4.894357743097239e-05, "loss": 0.8138, "step": 538 }, { "epoch": 0.68992, "grad_norm": 0.5913227796554565, "learning_rate": 4.894157663065226e-05, "loss": 0.7822, "step": 539 }, { "epoch": 0.6912, "grad_norm": 0.6197808980941772, "learning_rate": 4.8939575830332134e-05, "loss": 0.7853, "step": 540 }, { "epoch": 0.69248, "grad_norm": 0.5861679911613464, "learning_rate": 4.8937575030012006e-05, "loss": 0.7293, "step": 541 }, { "epoch": 0.69376, "grad_norm": 0.5833053588867188, "learning_rate": 4.893557422969188e-05, "loss": 0.7416, "step": 542 }, { "epoch": 0.69504, "grad_norm": 0.5544353723526001, "learning_rate": 4.893357342937175e-05, "loss": 0.7278, "step": 543 }, { "epoch": 0.69632, "grad_norm": 0.5617831945419312, "learning_rate": 4.893157262905163e-05, "loss": 0.7219, "step": 544 }, { "epoch": 0.6976, "grad_norm": 0.6455240845680237, "learning_rate": 4.892957182873149e-05, "loss": 0.7656, "step": 545 }, { "epoch": 0.69888, "grad_norm": 0.63936448097229, "learning_rate": 4.8927571028411365e-05, "loss": 0.8029, "step": 546 }, { "epoch": 0.70016, "grad_norm": 
0.5780683755874634, "learning_rate": 4.892557022809124e-05, "loss": 0.7684, "step": 547 }, { "epoch": 0.70144, "grad_norm": 0.571263313293457, "learning_rate": 4.892356942777111e-05, "loss": 0.6714, "step": 548 }, { "epoch": 0.70272, "grad_norm": 0.6045280694961548, "learning_rate": 4.892156862745098e-05, "loss": 0.7374, "step": 549 }, { "epoch": 0.704, "grad_norm": 0.5887953639030457, "learning_rate": 4.891956782713085e-05, "loss": 0.7341, "step": 550 }, { "epoch": 0.70528, "grad_norm": 0.6200657486915588, "learning_rate": 4.891756702681073e-05, "loss": 0.7835, "step": 551 }, { "epoch": 0.70656, "grad_norm": 0.6312881112098694, "learning_rate": 4.89155662264906e-05, "loss": 0.8279, "step": 552 }, { "epoch": 0.70784, "grad_norm": 0.6151871681213379, "learning_rate": 4.891356542617047e-05, "loss": 0.7467, "step": 553 }, { "epoch": 0.70912, "grad_norm": 0.5729169249534607, "learning_rate": 4.891156462585034e-05, "loss": 0.7405, "step": 554 }, { "epoch": 0.7104, "grad_norm": 0.5994395017623901, "learning_rate": 4.890956382553021e-05, "loss": 0.8057, "step": 555 }, { "epoch": 0.71168, "grad_norm": 0.564448893070221, "learning_rate": 4.8907563025210084e-05, "loss": 0.7428, "step": 556 }, { "epoch": 0.71296, "grad_norm": 0.6355032920837402, "learning_rate": 4.8905562224889955e-05, "loss": 0.79, "step": 557 }, { "epoch": 0.71424, "grad_norm": 0.5675954222679138, "learning_rate": 4.8903561424569834e-05, "loss": 0.7338, "step": 558 }, { "epoch": 0.71552, "grad_norm": 0.5565802454948425, "learning_rate": 4.8901560624249706e-05, "loss": 0.7997, "step": 559 }, { "epoch": 0.7168, "grad_norm": 0.5979486107826233, "learning_rate": 4.889955982392958e-05, "loss": 0.7952, "step": 560 }, { "epoch": 0.71808, "grad_norm": 0.5932238101959229, "learning_rate": 4.889755902360944e-05, "loss": 0.7106, "step": 561 }, { "epoch": 0.71936, "grad_norm": 0.659325361251831, "learning_rate": 4.8895558223289315e-05, "loss": 0.703, "step": 562 }, { "epoch": 0.72064, "grad_norm": 0.6149618029594421, 
"learning_rate": 4.889355742296919e-05, "loss": 0.762, "step": 563 }, { "epoch": 0.72192, "grad_norm": 0.5758309364318848, "learning_rate": 4.889155662264906e-05, "loss": 0.7698, "step": 564 }, { "epoch": 0.7232, "grad_norm": 0.5630546808242798, "learning_rate": 4.888955582232894e-05, "loss": 0.7165, "step": 565 }, { "epoch": 0.72448, "grad_norm": 0.6149067282676697, "learning_rate": 4.888755502200881e-05, "loss": 0.8136, "step": 566 }, { "epoch": 0.72576, "grad_norm": 0.6086531281471252, "learning_rate": 4.888555422168868e-05, "loss": 0.7992, "step": 567 }, { "epoch": 0.72704, "grad_norm": 0.6159600615501404, "learning_rate": 4.888355342136855e-05, "loss": 0.8229, "step": 568 }, { "epoch": 0.72832, "grad_norm": 0.5952989459037781, "learning_rate": 4.888155262104842e-05, "loss": 0.7035, "step": 569 }, { "epoch": 0.7296, "grad_norm": 0.6345956921577454, "learning_rate": 4.887955182072829e-05, "loss": 0.8317, "step": 570 }, { "epoch": 0.73088, "grad_norm": 0.6076996326446533, "learning_rate": 4.887755102040816e-05, "loss": 0.7937, "step": 571 }, { "epoch": 0.73216, "grad_norm": 0.5981006622314453, "learning_rate": 4.887555022008804e-05, "loss": 0.6843, "step": 572 }, { "epoch": 0.73344, "grad_norm": 0.5810612440109253, "learning_rate": 4.887354941976791e-05, "loss": 0.6974, "step": 573 }, { "epoch": 0.73472, "grad_norm": 0.5900784730911255, "learning_rate": 4.8871548619447784e-05, "loss": 0.7718, "step": 574 }, { "epoch": 0.736, "grad_norm": 0.5939428210258484, "learning_rate": 4.8869547819127656e-05, "loss": 0.7176, "step": 575 }, { "epoch": 0.73728, "grad_norm": 0.6226340532302856, "learning_rate": 4.886754701880753e-05, "loss": 0.728, "step": 576 }, { "epoch": 0.73856, "grad_norm": 0.6123561859130859, "learning_rate": 4.886554621848739e-05, "loss": 0.7569, "step": 577 }, { "epoch": 0.73984, "grad_norm": 0.6113354563713074, "learning_rate": 4.8863545418167264e-05, "loss": 0.7683, "step": 578 }, { "epoch": 0.74112, "grad_norm": 0.6293046474456787, "learning_rate": 
4.886154461784714e-05, "loss": 0.8001, "step": 579 }, { "epoch": 0.7424, "grad_norm": 0.5882707238197327, "learning_rate": 4.8859543817527015e-05, "loss": 0.6901, "step": 580 }, { "epoch": 0.74368, "grad_norm": 0.5842380523681641, "learning_rate": 4.885754301720689e-05, "loss": 0.7069, "step": 581 }, { "epoch": 0.74496, "grad_norm": 0.6349176168441772, "learning_rate": 4.885554221688676e-05, "loss": 0.7323, "step": 582 }, { "epoch": 0.74624, "grad_norm": 0.5746724605560303, "learning_rate": 4.885354141656663e-05, "loss": 0.7153, "step": 583 }, { "epoch": 0.74752, "grad_norm": 0.5570270419120789, "learning_rate": 4.88515406162465e-05, "loss": 0.7684, "step": 584 }, { "epoch": 0.7488, "grad_norm": 0.6300098896026611, "learning_rate": 4.884953981592637e-05, "loss": 0.7983, "step": 585 }, { "epoch": 0.75008, "grad_norm": 0.6173220872879028, "learning_rate": 4.8847539015606246e-05, "loss": 0.8115, "step": 586 }, { "epoch": 0.75136, "grad_norm": 0.6043545007705688, "learning_rate": 4.884553821528612e-05, "loss": 0.7274, "step": 587 }, { "epoch": 0.75264, "grad_norm": 0.6310338377952576, "learning_rate": 4.884353741496599e-05, "loss": 0.7706, "step": 588 }, { "epoch": 0.75392, "grad_norm": 0.6150200366973877, "learning_rate": 4.884153661464586e-05, "loss": 0.7907, "step": 589 }, { "epoch": 0.7552, "grad_norm": 0.5880206823348999, "learning_rate": 4.8839535814325734e-05, "loss": 0.7382, "step": 590 }, { "epoch": 0.75648, "grad_norm": 0.6823338866233826, "learning_rate": 4.8837535014005605e-05, "loss": 0.7575, "step": 591 }, { "epoch": 0.75776, "grad_norm": 0.6266196370124817, "learning_rate": 4.883553421368548e-05, "loss": 0.7685, "step": 592 }, { "epoch": 0.75904, "grad_norm": 0.637712836265564, "learning_rate": 4.883353341336535e-05, "loss": 0.7326, "step": 593 }, { "epoch": 0.76032, "grad_norm": 0.5898723006248474, "learning_rate": 4.883153261304522e-05, "loss": 0.749, "step": 594 }, { "epoch": 0.7616, "grad_norm": 0.6104643940925598, "learning_rate": 
4.882953181272509e-05, "loss": 0.7539, "step": 595 }, { "epoch": 0.76288, "grad_norm": 0.615857720375061, "learning_rate": 4.8827531012404965e-05, "loss": 0.7938, "step": 596 }, { "epoch": 0.76416, "grad_norm": 0.633080780506134, "learning_rate": 4.8825530212084837e-05, "loss": 0.8089, "step": 597 }, { "epoch": 0.76544, "grad_norm": 0.615404486656189, "learning_rate": 4.882352941176471e-05, "loss": 0.8339, "step": 598 }, { "epoch": 0.76672, "grad_norm": 0.5635229349136353, "learning_rate": 4.882152861144458e-05, "loss": 0.6853, "step": 599 }, { "epoch": 0.768, "grad_norm": 0.579932689666748, "learning_rate": 4.881952781112445e-05, "loss": 0.7536, "step": 600 }, { "epoch": 0.76928, "grad_norm": 0.5975157618522644, "learning_rate": 4.8817527010804324e-05, "loss": 0.7634, "step": 601 }, { "epoch": 0.77056, "grad_norm": 0.5809814929962158, "learning_rate": 4.8815526210484196e-05, "loss": 0.7197, "step": 602 }, { "epoch": 0.77184, "grad_norm": 0.5941184163093567, "learning_rate": 4.881352541016407e-05, "loss": 0.7532, "step": 603 }, { "epoch": 0.77312, "grad_norm": 0.629640519618988, "learning_rate": 4.881152460984394e-05, "loss": 0.7685, "step": 604 }, { "epoch": 0.7744, "grad_norm": 0.6509769558906555, "learning_rate": 4.880952380952381e-05, "loss": 0.738, "step": 605 }, { "epoch": 0.77568, "grad_norm": 0.5938847661018372, "learning_rate": 4.880752300920368e-05, "loss": 0.7364, "step": 606 }, { "epoch": 0.77696, "grad_norm": 0.5702788829803467, "learning_rate": 4.8805522208883555e-05, "loss": 0.7057, "step": 607 }, { "epoch": 0.77824, "grad_norm": 0.5880799889564514, "learning_rate": 4.880352140856343e-05, "loss": 0.6999, "step": 608 }, { "epoch": 0.77952, "grad_norm": 0.6025612950325012, "learning_rate": 4.88015206082433e-05, "loss": 0.7189, "step": 609 }, { "epoch": 0.7808, "grad_norm": 0.6370987296104431, "learning_rate": 4.879951980792317e-05, "loss": 0.7947, "step": 610 }, { "epoch": 0.78208, "grad_norm": 0.5612797737121582, "learning_rate": 
4.879751900760304e-05, "loss": 0.6949, "step": 611 }, { "epoch": 0.78336, "grad_norm": 0.6249443888664246, "learning_rate": 4.8795518207282914e-05, "loss": 0.7593, "step": 612 }, { "epoch": 0.78464, "grad_norm": 0.6149105429649353, "learning_rate": 4.8793517406962786e-05, "loss": 0.8293, "step": 613 }, { "epoch": 0.78592, "grad_norm": 0.6441102623939514, "learning_rate": 4.8791516606642665e-05, "loss": 0.7343, "step": 614 }, { "epoch": 0.7872, "grad_norm": 0.5925320386886597, "learning_rate": 4.878951580632253e-05, "loss": 0.7466, "step": 615 }, { "epoch": 0.78848, "grad_norm": 0.6006816029548645, "learning_rate": 4.87875150060024e-05, "loss": 0.8136, "step": 616 }, { "epoch": 0.78976, "grad_norm": 0.6164388060569763, "learning_rate": 4.8785514205682274e-05, "loss": 0.7459, "step": 617 }, { "epoch": 0.79104, "grad_norm": 0.6219509840011597, "learning_rate": 4.8783513405362146e-05, "loss": 0.7392, "step": 618 }, { "epoch": 0.79232, "grad_norm": 0.6259922981262207, "learning_rate": 4.878151260504202e-05, "loss": 0.7768, "step": 619 }, { "epoch": 0.7936, "grad_norm": 0.5972528457641602, "learning_rate": 4.877951180472189e-05, "loss": 0.7749, "step": 620 }, { "epoch": 0.79488, "grad_norm": 0.5769963264465332, "learning_rate": 4.877751100440177e-05, "loss": 0.7324, "step": 621 }, { "epoch": 0.79616, "grad_norm": 0.5725414752960205, "learning_rate": 4.877551020408164e-05, "loss": 0.7089, "step": 622 }, { "epoch": 0.79744, "grad_norm": 0.6348921656608582, "learning_rate": 4.8773509403761505e-05, "loss": 0.8087, "step": 623 }, { "epoch": 0.79872, "grad_norm": 0.6241890788078308, "learning_rate": 4.877150860344138e-05, "loss": 0.7478, "step": 624 }, { "epoch": 0.8, "grad_norm": 0.6045325398445129, "learning_rate": 4.876950780312125e-05, "loss": 0.7607, "step": 625 }, { "epoch": 0.80128, "grad_norm": 0.6093403100967407, "learning_rate": 4.876750700280112e-05, "loss": 0.7503, "step": 626 }, { "epoch": 0.80256, "grad_norm": 0.6271349787712097, "learning_rate": 
4.876550620248099e-05, "loss": 0.7631, "step": 627 }, { "epoch": 0.80384, "grad_norm": 0.5978842377662659, "learning_rate": 4.8763505402160864e-05, "loss": 0.75, "step": 628 }, { "epoch": 0.80512, "grad_norm": 0.5923817157745361, "learning_rate": 4.876150460184074e-05, "loss": 0.6844, "step": 629 }, { "epoch": 0.8064, "grad_norm": 0.604054868221283, "learning_rate": 4.8759503801520615e-05, "loss": 0.7224, "step": 630 }, { "epoch": 0.80768, "grad_norm": 0.645494282245636, "learning_rate": 4.875750300120048e-05, "loss": 0.7912, "step": 631 }, { "epoch": 0.80896, "grad_norm": 0.6240355968475342, "learning_rate": 4.875550220088035e-05, "loss": 0.7429, "step": 632 }, { "epoch": 0.81024, "grad_norm": 0.6154593229293823, "learning_rate": 4.875350140056022e-05, "loss": 0.7693, "step": 633 }, { "epoch": 0.81152, "grad_norm": 0.6306690573692322, "learning_rate": 4.8751500600240095e-05, "loss": 0.733, "step": 634 }, { "epoch": 0.8128, "grad_norm": 0.5954400897026062, "learning_rate": 4.874949979991997e-05, "loss": 0.8013, "step": 635 }, { "epoch": 0.81408, "grad_norm": 0.6127607226371765, "learning_rate": 4.8747498999599846e-05, "loss": 0.7676, "step": 636 }, { "epoch": 0.81536, "grad_norm": 0.6720303893089294, "learning_rate": 4.874549819927972e-05, "loss": 0.7987, "step": 637 }, { "epoch": 0.81664, "grad_norm": 0.5854519605636597, "learning_rate": 4.874349739895959e-05, "loss": 0.769, "step": 638 }, { "epoch": 0.81792, "grad_norm": 0.6683208346366882, "learning_rate": 4.8741496598639455e-05, "loss": 0.7729, "step": 639 }, { "epoch": 0.8192, "grad_norm": 0.5927050709724426, "learning_rate": 4.8739495798319326e-05, "loss": 0.7671, "step": 640 }, { "epoch": 0.82048, "grad_norm": 0.5779833793640137, "learning_rate": 4.87374949979992e-05, "loss": 0.6955, "step": 641 }, { "epoch": 0.82176, "grad_norm": 0.6800764799118042, "learning_rate": 4.873549419767907e-05, "loss": 0.7995, "step": 642 }, { "epoch": 0.82304, "grad_norm": 0.5779314041137695, "learning_rate": 
4.873349339735895e-05, "loss": 0.7296, "step": 643 }, { "epoch": 0.82432, "grad_norm": 0.5840075016021729, "learning_rate": 4.873149259703882e-05, "loss": 0.7412, "step": 644 }, { "epoch": 0.8256, "grad_norm": 0.5941234230995178, "learning_rate": 4.872949179671869e-05, "loss": 0.8138, "step": 645 }, { "epoch": 0.82688, "grad_norm": 0.5719572305679321, "learning_rate": 4.8727490996398564e-05, "loss": 0.7713, "step": 646 }, { "epoch": 0.82816, "grad_norm": 0.6207135915756226, "learning_rate": 4.872549019607843e-05, "loss": 0.7417, "step": 647 }, { "epoch": 0.82944, "grad_norm": 0.6460603475570679, "learning_rate": 4.87234893957583e-05, "loss": 0.8659, "step": 648 }, { "epoch": 0.83072, "grad_norm": 0.5850974321365356, "learning_rate": 4.872148859543817e-05, "loss": 0.7083, "step": 649 }, { "epoch": 0.832, "grad_norm": 0.5269494652748108, "learning_rate": 4.871948779511805e-05, "loss": 0.6849, "step": 650 }, { "epoch": 0.83328, "grad_norm": 0.6789783239364624, "learning_rate": 4.8717486994797924e-05, "loss": 0.8186, "step": 651 }, { "epoch": 0.83456, "grad_norm": 0.5852586030960083, "learning_rate": 4.8715486194477795e-05, "loss": 0.759, "step": 652 }, { "epoch": 0.83584, "grad_norm": 0.661586582660675, "learning_rate": 4.871348539415767e-05, "loss": 0.8003, "step": 653 }, { "epoch": 0.83712, "grad_norm": 0.6112899780273438, "learning_rate": 4.871148459383754e-05, "loss": 0.7195, "step": 654 }, { "epoch": 0.8384, "grad_norm": 0.6239529252052307, "learning_rate": 4.8709483793517404e-05, "loss": 0.6815, "step": 655 }, { "epoch": 0.83968, "grad_norm": 0.6071779131889343, "learning_rate": 4.8707482993197276e-05, "loss": 0.6849, "step": 656 }, { "epoch": 0.84096, "grad_norm": 0.6142399311065674, "learning_rate": 4.8705482192877155e-05, "loss": 0.7898, "step": 657 }, { "epoch": 0.84224, "grad_norm": 0.5916489958763123, "learning_rate": 4.8703481392557027e-05, "loss": 0.7373, "step": 658 }, { "epoch": 0.84352, "grad_norm": 0.6097525954246521, "learning_rate": 
4.87014805922369e-05, "loss": 0.751, "step": 659 }, { "epoch": 0.8448, "grad_norm": 0.5901023149490356, "learning_rate": 4.869947979191677e-05, "loss": 0.6901, "step": 660 }, { "epoch": 0.84608, "grad_norm": 0.5702456831932068, "learning_rate": 4.869747899159664e-05, "loss": 0.7518, "step": 661 }, { "epoch": 0.84736, "grad_norm": 0.606547474861145, "learning_rate": 4.8695478191276514e-05, "loss": 0.7508, "step": 662 }, { "epoch": 0.84864, "grad_norm": 0.6051300764083862, "learning_rate": 4.869347739095638e-05, "loss": 0.7659, "step": 663 }, { "epoch": 0.84992, "grad_norm": 0.6050798296928406, "learning_rate": 4.869147659063626e-05, "loss": 0.7429, "step": 664 }, { "epoch": 0.8512, "grad_norm": 0.6163650751113892, "learning_rate": 4.868947579031613e-05, "loss": 0.7387, "step": 665 }, { "epoch": 0.85248, "grad_norm": 0.6229680180549622, "learning_rate": 4.8687474989996e-05, "loss": 0.7673, "step": 666 }, { "epoch": 0.85376, "grad_norm": 0.6289007663726807, "learning_rate": 4.868547418967587e-05, "loss": 0.738, "step": 667 }, { "epoch": 0.85504, "grad_norm": 0.609407901763916, "learning_rate": 4.8683473389355745e-05, "loss": 0.7457, "step": 668 }, { "epoch": 0.85632, "grad_norm": 0.6026242971420288, "learning_rate": 4.868147258903562e-05, "loss": 0.7617, "step": 669 }, { "epoch": 0.8576, "grad_norm": 0.6227514147758484, "learning_rate": 4.867947178871549e-05, "loss": 0.7938, "step": 670 }, { "epoch": 0.85888, "grad_norm": 0.6264092922210693, "learning_rate": 4.867747098839536e-05, "loss": 0.7826, "step": 671 }, { "epoch": 0.86016, "grad_norm": 0.5935805439949036, "learning_rate": 4.867547018807523e-05, "loss": 0.7143, "step": 672 }, { "epoch": 0.86144, "grad_norm": 0.6176136136054993, "learning_rate": 4.8673469387755104e-05, "loss": 0.7985, "step": 673 }, { "epoch": 0.86272, "grad_norm": 0.6620816588401794, "learning_rate": 4.8671468587434976e-05, "loss": 0.7727, "step": 674 }, { "epoch": 0.864, "grad_norm": 0.6603094935417175, "learning_rate": 4.866946778711485e-05, 
"loss": 0.7125, "step": 675 }, { "epoch": 0.86528, "grad_norm": 0.6638785600662231, "learning_rate": 4.866746698679472e-05, "loss": 0.7305, "step": 676 }, { "epoch": 0.86656, "grad_norm": 0.6319957375526428, "learning_rate": 4.866546618647459e-05, "loss": 0.7595, "step": 677 }, { "epoch": 0.86784, "grad_norm": 0.5931079387664795, "learning_rate": 4.8663465386154464e-05, "loss": 0.7415, "step": 678 }, { "epoch": 0.86912, "grad_norm": 0.6137109398841858, "learning_rate": 4.8661464585834336e-05, "loss": 0.7438, "step": 679 }, { "epoch": 0.8704, "grad_norm": 0.5985450744628906, "learning_rate": 4.865946378551421e-05, "loss": 0.7409, "step": 680 }, { "epoch": 0.87168, "grad_norm": 0.62948077917099, "learning_rate": 4.865746298519408e-05, "loss": 0.8124, "step": 681 }, { "epoch": 0.87296, "grad_norm": 0.5815073251724243, "learning_rate": 4.865546218487395e-05, "loss": 0.73, "step": 682 }, { "epoch": 0.87424, "grad_norm": 0.5903521180152893, "learning_rate": 4.865346138455382e-05, "loss": 0.7071, "step": 683 }, { "epoch": 0.87552, "grad_norm": 0.6045056581497192, "learning_rate": 4.8651460584233695e-05, "loss": 0.7571, "step": 684 }, { "epoch": 0.8768, "grad_norm": 0.6277945041656494, "learning_rate": 4.8649459783913573e-05, "loss": 0.7475, "step": 685 }, { "epoch": 0.87808, "grad_norm": 0.6049608588218689, "learning_rate": 4.864745898359344e-05, "loss": 0.7857, "step": 686 }, { "epoch": 0.87936, "grad_norm": 0.6526109576225281, "learning_rate": 4.864545818327331e-05, "loss": 0.7536, "step": 687 }, { "epoch": 0.88064, "grad_norm": 0.6064791679382324, "learning_rate": 4.864345738295318e-05, "loss": 0.7731, "step": 688 }, { "epoch": 0.88192, "grad_norm": 0.5711452960968018, "learning_rate": 4.8641456582633054e-05, "loss": 0.7581, "step": 689 }, { "epoch": 0.8832, "grad_norm": 0.603302538394928, "learning_rate": 4.8639455782312926e-05, "loss": 0.7323, "step": 690 }, { "epoch": 0.88448, "grad_norm": 0.6006559729576111, "learning_rate": 4.86374549819928e-05, "loss": 0.7074, 
"step": 691 }, { "epoch": 0.88576, "grad_norm": 0.5934480428695679, "learning_rate": 4.8635454181672676e-05, "loss": 0.73, "step": 692 }, { "epoch": 0.88704, "grad_norm": 0.5922892093658447, "learning_rate": 4.863345338135255e-05, "loss": 0.7075, "step": 693 }, { "epoch": 0.88832, "grad_norm": 0.5788107514381409, "learning_rate": 4.8631452581032413e-05, "loss": 0.7307, "step": 694 }, { "epoch": 0.8896, "grad_norm": 0.5999823808670044, "learning_rate": 4.8629451780712285e-05, "loss": 0.811, "step": 695 }, { "epoch": 0.89088, "grad_norm": 0.6252598762512207, "learning_rate": 4.862745098039216e-05, "loss": 0.7221, "step": 696 }, { "epoch": 0.89216, "grad_norm": 0.6279902458190918, "learning_rate": 4.862545018007203e-05, "loss": 0.7428, "step": 697 }, { "epoch": 0.89344, "grad_norm": 0.6252185702323914, "learning_rate": 4.86234493797519e-05, "loss": 0.7786, "step": 698 }, { "epoch": 0.89472, "grad_norm": 0.6247672438621521, "learning_rate": 4.862144857943178e-05, "loss": 0.7813, "step": 699 }, { "epoch": 0.896, "grad_norm": 0.6344950199127197, "learning_rate": 4.861944777911165e-05, "loss": 0.7709, "step": 700 }, { "epoch": 0.89728, "grad_norm": 0.6928293704986572, "learning_rate": 4.861744697879152e-05, "loss": 0.7902, "step": 701 }, { "epoch": 0.89856, "grad_norm": 0.5995805859565735, "learning_rate": 4.861544617847139e-05, "loss": 0.7567, "step": 702 }, { "epoch": 0.89984, "grad_norm": 0.6318543553352356, "learning_rate": 4.861344537815126e-05, "loss": 0.7079, "step": 703 }, { "epoch": 0.90112, "grad_norm": 0.5997210144996643, "learning_rate": 4.861144457783113e-05, "loss": 0.7424, "step": 704 }, { "epoch": 0.9024, "grad_norm": 0.6286918520927429, "learning_rate": 4.8609443777511004e-05, "loss": 0.7609, "step": 705 }, { "epoch": 0.90368, "grad_norm": 0.5855825543403625, "learning_rate": 4.860744297719088e-05, "loss": 0.7097, "step": 706 }, { "epoch": 0.90496, "grad_norm": 0.6897373795509338, "learning_rate": 4.8605442176870754e-05, "loss": 0.8089, "step": 707 }, { 
"epoch": 0.90624, "grad_norm": 0.6260521411895752, "learning_rate": 4.8603441376550626e-05, "loss": 0.7375, "step": 708 }, { "epoch": 0.90752, "grad_norm": 0.5700168013572693, "learning_rate": 4.86014405762305e-05, "loss": 0.7927, "step": 709 }, { "epoch": 0.9088, "grad_norm": 0.6189404726028442, "learning_rate": 4.859943977591036e-05, "loss": 0.7137, "step": 710 }, { "epoch": 0.91008, "grad_norm": 0.6204996109008789, "learning_rate": 4.8597438975590235e-05, "loss": 0.7494, "step": 711 }, { "epoch": 0.91136, "grad_norm": 0.5846236944198608, "learning_rate": 4.859543817527011e-05, "loss": 0.7146, "step": 712 }, { "epoch": 0.91264, "grad_norm": 0.6079381704330444, "learning_rate": 4.8593437374949985e-05, "loss": 0.7166, "step": 713 }, { "epoch": 0.91392, "grad_norm": 0.6260196566581726, "learning_rate": 4.859143657462986e-05, "loss": 0.7639, "step": 714 }, { "epoch": 0.9152, "grad_norm": 0.6194947361946106, "learning_rate": 4.858943577430973e-05, "loss": 0.7466, "step": 715 }, { "epoch": 0.91648, "grad_norm": 0.5631256699562073, "learning_rate": 4.85874349739896e-05, "loss": 0.7732, "step": 716 }, { "epoch": 0.91776, "grad_norm": 0.6009548306465149, "learning_rate": 4.858543417366947e-05, "loss": 0.7793, "step": 717 }, { "epoch": 0.91904, "grad_norm": 0.6175373792648315, "learning_rate": 4.858343337334934e-05, "loss": 0.7672, "step": 718 }, { "epoch": 0.92032, "grad_norm": 0.6090966463088989, "learning_rate": 4.858143257302921e-05, "loss": 0.7359, "step": 719 }, { "epoch": 0.9216, "grad_norm": 0.5752703547477722, "learning_rate": 4.857943177270909e-05, "loss": 0.7208, "step": 720 }, { "epoch": 0.92288, "grad_norm": 0.5956680774688721, "learning_rate": 4.857743097238896e-05, "loss": 0.7028, "step": 721 }, { "epoch": 0.92416, "grad_norm": 0.6232988238334656, "learning_rate": 4.857543017206883e-05, "loss": 0.759, "step": 722 }, { "epoch": 0.92544, "grad_norm": 0.6512808799743652, "learning_rate": 4.8573429371748704e-05, "loss": 0.7903, "step": 723 }, { "epoch": 0.92672, 
"grad_norm": 0.6665694117546082, "learning_rate": 4.8571428571428576e-05, "loss": 0.7636, "step": 724 }, { "epoch": 0.928, "grad_norm": 0.5766971111297607, "learning_rate": 4.856942777110845e-05, "loss": 0.7045, "step": 725 }, { "epoch": 0.92928, "grad_norm": 0.610283613204956, "learning_rate": 4.856742697078831e-05, "loss": 0.7486, "step": 726 }, { "epoch": 0.93056, "grad_norm": 0.6264128088951111, "learning_rate": 4.856542617046819e-05, "loss": 0.7867, "step": 727 }, { "epoch": 0.93184, "grad_norm": 0.6030863523483276, "learning_rate": 4.856342537014806e-05, "loss": 0.75, "step": 728 }, { "epoch": 0.93312, "grad_norm": 0.6077870726585388, "learning_rate": 4.8561424569827935e-05, "loss": 0.795, "step": 729 }, { "epoch": 0.9344, "grad_norm": 0.554818332195282, "learning_rate": 4.855942376950781e-05, "loss": 0.7304, "step": 730 }, { "epoch": 0.93568, "grad_norm": 0.5891968607902527, "learning_rate": 4.855742296918768e-05, "loss": 0.7437, "step": 731 }, { "epoch": 0.93696, "grad_norm": 0.5956717133522034, "learning_rate": 4.855542216886755e-05, "loss": 0.7851, "step": 732 }, { "epoch": 0.93824, "grad_norm": 0.6364977359771729, "learning_rate": 4.855342136854742e-05, "loss": 0.7926, "step": 733 }, { "epoch": 0.93952, "grad_norm": 0.6908712387084961, "learning_rate": 4.8551420568227294e-05, "loss": 0.7565, "step": 734 }, { "epoch": 0.9408, "grad_norm": 0.5801886320114136, "learning_rate": 4.8549419767907166e-05, "loss": 0.7251, "step": 735 }, { "epoch": 0.94208, "grad_norm": 0.6222661137580872, "learning_rate": 4.854741896758704e-05, "loss": 0.6952, "step": 736 }, { "epoch": 0.94336, "grad_norm": 0.6288057565689087, "learning_rate": 4.854541816726691e-05, "loss": 0.7691, "step": 737 }, { "epoch": 0.94464, "grad_norm": 0.6167927384376526, "learning_rate": 4.854341736694678e-05, "loss": 0.7479, "step": 738 }, { "epoch": 0.94592, "grad_norm": 0.5813360214233398, "learning_rate": 4.8541416566626654e-05, "loss": 0.7309, "step": 739 }, { "epoch": 0.9472, "grad_norm": 
0.6290002465248108, "learning_rate": 4.8539415766306526e-05, "loss": 0.7725, "step": 740 }, { "epoch": 0.94848, "grad_norm": 0.6278480291366577, "learning_rate": 4.85374149659864e-05, "loss": 0.762, "step": 741 }, { "epoch": 0.94976, "grad_norm": 0.5977178812026978, "learning_rate": 4.853541416566627e-05, "loss": 0.7463, "step": 742 }, { "epoch": 0.95104, "grad_norm": 0.6514163613319397, "learning_rate": 4.853341336534614e-05, "loss": 0.7873, "step": 743 }, { "epoch": 0.95232, "grad_norm": 0.5944337844848633, "learning_rate": 4.853141256502601e-05, "loss": 0.7534, "step": 744 }, { "epoch": 0.9536, "grad_norm": 0.605015218257904, "learning_rate": 4.8529411764705885e-05, "loss": 0.7595, "step": 745 }, { "epoch": 0.95488, "grad_norm": 0.6433084607124329, "learning_rate": 4.852741096438576e-05, "loss": 0.8321, "step": 746 }, { "epoch": 0.95616, "grad_norm": 0.5718684792518616, "learning_rate": 4.852541016406563e-05, "loss": 0.7344, "step": 747 }, { "epoch": 0.95744, "grad_norm": 0.6268803477287292, "learning_rate": 4.85234093637455e-05, "loss": 0.772, "step": 748 }, { "epoch": 0.95872, "grad_norm": 0.6136980056762695, "learning_rate": 4.852140856342537e-05, "loss": 0.7513, "step": 749 }, { "epoch": 0.96, "grad_norm": 0.6517449617385864, "learning_rate": 4.8519407763105244e-05, "loss": 0.7786, "step": 750 }, { "epoch": 0.96128, "grad_norm": 0.5884830355644226, "learning_rate": 4.8517406962785116e-05, "loss": 0.7912, "step": 751 }, { "epoch": 0.96256, "grad_norm": 0.6458854675292969, "learning_rate": 4.851540616246499e-05, "loss": 0.7881, "step": 752 }, { "epoch": 0.96384, "grad_norm": 0.6329476237297058, "learning_rate": 4.851340536214486e-05, "loss": 0.7819, "step": 753 }, { "epoch": 0.96512, "grad_norm": 0.5884618163108826, "learning_rate": 4.851140456182473e-05, "loss": 0.7786, "step": 754 }, { "epoch": 0.9664, "grad_norm": 0.5475010871887207, "learning_rate": 4.8509403761504603e-05, "loss": 0.7172, "step": 755 }, { "epoch": 0.96768, "grad_norm": 0.6388839483261108, 
"learning_rate": 4.8507402961184475e-05, "loss": 0.7659, "step": 756 }, { "epoch": 0.96896, "grad_norm": 0.6201974153518677, "learning_rate": 4.850540216086435e-05, "loss": 0.7723, "step": 757 }, { "epoch": 0.97024, "grad_norm": 0.6046715974807739, "learning_rate": 4.850340136054422e-05, "loss": 0.7486, "step": 758 }, { "epoch": 0.97152, "grad_norm": 0.6070242524147034, "learning_rate": 4.850140056022409e-05, "loss": 0.763, "step": 759 }, { "epoch": 0.9728, "grad_norm": 0.6031877994537354, "learning_rate": 4.849939975990396e-05, "loss": 0.7415, "step": 760 }, { "epoch": 0.97408, "grad_norm": 0.5899202823638916, "learning_rate": 4.8497398959583835e-05, "loss": 0.7867, "step": 761 }, { "epoch": 0.97536, "grad_norm": 0.6172879338264465, "learning_rate": 4.8495398159263706e-05, "loss": 0.6983, "step": 762 }, { "epoch": 0.97664, "grad_norm": 0.639527440071106, "learning_rate": 4.8493397358943585e-05, "loss": 0.7358, "step": 763 }, { "epoch": 0.97792, "grad_norm": 0.6101019382476807, "learning_rate": 4.849139655862345e-05, "loss": 0.7267, "step": 764 }, { "epoch": 0.9792, "grad_norm": 0.6153104305267334, "learning_rate": 4.848939575830332e-05, "loss": 0.7648, "step": 765 }, { "epoch": 0.98048, "grad_norm": 0.5717357993125916, "learning_rate": 4.8487394957983194e-05, "loss": 0.7197, "step": 766 }, { "epoch": 0.98176, "grad_norm": 0.6005909442901611, "learning_rate": 4.8485394157663066e-05, "loss": 0.6786, "step": 767 }, { "epoch": 0.98304, "grad_norm": 0.6198951005935669, "learning_rate": 4.848339335734294e-05, "loss": 0.759, "step": 768 }, { "epoch": 0.98432, "grad_norm": 0.6164072155952454, "learning_rate": 4.848139255702281e-05, "loss": 0.757, "step": 769 }, { "epoch": 0.9856, "grad_norm": 0.5673977136611938, "learning_rate": 4.847939175670269e-05, "loss": 0.7746, "step": 770 }, { "epoch": 0.98688, "grad_norm": 0.6161131262779236, "learning_rate": 4.847739095638256e-05, "loss": 0.77, "step": 771 }, { "epoch": 0.98816, "grad_norm": 0.623842716217041, "learning_rate": 
4.8475390156062425e-05, "loss": 0.7528, "step": 772 }, { "epoch": 0.98944, "grad_norm": 0.6155294179916382, "learning_rate": 4.84733893557423e-05, "loss": 0.7517, "step": 773 }, { "epoch": 0.99072, "grad_norm": 0.6041366457939148, "learning_rate": 4.847138855542217e-05, "loss": 0.7704, "step": 774 }, { "epoch": 0.992, "grad_norm": 0.6505810618400574, "learning_rate": 4.846938775510204e-05, "loss": 0.8253, "step": 775 }, { "epoch": 0.99328, "grad_norm": 0.6835885643959045, "learning_rate": 4.846738695478191e-05, "loss": 0.8475, "step": 776 }, { "epoch": 0.99456, "grad_norm": 0.6628032922744751, "learning_rate": 4.846538615446179e-05, "loss": 0.7722, "step": 777 }, { "epoch": 0.99584, "grad_norm": 0.5734715461730957, "learning_rate": 4.846338535414166e-05, "loss": 0.7254, "step": 778 }, { "epoch": 0.99712, "grad_norm": 0.6373343467712402, "learning_rate": 4.8461384553821535e-05, "loss": 0.7545, "step": 779 }, { "epoch": 0.9984, "grad_norm": 0.6454253196716309, "learning_rate": 4.84593837535014e-05, "loss": 0.7757, "step": 780 }, { "epoch": 0.99968, "grad_norm": 0.6249713897705078, "learning_rate": 4.845738295318127e-05, "loss": 0.6912, "step": 781 }, { "epoch": 1.00096, "grad_norm": 1.1785544157028198, "learning_rate": 4.8455382152861144e-05, "loss": 1.291, "step": 782 }, { "epoch": 1.00224, "grad_norm": 0.6030226349830627, "learning_rate": 4.8453381352541015e-05, "loss": 0.794, "step": 783 }, { "epoch": 1.00352, "grad_norm": 0.5952867865562439, "learning_rate": 4.8451380552220894e-05, "loss": 0.7109, "step": 784 }, { "epoch": 1.0048, "grad_norm": 0.595658004283905, "learning_rate": 4.8449379751900766e-05, "loss": 0.7261, "step": 785 }, { "epoch": 1.00608, "grad_norm": 0.6058535575866699, "learning_rate": 4.844737895158064e-05, "loss": 0.7573, "step": 786 }, { "epoch": 1.00736, "grad_norm": 0.5743350982666016, "learning_rate": 4.844537815126051e-05, "loss": 0.6788, "step": 787 }, { "epoch": 1.00864, "grad_norm": 0.6272056102752686, "learning_rate": 
4.8443377350940375e-05, "loss": 0.7366, "step": 788 }, { "epoch": 1.00992, "grad_norm": 0.6139172315597534, "learning_rate": 4.8441376550620247e-05, "loss": 0.7665, "step": 789 }, { "epoch": 1.0112, "grad_norm": 0.6504907011985779, "learning_rate": 4.843937575030012e-05, "loss": 0.7647, "step": 790 }, { "epoch": 1.01248, "grad_norm": 0.6235556602478027, "learning_rate": 4.843737494998e-05, "loss": 0.7775, "step": 791 }, { "epoch": 1.01376, "grad_norm": 0.5893948078155518, "learning_rate": 4.843537414965987e-05, "loss": 0.7583, "step": 792 }, { "epoch": 1.01504, "grad_norm": 0.6206098198890686, "learning_rate": 4.843337334933974e-05, "loss": 0.7281, "step": 793 }, { "epoch": 1.01632, "grad_norm": 0.639717161655426, "learning_rate": 4.843137254901961e-05, "loss": 0.7478, "step": 794 }, { "epoch": 1.0176, "grad_norm": 0.6055462956428528, "learning_rate": 4.8429371748699484e-05, "loss": 0.698, "step": 795 }, { "epoch": 1.01888, "grad_norm": 0.6277878880500793, "learning_rate": 4.842737094837935e-05, "loss": 0.7602, "step": 796 }, { "epoch": 1.02016, "grad_norm": 0.6001823544502258, "learning_rate": 4.842537014805922e-05, "loss": 0.6931, "step": 797 }, { "epoch": 1.02144, "grad_norm": 0.6369767785072327, "learning_rate": 4.84233693477391e-05, "loss": 0.7378, "step": 798 }, { "epoch": 1.02272, "grad_norm": 0.6533408761024475, "learning_rate": 4.842136854741897e-05, "loss": 0.7738, "step": 799 }, { "epoch": 1.024, "grad_norm": 0.6154987812042236, "learning_rate": 4.8419367747098844e-05, "loss": 0.7081, "step": 800 }, { "epoch": 1.02528, "grad_norm": 0.5977925062179565, "learning_rate": 4.8417366946778716e-05, "loss": 0.6907, "step": 801 }, { "epoch": 1.02656, "grad_norm": 0.5921528935432434, "learning_rate": 4.841536614645859e-05, "loss": 0.7432, "step": 802 }, { "epoch": 1.02784, "grad_norm": 0.5911890268325806, "learning_rate": 4.841336534613846e-05, "loss": 0.6892, "step": 803 }, { "epoch": 1.02912, "grad_norm": 0.6107017993927002, "learning_rate": 
4.8411364545818324e-05, "loss": 0.7641, "step": 804 }, { "epoch": 1.0304, "grad_norm": 0.6192318797111511, "learning_rate": 4.84093637454982e-05, "loss": 0.7411, "step": 805 }, { "epoch": 1.03168, "grad_norm": 0.5901237726211548, "learning_rate": 4.8407362945178075e-05, "loss": 0.7253, "step": 806 }, { "epoch": 1.03296, "grad_norm": 0.6684927344322205, "learning_rate": 4.840536214485795e-05, "loss": 0.7597, "step": 807 }, { "epoch": 1.03424, "grad_norm": 0.6069619059562683, "learning_rate": 4.840336134453782e-05, "loss": 0.709, "step": 808 }, { "epoch": 1.03552, "grad_norm": 0.6064357757568359, "learning_rate": 4.840136054421769e-05, "loss": 0.7321, "step": 809 }, { "epoch": 1.0368, "grad_norm": 0.5926334261894226, "learning_rate": 4.839935974389756e-05, "loss": 0.7346, "step": 810 }, { "epoch": 1.03808, "grad_norm": 0.6023288369178772, "learning_rate": 4.8397358943577434e-05, "loss": 0.7672, "step": 811 }, { "epoch": 1.03936, "grad_norm": 0.6245213747024536, "learning_rate": 4.8395358143257306e-05, "loss": 0.7458, "step": 812 }, { "epoch": 1.04064, "grad_norm": 0.601436197757721, "learning_rate": 4.839335734293718e-05, "loss": 0.7848, "step": 813 }, { "epoch": 1.04192, "grad_norm": 0.6355320811271667, "learning_rate": 4.839135654261705e-05, "loss": 0.8105, "step": 814 }, { "epoch": 1.0432, "grad_norm": 0.5975676774978638, "learning_rate": 4.838935574229692e-05, "loss": 0.692, "step": 815 }, { "epoch": 1.04448, "grad_norm": 0.6143369674682617, "learning_rate": 4.8387354941976793e-05, "loss": 0.7551, "step": 816 }, { "epoch": 1.04576, "grad_norm": 0.6289949417114258, "learning_rate": 4.8385354141656665e-05, "loss": 0.6912, "step": 817 }, { "epoch": 1.04704, "grad_norm": 0.6380932331085205, "learning_rate": 4.838335334133654e-05, "loss": 0.6864, "step": 818 }, { "epoch": 1.04832, "grad_norm": 0.6500244736671448, "learning_rate": 4.838135254101641e-05, "loss": 0.7205, "step": 819 }, { "epoch": 1.0496, "grad_norm": 0.6350643038749695, "learning_rate": 
4.837935174069628e-05, "loss": 0.7197, "step": 820 }, { "epoch": 1.05088, "grad_norm": 0.6777990460395813, "learning_rate": 4.837735094037615e-05, "loss": 0.8366, "step": 821 }, { "epoch": 1.05216, "grad_norm": 0.6251175999641418, "learning_rate": 4.8375350140056025e-05, "loss": 0.7125, "step": 822 }, { "epoch": 1.05344, "grad_norm": 0.6193746328353882, "learning_rate": 4.8373349339735896e-05, "loss": 0.798, "step": 823 }, { "epoch": 1.05472, "grad_norm": 0.6032739877700806, "learning_rate": 4.837134853941577e-05, "loss": 0.7334, "step": 824 }, { "epoch": 1.056, "grad_norm": 0.6197022199630737, "learning_rate": 4.836934773909564e-05, "loss": 0.737, "step": 825 }, { "epoch": 1.05728, "grad_norm": 0.646630585193634, "learning_rate": 4.836734693877551e-05, "loss": 0.7796, "step": 826 }, { "epoch": 1.05856, "grad_norm": 0.5807000398635864, "learning_rate": 4.8365346138455384e-05, "loss": 0.7328, "step": 827 }, { "epoch": 1.05984, "grad_norm": 0.6248646974563599, "learning_rate": 4.8363345338135256e-05, "loss": 0.7364, "step": 828 }, { "epoch": 1.06112, "grad_norm": 0.6000564098358154, "learning_rate": 4.836134453781513e-05, "loss": 0.6898, "step": 829 }, { "epoch": 1.0624, "grad_norm": 0.603438138961792, "learning_rate": 4.8359343737495e-05, "loss": 0.7358, "step": 830 }, { "epoch": 1.06368, "grad_norm": 0.605436384677887, "learning_rate": 4.835734293717487e-05, "loss": 0.7493, "step": 831 }, { "epoch": 1.06496, "grad_norm": 0.6158525347709656, "learning_rate": 4.835534213685474e-05, "loss": 0.743, "step": 832 }, { "epoch": 1.06624, "grad_norm": 0.6049724817276001, "learning_rate": 4.835334133653462e-05, "loss": 0.7252, "step": 833 }, { "epoch": 1.06752, "grad_norm": 0.5752277970314026, "learning_rate": 4.835134053621449e-05, "loss": 0.6848, "step": 834 }, { "epoch": 1.0688, "grad_norm": 0.6556239128112793, "learning_rate": 4.834933973589436e-05, "loss": 0.7814, "step": 835 }, { "epoch": 1.07008, "grad_norm": 0.6204028129577637, "learning_rate": 4.834733893557423e-05, 
"loss": 0.7009, "step": 836 }, { "epoch": 1.07136, "grad_norm": 0.5991467833518982, "learning_rate": 4.83453381352541e-05, "loss": 0.7251, "step": 837 }, { "epoch": 1.07264, "grad_norm": 0.5856754779815674, "learning_rate": 4.8343337334933974e-05, "loss": 0.7455, "step": 838 }, { "epoch": 1.07392, "grad_norm": 0.6454278826713562, "learning_rate": 4.8341336534613846e-05, "loss": 0.7808, "step": 839 }, { "epoch": 1.0752, "grad_norm": 0.6240683197975159, "learning_rate": 4.8339335734293725e-05, "loss": 0.7066, "step": 840 }, { "epoch": 1.07648, "grad_norm": 0.6424701809883118, "learning_rate": 4.83373349339736e-05, "loss": 0.7447, "step": 841 }, { "epoch": 1.07776, "grad_norm": 0.5974248051643372, "learning_rate": 4.833533413365346e-05, "loss": 0.735, "step": 842 }, { "epoch": 1.07904, "grad_norm": 0.6363813281059265, "learning_rate": 4.8333333333333334e-05, "loss": 0.7302, "step": 843 }, { "epoch": 1.08032, "grad_norm": 0.6119391918182373, "learning_rate": 4.8331332533013205e-05, "loss": 0.7222, "step": 844 }, { "epoch": 1.0816, "grad_norm": 0.5887010097503662, "learning_rate": 4.832933173269308e-05, "loss": 0.7207, "step": 845 }, { "epoch": 1.08288, "grad_norm": 0.6205199360847473, "learning_rate": 4.832733093237295e-05, "loss": 0.72, "step": 846 }, { "epoch": 1.08416, "grad_norm": 0.570545494556427, "learning_rate": 4.832533013205282e-05, "loss": 0.7201, "step": 847 }, { "epoch": 1.08544, "grad_norm": 0.6642616987228394, "learning_rate": 4.83233293317327e-05, "loss": 0.8064, "step": 848 }, { "epoch": 1.08672, "grad_norm": 0.650346577167511, "learning_rate": 4.832132853141257e-05, "loss": 0.7717, "step": 849 }, { "epoch": 1.088, "grad_norm": 0.5787684917449951, "learning_rate": 4.831932773109244e-05, "loss": 0.6377, "step": 850 }, { "epoch": 1.08928, "grad_norm": 0.6291863322257996, "learning_rate": 4.831732693077231e-05, "loss": 0.7332, "step": 851 }, { "epoch": 1.09056, "grad_norm": 0.6485406160354614, "learning_rate": 4.831532613045218e-05, "loss": 0.7445, 
"step": 852 }, { "epoch": 1.09184, "grad_norm": 0.6354153156280518, "learning_rate": 4.831332533013205e-05, "loss": 0.7272, "step": 853 }, { "epoch": 1.09312, "grad_norm": 0.5846593379974365, "learning_rate": 4.8311324529811924e-05, "loss": 0.7165, "step": 854 }, { "epoch": 1.0944, "grad_norm": 0.6126893758773804, "learning_rate": 4.83093237294918e-05, "loss": 0.6913, "step": 855 }, { "epoch": 1.09568, "grad_norm": 0.631846010684967, "learning_rate": 4.8307322929171675e-05, "loss": 0.7566, "step": 856 }, { "epoch": 1.09696, "grad_norm": 0.6731998920440674, "learning_rate": 4.8305322128851546e-05, "loss": 0.7488, "step": 857 }, { "epoch": 1.09824, "grad_norm": 0.6166826486587524, "learning_rate": 4.830332132853141e-05, "loss": 0.7271, "step": 858 }, { "epoch": 1.09952, "grad_norm": 0.5922271013259888, "learning_rate": 4.830132052821128e-05, "loss": 0.6596, "step": 859 }, { "epoch": 1.1008, "grad_norm": 0.6259810328483582, "learning_rate": 4.8299319727891155e-05, "loss": 0.7917, "step": 860 }, { "epoch": 1.10208, "grad_norm": 0.6506845355033875, "learning_rate": 4.829731892757103e-05, "loss": 0.7029, "step": 861 }, { "epoch": 1.10336, "grad_norm": 0.6502883434295654, "learning_rate": 4.8295318127250906e-05, "loss": 0.6832, "step": 862 }, { "epoch": 1.10464, "grad_norm": 0.6381411552429199, "learning_rate": 4.829331732693078e-05, "loss": 0.7275, "step": 863 }, { "epoch": 1.10592, "grad_norm": 0.629329264163971, "learning_rate": 4.829131652661065e-05, "loss": 0.7542, "step": 864 }, { "epoch": 1.1072, "grad_norm": 0.6186816692352295, "learning_rate": 4.828931572629052e-05, "loss": 0.7195, "step": 865 }, { "epoch": 1.10848, "grad_norm": 0.5985262393951416, "learning_rate": 4.8287314925970386e-05, "loss": 0.7651, "step": 866 }, { "epoch": 1.10976, "grad_norm": 0.6066650152206421, "learning_rate": 4.828531412565026e-05, "loss": 0.7572, "step": 867 }, { "epoch": 1.11104, "grad_norm": 0.6421188712120056, "learning_rate": 4.828331332533013e-05, "loss": 0.7796, "step": 868 }, 
{ "epoch": 1.11232, "grad_norm": 0.6231709122657776, "learning_rate": 4.828131252501001e-05, "loss": 0.7564, "step": 869 }, { "epoch": 1.1136, "grad_norm": 0.6402696967124939, "learning_rate": 4.827931172468988e-05, "loss": 0.7095, "step": 870 }, { "epoch": 1.11488, "grad_norm": 0.5962916612625122, "learning_rate": 4.827731092436975e-05, "loss": 0.7085, "step": 871 }, { "epoch": 1.11616, "grad_norm": 0.6202763319015503, "learning_rate": 4.8275310124049624e-05, "loss": 0.7063, "step": 872 }, { "epoch": 1.11744, "grad_norm": 0.626986026763916, "learning_rate": 4.8273309323729496e-05, "loss": 0.7705, "step": 873 }, { "epoch": 1.11872, "grad_norm": 0.6388139724731445, "learning_rate": 4.827130852340936e-05, "loss": 0.754, "step": 874 }, { "epoch": 1.12, "grad_norm": 0.6084983348846436, "learning_rate": 4.826930772308923e-05, "loss": 0.7012, "step": 875 }, { "epoch": 1.12128, "grad_norm": 0.6018005609512329, "learning_rate": 4.826730692276911e-05, "loss": 0.7186, "step": 876 }, { "epoch": 1.12256, "grad_norm": 0.6124555468559265, "learning_rate": 4.8265306122448984e-05, "loss": 0.7023, "step": 877 }, { "epoch": 1.12384, "grad_norm": 0.610120415687561, "learning_rate": 4.8263305322128855e-05, "loss": 0.6897, "step": 878 }, { "epoch": 1.12512, "grad_norm": 0.6392123103141785, "learning_rate": 4.826130452180873e-05, "loss": 0.7632, "step": 879 }, { "epoch": 1.1264, "grad_norm": 0.6437667608261108, "learning_rate": 4.82593037214886e-05, "loss": 0.8141, "step": 880 }, { "epoch": 1.12768, "grad_norm": 0.6260906457901001, "learning_rate": 4.825730292116847e-05, "loss": 0.7017, "step": 881 }, { "epoch": 1.12896, "grad_norm": 0.6186864972114563, "learning_rate": 4.8255302120848336e-05, "loss": 0.6796, "step": 882 }, { "epoch": 1.13024, "grad_norm": 0.6303755044937134, "learning_rate": 4.8253301320528215e-05, "loss": 0.7379, "step": 883 }, { "epoch": 1.13152, "grad_norm": 0.5899752378463745, "learning_rate": 4.8251300520208087e-05, "loss": 0.7175, "step": 884 }, { "epoch": 
1.1328, "grad_norm": 0.5789473652839661, "learning_rate": 4.824929971988796e-05, "loss": 0.6513, "step": 885 }, { "epoch": 1.13408, "grad_norm": 0.6217113733291626, "learning_rate": 4.824729891956783e-05, "loss": 0.7529, "step": 886 }, { "epoch": 1.13536, "grad_norm": 0.6412451267242432, "learning_rate": 4.82452981192477e-05, "loss": 0.7977, "step": 887 }, { "epoch": 1.13664, "grad_norm": 0.6278442144393921, "learning_rate": 4.8243297318927574e-05, "loss": 0.7078, "step": 888 }, { "epoch": 1.13792, "grad_norm": 0.6618484854698181, "learning_rate": 4.8241296518607446e-05, "loss": 0.7483, "step": 889 }, { "epoch": 1.1392, "grad_norm": 0.603378176689148, "learning_rate": 4.823929571828732e-05, "loss": 0.7063, "step": 890 }, { "epoch": 1.14048, "grad_norm": 0.6106945276260376, "learning_rate": 4.823729491796719e-05, "loss": 0.7299, "step": 891 }, { "epoch": 1.14176, "grad_norm": 0.6208406090736389, "learning_rate": 4.823529411764706e-05, "loss": 0.7362, "step": 892 }, { "epoch": 1.14304, "grad_norm": 0.63667231798172, "learning_rate": 4.823329331732693e-05, "loss": 0.744, "step": 893 }, { "epoch": 1.14432, "grad_norm": 0.5953905582427979, "learning_rate": 4.8231292517006805e-05, "loss": 0.7047, "step": 894 }, { "epoch": 1.1456, "grad_norm": 0.6081019043922424, "learning_rate": 4.822929171668668e-05, "loss": 0.7546, "step": 895 }, { "epoch": 1.14688, "grad_norm": 0.6807315349578857, "learning_rate": 4.822729091636655e-05, "loss": 0.7761, "step": 896 }, { "epoch": 1.14816, "grad_norm": 0.6396183967590332, "learning_rate": 4.822529011604642e-05, "loss": 0.7651, "step": 897 }, { "epoch": 1.14944, "grad_norm": 0.6671329736709595, "learning_rate": 4.822328931572629e-05, "loss": 0.8052, "step": 898 }, { "epoch": 1.15072, "grad_norm": 0.6230823397636414, "learning_rate": 4.8221288515406164e-05, "loss": 0.7748, "step": 899 }, { "epoch": 1.152, "grad_norm": 0.6346234083175659, "learning_rate": 4.8219287715086036e-05, "loss": 0.7335, "step": 900 }, { "epoch": 1.15328, 
"grad_norm": 0.6477737426757812, "learning_rate": 4.821728691476591e-05, "loss": 0.7561, "step": 901 }, { "epoch": 1.15456, "grad_norm": 0.6157343983650208, "learning_rate": 4.821528611444578e-05, "loss": 0.7422, "step": 902 }, { "epoch": 1.15584, "grad_norm": 0.6227743625640869, "learning_rate": 4.821328531412565e-05, "loss": 0.7081, "step": 903 }, { "epoch": 1.15712, "grad_norm": 0.6442050337791443, "learning_rate": 4.8211284513805524e-05, "loss": 0.7545, "step": 904 }, { "epoch": 1.1584, "grad_norm": 0.5827674865722656, "learning_rate": 4.8209283713485396e-05, "loss": 0.6298, "step": 905 }, { "epoch": 1.15968, "grad_norm": 0.6147791147232056, "learning_rate": 4.820728291316527e-05, "loss": 0.7058, "step": 906 }, { "epoch": 1.16096, "grad_norm": 0.6040810942649841, "learning_rate": 4.820528211284514e-05, "loss": 0.7474, "step": 907 }, { "epoch": 1.16224, "grad_norm": 0.6344786286354065, "learning_rate": 4.820328131252501e-05, "loss": 0.732, "step": 908 }, { "epoch": 1.16352, "grad_norm": 0.6694657802581787, "learning_rate": 4.820128051220488e-05, "loss": 0.7453, "step": 909 }, { "epoch": 1.1648, "grad_norm": 0.6136607527732849, "learning_rate": 4.8199279711884755e-05, "loss": 0.7458, "step": 910 }, { "epoch": 1.16608, "grad_norm": 0.6418918371200562, "learning_rate": 4.8197278911564633e-05, "loss": 0.723, "step": 911 }, { "epoch": 1.16736, "grad_norm": 0.6443724036216736, "learning_rate": 4.81952781112445e-05, "loss": 0.6964, "step": 912 }, { "epoch": 1.16864, "grad_norm": 0.6705388426780701, "learning_rate": 4.819327731092437e-05, "loss": 0.7893, "step": 913 }, { "epoch": 1.16992, "grad_norm": 0.6297101974487305, "learning_rate": 4.819127651060424e-05, "loss": 0.7399, "step": 914 }, { "epoch": 1.1712, "grad_norm": 0.6698152422904968, "learning_rate": 4.8189275710284114e-05, "loss": 0.7513, "step": 915 }, { "epoch": 1.17248, "grad_norm": 0.6113006472587585, "learning_rate": 4.8187274909963986e-05, "loss": 0.7495, "step": 916 }, { "epoch": 1.17376, "grad_norm": 
0.6909430027008057, "learning_rate": 4.818527410964386e-05, "loss": 0.795, "step": 917 }, { "epoch": 1.17504, "grad_norm": 0.6630053520202637, "learning_rate": 4.8183273309323736e-05, "loss": 0.7305, "step": 918 }, { "epoch": 1.17632, "grad_norm": 0.6622920036315918, "learning_rate": 4.818127250900361e-05, "loss": 0.7672, "step": 919 }, { "epoch": 1.1776, "grad_norm": 0.6172346472740173, "learning_rate": 4.817927170868347e-05, "loss": 0.6956, "step": 920 }, { "epoch": 1.17888, "grad_norm": 0.6722678542137146, "learning_rate": 4.8177270908363345e-05, "loss": 0.7633, "step": 921 }, { "epoch": 1.1801599999999999, "grad_norm": 0.6573194861412048, "learning_rate": 4.817527010804322e-05, "loss": 0.7313, "step": 922 }, { "epoch": 1.18144, "grad_norm": 0.598237931728363, "learning_rate": 4.817326930772309e-05, "loss": 0.6963, "step": 923 }, { "epoch": 1.18272, "grad_norm": 0.6254197359085083, "learning_rate": 4.817126850740296e-05, "loss": 0.727, "step": 924 }, { "epoch": 1.184, "grad_norm": 0.6584347486495972, "learning_rate": 4.816926770708284e-05, "loss": 0.7281, "step": 925 }, { "epoch": 1.1852800000000001, "grad_norm": 0.6371662020683289, "learning_rate": 4.816726690676271e-05, "loss": 0.7286, "step": 926 }, { "epoch": 1.18656, "grad_norm": 0.6230917572975159, "learning_rate": 4.816526610644258e-05, "loss": 0.725, "step": 927 }, { "epoch": 1.18784, "grad_norm": 0.613785445690155, "learning_rate": 4.816326530612245e-05, "loss": 0.7596, "step": 928 }, { "epoch": 1.18912, "grad_norm": 0.6123350262641907, "learning_rate": 4.816126450580232e-05, "loss": 0.7271, "step": 929 }, { "epoch": 1.1904, "grad_norm": 0.604227602481842, "learning_rate": 4.815926370548219e-05, "loss": 0.72, "step": 930 }, { "epoch": 1.19168, "grad_norm": 0.6275907754898071, "learning_rate": 4.8157262905162064e-05, "loss": 0.6766, "step": 931 }, { "epoch": 1.19296, "grad_norm": 0.6855752468109131, "learning_rate": 4.815526210484194e-05, "loss": 0.7628, "step": 932 }, { "epoch": 1.19424, "grad_norm": 
0.6172601580619812, "learning_rate": 4.8153261304521814e-05, "loss": 0.7254, "step": 933 }, { "epoch": 1.19552, "grad_norm": 0.6443458795547485, "learning_rate": 4.8151260504201686e-05, "loss": 0.7489, "step": 934 }, { "epoch": 1.1968, "grad_norm": 0.6204928755760193, "learning_rate": 4.814925970388156e-05, "loss": 0.7606, "step": 935 }, { "epoch": 1.19808, "grad_norm": 0.6195954084396362, "learning_rate": 4.814725890356142e-05, "loss": 0.7121, "step": 936 }, { "epoch": 1.19936, "grad_norm": 0.6397810578346252, "learning_rate": 4.8145258103241295e-05, "loss": 0.7392, "step": 937 }, { "epoch": 1.20064, "grad_norm": 0.6325953602790833, "learning_rate": 4.814325730292117e-05, "loss": 0.7099, "step": 938 }, { "epoch": 1.2019199999999999, "grad_norm": 0.5596916079521179, "learning_rate": 4.8141256502601045e-05, "loss": 0.6416, "step": 939 }, { "epoch": 1.2032, "grad_norm": 0.6255161762237549, "learning_rate": 4.813925570228092e-05, "loss": 0.6897, "step": 940 }, { "epoch": 1.20448, "grad_norm": 0.6093683242797852, "learning_rate": 4.813725490196079e-05, "loss": 0.7421, "step": 941 }, { "epoch": 1.20576, "grad_norm": 0.6216311454772949, "learning_rate": 4.813525410164066e-05, "loss": 0.6835, "step": 942 }, { "epoch": 1.2070400000000001, "grad_norm": 0.671823263168335, "learning_rate": 4.813325330132053e-05, "loss": 0.7734, "step": 943 }, { "epoch": 1.20832, "grad_norm": 0.7053853869438171, "learning_rate": 4.81312525010004e-05, "loss": 0.7447, "step": 944 }, { "epoch": 1.2096, "grad_norm": 0.5679812431335449, "learning_rate": 4.812925170068027e-05, "loss": 0.6654, "step": 945 }, { "epoch": 1.21088, "grad_norm": 0.5954858660697937, "learning_rate": 4.812725090036015e-05, "loss": 0.6828, "step": 946 }, { "epoch": 1.21216, "grad_norm": 0.6449232697486877, "learning_rate": 4.812525010004002e-05, "loss": 0.7228, "step": 947 }, { "epoch": 1.21344, "grad_norm": 0.6288172006607056, "learning_rate": 4.812324929971989e-05, "loss": 0.7043, "step": 948 }, { "epoch": 1.21472, 
"grad_norm": 0.6665524244308472, "learning_rate": 4.8121248499399764e-05, "loss": 0.7963, "step": 949 }, { "epoch": 1.216, "grad_norm": 0.6399210095405579, "learning_rate": 4.8119247699079636e-05, "loss": 0.7559, "step": 950 }, { "epoch": 1.21728, "grad_norm": 0.611251711845398, "learning_rate": 4.811724689875951e-05, "loss": 0.7279, "step": 951 }, { "epoch": 1.21856, "grad_norm": 0.6060947775840759, "learning_rate": 4.811524609843937e-05, "loss": 0.7066, "step": 952 }, { "epoch": 1.21984, "grad_norm": 0.660533607006073, "learning_rate": 4.811324529811925e-05, "loss": 0.7361, "step": 953 }, { "epoch": 1.22112, "grad_norm": 0.6510452032089233, "learning_rate": 4.811124449779912e-05, "loss": 0.7185, "step": 954 }, { "epoch": 1.2224, "grad_norm": 0.649373471736908, "learning_rate": 4.8109243697478995e-05, "loss": 0.7748, "step": 955 }, { "epoch": 1.2236799999999999, "grad_norm": 0.6497557759284973, "learning_rate": 4.810724289715887e-05, "loss": 0.7367, "step": 956 }, { "epoch": 1.22496, "grad_norm": 0.6782010197639465, "learning_rate": 4.810524209683874e-05, "loss": 0.6939, "step": 957 }, { "epoch": 1.22624, "grad_norm": 0.647323727607727, "learning_rate": 4.810324129651861e-05, "loss": 0.6888, "step": 958 }, { "epoch": 1.22752, "grad_norm": 0.6662117838859558, "learning_rate": 4.810124049619848e-05, "loss": 0.7344, "step": 959 }, { "epoch": 1.2288000000000001, "grad_norm": 0.6221624612808228, "learning_rate": 4.809923969587835e-05, "loss": 0.7277, "step": 960 }, { "epoch": 1.23008, "grad_norm": 0.6320923566818237, "learning_rate": 4.8097238895558226e-05, "loss": 0.7305, "step": 961 }, { "epoch": 1.23136, "grad_norm": 0.6177527904510498, "learning_rate": 4.80952380952381e-05, "loss": 0.7247, "step": 962 }, { "epoch": 1.23264, "grad_norm": 0.6447619199752808, "learning_rate": 4.809323729491797e-05, "loss": 0.739, "step": 963 }, { "epoch": 1.23392, "grad_norm": 0.6220279335975647, "learning_rate": 4.809123649459784e-05, "loss": 0.7493, "step": 964 }, { "epoch": 1.2352, 
"grad_norm": 0.6294159889221191, "learning_rate": 4.8089235694277714e-05, "loss": 0.7178, "step": 965 }, { "epoch": 1.23648, "grad_norm": 0.6304751634597778, "learning_rate": 4.8087234893957586e-05, "loss": 0.7529, "step": 966 }, { "epoch": 1.23776, "grad_norm": 0.654147744178772, "learning_rate": 4.808523409363746e-05, "loss": 0.7369, "step": 967 }, { "epoch": 1.23904, "grad_norm": 0.6700395345687866, "learning_rate": 4.808323329331733e-05, "loss": 0.7395, "step": 968 }, { "epoch": 1.24032, "grad_norm": 0.6803591251373291, "learning_rate": 4.80812324929972e-05, "loss": 0.815, "step": 969 }, { "epoch": 1.2416, "grad_norm": 0.6240243911743164, "learning_rate": 4.807923169267707e-05, "loss": 0.7645, "step": 970 }, { "epoch": 1.24288, "grad_norm": 0.6563379764556885, "learning_rate": 4.8077230892356945e-05, "loss": 0.7685, "step": 971 }, { "epoch": 1.24416, "grad_norm": 0.6459086537361145, "learning_rate": 4.807523009203682e-05, "loss": 0.7109, "step": 972 }, { "epoch": 1.2454399999999999, "grad_norm": 0.6518861651420593, "learning_rate": 4.807322929171669e-05, "loss": 0.7225, "step": 973 }, { "epoch": 1.24672, "grad_norm": 0.6392287015914917, "learning_rate": 4.807122849139656e-05, "loss": 0.7454, "step": 974 }, { "epoch": 1.248, "grad_norm": 0.639618456363678, "learning_rate": 4.806922769107643e-05, "loss": 0.7345, "step": 975 }, { "epoch": 1.24928, "grad_norm": 0.6422220468521118, "learning_rate": 4.8067226890756304e-05, "loss": 0.7571, "step": 976 }, { "epoch": 1.2505600000000001, "grad_norm": 0.6119744777679443, "learning_rate": 4.8065226090436176e-05, "loss": 0.7367, "step": 977 }, { "epoch": 1.25184, "grad_norm": 0.6147500872612, "learning_rate": 4.806322529011605e-05, "loss": 0.6999, "step": 978 }, { "epoch": 1.25312, "grad_norm": 0.6690681576728821, "learning_rate": 4.806122448979592e-05, "loss": 0.7657, "step": 979 }, { "epoch": 1.2544, "grad_norm": 0.6589951515197754, "learning_rate": 4.805922368947579e-05, "loss": 0.6975, "step": 980 }, { "epoch": 1.25568, 
"grad_norm": 0.6869469285011292, "learning_rate": 4.8057222889155663e-05, "loss": 0.7403, "step": 981 }, { "epoch": 1.25696, "grad_norm": 0.6274691224098206, "learning_rate": 4.805522208883554e-05, "loss": 0.7046, "step": 982 }, { "epoch": 1.25824, "grad_norm": 0.6332339644432068, "learning_rate": 4.805322128851541e-05, "loss": 0.7729, "step": 983 }, { "epoch": 1.25952, "grad_norm": 0.6267856955528259, "learning_rate": 4.805122048819528e-05, "loss": 0.75, "step": 984 }, { "epoch": 1.2608, "grad_norm": 0.6330248117446899, "learning_rate": 4.804921968787515e-05, "loss": 0.7085, "step": 985 }, { "epoch": 1.26208, "grad_norm": 0.6453738212585449, "learning_rate": 4.804721888755502e-05, "loss": 0.7433, "step": 986 }, { "epoch": 1.26336, "grad_norm": 0.6724556684494019, "learning_rate": 4.8045218087234895e-05, "loss": 0.7734, "step": 987 }, { "epoch": 1.26464, "grad_norm": 0.6251585483551025, "learning_rate": 4.8043217286914766e-05, "loss": 0.6533, "step": 988 }, { "epoch": 1.26592, "grad_norm": 0.6305882930755615, "learning_rate": 4.8041216486594645e-05, "loss": 0.7489, "step": 989 }, { "epoch": 1.2671999999999999, "grad_norm": 0.6422156095504761, "learning_rate": 4.803921568627452e-05, "loss": 0.7583, "step": 990 }, { "epoch": 1.26848, "grad_norm": 0.6040436029434204, "learning_rate": 4.803721488595438e-05, "loss": 0.6738, "step": 991 }, { "epoch": 1.26976, "grad_norm": 0.6677682399749756, "learning_rate": 4.8035214085634254e-05, "loss": 0.7181, "step": 992 }, { "epoch": 1.27104, "grad_norm": 0.6229025721549988, "learning_rate": 4.8033213285314126e-05, "loss": 0.6651, "step": 993 }, { "epoch": 1.2723200000000001, "grad_norm": 0.6611243486404419, "learning_rate": 4.8031212484994e-05, "loss": 0.787, "step": 994 }, { "epoch": 1.2736, "grad_norm": 0.6305986642837524, "learning_rate": 4.802921168467387e-05, "loss": 0.763, "step": 995 }, { "epoch": 1.27488, "grad_norm": 0.6216512322425842, "learning_rate": 4.802721088435375e-05, "loss": 0.7323, "step": 996 }, { "epoch": 
1.27616, "grad_norm": 0.6225135922431946, "learning_rate": 4.802521008403362e-05, "loss": 0.7429, "step": 997 }, { "epoch": 1.27744, "grad_norm": 0.6232655048370361, "learning_rate": 4.802320928371349e-05, "loss": 0.7163, "step": 998 }, { "epoch": 1.27872, "grad_norm": 0.585407555103302, "learning_rate": 4.802120848339336e-05, "loss": 0.6991, "step": 999 }, { "epoch": 1.28, "grad_norm": 0.6161494255065918, "learning_rate": 4.801920768307323e-05, "loss": 0.7389, "step": 1000 }, { "epoch": 1.28128, "grad_norm": 0.5975562930107117, "learning_rate": 4.80172068827531e-05, "loss": 0.7076, "step": 1001 }, { "epoch": 1.28256, "grad_norm": 0.6090825200080872, "learning_rate": 4.801520608243297e-05, "loss": 0.724, "step": 1002 }, { "epoch": 1.28384, "grad_norm": 0.6444687843322754, "learning_rate": 4.801320528211285e-05, "loss": 0.7562, "step": 1003 }, { "epoch": 1.28512, "grad_norm": 0.6110180616378784, "learning_rate": 4.801120448179272e-05, "loss": 0.6253, "step": 1004 }, { "epoch": 1.2864, "grad_norm": 0.683491051197052, "learning_rate": 4.8009203681472595e-05, "loss": 0.794, "step": 1005 }, { "epoch": 1.28768, "grad_norm": 0.6164990663528442, "learning_rate": 4.8007202881152467e-05, "loss": 0.6802, "step": 1006 }, { "epoch": 1.2889599999999999, "grad_norm": 0.6573185920715332, "learning_rate": 4.800520208083233e-05, "loss": 0.7123, "step": 1007 }, { "epoch": 1.29024, "grad_norm": 0.6175433993339539, "learning_rate": 4.8003201280512204e-05, "loss": 0.6906, "step": 1008 }, { "epoch": 1.29152, "grad_norm": 0.6465235948562622, "learning_rate": 4.8001200480192075e-05, "loss": 0.6843, "step": 1009 }, { "epoch": 1.2928, "grad_norm": 0.617728590965271, "learning_rate": 4.7999199679871954e-05, "loss": 0.7187, "step": 1010 }, { "epoch": 1.2940800000000001, "grad_norm": 0.660306990146637, "learning_rate": 4.7997198879551826e-05, "loss": 0.7547, "step": 1011 }, { "epoch": 1.29536, "grad_norm": 0.6021302938461304, "learning_rate": 4.79951980792317e-05, "loss": 0.6895, "step": 1012 
}, { "epoch": 1.29664, "grad_norm": 0.6052932143211365, "learning_rate": 4.799319727891157e-05, "loss": 0.7183, "step": 1013 }, { "epoch": 1.29792, "grad_norm": 0.6130071878433228, "learning_rate": 4.799119647859144e-05, "loss": 0.7347, "step": 1014 }, { "epoch": 1.2992, "grad_norm": 0.659451961517334, "learning_rate": 4.7989195678271307e-05, "loss": 0.7356, "step": 1015 }, { "epoch": 1.30048, "grad_norm": 0.6528733372688293, "learning_rate": 4.798719487795118e-05, "loss": 0.6957, "step": 1016 }, { "epoch": 1.30176, "grad_norm": 0.6614980697631836, "learning_rate": 4.798519407763106e-05, "loss": 0.7466, "step": 1017 }, { "epoch": 1.30304, "grad_norm": 0.6211322546005249, "learning_rate": 4.798319327731093e-05, "loss": 0.7367, "step": 1018 }, { "epoch": 1.30432, "grad_norm": 0.6198946833610535, "learning_rate": 4.79811924769908e-05, "loss": 0.7108, "step": 1019 }, { "epoch": 1.3056, "grad_norm": 0.6257739067077637, "learning_rate": 4.797919167667067e-05, "loss": 0.7444, "step": 1020 }, { "epoch": 1.30688, "grad_norm": 0.6333909630775452, "learning_rate": 4.7977190876350544e-05, "loss": 0.6794, "step": 1021 }, { "epoch": 1.30816, "grad_norm": 0.6291680335998535, "learning_rate": 4.7975190076030416e-05, "loss": 0.7711, "step": 1022 }, { "epoch": 1.30944, "grad_norm": 0.6223838925361633, "learning_rate": 4.797318927571028e-05, "loss": 0.7198, "step": 1023 }, { "epoch": 1.3107199999999999, "grad_norm": 0.6127943992614746, "learning_rate": 4.797118847539016e-05, "loss": 0.6988, "step": 1024 }, { "epoch": 1.312, "grad_norm": 0.6955495476722717, "learning_rate": 4.796918767507003e-05, "loss": 0.7987, "step": 1025 }, { "epoch": 1.31328, "grad_norm": 0.6549299359321594, "learning_rate": 4.7967186874749904e-05, "loss": 0.7838, "step": 1026 }, { "epoch": 1.31456, "grad_norm": 0.5967473387718201, "learning_rate": 4.7965186074429776e-05, "loss": 0.7516, "step": 1027 }, { "epoch": 1.3158400000000001, "grad_norm": 0.6326315999031067, "learning_rate": 4.796318527410965e-05, "loss": 
0.7809, "step": 1028 }, { "epoch": 1.31712, "grad_norm": 0.6114999651908875, "learning_rate": 4.796118447378952e-05, "loss": 0.7084, "step": 1029 }, { "epoch": 1.3184, "grad_norm": 0.6324224472045898, "learning_rate": 4.795918367346939e-05, "loss": 0.7194, "step": 1030 }, { "epoch": 1.31968, "grad_norm": 0.6108016967773438, "learning_rate": 4.795718287314926e-05, "loss": 0.7008, "step": 1031 }, { "epoch": 1.32096, "grad_norm": 0.6326055526733398, "learning_rate": 4.7955182072829135e-05, "loss": 0.747, "step": 1032 }, { "epoch": 1.32224, "grad_norm": 0.5859755277633667, "learning_rate": 4.795318127250901e-05, "loss": 0.7171, "step": 1033 }, { "epoch": 1.32352, "grad_norm": 0.6281324028968811, "learning_rate": 4.795118047218888e-05, "loss": 0.7921, "step": 1034 }, { "epoch": 1.3248, "grad_norm": 0.5920976996421814, "learning_rate": 4.794917967186875e-05, "loss": 0.6885, "step": 1035 }, { "epoch": 1.32608, "grad_norm": 0.6835001111030579, "learning_rate": 4.794717887154862e-05, "loss": 0.7674, "step": 1036 }, { "epoch": 1.32736, "grad_norm": 0.6190808415412903, "learning_rate": 4.7945178071228494e-05, "loss": 0.7408, "step": 1037 }, { "epoch": 1.32864, "grad_norm": 0.6121822595596313, "learning_rate": 4.7943177270908366e-05, "loss": 0.6875, "step": 1038 }, { "epoch": 1.32992, "grad_norm": 0.6977471709251404, "learning_rate": 4.794117647058824e-05, "loss": 0.8305, "step": 1039 }, { "epoch": 1.3312, "grad_norm": 0.6396989822387695, "learning_rate": 4.793917567026811e-05, "loss": 0.705, "step": 1040 }, { "epoch": 1.3324799999999999, "grad_norm": 0.6352539658546448, "learning_rate": 4.793717486994798e-05, "loss": 0.8305, "step": 1041 }, { "epoch": 1.33376, "grad_norm": 0.6385099291801453, "learning_rate": 4.7935174069627853e-05, "loss": 0.723, "step": 1042 }, { "epoch": 1.33504, "grad_norm": 0.6024138927459717, "learning_rate": 4.7933173269307725e-05, "loss": 0.684, "step": 1043 }, { "epoch": 1.33632, "grad_norm": 0.6106967329978943, "learning_rate": 4.79311724689876e-05, 
"loss": 0.762, "step": 1044 }, { "epoch": 1.3376000000000001, "grad_norm": 0.6504346132278442, "learning_rate": 4.792917166866747e-05, "loss": 0.6937, "step": 1045 }, { "epoch": 1.33888, "grad_norm": 0.6468571424484253, "learning_rate": 4.792717086834734e-05, "loss": 0.7919, "step": 1046 }, { "epoch": 1.34016, "grad_norm": 0.6253741979598999, "learning_rate": 4.792517006802721e-05, "loss": 0.7041, "step": 1047 }, { "epoch": 1.34144, "grad_norm": 0.6577421426773071, "learning_rate": 4.7923169267707085e-05, "loss": 0.8185, "step": 1048 }, { "epoch": 1.34272, "grad_norm": 0.6654506325721741, "learning_rate": 4.7921168467386956e-05, "loss": 0.7253, "step": 1049 }, { "epoch": 1.3439999999999999, "grad_norm": 0.6202994585037231, "learning_rate": 4.791916766706683e-05, "loss": 0.6825, "step": 1050 }, { "epoch": 1.34528, "grad_norm": 0.6256506443023682, "learning_rate": 4.79171668667467e-05, "loss": 0.6949, "step": 1051 }, { "epoch": 1.34656, "grad_norm": 0.6409286856651306, "learning_rate": 4.791516606642658e-05, "loss": 0.72, "step": 1052 }, { "epoch": 1.34784, "grad_norm": 0.6390777230262756, "learning_rate": 4.7913165266106444e-05, "loss": 0.7029, "step": 1053 }, { "epoch": 1.34912, "grad_norm": 0.620672345161438, "learning_rate": 4.7911164465786316e-05, "loss": 0.7373, "step": 1054 }, { "epoch": 1.3504, "grad_norm": 0.6143883466720581, "learning_rate": 4.790916366546619e-05, "loss": 0.7371, "step": 1055 }, { "epoch": 1.35168, "grad_norm": 0.6494544744491577, "learning_rate": 4.790716286514606e-05, "loss": 0.7677, "step": 1056 }, { "epoch": 1.35296, "grad_norm": 0.6207596659660339, "learning_rate": 4.790516206482593e-05, "loss": 0.7106, "step": 1057 }, { "epoch": 1.3542399999999999, "grad_norm": 0.6054299473762512, "learning_rate": 4.79031612645058e-05, "loss": 0.7055, "step": 1058 }, { "epoch": 1.35552, "grad_norm": 0.6713016629219055, "learning_rate": 4.790116046418568e-05, "loss": 0.7229, "step": 1059 }, { "epoch": 1.3568, "grad_norm": 0.6039015054702759, 
"learning_rate": 4.7899159663865554e-05, "loss": 0.6853, "step": 1060 }, { "epoch": 1.35808, "grad_norm": 0.6022716164588928, "learning_rate": 4.789715886354542e-05, "loss": 0.7085, "step": 1061 }, { "epoch": 1.3593600000000001, "grad_norm": 0.6193332076072693, "learning_rate": 4.789515806322529e-05, "loss": 0.7405, "step": 1062 }, { "epoch": 1.36064, "grad_norm": 0.6295928359031677, "learning_rate": 4.789315726290516e-05, "loss": 0.7142, "step": 1063 }, { "epoch": 1.36192, "grad_norm": 0.6390275359153748, "learning_rate": 4.7891156462585034e-05, "loss": 0.7402, "step": 1064 }, { "epoch": 1.3632, "grad_norm": 0.6579723954200745, "learning_rate": 4.7889155662264906e-05, "loss": 0.7671, "step": 1065 }, { "epoch": 1.36448, "grad_norm": 0.6290743350982666, "learning_rate": 4.7887154861944785e-05, "loss": 0.7414, "step": 1066 }, { "epoch": 1.3657599999999999, "grad_norm": 0.6132893562316895, "learning_rate": 4.788515406162466e-05, "loss": 0.6352, "step": 1067 }, { "epoch": 1.36704, "grad_norm": 0.6277240514755249, "learning_rate": 4.788315326130453e-05, "loss": 0.6889, "step": 1068 }, { "epoch": 1.36832, "grad_norm": 0.6457761526107788, "learning_rate": 4.7881152460984394e-05, "loss": 0.7332, "step": 1069 }, { "epoch": 1.3696, "grad_norm": 0.6207125186920166, "learning_rate": 4.7879151660664265e-05, "loss": 0.711, "step": 1070 }, { "epoch": 1.37088, "grad_norm": 0.662109375, "learning_rate": 4.787715086034414e-05, "loss": 0.8474, "step": 1071 }, { "epoch": 1.37216, "grad_norm": 0.6627180576324463, "learning_rate": 4.787515006002401e-05, "loss": 0.7317, "step": 1072 }, { "epoch": 1.37344, "grad_norm": 0.6555449962615967, "learning_rate": 4.787314925970388e-05, "loss": 0.767, "step": 1073 }, { "epoch": 1.37472, "grad_norm": 0.619805634021759, "learning_rate": 4.787114845938376e-05, "loss": 0.6963, "step": 1074 }, { "epoch": 1.376, "grad_norm": 0.6373822093009949, "learning_rate": 4.786914765906363e-05, "loss": 0.7016, "step": 1075 }, { "epoch": 1.37728, "grad_norm": 
0.6532891392707825, "learning_rate": 4.78671468587435e-05, "loss": 0.694, "step": 1076 }, { "epoch": 1.37856, "grad_norm": 0.6646358370780945, "learning_rate": 4.786514605842337e-05, "loss": 0.7097, "step": 1077 }, { "epoch": 1.37984, "grad_norm": 0.6382097601890564, "learning_rate": 4.786314525810324e-05, "loss": 0.7613, "step": 1078 }, { "epoch": 1.3811200000000001, "grad_norm": 0.5848649144172668, "learning_rate": 4.786114445778311e-05, "loss": 0.7163, "step": 1079 }, { "epoch": 1.3824, "grad_norm": 0.6216439008712769, "learning_rate": 4.7859143657462984e-05, "loss": 0.708, "step": 1080 }, { "epoch": 1.38368, "grad_norm": 0.6398138999938965, "learning_rate": 4.785714285714286e-05, "loss": 0.7093, "step": 1081 }, { "epoch": 1.38496, "grad_norm": 0.601285994052887, "learning_rate": 4.7855142056822734e-05, "loss": 0.7506, "step": 1082 }, { "epoch": 1.38624, "grad_norm": 0.5848278999328613, "learning_rate": 4.7853141256502606e-05, "loss": 0.7303, "step": 1083 }, { "epoch": 1.3875199999999999, "grad_norm": 0.6471388339996338, "learning_rate": 4.785114045618248e-05, "loss": 0.7204, "step": 1084 }, { "epoch": 1.3888, "grad_norm": 0.6106283068656921, "learning_rate": 4.784913965586234e-05, "loss": 0.6882, "step": 1085 }, { "epoch": 1.39008, "grad_norm": 0.6192972660064697, "learning_rate": 4.7847138855542215e-05, "loss": 0.7204, "step": 1086 }, { "epoch": 1.39136, "grad_norm": 0.6262179613113403, "learning_rate": 4.784513805522209e-05, "loss": 0.7779, "step": 1087 }, { "epoch": 1.39264, "grad_norm": 0.5966930985450745, "learning_rate": 4.7843137254901966e-05, "loss": 0.7018, "step": 1088 }, { "epoch": 1.39392, "grad_norm": 0.5986944437026978, "learning_rate": 4.784113645458184e-05, "loss": 0.7918, "step": 1089 }, { "epoch": 1.3952, "grad_norm": 0.649640679359436, "learning_rate": 4.783913565426171e-05, "loss": 0.8004, "step": 1090 }, { "epoch": 1.39648, "grad_norm": 0.6140245199203491, "learning_rate": 4.783713485394158e-05, "loss": 0.7116, "step": 1091 }, { "epoch": 
1.39776, "grad_norm": 0.6077390909194946, "learning_rate": 4.783513405362145e-05, "loss": 0.7018, "step": 1092 }, { "epoch": 1.39904, "grad_norm": 0.6407623887062073, "learning_rate": 4.783313325330132e-05, "loss": 0.7147, "step": 1093 }, { "epoch": 1.40032, "grad_norm": 0.6380225419998169, "learning_rate": 4.783113245298119e-05, "loss": 0.7089, "step": 1094 }, { "epoch": 1.4016, "grad_norm": 0.5923783183097839, "learning_rate": 4.782913165266107e-05, "loss": 0.6903, "step": 1095 }, { "epoch": 1.4028800000000001, "grad_norm": 0.6157001852989197, "learning_rate": 4.782713085234094e-05, "loss": 0.7125, "step": 1096 }, { "epoch": 1.40416, "grad_norm": 0.6462724208831787, "learning_rate": 4.782513005202081e-05, "loss": 0.7091, "step": 1097 }, { "epoch": 1.40544, "grad_norm": 0.6279814839363098, "learning_rate": 4.7823129251700684e-05, "loss": 0.7286, "step": 1098 }, { "epoch": 1.40672, "grad_norm": 0.6624937057495117, "learning_rate": 4.7821128451380556e-05, "loss": 0.7458, "step": 1099 }, { "epoch": 1.408, "grad_norm": 0.5872619152069092, "learning_rate": 4.781912765106043e-05, "loss": 0.6632, "step": 1100 }, { "epoch": 1.4092799999999999, "grad_norm": 0.6288279294967651, "learning_rate": 4.781712685074029e-05, "loss": 0.7713, "step": 1101 }, { "epoch": 1.41056, "grad_norm": 0.6384450793266296, "learning_rate": 4.781512605042017e-05, "loss": 0.7663, "step": 1102 }, { "epoch": 1.41184, "grad_norm": 0.6555289626121521, "learning_rate": 4.7813125250100043e-05, "loss": 0.7339, "step": 1103 }, { "epoch": 1.41312, "grad_norm": 0.6459196209907532, "learning_rate": 4.7811124449779915e-05, "loss": 0.7491, "step": 1104 }, { "epoch": 1.4144, "grad_norm": 0.6260380148887634, "learning_rate": 4.780912364945979e-05, "loss": 0.7219, "step": 1105 }, { "epoch": 1.41568, "grad_norm": 0.602838933467865, "learning_rate": 4.780712284913966e-05, "loss": 0.6817, "step": 1106 }, { "epoch": 1.41696, "grad_norm": 0.6588259935379028, "learning_rate": 4.780512204881953e-05, "loss": 0.7484, 
"step": 1107 }, { "epoch": 1.41824, "grad_norm": 0.6435507535934448, "learning_rate": 4.78031212484994e-05, "loss": 0.7629, "step": 1108 }, { "epoch": 1.41952, "grad_norm": 0.6327363848686218, "learning_rate": 4.7801120448179275e-05, "loss": 0.7366, "step": 1109 }, { "epoch": 1.4208, "grad_norm": 0.7007765769958496, "learning_rate": 4.7799119647859146e-05, "loss": 0.7356, "step": 1110 }, { "epoch": 1.42208, "grad_norm": 0.6083685159683228, "learning_rate": 4.779711884753902e-05, "loss": 0.7023, "step": 1111 }, { "epoch": 1.42336, "grad_norm": 0.6256844997406006, "learning_rate": 4.779511804721889e-05, "loss": 0.709, "step": 1112 }, { "epoch": 1.4246400000000001, "grad_norm": 0.6442143321037292, "learning_rate": 4.779311724689876e-05, "loss": 0.6688, "step": 1113 }, { "epoch": 1.42592, "grad_norm": 0.6138997673988342, "learning_rate": 4.7791116446578634e-05, "loss": 0.7211, "step": 1114 }, { "epoch": 1.4272, "grad_norm": 0.6425867676734924, "learning_rate": 4.7789115646258506e-05, "loss": 0.7207, "step": 1115 }, { "epoch": 1.42848, "grad_norm": 0.6066508889198303, "learning_rate": 4.778711484593838e-05, "loss": 0.7242, "step": 1116 }, { "epoch": 1.42976, "grad_norm": 0.5861117243766785, "learning_rate": 4.778511404561825e-05, "loss": 0.6949, "step": 1117 }, { "epoch": 1.4310399999999999, "grad_norm": 0.6210717558860779, "learning_rate": 4.778311324529812e-05, "loss": 0.7018, "step": 1118 }, { "epoch": 1.43232, "grad_norm": 0.6068648099899292, "learning_rate": 4.778111244497799e-05, "loss": 0.7291, "step": 1119 }, { "epoch": 1.4336, "grad_norm": 0.5990647673606873, "learning_rate": 4.7779111644657865e-05, "loss": 0.7016, "step": 1120 }, { "epoch": 1.43488, "grad_norm": 0.5916838049888611, "learning_rate": 4.777711084433774e-05, "loss": 0.6895, "step": 1121 }, { "epoch": 1.43616, "grad_norm": 0.6179905533790588, "learning_rate": 4.777511004401761e-05, "loss": 0.6555, "step": 1122 }, { "epoch": 1.43744, "grad_norm": 0.626921534538269, "learning_rate": 
4.777310924369748e-05, "loss": 0.7184, "step": 1123 }, { "epoch": 1.43872, "grad_norm": 0.6084997057914734, "learning_rate": 4.777110844337735e-05, "loss": 0.6929, "step": 1124 }, { "epoch": 1.44, "grad_norm": 0.6844547390937805, "learning_rate": 4.7769107643057224e-05, "loss": 0.7536, "step": 1125 }, { "epoch": 1.44128, "grad_norm": 0.6585316061973572, "learning_rate": 4.7767106842737096e-05, "loss": 0.7163, "step": 1126 }, { "epoch": 1.44256, "grad_norm": 0.6486889123916626, "learning_rate": 4.776510604241697e-05, "loss": 0.7305, "step": 1127 }, { "epoch": 1.44384, "grad_norm": 0.5947285890579224, "learning_rate": 4.776310524209684e-05, "loss": 0.7082, "step": 1128 }, { "epoch": 1.44512, "grad_norm": 0.6200329065322876, "learning_rate": 4.776110444177671e-05, "loss": 0.7104, "step": 1129 }, { "epoch": 1.4464000000000001, "grad_norm": 0.6373615860939026, "learning_rate": 4.775910364145659e-05, "loss": 0.7293, "step": 1130 }, { "epoch": 1.44768, "grad_norm": 0.6362440586090088, "learning_rate": 4.7757102841136455e-05, "loss": 0.7632, "step": 1131 }, { "epoch": 1.44896, "grad_norm": 0.6665335893630981, "learning_rate": 4.775510204081633e-05, "loss": 0.7378, "step": 1132 }, { "epoch": 1.45024, "grad_norm": 0.6526025533676147, "learning_rate": 4.77531012404962e-05, "loss": 0.7462, "step": 1133 }, { "epoch": 1.45152, "grad_norm": 0.5860944986343384, "learning_rate": 4.775110044017607e-05, "loss": 0.6819, "step": 1134 }, { "epoch": 1.4527999999999999, "grad_norm": 0.6410444378852844, "learning_rate": 4.774909963985594e-05, "loss": 0.7381, "step": 1135 }, { "epoch": 1.45408, "grad_norm": 0.6227912306785583, "learning_rate": 4.7747098839535815e-05, "loss": 0.6726, "step": 1136 }, { "epoch": 1.45536, "grad_norm": 0.633658230304718, "learning_rate": 4.774509803921569e-05, "loss": 0.705, "step": 1137 }, { "epoch": 1.45664, "grad_norm": 0.6156677007675171, "learning_rate": 4.7743097238895565e-05, "loss": 0.6913, "step": 1138 }, { "epoch": 1.45792, "grad_norm": 
0.6303706169128418, "learning_rate": 4.774109643857543e-05, "loss": 0.721, "step": 1139 }, { "epoch": 1.4592, "grad_norm": 0.5811339020729065, "learning_rate": 4.77390956382553e-05, "loss": 0.727, "step": 1140 }, { "epoch": 1.46048, "grad_norm": 0.6627085208892822, "learning_rate": 4.7737094837935174e-05, "loss": 0.7594, "step": 1141 }, { "epoch": 1.46176, "grad_norm": 0.640595555305481, "learning_rate": 4.7735094037615046e-05, "loss": 0.7016, "step": 1142 }, { "epoch": 1.46304, "grad_norm": 0.623562216758728, "learning_rate": 4.773309323729492e-05, "loss": 0.7622, "step": 1143 }, { "epoch": 1.46432, "grad_norm": 0.6135162711143494, "learning_rate": 4.7731092436974796e-05, "loss": 0.6906, "step": 1144 }, { "epoch": 1.4656, "grad_norm": 0.6718834042549133, "learning_rate": 4.772909163665467e-05, "loss": 0.7215, "step": 1145 }, { "epoch": 1.46688, "grad_norm": 0.6534852981567383, "learning_rate": 4.772709083633454e-05, "loss": 0.7794, "step": 1146 }, { "epoch": 1.4681600000000001, "grad_norm": 0.6073404550552368, "learning_rate": 4.7725090036014405e-05, "loss": 0.6651, "step": 1147 }, { "epoch": 1.46944, "grad_norm": 0.6143197417259216, "learning_rate": 4.772308923569428e-05, "loss": 0.7012, "step": 1148 }, { "epoch": 1.47072, "grad_norm": 0.6453569531440735, "learning_rate": 4.772108843537415e-05, "loss": 0.7459, "step": 1149 }, { "epoch": 1.472, "grad_norm": 0.6633815169334412, "learning_rate": 4.771908763505402e-05, "loss": 0.7477, "step": 1150 }, { "epoch": 1.47328, "grad_norm": 0.6284077167510986, "learning_rate": 4.77170868347339e-05, "loss": 0.7471, "step": 1151 }, { "epoch": 1.4745599999999999, "grad_norm": 0.6360135674476624, "learning_rate": 4.771508603441377e-05, "loss": 0.7069, "step": 1152 }, { "epoch": 1.47584, "grad_norm": 0.6397899389266968, "learning_rate": 4.771308523409364e-05, "loss": 0.6937, "step": 1153 }, { "epoch": 1.47712, "grad_norm": 0.6112055778503418, "learning_rate": 4.7711084433773515e-05, "loss": 0.7514, "step": 1154 }, { "epoch": 
1.4784, "grad_norm": 0.608342170715332, "learning_rate": 4.770908363345338e-05, "loss": 0.7085, "step": 1155 }, { "epoch": 1.47968, "grad_norm": 0.6625191569328308, "learning_rate": 4.770708283313325e-05, "loss": 0.7201, "step": 1156 }, { "epoch": 1.48096, "grad_norm": 0.6654885411262512, "learning_rate": 4.7705082032813124e-05, "loss": 0.7487, "step": 1157 }, { "epoch": 1.48224, "grad_norm": 0.6362460851669312, "learning_rate": 4.7703081232493e-05, "loss": 0.7151, "step": 1158 }, { "epoch": 1.48352, "grad_norm": 0.6087484359741211, "learning_rate": 4.7701080432172874e-05, "loss": 0.7062, "step": 1159 }, { "epoch": 1.4848, "grad_norm": 0.6134206652641296, "learning_rate": 4.7699079631852746e-05, "loss": 0.7385, "step": 1160 }, { "epoch": 1.48608, "grad_norm": 0.5883345603942871, "learning_rate": 4.769707883153262e-05, "loss": 0.7447, "step": 1161 }, { "epoch": 1.48736, "grad_norm": 0.6443062424659729, "learning_rate": 4.769507803121249e-05, "loss": 0.6859, "step": 1162 }, { "epoch": 1.48864, "grad_norm": 0.6045071482658386, "learning_rate": 4.7693077230892355e-05, "loss": 0.7062, "step": 1163 }, { "epoch": 1.4899200000000001, "grad_norm": 0.6627300381660461, "learning_rate": 4.769107643057223e-05, "loss": 0.7499, "step": 1164 }, { "epoch": 1.4912, "grad_norm": 0.6466721892356873, "learning_rate": 4.7689075630252105e-05, "loss": 0.7218, "step": 1165 }, { "epoch": 1.49248, "grad_norm": 0.6242368817329407, "learning_rate": 4.768707482993198e-05, "loss": 0.7273, "step": 1166 }, { "epoch": 1.49376, "grad_norm": 0.614842414855957, "learning_rate": 4.768507402961185e-05, "loss": 0.7216, "step": 1167 }, { "epoch": 1.49504, "grad_norm": 0.6312155723571777, "learning_rate": 4.768307322929172e-05, "loss": 0.7566, "step": 1168 }, { "epoch": 1.4963199999999999, "grad_norm": 0.6303775906562805, "learning_rate": 4.768107242897159e-05, "loss": 0.7076, "step": 1169 }, { "epoch": 1.4976, "grad_norm": 0.6277086734771729, "learning_rate": 4.7679071628651465e-05, "loss": 0.6755, 
"step": 1170 }, { "epoch": 1.49888, "grad_norm": 0.6203471422195435, "learning_rate": 4.767707082833133e-05, "loss": 0.716, "step": 1171 }, { "epoch": 1.5001600000000002, "grad_norm": 0.6107301115989685, "learning_rate": 4.767507002801121e-05, "loss": 0.7121, "step": 1172 }, { "epoch": 1.50144, "grad_norm": 0.6433287858963013, "learning_rate": 4.767306922769108e-05, "loss": 0.7116, "step": 1173 }, { "epoch": 1.50272, "grad_norm": 0.6727626919746399, "learning_rate": 4.767106842737095e-05, "loss": 0.7444, "step": 1174 }, { "epoch": 1.504, "grad_norm": 0.6506801247596741, "learning_rate": 4.7669067627050824e-05, "loss": 0.6786, "step": 1175 }, { "epoch": 1.50528, "grad_norm": 0.642342746257782, "learning_rate": 4.7667066826730696e-05, "loss": 0.7178, "step": 1176 }, { "epoch": 1.50656, "grad_norm": 0.6692764759063721, "learning_rate": 4.766506602641057e-05, "loss": 0.7793, "step": 1177 }, { "epoch": 1.5078399999999998, "grad_norm": 0.6183244585990906, "learning_rate": 4.766306522609044e-05, "loss": 0.7034, "step": 1178 }, { "epoch": 1.50912, "grad_norm": 0.6591295003890991, "learning_rate": 4.766106442577031e-05, "loss": 0.7507, "step": 1179 }, { "epoch": 1.5104, "grad_norm": 0.6517340540885925, "learning_rate": 4.765906362545018e-05, "loss": 0.7276, "step": 1180 }, { "epoch": 1.5116800000000001, "grad_norm": 0.6169537901878357, "learning_rate": 4.7657062825130055e-05, "loss": 0.7647, "step": 1181 }, { "epoch": 1.51296, "grad_norm": 0.6543555855751038, "learning_rate": 4.765506202480993e-05, "loss": 0.7227, "step": 1182 }, { "epoch": 1.51424, "grad_norm": 0.5778194665908813, "learning_rate": 4.76530612244898e-05, "loss": 0.6666, "step": 1183 }, { "epoch": 1.51552, "grad_norm": 0.6411781907081604, "learning_rate": 4.765106042416967e-05, "loss": 0.7447, "step": 1184 }, { "epoch": 1.5168, "grad_norm": 0.6526811718940735, "learning_rate": 4.764905962384954e-05, "loss": 0.6795, "step": 1185 }, { "epoch": 1.5180799999999999, "grad_norm": 0.6295871734619141, 
"learning_rate": 4.7647058823529414e-05, "loss": 0.7025, "step": 1186 }, { "epoch": 1.51936, "grad_norm": 0.6581454277038574, "learning_rate": 4.7645058023209286e-05, "loss": 0.7639, "step": 1187 }, { "epoch": 1.52064, "grad_norm": 0.6672399640083313, "learning_rate": 4.764305722288916e-05, "loss": 0.7384, "step": 1188 }, { "epoch": 1.5219200000000002, "grad_norm": 0.6672317385673523, "learning_rate": 4.764105642256903e-05, "loss": 0.7389, "step": 1189 }, { "epoch": 1.5232, "grad_norm": 0.6444031000137329, "learning_rate": 4.76390556222489e-05, "loss": 0.7833, "step": 1190 }, { "epoch": 1.52448, "grad_norm": 0.6317417621612549, "learning_rate": 4.7637054821928774e-05, "loss": 0.7112, "step": 1191 }, { "epoch": 1.52576, "grad_norm": 0.627162516117096, "learning_rate": 4.7635054021608646e-05, "loss": 0.6628, "step": 1192 }, { "epoch": 1.52704, "grad_norm": 0.5882164239883423, "learning_rate": 4.763305322128852e-05, "loss": 0.7103, "step": 1193 }, { "epoch": 1.52832, "grad_norm": 0.6305367350578308, "learning_rate": 4.763105242096839e-05, "loss": 0.7103, "step": 1194 }, { "epoch": 1.5295999999999998, "grad_norm": 0.6403663158416748, "learning_rate": 4.762905162064826e-05, "loss": 0.6768, "step": 1195 }, { "epoch": 1.53088, "grad_norm": 0.6659502983093262, "learning_rate": 4.762705082032813e-05, "loss": 0.7166, "step": 1196 }, { "epoch": 1.53216, "grad_norm": 0.6176123023033142, "learning_rate": 4.7625050020008005e-05, "loss": 0.6794, "step": 1197 }, { "epoch": 1.5334400000000001, "grad_norm": 0.6616097688674927, "learning_rate": 4.762304921968788e-05, "loss": 0.7685, "step": 1198 }, { "epoch": 1.53472, "grad_norm": 0.6266618967056274, "learning_rate": 4.762104841936775e-05, "loss": 0.6784, "step": 1199 }, { "epoch": 1.536, "grad_norm": 0.6575819849967957, "learning_rate": 4.761904761904762e-05, "loss": 0.7432, "step": 1200 }, { "epoch": 1.53728, "grad_norm": 0.6002248525619507, "learning_rate": 4.761704681872749e-05, "loss": 0.6946, "step": 1201 }, { "epoch": 1.53856, 
"grad_norm": 0.6573600172996521, "learning_rate": 4.7615046018407364e-05, "loss": 0.7185, "step": 1202 }, { "epoch": 1.5398399999999999, "grad_norm": 0.6227654218673706, "learning_rate": 4.7613045218087236e-05, "loss": 0.7381, "step": 1203 }, { "epoch": 1.54112, "grad_norm": 0.6044524908065796, "learning_rate": 4.761104441776711e-05, "loss": 0.704, "step": 1204 }, { "epoch": 1.5424, "grad_norm": 0.6178364753723145, "learning_rate": 4.760904361744698e-05, "loss": 0.6916, "step": 1205 }, { "epoch": 1.5436800000000002, "grad_norm": 0.6074690222740173, "learning_rate": 4.760704281712685e-05, "loss": 0.7256, "step": 1206 }, { "epoch": 1.54496, "grad_norm": 0.6533793807029724, "learning_rate": 4.760504201680672e-05, "loss": 0.6969, "step": 1207 }, { "epoch": 1.54624, "grad_norm": 0.6194373965263367, "learning_rate": 4.76030412164866e-05, "loss": 0.7138, "step": 1208 }, { "epoch": 1.54752, "grad_norm": 0.6618860363960266, "learning_rate": 4.760104041616647e-05, "loss": 0.7754, "step": 1209 }, { "epoch": 1.5488, "grad_norm": 0.6461208462715149, "learning_rate": 4.759903961584634e-05, "loss": 0.7114, "step": 1210 }, { "epoch": 1.55008, "grad_norm": 0.6476582884788513, "learning_rate": 4.759703881552621e-05, "loss": 0.6973, "step": 1211 }, { "epoch": 1.5513599999999999, "grad_norm": 0.6671127080917358, "learning_rate": 4.759503801520608e-05, "loss": 0.7009, "step": 1212 }, { "epoch": 1.55264, "grad_norm": 0.6456823348999023, "learning_rate": 4.7593037214885954e-05, "loss": 0.7214, "step": 1213 }, { "epoch": 1.55392, "grad_norm": 0.6629720330238342, "learning_rate": 4.7591036414565826e-05, "loss": 0.7583, "step": 1214 }, { "epoch": 1.5552000000000001, "grad_norm": 0.649437427520752, "learning_rate": 4.7589035614245705e-05, "loss": 0.7532, "step": 1215 }, { "epoch": 1.55648, "grad_norm": 0.6041565537452698, "learning_rate": 4.758703481392558e-05, "loss": 0.6884, "step": 1216 }, { "epoch": 1.55776, "grad_norm": 0.6424383521080017, "learning_rate": 4.758503401360544e-05, "loss": 
0.7084, "step": 1217 }, { "epoch": 1.55904, "grad_norm": 0.6945663690567017, "learning_rate": 4.7583033213285314e-05, "loss": 0.75, "step": 1218 }, { "epoch": 1.56032, "grad_norm": 0.6336300373077393, "learning_rate": 4.7581032412965186e-05, "loss": 0.7537, "step": 1219 }, { "epoch": 1.5615999999999999, "grad_norm": 0.6776460409164429, "learning_rate": 4.757903161264506e-05, "loss": 0.7617, "step": 1220 }, { "epoch": 1.56288, "grad_norm": 0.6466001272201538, "learning_rate": 4.757703081232493e-05, "loss": 0.7482, "step": 1221 }, { "epoch": 1.56416, "grad_norm": 0.630434513092041, "learning_rate": 4.757503001200481e-05, "loss": 0.724, "step": 1222 }, { "epoch": 1.5654400000000002, "grad_norm": 0.6976414322853088, "learning_rate": 4.757302921168468e-05, "loss": 0.7818, "step": 1223 }, { "epoch": 1.5667200000000001, "grad_norm": 0.6347379684448242, "learning_rate": 4.757102841136455e-05, "loss": 0.7551, "step": 1224 }, { "epoch": 1.568, "grad_norm": 0.6358786225318909, "learning_rate": 4.756902761104442e-05, "loss": 0.6909, "step": 1225 }, { "epoch": 1.56928, "grad_norm": 0.6460133194923401, "learning_rate": 4.756702681072429e-05, "loss": 0.7101, "step": 1226 }, { "epoch": 1.57056, "grad_norm": 0.6669771671295166, "learning_rate": 4.756502601040416e-05, "loss": 0.7721, "step": 1227 }, { "epoch": 1.57184, "grad_norm": 0.6039112210273743, "learning_rate": 4.756302521008403e-05, "loss": 0.712, "step": 1228 }, { "epoch": 1.5731199999999999, "grad_norm": 0.6793951392173767, "learning_rate": 4.756102440976391e-05, "loss": 0.747, "step": 1229 }, { "epoch": 1.5744, "grad_norm": 0.6434732675552368, "learning_rate": 4.755902360944378e-05, "loss": 0.6894, "step": 1230 }, { "epoch": 1.57568, "grad_norm": 0.6476708650588989, "learning_rate": 4.7557022809123655e-05, "loss": 0.6805, "step": 1231 }, { "epoch": 1.5769600000000001, "grad_norm": 0.6917094588279724, "learning_rate": 4.7555022008803527e-05, "loss": 0.6978, "step": 1232 }, { "epoch": 1.57824, "grad_norm": 
0.631247878074646, "learning_rate": 4.755302120848339e-05, "loss": 0.6858, "step": 1233 }, { "epoch": 1.57952, "grad_norm": 0.6304442286491394, "learning_rate": 4.7551020408163263e-05, "loss": 0.6866, "step": 1234 }, { "epoch": 1.5808, "grad_norm": 0.6345778703689575, "learning_rate": 4.7549019607843135e-05, "loss": 0.7146, "step": 1235 }, { "epoch": 1.58208, "grad_norm": 0.639773964881897, "learning_rate": 4.7547018807523014e-05, "loss": 0.7268, "step": 1236 }, { "epoch": 1.5833599999999999, "grad_norm": 0.6575612425804138, "learning_rate": 4.7545018007202886e-05, "loss": 0.7142, "step": 1237 }, { "epoch": 1.58464, "grad_norm": 0.6063089966773987, "learning_rate": 4.754301720688276e-05, "loss": 0.7651, "step": 1238 }, { "epoch": 1.58592, "grad_norm": 0.6471563577651978, "learning_rate": 4.754101640656263e-05, "loss": 0.7748, "step": 1239 }, { "epoch": 1.5872000000000002, "grad_norm": 0.6223157048225403, "learning_rate": 4.75390156062425e-05, "loss": 0.7069, "step": 1240 }, { "epoch": 1.5884800000000001, "grad_norm": 0.64573073387146, "learning_rate": 4.7537014805922366e-05, "loss": 0.766, "step": 1241 }, { "epoch": 1.58976, "grad_norm": 0.6501642465591431, "learning_rate": 4.753501400560224e-05, "loss": 0.7455, "step": 1242 }, { "epoch": 1.59104, "grad_norm": 0.6377203464508057, "learning_rate": 4.753301320528212e-05, "loss": 0.6988, "step": 1243 }, { "epoch": 1.59232, "grad_norm": 0.6198776364326477, "learning_rate": 4.753101240496199e-05, "loss": 0.7555, "step": 1244 }, { "epoch": 1.5936, "grad_norm": 0.6620365381240845, "learning_rate": 4.752901160464186e-05, "loss": 0.7119, "step": 1245 }, { "epoch": 1.5948799999999999, "grad_norm": 0.6182314157485962, "learning_rate": 4.752701080432173e-05, "loss": 0.6488, "step": 1246 }, { "epoch": 1.59616, "grad_norm": 0.6267403960227966, "learning_rate": 4.7525010004001604e-05, "loss": 0.7483, "step": 1247 }, { "epoch": 1.59744, "grad_norm": 0.6369834542274475, "learning_rate": 4.7523009203681476e-05, "loss": 0.6785, 
"step": 1248 }, { "epoch": 1.5987200000000001, "grad_norm": 0.6468684077262878, "learning_rate": 4.752100840336134e-05, "loss": 0.7245, "step": 1249 }, { "epoch": 1.6, "grad_norm": 0.6526497006416321, "learning_rate": 4.751900760304122e-05, "loss": 0.7311, "step": 1250 }, { "epoch": 1.60128, "grad_norm": 0.6682162284851074, "learning_rate": 4.751700680272109e-05, "loss": 0.7086, "step": 1251 }, { "epoch": 1.60256, "grad_norm": 0.6937553286552429, "learning_rate": 4.7515006002400964e-05, "loss": 0.6991, "step": 1252 }, { "epoch": 1.60384, "grad_norm": 0.6066246032714844, "learning_rate": 4.7513005202080836e-05, "loss": 0.6762, "step": 1253 }, { "epoch": 1.6051199999999999, "grad_norm": 0.7019811272621155, "learning_rate": 4.751100440176071e-05, "loss": 0.7746, "step": 1254 }, { "epoch": 1.6064, "grad_norm": 0.6307134032249451, "learning_rate": 4.750900360144058e-05, "loss": 0.7058, "step": 1255 }, { "epoch": 1.60768, "grad_norm": 0.6239479780197144, "learning_rate": 4.750700280112045e-05, "loss": 0.6974, "step": 1256 }, { "epoch": 1.60896, "grad_norm": 0.6378730535507202, "learning_rate": 4.750500200080032e-05, "loss": 0.7755, "step": 1257 }, { "epoch": 1.6102400000000001, "grad_norm": 0.6123172640800476, "learning_rate": 4.7503001200480195e-05, "loss": 0.6952, "step": 1258 }, { "epoch": 1.61152, "grad_norm": 0.6375351548194885, "learning_rate": 4.750100040016007e-05, "loss": 0.7457, "step": 1259 }, { "epoch": 1.6128, "grad_norm": 0.6497233510017395, "learning_rate": 4.749899959983994e-05, "loss": 0.7198, "step": 1260 }, { "epoch": 1.61408, "grad_norm": 0.6439145803451538, "learning_rate": 4.749699879951981e-05, "loss": 0.7273, "step": 1261 }, { "epoch": 1.61536, "grad_norm": 0.590140700340271, "learning_rate": 4.749499799919968e-05, "loss": 0.6665, "step": 1262 }, { "epoch": 1.6166399999999999, "grad_norm": 0.6305953860282898, "learning_rate": 4.7492997198879554e-05, "loss": 0.7422, "step": 1263 }, { "epoch": 1.61792, "grad_norm": 0.6392373442649841, 
"learning_rate": 4.7490996398559426e-05, "loss": 0.7263, "step": 1264 }, { "epoch": 1.6192, "grad_norm": 0.6296020746231079, "learning_rate": 4.74889955982393e-05, "loss": 0.7455, "step": 1265 }, { "epoch": 1.6204800000000001, "grad_norm": 0.6070896983146667, "learning_rate": 4.748699479791917e-05, "loss": 0.7014, "step": 1266 }, { "epoch": 1.62176, "grad_norm": 0.6309097409248352, "learning_rate": 4.748499399759904e-05, "loss": 0.7737, "step": 1267 }, { "epoch": 1.62304, "grad_norm": 0.591622531414032, "learning_rate": 4.7482993197278913e-05, "loss": 0.7032, "step": 1268 }, { "epoch": 1.62432, "grad_norm": 0.5985342860221863, "learning_rate": 4.7480992396958785e-05, "loss": 0.7006, "step": 1269 }, { "epoch": 1.6256, "grad_norm": 0.5956323146820068, "learning_rate": 4.747899159663866e-05, "loss": 0.6925, "step": 1270 }, { "epoch": 1.6268799999999999, "grad_norm": 0.6757445931434631, "learning_rate": 4.747699079631853e-05, "loss": 0.7091, "step": 1271 }, { "epoch": 1.62816, "grad_norm": 0.6045747399330139, "learning_rate": 4.74749899959984e-05, "loss": 0.6801, "step": 1272 }, { "epoch": 1.62944, "grad_norm": 0.6032465696334839, "learning_rate": 4.747298919567827e-05, "loss": 0.6913, "step": 1273 }, { "epoch": 1.63072, "grad_norm": 0.6105408072471619, "learning_rate": 4.7470988395358145e-05, "loss": 0.7102, "step": 1274 }, { "epoch": 1.6320000000000001, "grad_norm": 0.6021878123283386, "learning_rate": 4.7468987595038016e-05, "loss": 0.66, "step": 1275 }, { "epoch": 1.63328, "grad_norm": 0.6317958235740662, "learning_rate": 4.746698679471789e-05, "loss": 0.7622, "step": 1276 }, { "epoch": 1.63456, "grad_norm": 0.6472170352935791, "learning_rate": 4.746498599439776e-05, "loss": 0.8176, "step": 1277 }, { "epoch": 1.63584, "grad_norm": 0.6169067621231079, "learning_rate": 4.746298519407764e-05, "loss": 0.7191, "step": 1278 }, { "epoch": 1.63712, "grad_norm": 0.6021884083747864, "learning_rate": 4.7460984393757504e-05, "loss": 0.7359, "step": 1279 }, { "epoch": 
1.6383999999999999, "grad_norm": 0.6332974433898926, "learning_rate": 4.7458983593437376e-05, "loss": 0.7494, "step": 1280 }, { "epoch": 1.63968, "grad_norm": 0.6417760252952576, "learning_rate": 4.745698279311725e-05, "loss": 0.7384, "step": 1281 }, { "epoch": 1.64096, "grad_norm": 0.6329305768013, "learning_rate": 4.745498199279712e-05, "loss": 0.7398, "step": 1282 }, { "epoch": 1.6422400000000001, "grad_norm": 0.6128019690513611, "learning_rate": 4.745298119247699e-05, "loss": 0.6854, "step": 1283 }, { "epoch": 1.64352, "grad_norm": 0.6532317399978638, "learning_rate": 4.745098039215686e-05, "loss": 0.6853, "step": 1284 }, { "epoch": 1.6448, "grad_norm": 0.6134045720100403, "learning_rate": 4.744897959183674e-05, "loss": 0.6766, "step": 1285 }, { "epoch": 1.64608, "grad_norm": 0.6327788829803467, "learning_rate": 4.7446978791516614e-05, "loss": 0.6836, "step": 1286 }, { "epoch": 1.64736, "grad_norm": 0.669122040271759, "learning_rate": 4.744497799119648e-05, "loss": 0.7196, "step": 1287 }, { "epoch": 1.6486399999999999, "grad_norm": 0.614807665348053, "learning_rate": 4.744297719087635e-05, "loss": 0.7626, "step": 1288 }, { "epoch": 1.64992, "grad_norm": 0.6070460081100464, "learning_rate": 4.744097639055622e-05, "loss": 0.6698, "step": 1289 }, { "epoch": 1.6512, "grad_norm": 0.6391851902008057, "learning_rate": 4.7438975590236094e-05, "loss": 0.7167, "step": 1290 }, { "epoch": 1.65248, "grad_norm": 0.6460067629814148, "learning_rate": 4.7436974789915966e-05, "loss": 0.7165, "step": 1291 }, { "epoch": 1.6537600000000001, "grad_norm": 0.6238282918930054, "learning_rate": 4.7434973989595845e-05, "loss": 0.7058, "step": 1292 }, { "epoch": 1.65504, "grad_norm": 0.6249229311943054, "learning_rate": 4.7432973189275717e-05, "loss": 0.7196, "step": 1293 }, { "epoch": 1.65632, "grad_norm": 0.6393983960151672, "learning_rate": 4.743097238895559e-05, "loss": 0.6811, "step": 1294 }, { "epoch": 1.6576, "grad_norm": 0.6782253384590149, "learning_rate": 4.7428971588635454e-05, 
"loss": 0.7559, "step": 1295 }, { "epoch": 1.65888, "grad_norm": 0.5967546701431274, "learning_rate": 4.7426970788315325e-05, "loss": 0.6603, "step": 1296 }, { "epoch": 1.6601599999999999, "grad_norm": 0.6373478174209595, "learning_rate": 4.74249699879952e-05, "loss": 0.7279, "step": 1297 }, { "epoch": 1.66144, "grad_norm": 0.6224157214164734, "learning_rate": 4.742296918767507e-05, "loss": 0.6636, "step": 1298 }, { "epoch": 1.66272, "grad_norm": 0.6072138547897339, "learning_rate": 4.742096838735494e-05, "loss": 0.6899, "step": 1299 }, { "epoch": 1.6640000000000001, "grad_norm": 0.6565014719963074, "learning_rate": 4.741896758703482e-05, "loss": 0.7157, "step": 1300 }, { "epoch": 1.66528, "grad_norm": 0.653264045715332, "learning_rate": 4.741696678671469e-05, "loss": 0.7142, "step": 1301 }, { "epoch": 1.66656, "grad_norm": 0.6316604018211365, "learning_rate": 4.741496598639456e-05, "loss": 0.6723, "step": 1302 }, { "epoch": 1.66784, "grad_norm": 0.6184001564979553, "learning_rate": 4.741296518607443e-05, "loss": 0.7288, "step": 1303 }, { "epoch": 1.66912, "grad_norm": 0.6457557082176208, "learning_rate": 4.74109643857543e-05, "loss": 0.7429, "step": 1304 }, { "epoch": 1.6703999999999999, "grad_norm": 0.6284366250038147, "learning_rate": 4.740896358543417e-05, "loss": 0.6764, "step": 1305 }, { "epoch": 1.67168, "grad_norm": 0.6020685434341431, "learning_rate": 4.7406962785114044e-05, "loss": 0.6978, "step": 1306 }, { "epoch": 1.67296, "grad_norm": 0.6325390338897705, "learning_rate": 4.740496198479392e-05, "loss": 0.7271, "step": 1307 }, { "epoch": 1.67424, "grad_norm": 0.6092963814735413, "learning_rate": 4.7402961184473794e-05, "loss": 0.7342, "step": 1308 }, { "epoch": 1.6755200000000001, "grad_norm": 0.6341113448143005, "learning_rate": 4.7400960384153666e-05, "loss": 0.7043, "step": 1309 }, { "epoch": 1.6768, "grad_norm": 0.6368173360824585, "learning_rate": 4.739895958383354e-05, "loss": 0.7099, "step": 1310 }, { "epoch": 1.67808, "grad_norm": 
0.6337018609046936, "learning_rate": 4.73969587835134e-05, "loss": 0.6765, "step": 1311 }, { "epoch": 1.67936, "grad_norm": 0.7150176167488098, "learning_rate": 4.7394957983193275e-05, "loss": 0.7172, "step": 1312 }, { "epoch": 1.68064, "grad_norm": 0.6691837906837463, "learning_rate": 4.739295718287315e-05, "loss": 0.7095, "step": 1313 }, { "epoch": 1.6819199999999999, "grad_norm": 0.6361916661262512, "learning_rate": 4.7390956382553026e-05, "loss": 0.7367, "step": 1314 }, { "epoch": 1.6832, "grad_norm": 0.6631631255149841, "learning_rate": 4.73889555822329e-05, "loss": 0.7083, "step": 1315 }, { "epoch": 1.68448, "grad_norm": 0.6436387896537781, "learning_rate": 4.738695478191277e-05, "loss": 0.7754, "step": 1316 }, { "epoch": 1.6857600000000001, "grad_norm": 0.644277811050415, "learning_rate": 4.738495398159264e-05, "loss": 0.7025, "step": 1317 }, { "epoch": 1.68704, "grad_norm": 0.6064467430114746, "learning_rate": 4.738295318127251e-05, "loss": 0.7252, "step": 1318 }, { "epoch": 1.68832, "grad_norm": 0.6227229237556458, "learning_rate": 4.738095238095238e-05, "loss": 0.6998, "step": 1319 }, { "epoch": 1.6896, "grad_norm": 0.644025981426239, "learning_rate": 4.737895158063225e-05, "loss": 0.718, "step": 1320 }, { "epoch": 1.69088, "grad_norm": 0.6950893998146057, "learning_rate": 4.737695078031213e-05, "loss": 0.7618, "step": 1321 }, { "epoch": 1.6921599999999999, "grad_norm": 0.6096273064613342, "learning_rate": 4.7374949979992e-05, "loss": 0.7492, "step": 1322 }, { "epoch": 1.6934399999999998, "grad_norm": 0.624535322189331, "learning_rate": 4.737294917967187e-05, "loss": 0.6749, "step": 1323 }, { "epoch": 1.69472, "grad_norm": 0.7543303966522217, "learning_rate": 4.7370948379351744e-05, "loss": 0.7168, "step": 1324 }, { "epoch": 1.696, "grad_norm": 0.6681160926818848, "learning_rate": 4.7368947579031616e-05, "loss": 0.7414, "step": 1325 }, { "epoch": 1.6972800000000001, "grad_norm": 0.6174480319023132, "learning_rate": 4.736694677871149e-05, "loss": 0.702, 
"step": 1326 }, { "epoch": 1.69856, "grad_norm": 0.6031041145324707, "learning_rate": 4.736494597839135e-05, "loss": 0.6811, "step": 1327 }, { "epoch": 1.69984, "grad_norm": 0.6663240194320679, "learning_rate": 4.736294517807123e-05, "loss": 0.7624, "step": 1328 }, { "epoch": 1.70112, "grad_norm": 0.6095741391181946, "learning_rate": 4.7360944377751103e-05, "loss": 0.6708, "step": 1329 }, { "epoch": 1.7024, "grad_norm": 0.6273066997528076, "learning_rate": 4.7358943577430975e-05, "loss": 0.7313, "step": 1330 }, { "epoch": 1.7036799999999999, "grad_norm": 0.6379857063293457, "learning_rate": 4.735694277711085e-05, "loss": 0.7075, "step": 1331 }, { "epoch": 1.70496, "grad_norm": 0.606400728225708, "learning_rate": 4.735494197679072e-05, "loss": 0.7267, "step": 1332 }, { "epoch": 1.70624, "grad_norm": 0.6663306355476379, "learning_rate": 4.735294117647059e-05, "loss": 0.7154, "step": 1333 }, { "epoch": 1.7075200000000001, "grad_norm": 0.6471573114395142, "learning_rate": 4.735094037615046e-05, "loss": 0.7464, "step": 1334 }, { "epoch": 1.7088, "grad_norm": 0.6170628070831299, "learning_rate": 4.7348939575830335e-05, "loss": 0.7251, "step": 1335 }, { "epoch": 1.71008, "grad_norm": 0.6125094890594482, "learning_rate": 4.7346938775510206e-05, "loss": 0.7548, "step": 1336 }, { "epoch": 1.71136, "grad_norm": 0.6116237044334412, "learning_rate": 4.734493797519008e-05, "loss": 0.7347, "step": 1337 }, { "epoch": 1.71264, "grad_norm": 0.6676164269447327, "learning_rate": 4.734293717486995e-05, "loss": 0.7439, "step": 1338 }, { "epoch": 1.7139199999999999, "grad_norm": 0.6193406581878662, "learning_rate": 4.734093637454982e-05, "loss": 0.7186, "step": 1339 }, { "epoch": 1.7151999999999998, "grad_norm": 0.5905833840370178, "learning_rate": 4.7338935574229694e-05, "loss": 0.6775, "step": 1340 }, { "epoch": 1.71648, "grad_norm": 0.6464902758598328, "learning_rate": 4.7336934773909566e-05, "loss": 0.7586, "step": 1341 }, { "epoch": 1.71776, "grad_norm": 0.6692549586296082, 
"learning_rate": 4.733493397358944e-05, "loss": 0.7933, "step": 1342 }, { "epoch": 1.7190400000000001, "grad_norm": 0.6424185037612915, "learning_rate": 4.733293317326931e-05, "loss": 0.7422, "step": 1343 }, { "epoch": 1.72032, "grad_norm": 0.6273303031921387, "learning_rate": 4.733093237294918e-05, "loss": 0.6837, "step": 1344 }, { "epoch": 1.7216, "grad_norm": 0.6037375330924988, "learning_rate": 4.732893157262905e-05, "loss": 0.7306, "step": 1345 }, { "epoch": 1.72288, "grad_norm": 0.6088179349899292, "learning_rate": 4.7326930772308925e-05, "loss": 0.7008, "step": 1346 }, { "epoch": 1.72416, "grad_norm": 0.6654016375541687, "learning_rate": 4.73249299719888e-05, "loss": 0.7958, "step": 1347 }, { "epoch": 1.7254399999999999, "grad_norm": 0.6818312406539917, "learning_rate": 4.732292917166867e-05, "loss": 0.7136, "step": 1348 }, { "epoch": 1.72672, "grad_norm": 0.5944163799285889, "learning_rate": 4.732092837134855e-05, "loss": 0.7097, "step": 1349 }, { "epoch": 1.728, "grad_norm": 0.6351943016052246, "learning_rate": 4.731892757102841e-05, "loss": 0.7255, "step": 1350 }, { "epoch": 1.7292800000000002, "grad_norm": 0.6025944948196411, "learning_rate": 4.7316926770708284e-05, "loss": 0.6275, "step": 1351 }, { "epoch": 1.73056, "grad_norm": 0.6137747764587402, "learning_rate": 4.7314925970388156e-05, "loss": 0.722, "step": 1352 }, { "epoch": 1.73184, "grad_norm": 0.6036489605903625, "learning_rate": 4.731292517006803e-05, "loss": 0.7132, "step": 1353 }, { "epoch": 1.73312, "grad_norm": 0.6218430399894714, "learning_rate": 4.73109243697479e-05, "loss": 0.6794, "step": 1354 }, { "epoch": 1.7344, "grad_norm": 0.582331120967865, "learning_rate": 4.730892356942777e-05, "loss": 0.7159, "step": 1355 }, { "epoch": 1.73568, "grad_norm": 0.626875638961792, "learning_rate": 4.730692276910765e-05, "loss": 0.7335, "step": 1356 }, { "epoch": 1.7369599999999998, "grad_norm": 0.6015035510063171, "learning_rate": 4.730492196878752e-05, "loss": 0.6889, "step": 1357 }, { "epoch": 
1.73824, "grad_norm": 0.6084362268447876, "learning_rate": 4.730292116846739e-05, "loss": 0.7034, "step": 1358 }, { "epoch": 1.73952, "grad_norm": 0.6344590187072754, "learning_rate": 4.730092036814726e-05, "loss": 0.6738, "step": 1359 }, { "epoch": 1.7408000000000001, "grad_norm": 0.6038119792938232, "learning_rate": 4.729891956782713e-05, "loss": 0.7495, "step": 1360 }, { "epoch": 1.74208, "grad_norm": 0.5990369319915771, "learning_rate": 4.7296918767507e-05, "loss": 0.7376, "step": 1361 }, { "epoch": 1.74336, "grad_norm": 0.6152486205101013, "learning_rate": 4.7294917967186875e-05, "loss": 0.6552, "step": 1362 }, { "epoch": 1.74464, "grad_norm": 0.6658695936203003, "learning_rate": 4.729291716686675e-05, "loss": 0.7151, "step": 1363 }, { "epoch": 1.74592, "grad_norm": 0.6392641067504883, "learning_rate": 4.7290916366546625e-05, "loss": 0.7298, "step": 1364 }, { "epoch": 1.7471999999999999, "grad_norm": 0.6059170961380005, "learning_rate": 4.72889155662265e-05, "loss": 0.7054, "step": 1365 }, { "epoch": 1.74848, "grad_norm": 0.5970711708068848, "learning_rate": 4.728691476590636e-05, "loss": 0.7038, "step": 1366 }, { "epoch": 1.74976, "grad_norm": 0.6231157183647156, "learning_rate": 4.7284913965586234e-05, "loss": 0.6903, "step": 1367 }, { "epoch": 1.7510400000000002, "grad_norm": 0.6200019121170044, "learning_rate": 4.7282913165266106e-05, "loss": 0.6916, "step": 1368 }, { "epoch": 1.75232, "grad_norm": 0.62948077917099, "learning_rate": 4.728091236494598e-05, "loss": 0.7023, "step": 1369 }, { "epoch": 1.7536, "grad_norm": 0.6121958494186401, "learning_rate": 4.7278911564625856e-05, "loss": 0.6912, "step": 1370 }, { "epoch": 1.75488, "grad_norm": 0.6384387612342834, "learning_rate": 4.727691076430573e-05, "loss": 0.7216, "step": 1371 }, { "epoch": 1.75616, "grad_norm": 0.6484329700469971, "learning_rate": 4.72749099639856e-05, "loss": 0.6696, "step": 1372 }, { "epoch": 1.75744, "grad_norm": 0.6550239324569702, "learning_rate": 4.727290916366547e-05, "loss": 
0.7213, "step": 1373 }, { "epoch": 1.7587199999999998, "grad_norm": 0.6201356649398804, "learning_rate": 4.727090836334534e-05, "loss": 0.7322, "step": 1374 }, { "epoch": 1.76, "grad_norm": 0.6567116379737854, "learning_rate": 4.726890756302521e-05, "loss": 0.7732, "step": 1375 }, { "epoch": 1.76128, "grad_norm": 0.6301355957984924, "learning_rate": 4.726690676270508e-05, "loss": 0.7397, "step": 1376 }, { "epoch": 1.7625600000000001, "grad_norm": 0.6590511202812195, "learning_rate": 4.726490596238496e-05, "loss": 0.7581, "step": 1377 }, { "epoch": 1.76384, "grad_norm": 0.6398580074310303, "learning_rate": 4.726290516206483e-05, "loss": 0.7062, "step": 1378 }, { "epoch": 1.76512, "grad_norm": 0.6533066630363464, "learning_rate": 4.72609043617447e-05, "loss": 0.7078, "step": 1379 }, { "epoch": 1.7664, "grad_norm": 0.6254117488861084, "learning_rate": 4.7258903561424575e-05, "loss": 0.6917, "step": 1380 }, { "epoch": 1.76768, "grad_norm": 0.6385900378227234, "learning_rate": 4.725690276110445e-05, "loss": 0.7951, "step": 1381 }, { "epoch": 1.7689599999999999, "grad_norm": 0.6786746978759766, "learning_rate": 4.725490196078431e-05, "loss": 0.7645, "step": 1382 }, { "epoch": 1.77024, "grad_norm": 0.6253941059112549, "learning_rate": 4.7252901160464184e-05, "loss": 0.7064, "step": 1383 }, { "epoch": 1.77152, "grad_norm": 0.6229246258735657, "learning_rate": 4.725090036014406e-05, "loss": 0.7386, "step": 1384 }, { "epoch": 1.7728000000000002, "grad_norm": 0.6111705303192139, "learning_rate": 4.7248899559823934e-05, "loss": 0.6646, "step": 1385 }, { "epoch": 1.77408, "grad_norm": 0.6427714228630066, "learning_rate": 4.7246898759503806e-05, "loss": 0.7058, "step": 1386 }, { "epoch": 1.77536, "grad_norm": 0.635543704032898, "learning_rate": 4.724489795918368e-05, "loss": 0.7016, "step": 1387 }, { "epoch": 1.77664, "grad_norm": 0.6031673550605774, "learning_rate": 4.724289715886355e-05, "loss": 0.6427, "step": 1388 }, { "epoch": 1.77792, "grad_norm": 0.6220544576644897, 
"learning_rate": 4.724089635854342e-05, "loss": 0.731, "step": 1389 }, { "epoch": 1.7792, "grad_norm": 0.6029757261276245, "learning_rate": 4.723889555822329e-05, "loss": 0.7165, "step": 1390 }, { "epoch": 1.7804799999999998, "grad_norm": 0.6523854732513428, "learning_rate": 4.7236894757903165e-05, "loss": 0.7179, "step": 1391 }, { "epoch": 1.78176, "grad_norm": 0.6103372573852539, "learning_rate": 4.723489395758304e-05, "loss": 0.6943, "step": 1392 }, { "epoch": 1.78304, "grad_norm": 0.631454348564148, "learning_rate": 4.723289315726291e-05, "loss": 0.7071, "step": 1393 }, { "epoch": 1.7843200000000001, "grad_norm": 0.6054040193557739, "learning_rate": 4.723089235694278e-05, "loss": 0.6621, "step": 1394 }, { "epoch": 1.7856, "grad_norm": 0.6552069783210754, "learning_rate": 4.722889155662265e-05, "loss": 0.7767, "step": 1395 }, { "epoch": 1.78688, "grad_norm": 0.6483544111251831, "learning_rate": 4.7226890756302525e-05, "loss": 0.759, "step": 1396 }, { "epoch": 1.78816, "grad_norm": 0.6473653316497803, "learning_rate": 4.7224889955982396e-05, "loss": 0.6879, "step": 1397 }, { "epoch": 1.78944, "grad_norm": 0.6504471898078918, "learning_rate": 4.722288915566227e-05, "loss": 0.7329, "step": 1398 }, { "epoch": 1.7907199999999999, "grad_norm": 0.6064606308937073, "learning_rate": 4.722088835534214e-05, "loss": 0.7472, "step": 1399 }, { "epoch": 1.792, "grad_norm": 0.6191816926002502, "learning_rate": 4.721888755502201e-05, "loss": 0.7031, "step": 1400 }, { "epoch": 1.79328, "grad_norm": 0.6464985013008118, "learning_rate": 4.7216886754701884e-05, "loss": 0.7053, "step": 1401 }, { "epoch": 1.7945600000000002, "grad_norm": 0.6391028165817261, "learning_rate": 4.7214885954381756e-05, "loss": 0.6984, "step": 1402 }, { "epoch": 1.79584, "grad_norm": 0.6238522529602051, "learning_rate": 4.721288515406163e-05, "loss": 0.7134, "step": 1403 }, { "epoch": 1.79712, "grad_norm": 0.6372814774513245, "learning_rate": 4.72108843537415e-05, "loss": 0.741, "step": 1404 }, { "epoch": 
1.7984, "grad_norm": 0.6125516295433044, "learning_rate": 4.720888355342137e-05, "loss": 0.7364, "step": 1405 }, { "epoch": 1.79968, "grad_norm": 0.6260775923728943, "learning_rate": 4.720688275310124e-05, "loss": 0.7158, "step": 1406 }, { "epoch": 1.80096, "grad_norm": 0.6567710041999817, "learning_rate": 4.7204881952781115e-05, "loss": 0.7745, "step": 1407 }, { "epoch": 1.8022399999999998, "grad_norm": 0.6455616354942322, "learning_rate": 4.720288115246099e-05, "loss": 0.7038, "step": 1408 }, { "epoch": 1.80352, "grad_norm": 0.6271963715553284, "learning_rate": 4.720088035214086e-05, "loss": 0.7339, "step": 1409 }, { "epoch": 1.8048, "grad_norm": 0.6165766716003418, "learning_rate": 4.719887955182073e-05, "loss": 0.8227, "step": 1410 }, { "epoch": 1.8060800000000001, "grad_norm": 0.6498475074768066, "learning_rate": 4.71968787515006e-05, "loss": 0.7552, "step": 1411 }, { "epoch": 1.80736, "grad_norm": 0.6534706950187683, "learning_rate": 4.7194877951180474e-05, "loss": 0.7461, "step": 1412 }, { "epoch": 1.80864, "grad_norm": 0.6396170854568481, "learning_rate": 4.7192877150860346e-05, "loss": 0.7051, "step": 1413 }, { "epoch": 1.80992, "grad_norm": 0.6736233234405518, "learning_rate": 4.719087635054022e-05, "loss": 0.7181, "step": 1414 }, { "epoch": 1.8112, "grad_norm": 0.7057044506072998, "learning_rate": 4.718887555022009e-05, "loss": 0.7447, "step": 1415 }, { "epoch": 1.8124799999999999, "grad_norm": 0.639115571975708, "learning_rate": 4.718687474989996e-05, "loss": 0.702, "step": 1416 }, { "epoch": 1.81376, "grad_norm": 0.6411137580871582, "learning_rate": 4.7184873949579834e-05, "loss": 0.7378, "step": 1417 }, { "epoch": 1.81504, "grad_norm": 0.618817925453186, "learning_rate": 4.7182873149259705e-05, "loss": 0.6965, "step": 1418 }, { "epoch": 1.8163200000000002, "grad_norm": 0.6112127304077148, "learning_rate": 4.718087234893958e-05, "loss": 0.7387, "step": 1419 }, { "epoch": 1.8176, "grad_norm": 0.6073436737060547, "learning_rate": 4.717887154861945e-05, 
"loss": 0.6605, "step": 1420 }, { "epoch": 1.81888, "grad_norm": 0.5831905007362366, "learning_rate": 4.717687074829932e-05, "loss": 0.6903, "step": 1421 }, { "epoch": 1.82016, "grad_norm": 0.6208034753799438, "learning_rate": 4.717486994797919e-05, "loss": 0.7061, "step": 1422 }, { "epoch": 1.82144, "grad_norm": 0.6343234181404114, "learning_rate": 4.7172869147659065e-05, "loss": 0.7128, "step": 1423 }, { "epoch": 1.82272, "grad_norm": 0.6611493229866028, "learning_rate": 4.7170868347338937e-05, "loss": 0.7567, "step": 1424 }, { "epoch": 1.8239999999999998, "grad_norm": 0.6101629734039307, "learning_rate": 4.716886754701881e-05, "loss": 0.7273, "step": 1425 }, { "epoch": 1.82528, "grad_norm": 0.5913015604019165, "learning_rate": 4.716686674669868e-05, "loss": 0.6477, "step": 1426 }, { "epoch": 1.82656, "grad_norm": 0.5725111365318298, "learning_rate": 4.716486594637856e-05, "loss": 0.7042, "step": 1427 }, { "epoch": 1.8278400000000001, "grad_norm": 0.5874539017677307, "learning_rate": 4.7162865146058424e-05, "loss": 0.6511, "step": 1428 }, { "epoch": 1.82912, "grad_norm": 0.6199377179145813, "learning_rate": 4.7160864345738296e-05, "loss": 0.6984, "step": 1429 }, { "epoch": 1.8304, "grad_norm": 0.6231164932250977, "learning_rate": 4.715886354541817e-05, "loss": 0.6653, "step": 1430 }, { "epoch": 1.83168, "grad_norm": 0.6409709453582764, "learning_rate": 4.715686274509804e-05, "loss": 0.7573, "step": 1431 }, { "epoch": 1.83296, "grad_norm": 0.5936411023139954, "learning_rate": 4.715486194477791e-05, "loss": 0.7043, "step": 1432 }, { "epoch": 1.8342399999999999, "grad_norm": 0.6537723541259766, "learning_rate": 4.715286114445778e-05, "loss": 0.7084, "step": 1433 }, { "epoch": 1.83552, "grad_norm": 0.5906792283058167, "learning_rate": 4.715086034413766e-05, "loss": 0.6799, "step": 1434 }, { "epoch": 1.8368, "grad_norm": 0.5938474535942078, "learning_rate": 4.7148859543817534e-05, "loss": 0.6764, "step": 1435 }, { "epoch": 1.8380800000000002, "grad_norm": 
0.6261917948722839, "learning_rate": 4.71468587434974e-05, "loss": 0.7256, "step": 1436 }, { "epoch": 1.83936, "grad_norm": 0.6289463043212891, "learning_rate": 4.714485794317727e-05, "loss": 0.7277, "step": 1437 }, { "epoch": 1.84064, "grad_norm": 0.606020987033844, "learning_rate": 4.714285714285714e-05, "loss": 0.6845, "step": 1438 }, { "epoch": 1.84192, "grad_norm": 0.6375628709793091, "learning_rate": 4.7140856342537014e-05, "loss": 0.7181, "step": 1439 }, { "epoch": 1.8432, "grad_norm": 0.6486073136329651, "learning_rate": 4.7138855542216886e-05, "loss": 0.7067, "step": 1440 }, { "epoch": 1.84448, "grad_norm": 0.6745935082435608, "learning_rate": 4.7136854741896765e-05, "loss": 0.7309, "step": 1441 }, { "epoch": 1.8457599999999998, "grad_norm": 0.604806125164032, "learning_rate": 4.713485394157664e-05, "loss": 0.7268, "step": 1442 }, { "epoch": 1.84704, "grad_norm": 0.6913792490959167, "learning_rate": 4.713285314125651e-05, "loss": 0.7569, "step": 1443 }, { "epoch": 1.84832, "grad_norm": 0.5856841802597046, "learning_rate": 4.7130852340936374e-05, "loss": 0.6421, "step": 1444 }, { "epoch": 1.8496000000000001, "grad_norm": 0.6320177316665649, "learning_rate": 4.7128851540616246e-05, "loss": 0.7385, "step": 1445 }, { "epoch": 1.85088, "grad_norm": 0.6420151591300964, "learning_rate": 4.712685074029612e-05, "loss": 0.6881, "step": 1446 }, { "epoch": 1.85216, "grad_norm": 0.6870813965797424, "learning_rate": 4.712484993997599e-05, "loss": 0.7299, "step": 1447 }, { "epoch": 1.85344, "grad_norm": 0.6391651034355164, "learning_rate": 4.712284913965587e-05, "loss": 0.68, "step": 1448 }, { "epoch": 1.85472, "grad_norm": 0.681445300579071, "learning_rate": 4.712084833933574e-05, "loss": 0.7795, "step": 1449 }, { "epoch": 1.8559999999999999, "grad_norm": 0.6424078941345215, "learning_rate": 4.711884753901561e-05, "loss": 0.7234, "step": 1450 }, { "epoch": 1.85728, "grad_norm": 0.625423014163971, "learning_rate": 4.7116846738695484e-05, "loss": 0.6631, "step": 1451 }, { 
"epoch": 1.85856, "grad_norm": 0.611112117767334, "learning_rate": 4.711484593837535e-05, "loss": 0.7227, "step": 1452 }, { "epoch": 1.8598400000000002, "grad_norm": 0.6501821279525757, "learning_rate": 4.711284513805522e-05, "loss": 0.7494, "step": 1453 }, { "epoch": 1.86112, "grad_norm": 0.6695547699928284, "learning_rate": 4.711084433773509e-05, "loss": 0.7335, "step": 1454 }, { "epoch": 1.8624, "grad_norm": 0.6903998851776123, "learning_rate": 4.710884353741497e-05, "loss": 0.7594, "step": 1455 }, { "epoch": 1.86368, "grad_norm": 0.6672253608703613, "learning_rate": 4.710684273709484e-05, "loss": 0.7182, "step": 1456 }, { "epoch": 1.86496, "grad_norm": 0.6630045175552368, "learning_rate": 4.7104841936774715e-05, "loss": 0.7251, "step": 1457 }, { "epoch": 1.86624, "grad_norm": 0.6247894167900085, "learning_rate": 4.7102841136454587e-05, "loss": 0.7358, "step": 1458 }, { "epoch": 1.8675199999999998, "grad_norm": 0.595182478427887, "learning_rate": 4.710084033613446e-05, "loss": 0.6497, "step": 1459 }, { "epoch": 1.8688, "grad_norm": 0.6355342864990234, "learning_rate": 4.7098839535814323e-05, "loss": 0.6925, "step": 1460 }, { "epoch": 1.87008, "grad_norm": 0.6582742929458618, "learning_rate": 4.7096838735494195e-05, "loss": 0.7062, "step": 1461 }, { "epoch": 1.8713600000000001, "grad_norm": 0.5886150598526001, "learning_rate": 4.7094837935174074e-05, "loss": 0.6701, "step": 1462 }, { "epoch": 1.87264, "grad_norm": 0.6048754453659058, "learning_rate": 4.7092837134853946e-05, "loss": 0.6841, "step": 1463 }, { "epoch": 1.87392, "grad_norm": 0.6366539597511292, "learning_rate": 4.709083633453382e-05, "loss": 0.6991, "step": 1464 }, { "epoch": 1.8752, "grad_norm": 0.5844667553901672, "learning_rate": 4.708883553421369e-05, "loss": 0.6269, "step": 1465 }, { "epoch": 1.87648, "grad_norm": 0.5977579951286316, "learning_rate": 4.708683473389356e-05, "loss": 0.6561, "step": 1466 }, { "epoch": 1.8777599999999999, "grad_norm": 0.6138444542884827, "learning_rate": 
4.708483393357343e-05, "loss": 0.6814, "step": 1467 }, { "epoch": 1.87904, "grad_norm": 0.6392626762390137, "learning_rate": 4.70828331332533e-05, "loss": 0.6931, "step": 1468 }, { "epoch": 1.88032, "grad_norm": 0.666708767414093, "learning_rate": 4.708083233293318e-05, "loss": 0.7474, "step": 1469 }, { "epoch": 1.8816000000000002, "grad_norm": 0.6352314949035645, "learning_rate": 4.707883153261305e-05, "loss": 0.7012, "step": 1470 }, { "epoch": 1.88288, "grad_norm": 0.6222551465034485, "learning_rate": 4.707683073229292e-05, "loss": 0.7133, "step": 1471 }, { "epoch": 1.88416, "grad_norm": 0.643947958946228, "learning_rate": 4.707482993197279e-05, "loss": 0.7556, "step": 1472 }, { "epoch": 1.88544, "grad_norm": 0.5932632684707642, "learning_rate": 4.7072829131652664e-05, "loss": 0.6847, "step": 1473 }, { "epoch": 1.88672, "grad_norm": 0.6231780648231506, "learning_rate": 4.7070828331332536e-05, "loss": 0.6971, "step": 1474 }, { "epoch": 1.888, "grad_norm": 0.6272242665290833, "learning_rate": 4.706882753101241e-05, "loss": 0.7149, "step": 1475 }, { "epoch": 1.8892799999999998, "grad_norm": 0.6251195073127747, "learning_rate": 4.706682673069228e-05, "loss": 0.685, "step": 1476 }, { "epoch": 1.89056, "grad_norm": 0.6322864890098572, "learning_rate": 4.706482593037215e-05, "loss": 0.7115, "step": 1477 }, { "epoch": 1.89184, "grad_norm": 0.6262922286987305, "learning_rate": 4.7062825130052024e-05, "loss": 0.7272, "step": 1478 }, { "epoch": 1.8931200000000001, "grad_norm": 0.6692927479743958, "learning_rate": 4.7060824329731896e-05, "loss": 0.7201, "step": 1479 }, { "epoch": 1.8944, "grad_norm": 0.58826744556427, "learning_rate": 4.705882352941177e-05, "loss": 0.6663, "step": 1480 }, { "epoch": 1.89568, "grad_norm": 0.6134018301963806, "learning_rate": 4.705682272909164e-05, "loss": 0.7319, "step": 1481 }, { "epoch": 1.89696, "grad_norm": 0.6586698889732361, "learning_rate": 4.705482192877151e-05, "loss": 0.7324, "step": 1482 }, { "epoch": 1.89824, "grad_norm": 
0.6096124649047852, "learning_rate": 4.705282112845138e-05, "loss": 0.7266, "step": 1483 }, { "epoch": 1.8995199999999999, "grad_norm": 0.6256430149078369, "learning_rate": 4.7050820328131255e-05, "loss": 0.6926, "step": 1484 }, { "epoch": 1.9008, "grad_norm": 0.5955308675765991, "learning_rate": 4.704881952781113e-05, "loss": 0.7232, "step": 1485 }, { "epoch": 1.90208, "grad_norm": 0.6191076040267944, "learning_rate": 4.7046818727491e-05, "loss": 0.6879, "step": 1486 }, { "epoch": 1.9033600000000002, "grad_norm": 0.6182354688644409, "learning_rate": 4.704481792717087e-05, "loss": 0.7449, "step": 1487 }, { "epoch": 1.90464, "grad_norm": 0.6782128214836121, "learning_rate": 4.704281712685074e-05, "loss": 0.7688, "step": 1488 }, { "epoch": 1.90592, "grad_norm": 0.6402320265769958, "learning_rate": 4.7040816326530614e-05, "loss": 0.7376, "step": 1489 }, { "epoch": 1.9072, "grad_norm": 0.6744869351387024, "learning_rate": 4.7038815526210486e-05, "loss": 0.8047, "step": 1490 }, { "epoch": 1.90848, "grad_norm": 0.6262235045433044, "learning_rate": 4.703681472589036e-05, "loss": 0.6589, "step": 1491 }, { "epoch": 1.90976, "grad_norm": 0.6338122487068176, "learning_rate": 4.703481392557023e-05, "loss": 0.7067, "step": 1492 }, { "epoch": 1.9110399999999998, "grad_norm": 0.6535120010375977, "learning_rate": 4.70328131252501e-05, "loss": 0.6962, "step": 1493 }, { "epoch": 1.91232, "grad_norm": 0.6537796854972839, "learning_rate": 4.703081232492997e-05, "loss": 0.7182, "step": 1494 }, { "epoch": 1.9136, "grad_norm": 0.6153785586357117, "learning_rate": 4.7028811524609845e-05, "loss": 0.6872, "step": 1495 }, { "epoch": 1.9148800000000001, "grad_norm": 0.6132374405860901, "learning_rate": 4.702681072428972e-05, "loss": 0.6838, "step": 1496 }, { "epoch": 1.91616, "grad_norm": 0.6539681553840637, "learning_rate": 4.7024809923969596e-05, "loss": 0.7467, "step": 1497 }, { "epoch": 1.91744, "grad_norm": 0.6296167373657227, "learning_rate": 4.702280912364946e-05, "loss": 0.6825, 
"step": 1498 }, { "epoch": 1.91872, "grad_norm": 0.6461489796638489, "learning_rate": 4.702080832332933e-05, "loss": 0.7336, "step": 1499 }, { "epoch": 1.92, "grad_norm": 0.6219279170036316, "learning_rate": 4.7018807523009204e-05, "loss": 0.7172, "step": 1500 }, { "epoch": 1.9212799999999999, "grad_norm": 0.6014849543571472, "learning_rate": 4.7016806722689076e-05, "loss": 0.7528, "step": 1501 }, { "epoch": 1.92256, "grad_norm": 0.6287034749984741, "learning_rate": 4.701480592236895e-05, "loss": 0.7724, "step": 1502 }, { "epoch": 1.92384, "grad_norm": 0.6444774270057678, "learning_rate": 4.701280512204882e-05, "loss": 0.7873, "step": 1503 }, { "epoch": 1.9251200000000002, "grad_norm": 0.631040096282959, "learning_rate": 4.70108043217287e-05, "loss": 0.7204, "step": 1504 }, { "epoch": 1.9264000000000001, "grad_norm": 0.6389703750610352, "learning_rate": 4.700880352140857e-05, "loss": 0.7623, "step": 1505 }, { "epoch": 1.92768, "grad_norm": 0.6128464341163635, "learning_rate": 4.7006802721088436e-05, "loss": 0.7311, "step": 1506 }, { "epoch": 1.92896, "grad_norm": 0.6340726613998413, "learning_rate": 4.700480192076831e-05, "loss": 0.7277, "step": 1507 }, { "epoch": 1.93024, "grad_norm": 0.6153057813644409, "learning_rate": 4.700280112044818e-05, "loss": 0.7087, "step": 1508 }, { "epoch": 1.93152, "grad_norm": 0.6262139081954956, "learning_rate": 4.700080032012805e-05, "loss": 0.6813, "step": 1509 }, { "epoch": 1.9327999999999999, "grad_norm": 0.6372281908988953, "learning_rate": 4.699879951980792e-05, "loss": 0.6867, "step": 1510 }, { "epoch": 1.93408, "grad_norm": 0.6328516602516174, "learning_rate": 4.69967987194878e-05, "loss": 0.7376, "step": 1511 }, { "epoch": 1.93536, "grad_norm": 0.6271541118621826, "learning_rate": 4.6994797919167674e-05, "loss": 0.7694, "step": 1512 }, { "epoch": 1.9366400000000001, "grad_norm": 0.6466159224510193, "learning_rate": 4.6992797118847545e-05, "loss": 0.7072, "step": 1513 }, { "epoch": 1.93792, "grad_norm": 0.6909953355789185, 
"learning_rate": 4.699079631852741e-05, "loss": 0.7634, "step": 1514 }, { "epoch": 1.9392, "grad_norm": 0.6303381323814392, "learning_rate": 4.698879551820728e-05, "loss": 0.802, "step": 1515 }, { "epoch": 1.94048, "grad_norm": 0.6296682953834534, "learning_rate": 4.6986794717887154e-05, "loss": 0.7861, "step": 1516 }, { "epoch": 1.94176, "grad_norm": 0.6130115985870361, "learning_rate": 4.6984793917567026e-05, "loss": 0.697, "step": 1517 }, { "epoch": 1.9430399999999999, "grad_norm": 0.6313830614089966, "learning_rate": 4.69827931172469e-05, "loss": 0.7331, "step": 1518 }, { "epoch": 1.94432, "grad_norm": 0.5878955125808716, "learning_rate": 4.6980792316926777e-05, "loss": 0.6528, "step": 1519 }, { "epoch": 1.9456, "grad_norm": 0.6016858220100403, "learning_rate": 4.697879151660665e-05, "loss": 0.687, "step": 1520 }, { "epoch": 1.9468800000000002, "grad_norm": 0.6477680802345276, "learning_rate": 4.697679071628652e-05, "loss": 0.7101, "step": 1521 }, { "epoch": 1.9481600000000001, "grad_norm": 0.6151396632194519, "learning_rate": 4.6974789915966385e-05, "loss": 0.7067, "step": 1522 }, { "epoch": 1.94944, "grad_norm": 0.6041195392608643, "learning_rate": 4.697278911564626e-05, "loss": 0.7499, "step": 1523 }, { "epoch": 1.95072, "grad_norm": 0.6384760737419128, "learning_rate": 4.697078831532613e-05, "loss": 0.7452, "step": 1524 }, { "epoch": 1.952, "grad_norm": 0.6926515698432922, "learning_rate": 4.6968787515006e-05, "loss": 0.7656, "step": 1525 }, { "epoch": 1.95328, "grad_norm": 0.652237594127655, "learning_rate": 4.696678671468588e-05, "loss": 0.7503, "step": 1526 }, { "epoch": 1.9545599999999999, "grad_norm": 0.6000808477401733, "learning_rate": 4.696478591436575e-05, "loss": 0.6663, "step": 1527 }, { "epoch": 1.95584, "grad_norm": 0.6068828105926514, "learning_rate": 4.696278511404562e-05, "loss": 0.8046, "step": 1528 }, { "epoch": 1.95712, "grad_norm": 0.6311643123626709, "learning_rate": 4.6960784313725495e-05, "loss": 0.7473, "step": 1529 }, { "epoch": 
1.9584000000000001, "grad_norm": 0.6560335755348206, "learning_rate": 4.695878351340536e-05, "loss": 0.7188, "step": 1530 }, { "epoch": 1.95968, "grad_norm": 0.6460652351379395, "learning_rate": 4.695678271308523e-05, "loss": 0.6784, "step": 1531 }, { "epoch": 1.96096, "grad_norm": 0.6522865295410156, "learning_rate": 4.6954781912765104e-05, "loss": 0.7078, "step": 1532 }, { "epoch": 1.96224, "grad_norm": 0.6220058798789978, "learning_rate": 4.695278111244498e-05, "loss": 0.7121, "step": 1533 }, { "epoch": 1.96352, "grad_norm": 0.6365997791290283, "learning_rate": 4.6950780312124854e-05, "loss": 0.6876, "step": 1534 }, { "epoch": 1.9647999999999999, "grad_norm": 0.6620326042175293, "learning_rate": 4.6948779511804726e-05, "loss": 0.7311, "step": 1535 }, { "epoch": 1.96608, "grad_norm": 0.6124232411384583, "learning_rate": 4.69467787114846e-05, "loss": 0.7134, "step": 1536 }, { "epoch": 1.96736, "grad_norm": 0.6828178763389587, "learning_rate": 4.694477791116447e-05, "loss": 0.7439, "step": 1537 }, { "epoch": 1.96864, "grad_norm": 0.6395293474197388, "learning_rate": 4.6942777110844335e-05, "loss": 0.7836, "step": 1538 }, { "epoch": 1.9699200000000001, "grad_norm": 0.6207759976387024, "learning_rate": 4.694077631052421e-05, "loss": 0.6739, "step": 1539 }, { "epoch": 1.9712, "grad_norm": 0.6403487324714661, "learning_rate": 4.6938775510204086e-05, "loss": 0.7665, "step": 1540 }, { "epoch": 1.97248, "grad_norm": 0.6810711026191711, "learning_rate": 4.693677470988396e-05, "loss": 0.7825, "step": 1541 }, { "epoch": 1.97376, "grad_norm": 0.6638922095298767, "learning_rate": 4.693477390956383e-05, "loss": 0.7517, "step": 1542 }, { "epoch": 1.97504, "grad_norm": 0.5913483500480652, "learning_rate": 4.69327731092437e-05, "loss": 0.6869, "step": 1543 }, { "epoch": 1.9763199999999999, "grad_norm": 0.6187199354171753, "learning_rate": 4.693077230892357e-05, "loss": 0.7321, "step": 1544 }, { "epoch": 1.9776, "grad_norm": 0.6598728895187378, "learning_rate": 
4.6928771508603445e-05, "loss": 0.7358, "step": 1545 }, { "epoch": 1.97888, "grad_norm": 0.6453779935836792, "learning_rate": 4.692677070828331e-05, "loss": 0.7885, "step": 1546 }, { "epoch": 1.9801600000000001, "grad_norm": 0.62159663438797, "learning_rate": 4.692476990796319e-05, "loss": 0.6755, "step": 1547 }, { "epoch": 1.98144, "grad_norm": 0.5968536734580994, "learning_rate": 4.692276910764306e-05, "loss": 0.6611, "step": 1548 }, { "epoch": 1.98272, "grad_norm": 0.6375271081924438, "learning_rate": 4.692076830732293e-05, "loss": 0.6993, "step": 1549 }, { "epoch": 1.984, "grad_norm": 0.6619069576263428, "learning_rate": 4.6918767507002804e-05, "loss": 0.6928, "step": 1550 }, { "epoch": 1.98528, "grad_norm": 0.6253162026405334, "learning_rate": 4.6916766706682676e-05, "loss": 0.66, "step": 1551 }, { "epoch": 1.9865599999999999, "grad_norm": 0.6479495167732239, "learning_rate": 4.691476590636255e-05, "loss": 0.6365, "step": 1552 }, { "epoch": 1.98784, "grad_norm": 0.6379725337028503, "learning_rate": 4.691276510604242e-05, "loss": 0.7077, "step": 1553 }, { "epoch": 1.98912, "grad_norm": 0.6354682445526123, "learning_rate": 4.691076430572229e-05, "loss": 0.7052, "step": 1554 }, { "epoch": 1.9904, "grad_norm": 0.6414213180541992, "learning_rate": 4.6908763505402163e-05, "loss": 0.6586, "step": 1555 }, { "epoch": 1.9916800000000001, "grad_norm": 0.6093000173568726, "learning_rate": 4.6906762705082035e-05, "loss": 0.7352, "step": 1556 }, { "epoch": 1.99296, "grad_norm": 0.6520658135414124, "learning_rate": 4.690476190476191e-05, "loss": 0.7425, "step": 1557 }, { "epoch": 1.99424, "grad_norm": 0.635176420211792, "learning_rate": 4.690276110444178e-05, "loss": 0.7179, "step": 1558 }, { "epoch": 1.99552, "grad_norm": 0.6376418471336365, "learning_rate": 4.690076030412165e-05, "loss": 0.7567, "step": 1559 }, { "epoch": 1.9968, "grad_norm": 0.6329599022865295, "learning_rate": 4.689875950380152e-05, "loss": 0.7107, "step": 1560 }, { "epoch": 1.9980799999999999, 
"grad_norm": 0.6018700003623962, "learning_rate": 4.6896758703481395e-05, "loss": 0.6841, "step": 1561 }, { "epoch": 1.99936, "grad_norm": 0.6212313771247864, "learning_rate": 4.6894757903161266e-05, "loss": 0.7403, "step": 1562 }, { "epoch": 2.00064, "grad_norm": 1.2921080589294434, "learning_rate": 4.689275710284114e-05, "loss": 1.147, "step": 1563 }, { "epoch": 2.00192, "grad_norm": 0.6272991299629211, "learning_rate": 4.689075630252101e-05, "loss": 0.6922, "step": 1564 }, { "epoch": 2.0032, "grad_norm": 0.6323875188827515, "learning_rate": 4.688875550220088e-05, "loss": 0.721, "step": 1565 }, { "epoch": 2.00448, "grad_norm": 0.6615299582481384, "learning_rate": 4.6886754701880754e-05, "loss": 0.7782, "step": 1566 }, { "epoch": 2.00576, "grad_norm": 0.6855239868164062, "learning_rate": 4.6884753901560626e-05, "loss": 0.7006, "step": 1567 }, { "epoch": 2.00704, "grad_norm": 0.5969696640968323, "learning_rate": 4.68827531012405e-05, "loss": 0.6765, "step": 1568 }, { "epoch": 2.00832, "grad_norm": 0.6360341310501099, "learning_rate": 4.688075230092037e-05, "loss": 0.7551, "step": 1569 }, { "epoch": 2.0096, "grad_norm": 0.6344780325889587, "learning_rate": 4.687875150060024e-05, "loss": 0.7285, "step": 1570 }, { "epoch": 2.0108800000000002, "grad_norm": 0.6265085935592651, "learning_rate": 4.687675070028011e-05, "loss": 0.693, "step": 1571 }, { "epoch": 2.01216, "grad_norm": 0.6241620182991028, "learning_rate": 4.6874749899959985e-05, "loss": 0.6636, "step": 1572 }, { "epoch": 2.01344, "grad_norm": 0.6464859843254089, "learning_rate": 4.687274909963986e-05, "loss": 0.7299, "step": 1573 }, { "epoch": 2.01472, "grad_norm": 0.6327393651008606, "learning_rate": 4.687074829931973e-05, "loss": 0.6387, "step": 1574 }, { "epoch": 2.016, "grad_norm": 0.6345120072364807, "learning_rate": 4.686874749899961e-05, "loss": 0.6943, "step": 1575 }, { "epoch": 2.01728, "grad_norm": 0.6209046244621277, "learning_rate": 4.686674669867947e-05, "loss": 0.6712, "step": 1576 }, { "epoch": 
2.01856, "grad_norm": 0.6531869173049927, "learning_rate": 4.6864745898359344e-05, "loss": 0.7195, "step": 1577 }, { "epoch": 2.01984, "grad_norm": 0.6831356287002563, "learning_rate": 4.6862745098039216e-05, "loss": 0.7457, "step": 1578 }, { "epoch": 2.02112, "grad_norm": 0.6502741575241089, "learning_rate": 4.686074429771909e-05, "loss": 0.7308, "step": 1579 }, { "epoch": 2.0224, "grad_norm": 0.661711573600769, "learning_rate": 4.685874349739896e-05, "loss": 0.7113, "step": 1580 }, { "epoch": 2.02368, "grad_norm": 0.6132927536964417, "learning_rate": 4.685674269707883e-05, "loss": 0.6587, "step": 1581 }, { "epoch": 2.02496, "grad_norm": 0.6607900261878967, "learning_rate": 4.685474189675871e-05, "loss": 0.6695, "step": 1582 }, { "epoch": 2.02624, "grad_norm": 0.6305644512176514, "learning_rate": 4.685274109643858e-05, "loss": 0.6834, "step": 1583 }, { "epoch": 2.02752, "grad_norm": 0.6461015939712524, "learning_rate": 4.685074029611845e-05, "loss": 0.7033, "step": 1584 }, { "epoch": 2.0288, "grad_norm": 0.6151067614555359, "learning_rate": 4.684873949579832e-05, "loss": 0.7138, "step": 1585 }, { "epoch": 2.03008, "grad_norm": 0.6099941730499268, "learning_rate": 4.684673869547819e-05, "loss": 0.6364, "step": 1586 }, { "epoch": 2.03136, "grad_norm": 0.6969642639160156, "learning_rate": 4.684473789515806e-05, "loss": 0.755, "step": 1587 }, { "epoch": 2.03264, "grad_norm": 0.6614975929260254, "learning_rate": 4.6842737094837935e-05, "loss": 0.7028, "step": 1588 }, { "epoch": 2.03392, "grad_norm": 0.6838937401771545, "learning_rate": 4.684073629451781e-05, "loss": 0.8004, "step": 1589 }, { "epoch": 2.0352, "grad_norm": 0.6104751229286194, "learning_rate": 4.6838735494197685e-05, "loss": 0.7308, "step": 1590 }, { "epoch": 2.03648, "grad_norm": 0.6918980479240417, "learning_rate": 4.683673469387756e-05, "loss": 0.7753, "step": 1591 }, { "epoch": 2.03776, "grad_norm": 0.6084235906600952, "learning_rate": 4.683473389355742e-05, "loss": 0.6957, "step": 1592 }, { "epoch": 
2.03904, "grad_norm": 0.5943331718444824, "learning_rate": 4.6832733093237294e-05, "loss": 0.6609, "step": 1593 }, { "epoch": 2.04032, "grad_norm": 0.6553683876991272, "learning_rate": 4.6830732292917166e-05, "loss": 0.7259, "step": 1594 }, { "epoch": 2.0416, "grad_norm": 0.6380487680435181, "learning_rate": 4.682873149259704e-05, "loss": 0.7648, "step": 1595 }, { "epoch": 2.04288, "grad_norm": 0.6057306528091431, "learning_rate": 4.6826730692276916e-05, "loss": 0.6415, "step": 1596 }, { "epoch": 2.04416, "grad_norm": 0.6594085693359375, "learning_rate": 4.682472989195679e-05, "loss": 0.7322, "step": 1597 }, { "epoch": 2.04544, "grad_norm": 0.6331745982170105, "learning_rate": 4.682272909163666e-05, "loss": 0.7065, "step": 1598 }, { "epoch": 2.04672, "grad_norm": 0.7005310654640198, "learning_rate": 4.682072829131653e-05, "loss": 0.7582, "step": 1599 }, { "epoch": 2.048, "grad_norm": 0.6412195563316345, "learning_rate": 4.68187274909964e-05, "loss": 0.7186, "step": 1600 }, { "epoch": 2.04928, "grad_norm": 0.6990569233894348, "learning_rate": 4.681672669067627e-05, "loss": 0.7195, "step": 1601 }, { "epoch": 2.05056, "grad_norm": 0.6446850895881653, "learning_rate": 4.681472589035614e-05, "loss": 0.6461, "step": 1602 }, { "epoch": 2.05184, "grad_norm": 0.6356726288795471, "learning_rate": 4.681272509003602e-05, "loss": 0.6688, "step": 1603 }, { "epoch": 2.05312, "grad_norm": 0.6811710000038147, "learning_rate": 4.681072428971589e-05, "loss": 0.7049, "step": 1604 }, { "epoch": 2.0544, "grad_norm": 0.6494273543357849, "learning_rate": 4.680872348939576e-05, "loss": 0.6843, "step": 1605 }, { "epoch": 2.05568, "grad_norm": 0.6489901542663574, "learning_rate": 4.6806722689075635e-05, "loss": 0.6958, "step": 1606 }, { "epoch": 2.05696, "grad_norm": 0.6661378145217896, "learning_rate": 4.680472188875551e-05, "loss": 0.7385, "step": 1607 }, { "epoch": 2.05824, "grad_norm": 0.6342524290084839, "learning_rate": 4.680272108843537e-05, "loss": 0.7179, "step": 1608 }, { "epoch": 
2.05952, "grad_norm": 0.6314605474472046, "learning_rate": 4.6800720288115244e-05, "loss": 0.7109, "step": 1609 }, { "epoch": 2.0608, "grad_norm": 0.665981650352478, "learning_rate": 4.679871948779512e-05, "loss": 0.7259, "step": 1610 }, { "epoch": 2.06208, "grad_norm": 0.6300519704818726, "learning_rate": 4.6796718687474994e-05, "loss": 0.7014, "step": 1611 }, { "epoch": 2.06336, "grad_norm": 0.6580458879470825, "learning_rate": 4.6794717887154866e-05, "loss": 0.6672, "step": 1612 }, { "epoch": 2.06464, "grad_norm": 0.669180154800415, "learning_rate": 4.679271708683474e-05, "loss": 0.6437, "step": 1613 }, { "epoch": 2.06592, "grad_norm": 0.690592348575592, "learning_rate": 4.679071628651461e-05, "loss": 0.7331, "step": 1614 }, { "epoch": 2.0672, "grad_norm": 0.6528162360191345, "learning_rate": 4.678871548619448e-05, "loss": 0.6798, "step": 1615 }, { "epoch": 2.06848, "grad_norm": 0.619225800037384, "learning_rate": 4.678671468587435e-05, "loss": 0.7003, "step": 1616 }, { "epoch": 2.06976, "grad_norm": 0.6643122434616089, "learning_rate": 4.6784713885554225e-05, "loss": 0.764, "step": 1617 }, { "epoch": 2.07104, "grad_norm": 0.6456024050712585, "learning_rate": 4.67827130852341e-05, "loss": 0.6792, "step": 1618 }, { "epoch": 2.07232, "grad_norm": 0.6370171308517456, "learning_rate": 4.678071228491397e-05, "loss": 0.6821, "step": 1619 }, { "epoch": 2.0736, "grad_norm": 0.646872341632843, "learning_rate": 4.677871148459384e-05, "loss": 0.6942, "step": 1620 }, { "epoch": 2.07488, "grad_norm": 0.6307012438774109, "learning_rate": 4.677671068427371e-05, "loss": 0.6781, "step": 1621 }, { "epoch": 2.07616, "grad_norm": 0.6719081401824951, "learning_rate": 4.6774709883953585e-05, "loss": 0.7221, "step": 1622 }, { "epoch": 2.07744, "grad_norm": 0.6334235072135925, "learning_rate": 4.6772709083633456e-05, "loss": 0.6652, "step": 1623 }, { "epoch": 2.07872, "grad_norm": 0.6129449009895325, "learning_rate": 4.677070828331333e-05, "loss": 0.6324, "step": 1624 }, { "epoch": 
2.08, "grad_norm": 0.617857813835144, "learning_rate": 4.67687074829932e-05, "loss": 0.6846, "step": 1625 }, { "epoch": 2.08128, "grad_norm": 0.6237433552742004, "learning_rate": 4.676670668267307e-05, "loss": 0.6622, "step": 1626 }, { "epoch": 2.08256, "grad_norm": 0.606203019618988, "learning_rate": 4.6764705882352944e-05, "loss": 0.6895, "step": 1627 }, { "epoch": 2.08384, "grad_norm": 0.6327537894248962, "learning_rate": 4.6762705082032816e-05, "loss": 0.6087, "step": 1628 }, { "epoch": 2.08512, "grad_norm": 0.6112367510795593, "learning_rate": 4.676070428171269e-05, "loss": 0.6854, "step": 1629 }, { "epoch": 2.0864, "grad_norm": 0.5821278691291809, "learning_rate": 4.675870348139256e-05, "loss": 0.6405, "step": 1630 }, { "epoch": 2.08768, "grad_norm": 0.6095936894416809, "learning_rate": 4.675670268107243e-05, "loss": 0.6359, "step": 1631 }, { "epoch": 2.08896, "grad_norm": 0.6587210893630981, "learning_rate": 4.67547018807523e-05, "loss": 0.7537, "step": 1632 }, { "epoch": 2.09024, "grad_norm": 0.6856895089149475, "learning_rate": 4.6752701080432175e-05, "loss": 0.7386, "step": 1633 }, { "epoch": 2.09152, "grad_norm": 0.6704199910163879, "learning_rate": 4.675070028011205e-05, "loss": 0.7063, "step": 1634 }, { "epoch": 2.0928, "grad_norm": 0.6308407187461853, "learning_rate": 4.674869947979192e-05, "loss": 0.6713, "step": 1635 }, { "epoch": 2.09408, "grad_norm": 0.6690980195999146, "learning_rate": 4.674669867947179e-05, "loss": 0.6961, "step": 1636 }, { "epoch": 2.09536, "grad_norm": 0.6405916213989258, "learning_rate": 4.674469787915166e-05, "loss": 0.6758, "step": 1637 }, { "epoch": 2.09664, "grad_norm": 0.638323962688446, "learning_rate": 4.6742697078831534e-05, "loss": 0.7383, "step": 1638 }, { "epoch": 2.09792, "grad_norm": 0.6798389554023743, "learning_rate": 4.6740696278511406e-05, "loss": 0.7503, "step": 1639 }, { "epoch": 2.0992, "grad_norm": 0.6478346586227417, "learning_rate": 4.673869547819128e-05, "loss": 0.6709, "step": 1640 }, { "epoch": 
2.10048, "grad_norm": 0.6550549864768982, "learning_rate": 4.673669467787115e-05, "loss": 0.7377, "step": 1641 }, { "epoch": 2.10176, "grad_norm": 0.611666738986969, "learning_rate": 4.673469387755102e-05, "loss": 0.6414, "step": 1642 }, { "epoch": 2.10304, "grad_norm": 0.6572237610816956, "learning_rate": 4.6732693077230894e-05, "loss": 0.7387, "step": 1643 }, { "epoch": 2.10432, "grad_norm": 0.6161869764328003, "learning_rate": 4.6730692276910765e-05, "loss": 0.6336, "step": 1644 }, { "epoch": 2.1056, "grad_norm": 0.6543852090835571, "learning_rate": 4.672869147659064e-05, "loss": 0.7487, "step": 1645 }, { "epoch": 2.10688, "grad_norm": 0.657584011554718, "learning_rate": 4.6726690676270516e-05, "loss": 0.7025, "step": 1646 }, { "epoch": 2.10816, "grad_norm": 0.6363831162452698, "learning_rate": 4.672468987595038e-05, "loss": 0.7098, "step": 1647 }, { "epoch": 2.10944, "grad_norm": 0.6271912455558777, "learning_rate": 4.672268907563025e-05, "loss": 0.6601, "step": 1648 }, { "epoch": 2.11072, "grad_norm": 0.6789674162864685, "learning_rate": 4.6720688275310125e-05, "loss": 0.739, "step": 1649 }, { "epoch": 2.112, "grad_norm": 0.6508240699768066, "learning_rate": 4.6718687474989997e-05, "loss": 0.7454, "step": 1650 }, { "epoch": 2.11328, "grad_norm": 0.6586835980415344, "learning_rate": 4.671668667466987e-05, "loss": 0.6957, "step": 1651 }, { "epoch": 2.11456, "grad_norm": 0.6778126955032349, "learning_rate": 4.671468587434974e-05, "loss": 0.7017, "step": 1652 }, { "epoch": 2.11584, "grad_norm": 0.6158040165901184, "learning_rate": 4.671268507402962e-05, "loss": 0.6696, "step": 1653 }, { "epoch": 2.11712, "grad_norm": 0.6658079624176025, "learning_rate": 4.671068427370949e-05, "loss": 0.735, "step": 1654 }, { "epoch": 2.1184, "grad_norm": 0.6727830171585083, "learning_rate": 4.6708683473389356e-05, "loss": 0.685, "step": 1655 }, { "epoch": 2.11968, "grad_norm": 0.614812970161438, "learning_rate": 4.670668267306923e-05, "loss": 0.6622, "step": 1656 }, { "epoch": 
2.12096, "grad_norm": 0.6530376076698303, "learning_rate": 4.67046818727491e-05, "loss": 0.7208, "step": 1657 }, { "epoch": 2.12224, "grad_norm": 0.6509586572647095, "learning_rate": 4.670268107242897e-05, "loss": 0.7607, "step": 1658 }, { "epoch": 2.12352, "grad_norm": 0.6249855756759644, "learning_rate": 4.670068027210884e-05, "loss": 0.6638, "step": 1659 }, { "epoch": 2.1248, "grad_norm": 0.6153408885002136, "learning_rate": 4.669867947178872e-05, "loss": 0.699, "step": 1660 }, { "epoch": 2.12608, "grad_norm": 0.6448095440864563, "learning_rate": 4.6696678671468594e-05, "loss": 0.7032, "step": 1661 }, { "epoch": 2.12736, "grad_norm": 0.616129457950592, "learning_rate": 4.6694677871148466e-05, "loss": 0.68, "step": 1662 }, { "epoch": 2.12864, "grad_norm": 0.6616100072860718, "learning_rate": 4.669267707082833e-05, "loss": 0.6944, "step": 1663 }, { "epoch": 2.12992, "grad_norm": 0.6487681865692139, "learning_rate": 4.66906762705082e-05, "loss": 0.651, "step": 1664 }, { "epoch": 2.1312, "grad_norm": 0.6391069889068604, "learning_rate": 4.6688675470188074e-05, "loss": 0.6815, "step": 1665 }, { "epoch": 2.13248, "grad_norm": 0.6539847254753113, "learning_rate": 4.6686674669867946e-05, "loss": 0.7119, "step": 1666 }, { "epoch": 2.13376, "grad_norm": 0.6501457691192627, "learning_rate": 4.6684673869547825e-05, "loss": 0.6649, "step": 1667 }, { "epoch": 2.13504, "grad_norm": 0.6687941551208496, "learning_rate": 4.66826730692277e-05, "loss": 0.6558, "step": 1668 }, { "epoch": 2.13632, "grad_norm": 0.690743625164032, "learning_rate": 4.668067226890757e-05, "loss": 0.6878, "step": 1669 }, { "epoch": 2.1376, "grad_norm": 0.6361063122749329, "learning_rate": 4.667867146858744e-05, "loss": 0.7181, "step": 1670 }, { "epoch": 2.13888, "grad_norm": 0.6546103954315186, "learning_rate": 4.6676670668267306e-05, "loss": 0.7105, "step": 1671 }, { "epoch": 2.14016, "grad_norm": 0.6859259605407715, "learning_rate": 4.667466986794718e-05, "loss": 0.7206, "step": 1672 }, { "epoch": 
2.1414400000000002, "grad_norm": 0.6436448097229004, "learning_rate": 4.667266906762705e-05, "loss": 0.6466, "step": 1673 }, { "epoch": 2.14272, "grad_norm": 0.6501901745796204, "learning_rate": 4.667066826730693e-05, "loss": 0.6934, "step": 1674 }, { "epoch": 2.144, "grad_norm": 0.6573314070701599, "learning_rate": 4.66686674669868e-05, "loss": 0.6902, "step": 1675 }, { "epoch": 2.14528, "grad_norm": 0.6532529592514038, "learning_rate": 4.666666666666667e-05, "loss": 0.6551, "step": 1676 }, { "epoch": 2.14656, "grad_norm": 0.6756119132041931, "learning_rate": 4.6664665866346543e-05, "loss": 0.7094, "step": 1677 }, { "epoch": 2.14784, "grad_norm": 0.6881611943244934, "learning_rate": 4.6662665066026415e-05, "loss": 0.7356, "step": 1678 }, { "epoch": 2.14912, "grad_norm": 0.6921321749687195, "learning_rate": 4.666066426570628e-05, "loss": 0.704, "step": 1679 }, { "epoch": 2.1504, "grad_norm": 0.6233019828796387, "learning_rate": 4.665866346538615e-05, "loss": 0.6581, "step": 1680 }, { "epoch": 2.15168, "grad_norm": 0.6320359110832214, "learning_rate": 4.665666266506603e-05, "loss": 0.7222, "step": 1681 }, { "epoch": 2.15296, "grad_norm": 0.6233236789703369, "learning_rate": 4.66546618647459e-05, "loss": 0.6272, "step": 1682 }, { "epoch": 2.15424, "grad_norm": 0.6322048306465149, "learning_rate": 4.6652661064425775e-05, "loss": 0.6732, "step": 1683 }, { "epoch": 2.15552, "grad_norm": 0.6570281386375427, "learning_rate": 4.6650660264105646e-05, "loss": 0.6651, "step": 1684 }, { "epoch": 2.1568, "grad_norm": 0.6593979597091675, "learning_rate": 4.664865946378552e-05, "loss": 0.7143, "step": 1685 }, { "epoch": 2.15808, "grad_norm": 0.6577056050300598, "learning_rate": 4.664665866346539e-05, "loss": 0.6509, "step": 1686 }, { "epoch": 2.15936, "grad_norm": 0.6036557555198669, "learning_rate": 4.6644657863145255e-05, "loss": 0.6528, "step": 1687 }, { "epoch": 2.16064, "grad_norm": 0.6405543684959412, "learning_rate": 4.6642657062825134e-05, "loss": 0.6667, "step": 1688 }, 
{ "epoch": 2.16192, "grad_norm": 0.7007610201835632, "learning_rate": 4.6640656262505006e-05, "loss": 0.7203, "step": 1689 }, { "epoch": 2.1632, "grad_norm": 0.6296299695968628, "learning_rate": 4.663865546218488e-05, "loss": 0.6863, "step": 1690 }, { "epoch": 2.16448, "grad_norm": 0.6606785655021667, "learning_rate": 4.663665466186475e-05, "loss": 0.7131, "step": 1691 }, { "epoch": 2.16576, "grad_norm": 0.6546313166618347, "learning_rate": 4.663465386154462e-05, "loss": 0.6784, "step": 1692 }, { "epoch": 2.16704, "grad_norm": 0.6348084807395935, "learning_rate": 4.663265306122449e-05, "loss": 0.7113, "step": 1693 }, { "epoch": 2.16832, "grad_norm": 0.6655004024505615, "learning_rate": 4.6630652260904365e-05, "loss": 0.6833, "step": 1694 }, { "epoch": 2.1696, "grad_norm": 0.6627703905105591, "learning_rate": 4.662865146058424e-05, "loss": 0.6661, "step": 1695 }, { "epoch": 2.17088, "grad_norm": 0.6439046263694763, "learning_rate": 4.662665066026411e-05, "loss": 0.7075, "step": 1696 }, { "epoch": 2.17216, "grad_norm": 0.6709191203117371, "learning_rate": 4.662464985994398e-05, "loss": 0.7076, "step": 1697 }, { "epoch": 2.17344, "grad_norm": 0.6734464168548584, "learning_rate": 4.662264905962385e-05, "loss": 0.7296, "step": 1698 }, { "epoch": 2.1747199999999998, "grad_norm": 0.663478434085846, "learning_rate": 4.6620648259303724e-05, "loss": 0.6552, "step": 1699 }, { "epoch": 2.176, "grad_norm": 0.654140055179596, "learning_rate": 4.6618647458983596e-05, "loss": 0.7028, "step": 1700 }, { "epoch": 2.17728, "grad_norm": 0.6052113175392151, "learning_rate": 4.661664665866347e-05, "loss": 0.6056, "step": 1701 }, { "epoch": 2.17856, "grad_norm": 0.6727505922317505, "learning_rate": 4.661464585834334e-05, "loss": 0.6829, "step": 1702 }, { "epoch": 2.17984, "grad_norm": 0.6504889130592346, "learning_rate": 4.661264505802321e-05, "loss": 0.6782, "step": 1703 }, { "epoch": 2.18112, "grad_norm": 0.6764382123947144, "learning_rate": 4.6610644257703084e-05, "loss": 0.7543, 
"step": 1704 }, { "epoch": 2.1824, "grad_norm": 0.6401752829551697, "learning_rate": 4.6608643457382955e-05, "loss": 0.6898, "step": 1705 }, { "epoch": 2.18368, "grad_norm": 0.6742962002754211, "learning_rate": 4.660664265706283e-05, "loss": 0.6884, "step": 1706 }, { "epoch": 2.1849600000000002, "grad_norm": 0.6535323262214661, "learning_rate": 4.66046418567427e-05, "loss": 0.6647, "step": 1707 }, { "epoch": 2.18624, "grad_norm": 0.6376174092292786, "learning_rate": 4.660264105642257e-05, "loss": 0.6964, "step": 1708 }, { "epoch": 2.18752, "grad_norm": 0.6661503314971924, "learning_rate": 4.660064025610244e-05, "loss": 0.6898, "step": 1709 }, { "epoch": 2.1888, "grad_norm": 0.6385532021522522, "learning_rate": 4.6598639455782315e-05, "loss": 0.6951, "step": 1710 }, { "epoch": 2.19008, "grad_norm": 0.6412584185600281, "learning_rate": 4.6596638655462187e-05, "loss": 0.7281, "step": 1711 }, { "epoch": 2.19136, "grad_norm": 0.7024900317192078, "learning_rate": 4.659463785514206e-05, "loss": 0.7168, "step": 1712 }, { "epoch": 2.19264, "grad_norm": 0.6655505895614624, "learning_rate": 4.659263705482193e-05, "loss": 0.6731, "step": 1713 }, { "epoch": 2.19392, "grad_norm": 0.6673122048377991, "learning_rate": 4.65906362545018e-05, "loss": 0.757, "step": 1714 }, { "epoch": 2.1952, "grad_norm": 0.6695178151130676, "learning_rate": 4.6588635454181674e-05, "loss": 0.7752, "step": 1715 }, { "epoch": 2.19648, "grad_norm": 0.6591713428497314, "learning_rate": 4.658663465386155e-05, "loss": 0.6715, "step": 1716 }, { "epoch": 2.19776, "grad_norm": 0.6705875396728516, "learning_rate": 4.658463385354142e-05, "loss": 0.6875, "step": 1717 }, { "epoch": 2.19904, "grad_norm": 0.6780663728713989, "learning_rate": 4.658263305322129e-05, "loss": 0.697, "step": 1718 }, { "epoch": 2.20032, "grad_norm": 0.6735832691192627, "learning_rate": 4.658063225290116e-05, "loss": 0.6991, "step": 1719 }, { "epoch": 2.2016, "grad_norm": 0.640477180480957, "learning_rate": 4.657863145258103e-05, "loss": 
0.694, "step": 1720 }, { "epoch": 2.20288, "grad_norm": 0.6748470067977905, "learning_rate": 4.6576630652260905e-05, "loss": 0.6939, "step": 1721 }, { "epoch": 2.20416, "grad_norm": 0.607291579246521, "learning_rate": 4.657462985194078e-05, "loss": 0.651, "step": 1722 }, { "epoch": 2.20544, "grad_norm": 0.6633931398391724, "learning_rate": 4.6572629051620656e-05, "loss": 0.7131, "step": 1723 }, { "epoch": 2.20672, "grad_norm": 0.7354316711425781, "learning_rate": 4.657062825130053e-05, "loss": 0.6963, "step": 1724 }, { "epoch": 2.208, "grad_norm": 0.708829402923584, "learning_rate": 4.656862745098039e-05, "loss": 0.736, "step": 1725 }, { "epoch": 2.20928, "grad_norm": 0.6382274031639099, "learning_rate": 4.6566626650660264e-05, "loss": 0.663, "step": 1726 }, { "epoch": 2.21056, "grad_norm": 0.6313372850418091, "learning_rate": 4.6564625850340136e-05, "loss": 0.6861, "step": 1727 }, { "epoch": 2.21184, "grad_norm": 0.6234421730041504, "learning_rate": 4.656262505002001e-05, "loss": 0.6846, "step": 1728 }, { "epoch": 2.21312, "grad_norm": 0.6155267953872681, "learning_rate": 4.656062424969988e-05, "loss": 0.6289, "step": 1729 }, { "epoch": 2.2144, "grad_norm": 0.6332612633705139, "learning_rate": 4.655862344937976e-05, "loss": 0.6752, "step": 1730 }, { "epoch": 2.21568, "grad_norm": 0.6155014634132385, "learning_rate": 4.655662264905963e-05, "loss": 0.6088, "step": 1731 }, { "epoch": 2.21696, "grad_norm": 0.6483979821205139, "learning_rate": 4.65546218487395e-05, "loss": 0.688, "step": 1732 }, { "epoch": 2.2182399999999998, "grad_norm": 0.6692848801612854, "learning_rate": 4.655262104841937e-05, "loss": 0.6918, "step": 1733 }, { "epoch": 2.21952, "grad_norm": 0.6489692330360413, "learning_rate": 4.655062024809924e-05, "loss": 0.7016, "step": 1734 }, { "epoch": 2.2208, "grad_norm": 0.6827375888824463, "learning_rate": 4.654861944777911e-05, "loss": 0.7165, "step": 1735 }, { "epoch": 2.22208, "grad_norm": 0.652456521987915, "learning_rate": 4.654661864745898e-05, 
"loss": 0.7366, "step": 1736 }, { "epoch": 2.22336, "grad_norm": 0.6942132711410522, "learning_rate": 4.654461784713886e-05, "loss": 0.674, "step": 1737 }, { "epoch": 2.22464, "grad_norm": 0.6576859354972839, "learning_rate": 4.6542617046818734e-05, "loss": 0.7573, "step": 1738 }, { "epoch": 2.22592, "grad_norm": 0.6581286191940308, "learning_rate": 4.6540616246498605e-05, "loss": 0.6484, "step": 1739 }, { "epoch": 2.2272, "grad_norm": 0.6722337007522583, "learning_rate": 4.653861544617848e-05, "loss": 0.7328, "step": 1740 }, { "epoch": 2.22848, "grad_norm": 0.6128942966461182, "learning_rate": 4.653661464585834e-05, "loss": 0.6935, "step": 1741 }, { "epoch": 2.22976, "grad_norm": 0.632098913192749, "learning_rate": 4.6534613845538214e-05, "loss": 0.6824, "step": 1742 }, { "epoch": 2.23104, "grad_norm": 0.6399368047714233, "learning_rate": 4.6532613045218086e-05, "loss": 0.6962, "step": 1743 }, { "epoch": 2.23232, "grad_norm": 0.6291220188140869, "learning_rate": 4.653061224489796e-05, "loss": 0.6935, "step": 1744 }, { "epoch": 2.2336, "grad_norm": 0.6679767966270447, "learning_rate": 4.6528611444577837e-05, "loss": 0.685, "step": 1745 }, { "epoch": 2.23488, "grad_norm": 0.652439534664154, "learning_rate": 4.652661064425771e-05, "loss": 0.731, "step": 1746 }, { "epoch": 2.23616, "grad_norm": 0.6402837038040161, "learning_rate": 4.652460984393758e-05, "loss": 0.6515, "step": 1747 }, { "epoch": 2.23744, "grad_norm": 0.6584830284118652, "learning_rate": 4.652260904361745e-05, "loss": 0.7117, "step": 1748 }, { "epoch": 2.23872, "grad_norm": 0.6643098592758179, "learning_rate": 4.652060824329732e-05, "loss": 0.6786, "step": 1749 }, { "epoch": 2.24, "grad_norm": 0.6529515385627747, "learning_rate": 4.651860744297719e-05, "loss": 0.6985, "step": 1750 }, { "epoch": 2.24128, "grad_norm": 0.6570754051208496, "learning_rate": 4.651660664265706e-05, "loss": 0.6892, "step": 1751 }, { "epoch": 2.24256, "grad_norm": 0.6258246898651123, "learning_rate": 4.651460584233694e-05, 
"loss": 0.6781, "step": 1752 }, { "epoch": 2.24384, "grad_norm": 0.6045702695846558, "learning_rate": 4.651260504201681e-05, "loss": 0.6712, "step": 1753 }, { "epoch": 2.24512, "grad_norm": 0.6391083002090454, "learning_rate": 4.651060424169668e-05, "loss": 0.6869, "step": 1754 }, { "epoch": 2.2464, "grad_norm": 0.6669121980667114, "learning_rate": 4.6508603441376555e-05, "loss": 0.7874, "step": 1755 }, { "epoch": 2.24768, "grad_norm": 0.6193825006484985, "learning_rate": 4.650660264105643e-05, "loss": 0.6625, "step": 1756 }, { "epoch": 2.24896, "grad_norm": 0.6318017840385437, "learning_rate": 4.650460184073629e-05, "loss": 0.6807, "step": 1757 }, { "epoch": 2.25024, "grad_norm": 0.6676175594329834, "learning_rate": 4.6502601040416164e-05, "loss": 0.6965, "step": 1758 }, { "epoch": 2.25152, "grad_norm": 0.652651846408844, "learning_rate": 4.650060024009604e-05, "loss": 0.6623, "step": 1759 }, { "epoch": 2.2528, "grad_norm": 0.6952486038208008, "learning_rate": 4.6498599439775914e-05, "loss": 0.7059, "step": 1760 }, { "epoch": 2.25408, "grad_norm": 0.713154673576355, "learning_rate": 4.6496598639455786e-05, "loss": 0.7397, "step": 1761 }, { "epoch": 2.25536, "grad_norm": 0.636633574962616, "learning_rate": 4.649459783913566e-05, "loss": 0.6193, "step": 1762 }, { "epoch": 2.25664, "grad_norm": 0.6551604866981506, "learning_rate": 4.649259703881553e-05, "loss": 0.7048, "step": 1763 }, { "epoch": 2.25792, "grad_norm": 0.6308698654174805, "learning_rate": 4.64905962384954e-05, "loss": 0.6646, "step": 1764 }, { "epoch": 2.2592, "grad_norm": 0.6496629118919373, "learning_rate": 4.648859543817527e-05, "loss": 0.6879, "step": 1765 }, { "epoch": 2.26048, "grad_norm": 0.6222250461578369, "learning_rate": 4.6486594637855145e-05, "loss": 0.6635, "step": 1766 }, { "epoch": 2.2617599999999998, "grad_norm": 0.6518458724021912, "learning_rate": 4.648459383753502e-05, "loss": 0.6757, "step": 1767 }, { "epoch": 2.26304, "grad_norm": 0.656060516834259, "learning_rate": 
4.648259303721489e-05, "loss": 0.6818, "step": 1768 }, { "epoch": 2.26432, "grad_norm": 0.6683648228645325, "learning_rate": 4.648059223689476e-05, "loss": 0.7028, "step": 1769 }, { "epoch": 2.2656, "grad_norm": 0.6672934889793396, "learning_rate": 4.647859143657463e-05, "loss": 0.6928, "step": 1770 }, { "epoch": 2.26688, "grad_norm": 0.6802359223365784, "learning_rate": 4.6476590636254505e-05, "loss": 0.6616, "step": 1771 }, { "epoch": 2.26816, "grad_norm": 0.6378535628318787, "learning_rate": 4.647458983593438e-05, "loss": 0.689, "step": 1772 }, { "epoch": 2.26944, "grad_norm": 0.677901566028595, "learning_rate": 4.647258903561425e-05, "loss": 0.7436, "step": 1773 }, { "epoch": 2.27072, "grad_norm": 0.6340510249137878, "learning_rate": 4.647058823529412e-05, "loss": 0.7241, "step": 1774 }, { "epoch": 2.2720000000000002, "grad_norm": 0.6160155534744263, "learning_rate": 4.646858743497399e-05, "loss": 0.635, "step": 1775 }, { "epoch": 2.27328, "grad_norm": 0.6366074085235596, "learning_rate": 4.6466586634653864e-05, "loss": 0.6915, "step": 1776 }, { "epoch": 2.27456, "grad_norm": 0.6163462996482849, "learning_rate": 4.6464585834333736e-05, "loss": 0.6227, "step": 1777 }, { "epoch": 2.27584, "grad_norm": 0.6638872027397156, "learning_rate": 4.646258503401361e-05, "loss": 0.753, "step": 1778 }, { "epoch": 2.27712, "grad_norm": 0.6583823561668396, "learning_rate": 4.646058423369348e-05, "loss": 0.7791, "step": 1779 }, { "epoch": 2.2784, "grad_norm": 0.6398965120315552, "learning_rate": 4.645858343337335e-05, "loss": 0.6842, "step": 1780 }, { "epoch": 2.27968, "grad_norm": 0.6243705153465271, "learning_rate": 4.645658263305322e-05, "loss": 0.6953, "step": 1781 }, { "epoch": 2.28096, "grad_norm": 0.6466498374938965, "learning_rate": 4.6454581832733095e-05, "loss": 0.7053, "step": 1782 }, { "epoch": 2.28224, "grad_norm": 0.6308572292327881, "learning_rate": 4.645258103241297e-05, "loss": 0.6739, "step": 1783 }, { "epoch": 2.28352, "grad_norm": 0.6494999527931213, 
"learning_rate": 4.645058023209284e-05, "loss": 0.7472, "step": 1784 }, { "epoch": 2.2848, "grad_norm": 0.633786678314209, "learning_rate": 4.644857943177271e-05, "loss": 0.6696, "step": 1785 }, { "epoch": 2.28608, "grad_norm": 0.63746577501297, "learning_rate": 4.644657863145258e-05, "loss": 0.6897, "step": 1786 }, { "epoch": 2.28736, "grad_norm": 0.6449893116950989, "learning_rate": 4.6444577831132454e-05, "loss": 0.657, "step": 1787 }, { "epoch": 2.28864, "grad_norm": 0.6666371822357178, "learning_rate": 4.6442577030812326e-05, "loss": 0.7013, "step": 1788 }, { "epoch": 2.28992, "grad_norm": 0.6510623693466187, "learning_rate": 4.64405762304922e-05, "loss": 0.7175, "step": 1789 }, { "epoch": 2.2912, "grad_norm": 0.6616090536117554, "learning_rate": 4.643857543017207e-05, "loss": 0.6673, "step": 1790 }, { "epoch": 2.29248, "grad_norm": 0.7210375070571899, "learning_rate": 4.643657462985194e-05, "loss": 0.7318, "step": 1791 }, { "epoch": 2.29376, "grad_norm": 0.6955354809761047, "learning_rate": 4.6434573829531814e-05, "loss": 0.7548, "step": 1792 }, { "epoch": 2.29504, "grad_norm": 0.6595535278320312, "learning_rate": 4.6432573029211686e-05, "loss": 0.6672, "step": 1793 }, { "epoch": 2.29632, "grad_norm": 0.6795318126678467, "learning_rate": 4.6430572228891564e-05, "loss": 0.6465, "step": 1794 }, { "epoch": 2.2976, "grad_norm": 0.700689971446991, "learning_rate": 4.642857142857143e-05, "loss": 0.7189, "step": 1795 }, { "epoch": 2.29888, "grad_norm": 0.7070762515068054, "learning_rate": 4.64265706282513e-05, "loss": 0.7249, "step": 1796 }, { "epoch": 2.30016, "grad_norm": 0.6750733256340027, "learning_rate": 4.642456982793117e-05, "loss": 0.7291, "step": 1797 }, { "epoch": 2.30144, "grad_norm": 0.6518023610115051, "learning_rate": 4.6422569027611045e-05, "loss": 0.7376, "step": 1798 }, { "epoch": 2.30272, "grad_norm": 0.658316433429718, "learning_rate": 4.642056822729092e-05, "loss": 0.7014, "step": 1799 }, { "epoch": 2.304, "grad_norm": 0.6573535799980164, 
"learning_rate": 4.641856742697079e-05, "loss": 0.673, "step": 1800 }, { "epoch": 2.3052799999999998, "grad_norm": 0.6256746649742126, "learning_rate": 4.641656662665067e-05, "loss": 0.6777, "step": 1801 }, { "epoch": 2.30656, "grad_norm": 0.6481615900993347, "learning_rate": 4.641456582633054e-05, "loss": 0.6787, "step": 1802 }, { "epoch": 2.30784, "grad_norm": 0.6594406366348267, "learning_rate": 4.6412565026010404e-05, "loss": 0.6894, "step": 1803 }, { "epoch": 2.30912, "grad_norm": 0.6397424936294556, "learning_rate": 4.6410564225690276e-05, "loss": 0.7325, "step": 1804 }, { "epoch": 2.3104, "grad_norm": 0.6274937987327576, "learning_rate": 4.640856342537015e-05, "loss": 0.6842, "step": 1805 }, { "epoch": 2.31168, "grad_norm": 0.7071935534477234, "learning_rate": 4.640656262505002e-05, "loss": 0.7394, "step": 1806 }, { "epoch": 2.31296, "grad_norm": 0.6791349649429321, "learning_rate": 4.640456182472989e-05, "loss": 0.6907, "step": 1807 }, { "epoch": 2.31424, "grad_norm": 0.6307082772254944, "learning_rate": 4.640256102440977e-05, "loss": 0.6735, "step": 1808 }, { "epoch": 2.3155200000000002, "grad_norm": 0.6786848306655884, "learning_rate": 4.640056022408964e-05, "loss": 0.6944, "step": 1809 }, { "epoch": 2.3168, "grad_norm": 0.6838906407356262, "learning_rate": 4.6398559423769514e-05, "loss": 0.7096, "step": 1810 }, { "epoch": 2.31808, "grad_norm": 0.6599327921867371, "learning_rate": 4.639655862344938e-05, "loss": 0.6573, "step": 1811 }, { "epoch": 2.31936, "grad_norm": 0.6728073358535767, "learning_rate": 4.639455782312925e-05, "loss": 0.7074, "step": 1812 }, { "epoch": 2.32064, "grad_norm": 0.6599488258361816, "learning_rate": 4.639255702280912e-05, "loss": 0.6743, "step": 1813 }, { "epoch": 2.32192, "grad_norm": 0.7124109864234924, "learning_rate": 4.6390556222488995e-05, "loss": 0.8051, "step": 1814 }, { "epoch": 2.3232, "grad_norm": 0.6490312814712524, "learning_rate": 4.638855542216887e-05, "loss": 0.6868, "step": 1815 }, { "epoch": 2.32448, 
"grad_norm": 0.6607807278633118, "learning_rate": 4.6386554621848745e-05, "loss": 0.7233, "step": 1816 }, { "epoch": 2.32576, "grad_norm": 0.6332591772079468, "learning_rate": 4.638455382152862e-05, "loss": 0.6772, "step": 1817 }, { "epoch": 2.32704, "grad_norm": 0.6462684273719788, "learning_rate": 4.638255302120849e-05, "loss": 0.6809, "step": 1818 }, { "epoch": 2.32832, "grad_norm": 0.685043454170227, "learning_rate": 4.6380552220888354e-05, "loss": 0.672, "step": 1819 }, { "epoch": 2.3296, "grad_norm": 0.6536259651184082, "learning_rate": 4.6378551420568226e-05, "loss": 0.6845, "step": 1820 }, { "epoch": 2.33088, "grad_norm": 0.6470760703086853, "learning_rate": 4.63765506202481e-05, "loss": 0.6574, "step": 1821 }, { "epoch": 2.33216, "grad_norm": 0.6793741583824158, "learning_rate": 4.6374549819927976e-05, "loss": 0.7286, "step": 1822 }, { "epoch": 2.33344, "grad_norm": 0.633575975894928, "learning_rate": 4.637254901960785e-05, "loss": 0.689, "step": 1823 }, { "epoch": 2.33472, "grad_norm": 0.6770448088645935, "learning_rate": 4.637054821928772e-05, "loss": 0.6672, "step": 1824 }, { "epoch": 2.336, "grad_norm": 0.6367180943489075, "learning_rate": 4.636854741896759e-05, "loss": 0.712, "step": 1825 }, { "epoch": 2.33728, "grad_norm": 0.6444728970527649, "learning_rate": 4.6366546618647464e-05, "loss": 0.78, "step": 1826 }, { "epoch": 2.33856, "grad_norm": 0.6529805064201355, "learning_rate": 4.636454581832733e-05, "loss": 0.6594, "step": 1827 }, { "epoch": 2.33984, "grad_norm": 0.6504684090614319, "learning_rate": 4.63625450180072e-05, "loss": 0.6725, "step": 1828 }, { "epoch": 2.34112, "grad_norm": 0.645531177520752, "learning_rate": 4.636054421768708e-05, "loss": 0.6721, "step": 1829 }, { "epoch": 2.3424, "grad_norm": 0.6722829341888428, "learning_rate": 4.635854341736695e-05, "loss": 0.772, "step": 1830 }, { "epoch": 2.34368, "grad_norm": 0.6697701215744019, "learning_rate": 4.635654261704682e-05, "loss": 0.7057, "step": 1831 }, { "epoch": 2.34496, 
"grad_norm": 0.6601827144622803, "learning_rate": 4.6354541816726695e-05, "loss": 0.644, "step": 1832 }, { "epoch": 2.34624, "grad_norm": 0.6771488189697266, "learning_rate": 4.635254101640657e-05, "loss": 0.7449, "step": 1833 }, { "epoch": 2.34752, "grad_norm": 0.6421104073524475, "learning_rate": 4.635054021608644e-05, "loss": 0.7342, "step": 1834 }, { "epoch": 2.3487999999999998, "grad_norm": 0.6297141909599304, "learning_rate": 4.6348539415766304e-05, "loss": 0.706, "step": 1835 }, { "epoch": 2.35008, "grad_norm": 0.6440832018852234, "learning_rate": 4.634653861544618e-05, "loss": 0.7282, "step": 1836 }, { "epoch": 2.35136, "grad_norm": 0.6416788101196289, "learning_rate": 4.6344537815126054e-05, "loss": 0.6494, "step": 1837 }, { "epoch": 2.35264, "grad_norm": 0.6922294497489929, "learning_rate": 4.6342537014805926e-05, "loss": 0.7179, "step": 1838 }, { "epoch": 2.35392, "grad_norm": 0.674335241317749, "learning_rate": 4.63405362144858e-05, "loss": 0.6737, "step": 1839 }, { "epoch": 2.3552, "grad_norm": 0.6704332232475281, "learning_rate": 4.633853541416567e-05, "loss": 0.7289, "step": 1840 }, { "epoch": 2.35648, "grad_norm": 0.6815084218978882, "learning_rate": 4.633653461384554e-05, "loss": 0.6504, "step": 1841 }, { "epoch": 2.35776, "grad_norm": 0.6917728781700134, "learning_rate": 4.6334533813525413e-05, "loss": 0.7133, "step": 1842 }, { "epoch": 2.3590400000000002, "grad_norm": 0.6595756411552429, "learning_rate": 4.6332533013205285e-05, "loss": 0.6622, "step": 1843 }, { "epoch": 2.3603199999999998, "grad_norm": 0.6047163009643555, "learning_rate": 4.633053221288516e-05, "loss": 0.682, "step": 1844 }, { "epoch": 2.3616, "grad_norm": 0.6585202217102051, "learning_rate": 4.632853141256503e-05, "loss": 0.7206, "step": 1845 }, { "epoch": 2.36288, "grad_norm": 0.6378026604652405, "learning_rate": 4.63265306122449e-05, "loss": 0.6693, "step": 1846 }, { "epoch": 2.36416, "grad_norm": 0.6626023054122925, "learning_rate": 4.632452981192477e-05, "loss": 0.7427, 
"step": 1847 }, { "epoch": 2.36544, "grad_norm": 0.652944803237915, "learning_rate": 4.6322529011604645e-05, "loss": 0.6706, "step": 1848 }, { "epoch": 2.36672, "grad_norm": 0.6104753613471985, "learning_rate": 4.6320528211284516e-05, "loss": 0.6231, "step": 1849 }, { "epoch": 2.368, "grad_norm": 0.5891187191009521, "learning_rate": 4.631852741096439e-05, "loss": 0.6612, "step": 1850 }, { "epoch": 2.36928, "grad_norm": 0.6433348059654236, "learning_rate": 4.631652661064426e-05, "loss": 0.6903, "step": 1851 }, { "epoch": 2.3705600000000002, "grad_norm": 0.6553032994270325, "learning_rate": 4.631452581032413e-05, "loss": 0.6837, "step": 1852 }, { "epoch": 2.37184, "grad_norm": 0.6701604723930359, "learning_rate": 4.6312525010004004e-05, "loss": 0.7339, "step": 1853 }, { "epoch": 2.37312, "grad_norm": 0.6654971837997437, "learning_rate": 4.6310524209683876e-05, "loss": 0.7244, "step": 1854 }, { "epoch": 2.3744, "grad_norm": 0.6845753788948059, "learning_rate": 4.630852340936375e-05, "loss": 0.742, "step": 1855 }, { "epoch": 2.37568, "grad_norm": 0.651664137840271, "learning_rate": 4.630652260904362e-05, "loss": 0.6837, "step": 1856 }, { "epoch": 2.37696, "grad_norm": 0.7059352993965149, "learning_rate": 4.630452180872349e-05, "loss": 0.7274, "step": 1857 }, { "epoch": 2.37824, "grad_norm": 0.6303350329399109, "learning_rate": 4.630252100840336e-05, "loss": 0.7218, "step": 1858 }, { "epoch": 2.37952, "grad_norm": 0.6301616430282593, "learning_rate": 4.6300520208083235e-05, "loss": 0.6528, "step": 1859 }, { "epoch": 2.3808, "grad_norm": 0.6489315032958984, "learning_rate": 4.629851940776311e-05, "loss": 0.6807, "step": 1860 }, { "epoch": 2.38208, "grad_norm": 0.6146230697631836, "learning_rate": 4.629651860744298e-05, "loss": 0.671, "step": 1861 }, { "epoch": 2.38336, "grad_norm": 0.6300916075706482, "learning_rate": 4.629451780712285e-05, "loss": 0.6797, "step": 1862 }, { "epoch": 2.38464, "grad_norm": 0.6397101283073425, "learning_rate": 4.629251700680272e-05, "loss": 
0.6652, "step": 1863 }, { "epoch": 2.38592, "grad_norm": 0.6682645678520203, "learning_rate": 4.6290516206482594e-05, "loss": 0.7132, "step": 1864 }, { "epoch": 2.3872, "grad_norm": 0.6869545578956604, "learning_rate": 4.6288515406162466e-05, "loss": 0.6986, "step": 1865 }, { "epoch": 2.38848, "grad_norm": 0.6472262144088745, "learning_rate": 4.628651460584234e-05, "loss": 0.6647, "step": 1866 }, { "epoch": 2.38976, "grad_norm": 0.6385058164596558, "learning_rate": 4.628451380552221e-05, "loss": 0.6489, "step": 1867 }, { "epoch": 2.39104, "grad_norm": 0.6617813110351562, "learning_rate": 4.628251300520208e-05, "loss": 0.715, "step": 1868 }, { "epoch": 2.39232, "grad_norm": 0.6766805648803711, "learning_rate": 4.6280512204881954e-05, "loss": 0.6902, "step": 1869 }, { "epoch": 2.3936, "grad_norm": 0.6299700736999512, "learning_rate": 4.6278511404561825e-05, "loss": 0.6678, "step": 1870 }, { "epoch": 2.39488, "grad_norm": 0.7133849263191223, "learning_rate": 4.62765106042417e-05, "loss": 0.7164, "step": 1871 }, { "epoch": 2.39616, "grad_norm": 0.6625071167945862, "learning_rate": 4.6274509803921576e-05, "loss": 0.7179, "step": 1872 }, { "epoch": 2.39744, "grad_norm": 0.6794329881668091, "learning_rate": 4.627250900360144e-05, "loss": 0.7199, "step": 1873 }, { "epoch": 2.39872, "grad_norm": 0.630755603313446, "learning_rate": 4.627050820328131e-05, "loss": 0.7723, "step": 1874 }, { "epoch": 2.4, "grad_norm": 0.6161158680915833, "learning_rate": 4.6268507402961185e-05, "loss": 0.5934, "step": 1875 }, { "epoch": 2.40128, "grad_norm": 0.6303229928016663, "learning_rate": 4.6266506602641057e-05, "loss": 0.689, "step": 1876 }, { "epoch": 2.40256, "grad_norm": 0.6833181977272034, "learning_rate": 4.626450580232093e-05, "loss": 0.7554, "step": 1877 }, { "epoch": 2.4038399999999998, "grad_norm": 0.6531204581260681, "learning_rate": 4.62625050020008e-05, "loss": 0.6816, "step": 1878 }, { "epoch": 2.40512, "grad_norm": 0.7215322256088257, "learning_rate": 4.626050420168068e-05, 
"loss": 0.7196, "step": 1879 }, { "epoch": 2.4064, "grad_norm": 0.6577185988426208, "learning_rate": 4.625850340136055e-05, "loss": 0.6797, "step": 1880 }, { "epoch": 2.40768, "grad_norm": 0.6590116620063782, "learning_rate": 4.6256502601040416e-05, "loss": 0.6712, "step": 1881 }, { "epoch": 2.40896, "grad_norm": 0.715766191482544, "learning_rate": 4.625450180072029e-05, "loss": 0.7324, "step": 1882 }, { "epoch": 2.41024, "grad_norm": 0.6765993237495422, "learning_rate": 4.625250100040016e-05, "loss": 0.726, "step": 1883 }, { "epoch": 2.41152, "grad_norm": 0.6403140425682068, "learning_rate": 4.625050020008003e-05, "loss": 0.6877, "step": 1884 }, { "epoch": 2.4128, "grad_norm": 0.6255505681037903, "learning_rate": 4.62484993997599e-05, "loss": 0.6142, "step": 1885 }, { "epoch": 2.4140800000000002, "grad_norm": 0.6832484006881714, "learning_rate": 4.624649859943978e-05, "loss": 0.6964, "step": 1886 }, { "epoch": 2.41536, "grad_norm": 0.6387737989425659, "learning_rate": 4.6244497799119654e-05, "loss": 0.7023, "step": 1887 }, { "epoch": 2.41664, "grad_norm": 0.6823368072509766, "learning_rate": 4.6242496998799526e-05, "loss": 0.7162, "step": 1888 }, { "epoch": 2.41792, "grad_norm": 0.6266238689422607, "learning_rate": 4.624049619847939e-05, "loss": 0.6378, "step": 1889 }, { "epoch": 2.4192, "grad_norm": 0.705518901348114, "learning_rate": 4.623849539815926e-05, "loss": 0.7405, "step": 1890 }, { "epoch": 2.42048, "grad_norm": 0.6303145885467529, "learning_rate": 4.6236494597839134e-05, "loss": 0.6536, "step": 1891 }, { "epoch": 2.42176, "grad_norm": 0.6812713146209717, "learning_rate": 4.6234493797519006e-05, "loss": 0.6988, "step": 1892 }, { "epoch": 2.42304, "grad_norm": 0.6497737169265747, "learning_rate": 4.6232492997198885e-05, "loss": 0.6926, "step": 1893 }, { "epoch": 2.42432, "grad_norm": 0.669630765914917, "learning_rate": 4.623049219687876e-05, "loss": 0.7162, "step": 1894 }, { "epoch": 2.4256, "grad_norm": 0.6586965918540955, "learning_rate": 
4.622849139655863e-05, "loss": 0.6695, "step": 1895 }, { "epoch": 2.42688, "grad_norm": 0.6347070336341858, "learning_rate": 4.62264905962385e-05, "loss": 0.6818, "step": 1896 }, { "epoch": 2.42816, "grad_norm": 0.6584689617156982, "learning_rate": 4.6224489795918366e-05, "loss": 0.733, "step": 1897 }, { "epoch": 2.42944, "grad_norm": 0.6446535587310791, "learning_rate": 4.622248899559824e-05, "loss": 0.6938, "step": 1898 }, { "epoch": 2.43072, "grad_norm": 0.6775670051574707, "learning_rate": 4.622048819527811e-05, "loss": 0.7274, "step": 1899 }, { "epoch": 2.432, "grad_norm": 0.636137068271637, "learning_rate": 4.621848739495799e-05, "loss": 0.6261, "step": 1900 }, { "epoch": 2.43328, "grad_norm": 0.6404497623443604, "learning_rate": 4.621648659463786e-05, "loss": 0.6962, "step": 1901 }, { "epoch": 2.43456, "grad_norm": 0.661217451095581, "learning_rate": 4.621448579431773e-05, "loss": 0.7293, "step": 1902 }, { "epoch": 2.43584, "grad_norm": 0.6509891748428345, "learning_rate": 4.6212484993997603e-05, "loss": 0.7254, "step": 1903 }, { "epoch": 2.43712, "grad_norm": 0.644158661365509, "learning_rate": 4.6210484193677475e-05, "loss": 0.6958, "step": 1904 }, { "epoch": 2.4384, "grad_norm": 0.6828984022140503, "learning_rate": 4.620848339335734e-05, "loss": 0.6665, "step": 1905 }, { "epoch": 2.43968, "grad_norm": 0.6128406524658203, "learning_rate": 4.620648259303721e-05, "loss": 0.6624, "step": 1906 }, { "epoch": 2.44096, "grad_norm": 0.7112566232681274, "learning_rate": 4.620448179271709e-05, "loss": 0.7079, "step": 1907 }, { "epoch": 2.44224, "grad_norm": 0.6818531155586243, "learning_rate": 4.620248099239696e-05, "loss": 0.7034, "step": 1908 }, { "epoch": 2.44352, "grad_norm": 0.6345267295837402, "learning_rate": 4.6200480192076835e-05, "loss": 0.6841, "step": 1909 }, { "epoch": 2.4448, "grad_norm": 0.7214930057525635, "learning_rate": 4.6198479391756706e-05, "loss": 0.7026, "step": 1910 }, { "epoch": 2.44608, "grad_norm": 0.6622048616409302, "learning_rate": 
4.619647859143658e-05, "loss": 0.6964, "step": 1911 }, { "epoch": 2.4473599999999998, "grad_norm": 0.6323049664497375, "learning_rate": 4.619447779111645e-05, "loss": 0.7194, "step": 1912 }, { "epoch": 2.44864, "grad_norm": 0.6646015644073486, "learning_rate": 4.6192476990796315e-05, "loss": 0.6881, "step": 1913 }, { "epoch": 2.44992, "grad_norm": 0.6655915975570679, "learning_rate": 4.6190476190476194e-05, "loss": 0.6974, "step": 1914 }, { "epoch": 2.4512, "grad_norm": 0.6498141288757324, "learning_rate": 4.6188475390156066e-05, "loss": 0.6626, "step": 1915 }, { "epoch": 2.45248, "grad_norm": 0.6257582306861877, "learning_rate": 4.618647458983594e-05, "loss": 0.6945, "step": 1916 }, { "epoch": 2.45376, "grad_norm": 0.6620806455612183, "learning_rate": 4.618447378951581e-05, "loss": 0.6976, "step": 1917 }, { "epoch": 2.45504, "grad_norm": 0.6166614890098572, "learning_rate": 4.618247298919568e-05, "loss": 0.7004, "step": 1918 }, { "epoch": 2.45632, "grad_norm": 0.6367224454879761, "learning_rate": 4.618047218887555e-05, "loss": 0.7098, "step": 1919 }, { "epoch": 2.4576000000000002, "grad_norm": 0.6246047019958496, "learning_rate": 4.6178471388555425e-05, "loss": 0.6933, "step": 1920 }, { "epoch": 2.45888, "grad_norm": 0.6195082664489746, "learning_rate": 4.61764705882353e-05, "loss": 0.6455, "step": 1921 }, { "epoch": 2.46016, "grad_norm": 0.6568025350570679, "learning_rate": 4.617446978791517e-05, "loss": 0.7072, "step": 1922 }, { "epoch": 2.46144, "grad_norm": 0.6700795292854309, "learning_rate": 4.617246898759504e-05, "loss": 0.7353, "step": 1923 }, { "epoch": 2.46272, "grad_norm": 0.6941995620727539, "learning_rate": 4.617046818727491e-05, "loss": 0.7014, "step": 1924 }, { "epoch": 2.464, "grad_norm": 0.6903062462806702, "learning_rate": 4.6168467386954784e-05, "loss": 0.7458, "step": 1925 }, { "epoch": 2.46528, "grad_norm": 0.6764419078826904, "learning_rate": 4.6166466586634656e-05, "loss": 0.6934, "step": 1926 }, { "epoch": 2.46656, "grad_norm": 
0.6628318428993225, "learning_rate": 4.616446578631453e-05, "loss": 0.6735, "step": 1927 }, { "epoch": 2.46784, "grad_norm": 0.6923201084136963, "learning_rate": 4.61624649859944e-05, "loss": 0.7516, "step": 1928 }, { "epoch": 2.46912, "grad_norm": 0.6832184791564941, "learning_rate": 4.616046418567427e-05, "loss": 0.7106, "step": 1929 }, { "epoch": 2.4704, "grad_norm": 0.6620609164237976, "learning_rate": 4.6158463385354144e-05, "loss": 0.6961, "step": 1930 }, { "epoch": 2.47168, "grad_norm": 0.7005398273468018, "learning_rate": 4.6156462585034015e-05, "loss": 0.7223, "step": 1931 }, { "epoch": 2.47296, "grad_norm": 0.6928220987319946, "learning_rate": 4.615446178471389e-05, "loss": 0.7758, "step": 1932 }, { "epoch": 2.47424, "grad_norm": 0.6193888783454895, "learning_rate": 4.615246098439376e-05, "loss": 0.6345, "step": 1933 }, { "epoch": 2.47552, "grad_norm": 0.6694657802581787, "learning_rate": 4.615046018407363e-05, "loss": 0.6957, "step": 1934 }, { "epoch": 2.4768, "grad_norm": 0.6846767663955688, "learning_rate": 4.61484593837535e-05, "loss": 0.6667, "step": 1935 }, { "epoch": 2.47808, "grad_norm": 0.6446174383163452, "learning_rate": 4.6146458583433375e-05, "loss": 0.7216, "step": 1936 }, { "epoch": 2.47936, "grad_norm": 0.6601637601852417, "learning_rate": 4.6144457783113247e-05, "loss": 0.7262, "step": 1937 }, { "epoch": 2.48064, "grad_norm": 0.6476382613182068, "learning_rate": 4.614245698279312e-05, "loss": 0.7142, "step": 1938 }, { "epoch": 2.48192, "grad_norm": 0.6557601094245911, "learning_rate": 4.614045618247299e-05, "loss": 0.7386, "step": 1939 }, { "epoch": 2.4832, "grad_norm": 0.6669868230819702, "learning_rate": 4.613845538215286e-05, "loss": 0.7643, "step": 1940 }, { "epoch": 2.48448, "grad_norm": 0.6425213813781738, "learning_rate": 4.6136454581832734e-05, "loss": 0.643, "step": 1941 }, { "epoch": 2.48576, "grad_norm": 0.6689165234565735, "learning_rate": 4.613445378151261e-05, "loss": 0.7025, "step": 1942 }, { "epoch": 2.48704, "grad_norm": 
0.6275350451469421, "learning_rate": 4.613245298119248e-05, "loss": 0.6482, "step": 1943 }, { "epoch": 2.48832, "grad_norm": 0.6056965589523315, "learning_rate": 4.613045218087235e-05, "loss": 0.6807, "step": 1944 }, { "epoch": 2.4896, "grad_norm": 0.6282046437263489, "learning_rate": 4.612845138055222e-05, "loss": 0.6564, "step": 1945 }, { "epoch": 2.4908799999999998, "grad_norm": 0.640059769153595, "learning_rate": 4.612645058023209e-05, "loss": 0.6873, "step": 1946 }, { "epoch": 2.49216, "grad_norm": 0.6487705707550049, "learning_rate": 4.6124449779911965e-05, "loss": 0.6516, "step": 1947 }, { "epoch": 2.49344, "grad_norm": 0.6214262843132019, "learning_rate": 4.612244897959184e-05, "loss": 0.699, "step": 1948 }, { "epoch": 2.49472, "grad_norm": 0.6714819073677063, "learning_rate": 4.6120448179271716e-05, "loss": 0.6946, "step": 1949 }, { "epoch": 2.496, "grad_norm": 0.6522096991539001, "learning_rate": 4.611844737895159e-05, "loss": 0.7193, "step": 1950 }, { "epoch": 2.49728, "grad_norm": 0.6565654277801514, "learning_rate": 4.611644657863145e-05, "loss": 0.7604, "step": 1951 }, { "epoch": 2.49856, "grad_norm": 0.602493166923523, "learning_rate": 4.6114445778311324e-05, "loss": 0.6623, "step": 1952 }, { "epoch": 2.49984, "grad_norm": 0.6453181505203247, "learning_rate": 4.6112444977991196e-05, "loss": 0.6349, "step": 1953 }, { "epoch": 2.5011200000000002, "grad_norm": 0.6643373966217041, "learning_rate": 4.611044417767107e-05, "loss": 0.7352, "step": 1954 }, { "epoch": 2.5023999999999997, "grad_norm": 0.6704357862472534, "learning_rate": 4.610844337735094e-05, "loss": 0.744, "step": 1955 }, { "epoch": 2.50368, "grad_norm": 0.6634871363639832, "learning_rate": 4.610644257703082e-05, "loss": 0.6748, "step": 1956 }, { "epoch": 2.50496, "grad_norm": 0.6585037708282471, "learning_rate": 4.610444177671069e-05, "loss": 0.6893, "step": 1957 }, { "epoch": 2.50624, "grad_norm": 0.6182425022125244, "learning_rate": 4.610244097639056e-05, "loss": 0.7226, "step": 1958 }, { 
"epoch": 2.50752, "grad_norm": 0.6366656422615051, "learning_rate": 4.610044017607043e-05, "loss": 0.6535, "step": 1959 }, { "epoch": 2.5088, "grad_norm": 0.6848028302192688, "learning_rate": 4.60984393757503e-05, "loss": 0.7085, "step": 1960 }, { "epoch": 2.51008, "grad_norm": 0.6243534684181213, "learning_rate": 4.609643857543017e-05, "loss": 0.6508, "step": 1961 }, { "epoch": 2.51136, "grad_norm": 0.6199184656143188, "learning_rate": 4.609443777511004e-05, "loss": 0.6997, "step": 1962 }, { "epoch": 2.51264, "grad_norm": 0.6194076538085938, "learning_rate": 4.6092436974789915e-05, "loss": 0.6587, "step": 1963 }, { "epoch": 2.51392, "grad_norm": 0.6427433490753174, "learning_rate": 4.6090436174469793e-05, "loss": 0.6716, "step": 1964 }, { "epoch": 2.5152, "grad_norm": 0.6448578238487244, "learning_rate": 4.6088435374149665e-05, "loss": 0.7232, "step": 1965 }, { "epoch": 2.51648, "grad_norm": 0.6704204678535461, "learning_rate": 4.608643457382954e-05, "loss": 0.7486, "step": 1966 }, { "epoch": 2.51776, "grad_norm": 0.6715102195739746, "learning_rate": 4.60844337735094e-05, "loss": 0.6453, "step": 1967 }, { "epoch": 2.51904, "grad_norm": 0.7025008201599121, "learning_rate": 4.6082432973189274e-05, "loss": 0.7043, "step": 1968 }, { "epoch": 2.52032, "grad_norm": 0.689388632774353, "learning_rate": 4.6080432172869146e-05, "loss": 0.686, "step": 1969 }, { "epoch": 2.5216, "grad_norm": 0.6979616284370422, "learning_rate": 4.607843137254902e-05, "loss": 0.7104, "step": 1970 }, { "epoch": 2.52288, "grad_norm": 0.6773265600204468, "learning_rate": 4.6076430572228896e-05, "loss": 0.7321, "step": 1971 }, { "epoch": 2.52416, "grad_norm": 0.6521515250205994, "learning_rate": 4.607442977190877e-05, "loss": 0.6446, "step": 1972 }, { "epoch": 2.52544, "grad_norm": 0.6768288612365723, "learning_rate": 4.607242897158864e-05, "loss": 0.672, "step": 1973 }, { "epoch": 2.52672, "grad_norm": 0.6806971430778503, "learning_rate": 4.607042817126851e-05, "loss": 0.7247, "step": 1974 }, { 
"epoch": 2.528, "grad_norm": 0.6461851596832275, "learning_rate": 4.606842737094838e-05, "loss": 0.6563, "step": 1975 }, { "epoch": 2.52928, "grad_norm": 0.6352160573005676, "learning_rate": 4.606642657062825e-05, "loss": 0.6648, "step": 1976 }, { "epoch": 2.53056, "grad_norm": 0.6496264338493347, "learning_rate": 4.606442577030812e-05, "loss": 0.7253, "step": 1977 }, { "epoch": 2.53184, "grad_norm": 0.6366921067237854, "learning_rate": 4.6062424969988e-05, "loss": 0.6862, "step": 1978 }, { "epoch": 2.5331200000000003, "grad_norm": 0.6239694952964783, "learning_rate": 4.606042416966787e-05, "loss": 0.6554, "step": 1979 }, { "epoch": 2.5343999999999998, "grad_norm": 0.6517849564552307, "learning_rate": 4.605842336934774e-05, "loss": 0.7166, "step": 1980 }, { "epoch": 2.53568, "grad_norm": 0.6458687782287598, "learning_rate": 4.6056422569027615e-05, "loss": 0.7159, "step": 1981 }, { "epoch": 2.53696, "grad_norm": 0.6582731008529663, "learning_rate": 4.605442176870749e-05, "loss": 0.6434, "step": 1982 }, { "epoch": 2.53824, "grad_norm": 0.6515088081359863, "learning_rate": 4.605242096838735e-05, "loss": 0.7313, "step": 1983 }, { "epoch": 2.53952, "grad_norm": 0.65621417760849, "learning_rate": 4.6050420168067224e-05, "loss": 0.657, "step": 1984 }, { "epoch": 2.5408, "grad_norm": 0.6879109740257263, "learning_rate": 4.60484193677471e-05, "loss": 0.6906, "step": 1985 }, { "epoch": 2.54208, "grad_norm": 0.6400864124298096, "learning_rate": 4.6046418567426974e-05, "loss": 0.703, "step": 1986 }, { "epoch": 2.54336, "grad_norm": 0.6810969114303589, "learning_rate": 4.6044417767106846e-05, "loss": 0.712, "step": 1987 }, { "epoch": 2.5446400000000002, "grad_norm": 0.6492802500724792, "learning_rate": 4.604241696678672e-05, "loss": 0.6362, "step": 1988 }, { "epoch": 2.5459199999999997, "grad_norm": 0.6736019253730774, "learning_rate": 4.604041616646659e-05, "loss": 0.7507, "step": 1989 }, { "epoch": 2.5472, "grad_norm": 0.6537976861000061, "learning_rate": 
4.603841536614646e-05, "loss": 0.6868, "step": 1990 }, { "epoch": 2.54848, "grad_norm": 0.6979777812957764, "learning_rate": 4.603641456582633e-05, "loss": 0.7275, "step": 1991 }, { "epoch": 2.54976, "grad_norm": 0.6660913825035095, "learning_rate": 4.6034413765506205e-05, "loss": 0.6566, "step": 1992 }, { "epoch": 2.55104, "grad_norm": 0.6853949427604675, "learning_rate": 4.603241296518608e-05, "loss": 0.7594, "step": 1993 }, { "epoch": 2.55232, "grad_norm": 0.6438176035881042, "learning_rate": 4.603041216486595e-05, "loss": 0.6594, "step": 1994 }, { "epoch": 2.5536, "grad_norm": 0.6791808009147644, "learning_rate": 4.602841136454582e-05, "loss": 0.6871, "step": 1995 }, { "epoch": 2.55488, "grad_norm": 0.6433961391448975, "learning_rate": 4.602641056422569e-05, "loss": 0.6616, "step": 1996 }, { "epoch": 2.55616, "grad_norm": 0.6358078122138977, "learning_rate": 4.6024409763905565e-05, "loss": 0.6676, "step": 1997 }, { "epoch": 2.55744, "grad_norm": 0.6184052228927612, "learning_rate": 4.6022408963585437e-05, "loss": 0.7061, "step": 1998 }, { "epoch": 2.55872, "grad_norm": 0.6975054740905762, "learning_rate": 4.602040816326531e-05, "loss": 0.7147, "step": 1999 }, { "epoch": 2.56, "grad_norm": 0.7169183492660522, "learning_rate": 4.601840736294518e-05, "loss": 0.7385, "step": 2000 }, { "epoch": 2.56128, "grad_norm": 0.6839573383331299, "learning_rate": 4.601640656262505e-05, "loss": 0.7901, "step": 2001 }, { "epoch": 2.56256, "grad_norm": 0.6511650681495667, "learning_rate": 4.6014405762304924e-05, "loss": 0.7281, "step": 2002 }, { "epoch": 2.56384, "grad_norm": 0.6317888498306274, "learning_rate": 4.6012404961984796e-05, "loss": 0.6668, "step": 2003 }, { "epoch": 2.56512, "grad_norm": 0.6695280075073242, "learning_rate": 4.601040416166467e-05, "loss": 0.6941, "step": 2004 }, { "epoch": 2.5664, "grad_norm": 0.6781874895095825, "learning_rate": 4.600840336134454e-05, "loss": 0.7154, "step": 2005 }, { "epoch": 2.56768, "grad_norm": 0.6626363396644592, "learning_rate": 
4.600640256102441e-05, "loss": 0.6992, "step": 2006 }, { "epoch": 2.56896, "grad_norm": 0.702985405921936, "learning_rate": 4.600440176070428e-05, "loss": 0.6923, "step": 2007 }, { "epoch": 2.57024, "grad_norm": 0.6602315306663513, "learning_rate": 4.6002400960384155e-05, "loss": 0.6645, "step": 2008 }, { "epoch": 2.57152, "grad_norm": 0.6675458550453186, "learning_rate": 4.600040016006403e-05, "loss": 0.6893, "step": 2009 }, { "epoch": 2.5728, "grad_norm": 0.7174723744392395, "learning_rate": 4.59983993597439e-05, "loss": 0.7271, "step": 2010 }, { "epoch": 2.57408, "grad_norm": 0.7206019163131714, "learning_rate": 4.599639855942377e-05, "loss": 0.711, "step": 2011 }, { "epoch": 2.57536, "grad_norm": 0.6554808616638184, "learning_rate": 4.599439775910364e-05, "loss": 0.6322, "step": 2012 }, { "epoch": 2.5766400000000003, "grad_norm": 0.6562361717224121, "learning_rate": 4.599239695878352e-05, "loss": 0.7233, "step": 2013 }, { "epoch": 2.5779199999999998, "grad_norm": 0.6553013920783997, "learning_rate": 4.5990396158463386e-05, "loss": 0.6693, "step": 2014 }, { "epoch": 2.5792, "grad_norm": 0.651155948638916, "learning_rate": 4.598839535814326e-05, "loss": 0.7005, "step": 2015 }, { "epoch": 2.58048, "grad_norm": 0.6626810431480408, "learning_rate": 4.598639455782313e-05, "loss": 0.6962, "step": 2016 }, { "epoch": 2.58176, "grad_norm": 0.6690220832824707, "learning_rate": 4.5984393757503e-05, "loss": 0.6897, "step": 2017 }, { "epoch": 2.58304, "grad_norm": 0.6718343496322632, "learning_rate": 4.5982392957182874e-05, "loss": 0.7071, "step": 2018 }, { "epoch": 2.58432, "grad_norm": 0.6581987142562866, "learning_rate": 4.5980392156862746e-05, "loss": 0.682, "step": 2019 }, { "epoch": 2.5856, "grad_norm": 0.6765472888946533, "learning_rate": 4.5978391356542624e-05, "loss": 0.7696, "step": 2020 }, { "epoch": 2.58688, "grad_norm": 0.630276083946228, "learning_rate": 4.5976390556222496e-05, "loss": 0.6681, "step": 2021 }, { "epoch": 2.5881600000000002, "grad_norm": 
0.6415339112281799, "learning_rate": 4.597438975590236e-05, "loss": 0.6739, "step": 2022 }, { "epoch": 2.5894399999999997, "grad_norm": 0.6534903645515442, "learning_rate": 4.597238895558223e-05, "loss": 0.6743, "step": 2023 }, { "epoch": 2.59072, "grad_norm": 0.704707145690918, "learning_rate": 4.5970388155262105e-05, "loss": 0.749, "step": 2024 }, { "epoch": 2.592, "grad_norm": 0.6433389782905579, "learning_rate": 4.596838735494198e-05, "loss": 0.6899, "step": 2025 }, { "epoch": 2.59328, "grad_norm": 0.6151252388954163, "learning_rate": 4.596638655462185e-05, "loss": 0.6721, "step": 2026 }, { "epoch": 2.59456, "grad_norm": 0.6191518902778625, "learning_rate": 4.596438575430173e-05, "loss": 0.7129, "step": 2027 }, { "epoch": 2.59584, "grad_norm": 0.6529395580291748, "learning_rate": 4.59623849539816e-05, "loss": 0.6866, "step": 2028 }, { "epoch": 2.59712, "grad_norm": 0.6465093493461609, "learning_rate": 4.596038415366147e-05, "loss": 0.6547, "step": 2029 }, { "epoch": 2.5984, "grad_norm": 0.6575112342834473, "learning_rate": 4.5958383353341336e-05, "loss": 0.6982, "step": 2030 }, { "epoch": 2.59968, "grad_norm": 0.6782650947570801, "learning_rate": 4.595638255302121e-05, "loss": 0.7034, "step": 2031 }, { "epoch": 2.60096, "grad_norm": 0.7053045034408569, "learning_rate": 4.595438175270108e-05, "loss": 0.7387, "step": 2032 }, { "epoch": 2.60224, "grad_norm": 0.6682645678520203, "learning_rate": 4.595238095238095e-05, "loss": 0.6812, "step": 2033 }, { "epoch": 2.60352, "grad_norm": 0.6382338404655457, "learning_rate": 4.595038015206083e-05, "loss": 0.6417, "step": 2034 }, { "epoch": 2.6048, "grad_norm": 0.6588988900184631, "learning_rate": 4.59483793517407e-05, "loss": 0.7028, "step": 2035 }, { "epoch": 2.60608, "grad_norm": 0.6959824562072754, "learning_rate": 4.5946378551420574e-05, "loss": 0.7239, "step": 2036 }, { "epoch": 2.60736, "grad_norm": 0.6393638253211975, "learning_rate": 4.5944377751100446e-05, "loss": 0.7437, "step": 2037 }, { "epoch": 2.60864, 
"grad_norm": 0.6316011548042297, "learning_rate": 4.594237695078031e-05, "loss": 0.7059, "step": 2038 }, { "epoch": 2.60992, "grad_norm": 0.6398905515670776, "learning_rate": 4.594037615046018e-05, "loss": 0.692, "step": 2039 }, { "epoch": 2.6112, "grad_norm": 0.6583664417266846, "learning_rate": 4.5938375350140055e-05, "loss": 0.6969, "step": 2040 }, { "epoch": 2.61248, "grad_norm": 0.6351532340049744, "learning_rate": 4.593637454981993e-05, "loss": 0.6436, "step": 2041 }, { "epoch": 2.61376, "grad_norm": 0.6414986252784729, "learning_rate": 4.5934373749499805e-05, "loss": 0.6872, "step": 2042 }, { "epoch": 2.61504, "grad_norm": 0.632988452911377, "learning_rate": 4.593237294917968e-05, "loss": 0.7222, "step": 2043 }, { "epoch": 2.61632, "grad_norm": 0.6204145550727844, "learning_rate": 4.593037214885955e-05, "loss": 0.653, "step": 2044 }, { "epoch": 2.6176, "grad_norm": 0.6726123094558716, "learning_rate": 4.592837134853942e-05, "loss": 0.7185, "step": 2045 }, { "epoch": 2.61888, "grad_norm": 0.6624434590339661, "learning_rate": 4.5926370548219286e-05, "loss": 0.6756, "step": 2046 }, { "epoch": 2.6201600000000003, "grad_norm": 0.66437166929245, "learning_rate": 4.592436974789916e-05, "loss": 0.6542, "step": 2047 }, { "epoch": 2.6214399999999998, "grad_norm": 0.6689863801002502, "learning_rate": 4.5922368947579036e-05, "loss": 0.6601, "step": 2048 }, { "epoch": 2.62272, "grad_norm": 0.6922819018363953, "learning_rate": 4.592036814725891e-05, "loss": 0.7033, "step": 2049 }, { "epoch": 2.624, "grad_norm": 0.6346952319145203, "learning_rate": 4.591836734693878e-05, "loss": 0.6276, "step": 2050 }, { "epoch": 2.62528, "grad_norm": 0.6946719884872437, "learning_rate": 4.591636654661865e-05, "loss": 0.6687, "step": 2051 }, { "epoch": 2.62656, "grad_norm": 0.6633927226066589, "learning_rate": 4.5914365746298524e-05, "loss": 0.6556, "step": 2052 }, { "epoch": 2.62784, "grad_norm": 0.6315405964851379, "learning_rate": 4.5912364945978395e-05, "loss": 0.6286, "step": 2053 }, 
{ "epoch": 2.62912, "grad_norm": 0.6600960493087769, "learning_rate": 4.591036414565826e-05, "loss": 0.7148, "step": 2054 }, { "epoch": 2.6304, "grad_norm": 0.6884539723396301, "learning_rate": 4.590836334533814e-05, "loss": 0.7345, "step": 2055 }, { "epoch": 2.6316800000000002, "grad_norm": 0.6405014395713806, "learning_rate": 4.590636254501801e-05, "loss": 0.6786, "step": 2056 }, { "epoch": 2.6329599999999997, "grad_norm": 0.6440674662590027, "learning_rate": 4.590436174469788e-05, "loss": 0.7232, "step": 2057 }, { "epoch": 2.63424, "grad_norm": 0.6079061627388, "learning_rate": 4.5902360944377755e-05, "loss": 0.6926, "step": 2058 }, { "epoch": 2.63552, "grad_norm": 0.6500112414360046, "learning_rate": 4.590036014405763e-05, "loss": 0.6942, "step": 2059 }, { "epoch": 2.6368, "grad_norm": 0.6738446950912476, "learning_rate": 4.58983593437375e-05, "loss": 0.7175, "step": 2060 }, { "epoch": 2.63808, "grad_norm": 0.6589712500572205, "learning_rate": 4.589635854341737e-05, "loss": 0.6648, "step": 2061 }, { "epoch": 2.63936, "grad_norm": 0.6929957866668701, "learning_rate": 4.589435774309724e-05, "loss": 0.7793, "step": 2062 }, { "epoch": 2.64064, "grad_norm": 0.6504958271980286, "learning_rate": 4.5892356942777114e-05, "loss": 0.6443, "step": 2063 }, { "epoch": 2.64192, "grad_norm": 0.6380278468132019, "learning_rate": 4.5890356142456986e-05, "loss": 0.7045, "step": 2064 }, { "epoch": 2.6432, "grad_norm": 0.6529109477996826, "learning_rate": 4.588835534213686e-05, "loss": 0.6324, "step": 2065 }, { "epoch": 2.64448, "grad_norm": 0.647123396396637, "learning_rate": 4.588635454181673e-05, "loss": 0.6828, "step": 2066 }, { "epoch": 2.64576, "grad_norm": 0.6754167079925537, "learning_rate": 4.58843537414966e-05, "loss": 0.6967, "step": 2067 }, { "epoch": 2.64704, "grad_norm": 0.7286065220832825, "learning_rate": 4.588235294117647e-05, "loss": 0.7515, "step": 2068 }, { "epoch": 2.64832, "grad_norm": 0.6146422624588013, "learning_rate": 4.5880352140856345e-05, "loss": 
0.6442, "step": 2069 }, { "epoch": 2.6496, "grad_norm": 0.6109498143196106, "learning_rate": 4.587835134053622e-05, "loss": 0.6513, "step": 2070 }, { "epoch": 2.65088, "grad_norm": 0.6009126305580139, "learning_rate": 4.587635054021609e-05, "loss": 0.6343, "step": 2071 }, { "epoch": 2.65216, "grad_norm": 0.6297327280044556, "learning_rate": 4.587434973989596e-05, "loss": 0.6925, "step": 2072 }, { "epoch": 2.65344, "grad_norm": 0.6399157047271729, "learning_rate": 4.587234893957583e-05, "loss": 0.6359, "step": 2073 }, { "epoch": 2.65472, "grad_norm": 0.626350998878479, "learning_rate": 4.5870348139255704e-05, "loss": 0.6937, "step": 2074 }, { "epoch": 2.656, "grad_norm": 0.6313692927360535, "learning_rate": 4.5868347338935576e-05, "loss": 0.644, "step": 2075 }, { "epoch": 2.65728, "grad_norm": 0.671847403049469, "learning_rate": 4.586634653861545e-05, "loss": 0.7126, "step": 2076 }, { "epoch": 2.65856, "grad_norm": 0.688568651676178, "learning_rate": 4.586434573829532e-05, "loss": 0.6857, "step": 2077 }, { "epoch": 2.65984, "grad_norm": 0.6881417036056519, "learning_rate": 4.586234493797519e-05, "loss": 0.7783, "step": 2078 }, { "epoch": 2.66112, "grad_norm": 0.648615300655365, "learning_rate": 4.5860344137655064e-05, "loss": 0.6637, "step": 2079 }, { "epoch": 2.6624, "grad_norm": 0.6464314460754395, "learning_rate": 4.5858343337334936e-05, "loss": 0.6959, "step": 2080 }, { "epoch": 2.6636800000000003, "grad_norm": 0.6369978189468384, "learning_rate": 4.585634253701481e-05, "loss": 0.6529, "step": 2081 }, { "epoch": 2.6649599999999998, "grad_norm": 0.6303538084030151, "learning_rate": 4.585434173669468e-05, "loss": 0.6863, "step": 2082 }, { "epoch": 2.66624, "grad_norm": 0.6283568143844604, "learning_rate": 4.585234093637455e-05, "loss": 0.6879, "step": 2083 }, { "epoch": 2.66752, "grad_norm": 0.6396946310997009, "learning_rate": 4.585034013605442e-05, "loss": 0.6658, "step": 2084 }, { "epoch": 2.6688, "grad_norm": 0.6821319460868835, "learning_rate": 
4.5848339335734295e-05, "loss": 0.7968, "step": 2085 }, { "epoch": 2.67008, "grad_norm": 0.6359503865242004, "learning_rate": 4.584633853541417e-05, "loss": 0.6889, "step": 2086 }, { "epoch": 2.67136, "grad_norm": 0.6165139079093933, "learning_rate": 4.584433773509404e-05, "loss": 0.641, "step": 2087 }, { "epoch": 2.67264, "grad_norm": 0.6269519329071045, "learning_rate": 4.584233693477391e-05, "loss": 0.6457, "step": 2088 }, { "epoch": 2.67392, "grad_norm": 0.6549934148788452, "learning_rate": 4.584033613445378e-05, "loss": 0.7314, "step": 2089 }, { "epoch": 2.6752000000000002, "grad_norm": 0.6697399616241455, "learning_rate": 4.5838335334133654e-05, "loss": 0.701, "step": 2090 }, { "epoch": 2.6764799999999997, "grad_norm": 0.6521748304367065, "learning_rate": 4.583633453381353e-05, "loss": 0.6884, "step": 2091 }, { "epoch": 2.67776, "grad_norm": 0.6441870331764221, "learning_rate": 4.58343337334934e-05, "loss": 0.6971, "step": 2092 }, { "epoch": 2.67904, "grad_norm": 0.679710328578949, "learning_rate": 4.583233293317327e-05, "loss": 0.7295, "step": 2093 }, { "epoch": 2.68032, "grad_norm": 0.6788901686668396, "learning_rate": 4.583033213285314e-05, "loss": 0.712, "step": 2094 }, { "epoch": 2.6816, "grad_norm": 0.6429007053375244, "learning_rate": 4.5828331332533013e-05, "loss": 0.7209, "step": 2095 }, { "epoch": 2.68288, "grad_norm": 0.6366958022117615, "learning_rate": 4.5826330532212885e-05, "loss": 0.6718, "step": 2096 }, { "epoch": 2.68416, "grad_norm": 0.6946751475334167, "learning_rate": 4.582432973189276e-05, "loss": 0.7622, "step": 2097 }, { "epoch": 2.68544, "grad_norm": 0.6607045531272888, "learning_rate": 4.5822328931572636e-05, "loss": 0.697, "step": 2098 }, { "epoch": 2.68672, "grad_norm": 0.6583904027938843, "learning_rate": 4.582032813125251e-05, "loss": 0.7422, "step": 2099 }, { "epoch": 2.6879999999999997, "grad_norm": 0.6572896838188171, "learning_rate": 4.581832733093237e-05, "loss": 0.7195, "step": 2100 }, { "epoch": 2.68928, "grad_norm": 
0.6300676465034485, "learning_rate": 4.5816326530612245e-05, "loss": 0.6429, "step": 2101 }, { "epoch": 2.69056, "grad_norm": 0.6641280651092529, "learning_rate": 4.5814325730292116e-05, "loss": 0.7224, "step": 2102 }, { "epoch": 2.69184, "grad_norm": 0.6447017788887024, "learning_rate": 4.581232492997199e-05, "loss": 0.7085, "step": 2103 }, { "epoch": 2.69312, "grad_norm": 0.683560848236084, "learning_rate": 4.581032412965186e-05, "loss": 0.7218, "step": 2104 }, { "epoch": 2.6944, "grad_norm": 0.6947901248931885, "learning_rate": 4.580832332933174e-05, "loss": 0.6343, "step": 2105 }, { "epoch": 2.69568, "grad_norm": 0.6173972487449646, "learning_rate": 4.580632252901161e-05, "loss": 0.7048, "step": 2106 }, { "epoch": 2.69696, "grad_norm": 0.6312255263328552, "learning_rate": 4.580432172869148e-05, "loss": 0.6644, "step": 2107 }, { "epoch": 2.69824, "grad_norm": 0.6200210452079773, "learning_rate": 4.580232092837135e-05, "loss": 0.688, "step": 2108 }, { "epoch": 2.69952, "grad_norm": 0.6290738582611084, "learning_rate": 4.580032012805122e-05, "loss": 0.6392, "step": 2109 }, { "epoch": 2.7008, "grad_norm": 0.6984908580780029, "learning_rate": 4.579831932773109e-05, "loss": 0.7356, "step": 2110 }, { "epoch": 2.70208, "grad_norm": 0.6521112322807312, "learning_rate": 4.579631852741096e-05, "loss": 0.6927, "step": 2111 }, { "epoch": 2.70336, "grad_norm": 0.622561514377594, "learning_rate": 4.579431772709084e-05, "loss": 0.6233, "step": 2112 }, { "epoch": 2.70464, "grad_norm": 0.6508548259735107, "learning_rate": 4.5792316926770714e-05, "loss": 0.678, "step": 2113 }, { "epoch": 2.70592, "grad_norm": 0.6437860131263733, "learning_rate": 4.5790316126450586e-05, "loss": 0.6495, "step": 2114 }, { "epoch": 2.7072000000000003, "grad_norm": 0.6867465376853943, "learning_rate": 4.578831532613046e-05, "loss": 0.6763, "step": 2115 }, { "epoch": 2.7084799999999998, "grad_norm": 0.6844937801361084, "learning_rate": 4.578631452581032e-05, "loss": 0.7097, "step": 2116 }, { "epoch": 
2.70976, "grad_norm": 0.647807240486145, "learning_rate": 4.5784313725490194e-05, "loss": 0.7127, "step": 2117 }, { "epoch": 2.71104, "grad_norm": 0.6669386625289917, "learning_rate": 4.5782312925170066e-05, "loss": 0.6953, "step": 2118 }, { "epoch": 2.71232, "grad_norm": 0.6261999607086182, "learning_rate": 4.5780312124849945e-05, "loss": 0.623, "step": 2119 }, { "epoch": 2.7136, "grad_norm": 0.655799925327301, "learning_rate": 4.577831132452982e-05, "loss": 0.6649, "step": 2120 }, { "epoch": 2.71488, "grad_norm": 0.5952524542808533, "learning_rate": 4.577631052420969e-05, "loss": 0.6339, "step": 2121 }, { "epoch": 2.71616, "grad_norm": 0.6749306321144104, "learning_rate": 4.577430972388956e-05, "loss": 0.7158, "step": 2122 }, { "epoch": 2.71744, "grad_norm": 0.6410848498344421, "learning_rate": 4.577230892356943e-05, "loss": 0.6914, "step": 2123 }, { "epoch": 2.7187200000000002, "grad_norm": 0.63245689868927, "learning_rate": 4.57703081232493e-05, "loss": 0.7241, "step": 2124 }, { "epoch": 2.7199999999999998, "grad_norm": 0.6515913009643555, "learning_rate": 4.576830732292917e-05, "loss": 0.7299, "step": 2125 }, { "epoch": 2.72128, "grad_norm": 0.6468390226364136, "learning_rate": 4.576630652260905e-05, "loss": 0.6929, "step": 2126 }, { "epoch": 2.72256, "grad_norm": 0.6881574988365173, "learning_rate": 4.576430572228892e-05, "loss": 0.7325, "step": 2127 }, { "epoch": 2.72384, "grad_norm": 0.6494055986404419, "learning_rate": 4.576230492196879e-05, "loss": 0.6923, "step": 2128 }, { "epoch": 2.72512, "grad_norm": 0.6755759119987488, "learning_rate": 4.576030412164866e-05, "loss": 0.6909, "step": 2129 }, { "epoch": 2.7264, "grad_norm": 0.6174530982971191, "learning_rate": 4.5758303321328535e-05, "loss": 0.6562, "step": 2130 }, { "epoch": 2.72768, "grad_norm": 0.7030787467956543, "learning_rate": 4.575630252100841e-05, "loss": 0.6916, "step": 2131 }, { "epoch": 2.72896, "grad_norm": 0.6742822527885437, "learning_rate": 4.575430172068827e-05, "loss": 0.6501, "step": 
2132 }, { "epoch": 2.7302400000000002, "grad_norm": 0.6416477560997009, "learning_rate": 4.575230092036815e-05, "loss": 0.7267, "step": 2133 }, { "epoch": 2.7315199999999997, "grad_norm": 0.664212703704834, "learning_rate": 4.575030012004802e-05, "loss": 0.7052, "step": 2134 }, { "epoch": 2.7328, "grad_norm": 0.6820107102394104, "learning_rate": 4.5748299319727895e-05, "loss": 0.7232, "step": 2135 }, { "epoch": 2.73408, "grad_norm": 0.6734192967414856, "learning_rate": 4.5746298519407766e-05, "loss": 0.7513, "step": 2136 }, { "epoch": 2.73536, "grad_norm": 0.6329904794692993, "learning_rate": 4.574429771908764e-05, "loss": 0.5988, "step": 2137 }, { "epoch": 2.73664, "grad_norm": 0.6895211338996887, "learning_rate": 4.574229691876751e-05, "loss": 0.7194, "step": 2138 }, { "epoch": 2.73792, "grad_norm": 0.6408979892730713, "learning_rate": 4.574029611844738e-05, "loss": 0.6905, "step": 2139 }, { "epoch": 2.7392, "grad_norm": 0.6711249947547913, "learning_rate": 4.5738295318127254e-05, "loss": 0.7265, "step": 2140 }, { "epoch": 2.74048, "grad_norm": 0.6658822894096375, "learning_rate": 4.5736294517807126e-05, "loss": 0.6549, "step": 2141 }, { "epoch": 2.74176, "grad_norm": 0.6828049421310425, "learning_rate": 4.5734293717487e-05, "loss": 0.6709, "step": 2142 }, { "epoch": 2.74304, "grad_norm": 0.6909437775611877, "learning_rate": 4.573229291716687e-05, "loss": 0.7724, "step": 2143 }, { "epoch": 2.74432, "grad_norm": 0.639430582523346, "learning_rate": 4.573029211684674e-05, "loss": 0.6728, "step": 2144 }, { "epoch": 2.7456, "grad_norm": 0.6498085856437683, "learning_rate": 4.572829131652661e-05, "loss": 0.7088, "step": 2145 }, { "epoch": 2.74688, "grad_norm": 0.6685836911201477, "learning_rate": 4.5726290516206485e-05, "loss": 0.6646, "step": 2146 }, { "epoch": 2.74816, "grad_norm": 0.6966959834098816, "learning_rate": 4.572428971588636e-05, "loss": 0.7059, "step": 2147 }, { "epoch": 2.74944, "grad_norm": 0.6679326891899109, "learning_rate": 4.572228891556623e-05, 
"loss": 0.6814, "step": 2148 }, { "epoch": 2.7507200000000003, "grad_norm": 0.675516664981842, "learning_rate": 4.57202881152461e-05, "loss": 0.8011, "step": 2149 }, { "epoch": 2.752, "grad_norm": 0.661491334438324, "learning_rate": 4.571828731492597e-05, "loss": 0.6971, "step": 2150 }, { "epoch": 2.75328, "grad_norm": 0.6797171831130981, "learning_rate": 4.5716286514605844e-05, "loss": 0.6434, "step": 2151 }, { "epoch": 2.75456, "grad_norm": 0.6701538562774658, "learning_rate": 4.5714285714285716e-05, "loss": 0.6716, "step": 2152 }, { "epoch": 2.75584, "grad_norm": 0.651821494102478, "learning_rate": 4.571228491396559e-05, "loss": 0.6881, "step": 2153 }, { "epoch": 2.75712, "grad_norm": 0.6361088156700134, "learning_rate": 4.571028411364546e-05, "loss": 0.6827, "step": 2154 }, { "epoch": 2.7584, "grad_norm": 0.6350401043891907, "learning_rate": 4.570828331332533e-05, "loss": 0.7337, "step": 2155 }, { "epoch": 2.75968, "grad_norm": 0.7136991024017334, "learning_rate": 4.5706282513005204e-05, "loss": 0.6458, "step": 2156 }, { "epoch": 2.76096, "grad_norm": 0.6868817210197449, "learning_rate": 4.5704281712685075e-05, "loss": 0.7136, "step": 2157 }, { "epoch": 2.7622400000000003, "grad_norm": 0.676253616809845, "learning_rate": 4.570228091236495e-05, "loss": 0.7317, "step": 2158 }, { "epoch": 2.7635199999999998, "grad_norm": 0.6780885457992554, "learning_rate": 4.570028011204482e-05, "loss": 0.7259, "step": 2159 }, { "epoch": 2.7648, "grad_norm": 0.6793497204780579, "learning_rate": 4.569827931172469e-05, "loss": 0.7271, "step": 2160 }, { "epoch": 2.76608, "grad_norm": 0.6723445653915405, "learning_rate": 4.569627851140457e-05, "loss": 0.6884, "step": 2161 }, { "epoch": 2.76736, "grad_norm": 0.634547233581543, "learning_rate": 4.5694277711084435e-05, "loss": 0.6513, "step": 2162 }, { "epoch": 2.76864, "grad_norm": 0.6849969625473022, "learning_rate": 4.5692276910764307e-05, "loss": 0.742, "step": 2163 }, { "epoch": 2.76992, "grad_norm": 0.678898274898529, 
"learning_rate": 4.569027611044418e-05, "loss": 0.6751, "step": 2164 }, { "epoch": 2.7712, "grad_norm": 0.6287670135498047, "learning_rate": 4.568827531012405e-05, "loss": 0.6337, "step": 2165 }, { "epoch": 2.77248, "grad_norm": 0.6324494481086731, "learning_rate": 4.568627450980392e-05, "loss": 0.6311, "step": 2166 }, { "epoch": 2.7737600000000002, "grad_norm": 0.6523123383522034, "learning_rate": 4.5684273709483794e-05, "loss": 0.6919, "step": 2167 }, { "epoch": 2.7750399999999997, "grad_norm": 0.6603489518165588, "learning_rate": 4.568227290916367e-05, "loss": 0.6948, "step": 2168 }, { "epoch": 2.77632, "grad_norm": 0.6603338122367859, "learning_rate": 4.5680272108843544e-05, "loss": 0.6322, "step": 2169 }, { "epoch": 2.7776, "grad_norm": 0.6867691874504089, "learning_rate": 4.567827130852341e-05, "loss": 0.6886, "step": 2170 }, { "epoch": 2.77888, "grad_norm": 0.6577850580215454, "learning_rate": 4.567627050820328e-05, "loss": 0.6591, "step": 2171 }, { "epoch": 2.78016, "grad_norm": 0.6284435987472534, "learning_rate": 4.567426970788315e-05, "loss": 0.6754, "step": 2172 }, { "epoch": 2.78144, "grad_norm": 0.6540303826332092, "learning_rate": 4.5672268907563025e-05, "loss": 0.6868, "step": 2173 }, { "epoch": 2.78272, "grad_norm": 0.6498759984970093, "learning_rate": 4.56702681072429e-05, "loss": 0.7164, "step": 2174 }, { "epoch": 2.784, "grad_norm": 0.686326265335083, "learning_rate": 4.5668267306922776e-05, "loss": 0.7155, "step": 2175 }, { "epoch": 2.78528, "grad_norm": 0.6437170505523682, "learning_rate": 4.566626650660265e-05, "loss": 0.6718, "step": 2176 }, { "epoch": 2.78656, "grad_norm": 0.6241775155067444, "learning_rate": 4.566426570628252e-05, "loss": 0.6818, "step": 2177 }, { "epoch": 2.78784, "grad_norm": 0.6604918837547302, "learning_rate": 4.5662264905962384e-05, "loss": 0.6968, "step": 2178 }, { "epoch": 2.78912, "grad_norm": 0.6711975932121277, "learning_rate": 4.5660264105642256e-05, "loss": 0.6325, "step": 2179 }, { "epoch": 2.7904, 
"grad_norm": 0.6874549984931946, "learning_rate": 4.565826330532213e-05, "loss": 0.7189, "step": 2180 }, { "epoch": 2.79168, "grad_norm": 0.6761820316314697, "learning_rate": 4.5656262505002e-05, "loss": 0.6852, "step": 2181 }, { "epoch": 2.79296, "grad_norm": 0.6783404350280762, "learning_rate": 4.565426170468188e-05, "loss": 0.6999, "step": 2182 }, { "epoch": 2.79424, "grad_norm": 0.692419171333313, "learning_rate": 4.565226090436175e-05, "loss": 0.6883, "step": 2183 }, { "epoch": 2.79552, "grad_norm": 0.6205313205718994, "learning_rate": 4.565026010404162e-05, "loss": 0.6789, "step": 2184 }, { "epoch": 2.7968, "grad_norm": 0.6563310027122498, "learning_rate": 4.5648259303721494e-05, "loss": 0.6559, "step": 2185 }, { "epoch": 2.79808, "grad_norm": 0.6110013127326965, "learning_rate": 4.564625850340136e-05, "loss": 0.6487, "step": 2186 }, { "epoch": 2.79936, "grad_norm": 0.6310566067695618, "learning_rate": 4.564425770308123e-05, "loss": 0.6614, "step": 2187 }, { "epoch": 2.80064, "grad_norm": 0.6590198278427124, "learning_rate": 4.56422569027611e-05, "loss": 0.6885, "step": 2188 }, { "epoch": 2.80192, "grad_norm": 0.6192741394042969, "learning_rate": 4.5640256102440975e-05, "loss": 0.668, "step": 2189 }, { "epoch": 2.8032, "grad_norm": 0.6905452013015747, "learning_rate": 4.5638255302120853e-05, "loss": 0.7039, "step": 2190 }, { "epoch": 2.80448, "grad_norm": 0.7045333385467529, "learning_rate": 4.5636254501800725e-05, "loss": 0.7192, "step": 2191 }, { "epoch": 2.8057600000000003, "grad_norm": 0.6958116888999939, "learning_rate": 4.56342537014806e-05, "loss": 0.6825, "step": 2192 }, { "epoch": 2.8070399999999998, "grad_norm": 0.6681202054023743, "learning_rate": 4.563225290116047e-05, "loss": 0.6615, "step": 2193 }, { "epoch": 2.80832, "grad_norm": 0.6813097596168518, "learning_rate": 4.5630252100840334e-05, "loss": 0.7027, "step": 2194 }, { "epoch": 2.8096, "grad_norm": 0.6501699090003967, "learning_rate": 4.5628251300520206e-05, "loss": 0.6703, "step": 2195 }, 
{ "epoch": 2.81088, "grad_norm": 0.6794613599777222, "learning_rate": 4.562625050020008e-05, "loss": 0.681, "step": 2196 }, { "epoch": 2.81216, "grad_norm": 0.6721237301826477, "learning_rate": 4.5624249699879956e-05, "loss": 0.6923, "step": 2197 }, { "epoch": 2.81344, "grad_norm": 0.6417657732963562, "learning_rate": 4.562224889955983e-05, "loss": 0.6285, "step": 2198 }, { "epoch": 2.81472, "grad_norm": 0.6990517973899841, "learning_rate": 4.56202480992397e-05, "loss": 0.6986, "step": 2199 }, { "epoch": 2.816, "grad_norm": 0.6576136350631714, "learning_rate": 4.561824729891957e-05, "loss": 0.7217, "step": 2200 }, { "epoch": 2.8172800000000002, "grad_norm": 0.6665319204330444, "learning_rate": 4.5616246498599444e-05, "loss": 0.6833, "step": 2201 }, { "epoch": 2.8185599999999997, "grad_norm": 0.6458298563957214, "learning_rate": 4.561424569827931e-05, "loss": 0.7176, "step": 2202 }, { "epoch": 2.81984, "grad_norm": 0.624575674533844, "learning_rate": 4.561224489795918e-05, "loss": 0.6285, "step": 2203 }, { "epoch": 2.82112, "grad_norm": 0.644964337348938, "learning_rate": 4.561024409763906e-05, "loss": 0.7044, "step": 2204 }, { "epoch": 2.8224, "grad_norm": 0.6390849947929382, "learning_rate": 4.560824329731893e-05, "loss": 0.7089, "step": 2205 }, { "epoch": 2.82368, "grad_norm": 0.6157050132751465, "learning_rate": 4.56062424969988e-05, "loss": 0.6307, "step": 2206 }, { "epoch": 2.82496, "grad_norm": 0.6521828770637512, "learning_rate": 4.5604241696678675e-05, "loss": 0.6278, "step": 2207 }, { "epoch": 2.82624, "grad_norm": 0.6566979289054871, "learning_rate": 4.560224089635855e-05, "loss": 0.698, "step": 2208 }, { "epoch": 2.82752, "grad_norm": 0.6705225110054016, "learning_rate": 4.560024009603842e-05, "loss": 0.7031, "step": 2209 }, { "epoch": 2.8288, "grad_norm": 0.6485233306884766, "learning_rate": 4.5598239295718284e-05, "loss": 0.6328, "step": 2210 }, { "epoch": 2.83008, "grad_norm": 0.6989656686782837, "learning_rate": 4.559623849539816e-05, "loss": 0.7367, 
"step": 2211 }, { "epoch": 2.83136, "grad_norm": 0.705863356590271, "learning_rate": 4.5594237695078034e-05, "loss": 0.6894, "step": 2212 }, { "epoch": 2.83264, "grad_norm": 0.65732741355896, "learning_rate": 4.5592236894757906e-05, "loss": 0.7177, "step": 2213 }, { "epoch": 2.83392, "grad_norm": 0.6545193195343018, "learning_rate": 4.559023609443778e-05, "loss": 0.7393, "step": 2214 }, { "epoch": 2.8352, "grad_norm": 0.6775088906288147, "learning_rate": 4.558823529411765e-05, "loss": 0.7286, "step": 2215 }, { "epoch": 2.83648, "grad_norm": 0.7127577662467957, "learning_rate": 4.558623449379752e-05, "loss": 0.6682, "step": 2216 }, { "epoch": 2.83776, "grad_norm": 0.6672844886779785, "learning_rate": 4.5584233693477394e-05, "loss": 0.6333, "step": 2217 }, { "epoch": 2.83904, "grad_norm": 0.6709384322166443, "learning_rate": 4.5582232893157265e-05, "loss": 0.6946, "step": 2218 }, { "epoch": 2.84032, "grad_norm": 0.6919859647750854, "learning_rate": 4.558023209283714e-05, "loss": 0.7289, "step": 2219 }, { "epoch": 2.8416, "grad_norm": 0.663409411907196, "learning_rate": 4.557823129251701e-05, "loss": 0.6832, "step": 2220 }, { "epoch": 2.84288, "grad_norm": 0.7317196130752563, "learning_rate": 4.557623049219688e-05, "loss": 0.7423, "step": 2221 }, { "epoch": 2.84416, "grad_norm": 0.6240825057029724, "learning_rate": 4.557422969187675e-05, "loss": 0.6365, "step": 2222 }, { "epoch": 2.84544, "grad_norm": 0.6449801921844482, "learning_rate": 4.5572228891556625e-05, "loss": 0.7191, "step": 2223 }, { "epoch": 2.84672, "grad_norm": 0.6569290161132812, "learning_rate": 4.5570228091236497e-05, "loss": 0.6668, "step": 2224 }, { "epoch": 2.848, "grad_norm": 0.6520342826843262, "learning_rate": 4.556822729091637e-05, "loss": 0.6671, "step": 2225 }, { "epoch": 2.8492800000000003, "grad_norm": 0.708814263343811, "learning_rate": 4.556622649059624e-05, "loss": 0.7664, "step": 2226 }, { "epoch": 2.8505599999999998, "grad_norm": 0.6431106925010681, "learning_rate": 
4.556422569027611e-05, "loss": 0.6481, "step": 2227 }, { "epoch": 2.85184, "grad_norm": 0.6694732904434204, "learning_rate": 4.5562224889955984e-05, "loss": 0.6742, "step": 2228 }, { "epoch": 2.85312, "grad_norm": 0.6356359124183655, "learning_rate": 4.5560224089635856e-05, "loss": 0.6854, "step": 2229 }, { "epoch": 2.8544, "grad_norm": 0.6448719501495361, "learning_rate": 4.555822328931573e-05, "loss": 0.7208, "step": 2230 }, { "epoch": 2.85568, "grad_norm": 0.6238778233528137, "learning_rate": 4.55562224889956e-05, "loss": 0.6774, "step": 2231 }, { "epoch": 2.85696, "grad_norm": 0.6807205677032471, "learning_rate": 4.555422168867547e-05, "loss": 0.6773, "step": 2232 }, { "epoch": 2.85824, "grad_norm": 0.6966913938522339, "learning_rate": 4.555222088835534e-05, "loss": 0.7078, "step": 2233 }, { "epoch": 2.85952, "grad_norm": 0.666556715965271, "learning_rate": 4.5550220088035215e-05, "loss": 0.714, "step": 2234 }, { "epoch": 2.8608000000000002, "grad_norm": 0.6896193027496338, "learning_rate": 4.554821928771509e-05, "loss": 0.7264, "step": 2235 }, { "epoch": 2.8620799999999997, "grad_norm": 0.6884152293205261, "learning_rate": 4.554621848739496e-05, "loss": 0.7435, "step": 2236 }, { "epoch": 2.86336, "grad_norm": 0.7019590735435486, "learning_rate": 4.554421768707483e-05, "loss": 0.7466, "step": 2237 }, { "epoch": 2.86464, "grad_norm": 0.6901242733001709, "learning_rate": 4.55422168867547e-05, "loss": 0.6879, "step": 2238 }, { "epoch": 2.86592, "grad_norm": 0.6424590349197388, "learning_rate": 4.554021608643458e-05, "loss": 0.6982, "step": 2239 }, { "epoch": 2.8672, "grad_norm": 0.6135256290435791, "learning_rate": 4.5538215286114446e-05, "loss": 0.6547, "step": 2240 }, { "epoch": 2.86848, "grad_norm": 0.6518721580505371, "learning_rate": 4.553621448579432e-05, "loss": 0.6951, "step": 2241 }, { "epoch": 2.86976, "grad_norm": 0.6382459402084351, "learning_rate": 4.553421368547419e-05, "loss": 0.6453, "step": 2242 }, { "epoch": 2.87104, "grad_norm": 
0.6710212230682373, "learning_rate": 4.553221288515406e-05, "loss": 0.718, "step": 2243 }, { "epoch": 2.87232, "grad_norm": 0.6573660373687744, "learning_rate": 4.5530212084833934e-05, "loss": 0.6752, "step": 2244 }, { "epoch": 2.8736, "grad_norm": 0.6997604966163635, "learning_rate": 4.5528211284513806e-05, "loss": 0.7185, "step": 2245 }, { "epoch": 2.87488, "grad_norm": 0.6322104930877686, "learning_rate": 4.5526210484193684e-05, "loss": 0.6109, "step": 2246 }, { "epoch": 2.87616, "grad_norm": 0.6722203493118286, "learning_rate": 4.5524209683873556e-05, "loss": 0.702, "step": 2247 }, { "epoch": 2.87744, "grad_norm": 0.6825948357582092, "learning_rate": 4.552220888355342e-05, "loss": 0.6802, "step": 2248 }, { "epoch": 2.87872, "grad_norm": 0.6763216853141785, "learning_rate": 4.552020808323329e-05, "loss": 0.7054, "step": 2249 }, { "epoch": 2.88, "grad_norm": 0.6940529346466064, "learning_rate": 4.5518207282913165e-05, "loss": 0.7395, "step": 2250 }, { "epoch": 2.88128, "grad_norm": 0.6334916949272156, "learning_rate": 4.551620648259304e-05, "loss": 0.6658, "step": 2251 }, { "epoch": 2.88256, "grad_norm": 0.6614584922790527, "learning_rate": 4.551420568227291e-05, "loss": 0.7377, "step": 2252 }, { "epoch": 2.88384, "grad_norm": 0.6642113924026489, "learning_rate": 4.551220488195279e-05, "loss": 0.6981, "step": 2253 }, { "epoch": 2.88512, "grad_norm": 0.624686598777771, "learning_rate": 4.551020408163266e-05, "loss": 0.7133, "step": 2254 }, { "epoch": 2.8864, "grad_norm": 0.67439204454422, "learning_rate": 4.550820328131253e-05, "loss": 0.7079, "step": 2255 }, { "epoch": 2.88768, "grad_norm": 0.6298664212226868, "learning_rate": 4.5506202480992396e-05, "loss": 0.6848, "step": 2256 }, { "epoch": 2.88896, "grad_norm": 0.604558527469635, "learning_rate": 4.550420168067227e-05, "loss": 0.6442, "step": 2257 }, { "epoch": 2.89024, "grad_norm": 0.6485775113105774, "learning_rate": 4.550220088035214e-05, "loss": 0.6867, "step": 2258 }, { "epoch": 2.89152, "grad_norm": 
0.6755024194717407, "learning_rate": 4.550020008003201e-05, "loss": 0.705, "step": 2259 }, { "epoch": 2.8928000000000003, "grad_norm": 0.664548397064209, "learning_rate": 4.549819927971189e-05, "loss": 0.657, "step": 2260 }, { "epoch": 2.8940799999999998, "grad_norm": 0.6621407866477966, "learning_rate": 4.549619847939176e-05, "loss": 0.7003, "step": 2261 }, { "epoch": 2.89536, "grad_norm": 0.6824017763137817, "learning_rate": 4.5494197679071634e-05, "loss": 0.7026, "step": 2262 }, { "epoch": 2.89664, "grad_norm": 0.6626001000404358, "learning_rate": 4.5492196878751506e-05, "loss": 0.6712, "step": 2263 }, { "epoch": 2.89792, "grad_norm": 0.6786776781082153, "learning_rate": 4.549019607843137e-05, "loss": 0.6515, "step": 2264 }, { "epoch": 2.8992, "grad_norm": 0.677880048751831, "learning_rate": 4.548819527811124e-05, "loss": 0.6956, "step": 2265 }, { "epoch": 2.90048, "grad_norm": 0.6466307640075684, "learning_rate": 4.5486194477791115e-05, "loss": 0.7, "step": 2266 }, { "epoch": 2.90176, "grad_norm": 0.6700591444969177, "learning_rate": 4.548419367747099e-05, "loss": 0.708, "step": 2267 }, { "epoch": 2.90304, "grad_norm": 0.6479355096817017, "learning_rate": 4.5482192877150865e-05, "loss": 0.6921, "step": 2268 }, { "epoch": 2.9043200000000002, "grad_norm": 0.6609050631523132, "learning_rate": 4.548019207683074e-05, "loss": 0.7045, "step": 2269 }, { "epoch": 2.9055999999999997, "grad_norm": 0.6681724190711975, "learning_rate": 4.547819127651061e-05, "loss": 0.6764, "step": 2270 }, { "epoch": 2.90688, "grad_norm": 0.6616440415382385, "learning_rate": 4.547619047619048e-05, "loss": 0.6934, "step": 2271 }, { "epoch": 2.90816, "grad_norm": 0.6577678322792053, "learning_rate": 4.5474189675870346e-05, "loss": 0.7208, "step": 2272 }, { "epoch": 2.90944, "grad_norm": 0.6378509402275085, "learning_rate": 4.547218887555022e-05, "loss": 0.6296, "step": 2273 }, { "epoch": 2.91072, "grad_norm": 0.6398113965988159, "learning_rate": 4.5470188075230096e-05, "loss": 0.631, "step": 
2274 }, { "epoch": 2.912, "grad_norm": 0.6845940947532654, "learning_rate": 4.546818727490997e-05, "loss": 0.7193, "step": 2275 }, { "epoch": 2.91328, "grad_norm": 0.6516909003257751, "learning_rate": 4.546618647458984e-05, "loss": 0.7033, "step": 2276 }, { "epoch": 2.91456, "grad_norm": 0.6223090887069702, "learning_rate": 4.546418567426971e-05, "loss": 0.6473, "step": 2277 }, { "epoch": 2.91584, "grad_norm": 0.6670078635215759, "learning_rate": 4.5462184873949584e-05, "loss": 0.7206, "step": 2278 }, { "epoch": 2.91712, "grad_norm": 0.6714773774147034, "learning_rate": 4.5460184073629455e-05, "loss": 0.6349, "step": 2279 }, { "epoch": 2.9184, "grad_norm": 0.6824267506599426, "learning_rate": 4.545818327330932e-05, "loss": 0.6335, "step": 2280 }, { "epoch": 2.91968, "grad_norm": 0.7514560222625732, "learning_rate": 4.54561824729892e-05, "loss": 0.7474, "step": 2281 }, { "epoch": 2.92096, "grad_norm": 0.6569649577140808, "learning_rate": 4.545418167266907e-05, "loss": 0.7029, "step": 2282 }, { "epoch": 2.92224, "grad_norm": 0.700855553150177, "learning_rate": 4.545218087234894e-05, "loss": 0.7706, "step": 2283 }, { "epoch": 2.92352, "grad_norm": 0.6456267833709717, "learning_rate": 4.5450180072028815e-05, "loss": 0.7021, "step": 2284 }, { "epoch": 2.9248, "grad_norm": 0.6533935070037842, "learning_rate": 4.5448179271708687e-05, "loss": 0.6852, "step": 2285 }, { "epoch": 2.92608, "grad_norm": 0.6358088850975037, "learning_rate": 4.544617847138856e-05, "loss": 0.7173, "step": 2286 }, { "epoch": 2.92736, "grad_norm": 0.6510772109031677, "learning_rate": 4.544417767106843e-05, "loss": 0.6866, "step": 2287 }, { "epoch": 2.92864, "grad_norm": 0.642501950263977, "learning_rate": 4.54421768707483e-05, "loss": 0.7078, "step": 2288 }, { "epoch": 2.92992, "grad_norm": 0.6543188095092773, "learning_rate": 4.5440176070428174e-05, "loss": 0.6925, "step": 2289 }, { "epoch": 2.9312, "grad_norm": 0.7026666402816772, "learning_rate": 4.5438175270108046e-05, "loss": 0.7661, "step": 
2290 }, { "epoch": 2.93248, "grad_norm": 0.6860962510108948, "learning_rate": 4.543617446978792e-05, "loss": 0.7165, "step": 2291 }, { "epoch": 2.93376, "grad_norm": 0.6254853010177612, "learning_rate": 4.543417366946779e-05, "loss": 0.6598, "step": 2292 }, { "epoch": 2.93504, "grad_norm": 0.6507871150970459, "learning_rate": 4.543217286914766e-05, "loss": 0.6557, "step": 2293 }, { "epoch": 2.9363200000000003, "grad_norm": 0.6633324027061462, "learning_rate": 4.543017206882753e-05, "loss": 0.6782, "step": 2294 }, { "epoch": 2.9375999999999998, "grad_norm": 0.6420159935951233, "learning_rate": 4.5428171268507405e-05, "loss": 0.7025, "step": 2295 }, { "epoch": 2.93888, "grad_norm": 0.6702166795730591, "learning_rate": 4.542617046818728e-05, "loss": 0.7024, "step": 2296 }, { "epoch": 2.94016, "grad_norm": 0.6596609950065613, "learning_rate": 4.542416966786715e-05, "loss": 0.6926, "step": 2297 }, { "epoch": 2.94144, "grad_norm": 0.6574816703796387, "learning_rate": 4.542216886754702e-05, "loss": 0.6624, "step": 2298 }, { "epoch": 2.94272, "grad_norm": 0.6869375109672546, "learning_rate": 4.542016806722689e-05, "loss": 0.6966, "step": 2299 }, { "epoch": 2.944, "grad_norm": 0.659354031085968, "learning_rate": 4.5418167266906764e-05, "loss": 0.6866, "step": 2300 }, { "epoch": 2.94528, "grad_norm": 0.6519255042076111, "learning_rate": 4.5416166466586636e-05, "loss": 0.6968, "step": 2301 }, { "epoch": 2.94656, "grad_norm": 0.6179057955741882, "learning_rate": 4.541416566626651e-05, "loss": 0.6456, "step": 2302 }, { "epoch": 2.9478400000000002, "grad_norm": 0.6320080161094666, "learning_rate": 4.541216486594638e-05, "loss": 0.6268, "step": 2303 }, { "epoch": 2.9491199999999997, "grad_norm": 0.6388428807258606, "learning_rate": 4.541016406562625e-05, "loss": 0.7033, "step": 2304 }, { "epoch": 2.9504, "grad_norm": 0.6056840419769287, "learning_rate": 4.5408163265306124e-05, "loss": 0.6709, "step": 2305 }, { "epoch": 2.95168, "grad_norm": 0.6787373423576355, "learning_rate": 
4.5406162464985996e-05, "loss": 0.6955, "step": 2306 }, { "epoch": 2.95296, "grad_norm": 0.6520240306854248, "learning_rate": 4.540416166466587e-05, "loss": 0.645, "step": 2307 }, { "epoch": 2.95424, "grad_norm": 0.6489017605781555, "learning_rate": 4.540216086434574e-05, "loss": 0.6428, "step": 2308 }, { "epoch": 2.95552, "grad_norm": 0.6715541481971741, "learning_rate": 4.540016006402561e-05, "loss": 0.6948, "step": 2309 }, { "epoch": 2.9568, "grad_norm": 0.7156697511672974, "learning_rate": 4.539815926370549e-05, "loss": 0.696, "step": 2310 }, { "epoch": 2.95808, "grad_norm": 0.6907376050949097, "learning_rate": 4.5396158463385355e-05, "loss": 0.7406, "step": 2311 }, { "epoch": 2.95936, "grad_norm": 0.6593761444091797, "learning_rate": 4.539415766306523e-05, "loss": 0.6731, "step": 2312 }, { "epoch": 2.96064, "grad_norm": 0.6654943227767944, "learning_rate": 4.53921568627451e-05, "loss": 0.6773, "step": 2313 }, { "epoch": 2.96192, "grad_norm": 0.6800094246864319, "learning_rate": 4.539015606242497e-05, "loss": 0.7691, "step": 2314 }, { "epoch": 2.9632, "grad_norm": 0.6977217793464661, "learning_rate": 4.538815526210484e-05, "loss": 0.6491, "step": 2315 }, { "epoch": 2.96448, "grad_norm": 0.6390252113342285, "learning_rate": 4.5386154461784714e-05, "loss": 0.6644, "step": 2316 }, { "epoch": 2.96576, "grad_norm": 0.6670240163803101, "learning_rate": 4.538415366146459e-05, "loss": 0.6976, "step": 2317 }, { "epoch": 2.96704, "grad_norm": 0.6732922196388245, "learning_rate": 4.5382152861144465e-05, "loss": 0.6924, "step": 2318 }, { "epoch": 2.96832, "grad_norm": 0.6866724491119385, "learning_rate": 4.538015206082433e-05, "loss": 0.6729, "step": 2319 }, { "epoch": 2.9696, "grad_norm": 0.6516912579536438, "learning_rate": 4.53781512605042e-05, "loss": 0.7145, "step": 2320 }, { "epoch": 2.97088, "grad_norm": 0.6390223503112793, "learning_rate": 4.5376150460184073e-05, "loss": 0.6462, "step": 2321 }, { "epoch": 2.97216, "grad_norm": 0.6120672821998596, "learning_rate": 
4.5374149659863945e-05, "loss": 0.6401, "step": 2322 }, { "epoch": 2.97344, "grad_norm": 0.6073777675628662, "learning_rate": 4.537214885954382e-05, "loss": 0.6114, "step": 2323 }, { "epoch": 2.97472, "grad_norm": 0.698907732963562, "learning_rate": 4.5370148059223696e-05, "loss": 0.7557, "step": 2324 }, { "epoch": 2.976, "grad_norm": 0.7005800008773804, "learning_rate": 4.536814725890357e-05, "loss": 0.7338, "step": 2325 }, { "epoch": 2.97728, "grad_norm": 0.673547089099884, "learning_rate": 4.536614645858344e-05, "loss": 0.6493, "step": 2326 }, { "epoch": 2.97856, "grad_norm": 0.6851591467857361, "learning_rate": 4.5364145658263305e-05, "loss": 0.7215, "step": 2327 }, { "epoch": 2.9798400000000003, "grad_norm": 0.5817590355873108, "learning_rate": 4.5362144857943176e-05, "loss": 0.5828, "step": 2328 }, { "epoch": 2.9811199999999998, "grad_norm": 0.6559504866600037, "learning_rate": 4.536014405762305e-05, "loss": 0.6832, "step": 2329 }, { "epoch": 2.9824, "grad_norm": 0.6387779712677002, "learning_rate": 4.535814325730292e-05, "loss": 0.6812, "step": 2330 }, { "epoch": 2.98368, "grad_norm": 0.6166095733642578, "learning_rate": 4.53561424569828e-05, "loss": 0.6638, "step": 2331 }, { "epoch": 2.98496, "grad_norm": 0.6481328010559082, "learning_rate": 4.535414165666267e-05, "loss": 0.6361, "step": 2332 }, { "epoch": 2.98624, "grad_norm": 0.6827143430709839, "learning_rate": 4.535214085634254e-05, "loss": 0.687, "step": 2333 }, { "epoch": 2.98752, "grad_norm": 0.6266081929206848, "learning_rate": 4.5350140056022414e-05, "loss": 0.6573, "step": 2334 }, { "epoch": 2.9888, "grad_norm": 0.6480961441993713, "learning_rate": 4.534813925570228e-05, "loss": 0.6742, "step": 2335 }, { "epoch": 2.99008, "grad_norm": 0.6894843578338623, "learning_rate": 4.534613845538215e-05, "loss": 0.672, "step": 2336 }, { "epoch": 2.9913600000000002, "grad_norm": 0.6723330020904541, "learning_rate": 4.534413765506202e-05, "loss": 0.6999, "step": 2337 }, { "epoch": 2.9926399999999997, 
"grad_norm": 0.7179523706436157, "learning_rate": 4.53421368547419e-05, "loss": 0.7847, "step": 2338 }, { "epoch": 2.99392, "grad_norm": 0.648439884185791, "learning_rate": 4.5340136054421774e-05, "loss": 0.6769, "step": 2339 }, { "epoch": 2.9952, "grad_norm": 0.6338523030281067, "learning_rate": 4.5338135254101645e-05, "loss": 0.6469, "step": 2340 }, { "epoch": 2.99648, "grad_norm": 0.6572247743606567, "learning_rate": 4.533613445378152e-05, "loss": 0.7077, "step": 2341 }, { "epoch": 2.99776, "grad_norm": 0.6434239149093628, "learning_rate": 4.533413365346139e-05, "loss": 0.6655, "step": 2342 }, { "epoch": 2.99904, "grad_norm": 0.67351895570755, "learning_rate": 4.5332132853141254e-05, "loss": 0.7103, "step": 2343 }, { "epoch": 3.00032, "grad_norm": 1.486992359161377, "learning_rate": 4.5330132052821126e-05, "loss": 1.2061, "step": 2344 }, { "epoch": 3.0016, "grad_norm": 0.6494001150131226, "learning_rate": 4.5328131252501005e-05, "loss": 0.7025, "step": 2345 }, { "epoch": 3.00288, "grad_norm": 0.6204409003257751, "learning_rate": 4.532613045218088e-05, "loss": 0.665, "step": 2346 }, { "epoch": 3.00416, "grad_norm": 0.6547411680221558, "learning_rate": 4.532412965186075e-05, "loss": 0.6653, "step": 2347 }, { "epoch": 3.00544, "grad_norm": 0.6656926274299622, "learning_rate": 4.532212885154062e-05, "loss": 0.6842, "step": 2348 }, { "epoch": 3.00672, "grad_norm": 0.7050084471702576, "learning_rate": 4.532012805122049e-05, "loss": 0.6961, "step": 2349 }, { "epoch": 3.008, "grad_norm": 0.6964038610458374, "learning_rate": 4.5318127250900364e-05, "loss": 0.6978, "step": 2350 }, { "epoch": 3.00928, "grad_norm": 0.6723408699035645, "learning_rate": 4.531612645058023e-05, "loss": 0.7163, "step": 2351 }, { "epoch": 3.01056, "grad_norm": 0.6292080879211426, "learning_rate": 4.531412565026011e-05, "loss": 0.6484, "step": 2352 }, { "epoch": 3.01184, "grad_norm": 0.6262539625167847, "learning_rate": 4.531212484993998e-05, "loss": 0.6386, "step": 2353 }, { "epoch": 3.01312, 
"grad_norm": 0.650016725063324, "learning_rate": 4.531012404961985e-05, "loss": 0.6554, "step": 2354 }, { "epoch": 3.0144, "grad_norm": 0.6762639880180359, "learning_rate": 4.530812324929972e-05, "loss": 0.6389, "step": 2355 }, { "epoch": 3.01568, "grad_norm": 0.7196240425109863, "learning_rate": 4.5306122448979595e-05, "loss": 0.6837, "step": 2356 }, { "epoch": 3.01696, "grad_norm": 0.6458487510681152, "learning_rate": 4.530412164865947e-05, "loss": 0.6213, "step": 2357 }, { "epoch": 3.01824, "grad_norm": 0.6251707077026367, "learning_rate": 4.530212084833934e-05, "loss": 0.612, "step": 2358 }, { "epoch": 3.01952, "grad_norm": 0.6338829398155212, "learning_rate": 4.530012004801921e-05, "loss": 0.6327, "step": 2359 }, { "epoch": 3.0208, "grad_norm": 0.6653558611869812, "learning_rate": 4.529811924769908e-05, "loss": 0.6595, "step": 2360 }, { "epoch": 3.02208, "grad_norm": 0.677976131439209, "learning_rate": 4.5296118447378954e-05, "loss": 0.6325, "step": 2361 }, { "epoch": 3.02336, "grad_norm": 0.6878952980041504, "learning_rate": 4.5294117647058826e-05, "loss": 0.7399, "step": 2362 }, { "epoch": 3.02464, "grad_norm": 0.6632867455482483, "learning_rate": 4.52921168467387e-05, "loss": 0.7067, "step": 2363 }, { "epoch": 3.02592, "grad_norm": 0.6564420461654663, "learning_rate": 4.529011604641857e-05, "loss": 0.6509, "step": 2364 }, { "epoch": 3.0272, "grad_norm": 0.6492841243743896, "learning_rate": 4.528811524609844e-05, "loss": 0.6339, "step": 2365 }, { "epoch": 3.02848, "grad_norm": 0.682312548160553, "learning_rate": 4.5286114445778314e-05, "loss": 0.6906, "step": 2366 }, { "epoch": 3.02976, "grad_norm": 0.6361678242683411, "learning_rate": 4.5284113645458186e-05, "loss": 0.6743, "step": 2367 }, { "epoch": 3.03104, "grad_norm": 0.6740372776985168, "learning_rate": 4.528211284513806e-05, "loss": 0.643, "step": 2368 }, { "epoch": 3.03232, "grad_norm": 0.6516414880752563, "learning_rate": 4.528011204481793e-05, "loss": 0.6258, "step": 2369 }, { "epoch": 3.0336, 
"grad_norm": 0.677844762802124, "learning_rate": 4.52781112444978e-05, "loss": 0.7175, "step": 2370 }, { "epoch": 3.03488, "grad_norm": 0.6672567129135132, "learning_rate": 4.527611044417767e-05, "loss": 0.6363, "step": 2371 }, { "epoch": 3.03616, "grad_norm": 0.6582069993019104, "learning_rate": 4.5274109643857545e-05, "loss": 0.6774, "step": 2372 }, { "epoch": 3.03744, "grad_norm": 0.670803427696228, "learning_rate": 4.527210884353742e-05, "loss": 0.6809, "step": 2373 }, { "epoch": 3.03872, "grad_norm": 0.6593008041381836, "learning_rate": 4.527010804321729e-05, "loss": 0.648, "step": 2374 }, { "epoch": 3.04, "grad_norm": 0.6505773067474365, "learning_rate": 4.526810724289716e-05, "loss": 0.6273, "step": 2375 }, { "epoch": 3.04128, "grad_norm": 0.7069177031517029, "learning_rate": 4.526610644257703e-05, "loss": 0.7197, "step": 2376 }, { "epoch": 3.04256, "grad_norm": 0.6976633667945862, "learning_rate": 4.5264105642256904e-05, "loss": 0.6741, "step": 2377 }, { "epoch": 3.04384, "grad_norm": 0.7404224276542664, "learning_rate": 4.5262104841936776e-05, "loss": 0.7145, "step": 2378 }, { "epoch": 3.04512, "grad_norm": 0.6723030209541321, "learning_rate": 4.526010404161665e-05, "loss": 0.6488, "step": 2379 }, { "epoch": 3.0464, "grad_norm": 0.689201831817627, "learning_rate": 4.5258103241296527e-05, "loss": 0.7129, "step": 2380 }, { "epoch": 3.04768, "grad_norm": 0.7270947098731995, "learning_rate": 4.525610244097639e-05, "loss": 0.6891, "step": 2381 }, { "epoch": 3.04896, "grad_norm": 0.7034469246864319, "learning_rate": 4.5254101640656263e-05, "loss": 0.7154, "step": 2382 }, { "epoch": 3.05024, "grad_norm": 0.7317506074905396, "learning_rate": 4.5252100840336135e-05, "loss": 0.6951, "step": 2383 }, { "epoch": 3.05152, "grad_norm": 0.6207937598228455, "learning_rate": 4.525010004001601e-05, "loss": 0.6367, "step": 2384 }, { "epoch": 3.0528, "grad_norm": 0.6545078158378601, "learning_rate": 4.524809923969588e-05, "loss": 0.6703, "step": 2385 }, { "epoch": 3.05408, 
"grad_norm": 0.6788381934165955, "learning_rate": 4.524609843937575e-05, "loss": 0.6538, "step": 2386 }, { "epoch": 3.05536, "grad_norm": 0.64985191822052, "learning_rate": 4.524409763905563e-05, "loss": 0.681, "step": 2387 }, { "epoch": 3.05664, "grad_norm": 0.6559796333312988, "learning_rate": 4.52420968387355e-05, "loss": 0.7017, "step": 2388 }, { "epoch": 3.05792, "grad_norm": 0.677920937538147, "learning_rate": 4.5240096038415366e-05, "loss": 0.6879, "step": 2389 }, { "epoch": 3.0592, "grad_norm": 0.6159390211105347, "learning_rate": 4.523809523809524e-05, "loss": 0.6216, "step": 2390 }, { "epoch": 3.06048, "grad_norm": 0.7163643836975098, "learning_rate": 4.523609443777511e-05, "loss": 0.7317, "step": 2391 }, { "epoch": 3.06176, "grad_norm": 0.6356967091560364, "learning_rate": 4.523409363745498e-05, "loss": 0.6964, "step": 2392 }, { "epoch": 3.06304, "grad_norm": 0.7133451700210571, "learning_rate": 4.5232092837134854e-05, "loss": 0.683, "step": 2393 }, { "epoch": 3.06432, "grad_norm": 0.6646026372909546, "learning_rate": 4.523009203681473e-05, "loss": 0.6141, "step": 2394 }, { "epoch": 3.0656, "grad_norm": 0.6448561549186707, "learning_rate": 4.5228091236494604e-05, "loss": 0.6628, "step": 2395 }, { "epoch": 3.06688, "grad_norm": 0.6711647510528564, "learning_rate": 4.5226090436174476e-05, "loss": 0.6526, "step": 2396 }, { "epoch": 3.0681599999999998, "grad_norm": 0.6663058996200562, "learning_rate": 4.522408963585434e-05, "loss": 0.6812, "step": 2397 }, { "epoch": 3.06944, "grad_norm": 0.6636464595794678, "learning_rate": 4.522208883553421e-05, "loss": 0.6382, "step": 2398 }, { "epoch": 3.07072, "grad_norm": 0.6658351421356201, "learning_rate": 4.5220088035214085e-05, "loss": 0.7137, "step": 2399 }, { "epoch": 3.072, "grad_norm": 0.6860455870628357, "learning_rate": 4.521808723489396e-05, "loss": 0.679, "step": 2400 }, { "epoch": 3.07328, "grad_norm": 0.6328697800636292, "learning_rate": 4.5216086434573836e-05, "loss": 0.6579, "step": 2401 }, { "epoch": 
3.07456, "grad_norm": 0.665577232837677, "learning_rate": 4.521408563425371e-05, "loss": 0.6864, "step": 2402 }, { "epoch": 3.07584, "grad_norm": 0.6703231334686279, "learning_rate": 4.521208483393358e-05, "loss": 0.6518, "step": 2403 }, { "epoch": 3.07712, "grad_norm": 0.7464866042137146, "learning_rate": 4.521008403361345e-05, "loss": 0.7464, "step": 2404 }, { "epoch": 3.0784, "grad_norm": 0.6916598677635193, "learning_rate": 4.5208083233293316e-05, "loss": 0.6668, "step": 2405 }, { "epoch": 3.07968, "grad_norm": 0.7007398009300232, "learning_rate": 4.520608243297319e-05, "loss": 0.6506, "step": 2406 }, { "epoch": 3.08096, "grad_norm": 0.6712905168533325, "learning_rate": 4.520408163265306e-05, "loss": 0.6568, "step": 2407 }, { "epoch": 3.08224, "grad_norm": 0.687311053276062, "learning_rate": 4.520208083233294e-05, "loss": 0.6959, "step": 2408 }, { "epoch": 3.08352, "grad_norm": 0.6788797378540039, "learning_rate": 4.520008003201281e-05, "loss": 0.6703, "step": 2409 }, { "epoch": 3.0848, "grad_norm": 0.6620363593101501, "learning_rate": 4.519807923169268e-05, "loss": 0.6754, "step": 2410 }, { "epoch": 3.08608, "grad_norm": 0.6966630220413208, "learning_rate": 4.5196078431372554e-05, "loss": 0.7254, "step": 2411 }, { "epoch": 3.08736, "grad_norm": 0.6657626032829285, "learning_rate": 4.5194077631052426e-05, "loss": 0.6667, "step": 2412 }, { "epoch": 3.08864, "grad_norm": 0.6981019377708435, "learning_rate": 4.519207683073229e-05, "loss": 0.6876, "step": 2413 }, { "epoch": 3.08992, "grad_norm": 0.7262740731239319, "learning_rate": 4.519007603041216e-05, "loss": 0.7387, "step": 2414 }, { "epoch": 3.0912, "grad_norm": 0.7206056714057922, "learning_rate": 4.5188075230092035e-05, "loss": 0.7254, "step": 2415 }, { "epoch": 3.09248, "grad_norm": 0.700833261013031, "learning_rate": 4.518607442977191e-05, "loss": 0.6647, "step": 2416 }, { "epoch": 3.09376, "grad_norm": 0.6799070239067078, "learning_rate": 4.5184073629451785e-05, "loss": 0.6117, "step": 2417 }, { "epoch": 
3.09504, "grad_norm": 0.7079071998596191, "learning_rate": 4.518207282913166e-05, "loss": 0.714, "step": 2418 }, { "epoch": 3.09632, "grad_norm": 0.7541791200637817, "learning_rate": 4.518007202881153e-05, "loss": 0.7597, "step": 2419 }, { "epoch": 3.0976, "grad_norm": 0.7132668495178223, "learning_rate": 4.51780712284914e-05, "loss": 0.6674, "step": 2420 }, { "epoch": 3.09888, "grad_norm": 0.6781483292579651, "learning_rate": 4.5176070428171266e-05, "loss": 0.7066, "step": 2421 }, { "epoch": 3.10016, "grad_norm": 0.6593953371047974, "learning_rate": 4.517406962785114e-05, "loss": 0.6443, "step": 2422 }, { "epoch": 3.10144, "grad_norm": 0.6513901948928833, "learning_rate": 4.5172068827531016e-05, "loss": 0.6421, "step": 2423 }, { "epoch": 3.10272, "grad_norm": 0.6750826239585876, "learning_rate": 4.517006802721089e-05, "loss": 0.6934, "step": 2424 }, { "epoch": 3.104, "grad_norm": 0.6588460803031921, "learning_rate": 4.516806722689076e-05, "loss": 0.6222, "step": 2425 }, { "epoch": 3.10528, "grad_norm": 0.6957754492759705, "learning_rate": 4.516606642657063e-05, "loss": 0.7149, "step": 2426 }, { "epoch": 3.10656, "grad_norm": 0.644356906414032, "learning_rate": 4.5164065626250504e-05, "loss": 0.6543, "step": 2427 }, { "epoch": 3.10784, "grad_norm": 0.65400230884552, "learning_rate": 4.5162064825930376e-05, "loss": 0.6291, "step": 2428 }, { "epoch": 3.10912, "grad_norm": 0.6292628049850464, "learning_rate": 4.516006402561024e-05, "loss": 0.6608, "step": 2429 }, { "epoch": 3.1104, "grad_norm": 0.6592010855674744, "learning_rate": 4.515806322529012e-05, "loss": 0.6669, "step": 2430 }, { "epoch": 3.11168, "grad_norm": 0.6952319145202637, "learning_rate": 4.515606242496999e-05, "loss": 0.6414, "step": 2431 }, { "epoch": 3.11296, "grad_norm": 0.679885745048523, "learning_rate": 4.515406162464986e-05, "loss": 0.6805, "step": 2432 }, { "epoch": 3.11424, "grad_norm": 0.6636596918106079, "learning_rate": 4.5152060824329735e-05, "loss": 0.673, "step": 2433 }, { "epoch": 
3.11552, "grad_norm": 0.6836968660354614, "learning_rate": 4.515006002400961e-05, "loss": 0.6919, "step": 2434 }, { "epoch": 3.1168, "grad_norm": 0.6769798994064331, "learning_rate": 4.514805922368948e-05, "loss": 0.6437, "step": 2435 }, { "epoch": 3.11808, "grad_norm": 0.6906677484512329, "learning_rate": 4.514605842336935e-05, "loss": 0.6757, "step": 2436 }, { "epoch": 3.11936, "grad_norm": 0.6719872951507568, "learning_rate": 4.514405762304922e-05, "loss": 0.7031, "step": 2437 }, { "epoch": 3.12064, "grad_norm": 0.667978048324585, "learning_rate": 4.5142056822729094e-05, "loss": 0.6664, "step": 2438 }, { "epoch": 3.12192, "grad_norm": 0.6768871545791626, "learning_rate": 4.5140056022408966e-05, "loss": 0.6375, "step": 2439 }, { "epoch": 3.1232, "grad_norm": 0.6770617961883545, "learning_rate": 4.513805522208884e-05, "loss": 0.7146, "step": 2440 }, { "epoch": 3.12448, "grad_norm": 0.7039771676063538, "learning_rate": 4.513605442176871e-05, "loss": 0.6729, "step": 2441 }, { "epoch": 3.12576, "grad_norm": 0.7217344641685486, "learning_rate": 4.513405362144858e-05, "loss": 0.7069, "step": 2442 }, { "epoch": 3.12704, "grad_norm": 0.6702495217323303, "learning_rate": 4.5132052821128454e-05, "loss": 0.707, "step": 2443 }, { "epoch": 3.12832, "grad_norm": 0.6823554635047913, "learning_rate": 4.5130052020808325e-05, "loss": 0.6586, "step": 2444 }, { "epoch": 3.1296, "grad_norm": 0.6816961765289307, "learning_rate": 4.51280512204882e-05, "loss": 0.716, "step": 2445 }, { "epoch": 3.13088, "grad_norm": 0.6727588176727295, "learning_rate": 4.512605042016807e-05, "loss": 0.6628, "step": 2446 }, { "epoch": 3.13216, "grad_norm": 0.6545873284339905, "learning_rate": 4.512404961984794e-05, "loss": 0.6654, "step": 2447 }, { "epoch": 3.1334400000000002, "grad_norm": 0.6535388827323914, "learning_rate": 4.512204881952781e-05, "loss": 0.6513, "step": 2448 }, { "epoch": 3.13472, "grad_norm": 0.6998023986816406, "learning_rate": 4.5120048019207685e-05, "loss": 0.6721, "step": 2449 }, { 
"epoch": 3.136, "grad_norm": 0.7023734450340271, "learning_rate": 4.5118047218887557e-05, "loss": 0.705, "step": 2450 }, { "epoch": 3.13728, "grad_norm": 0.6585432291030884, "learning_rate": 4.511604641856743e-05, "loss": 0.6781, "step": 2451 }, { "epoch": 3.13856, "grad_norm": 0.6865501403808594, "learning_rate": 4.51140456182473e-05, "loss": 0.6667, "step": 2452 }, { "epoch": 3.13984, "grad_norm": 0.6533448100090027, "learning_rate": 4.511204481792717e-05, "loss": 0.6903, "step": 2453 }, { "epoch": 3.14112, "grad_norm": 0.6679571270942688, "learning_rate": 4.5110044017607044e-05, "loss": 0.6596, "step": 2454 }, { "epoch": 3.1424, "grad_norm": 0.6311309337615967, "learning_rate": 4.5108043217286916e-05, "loss": 0.6224, "step": 2455 }, { "epoch": 3.14368, "grad_norm": 0.6664997935295105, "learning_rate": 4.510604241696679e-05, "loss": 0.6438, "step": 2456 }, { "epoch": 3.14496, "grad_norm": 0.6539314389228821, "learning_rate": 4.510404161664666e-05, "loss": 0.6767, "step": 2457 }, { "epoch": 3.14624, "grad_norm": 0.6571037769317627, "learning_rate": 4.510204081632654e-05, "loss": 0.6322, "step": 2458 }, { "epoch": 3.14752, "grad_norm": 0.6855980753898621, "learning_rate": 4.51000400160064e-05, "loss": 0.7187, "step": 2459 }, { "epoch": 3.1488, "grad_norm": 0.6939951181411743, "learning_rate": 4.5098039215686275e-05, "loss": 0.7095, "step": 2460 }, { "epoch": 3.15008, "grad_norm": 0.6432140469551086, "learning_rate": 4.509603841536615e-05, "loss": 0.6465, "step": 2461 }, { "epoch": 3.15136, "grad_norm": 0.670173704624176, "learning_rate": 4.509403761504602e-05, "loss": 0.6325, "step": 2462 }, { "epoch": 3.15264, "grad_norm": 0.6773591041564941, "learning_rate": 4.509203681472589e-05, "loss": 0.6375, "step": 2463 }, { "epoch": 3.15392, "grad_norm": 0.6705629229545593, "learning_rate": 4.509003601440576e-05, "loss": 0.6594, "step": 2464 }, { "epoch": 3.1552, "grad_norm": 0.7062819004058838, "learning_rate": 4.508803521408564e-05, "loss": 0.6698, "step": 2465 }, { 
"epoch": 3.15648, "grad_norm": 0.7161005139350891, "learning_rate": 4.508603441376551e-05, "loss": 0.643, "step": 2466 }, { "epoch": 3.15776, "grad_norm": 0.6911036968231201, "learning_rate": 4.508403361344538e-05, "loss": 0.6432, "step": 2467 }, { "epoch": 3.15904, "grad_norm": 0.6515189409255981, "learning_rate": 4.508203281312525e-05, "loss": 0.6367, "step": 2468 }, { "epoch": 3.16032, "grad_norm": 0.6516139507293701, "learning_rate": 4.508003201280512e-05, "loss": 0.6113, "step": 2469 }, { "epoch": 3.1616, "grad_norm": 0.687893807888031, "learning_rate": 4.5078031212484994e-05, "loss": 0.6723, "step": 2470 }, { "epoch": 3.16288, "grad_norm": 0.6721757650375366, "learning_rate": 4.5076030412164866e-05, "loss": 0.6546, "step": 2471 }, { "epoch": 3.16416, "grad_norm": 0.67435622215271, "learning_rate": 4.5074029611844744e-05, "loss": 0.6481, "step": 2472 }, { "epoch": 3.16544, "grad_norm": 0.6942387223243713, "learning_rate": 4.5072028811524616e-05, "loss": 0.7223, "step": 2473 }, { "epoch": 3.16672, "grad_norm": 0.717036247253418, "learning_rate": 4.507002801120449e-05, "loss": 0.6983, "step": 2474 }, { "epoch": 3.168, "grad_norm": 0.6933386325836182, "learning_rate": 4.506802721088435e-05, "loss": 0.6512, "step": 2475 }, { "epoch": 3.16928, "grad_norm": 0.6787186861038208, "learning_rate": 4.5066026410564225e-05, "loss": 0.7028, "step": 2476 }, { "epoch": 3.17056, "grad_norm": 0.6807264089584351, "learning_rate": 4.50640256102441e-05, "loss": 0.6713, "step": 2477 }, { "epoch": 3.17184, "grad_norm": 0.6492279767990112, "learning_rate": 4.506202480992397e-05, "loss": 0.6451, "step": 2478 }, { "epoch": 3.17312, "grad_norm": 0.6500716209411621, "learning_rate": 4.506002400960385e-05, "loss": 0.6752, "step": 2479 }, { "epoch": 3.1744, "grad_norm": 0.7048171162605286, "learning_rate": 4.505802320928372e-05, "loss": 0.73, "step": 2480 }, { "epoch": 3.17568, "grad_norm": 0.709684431552887, "learning_rate": 4.505602240896359e-05, "loss": 0.6861, "step": 2481 }, { 
"epoch": 3.1769600000000002, "grad_norm": 0.6857057213783264, "learning_rate": 4.505402160864346e-05, "loss": 0.6706, "step": 2482 }, { "epoch": 3.17824, "grad_norm": 0.7002062797546387, "learning_rate": 4.505202080832333e-05, "loss": 0.735, "step": 2483 }, { "epoch": 3.17952, "grad_norm": 0.6401265859603882, "learning_rate": 4.50500200080032e-05, "loss": 0.6662, "step": 2484 }, { "epoch": 3.1808, "grad_norm": 0.6491225361824036, "learning_rate": 4.504801920768307e-05, "loss": 0.6084, "step": 2485 }, { "epoch": 3.18208, "grad_norm": 0.7253592014312744, "learning_rate": 4.504601840736295e-05, "loss": 0.6837, "step": 2486 }, { "epoch": 3.18336, "grad_norm": 0.6980118751525879, "learning_rate": 4.504401760704282e-05, "loss": 0.663, "step": 2487 }, { "epoch": 3.18464, "grad_norm": 0.6534168124198914, "learning_rate": 4.5042016806722694e-05, "loss": 0.633, "step": 2488 }, { "epoch": 3.18592, "grad_norm": 0.7110708355903625, "learning_rate": 4.5040016006402566e-05, "loss": 0.6896, "step": 2489 }, { "epoch": 3.1872, "grad_norm": 0.7201905846595764, "learning_rate": 4.503801520608244e-05, "loss": 0.6583, "step": 2490 }, { "epoch": 3.18848, "grad_norm": 0.7668898701667786, "learning_rate": 4.50360144057623e-05, "loss": 0.7062, "step": 2491 }, { "epoch": 3.18976, "grad_norm": 0.6760601997375488, "learning_rate": 4.5034013605442174e-05, "loss": 0.6544, "step": 2492 }, { "epoch": 3.19104, "grad_norm": 0.7329314351081848, "learning_rate": 4.503201280512205e-05, "loss": 0.6991, "step": 2493 }, { "epoch": 3.19232, "grad_norm": 0.7096596956253052, "learning_rate": 4.5030012004801925e-05, "loss": 0.7134, "step": 2494 }, { "epoch": 3.1936, "grad_norm": 0.688031017780304, "learning_rate": 4.50280112044818e-05, "loss": 0.6861, "step": 2495 }, { "epoch": 3.19488, "grad_norm": 0.6957057118415833, "learning_rate": 4.502601040416167e-05, "loss": 0.6978, "step": 2496 }, { "epoch": 3.19616, "grad_norm": 0.6752075552940369, "learning_rate": 4.502400960384154e-05, "loss": 0.6878, "step": 2497 
}, { "epoch": 3.19744, "grad_norm": 0.6884586811065674, "learning_rate": 4.502200880352141e-05, "loss": 0.6982, "step": 2498 }, { "epoch": 3.19872, "grad_norm": 0.6521093249320984, "learning_rate": 4.502000800320128e-05, "loss": 0.6606, "step": 2499 }, { "epoch": 3.2, "grad_norm": 0.6644425392150879, "learning_rate": 4.5018007202881156e-05, "loss": 0.6753, "step": 2500 }, { "epoch": 3.20128, "grad_norm": 0.6681901216506958, "learning_rate": 4.501600640256103e-05, "loss": 0.6476, "step": 2501 }, { "epoch": 3.20256, "grad_norm": 0.6761760711669922, "learning_rate": 4.50140056022409e-05, "loss": 0.7179, "step": 2502 }, { "epoch": 3.20384, "grad_norm": 0.6894738078117371, "learning_rate": 4.501200480192077e-05, "loss": 0.6737, "step": 2503 }, { "epoch": 3.20512, "grad_norm": 0.714769184589386, "learning_rate": 4.5010004001600644e-05, "loss": 0.6867, "step": 2504 }, { "epoch": 3.2064, "grad_norm": 0.6950749754905701, "learning_rate": 4.5008003201280515e-05, "loss": 0.6284, "step": 2505 }, { "epoch": 3.20768, "grad_norm": 0.7155801057815552, "learning_rate": 4.500600240096039e-05, "loss": 0.7747, "step": 2506 }, { "epoch": 3.20896, "grad_norm": 0.6814357042312622, "learning_rate": 4.500400160064026e-05, "loss": 0.6696, "step": 2507 }, { "epoch": 3.21024, "grad_norm": 0.6706905364990234, "learning_rate": 4.500200080032013e-05, "loss": 0.6327, "step": 2508 }, { "epoch": 3.21152, "grad_norm": 0.6843993067741394, "learning_rate": 4.5e-05, "loss": 0.6887, "step": 2509 }, { "epoch": 3.2128, "grad_norm": 0.690980076789856, "learning_rate": 4.4997999199679875e-05, "loss": 0.6454, "step": 2510 }, { "epoch": 3.21408, "grad_norm": 0.6931962370872498, "learning_rate": 4.4995998399359747e-05, "loss": 0.7268, "step": 2511 }, { "epoch": 3.21536, "grad_norm": 0.6837199926376343, "learning_rate": 4.499399759903962e-05, "loss": 0.6747, "step": 2512 }, { "epoch": 3.21664, "grad_norm": 0.6582213640213013, "learning_rate": 4.499199679871949e-05, "loss": 0.6761, "step": 2513 }, { "epoch": 
3.21792, "grad_norm": 0.6401382088661194, "learning_rate": 4.498999599839936e-05, "loss": 0.6742, "step": 2514 }, { "epoch": 3.2192, "grad_norm": 0.6773951053619385, "learning_rate": 4.4987995198079234e-05, "loss": 0.6328, "step": 2515 }, { "epoch": 3.2204800000000002, "grad_norm": 0.7156820893287659, "learning_rate": 4.4985994397759106e-05, "loss": 0.7116, "step": 2516 }, { "epoch": 3.22176, "grad_norm": 0.6766327619552612, "learning_rate": 4.498399359743898e-05, "loss": 0.66, "step": 2517 }, { "epoch": 3.22304, "grad_norm": 0.6747710704803467, "learning_rate": 4.498199279711885e-05, "loss": 0.6555, "step": 2518 }, { "epoch": 3.22432, "grad_norm": 0.6878660917282104, "learning_rate": 4.497999199679872e-05, "loss": 0.7085, "step": 2519 }, { "epoch": 3.2256, "grad_norm": 0.7145971655845642, "learning_rate": 4.497799119647859e-05, "loss": 0.65, "step": 2520 }, { "epoch": 3.22688, "grad_norm": 0.664560854434967, "learning_rate": 4.4975990396158465e-05, "loss": 0.692, "step": 2521 }, { "epoch": 3.22816, "grad_norm": 0.6799471378326416, "learning_rate": 4.497398959583834e-05, "loss": 0.7305, "step": 2522 }, { "epoch": 3.22944, "grad_norm": 0.7038140892982483, "learning_rate": 4.497198879551821e-05, "loss": 0.6561, "step": 2523 }, { "epoch": 3.23072, "grad_norm": 0.6589545607566833, "learning_rate": 4.496998799519808e-05, "loss": 0.6906, "step": 2524 }, { "epoch": 3.232, "grad_norm": 0.6808072924613953, "learning_rate": 4.496798719487795e-05, "loss": 0.652, "step": 2525 }, { "epoch": 3.23328, "grad_norm": 0.6572796702384949, "learning_rate": 4.4965986394557824e-05, "loss": 0.6557, "step": 2526 }, { "epoch": 3.23456, "grad_norm": 0.6141901016235352, "learning_rate": 4.4963985594237696e-05, "loss": 0.6165, "step": 2527 }, { "epoch": 3.23584, "grad_norm": 0.6819775104522705, "learning_rate": 4.496198479391757e-05, "loss": 0.6397, "step": 2528 }, { "epoch": 3.23712, "grad_norm": 0.6745738387107849, "learning_rate": 4.495998399359744e-05, "loss": 0.6836, "step": 2529 }, { 
"epoch": 3.2384, "grad_norm": 0.7165648937225342, "learning_rate": 4.495798319327731e-05, "loss": 0.7437, "step": 2530 }, { "epoch": 3.23968, "grad_norm": 0.6725075840950012, "learning_rate": 4.4955982392957184e-05, "loss": 0.6651, "step": 2531 }, { "epoch": 3.24096, "grad_norm": 0.6513887643814087, "learning_rate": 4.4953981592637056e-05, "loss": 0.6218, "step": 2532 }, { "epoch": 3.24224, "grad_norm": 0.7130393385887146, "learning_rate": 4.495198079231693e-05, "loss": 0.7008, "step": 2533 }, { "epoch": 3.24352, "grad_norm": 0.7220726013183594, "learning_rate": 4.49499799919968e-05, "loss": 0.7437, "step": 2534 }, { "epoch": 3.2448, "grad_norm": 0.6959324479103088, "learning_rate": 4.494797919167667e-05, "loss": 0.6412, "step": 2535 }, { "epoch": 3.24608, "grad_norm": 0.6925476789474487, "learning_rate": 4.494597839135655e-05, "loss": 0.671, "step": 2536 }, { "epoch": 3.24736, "grad_norm": 0.6907139420509338, "learning_rate": 4.4943977591036415e-05, "loss": 0.7017, "step": 2537 }, { "epoch": 3.24864, "grad_norm": 0.7118213176727295, "learning_rate": 4.494197679071629e-05, "loss": 0.6902, "step": 2538 }, { "epoch": 3.24992, "grad_norm": 0.6986705660820007, "learning_rate": 4.493997599039616e-05, "loss": 0.6936, "step": 2539 }, { "epoch": 3.2512, "grad_norm": 0.6764949560165405, "learning_rate": 4.493797519007603e-05, "loss": 0.6506, "step": 2540 }, { "epoch": 3.25248, "grad_norm": 0.6302717328071594, "learning_rate": 4.49359743897559e-05, "loss": 0.6249, "step": 2541 }, { "epoch": 3.2537599999999998, "grad_norm": 0.6727375388145447, "learning_rate": 4.4933973589435774e-05, "loss": 0.6644, "step": 2542 }, { "epoch": 3.25504, "grad_norm": 0.6621202826499939, "learning_rate": 4.493197278911565e-05, "loss": 0.6559, "step": 2543 }, { "epoch": 3.25632, "grad_norm": 0.67811119556427, "learning_rate": 4.4929971988795525e-05, "loss": 0.6871, "step": 2544 }, { "epoch": 3.2576, "grad_norm": 0.7005154490470886, "learning_rate": 4.492797118847539e-05, "loss": 0.6732, "step": 
2545 }, { "epoch": 3.25888, "grad_norm": 0.6961964964866638, "learning_rate": 4.492597038815526e-05, "loss": 0.6727, "step": 2546 }, { "epoch": 3.26016, "grad_norm": 0.6468445062637329, "learning_rate": 4.4923969587835133e-05, "loss": 0.6521, "step": 2547 }, { "epoch": 3.26144, "grad_norm": 0.6833783984184265, "learning_rate": 4.4921968787515005e-05, "loss": 0.6713, "step": 2548 }, { "epoch": 3.26272, "grad_norm": 0.6983469724655151, "learning_rate": 4.491996798719488e-05, "loss": 0.7341, "step": 2549 }, { "epoch": 3.2640000000000002, "grad_norm": 0.6976143717765808, "learning_rate": 4.4917967186874756e-05, "loss": 0.6902, "step": 2550 }, { "epoch": 3.26528, "grad_norm": 0.6914013028144836, "learning_rate": 4.491596638655463e-05, "loss": 0.6953, "step": 2551 }, { "epoch": 3.26656, "grad_norm": 0.6625963449478149, "learning_rate": 4.49139655862345e-05, "loss": 0.6624, "step": 2552 }, { "epoch": 3.26784, "grad_norm": 0.6785646677017212, "learning_rate": 4.4911964785914365e-05, "loss": 0.6387, "step": 2553 }, { "epoch": 3.26912, "grad_norm": 0.6610428094863892, "learning_rate": 4.4909963985594236e-05, "loss": 0.6896, "step": 2554 }, { "epoch": 3.2704, "grad_norm": 0.7095894813537598, "learning_rate": 4.490796318527411e-05, "loss": 0.6717, "step": 2555 }, { "epoch": 3.27168, "grad_norm": 0.6596770286560059, "learning_rate": 4.490596238495398e-05, "loss": 0.627, "step": 2556 }, { "epoch": 3.27296, "grad_norm": 0.6432579755783081, "learning_rate": 4.490396158463386e-05, "loss": 0.6196, "step": 2557 }, { "epoch": 3.27424, "grad_norm": 0.6961793303489685, "learning_rate": 4.490196078431373e-05, "loss": 0.6624, "step": 2558 }, { "epoch": 3.27552, "grad_norm": 0.6829546093940735, "learning_rate": 4.48999599839936e-05, "loss": 0.6567, "step": 2559 }, { "epoch": 3.2768, "grad_norm": 0.7708462476730347, "learning_rate": 4.4897959183673474e-05, "loss": 0.7334, "step": 2560 }, { "epoch": 3.27808, "grad_norm": 0.6607860326766968, "learning_rate": 4.489595838335334e-05, "loss": 
0.6581, "step": 2561 }, { "epoch": 3.27936, "grad_norm": 0.6741865873336792, "learning_rate": 4.489395758303321e-05, "loss": 0.6928, "step": 2562 }, { "epoch": 3.28064, "grad_norm": 0.6427492499351501, "learning_rate": 4.489195678271308e-05, "loss": 0.6294, "step": 2563 }, { "epoch": 3.28192, "grad_norm": 0.6945633292198181, "learning_rate": 4.488995598239296e-05, "loss": 0.7027, "step": 2564 }, { "epoch": 3.2832, "grad_norm": 0.6338651776313782, "learning_rate": 4.4887955182072834e-05, "loss": 0.6274, "step": 2565 }, { "epoch": 3.28448, "grad_norm": 0.6843262314796448, "learning_rate": 4.4885954381752705e-05, "loss": 0.6814, "step": 2566 }, { "epoch": 3.28576, "grad_norm": 0.6916020512580872, "learning_rate": 4.488395358143258e-05, "loss": 0.7082, "step": 2567 }, { "epoch": 3.28704, "grad_norm": 0.6470593214035034, "learning_rate": 4.488195278111245e-05, "loss": 0.6058, "step": 2568 }, { "epoch": 3.28832, "grad_norm": 0.7212356328964233, "learning_rate": 4.4879951980792314e-05, "loss": 0.6994, "step": 2569 }, { "epoch": 3.2896, "grad_norm": 0.6888067722320557, "learning_rate": 4.4877951180472186e-05, "loss": 0.6621, "step": 2570 }, { "epoch": 3.29088, "grad_norm": 0.7158266305923462, "learning_rate": 4.4875950380152065e-05, "loss": 0.6782, "step": 2571 }, { "epoch": 3.29216, "grad_norm": 0.6973638534545898, "learning_rate": 4.4873949579831937e-05, "loss": 0.7213, "step": 2572 }, { "epoch": 3.29344, "grad_norm": 0.7036058306694031, "learning_rate": 4.487194877951181e-05, "loss": 0.6762, "step": 2573 }, { "epoch": 3.29472, "grad_norm": 0.6940634846687317, "learning_rate": 4.486994797919168e-05, "loss": 0.6541, "step": 2574 }, { "epoch": 3.296, "grad_norm": 0.7082141041755676, "learning_rate": 4.486794717887155e-05, "loss": 0.6707, "step": 2575 }, { "epoch": 3.2972799999999998, "grad_norm": 0.6922471523284912, "learning_rate": 4.4865946378551424e-05, "loss": 0.6843, "step": 2576 }, { "epoch": 3.29856, "grad_norm": 0.6800956130027771, "learning_rate": 
4.486394557823129e-05, "loss": 0.6028, "step": 2577 }, { "epoch": 3.29984, "grad_norm": 0.6691498160362244, "learning_rate": 4.486194477791117e-05, "loss": 0.6706, "step": 2578 }, { "epoch": 3.30112, "grad_norm": 0.6471302509307861, "learning_rate": 4.485994397759104e-05, "loss": 0.6753, "step": 2579 }, { "epoch": 3.3024, "grad_norm": 0.6529038548469543, "learning_rate": 4.485794317727091e-05, "loss": 0.6006, "step": 2580 }, { "epoch": 3.30368, "grad_norm": 0.6317946314811707, "learning_rate": 4.485594237695078e-05, "loss": 0.6842, "step": 2581 }, { "epoch": 3.30496, "grad_norm": 0.6559657454490662, "learning_rate": 4.4853941576630655e-05, "loss": 0.6628, "step": 2582 }, { "epoch": 3.30624, "grad_norm": 0.6486949324607849, "learning_rate": 4.485194077631053e-05, "loss": 0.6775, "step": 2583 }, { "epoch": 3.3075200000000002, "grad_norm": 0.6692925691604614, "learning_rate": 4.48499399759904e-05, "loss": 0.6721, "step": 2584 }, { "epoch": 3.3088, "grad_norm": 0.6672170162200928, "learning_rate": 4.484793917567027e-05, "loss": 0.6424, "step": 2585 }, { "epoch": 3.31008, "grad_norm": 0.668408215045929, "learning_rate": 4.484593837535014e-05, "loss": 0.6445, "step": 2586 }, { "epoch": 3.31136, "grad_norm": 0.6862883567810059, "learning_rate": 4.4843937575030014e-05, "loss": 0.6994, "step": 2587 }, { "epoch": 3.31264, "grad_norm": 0.6536902189254761, "learning_rate": 4.4841936774709886e-05, "loss": 0.6149, "step": 2588 }, { "epoch": 3.31392, "grad_norm": 0.6626643538475037, "learning_rate": 4.483993597438976e-05, "loss": 0.6631, "step": 2589 }, { "epoch": 3.3152, "grad_norm": 0.6667909622192383, "learning_rate": 4.483793517406963e-05, "loss": 0.6781, "step": 2590 }, { "epoch": 3.31648, "grad_norm": 0.6631030440330505, "learning_rate": 4.48359343737495e-05, "loss": 0.6586, "step": 2591 }, { "epoch": 3.31776, "grad_norm": 0.6788957118988037, "learning_rate": 4.4833933573429374e-05, "loss": 0.6355, "step": 2592 }, { "epoch": 3.31904, "grad_norm": 0.7192303538322449, 
"learning_rate": 4.4831932773109246e-05, "loss": 0.6597, "step": 2593 }, { "epoch": 3.32032, "grad_norm": 0.7045040726661682, "learning_rate": 4.482993197278912e-05, "loss": 0.7322, "step": 2594 }, { "epoch": 3.3216, "grad_norm": 0.6571533679962158, "learning_rate": 4.482793117246899e-05, "loss": 0.6443, "step": 2595 }, { "epoch": 3.32288, "grad_norm": 0.6708471775054932, "learning_rate": 4.482593037214886e-05, "loss": 0.6391, "step": 2596 }, { "epoch": 3.32416, "grad_norm": 0.6779941916465759, "learning_rate": 4.482392957182873e-05, "loss": 0.6226, "step": 2597 }, { "epoch": 3.32544, "grad_norm": 0.7229152321815491, "learning_rate": 4.4821928771508605e-05, "loss": 0.6434, "step": 2598 }, { "epoch": 3.32672, "grad_norm": 0.7014409303665161, "learning_rate": 4.481992797118848e-05, "loss": 0.7082, "step": 2599 }, { "epoch": 3.328, "grad_norm": 0.6607677340507507, "learning_rate": 4.481792717086835e-05, "loss": 0.6574, "step": 2600 }, { "epoch": 3.32928, "grad_norm": 0.7238780856132507, "learning_rate": 4.481592637054822e-05, "loss": 0.6938, "step": 2601 }, { "epoch": 3.33056, "grad_norm": 0.6686570048332214, "learning_rate": 4.481392557022809e-05, "loss": 0.6405, "step": 2602 }, { "epoch": 3.33184, "grad_norm": 0.708084225654602, "learning_rate": 4.4811924769907964e-05, "loss": 0.6745, "step": 2603 }, { "epoch": 3.33312, "grad_norm": 0.684820830821991, "learning_rate": 4.4809923969587836e-05, "loss": 0.6385, "step": 2604 }, { "epoch": 3.3344, "grad_norm": 0.6804541945457458, "learning_rate": 4.480792316926771e-05, "loss": 0.677, "step": 2605 }, { "epoch": 3.33568, "grad_norm": 0.6551414728164673, "learning_rate": 4.4805922368947586e-05, "loss": 0.6381, "step": 2606 }, { "epoch": 3.33696, "grad_norm": 0.7107576131820679, "learning_rate": 4.480392156862745e-05, "loss": 0.7287, "step": 2607 }, { "epoch": 3.33824, "grad_norm": 0.6644242405891418, "learning_rate": 4.4801920768307323e-05, "loss": 0.6999, "step": 2608 }, { "epoch": 3.33952, "grad_norm": 0.6727672815322876, 
"learning_rate": 4.4799919967987195e-05, "loss": 0.6438, "step": 2609 }, { "epoch": 3.3407999999999998, "grad_norm": 0.67037433385849, "learning_rate": 4.479791916766707e-05, "loss": 0.6282, "step": 2610 }, { "epoch": 3.34208, "grad_norm": 0.650492250919342, "learning_rate": 4.479591836734694e-05, "loss": 0.6333, "step": 2611 }, { "epoch": 3.34336, "grad_norm": 0.6534383893013, "learning_rate": 4.479391756702681e-05, "loss": 0.671, "step": 2612 }, { "epoch": 3.34464, "grad_norm": 0.6915240287780762, "learning_rate": 4.479191676670669e-05, "loss": 0.6673, "step": 2613 }, { "epoch": 3.34592, "grad_norm": 0.6916387677192688, "learning_rate": 4.478991596638656e-05, "loss": 0.6572, "step": 2614 }, { "epoch": 3.3472, "grad_norm": 0.6631138324737549, "learning_rate": 4.4787915166066426e-05, "loss": 0.6512, "step": 2615 }, { "epoch": 3.34848, "grad_norm": 0.6894577145576477, "learning_rate": 4.47859143657463e-05, "loss": 0.6438, "step": 2616 }, { "epoch": 3.34976, "grad_norm": 0.6938015818595886, "learning_rate": 4.478391356542617e-05, "loss": 0.69, "step": 2617 }, { "epoch": 3.3510400000000002, "grad_norm": 0.6625933647155762, "learning_rate": 4.478191276510604e-05, "loss": 0.6358, "step": 2618 }, { "epoch": 3.35232, "grad_norm": 0.7050860524177551, "learning_rate": 4.4779911964785914e-05, "loss": 0.7098, "step": 2619 }, { "epoch": 3.3536, "grad_norm": 0.7034277319908142, "learning_rate": 4.477791116446579e-05, "loss": 0.7057, "step": 2620 }, { "epoch": 3.35488, "grad_norm": 0.7165651917457581, "learning_rate": 4.4775910364145664e-05, "loss": 0.7058, "step": 2621 }, { "epoch": 3.35616, "grad_norm": 0.6885167956352234, "learning_rate": 4.4773909563825536e-05, "loss": 0.6334, "step": 2622 }, { "epoch": 3.35744, "grad_norm": 0.6467083692550659, "learning_rate": 4.47719087635054e-05, "loss": 0.6097, "step": 2623 }, { "epoch": 3.35872, "grad_norm": 0.7264028787612915, "learning_rate": 4.476990796318527e-05, "loss": 0.7396, "step": 2624 }, { "epoch": 3.36, "grad_norm": 
0.7132958769798279, "learning_rate": 4.4767907162865145e-05, "loss": 0.6642, "step": 2625 }, { "epoch": 3.36128, "grad_norm": 0.7386727333068848, "learning_rate": 4.476590636254502e-05, "loss": 0.726, "step": 2626 }, { "epoch": 3.36256, "grad_norm": 0.6800748705863953, "learning_rate": 4.4763905562224895e-05, "loss": 0.6936, "step": 2627 }, { "epoch": 3.36384, "grad_norm": 0.6800684332847595, "learning_rate": 4.476190476190477e-05, "loss": 0.6065, "step": 2628 }, { "epoch": 3.36512, "grad_norm": 0.6520423293113708, "learning_rate": 4.475990396158464e-05, "loss": 0.6725, "step": 2629 }, { "epoch": 3.3664, "grad_norm": 0.6812400221824646, "learning_rate": 4.475790316126451e-05, "loss": 0.6495, "step": 2630 }, { "epoch": 3.36768, "grad_norm": 0.6640177965164185, "learning_rate": 4.4755902360944376e-05, "loss": 0.6885, "step": 2631 }, { "epoch": 3.36896, "grad_norm": 0.6409942507743835, "learning_rate": 4.475390156062425e-05, "loss": 0.6343, "step": 2632 }, { "epoch": 3.37024, "grad_norm": 0.6690536141395569, "learning_rate": 4.475190076030412e-05, "loss": 0.6792, "step": 2633 }, { "epoch": 3.37152, "grad_norm": 0.7020560503005981, "learning_rate": 4.474989995998399e-05, "loss": 0.7442, "step": 2634 }, { "epoch": 3.3728, "grad_norm": 0.698049783706665, "learning_rate": 4.474789915966387e-05, "loss": 0.6836, "step": 2635 }, { "epoch": 3.37408, "grad_norm": 0.6824793815612793, "learning_rate": 4.474589835934374e-05, "loss": 0.6818, "step": 2636 }, { "epoch": 3.37536, "grad_norm": 0.6641604900360107, "learning_rate": 4.4743897559023614e-05, "loss": 0.6497, "step": 2637 }, { "epoch": 3.37664, "grad_norm": 0.6908559203147888, "learning_rate": 4.4741896758703486e-05, "loss": 0.6655, "step": 2638 }, { "epoch": 3.37792, "grad_norm": 0.6793168783187866, "learning_rate": 4.473989595838335e-05, "loss": 0.6648, "step": 2639 }, { "epoch": 3.3792, "grad_norm": 0.7036957144737244, "learning_rate": 4.473789515806322e-05, "loss": 0.6691, "step": 2640 }, { "epoch": 3.38048, "grad_norm": 
0.684795081615448, "learning_rate": 4.4735894357743095e-05, "loss": 0.6324, "step": 2641 }, { "epoch": 3.38176, "grad_norm": 0.6578769683837891, "learning_rate": 4.473389355742297e-05, "loss": 0.6746, "step": 2642 }, { "epoch": 3.38304, "grad_norm": 0.6963245272636414, "learning_rate": 4.4731892757102845e-05, "loss": 0.7146, "step": 2643 }, { "epoch": 3.3843199999999998, "grad_norm": 0.6984001398086548, "learning_rate": 4.472989195678272e-05, "loss": 0.6317, "step": 2644 }, { "epoch": 3.3856, "grad_norm": 0.7258270978927612, "learning_rate": 4.472789115646259e-05, "loss": 0.6904, "step": 2645 }, { "epoch": 3.38688, "grad_norm": 0.6447382569313049, "learning_rate": 4.472589035614246e-05, "loss": 0.6477, "step": 2646 }, { "epoch": 3.38816, "grad_norm": 0.6710345149040222, "learning_rate": 4.4723889555822326e-05, "loss": 0.6563, "step": 2647 }, { "epoch": 3.38944, "grad_norm": 0.7310519218444824, "learning_rate": 4.47218887555022e-05, "loss": 0.6717, "step": 2648 }, { "epoch": 3.39072, "grad_norm": 0.7194939851760864, "learning_rate": 4.4719887955182076e-05, "loss": 0.6474, "step": 2649 }, { "epoch": 3.392, "grad_norm": 0.6350496411323547, "learning_rate": 4.471788715486195e-05, "loss": 0.6131, "step": 2650 }, { "epoch": 3.39328, "grad_norm": 0.6812024712562561, "learning_rate": 4.471588635454182e-05, "loss": 0.6778, "step": 2651 }, { "epoch": 3.3945600000000002, "grad_norm": 0.6636951565742493, "learning_rate": 4.471388555422169e-05, "loss": 0.6575, "step": 2652 }, { "epoch": 3.39584, "grad_norm": 0.6647194027900696, "learning_rate": 4.4711884753901564e-05, "loss": 0.6415, "step": 2653 }, { "epoch": 3.39712, "grad_norm": 0.6898042559623718, "learning_rate": 4.4709883953581436e-05, "loss": 0.7263, "step": 2654 }, { "epoch": 3.3984, "grad_norm": 0.7069958448410034, "learning_rate": 4.47078831532613e-05, "loss": 0.6351, "step": 2655 }, { "epoch": 3.39968, "grad_norm": 0.6806748509407043, "learning_rate": 4.470588235294118e-05, "loss": 0.665, "step": 2656 }, { "epoch": 
3.40096, "grad_norm": 0.7422452569007874, "learning_rate": 4.470388155262105e-05, "loss": 0.73, "step": 2657 }, { "epoch": 3.40224, "grad_norm": 0.7665037512779236, "learning_rate": 4.470188075230092e-05, "loss": 0.7148, "step": 2658 }, { "epoch": 3.40352, "grad_norm": 0.7186295986175537, "learning_rate": 4.4699879951980795e-05, "loss": 0.6665, "step": 2659 }, { "epoch": 3.4048, "grad_norm": 0.705100417137146, "learning_rate": 4.469787915166067e-05, "loss": 0.6724, "step": 2660 }, { "epoch": 3.40608, "grad_norm": 0.6744673252105713, "learning_rate": 4.469587835134054e-05, "loss": 0.6658, "step": 2661 }, { "epoch": 3.40736, "grad_norm": 0.6781793236732483, "learning_rate": 4.469387755102041e-05, "loss": 0.6664, "step": 2662 }, { "epoch": 3.40864, "grad_norm": 0.6606245040893555, "learning_rate": 4.469187675070028e-05, "loss": 0.6189, "step": 2663 }, { "epoch": 3.40992, "grad_norm": 0.6804131865501404, "learning_rate": 4.4689875950380154e-05, "loss": 0.7092, "step": 2664 }, { "epoch": 3.4112, "grad_norm": 0.6703636050224304, "learning_rate": 4.4687875150060026e-05, "loss": 0.7, "step": 2665 }, { "epoch": 3.41248, "grad_norm": 0.6748145818710327, "learning_rate": 4.46858743497399e-05, "loss": 0.6143, "step": 2666 }, { "epoch": 3.41376, "grad_norm": 0.6967974305152893, "learning_rate": 4.468387354941977e-05, "loss": 0.6877, "step": 2667 }, { "epoch": 3.41504, "grad_norm": 0.6868359446525574, "learning_rate": 4.468187274909964e-05, "loss": 0.6435, "step": 2668 }, { "epoch": 3.41632, "grad_norm": 0.7249009013175964, "learning_rate": 4.4679871948779513e-05, "loss": 0.7003, "step": 2669 }, { "epoch": 3.4176, "grad_norm": 0.6801961064338684, "learning_rate": 4.4677871148459385e-05, "loss": 0.6565, "step": 2670 }, { "epoch": 3.41888, "grad_norm": 0.7236019372940063, "learning_rate": 4.467587034813926e-05, "loss": 0.6586, "step": 2671 }, { "epoch": 3.42016, "grad_norm": 0.6557265520095825, "learning_rate": 4.467386954781913e-05, "loss": 0.6919, "step": 2672 }, { "epoch": 
3.42144, "grad_norm": 0.7524291276931763, "learning_rate": 4.4671868747499e-05, "loss": 0.7208, "step": 2673 }, { "epoch": 3.42272, "grad_norm": 0.7023764848709106, "learning_rate": 4.466986794717887e-05, "loss": 0.7148, "step": 2674 }, { "epoch": 3.424, "grad_norm": 0.7110322117805481, "learning_rate": 4.4667867146858745e-05, "loss": 0.7194, "step": 2675 }, { "epoch": 3.42528, "grad_norm": 0.668428361415863, "learning_rate": 4.4665866346538616e-05, "loss": 0.663, "step": 2676 }, { "epoch": 3.42656, "grad_norm": 0.6546775698661804, "learning_rate": 4.4663865546218495e-05, "loss": 0.5899, "step": 2677 }, { "epoch": 3.4278399999999998, "grad_norm": 0.6761878132820129, "learning_rate": 4.466186474589836e-05, "loss": 0.6309, "step": 2678 }, { "epoch": 3.42912, "grad_norm": 0.7158021330833435, "learning_rate": 4.465986394557823e-05, "loss": 0.737, "step": 2679 }, { "epoch": 3.4304, "grad_norm": 0.7490652799606323, "learning_rate": 4.4657863145258104e-05, "loss": 0.6946, "step": 2680 }, { "epoch": 3.43168, "grad_norm": 0.7451316118240356, "learning_rate": 4.4655862344937976e-05, "loss": 0.6851, "step": 2681 }, { "epoch": 3.43296, "grad_norm": 0.6753928065299988, "learning_rate": 4.465386154461785e-05, "loss": 0.6876, "step": 2682 }, { "epoch": 3.43424, "grad_norm": 0.6749107837677002, "learning_rate": 4.465186074429772e-05, "loss": 0.6256, "step": 2683 }, { "epoch": 3.43552, "grad_norm": 0.6775442361831665, "learning_rate": 4.46498599439776e-05, "loss": 0.7097, "step": 2684 }, { "epoch": 3.4368, "grad_norm": 0.7066680192947388, "learning_rate": 4.464785914365747e-05, "loss": 0.6892, "step": 2685 }, { "epoch": 3.4380800000000002, "grad_norm": 0.7244362831115723, "learning_rate": 4.4645858343337335e-05, "loss": 0.6675, "step": 2686 }, { "epoch": 3.4393599999999998, "grad_norm": 0.7037179470062256, "learning_rate": 4.464385754301721e-05, "loss": 0.7126, "step": 2687 }, { "epoch": 3.44064, "grad_norm": 0.6872085332870483, "learning_rate": 4.464185674269708e-05, "loss": 
0.6409, "step": 2688 }, { "epoch": 3.44192, "grad_norm": 0.6594861149787903, "learning_rate": 4.463985594237695e-05, "loss": 0.6789, "step": 2689 }, { "epoch": 3.4432, "grad_norm": 0.6587255001068115, "learning_rate": 4.463785514205682e-05, "loss": 0.6327, "step": 2690 }, { "epoch": 3.44448, "grad_norm": 0.6550960540771484, "learning_rate": 4.46358543417367e-05, "loss": 0.5824, "step": 2691 }, { "epoch": 3.44576, "grad_norm": 0.7198725342750549, "learning_rate": 4.463385354141657e-05, "loss": 0.6853, "step": 2692 }, { "epoch": 3.44704, "grad_norm": 0.6870785355567932, "learning_rate": 4.4631852741096445e-05, "loss": 0.6366, "step": 2693 }, { "epoch": 3.44832, "grad_norm": 0.6846055388450623, "learning_rate": 4.462985194077631e-05, "loss": 0.6755, "step": 2694 }, { "epoch": 3.4496, "grad_norm": 0.6860753893852234, "learning_rate": 4.462785114045618e-05, "loss": 0.6605, "step": 2695 }, { "epoch": 3.45088, "grad_norm": 0.7068583369255066, "learning_rate": 4.4625850340136054e-05, "loss": 0.6775, "step": 2696 }, { "epoch": 3.45216, "grad_norm": 0.6669130921363831, "learning_rate": 4.4623849539815925e-05, "loss": 0.674, "step": 2697 }, { "epoch": 3.45344, "grad_norm": 0.6609745025634766, "learning_rate": 4.4621848739495804e-05, "loss": 0.6717, "step": 2698 }, { "epoch": 3.45472, "grad_norm": 0.6721084713935852, "learning_rate": 4.4619847939175676e-05, "loss": 0.6345, "step": 2699 }, { "epoch": 3.456, "grad_norm": 0.6484143733978271, "learning_rate": 4.461784713885555e-05, "loss": 0.6415, "step": 2700 }, { "epoch": 3.45728, "grad_norm": 0.6830167174339294, "learning_rate": 4.461584633853542e-05, "loss": 0.613, "step": 2701 }, { "epoch": 3.45856, "grad_norm": 0.7231410145759583, "learning_rate": 4.4613845538215285e-05, "loss": 0.7048, "step": 2702 }, { "epoch": 3.45984, "grad_norm": 0.6800318956375122, "learning_rate": 4.4611844737895157e-05, "loss": 0.6584, "step": 2703 }, { "epoch": 3.46112, "grad_norm": 0.6745948791503906, "learning_rate": 4.460984393757503e-05, "loss": 
0.6266, "step": 2704 }, { "epoch": 3.4624, "grad_norm": 0.6727662086486816, "learning_rate": 4.460784313725491e-05, "loss": 0.6853, "step": 2705 }, { "epoch": 3.46368, "grad_norm": 0.7079063653945923, "learning_rate": 4.460584233693478e-05, "loss": 0.659, "step": 2706 }, { "epoch": 3.46496, "grad_norm": 0.6946644186973572, "learning_rate": 4.460384153661465e-05, "loss": 0.6874, "step": 2707 }, { "epoch": 3.46624, "grad_norm": 0.6658247113227844, "learning_rate": 4.460184073629452e-05, "loss": 0.6348, "step": 2708 }, { "epoch": 3.46752, "grad_norm": 0.6603049635887146, "learning_rate": 4.4599839935974395e-05, "loss": 0.6797, "step": 2709 }, { "epoch": 3.4688, "grad_norm": 0.6833632588386536, "learning_rate": 4.459783913565426e-05, "loss": 0.6698, "step": 2710 }, { "epoch": 3.47008, "grad_norm": 0.6787217855453491, "learning_rate": 4.459583833533413e-05, "loss": 0.6639, "step": 2711 }, { "epoch": 3.47136, "grad_norm": 0.6830318570137024, "learning_rate": 4.459383753501401e-05, "loss": 0.6569, "step": 2712 }, { "epoch": 3.47264, "grad_norm": 0.7140050530433655, "learning_rate": 4.459183673469388e-05, "loss": 0.7267, "step": 2713 }, { "epoch": 3.47392, "grad_norm": 0.7092685103416443, "learning_rate": 4.4589835934373754e-05, "loss": 0.6364, "step": 2714 }, { "epoch": 3.4752, "grad_norm": 0.6709200143814087, "learning_rate": 4.4587835134053626e-05, "loss": 0.6491, "step": 2715 }, { "epoch": 3.47648, "grad_norm": 0.6295605897903442, "learning_rate": 4.45858343337335e-05, "loss": 0.6316, "step": 2716 }, { "epoch": 3.47776, "grad_norm": 0.6191185116767883, "learning_rate": 4.458383353341337e-05, "loss": 0.6765, "step": 2717 }, { "epoch": 3.47904, "grad_norm": 0.6618558168411255, "learning_rate": 4.4581832733093234e-05, "loss": 0.6509, "step": 2718 }, { "epoch": 3.48032, "grad_norm": 0.6785896420478821, "learning_rate": 4.457983193277311e-05, "loss": 0.6234, "step": 2719 }, { "epoch": 3.4816, "grad_norm": 0.7022203803062439, "learning_rate": 4.4577831132452985e-05, "loss": 
0.7037, "step": 2720 }, { "epoch": 3.4828799999999998, "grad_norm": 0.6869513988494873, "learning_rate": 4.457583033213286e-05, "loss": 0.6943, "step": 2721 }, { "epoch": 3.48416, "grad_norm": 0.7011018395423889, "learning_rate": 4.457382953181273e-05, "loss": 0.693, "step": 2722 }, { "epoch": 3.48544, "grad_norm": 0.6646562814712524, "learning_rate": 4.45718287314926e-05, "loss": 0.6767, "step": 2723 }, { "epoch": 3.48672, "grad_norm": 0.6229949593544006, "learning_rate": 4.456982793117247e-05, "loss": 0.6177, "step": 2724 }, { "epoch": 3.488, "grad_norm": 0.6661481261253357, "learning_rate": 4.4567827130852344e-05, "loss": 0.6401, "step": 2725 }, { "epoch": 3.48928, "grad_norm": 0.6885098218917847, "learning_rate": 4.4565826330532216e-05, "loss": 0.6593, "step": 2726 }, { "epoch": 3.49056, "grad_norm": 0.6794683933258057, "learning_rate": 4.456382553021209e-05, "loss": 0.641, "step": 2727 }, { "epoch": 3.49184, "grad_norm": 0.6937509775161743, "learning_rate": 4.456182472989196e-05, "loss": 0.7112, "step": 2728 }, { "epoch": 3.4931200000000002, "grad_norm": 0.7058037519454956, "learning_rate": 4.455982392957183e-05, "loss": 0.644, "step": 2729 }, { "epoch": 3.4944, "grad_norm": 0.7502039670944214, "learning_rate": 4.4557823129251704e-05, "loss": 0.6871, "step": 2730 }, { "epoch": 3.49568, "grad_norm": 0.6988638639450073, "learning_rate": 4.4555822328931575e-05, "loss": 0.6788, "step": 2731 }, { "epoch": 3.49696, "grad_norm": 0.7283390760421753, "learning_rate": 4.455382152861145e-05, "loss": 0.6979, "step": 2732 }, { "epoch": 3.49824, "grad_norm": 0.7279435396194458, "learning_rate": 4.455182072829132e-05, "loss": 0.7258, "step": 2733 }, { "epoch": 3.49952, "grad_norm": 0.64103102684021, "learning_rate": 4.454981992797119e-05, "loss": 0.6253, "step": 2734 }, { "epoch": 3.5008, "grad_norm": 0.6883367896080017, "learning_rate": 4.454781912765106e-05, "loss": 0.6615, "step": 2735 }, { "epoch": 3.50208, "grad_norm": 0.6586665511131287, "learning_rate": 
4.4545818327330935e-05, "loss": 0.6741, "step": 2736 }, { "epoch": 3.50336, "grad_norm": 0.6852685213088989, "learning_rate": 4.4543817527010807e-05, "loss": 0.7288, "step": 2737 }, { "epoch": 3.50464, "grad_norm": 0.6741801500320435, "learning_rate": 4.454181672669068e-05, "loss": 0.7345, "step": 2738 }, { "epoch": 3.50592, "grad_norm": 0.6715037822723389, "learning_rate": 4.453981592637055e-05, "loss": 0.6525, "step": 2739 }, { "epoch": 3.5072, "grad_norm": 0.6797910332679749, "learning_rate": 4.453781512605042e-05, "loss": 0.668, "step": 2740 }, { "epoch": 3.50848, "grad_norm": 0.7185656428337097, "learning_rate": 4.4535814325730294e-05, "loss": 0.7052, "step": 2741 }, { "epoch": 3.50976, "grad_norm": 0.6579523086547852, "learning_rate": 4.4533813525410166e-05, "loss": 0.6825, "step": 2742 }, { "epoch": 3.51104, "grad_norm": 0.7128697037696838, "learning_rate": 4.453181272509004e-05, "loss": 0.7095, "step": 2743 }, { "epoch": 3.51232, "grad_norm": 0.6932650208473206, "learning_rate": 4.452981192476991e-05, "loss": 0.6745, "step": 2744 }, { "epoch": 3.5136, "grad_norm": 0.7073693871498108, "learning_rate": 4.452781112444978e-05, "loss": 0.6845, "step": 2745 }, { "epoch": 3.51488, "grad_norm": 0.6620619893074036, "learning_rate": 4.452581032412965e-05, "loss": 0.6135, "step": 2746 }, { "epoch": 3.51616, "grad_norm": 0.695382297039032, "learning_rate": 4.4523809523809525e-05, "loss": 0.6533, "step": 2747 }, { "epoch": 3.51744, "grad_norm": 0.6715102195739746, "learning_rate": 4.45218087234894e-05, "loss": 0.6409, "step": 2748 }, { "epoch": 3.51872, "grad_norm": 0.6616403460502625, "learning_rate": 4.451980792316927e-05, "loss": 0.6637, "step": 2749 }, { "epoch": 3.52, "grad_norm": 0.6827410459518433, "learning_rate": 4.451780712284914e-05, "loss": 0.6183, "step": 2750 }, { "epoch": 3.52128, "grad_norm": 0.7313747406005859, "learning_rate": 4.451580632252901e-05, "loss": 0.6647, "step": 2751 }, { "epoch": 3.52256, "grad_norm": 0.6950268149375916, "learning_rate": 
4.4513805522208884e-05, "loss": 0.6705, "step": 2752 }, { "epoch": 3.52384, "grad_norm": 0.6453522443771362, "learning_rate": 4.4511804721888756e-05, "loss": 0.666, "step": 2753 }, { "epoch": 3.5251200000000003, "grad_norm": 0.6691879034042358, "learning_rate": 4.450980392156863e-05, "loss": 0.7045, "step": 2754 }, { "epoch": 3.5263999999999998, "grad_norm": 0.7268353700637817, "learning_rate": 4.450780312124851e-05, "loss": 0.6803, "step": 2755 }, { "epoch": 3.52768, "grad_norm": 0.6457922458648682, "learning_rate": 4.450580232092837e-05, "loss": 0.6423, "step": 2756 }, { "epoch": 3.52896, "grad_norm": 0.6515114307403564, "learning_rate": 4.4503801520608244e-05, "loss": 0.7078, "step": 2757 }, { "epoch": 3.53024, "grad_norm": 0.7063488960266113, "learning_rate": 4.4501800720288115e-05, "loss": 0.6547, "step": 2758 }, { "epoch": 3.53152, "grad_norm": 0.7043952345848083, "learning_rate": 4.449979991996799e-05, "loss": 0.6184, "step": 2759 }, { "epoch": 3.5328, "grad_norm": 0.7184996008872986, "learning_rate": 4.449779911964786e-05, "loss": 0.7037, "step": 2760 }, { "epoch": 3.53408, "grad_norm": 0.6916483640670776, "learning_rate": 4.449579831932773e-05, "loss": 0.6334, "step": 2761 }, { "epoch": 3.53536, "grad_norm": 0.7161397337913513, "learning_rate": 4.449379751900761e-05, "loss": 0.654, "step": 2762 }, { "epoch": 3.5366400000000002, "grad_norm": 0.7488949298858643, "learning_rate": 4.449179671868748e-05, "loss": 0.7023, "step": 2763 }, { "epoch": 3.5379199999999997, "grad_norm": 0.7058433890342712, "learning_rate": 4.448979591836735e-05, "loss": 0.6553, "step": 2764 }, { "epoch": 3.5392, "grad_norm": 0.6619923114776611, "learning_rate": 4.448779511804722e-05, "loss": 0.6527, "step": 2765 }, { "epoch": 3.54048, "grad_norm": 0.6837087869644165, "learning_rate": 4.448579431772709e-05, "loss": 0.6352, "step": 2766 }, { "epoch": 3.54176, "grad_norm": 0.7077759504318237, "learning_rate": 4.448379351740696e-05, "loss": 0.6671, "step": 2767 }, { "epoch": 3.54304, 
"grad_norm": 0.6346209049224854, "learning_rate": 4.4481792717086834e-05, "loss": 0.612, "step": 2768 }, { "epoch": 3.54432, "grad_norm": 0.6402051448822021, "learning_rate": 4.447979191676671e-05, "loss": 0.6399, "step": 2769 }, { "epoch": 3.5456, "grad_norm": 0.6839762926101685, "learning_rate": 4.4477791116446585e-05, "loss": 0.7407, "step": 2770 }, { "epoch": 3.54688, "grad_norm": 0.688944399356842, "learning_rate": 4.4475790316126456e-05, "loss": 0.6762, "step": 2771 }, { "epoch": 3.54816, "grad_norm": 0.642185628414154, "learning_rate": 4.447378951580632e-05, "loss": 0.6375, "step": 2772 }, { "epoch": 3.54944, "grad_norm": 0.6515256762504578, "learning_rate": 4.447178871548619e-05, "loss": 0.5938, "step": 2773 }, { "epoch": 3.55072, "grad_norm": 0.6609624624252319, "learning_rate": 4.4469787915166065e-05, "loss": 0.7017, "step": 2774 }, { "epoch": 3.552, "grad_norm": 0.6748828291893005, "learning_rate": 4.446778711484594e-05, "loss": 0.6911, "step": 2775 }, { "epoch": 3.55328, "grad_norm": 0.7154529094696045, "learning_rate": 4.4465786314525816e-05, "loss": 0.6708, "step": 2776 }, { "epoch": 3.55456, "grad_norm": 0.6632035374641418, "learning_rate": 4.446378551420569e-05, "loss": 0.6461, "step": 2777 }, { "epoch": 3.55584, "grad_norm": 0.7373719811439514, "learning_rate": 4.446178471388556e-05, "loss": 0.7789, "step": 2778 }, { "epoch": 3.55712, "grad_norm": 0.7006744146347046, "learning_rate": 4.445978391356543e-05, "loss": 0.6923, "step": 2779 }, { "epoch": 3.5584, "grad_norm": 0.699441134929657, "learning_rate": 4.4457783113245296e-05, "loss": 0.7505, "step": 2780 }, { "epoch": 3.55968, "grad_norm": 0.6537452936172485, "learning_rate": 4.445578231292517e-05, "loss": 0.671, "step": 2781 }, { "epoch": 3.56096, "grad_norm": 0.6859011650085449, "learning_rate": 4.445378151260504e-05, "loss": 0.6701, "step": 2782 }, { "epoch": 3.56224, "grad_norm": 0.6629458069801331, "learning_rate": 4.445178071228492e-05, "loss": 0.6463, "step": 2783 }, { "epoch": 3.56352, 
"grad_norm": 0.6698895692825317, "learning_rate": 4.444977991196479e-05, "loss": 0.6575, "step": 2784 }, { "epoch": 3.5648, "grad_norm": 0.7185133695602417, "learning_rate": 4.444777911164466e-05, "loss": 0.7278, "step": 2785 }, { "epoch": 3.56608, "grad_norm": 0.699368417263031, "learning_rate": 4.4445778311324534e-05, "loss": 0.7271, "step": 2786 }, { "epoch": 3.56736, "grad_norm": 0.6818522810935974, "learning_rate": 4.4443777511004406e-05, "loss": 0.6762, "step": 2787 }, { "epoch": 3.5686400000000003, "grad_norm": 0.6821078062057495, "learning_rate": 4.444177671068427e-05, "loss": 0.6452, "step": 2788 }, { "epoch": 3.5699199999999998, "grad_norm": 0.6925137042999268, "learning_rate": 4.443977591036414e-05, "loss": 0.6354, "step": 2789 }, { "epoch": 3.5712, "grad_norm": 0.6931013464927673, "learning_rate": 4.443777511004402e-05, "loss": 0.6438, "step": 2790 }, { "epoch": 3.57248, "grad_norm": 0.6680043935775757, "learning_rate": 4.4435774309723894e-05, "loss": 0.6732, "step": 2791 }, { "epoch": 3.57376, "grad_norm": 0.7447218894958496, "learning_rate": 4.4433773509403765e-05, "loss": 0.7314, "step": 2792 }, { "epoch": 3.57504, "grad_norm": 0.6944176554679871, "learning_rate": 4.443177270908364e-05, "loss": 0.6333, "step": 2793 }, { "epoch": 3.57632, "grad_norm": 0.7107727527618408, "learning_rate": 4.442977190876351e-05, "loss": 0.673, "step": 2794 }, { "epoch": 3.5776, "grad_norm": 0.6927473545074463, "learning_rate": 4.442777110844338e-05, "loss": 0.6667, "step": 2795 }, { "epoch": 3.57888, "grad_norm": 0.7122114896774292, "learning_rate": 4.4425770308123246e-05, "loss": 0.6544, "step": 2796 }, { "epoch": 3.5801600000000002, "grad_norm": 0.6794743537902832, "learning_rate": 4.4423769507803125e-05, "loss": 0.6743, "step": 2797 }, { "epoch": 3.5814399999999997, "grad_norm": 0.6645591259002686, "learning_rate": 4.4421768707482997e-05, "loss": 0.7121, "step": 2798 }, { "epoch": 3.58272, "grad_norm": 0.6730340123176575, "learning_rate": 4.441976790716287e-05, 
"loss": 0.6619, "step": 2799 }, { "epoch": 3.584, "grad_norm": 0.6491687297821045, "learning_rate": 4.441776710684274e-05, "loss": 0.6227, "step": 2800 }, { "epoch": 3.58528, "grad_norm": 0.698142945766449, "learning_rate": 4.441576630652261e-05, "loss": 0.6708, "step": 2801 }, { "epoch": 3.58656, "grad_norm": 0.7014725804328918, "learning_rate": 4.4413765506202484e-05, "loss": 0.6487, "step": 2802 }, { "epoch": 3.58784, "grad_norm": 0.6868982315063477, "learning_rate": 4.4411764705882356e-05, "loss": 0.719, "step": 2803 }, { "epoch": 3.58912, "grad_norm": 0.6748588681221008, "learning_rate": 4.440976390556223e-05, "loss": 0.6158, "step": 2804 }, { "epoch": 3.5904, "grad_norm": 0.7001504898071289, "learning_rate": 4.44077631052421e-05, "loss": 0.6752, "step": 2805 }, { "epoch": 3.59168, "grad_norm": 0.6581389904022217, "learning_rate": 4.440576230492197e-05, "loss": 0.6909, "step": 2806 }, { "epoch": 3.59296, "grad_norm": 0.6843972206115723, "learning_rate": 4.440376150460184e-05, "loss": 0.7346, "step": 2807 }, { "epoch": 3.59424, "grad_norm": 0.6761201620101929, "learning_rate": 4.4401760704281715e-05, "loss": 0.6827, "step": 2808 }, { "epoch": 3.59552, "grad_norm": 0.7041255235671997, "learning_rate": 4.439975990396159e-05, "loss": 0.6768, "step": 2809 }, { "epoch": 3.5968, "grad_norm": 0.650201141834259, "learning_rate": 4.439775910364146e-05, "loss": 0.6364, "step": 2810 }, { "epoch": 3.59808, "grad_norm": 0.7019467353820801, "learning_rate": 4.439575830332133e-05, "loss": 0.6991, "step": 2811 }, { "epoch": 3.59936, "grad_norm": 0.6987199783325195, "learning_rate": 4.43937575030012e-05, "loss": 0.6525, "step": 2812 }, { "epoch": 3.60064, "grad_norm": 0.6764307618141174, "learning_rate": 4.4391756702681074e-05, "loss": 0.6271, "step": 2813 }, { "epoch": 3.60192, "grad_norm": 0.6464497447013855, "learning_rate": 4.4389755902360946e-05, "loss": 0.6348, "step": 2814 }, { "epoch": 3.6032, "grad_norm": 0.704531192779541, "learning_rate": 4.438775510204082e-05, 
"loss": 0.6944, "step": 2815 }, { "epoch": 3.60448, "grad_norm": 0.7004350423812866, "learning_rate": 4.438575430172069e-05, "loss": 0.6299, "step": 2816 }, { "epoch": 3.60576, "grad_norm": 0.6948818564414978, "learning_rate": 4.438375350140056e-05, "loss": 0.6662, "step": 2817 }, { "epoch": 3.60704, "grad_norm": 0.668674647808075, "learning_rate": 4.4381752701080434e-05, "loss": 0.6203, "step": 2818 }, { "epoch": 3.60832, "grad_norm": 0.6800994277000427, "learning_rate": 4.4379751900760306e-05, "loss": 0.6619, "step": 2819 }, { "epoch": 3.6096, "grad_norm": 0.667832612991333, "learning_rate": 4.437775110044018e-05, "loss": 0.6441, "step": 2820 }, { "epoch": 3.61088, "grad_norm": 0.695292592048645, "learning_rate": 4.437575030012005e-05, "loss": 0.7054, "step": 2821 }, { "epoch": 3.6121600000000003, "grad_norm": 0.7079086303710938, "learning_rate": 4.437374949979992e-05, "loss": 0.6772, "step": 2822 }, { "epoch": 3.6134399999999998, "grad_norm": 0.6839944124221802, "learning_rate": 4.437174869947979e-05, "loss": 0.6553, "step": 2823 }, { "epoch": 3.61472, "grad_norm": 0.6628500819206238, "learning_rate": 4.4369747899159665e-05, "loss": 0.6787, "step": 2824 }, { "epoch": 3.616, "grad_norm": 0.6501266360282898, "learning_rate": 4.4367747098839543e-05, "loss": 0.6575, "step": 2825 }, { "epoch": 3.61728, "grad_norm": 0.6804488301277161, "learning_rate": 4.436574629851941e-05, "loss": 0.647, "step": 2826 }, { "epoch": 3.61856, "grad_norm": 0.7033595442771912, "learning_rate": 4.436374549819928e-05, "loss": 0.6988, "step": 2827 }, { "epoch": 3.61984, "grad_norm": 0.6809185147285461, "learning_rate": 4.436174469787915e-05, "loss": 0.6726, "step": 2828 }, { "epoch": 3.62112, "grad_norm": 0.7250016927719116, "learning_rate": 4.4359743897559024e-05, "loss": 0.6378, "step": 2829 }, { "epoch": 3.6224, "grad_norm": 0.6764475703239441, "learning_rate": 4.4357743097238896e-05, "loss": 0.6564, "step": 2830 }, { "epoch": 3.6236800000000002, "grad_norm": 0.7175212502479553, 
"learning_rate": 4.435574229691877e-05, "loss": 0.7327, "step": 2831 }, { "epoch": 3.6249599999999997, "grad_norm": 0.6657410264015198, "learning_rate": 4.4353741496598646e-05, "loss": 0.6475, "step": 2832 }, { "epoch": 3.62624, "grad_norm": 0.6739627122879028, "learning_rate": 4.435174069627852e-05, "loss": 0.6444, "step": 2833 }, { "epoch": 3.62752, "grad_norm": 0.7084604501724243, "learning_rate": 4.434973989595838e-05, "loss": 0.6923, "step": 2834 }, { "epoch": 3.6288, "grad_norm": 0.7034947276115417, "learning_rate": 4.4347739095638255e-05, "loss": 0.696, "step": 2835 }, { "epoch": 3.63008, "grad_norm": 0.6603802442550659, "learning_rate": 4.434573829531813e-05, "loss": 0.6625, "step": 2836 }, { "epoch": 3.63136, "grad_norm": 0.6719616055488586, "learning_rate": 4.4343737494998e-05, "loss": 0.6623, "step": 2837 }, { "epoch": 3.63264, "grad_norm": 0.6790746450424194, "learning_rate": 4.434173669467787e-05, "loss": 0.7016, "step": 2838 }, { "epoch": 3.63392, "grad_norm": 0.6688959002494812, "learning_rate": 4.433973589435775e-05, "loss": 0.6411, "step": 2839 }, { "epoch": 3.6352, "grad_norm": 0.7280130386352539, "learning_rate": 4.433773509403762e-05, "loss": 0.7061, "step": 2840 }, { "epoch": 3.63648, "grad_norm": 0.6903562545776367, "learning_rate": 4.433573429371749e-05, "loss": 0.6433, "step": 2841 }, { "epoch": 3.63776, "grad_norm": 0.713874101638794, "learning_rate": 4.433373349339736e-05, "loss": 0.6956, "step": 2842 }, { "epoch": 3.63904, "grad_norm": 0.6836928129196167, "learning_rate": 4.433173269307723e-05, "loss": 0.6829, "step": 2843 }, { "epoch": 3.64032, "grad_norm": 0.6967549920082092, "learning_rate": 4.43297318927571e-05, "loss": 0.6793, "step": 2844 }, { "epoch": 3.6416, "grad_norm": 0.7121291160583496, "learning_rate": 4.4327731092436974e-05, "loss": 0.6855, "step": 2845 }, { "epoch": 3.64288, "grad_norm": 0.6488426327705383, "learning_rate": 4.432573029211685e-05, "loss": 0.6063, "step": 2846 }, { "epoch": 3.64416, "grad_norm": 
0.669741153717041, "learning_rate": 4.4323729491796724e-05, "loss": 0.6383, "step": 2847 }, { "epoch": 3.64544, "grad_norm": 0.6769450306892395, "learning_rate": 4.4321728691476596e-05, "loss": 0.6116, "step": 2848 }, { "epoch": 3.64672, "grad_norm": 0.6796509623527527, "learning_rate": 4.431972789115647e-05, "loss": 0.6567, "step": 2849 }, { "epoch": 3.648, "grad_norm": 0.6543831825256348, "learning_rate": 4.431772709083633e-05, "loss": 0.6122, "step": 2850 }, { "epoch": 3.64928, "grad_norm": 0.6729878187179565, "learning_rate": 4.4315726290516205e-05, "loss": 0.6636, "step": 2851 }, { "epoch": 3.65056, "grad_norm": 0.7151674628257751, "learning_rate": 4.431372549019608e-05, "loss": 0.6863, "step": 2852 }, { "epoch": 3.65184, "grad_norm": 0.6241359710693359, "learning_rate": 4.4311724689875955e-05, "loss": 0.6208, "step": 2853 }, { "epoch": 3.65312, "grad_norm": 0.6886858940124512, "learning_rate": 4.430972388955583e-05, "loss": 0.679, "step": 2854 }, { "epoch": 3.6544, "grad_norm": 0.7163287997245789, "learning_rate": 4.43077230892357e-05, "loss": 0.6783, "step": 2855 }, { "epoch": 3.6556800000000003, "grad_norm": 0.698589026927948, "learning_rate": 4.430572228891557e-05, "loss": 0.6695, "step": 2856 }, { "epoch": 3.6569599999999998, "grad_norm": 0.7296429872512817, "learning_rate": 4.430372148859544e-05, "loss": 0.7372, "step": 2857 }, { "epoch": 3.65824, "grad_norm": 0.6619814038276672, "learning_rate": 4.430172068827531e-05, "loss": 0.6973, "step": 2858 }, { "epoch": 3.65952, "grad_norm": 0.6731798052787781, "learning_rate": 4.429971988795518e-05, "loss": 0.652, "step": 2859 }, { "epoch": 3.6608, "grad_norm": 0.7068068981170654, "learning_rate": 4.429771908763505e-05, "loss": 0.715, "step": 2860 }, { "epoch": 3.66208, "grad_norm": 0.6874963641166687, "learning_rate": 4.429571828731493e-05, "loss": 0.7342, "step": 2861 }, { "epoch": 3.66336, "grad_norm": 0.6647247672080994, "learning_rate": 4.42937174869948e-05, "loss": 0.6638, "step": 2862 }, { "epoch": 
3.66464, "grad_norm": 0.6708807349205017, "learning_rate": 4.4291716686674674e-05, "loss": 0.6494, "step": 2863 }, { "epoch": 3.66592, "grad_norm": 0.6945130825042725, "learning_rate": 4.4289715886354546e-05, "loss": 0.6974, "step": 2864 }, { "epoch": 3.6672000000000002, "grad_norm": 0.6519247889518738, "learning_rate": 4.428771508603442e-05, "loss": 0.5756, "step": 2865 }, { "epoch": 3.6684799999999997, "grad_norm": 0.6893465518951416, "learning_rate": 4.428571428571428e-05, "loss": 0.6507, "step": 2866 }, { "epoch": 3.66976, "grad_norm": 0.6804561018943787, "learning_rate": 4.4283713485394155e-05, "loss": 0.6864, "step": 2867 }, { "epoch": 3.67104, "grad_norm": 0.7122829556465149, "learning_rate": 4.428171268507403e-05, "loss": 0.7419, "step": 2868 }, { "epoch": 3.67232, "grad_norm": 0.64589524269104, "learning_rate": 4.4279711884753905e-05, "loss": 0.6927, "step": 2869 }, { "epoch": 3.6736, "grad_norm": 0.6762197613716125, "learning_rate": 4.427771108443378e-05, "loss": 0.6725, "step": 2870 }, { "epoch": 3.67488, "grad_norm": 0.6425807476043701, "learning_rate": 4.427571028411365e-05, "loss": 0.631, "step": 2871 }, { "epoch": 3.67616, "grad_norm": 0.6912189722061157, "learning_rate": 4.427370948379352e-05, "loss": 0.6934, "step": 2872 }, { "epoch": 3.67744, "grad_norm": 0.6705701947212219, "learning_rate": 4.427170868347339e-05, "loss": 0.6638, "step": 2873 }, { "epoch": 3.67872, "grad_norm": 0.7085809111595154, "learning_rate": 4.426970788315326e-05, "loss": 0.6887, "step": 2874 }, { "epoch": 3.68, "grad_norm": 0.683226466178894, "learning_rate": 4.4267707082833136e-05, "loss": 0.658, "step": 2875 }, { "epoch": 3.68128, "grad_norm": 0.6895299553871155, "learning_rate": 4.426570628251301e-05, "loss": 0.638, "step": 2876 }, { "epoch": 3.68256, "grad_norm": 0.6925261616706848, "learning_rate": 4.426370548219288e-05, "loss": 0.6365, "step": 2877 }, { "epoch": 3.68384, "grad_norm": 0.7004500031471252, "learning_rate": 4.426170468187275e-05, "loss": 0.6739, "step": 
2878 }, { "epoch": 3.68512, "grad_norm": 0.7290331125259399, "learning_rate": 4.4259703881552624e-05, "loss": 0.6495, "step": 2879 }, { "epoch": 3.6864, "grad_norm": 0.6692587733268738, "learning_rate": 4.4257703081232496e-05, "loss": 0.6887, "step": 2880 }, { "epoch": 3.68768, "grad_norm": 0.6933017373085022, "learning_rate": 4.425570228091237e-05, "loss": 0.6722, "step": 2881 }, { "epoch": 3.68896, "grad_norm": 0.707172691822052, "learning_rate": 4.425370148059224e-05, "loss": 0.7151, "step": 2882 }, { "epoch": 3.69024, "grad_norm": 0.6823884844779968, "learning_rate": 4.425170068027211e-05, "loss": 0.6213, "step": 2883 }, { "epoch": 3.69152, "grad_norm": 0.7094587087631226, "learning_rate": 4.424969987995198e-05, "loss": 0.7033, "step": 2884 }, { "epoch": 3.6928, "grad_norm": 0.6758087277412415, "learning_rate": 4.4247699079631855e-05, "loss": 0.687, "step": 2885 }, { "epoch": 3.69408, "grad_norm": 0.6843044757843018, "learning_rate": 4.424569827931173e-05, "loss": 0.6536, "step": 2886 }, { "epoch": 3.69536, "grad_norm": 0.7044915556907654, "learning_rate": 4.42436974789916e-05, "loss": 0.7335, "step": 2887 }, { "epoch": 3.69664, "grad_norm": 0.6591890454292297, "learning_rate": 4.424169667867147e-05, "loss": 0.645, "step": 2888 }, { "epoch": 3.69792, "grad_norm": 0.7025469541549683, "learning_rate": 4.423969587835134e-05, "loss": 0.6961, "step": 2889 }, { "epoch": 3.6992000000000003, "grad_norm": 0.663060188293457, "learning_rate": 4.4237695078031214e-05, "loss": 0.6113, "step": 2890 }, { "epoch": 3.7004799999999998, "grad_norm": 0.6972171068191528, "learning_rate": 4.4235694277711086e-05, "loss": 0.6683, "step": 2891 }, { "epoch": 3.70176, "grad_norm": 0.7062972187995911, "learning_rate": 4.423369347739096e-05, "loss": 0.7317, "step": 2892 }, { "epoch": 3.70304, "grad_norm": 0.6868116855621338, "learning_rate": 4.423169267707083e-05, "loss": 0.6834, "step": 2893 }, { "epoch": 3.70432, "grad_norm": 0.6620085835456848, "learning_rate": 4.42296918767507e-05, 
"loss": 0.6806, "step": 2894 }, { "epoch": 3.7056, "grad_norm": 0.6830658912658691, "learning_rate": 4.4227691076430573e-05, "loss": 0.6771, "step": 2895 }, { "epoch": 3.70688, "grad_norm": 0.6770226359367371, "learning_rate": 4.4225690276110445e-05, "loss": 0.6476, "step": 2896 }, { "epoch": 3.70816, "grad_norm": 0.6751776933670044, "learning_rate": 4.422368947579032e-05, "loss": 0.6402, "step": 2897 }, { "epoch": 3.70944, "grad_norm": 0.7226163148880005, "learning_rate": 4.422168867547019e-05, "loss": 0.7128, "step": 2898 }, { "epoch": 3.7107200000000002, "grad_norm": 0.6574375033378601, "learning_rate": 4.421968787515006e-05, "loss": 0.7031, "step": 2899 }, { "epoch": 3.7119999999999997, "grad_norm": 0.6752791404724121, "learning_rate": 4.421768707482993e-05, "loss": 0.7141, "step": 2900 }, { "epoch": 3.71328, "grad_norm": 0.6757639646530151, "learning_rate": 4.4215686274509805e-05, "loss": 0.707, "step": 2901 }, { "epoch": 3.71456, "grad_norm": 0.6414822936058044, "learning_rate": 4.4213685474189676e-05, "loss": 0.6637, "step": 2902 }, { "epoch": 3.71584, "grad_norm": 0.7052628397941589, "learning_rate": 4.4211684673869555e-05, "loss": 0.6547, "step": 2903 }, { "epoch": 3.71712, "grad_norm": 0.6637598872184753, "learning_rate": 4.420968387354942e-05, "loss": 0.6694, "step": 2904 }, { "epoch": 3.7184, "grad_norm": 0.684610903263092, "learning_rate": 4.420768307322929e-05, "loss": 0.6892, "step": 2905 }, { "epoch": 3.71968, "grad_norm": 0.6572564244270325, "learning_rate": 4.4205682272909164e-05, "loss": 0.6579, "step": 2906 }, { "epoch": 3.72096, "grad_norm": 0.6893813014030457, "learning_rate": 4.4203681472589036e-05, "loss": 0.6612, "step": 2907 }, { "epoch": 3.72224, "grad_norm": 0.6515512466430664, "learning_rate": 4.420168067226891e-05, "loss": 0.6371, "step": 2908 }, { "epoch": 3.72352, "grad_norm": 0.7329177260398865, "learning_rate": 4.419967987194878e-05, "loss": 0.7172, "step": 2909 }, { "epoch": 3.7248, "grad_norm": 0.6419927477836609, 
"learning_rate": 4.419767907162866e-05, "loss": 0.6113, "step": 2910 }, { "epoch": 3.72608, "grad_norm": 0.6344096064567566, "learning_rate": 4.419567827130853e-05, "loss": 0.6277, "step": 2911 }, { "epoch": 3.72736, "grad_norm": 0.7253386974334717, "learning_rate": 4.4193677470988395e-05, "loss": 0.6749, "step": 2912 }, { "epoch": 3.72864, "grad_norm": 0.6708924174308777, "learning_rate": 4.419167667066827e-05, "loss": 0.6019, "step": 2913 }, { "epoch": 3.72992, "grad_norm": 0.6822198033332825, "learning_rate": 4.418967587034814e-05, "loss": 0.6393, "step": 2914 }, { "epoch": 3.7312, "grad_norm": 0.6997161507606506, "learning_rate": 4.418767507002801e-05, "loss": 0.668, "step": 2915 }, { "epoch": 3.73248, "grad_norm": 0.67588210105896, "learning_rate": 4.418567426970788e-05, "loss": 0.6117, "step": 2916 }, { "epoch": 3.73376, "grad_norm": 0.7122563123703003, "learning_rate": 4.418367346938776e-05, "loss": 0.6749, "step": 2917 }, { "epoch": 3.73504, "grad_norm": 0.681527316570282, "learning_rate": 4.418167266906763e-05, "loss": 0.682, "step": 2918 }, { "epoch": 3.73632, "grad_norm": 0.6719216704368591, "learning_rate": 4.4179671868747505e-05, "loss": 0.684, "step": 2919 }, { "epoch": 3.7376, "grad_norm": 0.6405019164085388, "learning_rate": 4.417767106842737e-05, "loss": 0.6527, "step": 2920 }, { "epoch": 3.73888, "grad_norm": 0.6861347556114197, "learning_rate": 4.417567026810724e-05, "loss": 0.6983, "step": 2921 }, { "epoch": 3.74016, "grad_norm": 0.6605308055877686, "learning_rate": 4.4173669467787114e-05, "loss": 0.6757, "step": 2922 }, { "epoch": 3.74144, "grad_norm": 0.6555644869804382, "learning_rate": 4.4171668667466985e-05, "loss": 0.681, "step": 2923 }, { "epoch": 3.7427200000000003, "grad_norm": 0.6855587363243103, "learning_rate": 4.4169667867146864e-05, "loss": 0.6807, "step": 2924 }, { "epoch": 3.7439999999999998, "grad_norm": 0.7204174399375916, "learning_rate": 4.4167667066826736e-05, "loss": 0.7226, "step": 2925 }, { "epoch": 3.74528, "grad_norm": 
0.6882557272911072, "learning_rate": 4.416566626650661e-05, "loss": 0.7127, "step": 2926 }, { "epoch": 3.74656, "grad_norm": 0.6996170878410339, "learning_rate": 4.416366546618648e-05, "loss": 0.6309, "step": 2927 }, { "epoch": 3.74784, "grad_norm": 0.6991925239562988, "learning_rate": 4.4161664665866345e-05, "loss": 0.6571, "step": 2928 }, { "epoch": 3.74912, "grad_norm": 0.7202059030532837, "learning_rate": 4.4159663865546217e-05, "loss": 0.6948, "step": 2929 }, { "epoch": 3.7504, "grad_norm": 0.7513526678085327, "learning_rate": 4.415766306522609e-05, "loss": 0.7387, "step": 2930 }, { "epoch": 3.75168, "grad_norm": 0.6940328478813171, "learning_rate": 4.415566226490597e-05, "loss": 0.7083, "step": 2931 }, { "epoch": 3.75296, "grad_norm": 0.665745198726654, "learning_rate": 4.415366146458584e-05, "loss": 0.6208, "step": 2932 }, { "epoch": 3.7542400000000002, "grad_norm": 0.7266408801078796, "learning_rate": 4.415166066426571e-05, "loss": 0.6952, "step": 2933 }, { "epoch": 3.7555199999999997, "grad_norm": 0.7092468738555908, "learning_rate": 4.414965986394558e-05, "loss": 0.6157, "step": 2934 }, { "epoch": 3.7568, "grad_norm": 0.6691955327987671, "learning_rate": 4.4147659063625454e-05, "loss": 0.644, "step": 2935 }, { "epoch": 3.75808, "grad_norm": 0.679655909538269, "learning_rate": 4.414565826330532e-05, "loss": 0.6441, "step": 2936 }, { "epoch": 3.75936, "grad_norm": 0.7097312211990356, "learning_rate": 4.414365746298519e-05, "loss": 0.7353, "step": 2937 }, { "epoch": 3.76064, "grad_norm": 0.730174720287323, "learning_rate": 4.414165666266507e-05, "loss": 0.6899, "step": 2938 }, { "epoch": 3.76192, "grad_norm": 0.6965400576591492, "learning_rate": 4.413965586234494e-05, "loss": 0.654, "step": 2939 }, { "epoch": 3.7632, "grad_norm": 0.7156361937522888, "learning_rate": 4.4137655062024814e-05, "loss": 0.6471, "step": 2940 }, { "epoch": 3.76448, "grad_norm": 0.7233887910842896, "learning_rate": 4.4135654261704686e-05, "loss": 0.6766, "step": 2941 }, { "epoch": 
3.76576, "grad_norm": 0.708551287651062, "learning_rate": 4.413365346138456e-05, "loss": 0.6497, "step": 2942 }, { "epoch": 3.76704, "grad_norm": 0.6958677768707275, "learning_rate": 4.413165266106443e-05, "loss": 0.6522, "step": 2943 }, { "epoch": 3.76832, "grad_norm": 0.693276047706604, "learning_rate": 4.4129651860744294e-05, "loss": 0.6597, "step": 2944 }, { "epoch": 3.7696, "grad_norm": 0.7062802910804749, "learning_rate": 4.412765106042417e-05, "loss": 0.6558, "step": 2945 }, { "epoch": 3.77088, "grad_norm": 0.7109881639480591, "learning_rate": 4.4125650260104045e-05, "loss": 0.6426, "step": 2946 }, { "epoch": 3.77216, "grad_norm": 0.6618438959121704, "learning_rate": 4.412364945978392e-05, "loss": 0.6421, "step": 2947 }, { "epoch": 3.77344, "grad_norm": 0.6410247683525085, "learning_rate": 4.412164865946379e-05, "loss": 0.644, "step": 2948 }, { "epoch": 3.77472, "grad_norm": 0.705467939376831, "learning_rate": 4.411964785914366e-05, "loss": 0.6873, "step": 2949 }, { "epoch": 3.776, "grad_norm": 0.6557707190513611, "learning_rate": 4.411764705882353e-05, "loss": 0.6677, "step": 2950 }, { "epoch": 3.77728, "grad_norm": 0.6946511268615723, "learning_rate": 4.4115646258503404e-05, "loss": 0.6773, "step": 2951 }, { "epoch": 3.77856, "grad_norm": 0.6707186102867126, "learning_rate": 4.4113645458183276e-05, "loss": 0.6772, "step": 2952 }, { "epoch": 3.77984, "grad_norm": 0.6657763123512268, "learning_rate": 4.411164465786315e-05, "loss": 0.6664, "step": 2953 }, { "epoch": 3.78112, "grad_norm": 0.6746175289154053, "learning_rate": 4.410964385754302e-05, "loss": 0.6286, "step": 2954 }, { "epoch": 3.7824, "grad_norm": 0.6620097756385803, "learning_rate": 4.410764305722289e-05, "loss": 0.6133, "step": 2955 }, { "epoch": 3.78368, "grad_norm": 0.6786773204803467, "learning_rate": 4.4105642256902763e-05, "loss": 0.6386, "step": 2956 }, { "epoch": 3.78496, "grad_norm": 0.6965882182121277, "learning_rate": 4.4103641456582635e-05, "loss": 0.6586, "step": 2957 }, { "epoch": 
3.7862400000000003, "grad_norm": 0.7074950337409973, "learning_rate": 4.410164065626251e-05, "loss": 0.6971, "step": 2958 }, { "epoch": 3.7875199999999998, "grad_norm": 0.6736770272254944, "learning_rate": 4.409963985594238e-05, "loss": 0.6395, "step": 2959 }, { "epoch": 3.7888, "grad_norm": 0.736041784286499, "learning_rate": 4.409763905562225e-05, "loss": 0.6891, "step": 2960 }, { "epoch": 3.79008, "grad_norm": 0.6859921813011169, "learning_rate": 4.409563825530212e-05, "loss": 0.6503, "step": 2961 }, { "epoch": 3.79136, "grad_norm": 0.7309426069259644, "learning_rate": 4.4093637454981995e-05, "loss": 0.7584, "step": 2962 }, { "epoch": 3.79264, "grad_norm": 0.6685876846313477, "learning_rate": 4.4091636654661866e-05, "loss": 0.6737, "step": 2963 }, { "epoch": 3.79392, "grad_norm": 0.6798075437545776, "learning_rate": 4.408963585434174e-05, "loss": 0.6157, "step": 2964 }, { "epoch": 3.7952, "grad_norm": 0.677910327911377, "learning_rate": 4.408763505402161e-05, "loss": 0.6221, "step": 2965 }, { "epoch": 3.79648, "grad_norm": 0.7607006430625916, "learning_rate": 4.408563425370149e-05, "loss": 0.7309, "step": 2966 }, { "epoch": 3.7977600000000002, "grad_norm": 0.6692076921463013, "learning_rate": 4.4083633453381354e-05, "loss": 0.6585, "step": 2967 }, { "epoch": 3.7990399999999998, "grad_norm": 0.6770291328430176, "learning_rate": 4.4081632653061226e-05, "loss": 0.6702, "step": 2968 }, { "epoch": 3.80032, "grad_norm": 0.7067806720733643, "learning_rate": 4.40796318527411e-05, "loss": 0.6986, "step": 2969 }, { "epoch": 3.8016, "grad_norm": 0.6886568069458008, "learning_rate": 4.407763105242097e-05, "loss": 0.7016, "step": 2970 }, { "epoch": 3.80288, "grad_norm": 0.6451705694198608, "learning_rate": 4.407563025210084e-05, "loss": 0.6291, "step": 2971 }, { "epoch": 3.80416, "grad_norm": 0.663578450679779, "learning_rate": 4.407362945178071e-05, "loss": 0.6653, "step": 2972 }, { "epoch": 3.80544, "grad_norm": 0.71304851770401, "learning_rate": 4.4071628651460585e-05, 
"loss": 0.6694, "step": 2973 }, { "epoch": 3.80672, "grad_norm": 0.6625712513923645, "learning_rate": 4.4069627851140464e-05, "loss": 0.6296, "step": 2974 }, { "epoch": 3.808, "grad_norm": 0.6760493516921997, "learning_rate": 4.406762705082033e-05, "loss": 0.6438, "step": 2975 }, { "epoch": 3.80928, "grad_norm": 0.6758971214294434, "learning_rate": 4.40656262505002e-05, "loss": 0.7146, "step": 2976 }, { "epoch": 3.8105599999999997, "grad_norm": 0.6622211337089539, "learning_rate": 4.406362545018007e-05, "loss": 0.6595, "step": 2977 }, { "epoch": 3.81184, "grad_norm": 0.6578704714775085, "learning_rate": 4.4061624649859944e-05, "loss": 0.6264, "step": 2978 }, { "epoch": 3.81312, "grad_norm": 0.7323094606399536, "learning_rate": 4.4059623849539816e-05, "loss": 0.693, "step": 2979 }, { "epoch": 3.8144, "grad_norm": 0.6730750799179077, "learning_rate": 4.405762304921969e-05, "loss": 0.6267, "step": 2980 }, { "epoch": 3.81568, "grad_norm": 0.7026966214179993, "learning_rate": 4.405562224889957e-05, "loss": 0.6686, "step": 2981 }, { "epoch": 3.81696, "grad_norm": 0.7159264087677002, "learning_rate": 4.405362144857944e-05, "loss": 0.6856, "step": 2982 }, { "epoch": 3.81824, "grad_norm": 0.6785261034965515, "learning_rate": 4.4051620648259304e-05, "loss": 0.7115, "step": 2983 }, { "epoch": 3.81952, "grad_norm": 0.6551268100738525, "learning_rate": 4.4049619847939175e-05, "loss": 0.6466, "step": 2984 }, { "epoch": 3.8208, "grad_norm": 0.7004695534706116, "learning_rate": 4.404761904761905e-05, "loss": 0.6808, "step": 2985 }, { "epoch": 3.82208, "grad_norm": 0.7341533303260803, "learning_rate": 4.404561824729892e-05, "loss": 0.7086, "step": 2986 }, { "epoch": 3.82336, "grad_norm": 0.6831992268562317, "learning_rate": 4.404361744697879e-05, "loss": 0.6795, "step": 2987 }, { "epoch": 3.82464, "grad_norm": 0.6428318023681641, "learning_rate": 4.404161664665867e-05, "loss": 0.6104, "step": 2988 }, { "epoch": 3.82592, "grad_norm": 0.6942797899246216, "learning_rate": 
4.403961584633854e-05, "loss": 0.7077, "step": 2989 }, { "epoch": 3.8272, "grad_norm": 0.6718285083770752, "learning_rate": 4.403761504601841e-05, "loss": 0.6673, "step": 2990 }, { "epoch": 3.82848, "grad_norm": 0.669506311416626, "learning_rate": 4.403561424569828e-05, "loss": 0.6671, "step": 2991 }, { "epoch": 3.8297600000000003, "grad_norm": 0.6954408884048462, "learning_rate": 4.403361344537815e-05, "loss": 0.66, "step": 2992 }, { "epoch": 3.83104, "grad_norm": 0.6513415575027466, "learning_rate": 4.403161264505802e-05, "loss": 0.6519, "step": 2993 }, { "epoch": 3.83232, "grad_norm": 0.6512577533721924, "learning_rate": 4.4029611844737894e-05, "loss": 0.6356, "step": 2994 }, { "epoch": 3.8336, "grad_norm": 0.69619220495224, "learning_rate": 4.402761104441777e-05, "loss": 0.6911, "step": 2995 }, { "epoch": 3.83488, "grad_norm": 0.6886332035064697, "learning_rate": 4.4025610244097645e-05, "loss": 0.7061, "step": 2996 }, { "epoch": 3.83616, "grad_norm": 0.7018471360206604, "learning_rate": 4.4023609443777516e-05, "loss": 0.6589, "step": 2997 }, { "epoch": 3.83744, "grad_norm": 0.6925556659698486, "learning_rate": 4.402160864345739e-05, "loss": 0.7039, "step": 2998 }, { "epoch": 3.83872, "grad_norm": 0.7044040560722351, "learning_rate": 4.401960784313725e-05, "loss": 0.7379, "step": 2999 }, { "epoch": 3.84, "grad_norm": 0.6619036197662354, "learning_rate": 4.4017607042817125e-05, "loss": 0.6497, "step": 3000 }, { "epoch": 3.8412800000000002, "grad_norm": 0.677604615688324, "learning_rate": 4.4015606242497e-05, "loss": 0.6742, "step": 3001 }, { "epoch": 3.8425599999999998, "grad_norm": 0.6716386079788208, "learning_rate": 4.4013605442176876e-05, "loss": 0.6129, "step": 3002 }, { "epoch": 3.84384, "grad_norm": 0.6958229541778564, "learning_rate": 4.401160464185675e-05, "loss": 0.6419, "step": 3003 }, { "epoch": 3.84512, "grad_norm": 0.6683501601219177, "learning_rate": 4.400960384153662e-05, "loss": 0.583, "step": 3004 }, { "epoch": 3.8464, "grad_norm": 
0.6913495063781738, "learning_rate": 4.400760304121649e-05, "loss": 0.6845, "step": 3005 }, { "epoch": 3.84768, "grad_norm": 0.7633008360862732, "learning_rate": 4.400560224089636e-05, "loss": 0.7119, "step": 3006 }, { "epoch": 3.84896, "grad_norm": 0.68817138671875, "learning_rate": 4.400360144057623e-05, "loss": 0.6531, "step": 3007 }, { "epoch": 3.85024, "grad_norm": 0.6548225283622742, "learning_rate": 4.40016006402561e-05, "loss": 0.6511, "step": 3008 }, { "epoch": 3.85152, "grad_norm": 0.7012094259262085, "learning_rate": 4.399959983993598e-05, "loss": 0.6826, "step": 3009 }, { "epoch": 3.8528000000000002, "grad_norm": 0.7215191721916199, "learning_rate": 4.399759903961585e-05, "loss": 0.7206, "step": 3010 }, { "epoch": 3.8540799999999997, "grad_norm": 0.7269889116287231, "learning_rate": 4.399559823929572e-05, "loss": 0.6886, "step": 3011 }, { "epoch": 3.85536, "grad_norm": 0.6882228255271912, "learning_rate": 4.3993597438975594e-05, "loss": 0.6738, "step": 3012 }, { "epoch": 3.85664, "grad_norm": 0.6564989686012268, "learning_rate": 4.3991596638655466e-05, "loss": 0.614, "step": 3013 }, { "epoch": 3.85792, "grad_norm": 0.7501393556594849, "learning_rate": 4.398959583833534e-05, "loss": 0.6957, "step": 3014 }, { "epoch": 3.8592, "grad_norm": 0.6881626844406128, "learning_rate": 4.39875950380152e-05, "loss": 0.6503, "step": 3015 }, { "epoch": 3.86048, "grad_norm": 0.7230293154716492, "learning_rate": 4.398559423769508e-05, "loss": 0.6886, "step": 3016 }, { "epoch": 3.86176, "grad_norm": 0.7025682330131531, "learning_rate": 4.3983593437374954e-05, "loss": 0.6752, "step": 3017 }, { "epoch": 3.86304, "grad_norm": 0.7032774686813354, "learning_rate": 4.3981592637054825e-05, "loss": 0.6771, "step": 3018 }, { "epoch": 3.86432, "grad_norm": 0.6938534379005432, "learning_rate": 4.39795918367347e-05, "loss": 0.6688, "step": 3019 }, { "epoch": 3.8656, "grad_norm": 0.6492636203765869, "learning_rate": 4.397759103641457e-05, "loss": 0.6702, "step": 3020 }, { "epoch": 
3.86688, "grad_norm": 0.698173463344574, "learning_rate": 4.397559023609444e-05, "loss": 0.616, "step": 3021 }, { "epoch": 3.86816, "grad_norm": 0.7170678377151489, "learning_rate": 4.397358943577431e-05, "loss": 0.6048, "step": 3022 }, { "epoch": 3.86944, "grad_norm": 0.6741195917129517, "learning_rate": 4.3971588635454185e-05, "loss": 0.6607, "step": 3023 }, { "epoch": 3.87072, "grad_norm": 0.7016419172286987, "learning_rate": 4.3969587835134056e-05, "loss": 0.7229, "step": 3024 }, { "epoch": 3.872, "grad_norm": 0.6970353126525879, "learning_rate": 4.396758703481393e-05, "loss": 0.6658, "step": 3025 }, { "epoch": 3.87328, "grad_norm": 0.6936695575714111, "learning_rate": 4.39655862344938e-05, "loss": 0.7113, "step": 3026 }, { "epoch": 3.87456, "grad_norm": 0.6938142776489258, "learning_rate": 4.396358543417367e-05, "loss": 0.6535, "step": 3027 }, { "epoch": 3.87584, "grad_norm": 0.6950214505195618, "learning_rate": 4.3961584633853544e-05, "loss": 0.701, "step": 3028 }, { "epoch": 3.87712, "grad_norm": 0.6649338603019714, "learning_rate": 4.3959583833533416e-05, "loss": 0.635, "step": 3029 }, { "epoch": 3.8784, "grad_norm": 0.7024021744728088, "learning_rate": 4.395758303321329e-05, "loss": 0.7062, "step": 3030 }, { "epoch": 3.87968, "grad_norm": 0.6412244439125061, "learning_rate": 4.395558223289316e-05, "loss": 0.6081, "step": 3031 }, { "epoch": 3.88096, "grad_norm": 0.6740350127220154, "learning_rate": 4.395358143257303e-05, "loss": 0.6404, "step": 3032 }, { "epoch": 3.88224, "grad_norm": 0.647661566734314, "learning_rate": 4.39515806322529e-05, "loss": 0.6506, "step": 3033 }, { "epoch": 3.88352, "grad_norm": 0.6819959878921509, "learning_rate": 4.3949579831932775e-05, "loss": 0.6258, "step": 3034 }, { "epoch": 3.8848000000000003, "grad_norm": 0.7153730988502502, "learning_rate": 4.394757903161265e-05, "loss": 0.6579, "step": 3035 }, { "epoch": 3.8860799999999998, "grad_norm": 0.6946179270744324, "learning_rate": 4.394557823129252e-05, "loss": 0.6526, "step": 
3036 }, { "epoch": 3.88736, "grad_norm": 0.6954408288002014, "learning_rate": 4.394357743097239e-05, "loss": 0.6596, "step": 3037 }, { "epoch": 3.88864, "grad_norm": 0.6411071419715881, "learning_rate": 4.394157663065226e-05, "loss": 0.6556, "step": 3038 }, { "epoch": 3.88992, "grad_norm": 0.6873469948768616, "learning_rate": 4.3939575830332134e-05, "loss": 0.6791, "step": 3039 }, { "epoch": 3.8912, "grad_norm": 0.6711555123329163, "learning_rate": 4.3937575030012006e-05, "loss": 0.6624, "step": 3040 }, { "epoch": 3.89248, "grad_norm": 0.6283385753631592, "learning_rate": 4.393557422969188e-05, "loss": 0.6152, "step": 3041 }, { "epoch": 3.89376, "grad_norm": 0.6898487210273743, "learning_rate": 4.393357342937175e-05, "loss": 0.7011, "step": 3042 }, { "epoch": 3.89504, "grad_norm": 0.6428130865097046, "learning_rate": 4.393157262905162e-05, "loss": 0.6157, "step": 3043 }, { "epoch": 3.8963200000000002, "grad_norm": 0.6543678641319275, "learning_rate": 4.39295718287315e-05, "loss": 0.6196, "step": 3044 }, { "epoch": 3.8975999999999997, "grad_norm": 0.6453118324279785, "learning_rate": 4.3927571028411365e-05, "loss": 0.6832, "step": 3045 }, { "epoch": 3.89888, "grad_norm": 0.6822550892829895, "learning_rate": 4.392557022809124e-05, "loss": 0.6523, "step": 3046 }, { "epoch": 3.90016, "grad_norm": 0.7187889814376831, "learning_rate": 4.392356942777111e-05, "loss": 0.7118, "step": 3047 }, { "epoch": 3.90144, "grad_norm": 0.7157731056213379, "learning_rate": 4.392156862745098e-05, "loss": 0.7197, "step": 3048 }, { "epoch": 3.90272, "grad_norm": 0.686944305896759, "learning_rate": 4.391956782713085e-05, "loss": 0.6051, "step": 3049 }, { "epoch": 3.904, "grad_norm": 0.7068070769309998, "learning_rate": 4.3917567026810725e-05, "loss": 0.7256, "step": 3050 }, { "epoch": 3.90528, "grad_norm": 0.7173423767089844, "learning_rate": 4.3915566226490603e-05, "loss": 0.6701, "step": 3051 }, { "epoch": 3.90656, "grad_norm": 0.6758041977882385, "learning_rate": 4.3913565426170475e-05, 
"loss": 0.6383, "step": 3052 }, { "epoch": 3.90784, "grad_norm": 0.6902790665626526, "learning_rate": 4.391156462585034e-05, "loss": 0.6533, "step": 3053 }, { "epoch": 3.90912, "grad_norm": 0.70408034324646, "learning_rate": 4.390956382553021e-05, "loss": 0.6577, "step": 3054 }, { "epoch": 3.9104, "grad_norm": 0.6818796396255493, "learning_rate": 4.3907563025210084e-05, "loss": 0.6786, "step": 3055 }, { "epoch": 3.91168, "grad_norm": 0.6927520036697388, "learning_rate": 4.3905562224889956e-05, "loss": 0.6924, "step": 3056 }, { "epoch": 3.91296, "grad_norm": 0.7096193432807922, "learning_rate": 4.390356142456983e-05, "loss": 0.6852, "step": 3057 }, { "epoch": 3.91424, "grad_norm": 0.6638479828834534, "learning_rate": 4.3901560624249706e-05, "loss": 0.6388, "step": 3058 }, { "epoch": 3.91552, "grad_norm": 0.7214940190315247, "learning_rate": 4.389955982392958e-05, "loss": 0.7158, "step": 3059 }, { "epoch": 3.9168, "grad_norm": 0.681658148765564, "learning_rate": 4.389755902360945e-05, "loss": 0.657, "step": 3060 }, { "epoch": 3.91808, "grad_norm": 0.6954881548881531, "learning_rate": 4.3895558223289315e-05, "loss": 0.7083, "step": 3061 }, { "epoch": 3.91936, "grad_norm": 0.681389331817627, "learning_rate": 4.389355742296919e-05, "loss": 0.6743, "step": 3062 }, { "epoch": 3.92064, "grad_norm": 0.688248336315155, "learning_rate": 4.389155662264906e-05, "loss": 0.6632, "step": 3063 }, { "epoch": 3.92192, "grad_norm": 0.7140293717384338, "learning_rate": 4.388955582232893e-05, "loss": 0.6964, "step": 3064 }, { "epoch": 3.9232, "grad_norm": 0.692408561706543, "learning_rate": 4.388755502200881e-05, "loss": 0.6953, "step": 3065 }, { "epoch": 3.92448, "grad_norm": 0.6348261833190918, "learning_rate": 4.388555422168868e-05, "loss": 0.6256, "step": 3066 }, { "epoch": 3.92576, "grad_norm": 0.6396204233169556, "learning_rate": 4.388355342136855e-05, "loss": 0.6958, "step": 3067 }, { "epoch": 3.92704, "grad_norm": 0.6573483347892761, "learning_rate": 4.3881552621048425e-05, 
"loss": 0.6314, "step": 3068 }, { "epoch": 3.9283200000000003, "grad_norm": 0.6830142736434937, "learning_rate": 4.387955182072829e-05, "loss": 0.6347, "step": 3069 }, { "epoch": 3.9295999999999998, "grad_norm": 0.6613914370536804, "learning_rate": 4.387755102040816e-05, "loss": 0.7057, "step": 3070 }, { "epoch": 3.93088, "grad_norm": 0.7034798860549927, "learning_rate": 4.3875550220088034e-05, "loss": 0.6773, "step": 3071 }, { "epoch": 3.93216, "grad_norm": 0.741765022277832, "learning_rate": 4.387354941976791e-05, "loss": 0.6941, "step": 3072 }, { "epoch": 3.93344, "grad_norm": 0.7319201827049255, "learning_rate": 4.3871548619447784e-05, "loss": 0.7408, "step": 3073 }, { "epoch": 3.93472, "grad_norm": 0.66302090883255, "learning_rate": 4.3869547819127656e-05, "loss": 0.624, "step": 3074 }, { "epoch": 3.936, "grad_norm": 0.6886874437332153, "learning_rate": 4.386754701880753e-05, "loss": 0.6537, "step": 3075 }, { "epoch": 3.93728, "grad_norm": 0.6793836951255798, "learning_rate": 4.38655462184874e-05, "loss": 0.6656, "step": 3076 }, { "epoch": 3.93856, "grad_norm": 0.6942064762115479, "learning_rate": 4.3863545418167265e-05, "loss": 0.7116, "step": 3077 }, { "epoch": 3.9398400000000002, "grad_norm": 0.6668751239776611, "learning_rate": 4.386154461784714e-05, "loss": 0.626, "step": 3078 }, { "epoch": 3.9411199999999997, "grad_norm": 0.6711156964302063, "learning_rate": 4.385954381752701e-05, "loss": 0.6653, "step": 3079 }, { "epoch": 3.9424, "grad_norm": 0.6742311716079712, "learning_rate": 4.385754301720689e-05, "loss": 0.7238, "step": 3080 }, { "epoch": 3.94368, "grad_norm": 0.6560841202735901, "learning_rate": 4.385554221688676e-05, "loss": 0.6364, "step": 3081 }, { "epoch": 3.94496, "grad_norm": 0.6376678347587585, "learning_rate": 4.385354141656663e-05, "loss": 0.6447, "step": 3082 }, { "epoch": 3.94624, "grad_norm": 0.6691120862960815, "learning_rate": 4.38515406162465e-05, "loss": 0.6557, "step": 3083 }, { "epoch": 3.94752, "grad_norm": 0.6742989420890808, 
"learning_rate": 4.3849539815926375e-05, "loss": 0.6029, "step": 3084 }, { "epoch": 3.9488, "grad_norm": 0.7199971675872803, "learning_rate": 4.384753901560624e-05, "loss": 0.6881, "step": 3085 }, { "epoch": 3.95008, "grad_norm": 0.7232701182365417, "learning_rate": 4.384553821528611e-05, "loss": 0.6548, "step": 3086 }, { "epoch": 3.95136, "grad_norm": 0.7027313113212585, "learning_rate": 4.384353741496599e-05, "loss": 0.6444, "step": 3087 }, { "epoch": 3.95264, "grad_norm": 0.6895410418510437, "learning_rate": 4.384153661464586e-05, "loss": 0.6935, "step": 3088 }, { "epoch": 3.95392, "grad_norm": 0.6641124486923218, "learning_rate": 4.3839535814325734e-05, "loss": 0.6709, "step": 3089 }, { "epoch": 3.9552, "grad_norm": 0.6645619869232178, "learning_rate": 4.3837535014005606e-05, "loss": 0.6736, "step": 3090 }, { "epoch": 3.95648, "grad_norm": 0.680464506149292, "learning_rate": 4.383553421368548e-05, "loss": 0.6601, "step": 3091 }, { "epoch": 3.95776, "grad_norm": 0.6810153126716614, "learning_rate": 4.383353341336535e-05, "loss": 0.6463, "step": 3092 }, { "epoch": 3.95904, "grad_norm": 0.6904779076576233, "learning_rate": 4.3831532613045215e-05, "loss": 0.6375, "step": 3093 }, { "epoch": 3.96032, "grad_norm": 0.7193115949630737, "learning_rate": 4.382953181272509e-05, "loss": 0.6649, "step": 3094 }, { "epoch": 3.9616, "grad_norm": 0.6670559644699097, "learning_rate": 4.3827531012404965e-05, "loss": 0.6505, "step": 3095 }, { "epoch": 3.96288, "grad_norm": 0.707496166229248, "learning_rate": 4.382553021208484e-05, "loss": 0.6726, "step": 3096 }, { "epoch": 3.96416, "grad_norm": 0.6769973635673523, "learning_rate": 4.382352941176471e-05, "loss": 0.6529, "step": 3097 }, { "epoch": 3.96544, "grad_norm": 0.6613832712173462, "learning_rate": 4.382152861144458e-05, "loss": 0.6218, "step": 3098 }, { "epoch": 3.96672, "grad_norm": 0.6404051184654236, "learning_rate": 4.381952781112445e-05, "loss": 0.6685, "step": 3099 }, { "epoch": 3.968, "grad_norm": 0.7350611090660095, 
"learning_rate": 4.3817527010804324e-05, "loss": 0.7538, "step": 3100 }, { "epoch": 3.96928, "grad_norm": 0.7339857220649719, "learning_rate": 4.3815526210484196e-05, "loss": 0.7104, "step": 3101 }, { "epoch": 3.97056, "grad_norm": 0.6971475481987, "learning_rate": 4.381352541016407e-05, "loss": 0.6595, "step": 3102 }, { "epoch": 3.9718400000000003, "grad_norm": 0.6795316934585571, "learning_rate": 4.381152460984394e-05, "loss": 0.63, "step": 3103 }, { "epoch": 3.9731199999999998, "grad_norm": 0.6590287685394287, "learning_rate": 4.380952380952381e-05, "loss": 0.6013, "step": 3104 }, { "epoch": 3.9744, "grad_norm": 0.6492599248886108, "learning_rate": 4.3807523009203684e-05, "loss": 0.6626, "step": 3105 }, { "epoch": 3.97568, "grad_norm": 0.7017719149589539, "learning_rate": 4.3805522208883556e-05, "loss": 0.6685, "step": 3106 }, { "epoch": 3.97696, "grad_norm": 0.6492130160331726, "learning_rate": 4.380352140856343e-05, "loss": 0.6498, "step": 3107 }, { "epoch": 3.97824, "grad_norm": 0.6498051881790161, "learning_rate": 4.38015206082433e-05, "loss": 0.6152, "step": 3108 }, { "epoch": 3.97952, "grad_norm": 0.710146963596344, "learning_rate": 4.379951980792317e-05, "loss": 0.7109, "step": 3109 }, { "epoch": 3.9808, "grad_norm": 0.6580629348754883, "learning_rate": 4.379751900760304e-05, "loss": 0.6464, "step": 3110 }, { "epoch": 3.98208, "grad_norm": 0.671927809715271, "learning_rate": 4.3795518207282915e-05, "loss": 0.7003, "step": 3111 }, { "epoch": 3.9833600000000002, "grad_norm": 0.64473956823349, "learning_rate": 4.379351740696279e-05, "loss": 0.6789, "step": 3112 }, { "epoch": 3.9846399999999997, "grad_norm": 0.6603186130523682, "learning_rate": 4.379151660664266e-05, "loss": 0.6558, "step": 3113 }, { "epoch": 3.98592, "grad_norm": 0.7090316414833069, "learning_rate": 4.378951580632253e-05, "loss": 0.6923, "step": 3114 }, { "epoch": 3.9872, "grad_norm": 0.6954898238182068, "learning_rate": 4.37875150060024e-05, "loss": 0.6341, "step": 3115 }, { "epoch": 
3.98848, "grad_norm": 0.6501321196556091, "learning_rate": 4.3785514205682274e-05, "loss": 0.6481, "step": 3116 }, { "epoch": 3.98976, "grad_norm": 0.6654294729232788, "learning_rate": 4.3783513405362146e-05, "loss": 0.6379, "step": 3117 }, { "epoch": 3.99104, "grad_norm": 0.698321521282196, "learning_rate": 4.378151260504202e-05, "loss": 0.6881, "step": 3118 }, { "epoch": 3.99232, "grad_norm": 0.7432014346122742, "learning_rate": 4.377951180472189e-05, "loss": 0.6836, "step": 3119 }, { "epoch": 3.9936, "grad_norm": 0.6992458701133728, "learning_rate": 4.377751100440176e-05, "loss": 0.7221, "step": 3120 }, { "epoch": 3.99488, "grad_norm": 0.6965485215187073, "learning_rate": 4.377551020408163e-05, "loss": 0.604, "step": 3121 }, { "epoch": 3.99616, "grad_norm": 0.7211214900016785, "learning_rate": 4.377350940376151e-05, "loss": 0.7074, "step": 3122 }, { "epoch": 3.99744, "grad_norm": 0.6553539633750916, "learning_rate": 4.377150860344138e-05, "loss": 0.6204, "step": 3123 }, { "epoch": 3.99872, "grad_norm": 0.6686910390853882, "learning_rate": 4.376950780312125e-05, "loss": 0.7271, "step": 3124 }, { "epoch": 4.0, "grad_norm": 1.5496490001678467, "learning_rate": 4.376750700280112e-05, "loss": 1.3281, "step": 3125 }, { "epoch": 4.00128, "grad_norm": 0.6808484792709351, "learning_rate": 4.376550620248099e-05, "loss": 0.6388, "step": 3126 }, { "epoch": 4.00256, "grad_norm": 0.6682015061378479, "learning_rate": 4.3763505402160865e-05, "loss": 0.6512, "step": 3127 }, { "epoch": 4.00384, "grad_norm": 0.6814844608306885, "learning_rate": 4.3761504601840736e-05, "loss": 0.6496, "step": 3128 }, { "epoch": 4.00512, "grad_norm": 0.698432981967926, "learning_rate": 4.3759503801520615e-05, "loss": 0.6272, "step": 3129 }, { "epoch": 4.0064, "grad_norm": 0.6985964179039001, "learning_rate": 4.375750300120049e-05, "loss": 0.6447, "step": 3130 }, { "epoch": 4.00768, "grad_norm": 0.648468554019928, "learning_rate": 4.375550220088035e-05, "loss": 0.6205, "step": 3131 }, { "epoch": 
4.00896, "grad_norm": 0.694003164768219, "learning_rate": 4.3753501400560224e-05, "loss": 0.6613, "step": 3132 }, { "epoch": 4.01024, "grad_norm": 0.7265833616256714, "learning_rate": 4.3751500600240096e-05, "loss": 0.623, "step": 3133 }, { "epoch": 4.01152, "grad_norm": 0.7631153464317322, "learning_rate": 4.374949979991997e-05, "loss": 0.6963, "step": 3134 }, { "epoch": 4.0128, "grad_norm": 0.7075992822647095, "learning_rate": 4.374749899959984e-05, "loss": 0.6754, "step": 3135 }, { "epoch": 4.01408, "grad_norm": 0.7054523229598999, "learning_rate": 4.374549819927972e-05, "loss": 0.6145, "step": 3136 }, { "epoch": 4.01536, "grad_norm": 0.6870067119598389, "learning_rate": 4.374349739895959e-05, "loss": 0.663, "step": 3137 }, { "epoch": 4.01664, "grad_norm": 0.6565841436386108, "learning_rate": 4.374149659863946e-05, "loss": 0.6111, "step": 3138 }, { "epoch": 4.01792, "grad_norm": 0.6983011960983276, "learning_rate": 4.373949579831933e-05, "loss": 0.6528, "step": 3139 }, { "epoch": 4.0192, "grad_norm": 0.6741187572479248, "learning_rate": 4.37374949979992e-05, "loss": 0.6625, "step": 3140 }, { "epoch": 4.02048, "grad_norm": 0.6667806506156921, "learning_rate": 4.373549419767907e-05, "loss": 0.629, "step": 3141 }, { "epoch": 4.0217600000000004, "grad_norm": 0.6826735734939575, "learning_rate": 4.373349339735894e-05, "loss": 0.6538, "step": 3142 }, { "epoch": 4.02304, "grad_norm": 0.7120842933654785, "learning_rate": 4.373149259703882e-05, "loss": 0.6368, "step": 3143 }, { "epoch": 4.02432, "grad_norm": 0.6919832825660706, "learning_rate": 4.372949179671869e-05, "loss": 0.6451, "step": 3144 }, { "epoch": 4.0256, "grad_norm": 0.7019147276878357, "learning_rate": 4.3727490996398565e-05, "loss": 0.6605, "step": 3145 }, { "epoch": 4.02688, "grad_norm": 0.7638674378395081, "learning_rate": 4.3725490196078437e-05, "loss": 0.6817, "step": 3146 }, { "epoch": 4.02816, "grad_norm": 0.7084218263626099, "learning_rate": 4.37234893957583e-05, "loss": 0.6572, "step": 3147 }, { 
"epoch": 4.02944, "grad_norm": 0.6604458093643188, "learning_rate": 4.3721488595438174e-05, "loss": 0.6048, "step": 3148 }, { "epoch": 4.03072, "grad_norm": 0.6742260456085205, "learning_rate": 4.3719487795118045e-05, "loss": 0.6481, "step": 3149 }, { "epoch": 4.032, "grad_norm": 0.666289210319519, "learning_rate": 4.3717486994797924e-05, "loss": 0.6421, "step": 3150 }, { "epoch": 4.03328, "grad_norm": 0.6697606444358826, "learning_rate": 4.3715486194477796e-05, "loss": 0.6479, "step": 3151 }, { "epoch": 4.03456, "grad_norm": 0.7304966449737549, "learning_rate": 4.371348539415767e-05, "loss": 0.6792, "step": 3152 }, { "epoch": 4.03584, "grad_norm": 0.6816575527191162, "learning_rate": 4.371148459383754e-05, "loss": 0.601, "step": 3153 }, { "epoch": 4.03712, "grad_norm": 0.7284610271453857, "learning_rate": 4.370948379351741e-05, "loss": 0.64, "step": 3154 }, { "epoch": 4.0384, "grad_norm": 0.7191203832626343, "learning_rate": 4.3707482993197277e-05, "loss": 0.64, "step": 3155 }, { "epoch": 4.03968, "grad_norm": 0.7250388264656067, "learning_rate": 4.370548219287715e-05, "loss": 0.696, "step": 3156 }, { "epoch": 4.04096, "grad_norm": 0.6809269785881042, "learning_rate": 4.370348139255703e-05, "loss": 0.6339, "step": 3157 }, { "epoch": 4.04224, "grad_norm": 0.6679171323776245, "learning_rate": 4.37014805922369e-05, "loss": 0.6256, "step": 3158 }, { "epoch": 4.04352, "grad_norm": 0.7094346880912781, "learning_rate": 4.369947979191677e-05, "loss": 0.6825, "step": 3159 }, { "epoch": 4.0448, "grad_norm": 0.6463338732719421, "learning_rate": 4.369747899159664e-05, "loss": 0.6085, "step": 3160 }, { "epoch": 4.04608, "grad_norm": 0.6958500146865845, "learning_rate": 4.3695478191276514e-05, "loss": 0.6654, "step": 3161 }, { "epoch": 4.04736, "grad_norm": 0.7284070253372192, "learning_rate": 4.3693477390956386e-05, "loss": 0.6706, "step": 3162 }, { "epoch": 4.04864, "grad_norm": 0.7025485038757324, "learning_rate": 4.369147659063625e-05, "loss": 0.677, "step": 3163 }, { 
"epoch": 4.04992, "grad_norm": 0.7521453499794006, "learning_rate": 4.368947579031613e-05, "loss": 0.7294, "step": 3164 }, { "epoch": 4.0512, "grad_norm": 0.6905098557472229, "learning_rate": 4.3687474989996e-05, "loss": 0.7215, "step": 3165 }, { "epoch": 4.05248, "grad_norm": 0.7111669182777405, "learning_rate": 4.3685474189675874e-05, "loss": 0.6637, "step": 3166 }, { "epoch": 4.05376, "grad_norm": 0.7028812766075134, "learning_rate": 4.3683473389355746e-05, "loss": 0.665, "step": 3167 }, { "epoch": 4.05504, "grad_norm": 0.7170726656913757, "learning_rate": 4.368147258903562e-05, "loss": 0.6078, "step": 3168 }, { "epoch": 4.05632, "grad_norm": 0.702850878238678, "learning_rate": 4.367947178871549e-05, "loss": 0.6381, "step": 3169 }, { "epoch": 4.0576, "grad_norm": 0.7177186012268066, "learning_rate": 4.367747098839536e-05, "loss": 0.6339, "step": 3170 }, { "epoch": 4.05888, "grad_norm": 0.7100415229797363, "learning_rate": 4.367547018807523e-05, "loss": 0.6966, "step": 3171 }, { "epoch": 4.06016, "grad_norm": 0.6687876582145691, "learning_rate": 4.3673469387755105e-05, "loss": 0.6423, "step": 3172 }, { "epoch": 4.06144, "grad_norm": 0.7261446714401245, "learning_rate": 4.367146858743498e-05, "loss": 0.6613, "step": 3173 }, { "epoch": 4.06272, "grad_norm": 0.6871818900108337, "learning_rate": 4.366946778711485e-05, "loss": 0.6553, "step": 3174 }, { "epoch": 4.064, "grad_norm": 0.6970064640045166, "learning_rate": 4.366746698679472e-05, "loss": 0.6334, "step": 3175 }, { "epoch": 4.06528, "grad_norm": 0.6694562435150146, "learning_rate": 4.366546618647459e-05, "loss": 0.5959, "step": 3176 }, { "epoch": 4.06656, "grad_norm": 0.7168928384780884, "learning_rate": 4.3663465386154464e-05, "loss": 0.6292, "step": 3177 }, { "epoch": 4.06784, "grad_norm": 0.6959748864173889, "learning_rate": 4.3661464585834336e-05, "loss": 0.6437, "step": 3178 }, { "epoch": 4.06912, "grad_norm": 0.6844011545181274, "learning_rate": 4.365946378551421e-05, "loss": 0.6478, "step": 3179 }, { 
"epoch": 4.0704, "grad_norm": 0.7261403799057007, "learning_rate": 4.365746298519408e-05, "loss": 0.7068, "step": 3180 }, { "epoch": 4.07168, "grad_norm": 0.7560855746269226, "learning_rate": 4.365546218487395e-05, "loss": 0.6783, "step": 3181 }, { "epoch": 4.07296, "grad_norm": 0.6811922788619995, "learning_rate": 4.3653461384553823e-05, "loss": 0.6544, "step": 3182 }, { "epoch": 4.07424, "grad_norm": 0.7000817060470581, "learning_rate": 4.3651460584233695e-05, "loss": 0.7211, "step": 3183 }, { "epoch": 4.07552, "grad_norm": 0.7141358852386475, "learning_rate": 4.364945978391357e-05, "loss": 0.6627, "step": 3184 }, { "epoch": 4.0768, "grad_norm": 0.7260948419570923, "learning_rate": 4.364745898359344e-05, "loss": 0.6885, "step": 3185 }, { "epoch": 4.07808, "grad_norm": 0.7163599133491516, "learning_rate": 4.364545818327331e-05, "loss": 0.6581, "step": 3186 }, { "epoch": 4.07936, "grad_norm": 0.6844797730445862, "learning_rate": 4.364345738295318e-05, "loss": 0.6654, "step": 3187 }, { "epoch": 4.08064, "grad_norm": 0.6640191078186035, "learning_rate": 4.3641456582633055e-05, "loss": 0.5865, "step": 3188 }, { "epoch": 4.08192, "grad_norm": 0.6174594163894653, "learning_rate": 4.3639455782312926e-05, "loss": 0.595, "step": 3189 }, { "epoch": 4.0832, "grad_norm": 0.6691709160804749, "learning_rate": 4.36374549819928e-05, "loss": 0.6298, "step": 3190 }, { "epoch": 4.08448, "grad_norm": 0.7091382741928101, "learning_rate": 4.363545418167267e-05, "loss": 0.6369, "step": 3191 }, { "epoch": 4.08576, "grad_norm": 0.6721692681312561, "learning_rate": 4.363345338135254e-05, "loss": 0.6149, "step": 3192 }, { "epoch": 4.08704, "grad_norm": 0.7344068288803101, "learning_rate": 4.3631452581032414e-05, "loss": 0.6966, "step": 3193 }, { "epoch": 4.08832, "grad_norm": 0.7092353105545044, "learning_rate": 4.3629451780712286e-05, "loss": 0.6499, "step": 3194 }, { "epoch": 4.0896, "grad_norm": 0.7213243246078491, "learning_rate": 4.362745098039216e-05, "loss": 0.6762, "step": 3195 }, { 
"epoch": 4.09088, "grad_norm": 0.734781801700592, "learning_rate": 4.362545018007203e-05, "loss": 0.7091, "step": 3196 }, { "epoch": 4.09216, "grad_norm": 0.7117027044296265, "learning_rate": 4.36234493797519e-05, "loss": 0.6111, "step": 3197 }, { "epoch": 4.09344, "grad_norm": 0.7281467914581299, "learning_rate": 4.362144857943177e-05, "loss": 0.6682, "step": 3198 }, { "epoch": 4.09472, "grad_norm": 0.7199225425720215, "learning_rate": 4.3619447779111645e-05, "loss": 0.6847, "step": 3199 }, { "epoch": 4.096, "grad_norm": 0.6403080224990845, "learning_rate": 4.3617446978791524e-05, "loss": 0.6134, "step": 3200 }, { "epoch": 4.09728, "grad_norm": 0.7282823920249939, "learning_rate": 4.361544617847139e-05, "loss": 0.6369, "step": 3201 }, { "epoch": 4.09856, "grad_norm": 0.673549234867096, "learning_rate": 4.361344537815126e-05, "loss": 0.6577, "step": 3202 }, { "epoch": 4.09984, "grad_norm": 0.717682421207428, "learning_rate": 4.361144457783113e-05, "loss": 0.6413, "step": 3203 }, { "epoch": 4.10112, "grad_norm": 0.6735298037528992, "learning_rate": 4.3609443777511004e-05, "loss": 0.6404, "step": 3204 }, { "epoch": 4.1024, "grad_norm": 0.715994656085968, "learning_rate": 4.3607442977190876e-05, "loss": 0.677, "step": 3205 }, { "epoch": 4.10368, "grad_norm": 0.7662949562072754, "learning_rate": 4.360544217687075e-05, "loss": 0.7697, "step": 3206 }, { "epoch": 4.10496, "grad_norm": 0.7299420833587646, "learning_rate": 4.360344137655063e-05, "loss": 0.688, "step": 3207 }, { "epoch": 4.10624, "grad_norm": 0.7071677446365356, "learning_rate": 4.36014405762305e-05, "loss": 0.6872, "step": 3208 }, { "epoch": 4.10752, "grad_norm": 0.7498626708984375, "learning_rate": 4.3599439775910364e-05, "loss": 0.647, "step": 3209 }, { "epoch": 4.1088, "grad_norm": 0.6997010707855225, "learning_rate": 4.3597438975590235e-05, "loss": 0.5932, "step": 3210 }, { "epoch": 4.11008, "grad_norm": 0.6870605945587158, "learning_rate": 4.359543817527011e-05, "loss": 0.6438, "step": 3211 }, { 
"epoch": 4.11136, "grad_norm": 0.6903749704360962, "learning_rate": 4.359343737494998e-05, "loss": 0.6307, "step": 3212 }, { "epoch": 4.11264, "grad_norm": 0.7005348801612854, "learning_rate": 4.359143657462985e-05, "loss": 0.6214, "step": 3213 }, { "epoch": 4.11392, "grad_norm": 0.6793337464332581, "learning_rate": 4.358943577430973e-05, "loss": 0.6668, "step": 3214 }, { "epoch": 4.1152, "grad_norm": 0.7198171019554138, "learning_rate": 4.35874349739896e-05, "loss": 0.6384, "step": 3215 }, { "epoch": 4.11648, "grad_norm": 0.7152752876281738, "learning_rate": 4.358543417366947e-05, "loss": 0.6446, "step": 3216 }, { "epoch": 4.11776, "grad_norm": 0.7162675857543945, "learning_rate": 4.358343337334934e-05, "loss": 0.6621, "step": 3217 }, { "epoch": 4.11904, "grad_norm": 0.72551029920578, "learning_rate": 4.358143257302921e-05, "loss": 0.6708, "step": 3218 }, { "epoch": 4.12032, "grad_norm": 0.7436588406562805, "learning_rate": 4.357943177270908e-05, "loss": 0.6543, "step": 3219 }, { "epoch": 4.1216, "grad_norm": 0.7259624600410461, "learning_rate": 4.3577430972388954e-05, "loss": 0.6642, "step": 3220 }, { "epoch": 4.12288, "grad_norm": 0.6752135157585144, "learning_rate": 4.357543017206883e-05, "loss": 0.6495, "step": 3221 }, { "epoch": 4.12416, "grad_norm": 0.698948323726654, "learning_rate": 4.3573429371748704e-05, "loss": 0.703, "step": 3222 }, { "epoch": 4.12544, "grad_norm": 0.7195350527763367, "learning_rate": 4.3571428571428576e-05, "loss": 0.6924, "step": 3223 }, { "epoch": 4.12672, "grad_norm": 0.6823775172233582, "learning_rate": 4.356942777110845e-05, "loss": 0.6378, "step": 3224 }, { "epoch": 4.128, "grad_norm": 0.719937801361084, "learning_rate": 4.356742697078831e-05, "loss": 0.6959, "step": 3225 }, { "epoch": 4.12928, "grad_norm": 0.6940419673919678, "learning_rate": 4.3565426170468185e-05, "loss": 0.6788, "step": 3226 }, { "epoch": 4.13056, "grad_norm": 0.7241349220275879, "learning_rate": 4.356342537014806e-05, "loss": 0.6565, "step": 3227 }, { 
"epoch": 4.13184, "grad_norm": 0.7489647269248962, "learning_rate": 4.3561424569827936e-05, "loss": 0.6415, "step": 3228 }, { "epoch": 4.13312, "grad_norm": 0.7111489176750183, "learning_rate": 4.355942376950781e-05, "loss": 0.6258, "step": 3229 }, { "epoch": 4.1344, "grad_norm": 0.6695640087127686, "learning_rate": 4.355742296918768e-05, "loss": 0.612, "step": 3230 }, { "epoch": 4.13568, "grad_norm": 0.6810483336448669, "learning_rate": 4.355542216886755e-05, "loss": 0.6202, "step": 3231 }, { "epoch": 4.13696, "grad_norm": 0.7654765844345093, "learning_rate": 4.355342136854742e-05, "loss": 0.6865, "step": 3232 }, { "epoch": 4.13824, "grad_norm": 0.7125704288482666, "learning_rate": 4.355142056822729e-05, "loss": 0.6864, "step": 3233 }, { "epoch": 4.13952, "grad_norm": 0.7397105097770691, "learning_rate": 4.354941976790716e-05, "loss": 0.6494, "step": 3234 }, { "epoch": 4.1408, "grad_norm": 0.6764322519302368, "learning_rate": 4.354741896758704e-05, "loss": 0.6203, "step": 3235 }, { "epoch": 4.14208, "grad_norm": 0.6737890839576721, "learning_rate": 4.354541816726691e-05, "loss": 0.6531, "step": 3236 }, { "epoch": 4.14336, "grad_norm": 0.6234689950942993, "learning_rate": 4.354341736694678e-05, "loss": 0.5897, "step": 3237 }, { "epoch": 4.14464, "grad_norm": 0.7278350591659546, "learning_rate": 4.3541416566626654e-05, "loss": 0.7032, "step": 3238 }, { "epoch": 4.14592, "grad_norm": 0.645071804523468, "learning_rate": 4.3539415766306526e-05, "loss": 0.6347, "step": 3239 }, { "epoch": 4.1472, "grad_norm": 0.6949378848075867, "learning_rate": 4.35374149659864e-05, "loss": 0.624, "step": 3240 }, { "epoch": 4.14848, "grad_norm": 0.6974506974220276, "learning_rate": 4.353541416566626e-05, "loss": 0.6416, "step": 3241 }, { "epoch": 4.14976, "grad_norm": 0.7012295722961426, "learning_rate": 4.353341336534614e-05, "loss": 0.6061, "step": 3242 }, { "epoch": 4.15104, "grad_norm": 0.6732747554779053, "learning_rate": 4.3531412565026013e-05, "loss": 0.6482, "step": 3243 }, { 
"epoch": 4.15232, "grad_norm": 0.6656915545463562, "learning_rate": 4.3529411764705885e-05, "loss": 0.6304, "step": 3244 }, { "epoch": 4.1536, "grad_norm": 0.6815019845962524, "learning_rate": 4.352741096438576e-05, "loss": 0.5652, "step": 3245 }, { "epoch": 4.15488, "grad_norm": 0.739524245262146, "learning_rate": 4.352541016406563e-05, "loss": 0.6389, "step": 3246 }, { "epoch": 4.15616, "grad_norm": 0.641751766204834, "learning_rate": 4.35234093637455e-05, "loss": 0.6259, "step": 3247 }, { "epoch": 4.15744, "grad_norm": 0.6507452130317688, "learning_rate": 4.352140856342537e-05, "loss": 0.5613, "step": 3248 }, { "epoch": 4.15872, "grad_norm": 0.7066096663475037, "learning_rate": 4.3519407763105245e-05, "loss": 0.5779, "step": 3249 }, { "epoch": 4.16, "grad_norm": 0.6899321675300598, "learning_rate": 4.3517406962785116e-05, "loss": 0.6098, "step": 3250 }, { "epoch": 4.16128, "grad_norm": 0.7460212707519531, "learning_rate": 4.351540616246499e-05, "loss": 0.6969, "step": 3251 }, { "epoch": 4.16256, "grad_norm": 0.6735222339630127, "learning_rate": 4.351340536214486e-05, "loss": 0.5961, "step": 3252 }, { "epoch": 4.16384, "grad_norm": 0.668106198310852, "learning_rate": 4.351140456182473e-05, "loss": 0.588, "step": 3253 }, { "epoch": 4.16512, "grad_norm": 0.7419319748878479, "learning_rate": 4.3509403761504604e-05, "loss": 0.6707, "step": 3254 }, { "epoch": 4.1664, "grad_norm": 0.7336167693138123, "learning_rate": 4.3507402961184476e-05, "loss": 0.6756, "step": 3255 }, { "epoch": 4.16768, "grad_norm": 0.7352126240730286, "learning_rate": 4.350540216086435e-05, "loss": 0.6582, "step": 3256 }, { "epoch": 4.16896, "grad_norm": 0.6964594721794128, "learning_rate": 4.350340136054422e-05, "loss": 0.6481, "step": 3257 }, { "epoch": 4.17024, "grad_norm": 0.6809775233268738, "learning_rate": 4.350140056022409e-05, "loss": 0.6216, "step": 3258 }, { "epoch": 4.17152, "grad_norm": 0.7008639574050903, "learning_rate": 4.349939975990396e-05, "loss": 0.6148, "step": 3259 }, { 
"epoch": 4.1728, "grad_norm": 0.7400866150856018, "learning_rate": 4.3497398959583835e-05, "loss": 0.6512, "step": 3260 }, { "epoch": 4.17408, "grad_norm": 0.6811804175376892, "learning_rate": 4.349539815926371e-05, "loss": 0.6134, "step": 3261 }, { "epoch": 4.17536, "grad_norm": 0.6632633805274963, "learning_rate": 4.349339735894358e-05, "loss": 0.6315, "step": 3262 }, { "epoch": 4.17664, "grad_norm": 0.6872732043266296, "learning_rate": 4.349139655862345e-05, "loss": 0.6406, "step": 3263 }, { "epoch": 4.17792, "grad_norm": 0.7408565282821655, "learning_rate": 4.348939575830332e-05, "loss": 0.6544, "step": 3264 }, { "epoch": 4.1792, "grad_norm": 0.7121074199676514, "learning_rate": 4.3487394957983194e-05, "loss": 0.6336, "step": 3265 }, { "epoch": 4.18048, "grad_norm": 0.7045162916183472, "learning_rate": 4.3485394157663066e-05, "loss": 0.7007, "step": 3266 }, { "epoch": 4.18176, "grad_norm": 0.7155641317367554, "learning_rate": 4.348339335734294e-05, "loss": 0.7133, "step": 3267 }, { "epoch": 4.18304, "grad_norm": 0.7041541934013367, "learning_rate": 4.348139255702281e-05, "loss": 0.6453, "step": 3268 }, { "epoch": 4.18432, "grad_norm": 0.7294322848320007, "learning_rate": 4.347939175670268e-05, "loss": 0.6818, "step": 3269 }, { "epoch": 4.1856, "grad_norm": 0.6842989325523376, "learning_rate": 4.347739095638256e-05, "loss": 0.654, "step": 3270 }, { "epoch": 4.18688, "grad_norm": 0.7636930346488953, "learning_rate": 4.3475390156062425e-05, "loss": 0.6433, "step": 3271 }, { "epoch": 4.18816, "grad_norm": 0.7250775694847107, "learning_rate": 4.34733893557423e-05, "loss": 0.6537, "step": 3272 }, { "epoch": 4.18944, "grad_norm": 0.7232015132904053, "learning_rate": 4.347138855542217e-05, "loss": 0.6162, "step": 3273 }, { "epoch": 4.19072, "grad_norm": 0.7661274671554565, "learning_rate": 4.346938775510204e-05, "loss": 0.6299, "step": 3274 }, { "epoch": 4.192, "grad_norm": 0.6594750285148621, "learning_rate": 4.346738695478191e-05, "loss": 0.6141, "step": 3275 }, { 
"epoch": 4.19328, "grad_norm": 0.7808811664581299, "learning_rate": 4.3465386154461785e-05, "loss": 0.7105, "step": 3276 }, { "epoch": 4.19456, "grad_norm": 0.6678184866905212, "learning_rate": 4.346338535414166e-05, "loss": 0.618, "step": 3277 }, { "epoch": 4.19584, "grad_norm": 0.7320860624313354, "learning_rate": 4.3461384553821535e-05, "loss": 0.6352, "step": 3278 }, { "epoch": 4.19712, "grad_norm": 0.7203635573387146, "learning_rate": 4.34593837535014e-05, "loss": 0.6398, "step": 3279 }, { "epoch": 4.1984, "grad_norm": 0.714809238910675, "learning_rate": 4.345738295318127e-05, "loss": 0.6567, "step": 3280 }, { "epoch": 4.19968, "grad_norm": 0.6897768378257751, "learning_rate": 4.3455382152861144e-05, "loss": 0.6413, "step": 3281 }, { "epoch": 4.20096, "grad_norm": 0.7241793274879456, "learning_rate": 4.3453381352541016e-05, "loss": 0.6331, "step": 3282 }, { "epoch": 4.20224, "grad_norm": 0.6980943083763123, "learning_rate": 4.345138055222089e-05, "loss": 0.6244, "step": 3283 }, { "epoch": 4.20352, "grad_norm": 0.7093860507011414, "learning_rate": 4.3449379751900766e-05, "loss": 0.6415, "step": 3284 }, { "epoch": 4.2048, "grad_norm": 0.7445345520973206, "learning_rate": 4.344737895158064e-05, "loss": 0.6459, "step": 3285 }, { "epoch": 4.20608, "grad_norm": 0.6987217664718628, "learning_rate": 4.344537815126051e-05, "loss": 0.6554, "step": 3286 }, { "epoch": 4.2073599999999995, "grad_norm": 0.7271133065223694, "learning_rate": 4.3443377350940375e-05, "loss": 0.6412, "step": 3287 }, { "epoch": 4.20864, "grad_norm": 0.7157135605812073, "learning_rate": 4.344137655062025e-05, "loss": 0.6508, "step": 3288 }, { "epoch": 4.20992, "grad_norm": 0.666031539440155, "learning_rate": 4.343937575030012e-05, "loss": 0.6324, "step": 3289 }, { "epoch": 4.2112, "grad_norm": 0.7204000353813171, "learning_rate": 4.343737494997999e-05, "loss": 0.6359, "step": 3290 }, { "epoch": 4.21248, "grad_norm": 0.6386025547981262, "learning_rate": 4.343537414965987e-05, "loss": 0.5998, "step": 
3291 }, { "epoch": 4.21376, "grad_norm": 0.7287147641181946, "learning_rate": 4.343337334933974e-05, "loss": 0.6929, "step": 3292 }, { "epoch": 4.21504, "grad_norm": 0.7094889283180237, "learning_rate": 4.343137254901961e-05, "loss": 0.6417, "step": 3293 }, { "epoch": 4.21632, "grad_norm": 0.6859930753707886, "learning_rate": 4.3429371748699485e-05, "loss": 0.6024, "step": 3294 }, { "epoch": 4.2176, "grad_norm": 0.6752464771270752, "learning_rate": 4.342737094837935e-05, "loss": 0.6713, "step": 3295 }, { "epoch": 4.21888, "grad_norm": 0.6904641389846802, "learning_rate": 4.342537014805922e-05, "loss": 0.636, "step": 3296 }, { "epoch": 4.22016, "grad_norm": 0.6931844353675842, "learning_rate": 4.3423369347739094e-05, "loss": 0.6189, "step": 3297 }, { "epoch": 4.22144, "grad_norm": 0.6663493514060974, "learning_rate": 4.342136854741897e-05, "loss": 0.5673, "step": 3298 }, { "epoch": 4.22272, "grad_norm": 0.7185217142105103, "learning_rate": 4.3419367747098844e-05, "loss": 0.6528, "step": 3299 }, { "epoch": 4.224, "grad_norm": 0.7160610556602478, "learning_rate": 4.3417366946778716e-05, "loss": 0.6577, "step": 3300 }, { "epoch": 4.22528, "grad_norm": 0.6835800409317017, "learning_rate": 4.341536614645859e-05, "loss": 0.6558, "step": 3301 }, { "epoch": 4.22656, "grad_norm": 0.7105692028999329, "learning_rate": 4.341336534613846e-05, "loss": 0.6875, "step": 3302 }, { "epoch": 4.22784, "grad_norm": 0.7384511232376099, "learning_rate": 4.3411364545818325e-05, "loss": 0.6484, "step": 3303 }, { "epoch": 4.22912, "grad_norm": 0.7124598622322083, "learning_rate": 4.34093637454982e-05, "loss": 0.6571, "step": 3304 }, { "epoch": 4.2304, "grad_norm": 0.718474268913269, "learning_rate": 4.340736294517807e-05, "loss": 0.6457, "step": 3305 }, { "epoch": 4.23168, "grad_norm": 0.6663171648979187, "learning_rate": 4.340536214485795e-05, "loss": 0.592, "step": 3306 }, { "epoch": 4.23296, "grad_norm": 0.6899667382240295, "learning_rate": 4.340336134453782e-05, "loss": 0.6845, "step": 
3307 }, { "epoch": 4.23424, "grad_norm": 0.7246834635734558, "learning_rate": 4.340136054421769e-05, "loss": 0.6739, "step": 3308 }, { "epoch": 4.23552, "grad_norm": 0.7182645797729492, "learning_rate": 4.339935974389756e-05, "loss": 0.6249, "step": 3309 }, { "epoch": 4.2368, "grad_norm": 0.706695020198822, "learning_rate": 4.3397358943577435e-05, "loss": 0.6775, "step": 3310 }, { "epoch": 4.23808, "grad_norm": 0.7705980539321899, "learning_rate": 4.33953581432573e-05, "loss": 0.7309, "step": 3311 }, { "epoch": 4.23936, "grad_norm": 0.7092674970626831, "learning_rate": 4.339335734293717e-05, "loss": 0.6438, "step": 3312 }, { "epoch": 4.24064, "grad_norm": 0.7281712889671326, "learning_rate": 4.339135654261705e-05, "loss": 0.6273, "step": 3313 }, { "epoch": 4.24192, "grad_norm": 0.7132616639137268, "learning_rate": 4.338935574229692e-05, "loss": 0.6189, "step": 3314 }, { "epoch": 4.2432, "grad_norm": 0.707158625125885, "learning_rate": 4.3387354941976794e-05, "loss": 0.6604, "step": 3315 }, { "epoch": 4.24448, "grad_norm": 0.7403045892715454, "learning_rate": 4.3385354141656666e-05, "loss": 0.6983, "step": 3316 }, { "epoch": 4.24576, "grad_norm": 0.7587504982948303, "learning_rate": 4.338335334133654e-05, "loss": 0.6572, "step": 3317 }, { "epoch": 4.24704, "grad_norm": 0.682956337928772, "learning_rate": 4.338135254101641e-05, "loss": 0.6519, "step": 3318 }, { "epoch": 4.24832, "grad_norm": 0.6858289241790771, "learning_rate": 4.3379351740696275e-05, "loss": 0.6171, "step": 3319 }, { "epoch": 4.2496, "grad_norm": 0.7262416481971741, "learning_rate": 4.337735094037615e-05, "loss": 0.6585, "step": 3320 }, { "epoch": 4.25088, "grad_norm": 0.6875229477882385, "learning_rate": 4.3375350140056025e-05, "loss": 0.6579, "step": 3321 }, { "epoch": 4.25216, "grad_norm": 0.7535980343818665, "learning_rate": 4.33733493397359e-05, "loss": 0.7405, "step": 3322 }, { "epoch": 4.25344, "grad_norm": 0.6897569298744202, "learning_rate": 4.337134853941577e-05, "loss": 0.6639, "step": 
3323 }, { "epoch": 4.25472, "grad_norm": 0.7273359298706055, "learning_rate": 4.336934773909564e-05, "loss": 0.6645, "step": 3324 }, { "epoch": 4.256, "grad_norm": 0.7021262645721436, "learning_rate": 4.336734693877551e-05, "loss": 0.5955, "step": 3325 }, { "epoch": 4.25728, "grad_norm": 0.6609035134315491, "learning_rate": 4.3365346138455384e-05, "loss": 0.6416, "step": 3326 }, { "epoch": 4.25856, "grad_norm": 0.8022797703742981, "learning_rate": 4.3363345338135256e-05, "loss": 0.6363, "step": 3327 }, { "epoch": 4.25984, "grad_norm": 0.7287960648536682, "learning_rate": 4.336134453781513e-05, "loss": 0.6815, "step": 3328 }, { "epoch": 4.26112, "grad_norm": 0.6661964654922485, "learning_rate": 4.3359343737495e-05, "loss": 0.6532, "step": 3329 }, { "epoch": 4.2624, "grad_norm": 0.7338867783546448, "learning_rate": 4.335734293717487e-05, "loss": 0.7156, "step": 3330 }, { "epoch": 4.26368, "grad_norm": 0.7184217572212219, "learning_rate": 4.3355342136854744e-05, "loss": 0.6892, "step": 3331 }, { "epoch": 4.26496, "grad_norm": 0.715719997882843, "learning_rate": 4.3353341336534615e-05, "loss": 0.6508, "step": 3332 }, { "epoch": 4.26624, "grad_norm": 0.6484602689743042, "learning_rate": 4.335134053621449e-05, "loss": 0.6111, "step": 3333 }, { "epoch": 4.26752, "grad_norm": 0.6997168064117432, "learning_rate": 4.334933973589436e-05, "loss": 0.6312, "step": 3334 }, { "epoch": 4.2688, "grad_norm": 0.7127143740653992, "learning_rate": 4.334733893557423e-05, "loss": 0.6445, "step": 3335 }, { "epoch": 4.27008, "grad_norm": 0.736754298210144, "learning_rate": 4.33453381352541e-05, "loss": 0.6678, "step": 3336 }, { "epoch": 4.27136, "grad_norm": 0.7001957893371582, "learning_rate": 4.3343337334933975e-05, "loss": 0.6383, "step": 3337 }, { "epoch": 4.27264, "grad_norm": 0.6919710040092468, "learning_rate": 4.334133653461385e-05, "loss": 0.6978, "step": 3338 }, { "epoch": 4.27392, "grad_norm": 0.7082419991493225, "learning_rate": 4.333933573429372e-05, "loss": 0.6756, "step": 
3339 }, { "epoch": 4.2752, "grad_norm": 0.7254000306129456, "learning_rate": 4.333733493397359e-05, "loss": 0.6301, "step": 3340 }, { "epoch": 4.27648, "grad_norm": 0.7085341811180115, "learning_rate": 4.333533413365347e-05, "loss": 0.6495, "step": 3341 }, { "epoch": 4.27776, "grad_norm": 0.699360728263855, "learning_rate": 4.3333333333333334e-05, "loss": 0.6426, "step": 3342 }, { "epoch": 4.27904, "grad_norm": 0.7209745049476624, "learning_rate": 4.3331332533013206e-05, "loss": 0.6308, "step": 3343 }, { "epoch": 4.28032, "grad_norm": 0.7370860576629639, "learning_rate": 4.332933173269308e-05, "loss": 0.623, "step": 3344 }, { "epoch": 4.2816, "grad_norm": 0.7051844596862793, "learning_rate": 4.332733093237295e-05, "loss": 0.6661, "step": 3345 }, { "epoch": 4.2828800000000005, "grad_norm": 0.6612274050712585, "learning_rate": 4.332533013205282e-05, "loss": 0.5542, "step": 3346 }, { "epoch": 4.28416, "grad_norm": 0.7362515926361084, "learning_rate": 4.332332933173269e-05, "loss": 0.7012, "step": 3347 }, { "epoch": 4.28544, "grad_norm": 0.7105290293693542, "learning_rate": 4.332132853141257e-05, "loss": 0.6536, "step": 3348 }, { "epoch": 4.28672, "grad_norm": 0.7252454161643982, "learning_rate": 4.3319327731092444e-05, "loss": 0.6018, "step": 3349 }, { "epoch": 4.288, "grad_norm": 0.706749677658081, "learning_rate": 4.331732693077231e-05, "loss": 0.6721, "step": 3350 }, { "epoch": 4.28928, "grad_norm": 0.6935198307037354, "learning_rate": 4.331532613045218e-05, "loss": 0.6268, "step": 3351 }, { "epoch": 4.29056, "grad_norm": 0.708602786064148, "learning_rate": 4.331332533013205e-05, "loss": 0.6309, "step": 3352 }, { "epoch": 4.29184, "grad_norm": 0.6960015892982483, "learning_rate": 4.3311324529811924e-05, "loss": 0.6418, "step": 3353 }, { "epoch": 4.29312, "grad_norm": 0.7291163206100464, "learning_rate": 4.3309323729491796e-05, "loss": 0.6651, "step": 3354 }, { "epoch": 4.2943999999999996, "grad_norm": 0.7136040925979614, "learning_rate": 4.3307322929171675e-05, 
"loss": 0.6568, "step": 3355 }, { "epoch": 4.29568, "grad_norm": 0.7023734450340271, "learning_rate": 4.330532212885155e-05, "loss": 0.6535, "step": 3356 }, { "epoch": 4.29696, "grad_norm": 0.706221878528595, "learning_rate": 4.330332132853142e-05, "loss": 0.6287, "step": 3357 }, { "epoch": 4.29824, "grad_norm": 0.6975945830345154, "learning_rate": 4.3301320528211284e-05, "loss": 0.635, "step": 3358 }, { "epoch": 4.29952, "grad_norm": 0.7090380787849426, "learning_rate": 4.3299319727891156e-05, "loss": 0.6462, "step": 3359 }, { "epoch": 4.3008, "grad_norm": 0.7626847624778748, "learning_rate": 4.329731892757103e-05, "loss": 0.6438, "step": 3360 }, { "epoch": 4.30208, "grad_norm": 0.6872966289520264, "learning_rate": 4.32953181272509e-05, "loss": 0.6682, "step": 3361 }, { "epoch": 4.30336, "grad_norm": 0.6952762603759766, "learning_rate": 4.329331732693078e-05, "loss": 0.6368, "step": 3362 }, { "epoch": 4.30464, "grad_norm": 0.7490139007568359, "learning_rate": 4.329131652661065e-05, "loss": 0.6352, "step": 3363 }, { "epoch": 4.30592, "grad_norm": 0.6900007128715515, "learning_rate": 4.328931572629052e-05, "loss": 0.652, "step": 3364 }, { "epoch": 4.3072, "grad_norm": 0.7102410197257996, "learning_rate": 4.3287314925970394e-05, "loss": 0.6628, "step": 3365 }, { "epoch": 4.30848, "grad_norm": 0.7227296233177185, "learning_rate": 4.328531412565026e-05, "loss": 0.6377, "step": 3366 }, { "epoch": 4.30976, "grad_norm": 0.7517803311347961, "learning_rate": 4.328331332533013e-05, "loss": 0.7112, "step": 3367 }, { "epoch": 4.31104, "grad_norm": 0.6790129542350769, "learning_rate": 4.328131252501e-05, "loss": 0.5814, "step": 3368 }, { "epoch": 4.31232, "grad_norm": 0.6734822988510132, "learning_rate": 4.327931172468988e-05, "loss": 0.6569, "step": 3369 }, { "epoch": 4.3136, "grad_norm": 0.6671125888824463, "learning_rate": 4.327731092436975e-05, "loss": 0.6558, "step": 3370 }, { "epoch": 4.31488, "grad_norm": 0.6953800916671753, "learning_rate": 4.3275310124049625e-05, 
"loss": 0.6398, "step": 3371 }, { "epoch": 4.31616, "grad_norm": 0.669097900390625, "learning_rate": 4.3273309323729497e-05, "loss": 0.6175, "step": 3372 }, { "epoch": 4.31744, "grad_norm": 0.6786286234855652, "learning_rate": 4.327130852340937e-05, "loss": 0.6225, "step": 3373 }, { "epoch": 4.31872, "grad_norm": 0.7077885866165161, "learning_rate": 4.3269307723089233e-05, "loss": 0.6297, "step": 3374 }, { "epoch": 4.32, "grad_norm": 0.7308838367462158, "learning_rate": 4.3267306922769105e-05, "loss": 0.6341, "step": 3375 }, { "epoch": 4.32128, "grad_norm": 0.7163229584693909, "learning_rate": 4.3265306122448984e-05, "loss": 0.6429, "step": 3376 }, { "epoch": 4.32256, "grad_norm": 0.7708077430725098, "learning_rate": 4.3263305322128856e-05, "loss": 0.7227, "step": 3377 }, { "epoch": 4.32384, "grad_norm": 0.694496214389801, "learning_rate": 4.326130452180873e-05, "loss": 0.6162, "step": 3378 }, { "epoch": 4.32512, "grad_norm": 0.666400671005249, "learning_rate": 4.32593037214886e-05, "loss": 0.5697, "step": 3379 }, { "epoch": 4.3264, "grad_norm": 0.7009501457214355, "learning_rate": 4.325730292116847e-05, "loss": 0.664, "step": 3380 }, { "epoch": 4.32768, "grad_norm": 0.7188718914985657, "learning_rate": 4.325530212084834e-05, "loss": 0.6408, "step": 3381 }, { "epoch": 4.32896, "grad_norm": 0.6969630122184753, "learning_rate": 4.325330132052821e-05, "loss": 0.6023, "step": 3382 }, { "epoch": 4.33024, "grad_norm": 0.6989743113517761, "learning_rate": 4.325130052020809e-05, "loss": 0.652, "step": 3383 }, { "epoch": 4.33152, "grad_norm": 0.7306556701660156, "learning_rate": 4.324929971988796e-05, "loss": 0.7016, "step": 3384 }, { "epoch": 4.3328, "grad_norm": 0.6765083074569702, "learning_rate": 4.324729891956783e-05, "loss": 0.6264, "step": 3385 }, { "epoch": 4.33408, "grad_norm": 0.6959035396575928, "learning_rate": 4.32452981192477e-05, "loss": 0.5973, "step": 3386 }, { "epoch": 4.33536, "grad_norm": 0.6781784892082214, "learning_rate": 4.3243297318927574e-05, 
"loss": 0.6569, "step": 3387 }, { "epoch": 4.33664, "grad_norm": 0.7198589444160461, "learning_rate": 4.3241296518607446e-05, "loss": 0.6785, "step": 3388 }, { "epoch": 4.33792, "grad_norm": 0.74173504114151, "learning_rate": 4.323929571828732e-05, "loss": 0.6489, "step": 3389 }, { "epoch": 4.3392, "grad_norm": 0.7088286876678467, "learning_rate": 4.323729491796719e-05, "loss": 0.6311, "step": 3390 }, { "epoch": 4.34048, "grad_norm": 0.7243875861167908, "learning_rate": 4.323529411764706e-05, "loss": 0.64, "step": 3391 }, { "epoch": 4.34176, "grad_norm": 0.7260725498199463, "learning_rate": 4.3233293317326934e-05, "loss": 0.6259, "step": 3392 }, { "epoch": 4.34304, "grad_norm": 0.6951209902763367, "learning_rate": 4.3231292517006806e-05, "loss": 0.6162, "step": 3393 }, { "epoch": 4.34432, "grad_norm": 0.6746131181716919, "learning_rate": 4.322929171668668e-05, "loss": 0.6122, "step": 3394 }, { "epoch": 4.3456, "grad_norm": 0.7057126760482788, "learning_rate": 4.322729091636655e-05, "loss": 0.6491, "step": 3395 }, { "epoch": 4.34688, "grad_norm": 0.7594478726387024, "learning_rate": 4.322529011604642e-05, "loss": 0.6743, "step": 3396 }, { "epoch": 4.34816, "grad_norm": 0.6961939930915833, "learning_rate": 4.322328931572629e-05, "loss": 0.6814, "step": 3397 }, { "epoch": 4.3494399999999995, "grad_norm": 0.7641561627388, "learning_rate": 4.3221288515406165e-05, "loss": 0.6787, "step": 3398 }, { "epoch": 4.35072, "grad_norm": 0.7088523507118225, "learning_rate": 4.321928771508604e-05, "loss": 0.6936, "step": 3399 }, { "epoch": 4.352, "grad_norm": 0.7125248908996582, "learning_rate": 4.321728691476591e-05, "loss": 0.6512, "step": 3400 }, { "epoch": 4.35328, "grad_norm": 0.7159749269485474, "learning_rate": 4.321528611444578e-05, "loss": 0.6013, "step": 3401 }, { "epoch": 4.35456, "grad_norm": 0.7057827115058899, "learning_rate": 4.321328531412565e-05, "loss": 0.6754, "step": 3402 }, { "epoch": 4.35584, "grad_norm": 0.7021535038948059, "learning_rate": 
4.3211284513805524e-05, "loss": 0.6203, "step": 3403 }, { "epoch": 4.35712, "grad_norm": 0.6961078643798828, "learning_rate": 4.3209283713485396e-05, "loss": 0.6115, "step": 3404 }, { "epoch": 4.3584, "grad_norm": 0.6545549035072327, "learning_rate": 4.320728291316527e-05, "loss": 0.5919, "step": 3405 }, { "epoch": 4.35968, "grad_norm": 0.7263978123664856, "learning_rate": 4.320528211284514e-05, "loss": 0.6691, "step": 3406 }, { "epoch": 4.36096, "grad_norm": 0.7528616189956665, "learning_rate": 4.320328131252501e-05, "loss": 0.7051, "step": 3407 }, { "epoch": 4.36224, "grad_norm": 0.6853641867637634, "learning_rate": 4.320128051220488e-05, "loss": 0.6242, "step": 3408 }, { "epoch": 4.36352, "grad_norm": 0.7724465727806091, "learning_rate": 4.3199279711884755e-05, "loss": 0.5945, "step": 3409 }, { "epoch": 4.3648, "grad_norm": 0.6866872310638428, "learning_rate": 4.319727891156463e-05, "loss": 0.6668, "step": 3410 }, { "epoch": 4.36608, "grad_norm": 0.7362887263298035, "learning_rate": 4.3195278111244506e-05, "loss": 0.6773, "step": 3411 }, { "epoch": 4.36736, "grad_norm": 0.7267001867294312, "learning_rate": 4.319327731092437e-05, "loss": 0.651, "step": 3412 }, { "epoch": 4.36864, "grad_norm": 0.7169755697250366, "learning_rate": 4.319127651060424e-05, "loss": 0.7017, "step": 3413 }, { "epoch": 4.3699200000000005, "grad_norm": 0.7027722597122192, "learning_rate": 4.3189275710284115e-05, "loss": 0.5944, "step": 3414 }, { "epoch": 4.3712, "grad_norm": 0.7033854722976685, "learning_rate": 4.3187274909963986e-05, "loss": 0.6433, "step": 3415 }, { "epoch": 4.37248, "grad_norm": 0.7184159755706787, "learning_rate": 4.318527410964386e-05, "loss": 0.6733, "step": 3416 }, { "epoch": 4.37376, "grad_norm": 0.7299154996871948, "learning_rate": 4.318327330932373e-05, "loss": 0.7094, "step": 3417 }, { "epoch": 4.37504, "grad_norm": 0.6535490155220032, "learning_rate": 4.31812725090036e-05, "loss": 0.6, "step": 3418 }, { "epoch": 4.37632, "grad_norm": 0.6924900412559509, 
"learning_rate": 4.317927170868348e-05, "loss": 0.632, "step": 3419 }, { "epoch": 4.3776, "grad_norm": 0.7481140494346619, "learning_rate": 4.3177270908363346e-05, "loss": 0.6669, "step": 3420 }, { "epoch": 4.37888, "grad_norm": 0.6912330985069275, "learning_rate": 4.317527010804322e-05, "loss": 0.6514, "step": 3421 }, { "epoch": 4.38016, "grad_norm": 0.7058601379394531, "learning_rate": 4.317326930772309e-05, "loss": 0.66, "step": 3422 }, { "epoch": 4.38144, "grad_norm": 0.6704264879226685, "learning_rate": 4.317126850740296e-05, "loss": 0.6386, "step": 3423 }, { "epoch": 4.38272, "grad_norm": 0.6729428172111511, "learning_rate": 4.316926770708283e-05, "loss": 0.6369, "step": 3424 }, { "epoch": 4.384, "grad_norm": 0.6993991732597351, "learning_rate": 4.3167266906762705e-05, "loss": 0.6756, "step": 3425 }, { "epoch": 4.38528, "grad_norm": 0.7253281474113464, "learning_rate": 4.3165266106442584e-05, "loss": 0.6467, "step": 3426 }, { "epoch": 4.38656, "grad_norm": 0.7546789646148682, "learning_rate": 4.3163265306122455e-05, "loss": 0.6914, "step": 3427 }, { "epoch": 4.38784, "grad_norm": 0.7461205720901489, "learning_rate": 4.316126450580232e-05, "loss": 0.6633, "step": 3428 }, { "epoch": 4.38912, "grad_norm": 0.7659752368927002, "learning_rate": 4.315926370548219e-05, "loss": 0.6514, "step": 3429 }, { "epoch": 4.3904, "grad_norm": 0.7351868152618408, "learning_rate": 4.3157262905162064e-05, "loss": 0.7025, "step": 3430 }, { "epoch": 4.39168, "grad_norm": 0.7607386708259583, "learning_rate": 4.3155262104841936e-05, "loss": 0.6971, "step": 3431 }, { "epoch": 4.39296, "grad_norm": 0.7106710076332092, "learning_rate": 4.315326130452181e-05, "loss": 0.6642, "step": 3432 }, { "epoch": 4.39424, "grad_norm": 0.716480553150177, "learning_rate": 4.3151260504201687e-05, "loss": 0.6401, "step": 3433 }, { "epoch": 4.39552, "grad_norm": 0.6813743710517883, "learning_rate": 4.314925970388156e-05, "loss": 0.6335, "step": 3434 }, { "epoch": 4.3968, "grad_norm": 0.6941514611244202, 
"learning_rate": 4.314725890356143e-05, "loss": 0.6294, "step": 3435 }, { "epoch": 4.39808, "grad_norm": 0.75897616147995, "learning_rate": 4.3145258103241295e-05, "loss": 0.674, "step": 3436 }, { "epoch": 4.39936, "grad_norm": 0.6969784498214722, "learning_rate": 4.314325730292117e-05, "loss": 0.6424, "step": 3437 }, { "epoch": 4.40064, "grad_norm": 0.6732988953590393, "learning_rate": 4.314125650260104e-05, "loss": 0.6206, "step": 3438 }, { "epoch": 4.40192, "grad_norm": 0.6822874546051025, "learning_rate": 4.313925570228091e-05, "loss": 0.5994, "step": 3439 }, { "epoch": 4.4032, "grad_norm": 0.6838374733924866, "learning_rate": 4.313725490196079e-05, "loss": 0.6224, "step": 3440 }, { "epoch": 4.40448, "grad_norm": 0.7581614851951599, "learning_rate": 4.313525410164066e-05, "loss": 0.6899, "step": 3441 }, { "epoch": 4.40576, "grad_norm": 0.7927963733673096, "learning_rate": 4.313325330132053e-05, "loss": 0.6671, "step": 3442 }, { "epoch": 4.40704, "grad_norm": 0.7558853030204773, "learning_rate": 4.3131252501000405e-05, "loss": 0.6714, "step": 3443 }, { "epoch": 4.40832, "grad_norm": 0.6957927346229553, "learning_rate": 4.312925170068027e-05, "loss": 0.6465, "step": 3444 }, { "epoch": 4.4096, "grad_norm": 0.7612014412879944, "learning_rate": 4.312725090036014e-05, "loss": 0.6855, "step": 3445 }, { "epoch": 4.41088, "grad_norm": 0.735772430896759, "learning_rate": 4.3125250100040014e-05, "loss": 0.6411, "step": 3446 }, { "epoch": 4.41216, "grad_norm": 0.6750103235244751, "learning_rate": 4.312324929971989e-05, "loss": 0.5845, "step": 3447 }, { "epoch": 4.41344, "grad_norm": 0.7323096990585327, "learning_rate": 4.3121248499399764e-05, "loss": 0.6724, "step": 3448 }, { "epoch": 4.41472, "grad_norm": 0.706743597984314, "learning_rate": 4.3119247699079636e-05, "loss": 0.6313, "step": 3449 }, { "epoch": 4.416, "grad_norm": 0.6990499496459961, "learning_rate": 4.311724689875951e-05, "loss": 0.6688, "step": 3450 }, { "epoch": 4.41728, "grad_norm": 0.7238355875015259, 
"learning_rate": 4.311524609843938e-05, "loss": 0.6343, "step": 3451 }, { "epoch": 4.41856, "grad_norm": 0.6958813667297363, "learning_rate": 4.3113245298119245e-05, "loss": 0.6341, "step": 3452 }, { "epoch": 4.41984, "grad_norm": 0.7154257893562317, "learning_rate": 4.311124449779912e-05, "loss": 0.6938, "step": 3453 }, { "epoch": 4.42112, "grad_norm": 0.6367705464363098, "learning_rate": 4.3109243697478996e-05, "loss": 0.5721, "step": 3454 }, { "epoch": 4.4224, "grad_norm": 0.6861926913261414, "learning_rate": 4.310724289715887e-05, "loss": 0.6292, "step": 3455 }, { "epoch": 4.42368, "grad_norm": 0.7064390182495117, "learning_rate": 4.310524209683874e-05, "loss": 0.6467, "step": 3456 }, { "epoch": 4.4249600000000004, "grad_norm": 0.6897706985473633, "learning_rate": 4.310324129651861e-05, "loss": 0.6367, "step": 3457 }, { "epoch": 4.42624, "grad_norm": 0.7397942543029785, "learning_rate": 4.310124049619848e-05, "loss": 0.7199, "step": 3458 }, { "epoch": 4.42752, "grad_norm": 0.7196327447891235, "learning_rate": 4.3099239695878355e-05, "loss": 0.6154, "step": 3459 }, { "epoch": 4.4288, "grad_norm": 0.7079979777336121, "learning_rate": 4.309723889555822e-05, "loss": 0.6571, "step": 3460 }, { "epoch": 4.43008, "grad_norm": 0.7943414449691772, "learning_rate": 4.30952380952381e-05, "loss": 0.7078, "step": 3461 }, { "epoch": 4.43136, "grad_norm": 0.6748960614204407, "learning_rate": 4.309323729491797e-05, "loss": 0.6341, "step": 3462 }, { "epoch": 4.43264, "grad_norm": 0.7459009885787964, "learning_rate": 4.309123649459784e-05, "loss": 0.72, "step": 3463 }, { "epoch": 4.43392, "grad_norm": 0.7245545983314514, "learning_rate": 4.3089235694277714e-05, "loss": 0.6007, "step": 3464 }, { "epoch": 4.4352, "grad_norm": 0.6897006630897522, "learning_rate": 4.3087234893957586e-05, "loss": 0.6911, "step": 3465 }, { "epoch": 4.4364799999999995, "grad_norm": 0.684449315071106, "learning_rate": 4.308523409363746e-05, "loss": 0.627, "step": 3466 }, { "epoch": 4.43776, "grad_norm": 
0.7077491283416748, "learning_rate": 4.308323329331733e-05, "loss": 0.6295, "step": 3467 }, { "epoch": 4.43904, "grad_norm": 0.716648280620575, "learning_rate": 4.30812324929972e-05, "loss": 0.601, "step": 3468 }, { "epoch": 4.44032, "grad_norm": 0.7259345650672913, "learning_rate": 4.3079231692677073e-05, "loss": 0.6443, "step": 3469 }, { "epoch": 4.4416, "grad_norm": 0.7068425416946411, "learning_rate": 4.3077230892356945e-05, "loss": 0.6623, "step": 3470 }, { "epoch": 4.44288, "grad_norm": 0.713869571685791, "learning_rate": 4.307523009203682e-05, "loss": 0.6596, "step": 3471 }, { "epoch": 4.44416, "grad_norm": 0.6864603757858276, "learning_rate": 4.307322929171669e-05, "loss": 0.6143, "step": 3472 }, { "epoch": 4.44544, "grad_norm": 0.7068229913711548, "learning_rate": 4.307122849139656e-05, "loss": 0.6255, "step": 3473 }, { "epoch": 4.44672, "grad_norm": 0.7183898091316223, "learning_rate": 4.306922769107643e-05, "loss": 0.705, "step": 3474 }, { "epoch": 4.448, "grad_norm": 0.6923708915710449, "learning_rate": 4.3067226890756305e-05, "loss": 0.6048, "step": 3475 }, { "epoch": 4.44928, "grad_norm": 0.7035982608795166, "learning_rate": 4.3065226090436176e-05, "loss": 0.6131, "step": 3476 }, { "epoch": 4.45056, "grad_norm": 0.7768038511276245, "learning_rate": 4.306322529011605e-05, "loss": 0.7338, "step": 3477 }, { "epoch": 4.45184, "grad_norm": 0.7020729780197144, "learning_rate": 4.306122448979592e-05, "loss": 0.6507, "step": 3478 }, { "epoch": 4.45312, "grad_norm": 0.6677333116531372, "learning_rate": 4.305922368947579e-05, "loss": 0.6101, "step": 3479 }, { "epoch": 4.4544, "grad_norm": 0.6806728839874268, "learning_rate": 4.3057222889155664e-05, "loss": 0.6016, "step": 3480 }, { "epoch": 4.45568, "grad_norm": 0.7218868732452393, "learning_rate": 4.3055222088835536e-05, "loss": 0.6874, "step": 3481 }, { "epoch": 4.45696, "grad_norm": 0.757403552532196, "learning_rate": 4.305322128851541e-05, "loss": 0.7041, "step": 3482 }, { "epoch": 4.45824, "grad_norm": 
0.725426435470581, "learning_rate": 4.305122048819528e-05, "loss": 0.6705, "step": 3483 }, { "epoch": 4.45952, "grad_norm": 0.7142929434776306, "learning_rate": 4.304921968787515e-05, "loss": 0.6241, "step": 3484 }, { "epoch": 4.4608, "grad_norm": 0.7178435325622559, "learning_rate": 4.304721888755502e-05, "loss": 0.6537, "step": 3485 }, { "epoch": 4.46208, "grad_norm": 0.7174624800682068, "learning_rate": 4.3045218087234895e-05, "loss": 0.6261, "step": 3486 }, { "epoch": 4.46336, "grad_norm": 0.7432010173797607, "learning_rate": 4.304321728691477e-05, "loss": 0.6189, "step": 3487 }, { "epoch": 4.46464, "grad_norm": 0.7320737838745117, "learning_rate": 4.304121648659464e-05, "loss": 0.6203, "step": 3488 }, { "epoch": 4.46592, "grad_norm": 0.7159371972084045, "learning_rate": 4.303921568627452e-05, "loss": 0.6565, "step": 3489 }, { "epoch": 4.4672, "grad_norm": 0.6766809225082397, "learning_rate": 4.303721488595438e-05, "loss": 0.6285, "step": 3490 }, { "epoch": 4.46848, "grad_norm": 0.7048515677452087, "learning_rate": 4.3035214085634254e-05, "loss": 0.6259, "step": 3491 }, { "epoch": 4.46976, "grad_norm": 0.7184696197509766, "learning_rate": 4.3033213285314126e-05, "loss": 0.6429, "step": 3492 }, { "epoch": 4.47104, "grad_norm": 0.7331928014755249, "learning_rate": 4.3031212484994e-05, "loss": 0.6189, "step": 3493 }, { "epoch": 4.47232, "grad_norm": 0.6746116876602173, "learning_rate": 4.302921168467387e-05, "loss": 0.6499, "step": 3494 }, { "epoch": 4.4736, "grad_norm": 0.6947497725486755, "learning_rate": 4.302721088435374e-05, "loss": 0.6577, "step": 3495 }, { "epoch": 4.47488, "grad_norm": 0.7183555960655212, "learning_rate": 4.302521008403362e-05, "loss": 0.6201, "step": 3496 }, { "epoch": 4.47616, "grad_norm": 0.7647238373756409, "learning_rate": 4.302320928371349e-05, "loss": 0.6564, "step": 3497 }, { "epoch": 4.47744, "grad_norm": 0.6781284809112549, "learning_rate": 4.302120848339336e-05, "loss": 0.6367, "step": 3498 }, { "epoch": 4.47872, "grad_norm": 
0.7044297456741333, "learning_rate": 4.301920768307323e-05, "loss": 0.6508, "step": 3499 }, { "epoch": 4.48, "grad_norm": 0.7338101863861084, "learning_rate": 4.30172068827531e-05, "loss": 0.6807, "step": 3500 }, { "epoch": 4.48128, "grad_norm": 0.7334948182106018, "learning_rate": 4.301520608243297e-05, "loss": 0.6962, "step": 3501 }, { "epoch": 4.48256, "grad_norm": 0.7243069410324097, "learning_rate": 4.3013205282112845e-05, "loss": 0.6448, "step": 3502 }, { "epoch": 4.48384, "grad_norm": 0.7103859782218933, "learning_rate": 4.301120448179272e-05, "loss": 0.6347, "step": 3503 }, { "epoch": 4.48512, "grad_norm": 0.7307097911834717, "learning_rate": 4.3009203681472595e-05, "loss": 0.6947, "step": 3504 }, { "epoch": 4.4864, "grad_norm": 0.6719437837600708, "learning_rate": 4.300720288115247e-05, "loss": 0.6101, "step": 3505 }, { "epoch": 4.48768, "grad_norm": 0.6755344867706299, "learning_rate": 4.300520208083233e-05, "loss": 0.6134, "step": 3506 }, { "epoch": 4.48896, "grad_norm": 0.7292615175247192, "learning_rate": 4.3003201280512204e-05, "loss": 0.7104, "step": 3507 }, { "epoch": 4.49024, "grad_norm": 0.7445333003997803, "learning_rate": 4.3001200480192076e-05, "loss": 0.702, "step": 3508 }, { "epoch": 4.49152, "grad_norm": 0.7050941586494446, "learning_rate": 4.299919967987195e-05, "loss": 0.6471, "step": 3509 }, { "epoch": 4.4928, "grad_norm": 0.7550082206726074, "learning_rate": 4.2997198879551826e-05, "loss": 0.6283, "step": 3510 }, { "epoch": 4.49408, "grad_norm": 0.6943626999855042, "learning_rate": 4.29951980792317e-05, "loss": 0.6155, "step": 3511 }, { "epoch": 4.49536, "grad_norm": 0.7097110152244568, "learning_rate": 4.299319727891157e-05, "loss": 0.6762, "step": 3512 }, { "epoch": 4.49664, "grad_norm": 0.7257136106491089, "learning_rate": 4.299119647859144e-05, "loss": 0.6559, "step": 3513 }, { "epoch": 4.49792, "grad_norm": 0.7039051651954651, "learning_rate": 4.298919567827131e-05, "loss": 0.6605, "step": 3514 }, { "epoch": 4.4992, "grad_norm": 
0.7470154762268066, "learning_rate": 4.298719487795118e-05, "loss": 0.6569, "step": 3515 }, { "epoch": 4.50048, "grad_norm": 0.7026588916778564, "learning_rate": 4.298519407763105e-05, "loss": 0.6678, "step": 3516 }, { "epoch": 4.50176, "grad_norm": 0.7094910144805908, "learning_rate": 4.298319327731093e-05, "loss": 0.6832, "step": 3517 }, { "epoch": 4.50304, "grad_norm": 0.701600968837738, "learning_rate": 4.29811924769908e-05, "loss": 0.6583, "step": 3518 }, { "epoch": 4.50432, "grad_norm": 0.6822985410690308, "learning_rate": 4.297919167667067e-05, "loss": 0.6335, "step": 3519 }, { "epoch": 4.5056, "grad_norm": 0.7292996048927307, "learning_rate": 4.2977190876350545e-05, "loss": 0.6355, "step": 3520 }, { "epoch": 4.50688, "grad_norm": 0.6827433705329895, "learning_rate": 4.297519007603042e-05, "loss": 0.6288, "step": 3521 }, { "epoch": 4.50816, "grad_norm": 0.6858897805213928, "learning_rate": 4.297318927571028e-05, "loss": 0.6333, "step": 3522 }, { "epoch": 4.50944, "grad_norm": 0.7058318853378296, "learning_rate": 4.2971188475390154e-05, "loss": 0.6742, "step": 3523 }, { "epoch": 4.51072, "grad_norm": 0.7074992060661316, "learning_rate": 4.296918767507003e-05, "loss": 0.6459, "step": 3524 }, { "epoch": 4.5120000000000005, "grad_norm": 0.7430311441421509, "learning_rate": 4.2967186874749904e-05, "loss": 0.6278, "step": 3525 }, { "epoch": 4.51328, "grad_norm": 0.6989936828613281, "learning_rate": 4.2965186074429776e-05, "loss": 0.6302, "step": 3526 }, { "epoch": 4.51456, "grad_norm": 0.7324742674827576, "learning_rate": 4.296318527410965e-05, "loss": 0.6347, "step": 3527 }, { "epoch": 4.51584, "grad_norm": 0.8047772645950317, "learning_rate": 4.296118447378952e-05, "loss": 0.6524, "step": 3528 }, { "epoch": 4.51712, "grad_norm": 0.7454036474227905, "learning_rate": 4.295918367346939e-05, "loss": 0.6636, "step": 3529 }, { "epoch": 4.5184, "grad_norm": 0.6896642446517944, "learning_rate": 4.295718287314926e-05, "loss": 0.6145, "step": 3530 }, { "epoch": 4.51968, 
"grad_norm": 0.7129753828048706, "learning_rate": 4.295518207282913e-05, "loss": 0.6929, "step": 3531 }, { "epoch": 4.52096, "grad_norm": 0.6888583898544312, "learning_rate": 4.295318127250901e-05, "loss": 0.6411, "step": 3532 }, { "epoch": 4.52224, "grad_norm": 0.7414783239364624, "learning_rate": 4.295118047218888e-05, "loss": 0.7311, "step": 3533 }, { "epoch": 4.5235199999999995, "grad_norm": 0.7330256104469299, "learning_rate": 4.294917967186875e-05, "loss": 0.6781, "step": 3534 }, { "epoch": 4.5248, "grad_norm": 0.6906652450561523, "learning_rate": 4.294717887154862e-05, "loss": 0.6435, "step": 3535 }, { "epoch": 4.52608, "grad_norm": 0.677453875541687, "learning_rate": 4.2945178071228495e-05, "loss": 0.6618, "step": 3536 }, { "epoch": 4.52736, "grad_norm": 0.7467184066772461, "learning_rate": 4.2943177270908366e-05, "loss": 0.6991, "step": 3537 }, { "epoch": 4.52864, "grad_norm": 0.7100308537483215, "learning_rate": 4.294117647058823e-05, "loss": 0.6298, "step": 3538 }, { "epoch": 4.52992, "grad_norm": 0.6982895135879517, "learning_rate": 4.293917567026811e-05, "loss": 0.6826, "step": 3539 }, { "epoch": 4.5312, "grad_norm": 0.6765713095664978, "learning_rate": 4.293717486994798e-05, "loss": 0.6307, "step": 3540 }, { "epoch": 4.53248, "grad_norm": 0.6806417107582092, "learning_rate": 4.2935174069627854e-05, "loss": 0.6495, "step": 3541 }, { "epoch": 4.53376, "grad_norm": 0.7208904027938843, "learning_rate": 4.2933173269307726e-05, "loss": 0.6566, "step": 3542 }, { "epoch": 4.53504, "grad_norm": 0.7589842677116394, "learning_rate": 4.29311724689876e-05, "loss": 0.7152, "step": 3543 }, { "epoch": 4.53632, "grad_norm": 0.7217769622802734, "learning_rate": 4.292917166866747e-05, "loss": 0.6694, "step": 3544 }, { "epoch": 4.5376, "grad_norm": 0.681645393371582, "learning_rate": 4.292717086834734e-05, "loss": 0.6182, "step": 3545 }, { "epoch": 4.53888, "grad_norm": 0.76906818151474, "learning_rate": 4.292517006802721e-05, "loss": 0.6863, "step": 3546 }, { "epoch": 
4.54016, "grad_norm": 0.6864603161811829, "learning_rate": 4.2923169267707085e-05, "loss": 0.5935, "step": 3547 }, { "epoch": 4.54144, "grad_norm": 0.7130399942398071, "learning_rate": 4.292116846738696e-05, "loss": 0.6145, "step": 3548 }, { "epoch": 4.54272, "grad_norm": 0.7169683575630188, "learning_rate": 4.291916766706683e-05, "loss": 0.6562, "step": 3549 }, { "epoch": 4.5440000000000005, "grad_norm": 0.7166752815246582, "learning_rate": 4.29171668667467e-05, "loss": 0.6539, "step": 3550 }, { "epoch": 4.54528, "grad_norm": 0.681341290473938, "learning_rate": 4.291516606642657e-05, "loss": 0.6518, "step": 3551 }, { "epoch": 4.54656, "grad_norm": 0.6968530416488647, "learning_rate": 4.2913165266106444e-05, "loss": 0.6345, "step": 3552 }, { "epoch": 4.54784, "grad_norm": 0.7314958572387695, "learning_rate": 4.2911164465786316e-05, "loss": 0.7076, "step": 3553 }, { "epoch": 4.54912, "grad_norm": 0.7102290987968445, "learning_rate": 4.290916366546619e-05, "loss": 0.6552, "step": 3554 }, { "epoch": 4.5504, "grad_norm": 0.7079906463623047, "learning_rate": 4.290716286514606e-05, "loss": 0.6323, "step": 3555 }, { "epoch": 4.55168, "grad_norm": 0.6791037917137146, "learning_rate": 4.290516206482593e-05, "loss": 0.6335, "step": 3556 }, { "epoch": 4.55296, "grad_norm": 0.6969685554504395, "learning_rate": 4.2903161264505804e-05, "loss": 0.6266, "step": 3557 }, { "epoch": 4.55424, "grad_norm": 0.7246193885803223, "learning_rate": 4.2901160464185675e-05, "loss": 0.6518, "step": 3558 }, { "epoch": 4.55552, "grad_norm": 0.679791271686554, "learning_rate": 4.289915966386555e-05, "loss": 0.6229, "step": 3559 }, { "epoch": 4.5568, "grad_norm": 0.7322858572006226, "learning_rate": 4.289715886354542e-05, "loss": 0.6321, "step": 3560 }, { "epoch": 4.55808, "grad_norm": 0.6904955506324768, "learning_rate": 4.289515806322529e-05, "loss": 0.6772, "step": 3561 }, { "epoch": 4.55936, "grad_norm": 0.746525764465332, "learning_rate": 4.289315726290516e-05, "loss": 0.6534, "step": 3562 }, 
{ "epoch": 4.56064, "grad_norm": 0.7122935056686401, "learning_rate": 4.2891156462585035e-05, "loss": 0.6796, "step": 3563 }, { "epoch": 4.56192, "grad_norm": 0.7172386050224304, "learning_rate": 4.2889155662264907e-05, "loss": 0.6768, "step": 3564 }, { "epoch": 4.5632, "grad_norm": 0.7053015232086182, "learning_rate": 4.288715486194478e-05, "loss": 0.5911, "step": 3565 }, { "epoch": 4.56448, "grad_norm": 0.6957263946533203, "learning_rate": 4.288515406162465e-05, "loss": 0.6538, "step": 3566 }, { "epoch": 4.56576, "grad_norm": 0.6875421404838562, "learning_rate": 4.288315326130453e-05, "loss": 0.6839, "step": 3567 }, { "epoch": 4.56704, "grad_norm": 0.7036089301109314, "learning_rate": 4.2881152460984394e-05, "loss": 0.6077, "step": 3568 }, { "epoch": 4.56832, "grad_norm": 0.6996341943740845, "learning_rate": 4.2879151660664266e-05, "loss": 0.6304, "step": 3569 }, { "epoch": 4.5696, "grad_norm": 0.7085795402526855, "learning_rate": 4.287715086034414e-05, "loss": 0.6497, "step": 3570 }, { "epoch": 4.57088, "grad_norm": 0.6893650889396667, "learning_rate": 4.287515006002401e-05, "loss": 0.6503, "step": 3571 }, { "epoch": 4.57216, "grad_norm": 0.6926917433738708, "learning_rate": 4.287314925970388e-05, "loss": 0.6245, "step": 3572 }, { "epoch": 4.57344, "grad_norm": 0.7077941298484802, "learning_rate": 4.287114845938375e-05, "loss": 0.6522, "step": 3573 }, { "epoch": 4.57472, "grad_norm": 0.7533298134803772, "learning_rate": 4.286914765906363e-05, "loss": 0.6637, "step": 3574 }, { "epoch": 4.576, "grad_norm": 0.7265738844871521, "learning_rate": 4.2867146858743504e-05, "loss": 0.6432, "step": 3575 }, { "epoch": 4.57728, "grad_norm": 0.7495373487472534, "learning_rate": 4.286514605842337e-05, "loss": 0.6487, "step": 3576 }, { "epoch": 4.5785599999999995, "grad_norm": 0.7241270542144775, "learning_rate": 4.286314525810324e-05, "loss": 0.6962, "step": 3577 }, { "epoch": 4.57984, "grad_norm": 0.6941715478897095, "learning_rate": 4.286114445778311e-05, "loss": 0.6421, 
"step": 3578 }, { "epoch": 4.58112, "grad_norm": 0.7148897647857666, "learning_rate": 4.2859143657462984e-05, "loss": 0.6584, "step": 3579 }, { "epoch": 4.5824, "grad_norm": 0.7130563855171204, "learning_rate": 4.2857142857142856e-05, "loss": 0.6817, "step": 3580 }, { "epoch": 4.58368, "grad_norm": 0.7111728191375732, "learning_rate": 4.2855142056822735e-05, "loss": 0.6443, "step": 3581 }, { "epoch": 4.58496, "grad_norm": 0.7034164667129517, "learning_rate": 4.285314125650261e-05, "loss": 0.6662, "step": 3582 }, { "epoch": 4.58624, "grad_norm": 0.6821185946464539, "learning_rate": 4.285114045618248e-05, "loss": 0.6381, "step": 3583 }, { "epoch": 4.58752, "grad_norm": 0.6892850995063782, "learning_rate": 4.2849139655862344e-05, "loss": 0.6515, "step": 3584 }, { "epoch": 4.5888, "grad_norm": 0.7385504245758057, "learning_rate": 4.2847138855542216e-05, "loss": 0.6632, "step": 3585 }, { "epoch": 4.59008, "grad_norm": 0.6548694372177124, "learning_rate": 4.284513805522209e-05, "loss": 0.609, "step": 3586 }, { "epoch": 4.59136, "grad_norm": 0.7125710248947144, "learning_rate": 4.284313725490196e-05, "loss": 0.6506, "step": 3587 }, { "epoch": 4.59264, "grad_norm": 0.7181362509727478, "learning_rate": 4.284113645458184e-05, "loss": 0.5887, "step": 3588 }, { "epoch": 4.59392, "grad_norm": 0.6801038980484009, "learning_rate": 4.283913565426171e-05, "loss": 0.6322, "step": 3589 }, { "epoch": 4.5952, "grad_norm": 0.7283415198326111, "learning_rate": 4.283713485394158e-05, "loss": 0.6793, "step": 3590 }, { "epoch": 4.59648, "grad_norm": 0.7576428055763245, "learning_rate": 4.2835134053621453e-05, "loss": 0.6391, "step": 3591 }, { "epoch": 4.59776, "grad_norm": 0.7399201393127441, "learning_rate": 4.283313325330132e-05, "loss": 0.6719, "step": 3592 }, { "epoch": 4.5990400000000005, "grad_norm": 0.7038607001304626, "learning_rate": 4.283113245298119e-05, "loss": 0.6885, "step": 3593 }, { "epoch": 4.60032, "grad_norm": 0.7106660604476929, "learning_rate": 4.282913165266106e-05, 
"loss": 0.6382, "step": 3594 }, { "epoch": 4.6016, "grad_norm": 0.7221371531486511, "learning_rate": 4.282713085234094e-05, "loss": 0.6697, "step": 3595 }, { "epoch": 4.60288, "grad_norm": 0.702472984790802, "learning_rate": 4.282513005202081e-05, "loss": 0.6555, "step": 3596 }, { "epoch": 4.60416, "grad_norm": 0.6985082626342773, "learning_rate": 4.2823129251700685e-05, "loss": 0.6591, "step": 3597 }, { "epoch": 4.60544, "grad_norm": 0.6914473176002502, "learning_rate": 4.2821128451380556e-05, "loss": 0.6284, "step": 3598 }, { "epoch": 4.60672, "grad_norm": 0.7164996266365051, "learning_rate": 4.281912765106043e-05, "loss": 0.6409, "step": 3599 }, { "epoch": 4.608, "grad_norm": 0.7067193388938904, "learning_rate": 4.2817126850740293e-05, "loss": 0.6891, "step": 3600 }, { "epoch": 4.60928, "grad_norm": 0.672056257724762, "learning_rate": 4.2815126050420165e-05, "loss": 0.6533, "step": 3601 }, { "epoch": 4.6105599999999995, "grad_norm": 0.6592856049537659, "learning_rate": 4.2813125250100044e-05, "loss": 0.5726, "step": 3602 }, { "epoch": 4.61184, "grad_norm": 0.7327611446380615, "learning_rate": 4.2811124449779916e-05, "loss": 0.6566, "step": 3603 }, { "epoch": 4.61312, "grad_norm": 0.7556269764900208, "learning_rate": 4.280912364945979e-05, "loss": 0.7096, "step": 3604 }, { "epoch": 4.6144, "grad_norm": 0.7422052025794983, "learning_rate": 4.280712284913966e-05, "loss": 0.7039, "step": 3605 }, { "epoch": 4.61568, "grad_norm": 0.7220710515975952, "learning_rate": 4.280512204881953e-05, "loss": 0.6709, "step": 3606 }, { "epoch": 4.61696, "grad_norm": 0.7694050669670105, "learning_rate": 4.28031212484994e-05, "loss": 0.6938, "step": 3607 }, { "epoch": 4.61824, "grad_norm": 0.7003862261772156, "learning_rate": 4.280112044817927e-05, "loss": 0.6526, "step": 3608 }, { "epoch": 4.61952, "grad_norm": 0.7154176831245422, "learning_rate": 4.279911964785915e-05, "loss": 0.6106, "step": 3609 }, { "epoch": 4.6208, "grad_norm": 0.6721168160438538, "learning_rate": 
4.279711884753902e-05, "loss": 0.6439, "step": 3610 }, { "epoch": 4.62208, "grad_norm": 0.7174552083015442, "learning_rate": 4.279511804721889e-05, "loss": 0.6373, "step": 3611 }, { "epoch": 4.62336, "grad_norm": 0.7415494918823242, "learning_rate": 4.279311724689876e-05, "loss": 0.6797, "step": 3612 }, { "epoch": 4.62464, "grad_norm": 0.6769813299179077, "learning_rate": 4.2791116446578634e-05, "loss": 0.5868, "step": 3613 }, { "epoch": 4.62592, "grad_norm": 0.6615371108055115, "learning_rate": 4.2789115646258506e-05, "loss": 0.5537, "step": 3614 }, { "epoch": 4.6272, "grad_norm": 0.7274612188339233, "learning_rate": 4.278711484593838e-05, "loss": 0.6443, "step": 3615 }, { "epoch": 4.62848, "grad_norm": 0.7486844658851624, "learning_rate": 4.278511404561825e-05, "loss": 0.6367, "step": 3616 }, { "epoch": 4.62976, "grad_norm": 0.7376373410224915, "learning_rate": 4.278311324529812e-05, "loss": 0.6621, "step": 3617 }, { "epoch": 4.6310400000000005, "grad_norm": 0.6872167587280273, "learning_rate": 4.2781112444977994e-05, "loss": 0.6174, "step": 3618 }, { "epoch": 4.63232, "grad_norm": 0.7116283774375916, "learning_rate": 4.2779111644657865e-05, "loss": 0.6833, "step": 3619 }, { "epoch": 4.6336, "grad_norm": 0.7109349966049194, "learning_rate": 4.277711084433774e-05, "loss": 0.6226, "step": 3620 }, { "epoch": 4.63488, "grad_norm": 0.6518250703811646, "learning_rate": 4.277511004401761e-05, "loss": 0.6072, "step": 3621 }, { "epoch": 4.63616, "grad_norm": 0.7209012508392334, "learning_rate": 4.277310924369748e-05, "loss": 0.6737, "step": 3622 }, { "epoch": 4.63744, "grad_norm": 0.7458158731460571, "learning_rate": 4.277110844337735e-05, "loss": 0.702, "step": 3623 }, { "epoch": 4.63872, "grad_norm": 0.7005342245101929, "learning_rate": 4.2769107643057225e-05, "loss": 0.6082, "step": 3624 }, { "epoch": 4.64, "grad_norm": 0.7359268665313721, "learning_rate": 4.27671068427371e-05, "loss": 0.6727, "step": 3625 }, { "epoch": 4.64128, "grad_norm": 0.7191507816314697, 
"learning_rate": 4.276510604241697e-05, "loss": 0.6743, "step": 3626 }, { "epoch": 4.64256, "grad_norm": 0.7470155954360962, "learning_rate": 4.276310524209684e-05, "loss": 0.6833, "step": 3627 }, { "epoch": 4.64384, "grad_norm": 0.7260749936103821, "learning_rate": 4.276110444177671e-05, "loss": 0.6241, "step": 3628 }, { "epoch": 4.64512, "grad_norm": 0.7276033759117126, "learning_rate": 4.2759103641456584e-05, "loss": 0.6384, "step": 3629 }, { "epoch": 4.6464, "grad_norm": 0.7192178964614868, "learning_rate": 4.275710284113646e-05, "loss": 0.6837, "step": 3630 }, { "epoch": 4.64768, "grad_norm": 0.7400460839271545, "learning_rate": 4.275510204081633e-05, "loss": 0.6588, "step": 3631 }, { "epoch": 4.64896, "grad_norm": 0.7040890455245972, "learning_rate": 4.27531012404962e-05, "loss": 0.6355, "step": 3632 }, { "epoch": 4.65024, "grad_norm": 0.7764822840690613, "learning_rate": 4.275110044017607e-05, "loss": 0.6686, "step": 3633 }, { "epoch": 4.65152, "grad_norm": 0.72344571352005, "learning_rate": 4.274909963985594e-05, "loss": 0.6094, "step": 3634 }, { "epoch": 4.6528, "grad_norm": 0.7050177454948425, "learning_rate": 4.2747098839535815e-05, "loss": 0.6337, "step": 3635 }, { "epoch": 4.65408, "grad_norm": 0.7142139673233032, "learning_rate": 4.274509803921569e-05, "loss": 0.6422, "step": 3636 }, { "epoch": 4.65536, "grad_norm": 0.7412136793136597, "learning_rate": 4.274309723889556e-05, "loss": 0.6324, "step": 3637 }, { "epoch": 4.65664, "grad_norm": 0.7400050163269043, "learning_rate": 4.274109643857544e-05, "loss": 0.6371, "step": 3638 }, { "epoch": 4.65792, "grad_norm": 0.6862472295761108, "learning_rate": 4.27390956382553e-05, "loss": 0.6114, "step": 3639 }, { "epoch": 4.6592, "grad_norm": 0.6816151142120361, "learning_rate": 4.2737094837935174e-05, "loss": 0.6169, "step": 3640 }, { "epoch": 4.66048, "grad_norm": 0.7262451648712158, "learning_rate": 4.2735094037615046e-05, "loss": 0.6691, "step": 3641 }, { "epoch": 4.66176, "grad_norm": 0.6490700244903564, 
"learning_rate": 4.273309323729492e-05, "loss": 0.5776, "step": 3642 }, { "epoch": 4.66304, "grad_norm": 0.701505184173584, "learning_rate": 4.273109243697479e-05, "loss": 0.6501, "step": 3643 }, { "epoch": 4.66432, "grad_norm": 0.731262743473053, "learning_rate": 4.272909163665466e-05, "loss": 0.6893, "step": 3644 }, { "epoch": 4.6655999999999995, "grad_norm": 0.6694958209991455, "learning_rate": 4.272709083633454e-05, "loss": 0.6047, "step": 3645 }, { "epoch": 4.66688, "grad_norm": 0.7336912155151367, "learning_rate": 4.272509003601441e-05, "loss": 0.6718, "step": 3646 }, { "epoch": 4.66816, "grad_norm": 0.7041752338409424, "learning_rate": 4.272308923569428e-05, "loss": 0.6913, "step": 3647 }, { "epoch": 4.66944, "grad_norm": 0.7393541932106018, "learning_rate": 4.272108843537415e-05, "loss": 0.6946, "step": 3648 }, { "epoch": 4.67072, "grad_norm": 0.6891679167747498, "learning_rate": 4.271908763505402e-05, "loss": 0.5869, "step": 3649 }, { "epoch": 4.672, "grad_norm": 0.6804254055023193, "learning_rate": 4.271708683473389e-05, "loss": 0.6375, "step": 3650 }, { "epoch": 4.67328, "grad_norm": 0.7413250803947449, "learning_rate": 4.2715086034413765e-05, "loss": 0.6526, "step": 3651 }, { "epoch": 4.67456, "grad_norm": 0.7123025059700012, "learning_rate": 4.2713085234093644e-05, "loss": 0.6193, "step": 3652 }, { "epoch": 4.67584, "grad_norm": 0.6694692373275757, "learning_rate": 4.2711084433773515e-05, "loss": 0.6437, "step": 3653 }, { "epoch": 4.67712, "grad_norm": 0.744706392288208, "learning_rate": 4.270908363345339e-05, "loss": 0.648, "step": 3654 }, { "epoch": 4.6784, "grad_norm": 0.7176006436347961, "learning_rate": 4.270708283313325e-05, "loss": 0.6077, "step": 3655 }, { "epoch": 4.67968, "grad_norm": 0.7272926568984985, "learning_rate": 4.2705082032813124e-05, "loss": 0.6571, "step": 3656 }, { "epoch": 4.68096, "grad_norm": 0.6795682907104492, "learning_rate": 4.2703081232492996e-05, "loss": 0.6645, "step": 3657 }, { "epoch": 4.68224, "grad_norm": 
0.7655096650123596, "learning_rate": 4.270108043217287e-05, "loss": 0.6812, "step": 3658 }, { "epoch": 4.68352, "grad_norm": 0.7039996981620789, "learning_rate": 4.2699079631852747e-05, "loss": 0.6863, "step": 3659 }, { "epoch": 4.6848, "grad_norm": 0.7228350639343262, "learning_rate": 4.269707883153262e-05, "loss": 0.6519, "step": 3660 }, { "epoch": 4.6860800000000005, "grad_norm": 0.658035397529602, "learning_rate": 4.269507803121249e-05, "loss": 0.6366, "step": 3661 }, { "epoch": 4.68736, "grad_norm": 0.7360743880271912, "learning_rate": 4.269307723089236e-05, "loss": 0.6543, "step": 3662 }, { "epoch": 4.68864, "grad_norm": 0.6831920146942139, "learning_rate": 4.269107643057223e-05, "loss": 0.6334, "step": 3663 }, { "epoch": 4.68992, "grad_norm": 0.7199382185935974, "learning_rate": 4.26890756302521e-05, "loss": 0.6388, "step": 3664 }, { "epoch": 4.6912, "grad_norm": 0.6735736131668091, "learning_rate": 4.268707482993197e-05, "loss": 0.6249, "step": 3665 }, { "epoch": 4.69248, "grad_norm": 0.7085421681404114, "learning_rate": 4.268507402961185e-05, "loss": 0.6752, "step": 3666 }, { "epoch": 4.69376, "grad_norm": 0.7214930057525635, "learning_rate": 4.268307322929172e-05, "loss": 0.6651, "step": 3667 }, { "epoch": 4.69504, "grad_norm": 0.7339044213294983, "learning_rate": 4.268107242897159e-05, "loss": 0.7262, "step": 3668 }, { "epoch": 4.69632, "grad_norm": 0.707331120967865, "learning_rate": 4.2679071628651465e-05, "loss": 0.5875, "step": 3669 }, { "epoch": 4.6975999999999996, "grad_norm": 0.7132313847541809, "learning_rate": 4.267707082833134e-05, "loss": 0.6412, "step": 3670 }, { "epoch": 4.69888, "grad_norm": 0.6542635560035706, "learning_rate": 4.26750700280112e-05, "loss": 0.6243, "step": 3671 }, { "epoch": 4.70016, "grad_norm": 0.6929904818534851, "learning_rate": 4.2673069227691074e-05, "loss": 0.6621, "step": 3672 }, { "epoch": 4.70144, "grad_norm": 0.6636141538619995, "learning_rate": 4.267106842737095e-05, "loss": 0.6205, "step": 3673 }, { "epoch": 
4.70272, "grad_norm": 0.7498050332069397, "learning_rate": 4.2669067627050824e-05, "loss": 0.6606, "step": 3674 }, { "epoch": 4.704, "grad_norm": 0.6959406137466431, "learning_rate": 4.2667066826730696e-05, "loss": 0.627, "step": 3675 }, { "epoch": 4.70528, "grad_norm": 0.7800446152687073, "learning_rate": 4.266506602641057e-05, "loss": 0.6928, "step": 3676 }, { "epoch": 4.70656, "grad_norm": 0.7218673825263977, "learning_rate": 4.266306522609044e-05, "loss": 0.6479, "step": 3677 }, { "epoch": 4.70784, "grad_norm": 0.6821399331092834, "learning_rate": 4.266106442577031e-05, "loss": 0.6622, "step": 3678 }, { "epoch": 4.70912, "grad_norm": 0.6904776692390442, "learning_rate": 4.265906362545018e-05, "loss": 0.6627, "step": 3679 }, { "epoch": 4.7104, "grad_norm": 0.6957124471664429, "learning_rate": 4.2657062825130056e-05, "loss": 0.6226, "step": 3680 }, { "epoch": 4.71168, "grad_norm": 0.7129904627799988, "learning_rate": 4.265506202480993e-05, "loss": 0.6552, "step": 3681 }, { "epoch": 4.71296, "grad_norm": 0.7037960886955261, "learning_rate": 4.26530612244898e-05, "loss": 0.6661, "step": 3682 }, { "epoch": 4.71424, "grad_norm": 0.7117835283279419, "learning_rate": 4.265106042416967e-05, "loss": 0.6382, "step": 3683 }, { "epoch": 4.71552, "grad_norm": 0.7245675325393677, "learning_rate": 4.264905962384954e-05, "loss": 0.625, "step": 3684 }, { "epoch": 4.7168, "grad_norm": 0.7148410677909851, "learning_rate": 4.2647058823529415e-05, "loss": 0.6358, "step": 3685 }, { "epoch": 4.7180800000000005, "grad_norm": 0.7175400257110596, "learning_rate": 4.264505802320929e-05, "loss": 0.6145, "step": 3686 }, { "epoch": 4.71936, "grad_norm": 0.7203708291053772, "learning_rate": 4.264305722288916e-05, "loss": 0.6923, "step": 3687 }, { "epoch": 4.7206399999999995, "grad_norm": 0.7105974555015564, "learning_rate": 4.264105642256903e-05, "loss": 0.6218, "step": 3688 }, { "epoch": 4.72192, "grad_norm": 0.7175532579421997, "learning_rate": 4.26390556222489e-05, "loss": 0.6648, "step": 
3689 }, { "epoch": 4.7232, "grad_norm": 0.7026408910751343, "learning_rate": 4.2637054821928774e-05, "loss": 0.6619, "step": 3690 }, { "epoch": 4.72448, "grad_norm": 0.7409774661064148, "learning_rate": 4.2635054021608646e-05, "loss": 0.7161, "step": 3691 }, { "epoch": 4.72576, "grad_norm": 0.6791418194770813, "learning_rate": 4.263305322128852e-05, "loss": 0.6204, "step": 3692 }, { "epoch": 4.72704, "grad_norm": 0.6994455456733704, "learning_rate": 4.263105242096839e-05, "loss": 0.6185, "step": 3693 }, { "epoch": 4.72832, "grad_norm": 0.7309840321540833, "learning_rate": 4.262905162064826e-05, "loss": 0.7147, "step": 3694 }, { "epoch": 4.7296, "grad_norm": 0.6696488261222839, "learning_rate": 4.262705082032813e-05, "loss": 0.5993, "step": 3695 }, { "epoch": 4.73088, "grad_norm": 0.7723877429962158, "learning_rate": 4.2625050020008005e-05, "loss": 0.6397, "step": 3696 }, { "epoch": 4.73216, "grad_norm": 0.6929299831390381, "learning_rate": 4.262304921968788e-05, "loss": 0.6304, "step": 3697 }, { "epoch": 4.73344, "grad_norm": 0.6986497640609741, "learning_rate": 4.262104841936775e-05, "loss": 0.6173, "step": 3698 }, { "epoch": 4.73472, "grad_norm": 0.7609420418739319, "learning_rate": 4.261904761904762e-05, "loss": 0.6912, "step": 3699 }, { "epoch": 4.736, "grad_norm": 0.7289432287216187, "learning_rate": 4.261704681872749e-05, "loss": 0.6354, "step": 3700 }, { "epoch": 4.73728, "grad_norm": 0.7391143441200256, "learning_rate": 4.2615046018407365e-05, "loss": 0.6755, "step": 3701 }, { "epoch": 4.73856, "grad_norm": 0.7171617746353149, "learning_rate": 4.2613045218087236e-05, "loss": 0.6639, "step": 3702 }, { "epoch": 4.73984, "grad_norm": 0.7035807371139526, "learning_rate": 4.261104441776711e-05, "loss": 0.6142, "step": 3703 }, { "epoch": 4.7411200000000004, "grad_norm": 0.6329298615455627, "learning_rate": 4.260904361744698e-05, "loss": 0.6249, "step": 3704 }, { "epoch": 4.7424, "grad_norm": 0.6702702045440674, "learning_rate": 4.260704281712685e-05, "loss": 
0.5998, "step": 3705 }, { "epoch": 4.74368, "grad_norm": 0.7044887542724609, "learning_rate": 4.2605042016806724e-05, "loss": 0.6308, "step": 3706 }, { "epoch": 4.74496, "grad_norm": 0.6639620065689087, "learning_rate": 4.2603041216486596e-05, "loss": 0.6565, "step": 3707 }, { "epoch": 4.74624, "grad_norm": 0.72665935754776, "learning_rate": 4.2601040416166474e-05, "loss": 0.6819, "step": 3708 }, { "epoch": 4.74752, "grad_norm": 0.7186412811279297, "learning_rate": 4.259903961584634e-05, "loss": 0.6577, "step": 3709 }, { "epoch": 4.7488, "grad_norm": 0.6942617893218994, "learning_rate": 4.259703881552621e-05, "loss": 0.6603, "step": 3710 }, { "epoch": 4.75008, "grad_norm": 0.6832451224327087, "learning_rate": 4.259503801520608e-05, "loss": 0.6013, "step": 3711 }, { "epoch": 4.75136, "grad_norm": 0.765796959400177, "learning_rate": 4.2593037214885955e-05, "loss": 0.6965, "step": 3712 }, { "epoch": 4.7526399999999995, "grad_norm": 0.7410770654678345, "learning_rate": 4.259103641456583e-05, "loss": 0.6428, "step": 3713 }, { "epoch": 4.75392, "grad_norm": 0.7163513898849487, "learning_rate": 4.25890356142457e-05, "loss": 0.6708, "step": 3714 }, { "epoch": 4.7552, "grad_norm": 0.7363733053207397, "learning_rate": 4.258703481392558e-05, "loss": 0.6724, "step": 3715 }, { "epoch": 4.75648, "grad_norm": 0.6866661310195923, "learning_rate": 4.258503401360545e-05, "loss": 0.6056, "step": 3716 }, { "epoch": 4.75776, "grad_norm": 0.723304271697998, "learning_rate": 4.2583033213285314e-05, "loss": 0.6742, "step": 3717 }, { "epoch": 4.75904, "grad_norm": 0.730629026889801, "learning_rate": 4.2581032412965186e-05, "loss": 0.6486, "step": 3718 }, { "epoch": 4.76032, "grad_norm": 0.6811598539352417, "learning_rate": 4.257903161264506e-05, "loss": 0.6246, "step": 3719 }, { "epoch": 4.7616, "grad_norm": 0.7193059921264648, "learning_rate": 4.257703081232493e-05, "loss": 0.6695, "step": 3720 }, { "epoch": 4.76288, "grad_norm": 0.7182266116142273, "learning_rate": 4.25750300120048e-05, 
"loss": 0.6379, "step": 3721 }, { "epoch": 4.76416, "grad_norm": 0.6965410113334656, "learning_rate": 4.257302921168468e-05, "loss": 0.6782, "step": 3722 }, { "epoch": 4.76544, "grad_norm": 0.7410017251968384, "learning_rate": 4.257102841136455e-05, "loss": 0.6226, "step": 3723 }, { "epoch": 4.76672, "grad_norm": 0.6835784316062927, "learning_rate": 4.2569027611044424e-05, "loss": 0.6244, "step": 3724 }, { "epoch": 4.768, "grad_norm": 0.7124270796775818, "learning_rate": 4.256702681072429e-05, "loss": 0.6385, "step": 3725 }, { "epoch": 4.76928, "grad_norm": 0.6720713376998901, "learning_rate": 4.256502601040416e-05, "loss": 0.6582, "step": 3726 }, { "epoch": 4.77056, "grad_norm": 0.690466046333313, "learning_rate": 4.256302521008403e-05, "loss": 0.6574, "step": 3727 }, { "epoch": 4.77184, "grad_norm": 0.7075904011726379, "learning_rate": 4.2561024409763905e-05, "loss": 0.6623, "step": 3728 }, { "epoch": 4.7731200000000005, "grad_norm": 0.7090795636177063, "learning_rate": 4.255902360944378e-05, "loss": 0.6237, "step": 3729 }, { "epoch": 4.7744, "grad_norm": 0.6967689394950867, "learning_rate": 4.2557022809123655e-05, "loss": 0.6401, "step": 3730 }, { "epoch": 4.77568, "grad_norm": 0.7227605581283569, "learning_rate": 4.255502200880353e-05, "loss": 0.6841, "step": 3731 }, { "epoch": 4.77696, "grad_norm": 0.7367991209030151, "learning_rate": 4.25530212084834e-05, "loss": 0.6267, "step": 3732 }, { "epoch": 4.77824, "grad_norm": 0.655534565448761, "learning_rate": 4.2551020408163264e-05, "loss": 0.5815, "step": 3733 }, { "epoch": 4.77952, "grad_norm": 0.7497329115867615, "learning_rate": 4.2549019607843136e-05, "loss": 0.6308, "step": 3734 }, { "epoch": 4.7808, "grad_norm": 0.7356445789337158, "learning_rate": 4.254701880752301e-05, "loss": 0.6944, "step": 3735 }, { "epoch": 4.78208, "grad_norm": 0.7261192202568054, "learning_rate": 4.2545018007202886e-05, "loss": 0.6922, "step": 3736 }, { "epoch": 4.78336, "grad_norm": 0.727255642414093, "learning_rate": 
4.254301720688276e-05, "loss": 0.6582, "step": 3737 }, { "epoch": 4.78464, "grad_norm": 0.7310218811035156, "learning_rate": 4.254101640656263e-05, "loss": 0.6671, "step": 3738 }, { "epoch": 4.78592, "grad_norm": 0.7008660435676575, "learning_rate": 4.25390156062425e-05, "loss": 0.6518, "step": 3739 }, { "epoch": 4.7872, "grad_norm": 0.6646680235862732, "learning_rate": 4.2537014805922374e-05, "loss": 0.6003, "step": 3740 }, { "epoch": 4.78848, "grad_norm": 0.6931464076042175, "learning_rate": 4.253501400560224e-05, "loss": 0.624, "step": 3741 }, { "epoch": 4.78976, "grad_norm": 0.7339210510253906, "learning_rate": 4.253301320528211e-05, "loss": 0.6385, "step": 3742 }, { "epoch": 4.79104, "grad_norm": 0.7324719429016113, "learning_rate": 4.253101240496199e-05, "loss": 0.6915, "step": 3743 }, { "epoch": 4.79232, "grad_norm": 0.7005221843719482, "learning_rate": 4.252901160464186e-05, "loss": 0.6452, "step": 3744 }, { "epoch": 4.7936, "grad_norm": 0.7293890118598938, "learning_rate": 4.252701080432173e-05, "loss": 0.6334, "step": 3745 }, { "epoch": 4.79488, "grad_norm": 0.709510862827301, "learning_rate": 4.2525010004001605e-05, "loss": 0.6718, "step": 3746 }, { "epoch": 4.79616, "grad_norm": 0.7241347432136536, "learning_rate": 4.252300920368148e-05, "loss": 0.6326, "step": 3747 }, { "epoch": 4.79744, "grad_norm": 0.6769399642944336, "learning_rate": 4.252100840336135e-05, "loss": 0.6442, "step": 3748 }, { "epoch": 4.79872, "grad_norm": 0.7203426361083984, "learning_rate": 4.2519007603041214e-05, "loss": 0.644, "step": 3749 }, { "epoch": 4.8, "grad_norm": 0.734929084777832, "learning_rate": 4.2517006802721085e-05, "loss": 0.6644, "step": 3750 }, { "epoch": 4.80128, "grad_norm": 0.7082551717758179, "learning_rate": 4.2515006002400964e-05, "loss": 0.6084, "step": 3751 }, { "epoch": 4.80256, "grad_norm": 0.6955980062484741, "learning_rate": 4.2513005202080836e-05, "loss": 0.6303, "step": 3752 }, { "epoch": 4.80384, "grad_norm": 0.6883904933929443, "learning_rate": 
4.251100440176071e-05, "loss": 0.6464, "step": 3753 }, { "epoch": 4.80512, "grad_norm": 0.6787061095237732, "learning_rate": 4.250900360144058e-05, "loss": 0.6351, "step": 3754 }, { "epoch": 4.8064, "grad_norm": 0.6913610696792603, "learning_rate": 4.250700280112045e-05, "loss": 0.5745, "step": 3755 }, { "epoch": 4.8076799999999995, "grad_norm": 0.7253073453903198, "learning_rate": 4.2505002000800323e-05, "loss": 0.6662, "step": 3756 }, { "epoch": 4.80896, "grad_norm": 0.7172198295593262, "learning_rate": 4.250300120048019e-05, "loss": 0.6726, "step": 3757 }, { "epoch": 4.81024, "grad_norm": 0.7208426594734192, "learning_rate": 4.250100040016007e-05, "loss": 0.6478, "step": 3758 }, { "epoch": 4.81152, "grad_norm": 0.6597497463226318, "learning_rate": 4.249899959983994e-05, "loss": 0.6372, "step": 3759 }, { "epoch": 4.8128, "grad_norm": 0.7280398011207581, "learning_rate": 4.249699879951981e-05, "loss": 0.6449, "step": 3760 }, { "epoch": 4.81408, "grad_norm": 0.7056030035018921, "learning_rate": 4.249499799919968e-05, "loss": 0.6638, "step": 3761 }, { "epoch": 4.81536, "grad_norm": 0.7291399836540222, "learning_rate": 4.2492997198879555e-05, "loss": 0.6717, "step": 3762 }, { "epoch": 4.81664, "grad_norm": 0.7240449786186218, "learning_rate": 4.2490996398559426e-05, "loss": 0.6896, "step": 3763 }, { "epoch": 4.81792, "grad_norm": 0.6927871704101562, "learning_rate": 4.24889955982393e-05, "loss": 0.6627, "step": 3764 }, { "epoch": 4.8192, "grad_norm": 0.6421213150024414, "learning_rate": 4.248699479791917e-05, "loss": 0.6177, "step": 3765 }, { "epoch": 4.82048, "grad_norm": 0.6733725666999817, "learning_rate": 4.248499399759904e-05, "loss": 0.5987, "step": 3766 }, { "epoch": 4.82176, "grad_norm": 0.7016422152519226, "learning_rate": 4.2482993197278914e-05, "loss": 0.6563, "step": 3767 }, { "epoch": 4.82304, "grad_norm": 0.662580132484436, "learning_rate": 4.2480992396958786e-05, "loss": 0.6167, "step": 3768 }, { "epoch": 4.82432, "grad_norm": 0.6980850696563721, 
"learning_rate": 4.247899159663866e-05, "loss": 0.5772, "step": 3769 }, { "epoch": 4.8256, "grad_norm": 0.6765894293785095, "learning_rate": 4.247699079631853e-05, "loss": 0.6503, "step": 3770 }, { "epoch": 4.82688, "grad_norm": 0.739733099937439, "learning_rate": 4.24749899959984e-05, "loss": 0.7014, "step": 3771 }, { "epoch": 4.8281600000000005, "grad_norm": 0.706731915473938, "learning_rate": 4.247298919567827e-05, "loss": 0.6687, "step": 3772 }, { "epoch": 4.82944, "grad_norm": 0.7055926322937012, "learning_rate": 4.2470988395358145e-05, "loss": 0.6354, "step": 3773 }, { "epoch": 4.83072, "grad_norm": 0.7980726361274719, "learning_rate": 4.246898759503802e-05, "loss": 0.7241, "step": 3774 }, { "epoch": 4.832, "grad_norm": 0.6939478516578674, "learning_rate": 4.246698679471789e-05, "loss": 0.621, "step": 3775 }, { "epoch": 4.83328, "grad_norm": 0.6984978318214417, "learning_rate": 4.246498599439776e-05, "loss": 0.591, "step": 3776 }, { "epoch": 4.83456, "grad_norm": 0.7327542901039124, "learning_rate": 4.246298519407763e-05, "loss": 0.6889, "step": 3777 }, { "epoch": 4.83584, "grad_norm": 0.7143282890319824, "learning_rate": 4.2460984393757504e-05, "loss": 0.6558, "step": 3778 }, { "epoch": 4.83712, "grad_norm": 0.7296421527862549, "learning_rate": 4.2458983593437376e-05, "loss": 0.6599, "step": 3779 }, { "epoch": 4.8384, "grad_norm": 0.7196743488311768, "learning_rate": 4.245698279311725e-05, "loss": 0.6421, "step": 3780 }, { "epoch": 4.8396799999999995, "grad_norm": 0.6790671944618225, "learning_rate": 4.245498199279712e-05, "loss": 0.6075, "step": 3781 }, { "epoch": 4.84096, "grad_norm": 0.7062405347824097, "learning_rate": 4.245298119247699e-05, "loss": 0.6633, "step": 3782 }, { "epoch": 4.84224, "grad_norm": 0.766210675239563, "learning_rate": 4.2450980392156864e-05, "loss": 0.6746, "step": 3783 }, { "epoch": 4.84352, "grad_norm": 0.6822754740715027, "learning_rate": 4.2448979591836735e-05, "loss": 0.6352, "step": 3784 }, { "epoch": 4.8448, "grad_norm": 
0.6726470589637756, "learning_rate": 4.244697879151661e-05, "loss": 0.6153, "step": 3785 }, { "epoch": 4.84608, "grad_norm": 0.7688694596290588, "learning_rate": 4.2444977991196486e-05, "loss": 0.7037, "step": 3786 }, { "epoch": 4.84736, "grad_norm": 0.6525343060493469, "learning_rate": 4.244297719087635e-05, "loss": 0.6385, "step": 3787 }, { "epoch": 4.84864, "grad_norm": 0.7031245827674866, "learning_rate": 4.244097639055622e-05, "loss": 0.6051, "step": 3788 }, { "epoch": 4.84992, "grad_norm": 0.7337315082550049, "learning_rate": 4.2438975590236095e-05, "loss": 0.6638, "step": 3789 }, { "epoch": 4.8512, "grad_norm": 0.7109333276748657, "learning_rate": 4.2436974789915967e-05, "loss": 0.6656, "step": 3790 }, { "epoch": 4.85248, "grad_norm": 0.7319778800010681, "learning_rate": 4.243497398959584e-05, "loss": 0.6261, "step": 3791 }, { "epoch": 4.85376, "grad_norm": 0.7297192811965942, "learning_rate": 4.243297318927571e-05, "loss": 0.703, "step": 3792 }, { "epoch": 4.85504, "grad_norm": 0.7348429560661316, "learning_rate": 4.243097238895559e-05, "loss": 0.6583, "step": 3793 }, { "epoch": 4.85632, "grad_norm": 0.7114925384521484, "learning_rate": 4.242897158863546e-05, "loss": 0.6621, "step": 3794 }, { "epoch": 4.8576, "grad_norm": 0.679014265537262, "learning_rate": 4.2426970788315326e-05, "loss": 0.615, "step": 3795 }, { "epoch": 4.85888, "grad_norm": 0.7080205678939819, "learning_rate": 4.24249699879952e-05, "loss": 0.653, "step": 3796 }, { "epoch": 4.8601600000000005, "grad_norm": 0.7360637187957764, "learning_rate": 4.242296918767507e-05, "loss": 0.6624, "step": 3797 }, { "epoch": 4.86144, "grad_norm": 0.7157953977584839, "learning_rate": 4.242096838735494e-05, "loss": 0.6692, "step": 3798 }, { "epoch": 4.86272, "grad_norm": 0.7022566199302673, "learning_rate": 4.241896758703481e-05, "loss": 0.6586, "step": 3799 }, { "epoch": 4.864, "grad_norm": 0.7275310158729553, "learning_rate": 4.241696678671469e-05, "loss": 0.6602, "step": 3800 }, { "epoch": 4.86528, 
"grad_norm": 0.7159553170204163, "learning_rate": 4.2414965986394564e-05, "loss": 0.6299, "step": 3801 }, { "epoch": 4.86656, "grad_norm": 0.7265822887420654, "learning_rate": 4.2412965186074436e-05, "loss": 0.5922, "step": 3802 }, { "epoch": 4.86784, "grad_norm": 0.6696851253509521, "learning_rate": 4.24109643857543e-05, "loss": 0.6128, "step": 3803 }, { "epoch": 4.86912, "grad_norm": 0.7184013724327087, "learning_rate": 4.240896358543417e-05, "loss": 0.6601, "step": 3804 }, { "epoch": 4.8704, "grad_norm": 0.7109546661376953, "learning_rate": 4.2406962785114044e-05, "loss": 0.6546, "step": 3805 }, { "epoch": 4.87168, "grad_norm": 0.7122465372085571, "learning_rate": 4.2404961984793916e-05, "loss": 0.6324, "step": 3806 }, { "epoch": 4.87296, "grad_norm": 0.7610680460929871, "learning_rate": 4.2402961184473795e-05, "loss": 0.6355, "step": 3807 }, { "epoch": 4.87424, "grad_norm": 0.7229766845703125, "learning_rate": 4.240096038415367e-05, "loss": 0.6396, "step": 3808 }, { "epoch": 4.87552, "grad_norm": 0.7144048810005188, "learning_rate": 4.239895958383354e-05, "loss": 0.6485, "step": 3809 }, { "epoch": 4.8768, "grad_norm": 0.7222286462783813, "learning_rate": 4.239695878351341e-05, "loss": 0.6919, "step": 3810 }, { "epoch": 4.87808, "grad_norm": 0.700534462928772, "learning_rate": 4.2394957983193276e-05, "loss": 0.6468, "step": 3811 }, { "epoch": 4.87936, "grad_norm": 0.696043074131012, "learning_rate": 4.239295718287315e-05, "loss": 0.589, "step": 3812 }, { "epoch": 4.88064, "grad_norm": 0.6939215064048767, "learning_rate": 4.239095638255302e-05, "loss": 0.6358, "step": 3813 }, { "epoch": 4.88192, "grad_norm": 0.6854797601699829, "learning_rate": 4.23889555822329e-05, "loss": 0.6574, "step": 3814 }, { "epoch": 4.8832, "grad_norm": 0.7024257183074951, "learning_rate": 4.238695478191277e-05, "loss": 0.6688, "step": 3815 }, { "epoch": 4.88448, "grad_norm": 0.7299116849899292, "learning_rate": 4.238495398159264e-05, "loss": 0.6921, "step": 3816 }, { "epoch": 4.88576, 
"grad_norm": 0.7275465130805969, "learning_rate": 4.2382953181272513e-05, "loss": 0.6454, "step": 3817 }, { "epoch": 4.88704, "grad_norm": 0.7085564136505127, "learning_rate": 4.2380952380952385e-05, "loss": 0.6523, "step": 3818 }, { "epoch": 4.88832, "grad_norm": 0.7188389897346497, "learning_rate": 4.237895158063225e-05, "loss": 0.6946, "step": 3819 }, { "epoch": 4.8896, "grad_norm": 0.72255939245224, "learning_rate": 4.237695078031212e-05, "loss": 0.6166, "step": 3820 }, { "epoch": 4.89088, "grad_norm": 0.6671111583709717, "learning_rate": 4.2374949979992e-05, "loss": 0.6173, "step": 3821 }, { "epoch": 4.89216, "grad_norm": 0.7809231877326965, "learning_rate": 4.237294917967187e-05, "loss": 0.7262, "step": 3822 }, { "epoch": 4.89344, "grad_norm": 0.6766269207000732, "learning_rate": 4.2370948379351745e-05, "loss": 0.6216, "step": 3823 }, { "epoch": 4.8947199999999995, "grad_norm": 0.7346859574317932, "learning_rate": 4.2368947579031616e-05, "loss": 0.6529, "step": 3824 }, { "epoch": 4.896, "grad_norm": 0.7254279851913452, "learning_rate": 4.236694677871149e-05, "loss": 0.6319, "step": 3825 }, { "epoch": 4.89728, "grad_norm": 0.7144622802734375, "learning_rate": 4.236494597839136e-05, "loss": 0.6586, "step": 3826 }, { "epoch": 4.89856, "grad_norm": 0.7005541324615479, "learning_rate": 4.2362945178071225e-05, "loss": 0.67, "step": 3827 }, { "epoch": 4.89984, "grad_norm": 0.7724364995956421, "learning_rate": 4.2360944377751104e-05, "loss": 0.663, "step": 3828 }, { "epoch": 4.90112, "grad_norm": 0.7261497974395752, "learning_rate": 4.2358943577430976e-05, "loss": 0.6781, "step": 3829 }, { "epoch": 4.9024, "grad_norm": 0.673323929309845, "learning_rate": 4.235694277711085e-05, "loss": 0.6236, "step": 3830 }, { "epoch": 4.90368, "grad_norm": 0.6874783635139465, "learning_rate": 4.235494197679072e-05, "loss": 0.644, "step": 3831 }, { "epoch": 4.90496, "grad_norm": 0.7447608709335327, "learning_rate": 4.235294117647059e-05, "loss": 0.7078, "step": 3832 }, { "epoch": 
4.90624, "grad_norm": 0.7306890487670898, "learning_rate": 4.235094037615046e-05, "loss": 0.649, "step": 3833 }, { "epoch": 4.90752, "grad_norm": 0.7219198942184448, "learning_rate": 4.2348939575830335e-05, "loss": 0.6817, "step": 3834 }, { "epoch": 4.9088, "grad_norm": 0.7355913519859314, "learning_rate": 4.234693877551021e-05, "loss": 0.6291, "step": 3835 }, { "epoch": 4.91008, "grad_norm": 0.7445414662361145, "learning_rate": 4.234493797519008e-05, "loss": 0.6903, "step": 3836 }, { "epoch": 4.91136, "grad_norm": 0.6713249087333679, "learning_rate": 4.234293717486995e-05, "loss": 0.6459, "step": 3837 }, { "epoch": 4.91264, "grad_norm": 0.7056488990783691, "learning_rate": 4.234093637454982e-05, "loss": 0.6648, "step": 3838 }, { "epoch": 4.91392, "grad_norm": 0.6989180445671082, "learning_rate": 4.2338935574229694e-05, "loss": 0.705, "step": 3839 }, { "epoch": 4.9152000000000005, "grad_norm": 0.753159761428833, "learning_rate": 4.2336934773909566e-05, "loss": 0.6622, "step": 3840 }, { "epoch": 4.91648, "grad_norm": 0.7392045855522156, "learning_rate": 4.233493397358944e-05, "loss": 0.6205, "step": 3841 }, { "epoch": 4.91776, "grad_norm": 0.7536989450454712, "learning_rate": 4.233293317326931e-05, "loss": 0.6748, "step": 3842 }, { "epoch": 4.91904, "grad_norm": 0.7107492089271545, "learning_rate": 4.233093237294918e-05, "loss": 0.6372, "step": 3843 }, { "epoch": 4.92032, "grad_norm": 0.6968305110931396, "learning_rate": 4.2328931572629054e-05, "loss": 0.6463, "step": 3844 }, { "epoch": 4.9216, "grad_norm": 0.7517611980438232, "learning_rate": 4.2326930772308925e-05, "loss": 0.6505, "step": 3845 }, { "epoch": 4.92288, "grad_norm": 0.7284044623374939, "learning_rate": 4.23249299719888e-05, "loss": 0.6767, "step": 3846 }, { "epoch": 4.92416, "grad_norm": 0.676810085773468, "learning_rate": 4.232292917166867e-05, "loss": 0.6505, "step": 3847 }, { "epoch": 4.92544, "grad_norm": 0.7427054643630981, "learning_rate": 4.232092837134854e-05, "loss": 0.684, "step": 3848 }, { 
"epoch": 4.9267199999999995, "grad_norm": 0.7146655917167664, "learning_rate": 4.231892757102841e-05, "loss": 0.6527, "step": 3849 }, { "epoch": 4.928, "grad_norm": 0.690930187702179, "learning_rate": 4.2316926770708285e-05, "loss": 0.6584, "step": 3850 }, { "epoch": 4.92928, "grad_norm": 0.742597222328186, "learning_rate": 4.2314925970388157e-05, "loss": 0.6644, "step": 3851 }, { "epoch": 4.93056, "grad_norm": 0.7021815776824951, "learning_rate": 4.231292517006803e-05, "loss": 0.6765, "step": 3852 }, { "epoch": 4.93184, "grad_norm": 0.7302278876304626, "learning_rate": 4.23109243697479e-05, "loss": 0.6875, "step": 3853 }, { "epoch": 4.93312, "grad_norm": 0.7111605405807495, "learning_rate": 4.230892356942777e-05, "loss": 0.6808, "step": 3854 }, { "epoch": 4.9344, "grad_norm": 0.7146978378295898, "learning_rate": 4.2306922769107644e-05, "loss": 0.598, "step": 3855 }, { "epoch": 4.93568, "grad_norm": 0.6885932683944702, "learning_rate": 4.230492196878752e-05, "loss": 0.6331, "step": 3856 }, { "epoch": 4.93696, "grad_norm": 0.7933337092399597, "learning_rate": 4.230292116846739e-05, "loss": 0.7657, "step": 3857 }, { "epoch": 4.93824, "grad_norm": 0.7346404790878296, "learning_rate": 4.230092036814726e-05, "loss": 0.7045, "step": 3858 }, { "epoch": 4.93952, "grad_norm": 0.7179169058799744, "learning_rate": 4.229891956782713e-05, "loss": 0.6378, "step": 3859 }, { "epoch": 4.9408, "grad_norm": 0.7097733020782471, "learning_rate": 4.2296918767507e-05, "loss": 0.6371, "step": 3860 }, { "epoch": 4.94208, "grad_norm": 0.7483527064323425, "learning_rate": 4.2294917967186875e-05, "loss": 0.6334, "step": 3861 }, { "epoch": 4.94336, "grad_norm": 0.7159331440925598, "learning_rate": 4.229291716686675e-05, "loss": 0.6409, "step": 3862 }, { "epoch": 4.94464, "grad_norm": 0.7627670764923096, "learning_rate": 4.229091636654662e-05, "loss": 0.6671, "step": 3863 }, { "epoch": 4.94592, "grad_norm": 0.7190044522285461, "learning_rate": 4.22889155662265e-05, "loss": 0.6906, "step": 3864 
}, { "epoch": 4.9472000000000005, "grad_norm": 0.7264943718910217, "learning_rate": 4.228691476590636e-05, "loss": 0.6485, "step": 3865 }, { "epoch": 4.94848, "grad_norm": 0.7001417875289917, "learning_rate": 4.2284913965586234e-05, "loss": 0.6528, "step": 3866 }, { "epoch": 4.94976, "grad_norm": 0.7295548319816589, "learning_rate": 4.2282913165266106e-05, "loss": 0.6047, "step": 3867 }, { "epoch": 4.95104, "grad_norm": 0.7287285327911377, "learning_rate": 4.228091236494598e-05, "loss": 0.6203, "step": 3868 }, { "epoch": 4.95232, "grad_norm": 0.6717093586921692, "learning_rate": 4.227891156462585e-05, "loss": 0.642, "step": 3869 }, { "epoch": 4.9536, "grad_norm": 0.7274335622787476, "learning_rate": 4.227691076430572e-05, "loss": 0.7194, "step": 3870 }, { "epoch": 4.95488, "grad_norm": 0.7339951395988464, "learning_rate": 4.22749099639856e-05, "loss": 0.6511, "step": 3871 }, { "epoch": 4.95616, "grad_norm": 0.7088412046432495, "learning_rate": 4.227290916366547e-05, "loss": 0.6424, "step": 3872 }, { "epoch": 4.95744, "grad_norm": 0.6983089447021484, "learning_rate": 4.227090836334534e-05, "loss": 0.6186, "step": 3873 }, { "epoch": 4.95872, "grad_norm": 0.727528989315033, "learning_rate": 4.226890756302521e-05, "loss": 0.616, "step": 3874 }, { "epoch": 4.96, "grad_norm": 0.732072651386261, "learning_rate": 4.226690676270508e-05, "loss": 0.6259, "step": 3875 }, { "epoch": 4.96128, "grad_norm": 0.6999890208244324, "learning_rate": 4.226490596238495e-05, "loss": 0.677, "step": 3876 }, { "epoch": 4.96256, "grad_norm": 0.6825425028800964, "learning_rate": 4.2262905162064825e-05, "loss": 0.6053, "step": 3877 }, { "epoch": 4.96384, "grad_norm": 0.7285329699516296, "learning_rate": 4.2260904361744703e-05, "loss": 0.696, "step": 3878 }, { "epoch": 4.96512, "grad_norm": 0.6932041645050049, "learning_rate": 4.2258903561424575e-05, "loss": 0.6412, "step": 3879 }, { "epoch": 4.9664, "grad_norm": 0.7156229019165039, "learning_rate": 4.225690276110445e-05, "loss": 0.648, "step": 
3880 }, { "epoch": 4.96768, "grad_norm": 0.7821218371391296, "learning_rate": 4.225490196078431e-05, "loss": 0.72, "step": 3881 }, { "epoch": 4.96896, "grad_norm": 0.6983861327171326, "learning_rate": 4.2252901160464184e-05, "loss": 0.6516, "step": 3882 }, { "epoch": 4.97024, "grad_norm": 0.7392165660858154, "learning_rate": 4.2250900360144056e-05, "loss": 0.6855, "step": 3883 }, { "epoch": 4.97152, "grad_norm": 0.7489770650863647, "learning_rate": 4.224889955982393e-05, "loss": 0.658, "step": 3884 }, { "epoch": 4.9728, "grad_norm": 0.6763161420822144, "learning_rate": 4.2246898759503806e-05, "loss": 0.6016, "step": 3885 }, { "epoch": 4.97408, "grad_norm": 0.7260777354240417, "learning_rate": 4.224489795918368e-05, "loss": 0.6908, "step": 3886 }, { "epoch": 4.97536, "grad_norm": 0.7529964447021484, "learning_rate": 4.224289715886355e-05, "loss": 0.6736, "step": 3887 }, { "epoch": 4.97664, "grad_norm": 0.6947912573814392, "learning_rate": 4.224089635854342e-05, "loss": 0.6518, "step": 3888 }, { "epoch": 4.97792, "grad_norm": 0.6871297955513, "learning_rate": 4.223889555822329e-05, "loss": 0.6536, "step": 3889 }, { "epoch": 4.9792, "grad_norm": 0.7125989198684692, "learning_rate": 4.223689475790316e-05, "loss": 0.7032, "step": 3890 }, { "epoch": 4.98048, "grad_norm": 0.7379283308982849, "learning_rate": 4.223489395758303e-05, "loss": 0.6589, "step": 3891 }, { "epoch": 4.9817599999999995, "grad_norm": 0.7186259031295776, "learning_rate": 4.223289315726291e-05, "loss": 0.6572, "step": 3892 }, { "epoch": 4.98304, "grad_norm": 0.7020161151885986, "learning_rate": 4.223089235694278e-05, "loss": 0.6087, "step": 3893 }, { "epoch": 4.98432, "grad_norm": 0.7505249977111816, "learning_rate": 4.222889155662265e-05, "loss": 0.7485, "step": 3894 }, { "epoch": 4.9856, "grad_norm": 0.7452868223190308, "learning_rate": 4.2226890756302525e-05, "loss": 0.6414, "step": 3895 }, { "epoch": 4.98688, "grad_norm": 0.7297542691230774, "learning_rate": 4.22248899559824e-05, "loss": 0.6662, 
"step": 3896 }, { "epoch": 4.98816, "grad_norm": 0.7233591079711914, "learning_rate": 4.222288915566226e-05, "loss": 0.6443, "step": 3897 }, { "epoch": 4.98944, "grad_norm": 0.6811563968658447, "learning_rate": 4.2220888355342134e-05, "loss": 0.6349, "step": 3898 }, { "epoch": 4.99072, "grad_norm": 0.7110039591789246, "learning_rate": 4.221888755502201e-05, "loss": 0.5868, "step": 3899 }, { "epoch": 4.992, "grad_norm": 0.685947060585022, "learning_rate": 4.2216886754701884e-05, "loss": 0.6747, "step": 3900 }, { "epoch": 4.99328, "grad_norm": 0.6793825626373291, "learning_rate": 4.2214885954381756e-05, "loss": 0.6335, "step": 3901 }, { "epoch": 4.99456, "grad_norm": 0.7450732588768005, "learning_rate": 4.221288515406163e-05, "loss": 0.6845, "step": 3902 }, { "epoch": 4.99584, "grad_norm": 0.7151440978050232, "learning_rate": 4.22108843537415e-05, "loss": 0.6479, "step": 3903 }, { "epoch": 4.99712, "grad_norm": 0.7348343133926392, "learning_rate": 4.220888355342137e-05, "loss": 0.6799, "step": 3904 }, { "epoch": 4.9984, "grad_norm": 0.6973733305931091, "learning_rate": 4.220688275310124e-05, "loss": 0.6555, "step": 3905 }, { "epoch": 4.99968, "grad_norm": 0.7058578133583069, "learning_rate": 4.2204881952781115e-05, "loss": 0.6924, "step": 3906 }, { "epoch": 5.00096, "grad_norm": 1.5164827108383179, "learning_rate": 4.220288115246099e-05, "loss": 1.0917, "step": 3907 }, { "epoch": 5.00224, "grad_norm": 0.7053406834602356, "learning_rate": 4.220088035214086e-05, "loss": 0.6068, "step": 3908 }, { "epoch": 5.00352, "grad_norm": 0.6764478087425232, "learning_rate": 4.219887955182073e-05, "loss": 0.6047, "step": 3909 }, { "epoch": 5.0048, "grad_norm": 0.6720277070999146, "learning_rate": 4.21968787515006e-05, "loss": 0.6328, "step": 3910 }, { "epoch": 5.00608, "grad_norm": 0.715262234210968, "learning_rate": 4.2194877951180475e-05, "loss": 0.6962, "step": 3911 }, { "epoch": 5.00736, "grad_norm": 0.6942645907402039, "learning_rate": 4.219287715086035e-05, "loss": 0.6243, 
"step": 3912 }, { "epoch": 5.00864, "grad_norm": 0.7017389535903931, "learning_rate": 4.219087635054022e-05, "loss": 0.6112, "step": 3913 }, { "epoch": 5.00992, "grad_norm": 0.7179716229438782, "learning_rate": 4.218887555022009e-05, "loss": 0.6935, "step": 3914 }, { "epoch": 5.0112, "grad_norm": 0.7195164561271667, "learning_rate": 4.218687474989996e-05, "loss": 0.6682, "step": 3915 }, { "epoch": 5.01248, "grad_norm": 0.7197250723838806, "learning_rate": 4.2184873949579834e-05, "loss": 0.6436, "step": 3916 }, { "epoch": 5.01376, "grad_norm": 0.7025637626647949, "learning_rate": 4.2182873149259706e-05, "loss": 0.6336, "step": 3917 }, { "epoch": 5.01504, "grad_norm": 0.7276149392127991, "learning_rate": 4.218087234893958e-05, "loss": 0.6207, "step": 3918 }, { "epoch": 5.01632, "grad_norm": 0.7081112265586853, "learning_rate": 4.217887154861945e-05, "loss": 0.6396, "step": 3919 }, { "epoch": 5.0176, "grad_norm": 0.7482070922851562, "learning_rate": 4.217687074829932e-05, "loss": 0.632, "step": 3920 }, { "epoch": 5.01888, "grad_norm": 0.7229052186012268, "learning_rate": 4.217486994797919e-05, "loss": 0.5806, "step": 3921 }, { "epoch": 5.02016, "grad_norm": 0.678459644317627, "learning_rate": 4.2172869147659065e-05, "loss": 0.5644, "step": 3922 }, { "epoch": 5.02144, "grad_norm": 0.7244837880134583, "learning_rate": 4.217086834733894e-05, "loss": 0.6354, "step": 3923 }, { "epoch": 5.02272, "grad_norm": 0.752399206161499, "learning_rate": 4.216886754701881e-05, "loss": 0.6326, "step": 3924 }, { "epoch": 5.024, "grad_norm": 0.7514381408691406, "learning_rate": 4.216686674669868e-05, "loss": 0.6534, "step": 3925 }, { "epoch": 5.02528, "grad_norm": 0.720856249332428, "learning_rate": 4.216486594637855e-05, "loss": 0.6045, "step": 3926 }, { "epoch": 5.02656, "grad_norm": 0.7397941946983337, "learning_rate": 4.2162865146058424e-05, "loss": 0.6208, "step": 3927 }, { "epoch": 5.02784, "grad_norm": 0.7684844732284546, "learning_rate": 4.2160864345738296e-05, "loss": 0.6398, 
"step": 3928 }, { "epoch": 5.02912, "grad_norm": 0.7364361882209778, "learning_rate": 4.215886354541817e-05, "loss": 0.6167, "step": 3929 }, { "epoch": 5.0304, "grad_norm": 0.6819130182266235, "learning_rate": 4.215686274509804e-05, "loss": 0.5566, "step": 3930 }, { "epoch": 5.03168, "grad_norm": 0.7268734574317932, "learning_rate": 4.215486194477791e-05, "loss": 0.6476, "step": 3931 }, { "epoch": 5.03296, "grad_norm": 0.7218887805938721, "learning_rate": 4.2152861144457784e-05, "loss": 0.6149, "step": 3932 }, { "epoch": 5.03424, "grad_norm": 0.7900970578193665, "learning_rate": 4.2150860344137656e-05, "loss": 0.6828, "step": 3933 }, { "epoch": 5.03552, "grad_norm": 0.6771689653396606, "learning_rate": 4.2148859543817534e-05, "loss": 0.5821, "step": 3934 }, { "epoch": 5.0368, "grad_norm": 0.7154511213302612, "learning_rate": 4.21468587434974e-05, "loss": 0.5964, "step": 3935 }, { "epoch": 5.03808, "grad_norm": 0.7221072912216187, "learning_rate": 4.214485794317727e-05, "loss": 0.6088, "step": 3936 }, { "epoch": 5.03936, "grad_norm": 0.7826116681098938, "learning_rate": 4.214285714285714e-05, "loss": 0.6514, "step": 3937 }, { "epoch": 5.04064, "grad_norm": 0.7515047192573547, "learning_rate": 4.2140856342537015e-05, "loss": 0.6776, "step": 3938 }, { "epoch": 5.04192, "grad_norm": 0.7475928664207458, "learning_rate": 4.213885554221689e-05, "loss": 0.6478, "step": 3939 }, { "epoch": 5.0432, "grad_norm": 0.7153226733207703, "learning_rate": 4.213685474189676e-05, "loss": 0.6474, "step": 3940 }, { "epoch": 5.04448, "grad_norm": 0.6863680481910706, "learning_rate": 4.213485394157664e-05, "loss": 0.6281, "step": 3941 }, { "epoch": 5.04576, "grad_norm": 0.6896254420280457, "learning_rate": 4.213285314125651e-05, "loss": 0.6058, "step": 3942 }, { "epoch": 5.04704, "grad_norm": 0.7509266138076782, "learning_rate": 4.2130852340936374e-05, "loss": 0.6445, "step": 3943 }, { "epoch": 5.04832, "grad_norm": 0.7177383303642273, "learning_rate": 4.2128851540616246e-05, "loss": 
0.6522, "step": 3944 }, { "epoch": 5.0496, "grad_norm": 0.7128506302833557, "learning_rate": 4.212685074029612e-05, "loss": 0.6011, "step": 3945 }, { "epoch": 5.05088, "grad_norm": 0.6913168430328369, "learning_rate": 4.212484993997599e-05, "loss": 0.6598, "step": 3946 }, { "epoch": 5.05216, "grad_norm": 0.740969181060791, "learning_rate": 4.212284913965586e-05, "loss": 0.6409, "step": 3947 }, { "epoch": 5.05344, "grad_norm": 0.7747688293457031, "learning_rate": 4.212084833933574e-05, "loss": 0.6727, "step": 3948 }, { "epoch": 5.05472, "grad_norm": 0.7710026502609253, "learning_rate": 4.211884753901561e-05, "loss": 0.6585, "step": 3949 }, { "epoch": 5.056, "grad_norm": 0.713817834854126, "learning_rate": 4.2116846738695484e-05, "loss": 0.6174, "step": 3950 }, { "epoch": 5.05728, "grad_norm": 0.7268139123916626, "learning_rate": 4.211484593837535e-05, "loss": 0.6679, "step": 3951 }, { "epoch": 5.05856, "grad_norm": 0.7151246666908264, "learning_rate": 4.211284513805522e-05, "loss": 0.5802, "step": 3952 }, { "epoch": 5.05984, "grad_norm": 0.6851016283035278, "learning_rate": 4.211084433773509e-05, "loss": 0.6258, "step": 3953 }, { "epoch": 5.06112, "grad_norm": 0.7177205681800842, "learning_rate": 4.2108843537414965e-05, "loss": 0.6276, "step": 3954 }, { "epoch": 5.0624, "grad_norm": 0.7391200661659241, "learning_rate": 4.210684273709484e-05, "loss": 0.6175, "step": 3955 }, { "epoch": 5.06368, "grad_norm": 0.7397950887680054, "learning_rate": 4.2104841936774715e-05, "loss": 0.6149, "step": 3956 }, { "epoch": 5.06496, "grad_norm": 0.7378024458885193, "learning_rate": 4.210284113645459e-05, "loss": 0.6524, "step": 3957 }, { "epoch": 5.06624, "grad_norm": 0.7197895050048828, "learning_rate": 4.210084033613446e-05, "loss": 0.6747, "step": 3958 }, { "epoch": 5.06752, "grad_norm": 0.7385547161102295, "learning_rate": 4.2098839535814324e-05, "loss": 0.6381, "step": 3959 }, { "epoch": 5.0688, "grad_norm": 0.7747622728347778, "learning_rate": 4.2096838735494196e-05, "loss": 
0.6198, "step": 3960 }, { "epoch": 5.07008, "grad_norm": 0.7808424830436707, "learning_rate": 4.209483793517407e-05, "loss": 0.6496, "step": 3961 }, { "epoch": 5.07136, "grad_norm": 0.7174487709999084, "learning_rate": 4.2092837134853946e-05, "loss": 0.5677, "step": 3962 }, { "epoch": 5.07264, "grad_norm": 0.7417564392089844, "learning_rate": 4.209083633453382e-05, "loss": 0.6681, "step": 3963 }, { "epoch": 5.07392, "grad_norm": 0.654253363609314, "learning_rate": 4.208883553421369e-05, "loss": 0.5682, "step": 3964 }, { "epoch": 5.0752, "grad_norm": 0.7092409133911133, "learning_rate": 4.208683473389356e-05, "loss": 0.6277, "step": 3965 }, { "epoch": 5.07648, "grad_norm": 0.7257412672042847, "learning_rate": 4.2084833933573434e-05, "loss": 0.6708, "step": 3966 }, { "epoch": 5.07776, "grad_norm": 0.7401608228683472, "learning_rate": 4.20828331332533e-05, "loss": 0.6563, "step": 3967 }, { "epoch": 5.07904, "grad_norm": 0.7241030335426331, "learning_rate": 4.208083233293317e-05, "loss": 0.6504, "step": 3968 }, { "epoch": 5.08032, "grad_norm": 0.7237942218780518, "learning_rate": 4.207883153261305e-05, "loss": 0.6548, "step": 3969 }, { "epoch": 5.0816, "grad_norm": 0.7459864616394043, "learning_rate": 4.207683073229292e-05, "loss": 0.6404, "step": 3970 }, { "epoch": 5.08288, "grad_norm": 0.7194232940673828, "learning_rate": 4.207482993197279e-05, "loss": 0.6405, "step": 3971 }, { "epoch": 5.08416, "grad_norm": 0.7170876860618591, "learning_rate": 4.2072829131652665e-05, "loss": 0.6667, "step": 3972 }, { "epoch": 5.08544, "grad_norm": 0.7066750526428223, "learning_rate": 4.207082833133254e-05, "loss": 0.6285, "step": 3973 }, { "epoch": 5.08672, "grad_norm": 0.7601994276046753, "learning_rate": 4.206882753101241e-05, "loss": 0.7017, "step": 3974 }, { "epoch": 5.088, "grad_norm": 0.7173680663108826, "learning_rate": 4.2066826730692274e-05, "loss": 0.6869, "step": 3975 }, { "epoch": 5.08928, "grad_norm": 0.7139931321144104, "learning_rate": 4.2064825930372145e-05, "loss": 
0.6175, "step": 3976 }, { "epoch": 5.09056, "grad_norm": 0.7550209760665894, "learning_rate": 4.2062825130052024e-05, "loss": 0.6521, "step": 3977 }, { "epoch": 5.09184, "grad_norm": 0.7335852384567261, "learning_rate": 4.2060824329731896e-05, "loss": 0.6432, "step": 3978 }, { "epoch": 5.09312, "grad_norm": 0.7130416035652161, "learning_rate": 4.205882352941177e-05, "loss": 0.616, "step": 3979 }, { "epoch": 5.0944, "grad_norm": 0.7009579539299011, "learning_rate": 4.205682272909164e-05, "loss": 0.6022, "step": 3980 }, { "epoch": 5.09568, "grad_norm": 0.7348793148994446, "learning_rate": 4.205482192877151e-05, "loss": 0.6118, "step": 3981 }, { "epoch": 5.09696, "grad_norm": 0.7403352856636047, "learning_rate": 4.205282112845138e-05, "loss": 0.6397, "step": 3982 }, { "epoch": 5.09824, "grad_norm": 0.7160905003547668, "learning_rate": 4.205082032813125e-05, "loss": 0.6391, "step": 3983 }, { "epoch": 5.09952, "grad_norm": 0.7039486765861511, "learning_rate": 4.204881952781113e-05, "loss": 0.6062, "step": 3984 }, { "epoch": 5.1008, "grad_norm": 0.752713680267334, "learning_rate": 4.2046818727491e-05, "loss": 0.6827, "step": 3985 }, { "epoch": 5.10208, "grad_norm": 0.7083015441894531, "learning_rate": 4.204481792717087e-05, "loss": 0.6033, "step": 3986 }, { "epoch": 5.10336, "grad_norm": 0.7464533448219299, "learning_rate": 4.204281712685074e-05, "loss": 0.6603, "step": 3987 }, { "epoch": 5.10464, "grad_norm": 0.713887095451355, "learning_rate": 4.2040816326530615e-05, "loss": 0.6253, "step": 3988 }, { "epoch": 5.10592, "grad_norm": 0.7454844117164612, "learning_rate": 4.2038815526210486e-05, "loss": 0.6518, "step": 3989 }, { "epoch": 5.1072, "grad_norm": 0.770743191242218, "learning_rate": 4.203681472589036e-05, "loss": 0.6907, "step": 3990 }, { "epoch": 5.10848, "grad_norm": 0.7465303540229797, "learning_rate": 4.203481392557023e-05, "loss": 0.7005, "step": 3991 }, { "epoch": 5.10976, "grad_norm": 0.745933473110199, "learning_rate": 4.20328131252501e-05, "loss": 
0.6115, "step": 3992 }, { "epoch": 5.11104, "grad_norm": 0.6949446797370911, "learning_rate": 4.2030812324929974e-05, "loss": 0.5874, "step": 3993 }, { "epoch": 5.11232, "grad_norm": 0.7319580912590027, "learning_rate": 4.2028811524609846e-05, "loss": 0.6294, "step": 3994 }, { "epoch": 5.1136, "grad_norm": 0.7834770679473877, "learning_rate": 4.202681072428972e-05, "loss": 0.7396, "step": 3995 }, { "epoch": 5.11488, "grad_norm": 0.7234769463539124, "learning_rate": 4.202480992396959e-05, "loss": 0.6397, "step": 3996 }, { "epoch": 5.11616, "grad_norm": 0.7542564868927002, "learning_rate": 4.202280912364946e-05, "loss": 0.6028, "step": 3997 }, { "epoch": 5.11744, "grad_norm": 0.7732244729995728, "learning_rate": 4.202080832332933e-05, "loss": 0.6938, "step": 3998 }, { "epoch": 5.11872, "grad_norm": 0.6997548937797546, "learning_rate": 4.2018807523009205e-05, "loss": 0.6206, "step": 3999 }, { "epoch": 5.12, "grad_norm": 0.7281977534294128, "learning_rate": 4.201680672268908e-05, "loss": 0.6279, "step": 4000 }, { "epoch": 5.12128, "grad_norm": 0.7627147436141968, "learning_rate": 4.201480592236895e-05, "loss": 0.6297, "step": 4001 }, { "epoch": 5.12256, "grad_norm": 0.7351235151290894, "learning_rate": 4.201280512204882e-05, "loss": 0.6132, "step": 4002 }, { "epoch": 5.12384, "grad_norm": 0.7031277418136597, "learning_rate": 4.201080432172869e-05, "loss": 0.6561, "step": 4003 }, { "epoch": 5.12512, "grad_norm": 0.7446048259735107, "learning_rate": 4.2008803521408564e-05, "loss": 0.6526, "step": 4004 }, { "epoch": 5.1264, "grad_norm": 0.7449467778205872, "learning_rate": 4.200680272108844e-05, "loss": 0.6027, "step": 4005 }, { "epoch": 5.12768, "grad_norm": 0.8109471201896667, "learning_rate": 4.200480192076831e-05, "loss": 0.6602, "step": 4006 }, { "epoch": 5.12896, "grad_norm": 0.7701667547225952, "learning_rate": 4.200280112044818e-05, "loss": 0.6192, "step": 4007 }, { "epoch": 5.13024, "grad_norm": 0.7170472741127014, "learning_rate": 4.200080032012805e-05, "loss": 
0.5924, "step": 4008 }, { "epoch": 5.13152, "grad_norm": 0.7401517033576965, "learning_rate": 4.1998799519807924e-05, "loss": 0.6098, "step": 4009 }, { "epoch": 5.1328, "grad_norm": 0.7323094606399536, "learning_rate": 4.1996798719487795e-05, "loss": 0.6426, "step": 4010 }, { "epoch": 5.13408, "grad_norm": 0.73775315284729, "learning_rate": 4.199479791916767e-05, "loss": 0.6091, "step": 4011 }, { "epoch": 5.13536, "grad_norm": 0.7261705994606018, "learning_rate": 4.1992797118847546e-05, "loss": 0.6217, "step": 4012 }, { "epoch": 5.13664, "grad_norm": 0.7077757716178894, "learning_rate": 4.199079631852742e-05, "loss": 0.6288, "step": 4013 }, { "epoch": 5.13792, "grad_norm": 0.7402573823928833, "learning_rate": 4.198879551820728e-05, "loss": 0.6528, "step": 4014 }, { "epoch": 5.1392, "grad_norm": 0.7423076629638672, "learning_rate": 4.1986794717887155e-05, "loss": 0.6715, "step": 4015 }, { "epoch": 5.14048, "grad_norm": 0.76578289270401, "learning_rate": 4.1984793917567026e-05, "loss": 0.6479, "step": 4016 }, { "epoch": 5.14176, "grad_norm": 0.723196268081665, "learning_rate": 4.19827931172469e-05, "loss": 0.6415, "step": 4017 }, { "epoch": 5.14304, "grad_norm": 0.7332058548927307, "learning_rate": 4.198079231692677e-05, "loss": 0.6373, "step": 4018 }, { "epoch": 5.1443200000000004, "grad_norm": 0.7015236616134644, "learning_rate": 4.197879151660665e-05, "loss": 0.5996, "step": 4019 }, { "epoch": 5.1456, "grad_norm": 0.743124783039093, "learning_rate": 4.197679071628652e-05, "loss": 0.5971, "step": 4020 }, { "epoch": 5.14688, "grad_norm": 0.7382779717445374, "learning_rate": 4.197478991596639e-05, "loss": 0.6285, "step": 4021 }, { "epoch": 5.14816, "grad_norm": 0.7928086519241333, "learning_rate": 4.197278911564626e-05, "loss": 0.6687, "step": 4022 }, { "epoch": 5.14944, "grad_norm": 0.7693018913269043, "learning_rate": 4.197078831532613e-05, "loss": 0.6553, "step": 4023 }, { "epoch": 5.15072, "grad_norm": 0.7531712055206299, "learning_rate": 4.1968787515006e-05, 
"loss": 0.642, "step": 4024 }, { "epoch": 5.152, "grad_norm": 0.7181296944618225, "learning_rate": 4.196678671468587e-05, "loss": 0.5862, "step": 4025 }, { "epoch": 5.15328, "grad_norm": 0.724727988243103, "learning_rate": 4.196478591436575e-05, "loss": 0.6467, "step": 4026 }, { "epoch": 5.15456, "grad_norm": 0.7461944222450256, "learning_rate": 4.1962785114045624e-05, "loss": 0.6054, "step": 4027 }, { "epoch": 5.15584, "grad_norm": 0.7383855581283569, "learning_rate": 4.1960784313725496e-05, "loss": 0.621, "step": 4028 }, { "epoch": 5.15712, "grad_norm": 0.6796069145202637, "learning_rate": 4.195878351340537e-05, "loss": 0.5679, "step": 4029 }, { "epoch": 5.1584, "grad_norm": 0.7139230370521545, "learning_rate": 4.195678271308523e-05, "loss": 0.5988, "step": 4030 }, { "epoch": 5.15968, "grad_norm": 0.7442138195037842, "learning_rate": 4.1954781912765104e-05, "loss": 0.5973, "step": 4031 }, { "epoch": 5.16096, "grad_norm": 0.7515295147895813, "learning_rate": 4.1952781112444976e-05, "loss": 0.5769, "step": 4032 }, { "epoch": 5.16224, "grad_norm": 0.7667818665504456, "learning_rate": 4.1950780312124855e-05, "loss": 0.6609, "step": 4033 }, { "epoch": 5.16352, "grad_norm": 0.7336476445198059, "learning_rate": 4.194877951180473e-05, "loss": 0.6163, "step": 4034 }, { "epoch": 5.1648, "grad_norm": 0.6899265646934509, "learning_rate": 4.19467787114846e-05, "loss": 0.5985, "step": 4035 }, { "epoch": 5.16608, "grad_norm": 0.6779667735099792, "learning_rate": 4.194477791116447e-05, "loss": 0.57, "step": 4036 }, { "epoch": 5.16736, "grad_norm": 0.6793948411941528, "learning_rate": 4.194277711084434e-05, "loss": 0.5694, "step": 4037 }, { "epoch": 5.16864, "grad_norm": 0.763278067111969, "learning_rate": 4.194077631052421e-05, "loss": 0.6917, "step": 4038 }, { "epoch": 5.16992, "grad_norm": 0.699497640132904, "learning_rate": 4.193877551020408e-05, "loss": 0.5704, "step": 4039 }, { "epoch": 5.1712, "grad_norm": 0.7154186964035034, "learning_rate": 4.193677470988396e-05, "loss": 
0.6079, "step": 4040 }, { "epoch": 5.17248, "grad_norm": 0.7278900146484375, "learning_rate": 4.193477390956383e-05, "loss": 0.6887, "step": 4041 }, { "epoch": 5.17376, "grad_norm": 0.7596189379692078, "learning_rate": 4.19327731092437e-05, "loss": 0.7018, "step": 4042 }, { "epoch": 5.17504, "grad_norm": 0.7543389201164246, "learning_rate": 4.1930772308923573e-05, "loss": 0.6161, "step": 4043 }, { "epoch": 5.17632, "grad_norm": 0.7368336915969849, "learning_rate": 4.1928771508603445e-05, "loss": 0.642, "step": 4044 }, { "epoch": 5.1776, "grad_norm": 0.7439759969711304, "learning_rate": 4.192677070828332e-05, "loss": 0.6999, "step": 4045 }, { "epoch": 5.17888, "grad_norm": 0.695544958114624, "learning_rate": 4.192476990796318e-05, "loss": 0.6256, "step": 4046 }, { "epoch": 5.18016, "grad_norm": 0.7475451231002808, "learning_rate": 4.192276910764306e-05, "loss": 0.648, "step": 4047 }, { "epoch": 5.18144, "grad_norm": 0.740695059299469, "learning_rate": 4.192076830732293e-05, "loss": 0.598, "step": 4048 }, { "epoch": 5.18272, "grad_norm": 0.7205529808998108, "learning_rate": 4.1918767507002805e-05, "loss": 0.6135, "step": 4049 }, { "epoch": 5.184, "grad_norm": 0.6971973776817322, "learning_rate": 4.1916766706682676e-05, "loss": 0.6237, "step": 4050 }, { "epoch": 5.18528, "grad_norm": 0.7059743404388428, "learning_rate": 4.191476590636255e-05, "loss": 0.6197, "step": 4051 }, { "epoch": 5.18656, "grad_norm": 0.7417049407958984, "learning_rate": 4.191276510604242e-05, "loss": 0.6321, "step": 4052 }, { "epoch": 5.18784, "grad_norm": 0.726333498954773, "learning_rate": 4.191076430572229e-05, "loss": 0.5983, "step": 4053 }, { "epoch": 5.18912, "grad_norm": 0.7621744275093079, "learning_rate": 4.1908763505402164e-05, "loss": 0.675, "step": 4054 }, { "epoch": 5.1904, "grad_norm": 0.7966973185539246, "learning_rate": 4.1906762705082036e-05, "loss": 0.6198, "step": 4055 }, { "epoch": 5.19168, "grad_norm": 0.7543626427650452, "learning_rate": 4.190476190476191e-05, "loss": 
0.6282, "step": 4056 }, { "epoch": 5.19296, "grad_norm": 0.7392343282699585, "learning_rate": 4.190276110444178e-05, "loss": 0.6496, "step": 4057 }, { "epoch": 5.19424, "grad_norm": 0.7553439140319824, "learning_rate": 4.190076030412165e-05, "loss": 0.6604, "step": 4058 }, { "epoch": 5.19552, "grad_norm": 0.7107070088386536, "learning_rate": 4.189875950380152e-05, "loss": 0.5846, "step": 4059 }, { "epoch": 5.1968, "grad_norm": 0.7690718173980713, "learning_rate": 4.1896758703481395e-05, "loss": 0.6315, "step": 4060 }, { "epoch": 5.19808, "grad_norm": 0.7583000063896179, "learning_rate": 4.189475790316127e-05, "loss": 0.6565, "step": 4061 }, { "epoch": 5.19936, "grad_norm": 0.735431432723999, "learning_rate": 4.189275710284114e-05, "loss": 0.6382, "step": 4062 }, { "epoch": 5.20064, "grad_norm": 0.749560534954071, "learning_rate": 4.189075630252101e-05, "loss": 0.6199, "step": 4063 }, { "epoch": 5.20192, "grad_norm": 0.7637155055999756, "learning_rate": 4.188875550220088e-05, "loss": 0.6392, "step": 4064 }, { "epoch": 5.2032, "grad_norm": 0.7183476686477661, "learning_rate": 4.1886754701880754e-05, "loss": 0.6105, "step": 4065 }, { "epoch": 5.20448, "grad_norm": 0.7134882807731628, "learning_rate": 4.1884753901560626e-05, "loss": 0.6176, "step": 4066 }, { "epoch": 5.20576, "grad_norm": 0.6925643682479858, "learning_rate": 4.18827531012405e-05, "loss": 0.6148, "step": 4067 }, { "epoch": 5.20704, "grad_norm": 0.6939037442207336, "learning_rate": 4.188075230092037e-05, "loss": 0.5917, "step": 4068 }, { "epoch": 5.20832, "grad_norm": 0.7910222411155701, "learning_rate": 4.187875150060024e-05, "loss": 0.7064, "step": 4069 }, { "epoch": 5.2096, "grad_norm": 0.7466183304786682, "learning_rate": 4.1876750700280114e-05, "loss": 0.6069, "step": 4070 }, { "epoch": 5.21088, "grad_norm": 0.7494035363197327, "learning_rate": 4.1874749899959985e-05, "loss": 0.6614, "step": 4071 }, { "epoch": 5.21216, "grad_norm": 0.7295969724655151, "learning_rate": 4.187274909963986e-05, "loss": 
0.6364, "step": 4072 }, { "epoch": 5.21344, "grad_norm": 0.7598521709442139, "learning_rate": 4.187074829931973e-05, "loss": 0.6466, "step": 4073 }, { "epoch": 5.21472, "grad_norm": 0.7255356907844543, "learning_rate": 4.18687474989996e-05, "loss": 0.6205, "step": 4074 }, { "epoch": 5.216, "grad_norm": 0.7490739822387695, "learning_rate": 4.186674669867948e-05, "loss": 0.6438, "step": 4075 }, { "epoch": 5.21728, "grad_norm": 0.7887585759162903, "learning_rate": 4.1864745898359345e-05, "loss": 0.5891, "step": 4076 }, { "epoch": 5.21856, "grad_norm": 0.7423108220100403, "learning_rate": 4.1862745098039217e-05, "loss": 0.6278, "step": 4077 }, { "epoch": 5.21984, "grad_norm": 0.747302234172821, "learning_rate": 4.186074429771909e-05, "loss": 0.6105, "step": 4078 }, { "epoch": 5.22112, "grad_norm": 0.7417611479759216, "learning_rate": 4.185874349739896e-05, "loss": 0.6077, "step": 4079 }, { "epoch": 5.2224, "grad_norm": 0.7022831439971924, "learning_rate": 4.185674269707883e-05, "loss": 0.5837, "step": 4080 }, { "epoch": 5.22368, "grad_norm": 0.7441087365150452, "learning_rate": 4.1854741896758704e-05, "loss": 0.6113, "step": 4081 }, { "epoch": 5.22496, "grad_norm": 0.7969503998756409, "learning_rate": 4.185274109643858e-05, "loss": 0.6077, "step": 4082 }, { "epoch": 5.22624, "grad_norm": 0.7303327918052673, "learning_rate": 4.1850740296118454e-05, "loss": 0.5847, "step": 4083 }, { "epoch": 5.22752, "grad_norm": 0.7135294675827026, "learning_rate": 4.184873949579832e-05, "loss": 0.5898, "step": 4084 }, { "epoch": 5.2288, "grad_norm": 0.6808853149414062, "learning_rate": 4.184673869547819e-05, "loss": 0.6139, "step": 4085 }, { "epoch": 5.23008, "grad_norm": 0.7298513054847717, "learning_rate": 4.184473789515806e-05, "loss": 0.6338, "step": 4086 }, { "epoch": 5.2313600000000005, "grad_norm": 0.7220539450645447, "learning_rate": 4.1842737094837935e-05, "loss": 0.5868, "step": 4087 }, { "epoch": 5.23264, "grad_norm": 0.7567940354347229, "learning_rate": 
4.184073629451781e-05, "loss": 0.626, "step": 4088 }, { "epoch": 5.23392, "grad_norm": 0.7327399253845215, "learning_rate": 4.183873549419768e-05, "loss": 0.6274, "step": 4089 }, { "epoch": 5.2352, "grad_norm": 0.7534685134887695, "learning_rate": 4.183673469387756e-05, "loss": 0.6031, "step": 4090 }, { "epoch": 5.23648, "grad_norm": 0.7302762269973755, "learning_rate": 4.183473389355743e-05, "loss": 0.5695, "step": 4091 }, { "epoch": 5.23776, "grad_norm": 0.737391471862793, "learning_rate": 4.1832733093237294e-05, "loss": 0.6894, "step": 4092 }, { "epoch": 5.23904, "grad_norm": 0.7553057670593262, "learning_rate": 4.1830732292917166e-05, "loss": 0.6557, "step": 4093 }, { "epoch": 5.24032, "grad_norm": 0.7426884770393372, "learning_rate": 4.182873149259704e-05, "loss": 0.6648, "step": 4094 }, { "epoch": 5.2416, "grad_norm": 0.7494625449180603, "learning_rate": 4.182673069227691e-05, "loss": 0.6473, "step": 4095 }, { "epoch": 5.24288, "grad_norm": 0.6882435083389282, "learning_rate": 4.182472989195678e-05, "loss": 0.5835, "step": 4096 }, { "epoch": 5.24416, "grad_norm": 0.7787857055664062, "learning_rate": 4.182272909163666e-05, "loss": 0.6566, "step": 4097 }, { "epoch": 5.24544, "grad_norm": 0.8027628660202026, "learning_rate": 4.182072829131653e-05, "loss": 0.6464, "step": 4098 }, { "epoch": 5.24672, "grad_norm": 0.7405392527580261, "learning_rate": 4.1818727490996404e-05, "loss": 0.6006, "step": 4099 }, { "epoch": 5.248, "grad_norm": 0.7443488836288452, "learning_rate": 4.181672669067627e-05, "loss": 0.646, "step": 4100 }, { "epoch": 5.24928, "grad_norm": 0.8020368218421936, "learning_rate": 4.181472589035614e-05, "loss": 0.6962, "step": 4101 }, { "epoch": 5.25056, "grad_norm": 0.7345948219299316, "learning_rate": 4.181272509003601e-05, "loss": 0.6171, "step": 4102 }, { "epoch": 5.25184, "grad_norm": 0.7310644388198853, "learning_rate": 4.1810724289715885e-05, "loss": 0.6516, "step": 4103 }, { "epoch": 5.25312, "grad_norm": 0.6996403336524963, "learning_rate": 
4.1808723489395763e-05, "loss": 0.5719, "step": 4104 }, { "epoch": 5.2544, "grad_norm": 0.683502197265625, "learning_rate": 4.1806722689075635e-05, "loss": 0.5978, "step": 4105 }, { "epoch": 5.25568, "grad_norm": 0.7105028629302979, "learning_rate": 4.180472188875551e-05, "loss": 0.6489, "step": 4106 }, { "epoch": 5.25696, "grad_norm": 0.7716310620307922, "learning_rate": 4.180272108843538e-05, "loss": 0.6468, "step": 4107 }, { "epoch": 5.25824, "grad_norm": 0.7370600700378418, "learning_rate": 4.1800720288115244e-05, "loss": 0.5911, "step": 4108 }, { "epoch": 5.25952, "grad_norm": 0.7595857381820679, "learning_rate": 4.1798719487795116e-05, "loss": 0.6627, "step": 4109 }, { "epoch": 5.2608, "grad_norm": 0.7154218554496765, "learning_rate": 4.179671868747499e-05, "loss": 0.6439, "step": 4110 }, { "epoch": 5.26208, "grad_norm": 0.7370497584342957, "learning_rate": 4.1794717887154866e-05, "loss": 0.6511, "step": 4111 }, { "epoch": 5.26336, "grad_norm": 0.7522472143173218, "learning_rate": 4.179271708683474e-05, "loss": 0.6171, "step": 4112 }, { "epoch": 5.26464, "grad_norm": 0.7183180451393127, "learning_rate": 4.179071628651461e-05, "loss": 0.6296, "step": 4113 }, { "epoch": 5.26592, "grad_norm": 0.7467809319496155, "learning_rate": 4.178871548619448e-05, "loss": 0.6705, "step": 4114 }, { "epoch": 5.2672, "grad_norm": 0.7425380349159241, "learning_rate": 4.1786714685874354e-05, "loss": 0.6152, "step": 4115 }, { "epoch": 5.26848, "grad_norm": 0.729131281375885, "learning_rate": 4.178471388555422e-05, "loss": 0.6485, "step": 4116 }, { "epoch": 5.26976, "grad_norm": 0.7531429529190063, "learning_rate": 4.178271308523409e-05, "loss": 0.702, "step": 4117 }, { "epoch": 5.27104, "grad_norm": 0.6844903826713562, "learning_rate": 4.178071228491397e-05, "loss": 0.5557, "step": 4118 }, { "epoch": 5.27232, "grad_norm": 0.6600164175033569, "learning_rate": 4.177871148459384e-05, "loss": 0.5779, "step": 4119 }, { "epoch": 5.2736, "grad_norm": 0.7316297888755798, "learning_rate": 
4.177671068427371e-05, "loss": 0.6198, "step": 4120 }, { "epoch": 5.27488, "grad_norm": 0.7616431713104248, "learning_rate": 4.1774709883953585e-05, "loss": 0.6581, "step": 4121 }, { "epoch": 5.27616, "grad_norm": 0.7071192264556885, "learning_rate": 4.177270908363346e-05, "loss": 0.616, "step": 4122 }, { "epoch": 5.27744, "grad_norm": 0.717705249786377, "learning_rate": 4.177070828331333e-05, "loss": 0.6081, "step": 4123 }, { "epoch": 5.27872, "grad_norm": 0.7297554612159729, "learning_rate": 4.1768707482993194e-05, "loss": 0.6139, "step": 4124 }, { "epoch": 5.28, "grad_norm": 0.7143238186836243, "learning_rate": 4.176670668267307e-05, "loss": 0.6375, "step": 4125 }, { "epoch": 5.28128, "grad_norm": 0.701789915561676, "learning_rate": 4.1764705882352944e-05, "loss": 0.5887, "step": 4126 }, { "epoch": 5.28256, "grad_norm": 0.7739312052726746, "learning_rate": 4.1762705082032816e-05, "loss": 0.6815, "step": 4127 }, { "epoch": 5.28384, "grad_norm": 0.714019238948822, "learning_rate": 4.176070428171269e-05, "loss": 0.6148, "step": 4128 }, { "epoch": 5.28512, "grad_norm": 0.7301841378211975, "learning_rate": 4.175870348139256e-05, "loss": 0.6268, "step": 4129 }, { "epoch": 5.2864, "grad_norm": 0.7254041433334351, "learning_rate": 4.175670268107243e-05, "loss": 0.6933, "step": 4130 }, { "epoch": 5.28768, "grad_norm": 0.7699323892593384, "learning_rate": 4.1754701880752304e-05, "loss": 0.7007, "step": 4131 }, { "epoch": 5.28896, "grad_norm": 0.7059085965156555, "learning_rate": 4.1752701080432175e-05, "loss": 0.6194, "step": 4132 }, { "epoch": 5.29024, "grad_norm": 0.7336937785148621, "learning_rate": 4.175070028011205e-05, "loss": 0.6656, "step": 4133 }, { "epoch": 5.29152, "grad_norm": 0.7493408918380737, "learning_rate": 4.174869947979192e-05, "loss": 0.6766, "step": 4134 }, { "epoch": 5.2928, "grad_norm": 0.7297886610031128, "learning_rate": 4.174669867947179e-05, "loss": 0.6352, "step": 4135 }, { "epoch": 5.29408, "grad_norm": 0.7195264101028442, "learning_rate": 
4.174469787915166e-05, "loss": 0.6102, "step": 4136 }, { "epoch": 5.29536, "grad_norm": 0.7068727612495422, "learning_rate": 4.1742697078831535e-05, "loss": 0.556, "step": 4137 }, { "epoch": 5.29664, "grad_norm": 0.7307595014572144, "learning_rate": 4.1740696278511407e-05, "loss": 0.6653, "step": 4138 }, { "epoch": 5.29792, "grad_norm": 0.7298409938812256, "learning_rate": 4.173869547819128e-05, "loss": 0.6587, "step": 4139 }, { "epoch": 5.2992, "grad_norm": 0.7297065258026123, "learning_rate": 4.173669467787115e-05, "loss": 0.63, "step": 4140 }, { "epoch": 5.30048, "grad_norm": 0.7633817791938782, "learning_rate": 4.173469387755102e-05, "loss": 0.6555, "step": 4141 }, { "epoch": 5.30176, "grad_norm": 0.7700405716896057, "learning_rate": 4.1732693077230894e-05, "loss": 0.6576, "step": 4142 }, { "epoch": 5.30304, "grad_norm": 0.7559324502944946, "learning_rate": 4.1730692276910766e-05, "loss": 0.6633, "step": 4143 }, { "epoch": 5.30432, "grad_norm": 0.782576322555542, "learning_rate": 4.172869147659064e-05, "loss": 0.639, "step": 4144 }, { "epoch": 5.3056, "grad_norm": 0.7763882875442505, "learning_rate": 4.172669067627051e-05, "loss": 0.6591, "step": 4145 }, { "epoch": 5.30688, "grad_norm": 0.7644296288490295, "learning_rate": 4.172468987595038e-05, "loss": 0.5824, "step": 4146 }, { "epoch": 5.30816, "grad_norm": 0.7676565647125244, "learning_rate": 4.172268907563025e-05, "loss": 0.6401, "step": 4147 }, { "epoch": 5.30944, "grad_norm": 0.7719744443893433, "learning_rate": 4.1720688275310125e-05, "loss": 0.701, "step": 4148 }, { "epoch": 5.31072, "grad_norm": 0.7409470677375793, "learning_rate": 4.171868747499e-05, "loss": 0.596, "step": 4149 }, { "epoch": 5.312, "grad_norm": 0.7712857723236084, "learning_rate": 4.171668667466987e-05, "loss": 0.6428, "step": 4150 }, { "epoch": 5.31328, "grad_norm": 0.8165924549102783, "learning_rate": 4.171468587434974e-05, "loss": 0.6534, "step": 4151 }, { "epoch": 5.31456, "grad_norm": 0.7560482025146484, "learning_rate": 
4.171268507402961e-05, "loss": 0.6532, "step": 4152 }, { "epoch": 5.31584, "grad_norm": 0.740051805973053, "learning_rate": 4.171068427370949e-05, "loss": 0.5908, "step": 4153 }, { "epoch": 5.31712, "grad_norm": 0.7237354516983032, "learning_rate": 4.1708683473389356e-05, "loss": 0.6267, "step": 4154 }, { "epoch": 5.3184000000000005, "grad_norm": 0.7441720962524414, "learning_rate": 4.170668267306923e-05, "loss": 0.5965, "step": 4155 }, { "epoch": 5.31968, "grad_norm": 0.770770788192749, "learning_rate": 4.17046818727491e-05, "loss": 0.6421, "step": 4156 }, { "epoch": 5.32096, "grad_norm": 0.7334791421890259, "learning_rate": 4.170268107242897e-05, "loss": 0.6324, "step": 4157 }, { "epoch": 5.32224, "grad_norm": 0.7998023629188538, "learning_rate": 4.1700680272108844e-05, "loss": 0.6521, "step": 4158 }, { "epoch": 5.32352, "grad_norm": 0.7167679071426392, "learning_rate": 4.1698679471788716e-05, "loss": 0.6568, "step": 4159 }, { "epoch": 5.3248, "grad_norm": 0.6926052570343018, "learning_rate": 4.1696678671468594e-05, "loss": 0.5946, "step": 4160 }, { "epoch": 5.32608, "grad_norm": 0.704468846321106, "learning_rate": 4.1694677871148466e-05, "loss": 0.5969, "step": 4161 }, { "epoch": 5.32736, "grad_norm": 0.7154397368431091, "learning_rate": 4.169267707082833e-05, "loss": 0.6305, "step": 4162 }, { "epoch": 5.32864, "grad_norm": 0.7278268337249756, "learning_rate": 4.16906762705082e-05, "loss": 0.6246, "step": 4163 }, { "epoch": 5.3299199999999995, "grad_norm": 0.7837611436843872, "learning_rate": 4.1688675470188075e-05, "loss": 0.6799, "step": 4164 }, { "epoch": 5.3312, "grad_norm": 0.7822878360748291, "learning_rate": 4.168667466986795e-05, "loss": 0.6387, "step": 4165 }, { "epoch": 5.33248, "grad_norm": 0.7381898760795593, "learning_rate": 4.168467386954782e-05, "loss": 0.6071, "step": 4166 }, { "epoch": 5.33376, "grad_norm": 0.7611213326454163, "learning_rate": 4.16826730692277e-05, "loss": 0.5542, "step": 4167 }, { "epoch": 5.33504, "grad_norm": 
0.7357604503631592, "learning_rate": 4.168067226890757e-05, "loss": 0.6474, "step": 4168 }, { "epoch": 5.33632, "grad_norm": 0.7721554636955261, "learning_rate": 4.167867146858744e-05, "loss": 0.61, "step": 4169 }, { "epoch": 5.3376, "grad_norm": 0.7680994868278503, "learning_rate": 4.1676670668267306e-05, "loss": 0.672, "step": 4170 }, { "epoch": 5.33888, "grad_norm": 0.7215784192085266, "learning_rate": 4.167466986794718e-05, "loss": 0.6394, "step": 4171 }, { "epoch": 5.34016, "grad_norm": 0.7135915160179138, "learning_rate": 4.167266906762705e-05, "loss": 0.6101, "step": 4172 }, { "epoch": 5.34144, "grad_norm": 0.7465267181396484, "learning_rate": 4.167066826730692e-05, "loss": 0.6649, "step": 4173 }, { "epoch": 5.34272, "grad_norm": 0.7213309407234192, "learning_rate": 4.16686674669868e-05, "loss": 0.595, "step": 4174 }, { "epoch": 5.344, "grad_norm": 0.7122322916984558, "learning_rate": 4.166666666666667e-05, "loss": 0.643, "step": 4175 }, { "epoch": 5.34528, "grad_norm": 0.7451832890510559, "learning_rate": 4.1664665866346544e-05, "loss": 0.6457, "step": 4176 }, { "epoch": 5.34656, "grad_norm": 0.7480894923210144, "learning_rate": 4.1662665066026416e-05, "loss": 0.6788, "step": 4177 }, { "epoch": 5.34784, "grad_norm": 0.742680013179779, "learning_rate": 4.166066426570628e-05, "loss": 0.6303, "step": 4178 }, { "epoch": 5.34912, "grad_norm": 0.7494868636131287, "learning_rate": 4.165866346538615e-05, "loss": 0.6294, "step": 4179 }, { "epoch": 5.3504, "grad_norm": 0.7331695556640625, "learning_rate": 4.1656662665066025e-05, "loss": 0.6088, "step": 4180 }, { "epoch": 5.35168, "grad_norm": 0.7193648219108582, "learning_rate": 4.16546618647459e-05, "loss": 0.624, "step": 4181 }, { "epoch": 5.35296, "grad_norm": 0.7384651303291321, "learning_rate": 4.1652661064425775e-05, "loss": 0.6541, "step": 4182 }, { "epoch": 5.35424, "grad_norm": 0.7119219899177551, "learning_rate": 4.165066026410565e-05, "loss": 0.5827, "step": 4183 }, { "epoch": 5.35552, "grad_norm": 
0.7343322038650513, "learning_rate": 4.164865946378552e-05, "loss": 0.5935, "step": 4184 }, { "epoch": 5.3568, "grad_norm": 0.7368735671043396, "learning_rate": 4.164665866346539e-05, "loss": 0.6512, "step": 4185 }, { "epoch": 5.35808, "grad_norm": 0.7362673282623291, "learning_rate": 4.1644657863145256e-05, "loss": 0.5961, "step": 4186 }, { "epoch": 5.35936, "grad_norm": 0.7422880530357361, "learning_rate": 4.164265706282513e-05, "loss": 0.5847, "step": 4187 }, { "epoch": 5.36064, "grad_norm": 0.7207329869270325, "learning_rate": 4.1640656262505006e-05, "loss": 0.645, "step": 4188 }, { "epoch": 5.36192, "grad_norm": 0.7351892590522766, "learning_rate": 4.163865546218488e-05, "loss": 0.6344, "step": 4189 }, { "epoch": 5.3632, "grad_norm": 0.7149160504341125, "learning_rate": 4.163665466186475e-05, "loss": 0.6381, "step": 4190 }, { "epoch": 5.36448, "grad_norm": 0.7507062554359436, "learning_rate": 4.163465386154462e-05, "loss": 0.6487, "step": 4191 }, { "epoch": 5.36576, "grad_norm": 0.718169629573822, "learning_rate": 4.1632653061224494e-05, "loss": 0.5903, "step": 4192 }, { "epoch": 5.36704, "grad_norm": 0.6719139218330383, "learning_rate": 4.1630652260904365e-05, "loss": 0.5836, "step": 4193 }, { "epoch": 5.36832, "grad_norm": 0.704992413520813, "learning_rate": 4.162865146058423e-05, "loss": 0.6264, "step": 4194 }, { "epoch": 5.3696, "grad_norm": 0.7397636771202087, "learning_rate": 4.16266506602641e-05, "loss": 0.6152, "step": 4195 }, { "epoch": 5.37088, "grad_norm": 0.7924968600273132, "learning_rate": 4.162464985994398e-05, "loss": 0.6927, "step": 4196 }, { "epoch": 5.37216, "grad_norm": 0.7544575929641724, "learning_rate": 4.162264905962385e-05, "loss": 0.6515, "step": 4197 }, { "epoch": 5.37344, "grad_norm": 0.743186891078949, "learning_rate": 4.1620648259303725e-05, "loss": 0.5953, "step": 4198 }, { "epoch": 5.37472, "grad_norm": 0.708406925201416, "learning_rate": 4.16186474589836e-05, "loss": 0.6146, "step": 4199 }, { "epoch": 5.376, "grad_norm": 
0.7445898652076721, "learning_rate": 4.161664665866347e-05, "loss": 0.6632, "step": 4200 }, { "epoch": 5.37728, "grad_norm": 0.7561652660369873, "learning_rate": 4.161464585834334e-05, "loss": 0.678, "step": 4201 }, { "epoch": 5.37856, "grad_norm": 0.7312771081924438, "learning_rate": 4.1612645058023205e-05, "loss": 0.5748, "step": 4202 }, { "epoch": 5.37984, "grad_norm": 0.7716453671455383, "learning_rate": 4.1610644257703084e-05, "loss": 0.6249, "step": 4203 }, { "epoch": 5.38112, "grad_norm": 0.7312607765197754, "learning_rate": 4.1608643457382956e-05, "loss": 0.66, "step": 4204 }, { "epoch": 5.3824, "grad_norm": 0.7225040197372437, "learning_rate": 4.160664265706283e-05, "loss": 0.6272, "step": 4205 }, { "epoch": 5.38368, "grad_norm": 0.7225370407104492, "learning_rate": 4.16046418567427e-05, "loss": 0.6611, "step": 4206 }, { "epoch": 5.38496, "grad_norm": 0.7278191447257996, "learning_rate": 4.160264105642257e-05, "loss": 0.562, "step": 4207 }, { "epoch": 5.38624, "grad_norm": 0.7607426047325134, "learning_rate": 4.160064025610244e-05, "loss": 0.6992, "step": 4208 }, { "epoch": 5.38752, "grad_norm": 0.7173566818237305, "learning_rate": 4.1598639455782315e-05, "loss": 0.6655, "step": 4209 }, { "epoch": 5.3888, "grad_norm": 0.6946642398834229, "learning_rate": 4.159663865546219e-05, "loss": 0.5802, "step": 4210 }, { "epoch": 5.39008, "grad_norm": 0.7513164281845093, "learning_rate": 4.159463785514206e-05, "loss": 0.6538, "step": 4211 }, { "epoch": 5.39136, "grad_norm": 0.7534035444259644, "learning_rate": 4.159263705482193e-05, "loss": 0.6571, "step": 4212 }, { "epoch": 5.39264, "grad_norm": 0.7139204740524292, "learning_rate": 4.15906362545018e-05, "loss": 0.5496, "step": 4213 }, { "epoch": 5.39392, "grad_norm": 0.7388843894004822, "learning_rate": 4.1588635454181674e-05, "loss": 0.6338, "step": 4214 }, { "epoch": 5.3952, "grad_norm": 0.7447206974029541, "learning_rate": 4.1586634653861546e-05, "loss": 0.5863, "step": 4215 }, { "epoch": 5.39648, "grad_norm": 
0.7396455407142639, "learning_rate": 4.158463385354142e-05, "loss": 0.6237, "step": 4216 }, { "epoch": 5.39776, "grad_norm": 0.7606258392333984, "learning_rate": 4.158263305322129e-05, "loss": 0.66, "step": 4217 }, { "epoch": 5.39904, "grad_norm": 0.8394240140914917, "learning_rate": 4.158063225290116e-05, "loss": 0.6537, "step": 4218 }, { "epoch": 5.40032, "grad_norm": 0.7464375495910645, "learning_rate": 4.1578631452581034e-05, "loss": 0.6092, "step": 4219 }, { "epoch": 5.4016, "grad_norm": 0.7182469367980957, "learning_rate": 4.1576630652260906e-05, "loss": 0.6298, "step": 4220 }, { "epoch": 5.40288, "grad_norm": 0.772256076335907, "learning_rate": 4.157462985194078e-05, "loss": 0.6736, "step": 4221 }, { "epoch": 5.40416, "grad_norm": 0.7597767114639282, "learning_rate": 4.157262905162065e-05, "loss": 0.6706, "step": 4222 }, { "epoch": 5.4054400000000005, "grad_norm": 0.7585070729255676, "learning_rate": 4.157062825130052e-05, "loss": 0.6311, "step": 4223 }, { "epoch": 5.40672, "grad_norm": 0.7339140772819519, "learning_rate": 4.156862745098039e-05, "loss": 0.6276, "step": 4224 }, { "epoch": 5.408, "grad_norm": 0.7652080059051514, "learning_rate": 4.1566626650660265e-05, "loss": 0.6573, "step": 4225 }, { "epoch": 5.40928, "grad_norm": 0.7553825974464417, "learning_rate": 4.156462585034014e-05, "loss": 0.6619, "step": 4226 }, { "epoch": 5.41056, "grad_norm": 0.753115177154541, "learning_rate": 4.156262505002001e-05, "loss": 0.6916, "step": 4227 }, { "epoch": 5.41184, "grad_norm": 0.7845631241798401, "learning_rate": 4.156062424969988e-05, "loss": 0.6693, "step": 4228 }, { "epoch": 5.41312, "grad_norm": 0.7271414995193481, "learning_rate": 4.155862344937975e-05, "loss": 0.5909, "step": 4229 }, { "epoch": 5.4144, "grad_norm": 0.7683008313179016, "learning_rate": 4.1556622649059624e-05, "loss": 0.617, "step": 4230 }, { "epoch": 5.41568, "grad_norm": 0.7922500967979431, "learning_rate": 4.15546218487395e-05, "loss": 0.6502, "step": 4231 }, { "epoch": 
5.4169599999999996, "grad_norm": 0.7644626498222351, "learning_rate": 4.155262104841937e-05, "loss": 0.6364, "step": 4232 }, { "epoch": 5.41824, "grad_norm": 0.7368267774581909, "learning_rate": 4.155062024809924e-05, "loss": 0.6217, "step": 4233 }, { "epoch": 5.41952, "grad_norm": 0.7373183369636536, "learning_rate": 4.154861944777911e-05, "loss": 0.6186, "step": 4234 }, { "epoch": 5.4208, "grad_norm": 0.7321687936782837, "learning_rate": 4.1546618647458983e-05, "loss": 0.6367, "step": 4235 }, { "epoch": 5.42208, "grad_norm": 0.7199650406837463, "learning_rate": 4.1544617847138855e-05, "loss": 0.5642, "step": 4236 }, { "epoch": 5.42336, "grad_norm": 0.7745632529258728, "learning_rate": 4.154261704681873e-05, "loss": 0.6663, "step": 4237 }, { "epoch": 5.42464, "grad_norm": 0.7231164574623108, "learning_rate": 4.1540616246498606e-05, "loss": 0.5956, "step": 4238 }, { "epoch": 5.42592, "grad_norm": 0.7279819250106812, "learning_rate": 4.153861544617848e-05, "loss": 0.6507, "step": 4239 }, { "epoch": 5.4272, "grad_norm": 0.7030388116836548, "learning_rate": 4.153661464585834e-05, "loss": 0.616, "step": 4240 }, { "epoch": 5.42848, "grad_norm": 0.7477138042449951, "learning_rate": 4.1534613845538215e-05, "loss": 0.6875, "step": 4241 }, { "epoch": 5.42976, "grad_norm": 0.6878876686096191, "learning_rate": 4.1532613045218086e-05, "loss": 0.5856, "step": 4242 }, { "epoch": 5.43104, "grad_norm": 0.7027300596237183, "learning_rate": 4.153061224489796e-05, "loss": 0.6155, "step": 4243 }, { "epoch": 5.43232, "grad_norm": 0.7319774627685547, "learning_rate": 4.152861144457783e-05, "loss": 0.6515, "step": 4244 }, { "epoch": 5.4336, "grad_norm": 0.7408891916275024, "learning_rate": 4.152661064425771e-05, "loss": 0.6834, "step": 4245 }, { "epoch": 5.43488, "grad_norm": 0.7682157754898071, "learning_rate": 4.152460984393758e-05, "loss": 0.6244, "step": 4246 }, { "epoch": 5.43616, "grad_norm": 0.7219270467758179, "learning_rate": 4.152260904361745e-05, "loss": 0.631, "step": 4247 }, 
{ "epoch": 5.43744, "grad_norm": 0.7537044882774353, "learning_rate": 4.152060824329732e-05, "loss": 0.6456, "step": 4248 }, { "epoch": 5.43872, "grad_norm": 0.7409535050392151, "learning_rate": 4.151860744297719e-05, "loss": 0.6145, "step": 4249 }, { "epoch": 5.44, "grad_norm": 0.7161388993263245, "learning_rate": 4.151660664265706e-05, "loss": 0.608, "step": 4250 }, { "epoch": 5.44128, "grad_norm": 0.7184132933616638, "learning_rate": 4.151460584233693e-05, "loss": 0.6225, "step": 4251 }, { "epoch": 5.44256, "grad_norm": 0.7418175935745239, "learning_rate": 4.151260504201681e-05, "loss": 0.6434, "step": 4252 }, { "epoch": 5.44384, "grad_norm": 0.791469395160675, "learning_rate": 4.1510604241696684e-05, "loss": 0.7016, "step": 4253 }, { "epoch": 5.44512, "grad_norm": 0.7191916108131409, "learning_rate": 4.1508603441376556e-05, "loss": 0.5732, "step": 4254 }, { "epoch": 5.4464, "grad_norm": 0.723224937915802, "learning_rate": 4.150660264105643e-05, "loss": 0.6206, "step": 4255 }, { "epoch": 5.44768, "grad_norm": 0.7462792992591858, "learning_rate": 4.150460184073629e-05, "loss": 0.6278, "step": 4256 }, { "epoch": 5.44896, "grad_norm": 0.7277225255966187, "learning_rate": 4.1502601040416164e-05, "loss": 0.6239, "step": 4257 }, { "epoch": 5.45024, "grad_norm": 0.7746177911758423, "learning_rate": 4.1500600240096036e-05, "loss": 0.6549, "step": 4258 }, { "epoch": 5.45152, "grad_norm": 0.7281146049499512, "learning_rate": 4.1498599439775915e-05, "loss": 0.6261, "step": 4259 }, { "epoch": 5.4528, "grad_norm": 0.751801609992981, "learning_rate": 4.149659863945579e-05, "loss": 0.6514, "step": 4260 }, { "epoch": 5.45408, "grad_norm": 0.7374839782714844, "learning_rate": 4.149459783913566e-05, "loss": 0.6498, "step": 4261 }, { "epoch": 5.45536, "grad_norm": 0.7305448055267334, "learning_rate": 4.149259703881553e-05, "loss": 0.5913, "step": 4262 }, { "epoch": 5.45664, "grad_norm": 0.7178943753242493, "learning_rate": 4.14905962384954e-05, "loss": 0.6412, "step": 4263 }, { 
"epoch": 5.45792, "grad_norm": 0.7705517411231995, "learning_rate": 4.148859543817527e-05, "loss": 0.6451, "step": 4264 }, { "epoch": 5.4592, "grad_norm": 0.7343666553497314, "learning_rate": 4.148659463785514e-05, "loss": 0.632, "step": 4265 }, { "epoch": 5.4604800000000004, "grad_norm": 0.7479756474494934, "learning_rate": 4.148459383753502e-05, "loss": 0.6502, "step": 4266 }, { "epoch": 5.46176, "grad_norm": 0.7750630378723145, "learning_rate": 4.148259303721489e-05, "loss": 0.6519, "step": 4267 }, { "epoch": 5.46304, "grad_norm": 0.7741780281066895, "learning_rate": 4.148059223689476e-05, "loss": 0.6251, "step": 4268 }, { "epoch": 5.46432, "grad_norm": 0.7677576541900635, "learning_rate": 4.147859143657463e-05, "loss": 0.6463, "step": 4269 }, { "epoch": 5.4656, "grad_norm": 0.7549638748168945, "learning_rate": 4.1476590636254505e-05, "loss": 0.6795, "step": 4270 }, { "epoch": 5.46688, "grad_norm": 0.7305397391319275, "learning_rate": 4.147458983593438e-05, "loss": 0.6243, "step": 4271 }, { "epoch": 5.46816, "grad_norm": 0.7237127423286438, "learning_rate": 4.147258903561424e-05, "loss": 0.6224, "step": 4272 }, { "epoch": 5.46944, "grad_norm": 0.6945486068725586, "learning_rate": 4.147058823529412e-05, "loss": 0.6099, "step": 4273 }, { "epoch": 5.47072, "grad_norm": 0.7361446619033813, "learning_rate": 4.146858743497399e-05, "loss": 0.5984, "step": 4274 }, { "epoch": 5.4719999999999995, "grad_norm": 0.6917027235031128, "learning_rate": 4.1466586634653865e-05, "loss": 0.5837, "step": 4275 }, { "epoch": 5.47328, "grad_norm": 0.7172991633415222, "learning_rate": 4.1464585834333736e-05, "loss": 0.6568, "step": 4276 }, { "epoch": 5.47456, "grad_norm": 0.6892058253288269, "learning_rate": 4.146258503401361e-05, "loss": 0.6219, "step": 4277 }, { "epoch": 5.47584, "grad_norm": 0.7407313585281372, "learning_rate": 4.146058423369348e-05, "loss": 0.6509, "step": 4278 }, { "epoch": 5.47712, "grad_norm": 0.7310953736305237, "learning_rate": 4.145858343337335e-05, "loss": 
0.6227, "step": 4279 }, { "epoch": 5.4784, "grad_norm": 0.7653133273124695, "learning_rate": 4.1456582633053224e-05, "loss": 0.6519, "step": 4280 }, { "epoch": 5.47968, "grad_norm": 0.7355585098266602, "learning_rate": 4.1454581832733096e-05, "loss": 0.6163, "step": 4281 }, { "epoch": 5.48096, "grad_norm": 0.7199105620384216, "learning_rate": 4.145258103241297e-05, "loss": 0.6104, "step": 4282 }, { "epoch": 5.48224, "grad_norm": 0.7551198601722717, "learning_rate": 4.145058023209284e-05, "loss": 0.6599, "step": 4283 }, { "epoch": 5.48352, "grad_norm": 0.7141267657279968, "learning_rate": 4.144857943177271e-05, "loss": 0.5802, "step": 4284 }, { "epoch": 5.4848, "grad_norm": 0.7375865578651428, "learning_rate": 4.144657863145258e-05, "loss": 0.6395, "step": 4285 }, { "epoch": 5.48608, "grad_norm": 0.7291966676712036, "learning_rate": 4.1444577831132455e-05, "loss": 0.6074, "step": 4286 }, { "epoch": 5.48736, "grad_norm": 0.7387133836746216, "learning_rate": 4.144257703081233e-05, "loss": 0.6659, "step": 4287 }, { "epoch": 5.48864, "grad_norm": 0.7290977835655212, "learning_rate": 4.14405762304922e-05, "loss": 0.5999, "step": 4288 }, { "epoch": 5.48992, "grad_norm": 0.7163506746292114, "learning_rate": 4.143857543017207e-05, "loss": 0.5901, "step": 4289 }, { "epoch": 5.4912, "grad_norm": 0.7519278526306152, "learning_rate": 4.143657462985194e-05, "loss": 0.6746, "step": 4290 }, { "epoch": 5.49248, "grad_norm": 0.7682675123214722, "learning_rate": 4.1434573829531814e-05, "loss": 0.6694, "step": 4291 }, { "epoch": 5.49376, "grad_norm": 0.7485939264297485, "learning_rate": 4.1432573029211686e-05, "loss": 0.6073, "step": 4292 }, { "epoch": 5.49504, "grad_norm": 0.7766785621643066, "learning_rate": 4.143057222889156e-05, "loss": 0.5799, "step": 4293 }, { "epoch": 5.49632, "grad_norm": 0.7536609768867493, "learning_rate": 4.1428571428571437e-05, "loss": 0.6169, "step": 4294 }, { "epoch": 5.4976, "grad_norm": 0.725071907043457, "learning_rate": 4.14265706282513e-05, "loss": 
0.6393, "step": 4295 }, { "epoch": 5.49888, "grad_norm": 0.7046527862548828, "learning_rate": 4.1424569827931173e-05, "loss": 0.5988, "step": 4296 }, { "epoch": 5.50016, "grad_norm": 0.7888898849487305, "learning_rate": 4.1422569027611045e-05, "loss": 0.6408, "step": 4297 }, { "epoch": 5.50144, "grad_norm": 0.7429561614990234, "learning_rate": 4.142056822729092e-05, "loss": 0.6463, "step": 4298 }, { "epoch": 5.50272, "grad_norm": 0.7440699934959412, "learning_rate": 4.141856742697079e-05, "loss": 0.652, "step": 4299 }, { "epoch": 5.504, "grad_norm": 0.7715908885002136, "learning_rate": 4.141656662665066e-05, "loss": 0.6796, "step": 4300 }, { "epoch": 5.50528, "grad_norm": 0.714020311832428, "learning_rate": 4.141456582633054e-05, "loss": 0.5743, "step": 4301 }, { "epoch": 5.50656, "grad_norm": 0.7244009375572205, "learning_rate": 4.141256502601041e-05, "loss": 0.6498, "step": 4302 }, { "epoch": 5.50784, "grad_norm": 0.7662729620933533, "learning_rate": 4.1410564225690276e-05, "loss": 0.6456, "step": 4303 }, { "epoch": 5.50912, "grad_norm": 0.6930511593818665, "learning_rate": 4.140856342537015e-05, "loss": 0.5915, "step": 4304 }, { "epoch": 5.5104, "grad_norm": 0.7424132227897644, "learning_rate": 4.140656262505002e-05, "loss": 0.6082, "step": 4305 }, { "epoch": 5.51168, "grad_norm": 0.7727630734443665, "learning_rate": 4.140456182472989e-05, "loss": 0.6735, "step": 4306 }, { "epoch": 5.51296, "grad_norm": 0.6813573837280273, "learning_rate": 4.1402561024409764e-05, "loss": 0.5995, "step": 4307 }, { "epoch": 5.51424, "grad_norm": 0.7176674008369446, "learning_rate": 4.1400560224089636e-05, "loss": 0.6404, "step": 4308 }, { "epoch": 5.51552, "grad_norm": 0.74003666639328, "learning_rate": 4.1398559423769514e-05, "loss": 0.6219, "step": 4309 }, { "epoch": 5.5168, "grad_norm": 0.705366313457489, "learning_rate": 4.1396558623449386e-05, "loss": 0.6144, "step": 4310 }, { "epoch": 5.51808, "grad_norm": 0.70579594373703, "learning_rate": 4.139455782312925e-05, "loss": 
0.6345, "step": 4311 }, { "epoch": 5.51936, "grad_norm": 0.776077926158905, "learning_rate": 4.139255702280912e-05, "loss": 0.6859, "step": 4312 }, { "epoch": 5.52064, "grad_norm": 0.7283969521522522, "learning_rate": 4.1390556222488995e-05, "loss": 0.5956, "step": 4313 }, { "epoch": 5.52192, "grad_norm": 0.7156528234481812, "learning_rate": 4.138855542216887e-05, "loss": 0.6284, "step": 4314 }, { "epoch": 5.5232, "grad_norm": 0.7174420356750488, "learning_rate": 4.138655462184874e-05, "loss": 0.5826, "step": 4315 }, { "epoch": 5.52448, "grad_norm": 0.7152808904647827, "learning_rate": 4.138455382152862e-05, "loss": 0.5983, "step": 4316 }, { "epoch": 5.52576, "grad_norm": 0.7197295427322388, "learning_rate": 4.138255302120849e-05, "loss": 0.6156, "step": 4317 }, { "epoch": 5.5270399999999995, "grad_norm": 0.7645232677459717, "learning_rate": 4.138055222088836e-05, "loss": 0.6525, "step": 4318 }, { "epoch": 5.52832, "grad_norm": 0.6872177720069885, "learning_rate": 4.1378551420568226e-05, "loss": 0.6044, "step": 4319 }, { "epoch": 5.5296, "grad_norm": 0.7118226289749146, "learning_rate": 4.13765506202481e-05, "loss": 0.6176, "step": 4320 }, { "epoch": 5.53088, "grad_norm": 0.7357558608055115, "learning_rate": 4.137454981992797e-05, "loss": 0.6369, "step": 4321 }, { "epoch": 5.53216, "grad_norm": 0.7684040069580078, "learning_rate": 4.137254901960784e-05, "loss": 0.639, "step": 4322 }, { "epoch": 5.53344, "grad_norm": 0.7693250179290771, "learning_rate": 4.137054821928772e-05, "loss": 0.6801, "step": 4323 }, { "epoch": 5.53472, "grad_norm": 0.6828309893608093, "learning_rate": 4.136854741896759e-05, "loss": 0.6232, "step": 4324 }, { "epoch": 5.536, "grad_norm": 0.7307888865470886, "learning_rate": 4.1366546618647464e-05, "loss": 0.5854, "step": 4325 }, { "epoch": 5.53728, "grad_norm": 0.7778929471969604, "learning_rate": 4.1364545818327336e-05, "loss": 0.6335, "step": 4326 }, { "epoch": 5.53856, "grad_norm": 0.7381669878959656, "learning_rate": 4.13625450180072e-05, 
"loss": 0.6113, "step": 4327 }, { "epoch": 5.53984, "grad_norm": 0.7228230834007263, "learning_rate": 4.136054421768707e-05, "loss": 0.6167, "step": 4328 }, { "epoch": 5.54112, "grad_norm": 0.7991678714752197, "learning_rate": 4.1358543417366945e-05, "loss": 0.6445, "step": 4329 }, { "epoch": 5.5424, "grad_norm": 0.7551771402359009, "learning_rate": 4.1356542617046823e-05, "loss": 0.6068, "step": 4330 }, { "epoch": 5.54368, "grad_norm": 0.7122798562049866, "learning_rate": 4.1354541816726695e-05, "loss": 0.6084, "step": 4331 }, { "epoch": 5.54496, "grad_norm": 0.7480770349502563, "learning_rate": 4.135254101640657e-05, "loss": 0.6652, "step": 4332 }, { "epoch": 5.54624, "grad_norm": 0.7483392953872681, "learning_rate": 4.135054021608644e-05, "loss": 0.6547, "step": 4333 }, { "epoch": 5.5475200000000005, "grad_norm": 0.7390307784080505, "learning_rate": 4.134853941576631e-05, "loss": 0.6251, "step": 4334 }, { "epoch": 5.5488, "grad_norm": 0.7169946432113647, "learning_rate": 4.1346538615446176e-05, "loss": 0.5833, "step": 4335 }, { "epoch": 5.55008, "grad_norm": 0.799982488155365, "learning_rate": 4.134453781512605e-05, "loss": 0.6722, "step": 4336 }, { "epoch": 5.55136, "grad_norm": 0.7108681201934814, "learning_rate": 4.1342537014805926e-05, "loss": 0.5692, "step": 4337 }, { "epoch": 5.55264, "grad_norm": 0.7432281374931335, "learning_rate": 4.13405362144858e-05, "loss": 0.6642, "step": 4338 }, { "epoch": 5.55392, "grad_norm": 0.7395031452178955, "learning_rate": 4.133853541416567e-05, "loss": 0.6601, "step": 4339 }, { "epoch": 5.5552, "grad_norm": 0.7047814130783081, "learning_rate": 4.133653461384554e-05, "loss": 0.6566, "step": 4340 }, { "epoch": 5.55648, "grad_norm": 0.7198272347450256, "learning_rate": 4.1334533813525414e-05, "loss": 0.6494, "step": 4341 }, { "epoch": 5.55776, "grad_norm": 0.690862238407135, "learning_rate": 4.1332533013205286e-05, "loss": 0.5978, "step": 4342 }, { "epoch": 5.5590399999999995, "grad_norm": 0.7230415940284729, "learning_rate": 
4.133053221288515e-05, "loss": 0.6449, "step": 4343 }, { "epoch": 5.56032, "grad_norm": 0.7639696002006531, "learning_rate": 4.132853141256503e-05, "loss": 0.659, "step": 4344 }, { "epoch": 5.5616, "grad_norm": 0.7363530397415161, "learning_rate": 4.13265306122449e-05, "loss": 0.5586, "step": 4345 }, { "epoch": 5.56288, "grad_norm": 0.7204784750938416, "learning_rate": 4.132452981192477e-05, "loss": 0.6629, "step": 4346 }, { "epoch": 5.56416, "grad_norm": 0.787613570690155, "learning_rate": 4.1322529011604645e-05, "loss": 0.6596, "step": 4347 }, { "epoch": 5.56544, "grad_norm": 0.7643619179725647, "learning_rate": 4.132052821128452e-05, "loss": 0.6955, "step": 4348 }, { "epoch": 5.56672, "grad_norm": 0.7102790474891663, "learning_rate": 4.131852741096439e-05, "loss": 0.6573, "step": 4349 }, { "epoch": 5.568, "grad_norm": 0.7346861362457275, "learning_rate": 4.131652661064426e-05, "loss": 0.638, "step": 4350 }, { "epoch": 5.56928, "grad_norm": 0.7016168832778931, "learning_rate": 4.131452581032413e-05, "loss": 0.641, "step": 4351 }, { "epoch": 5.57056, "grad_norm": 0.7079355120658875, "learning_rate": 4.1312525010004004e-05, "loss": 0.6284, "step": 4352 }, { "epoch": 5.57184, "grad_norm": 0.7500919699668884, "learning_rate": 4.1310524209683876e-05, "loss": 0.6272, "step": 4353 }, { "epoch": 5.57312, "grad_norm": 0.708759605884552, "learning_rate": 4.130852340936375e-05, "loss": 0.5711, "step": 4354 }, { "epoch": 5.5744, "grad_norm": 0.7522294521331787, "learning_rate": 4.130652260904362e-05, "loss": 0.6849, "step": 4355 }, { "epoch": 5.57568, "grad_norm": 0.7236372828483582, "learning_rate": 4.130452180872349e-05, "loss": 0.612, "step": 4356 }, { "epoch": 5.57696, "grad_norm": 0.7170467972755432, "learning_rate": 4.1302521008403364e-05, "loss": 0.6623, "step": 4357 }, { "epoch": 5.57824, "grad_norm": 0.7366024255752563, "learning_rate": 4.1300520208083235e-05, "loss": 0.6025, "step": 4358 }, { "epoch": 5.5795200000000005, "grad_norm": 0.7273461222648621, 
"learning_rate": 4.129851940776311e-05, "loss": 0.6107, "step": 4359 }, { "epoch": 5.5808, "grad_norm": 0.7220586538314819, "learning_rate": 4.129651860744298e-05, "loss": 0.5956, "step": 4360 }, { "epoch": 5.58208, "grad_norm": 0.6963488459587097, "learning_rate": 4.129451780712285e-05, "loss": 0.6292, "step": 4361 }, { "epoch": 5.58336, "grad_norm": 0.7539929747581482, "learning_rate": 4.129251700680272e-05, "loss": 0.6307, "step": 4362 }, { "epoch": 5.58464, "grad_norm": 0.7906867265701294, "learning_rate": 4.1290516206482595e-05, "loss": 0.6993, "step": 4363 }, { "epoch": 5.58592, "grad_norm": 0.7354369163513184, "learning_rate": 4.1288515406162467e-05, "loss": 0.6817, "step": 4364 }, { "epoch": 5.5872, "grad_norm": 0.7213559150695801, "learning_rate": 4.128651460584234e-05, "loss": 0.601, "step": 4365 }, { "epoch": 5.58848, "grad_norm": 0.782337486743927, "learning_rate": 4.128451380552221e-05, "loss": 0.6586, "step": 4366 }, { "epoch": 5.58976, "grad_norm": 0.720512330532074, "learning_rate": 4.128251300520208e-05, "loss": 0.5993, "step": 4367 }, { "epoch": 5.59104, "grad_norm": 0.7558559775352478, "learning_rate": 4.1280512204881954e-05, "loss": 0.6544, "step": 4368 }, { "epoch": 5.59232, "grad_norm": 0.7189001441001892, "learning_rate": 4.1278511404561826e-05, "loss": 0.5955, "step": 4369 }, { "epoch": 5.5936, "grad_norm": 0.7266587018966675, "learning_rate": 4.12765106042417e-05, "loss": 0.6626, "step": 4370 }, { "epoch": 5.59488, "grad_norm": 0.7241358160972595, "learning_rate": 4.127450980392157e-05, "loss": 0.5832, "step": 4371 }, { "epoch": 5.59616, "grad_norm": 0.732850968837738, "learning_rate": 4.127250900360145e-05, "loss": 0.6341, "step": 4372 }, { "epoch": 5.59744, "grad_norm": 0.7497940063476562, "learning_rate": 4.127050820328131e-05, "loss": 0.6735, "step": 4373 }, { "epoch": 5.59872, "grad_norm": 0.7972435355186462, "learning_rate": 4.1268507402961185e-05, "loss": 0.6529, "step": 4374 }, { "epoch": 5.6, "grad_norm": 0.7178497314453125, 
"learning_rate": 4.126650660264106e-05, "loss": 0.5903, "step": 4375 }, { "epoch": 5.60128, "grad_norm": 0.7551731467247009, "learning_rate": 4.126450580232093e-05, "loss": 0.6286, "step": 4376 }, { "epoch": 5.60256, "grad_norm": 0.7458837032318115, "learning_rate": 4.12625050020008e-05, "loss": 0.6696, "step": 4377 }, { "epoch": 5.60384, "grad_norm": 0.7230226993560791, "learning_rate": 4.126050420168067e-05, "loss": 0.6121, "step": 4378 }, { "epoch": 5.60512, "grad_norm": 0.714648425579071, "learning_rate": 4.125850340136055e-05, "loss": 0.6175, "step": 4379 }, { "epoch": 5.6064, "grad_norm": 0.713045060634613, "learning_rate": 4.125650260104042e-05, "loss": 0.6181, "step": 4380 }, { "epoch": 5.60768, "grad_norm": 0.7239891886711121, "learning_rate": 4.125450180072029e-05, "loss": 0.6454, "step": 4381 }, { "epoch": 5.60896, "grad_norm": 0.7390968799591064, "learning_rate": 4.125250100040016e-05, "loss": 0.598, "step": 4382 }, { "epoch": 5.61024, "grad_norm": 0.7643166780471802, "learning_rate": 4.125050020008003e-05, "loss": 0.6299, "step": 4383 }, { "epoch": 5.61152, "grad_norm": 0.7632148861885071, "learning_rate": 4.1248499399759904e-05, "loss": 0.6601, "step": 4384 }, { "epoch": 5.6128, "grad_norm": 0.74639493227005, "learning_rate": 4.1246498599439776e-05, "loss": 0.7099, "step": 4385 }, { "epoch": 5.6140799999999995, "grad_norm": 0.7428092956542969, "learning_rate": 4.1244497799119654e-05, "loss": 0.6596, "step": 4386 }, { "epoch": 5.61536, "grad_norm": 0.7230287790298462, "learning_rate": 4.1242496998799526e-05, "loss": 0.603, "step": 4387 }, { "epoch": 5.61664, "grad_norm": 0.7466754913330078, "learning_rate": 4.12404961984794e-05, "loss": 0.702, "step": 4388 }, { "epoch": 5.61792, "grad_norm": 0.7360090017318726, "learning_rate": 4.123849539815926e-05, "loss": 0.6036, "step": 4389 }, { "epoch": 5.6192, "grad_norm": 0.7444376945495605, "learning_rate": 4.1236494597839135e-05, "loss": 0.6554, "step": 4390 }, { "epoch": 5.62048, "grad_norm": 
0.7407775521278381, "learning_rate": 4.123449379751901e-05, "loss": 0.6132, "step": 4391 }, { "epoch": 5.62176, "grad_norm": 0.6796741485595703, "learning_rate": 4.123249299719888e-05, "loss": 0.5356, "step": 4392 }, { "epoch": 5.62304, "grad_norm": 0.7613147497177124, "learning_rate": 4.123049219687876e-05, "loss": 0.6522, "step": 4393 }, { "epoch": 5.62432, "grad_norm": 0.7880995273590088, "learning_rate": 4.122849139655863e-05, "loss": 0.6211, "step": 4394 }, { "epoch": 5.6256, "grad_norm": 0.7633781433105469, "learning_rate": 4.12264905962385e-05, "loss": 0.6276, "step": 4395 }, { "epoch": 5.62688, "grad_norm": 0.7847978472709656, "learning_rate": 4.122448979591837e-05, "loss": 0.6798, "step": 4396 }, { "epoch": 5.62816, "grad_norm": 0.7588475346565247, "learning_rate": 4.122248899559824e-05, "loss": 0.642, "step": 4397 }, { "epoch": 5.62944, "grad_norm": 0.7453870177268982, "learning_rate": 4.122048819527811e-05, "loss": 0.623, "step": 4398 }, { "epoch": 5.63072, "grad_norm": 0.7075188755989075, "learning_rate": 4.121848739495798e-05, "loss": 0.6164, "step": 4399 }, { "epoch": 5.632, "grad_norm": 0.7136649489402771, "learning_rate": 4.121648659463786e-05, "loss": 0.6288, "step": 4400 }, { "epoch": 5.63328, "grad_norm": 0.7545991539955139, "learning_rate": 4.121448579431773e-05, "loss": 0.6107, "step": 4401 }, { "epoch": 5.6345600000000005, "grad_norm": 0.7157647013664246, "learning_rate": 4.1212484993997604e-05, "loss": 0.6143, "step": 4402 }, { "epoch": 5.63584, "grad_norm": 0.6989755630493164, "learning_rate": 4.1210484193677476e-05, "loss": 0.5937, "step": 4403 }, { "epoch": 5.63712, "grad_norm": 0.7509260177612305, "learning_rate": 4.120848339335735e-05, "loss": 0.6746, "step": 4404 }, { "epoch": 5.6384, "grad_norm": 0.7333345413208008, "learning_rate": 4.120648259303721e-05, "loss": 0.6164, "step": 4405 }, { "epoch": 5.63968, "grad_norm": 0.739132285118103, "learning_rate": 4.1204481792717085e-05, "loss": 0.6531, "step": 4406 }, { "epoch": 5.64096, 
"grad_norm": 0.7224278450012207, "learning_rate": 4.120248099239696e-05, "loss": 0.5819, "step": 4407 }, { "epoch": 5.64224, "grad_norm": 0.7341471910476685, "learning_rate": 4.1200480192076835e-05, "loss": 0.607, "step": 4408 }, { "epoch": 5.64352, "grad_norm": 0.7280572056770325, "learning_rate": 4.119847939175671e-05, "loss": 0.6057, "step": 4409 }, { "epoch": 5.6448, "grad_norm": 0.759056568145752, "learning_rate": 4.119647859143658e-05, "loss": 0.629, "step": 4410 }, { "epoch": 5.6460799999999995, "grad_norm": 0.7521056532859802, "learning_rate": 4.119447779111645e-05, "loss": 0.6231, "step": 4411 }, { "epoch": 5.64736, "grad_norm": 0.7467377185821533, "learning_rate": 4.119247699079632e-05, "loss": 0.6442, "step": 4412 }, { "epoch": 5.64864, "grad_norm": 0.7342444658279419, "learning_rate": 4.119047619047619e-05, "loss": 0.6292, "step": 4413 }, { "epoch": 5.64992, "grad_norm": 0.7450172901153564, "learning_rate": 4.1188475390156066e-05, "loss": 0.5998, "step": 4414 }, { "epoch": 5.6512, "grad_norm": 0.7140272855758667, "learning_rate": 4.118647458983594e-05, "loss": 0.6113, "step": 4415 }, { "epoch": 5.65248, "grad_norm": 0.6929570436477661, "learning_rate": 4.118447378951581e-05, "loss": 0.6088, "step": 4416 }, { "epoch": 5.65376, "grad_norm": 0.7283996939659119, "learning_rate": 4.118247298919568e-05, "loss": 0.6608, "step": 4417 }, { "epoch": 5.65504, "grad_norm": 0.7235706448554993, "learning_rate": 4.1180472188875554e-05, "loss": 0.653, "step": 4418 }, { "epoch": 5.65632, "grad_norm": 0.7216994166374207, "learning_rate": 4.1178471388555425e-05, "loss": 0.6264, "step": 4419 }, { "epoch": 5.6576, "grad_norm": 0.7781568169593811, "learning_rate": 4.11764705882353e-05, "loss": 0.6419, "step": 4420 }, { "epoch": 5.65888, "grad_norm": 0.8012334704399109, "learning_rate": 4.117446978791516e-05, "loss": 0.6591, "step": 4421 }, { "epoch": 5.66016, "grad_norm": 0.7411988973617554, "learning_rate": 4.117246898759504e-05, "loss": 0.6353, "step": 4422 }, { "epoch": 
5.66144, "grad_norm": 0.7188833355903625, "learning_rate": 4.117046818727491e-05, "loss": 0.5684, "step": 4423 }, { "epoch": 5.66272, "grad_norm": 0.7834116816520691, "learning_rate": 4.1168467386954785e-05, "loss": 0.6605, "step": 4424 }, { "epoch": 5.664, "grad_norm": 0.7423588037490845, "learning_rate": 4.1166466586634657e-05, "loss": 0.6259, "step": 4425 }, { "epoch": 5.66528, "grad_norm": 0.732661783695221, "learning_rate": 4.116446578631453e-05, "loss": 0.5846, "step": 4426 }, { "epoch": 5.6665600000000005, "grad_norm": 0.6886220574378967, "learning_rate": 4.11624649859944e-05, "loss": 0.5771, "step": 4427 }, { "epoch": 5.66784, "grad_norm": 0.7514188885688782, "learning_rate": 4.116046418567427e-05, "loss": 0.6466, "step": 4428 }, { "epoch": 5.66912, "grad_norm": 0.7303938269615173, "learning_rate": 4.1158463385354144e-05, "loss": 0.6405, "step": 4429 }, { "epoch": 5.6704, "grad_norm": 0.7757360935211182, "learning_rate": 4.1156462585034016e-05, "loss": 0.6433, "step": 4430 }, { "epoch": 5.67168, "grad_norm": 0.7613855004310608, "learning_rate": 4.115446178471389e-05, "loss": 0.6125, "step": 4431 }, { "epoch": 5.67296, "grad_norm": 0.739494800567627, "learning_rate": 4.115246098439376e-05, "loss": 0.5834, "step": 4432 }, { "epoch": 5.67424, "grad_norm": 0.7127769589424133, "learning_rate": 4.115046018407363e-05, "loss": 0.5963, "step": 4433 }, { "epoch": 5.67552, "grad_norm": 0.7135427594184875, "learning_rate": 4.11484593837535e-05, "loss": 0.6307, "step": 4434 }, { "epoch": 5.6768, "grad_norm": 0.7762594819068909, "learning_rate": 4.1146458583433375e-05, "loss": 0.666, "step": 4435 }, { "epoch": 5.67808, "grad_norm": 0.7698591351509094, "learning_rate": 4.114445778311325e-05, "loss": 0.5811, "step": 4436 }, { "epoch": 5.67936, "grad_norm": 0.7814648151397705, "learning_rate": 4.114245698279312e-05, "loss": 0.6203, "step": 4437 }, { "epoch": 5.68064, "grad_norm": 0.7443010210990906, "learning_rate": 4.114045618247299e-05, "loss": 0.6657, "step": 4438 }, { 
"epoch": 5.68192, "grad_norm": 0.691595196723938, "learning_rate": 4.113845538215286e-05, "loss": 0.6411, "step": 4439 }, { "epoch": 5.6832, "grad_norm": 0.7466495037078857, "learning_rate": 4.1136454581832734e-05, "loss": 0.6843, "step": 4440 }, { "epoch": 5.68448, "grad_norm": 0.7032425999641418, "learning_rate": 4.1134453781512606e-05, "loss": 0.5646, "step": 4441 }, { "epoch": 5.68576, "grad_norm": 0.7595518827438354, "learning_rate": 4.113245298119248e-05, "loss": 0.6562, "step": 4442 }, { "epoch": 5.68704, "grad_norm": 0.7430517077445984, "learning_rate": 4.113045218087235e-05, "loss": 0.6283, "step": 4443 }, { "epoch": 5.68832, "grad_norm": 0.7672387957572937, "learning_rate": 4.112845138055222e-05, "loss": 0.6236, "step": 4444 }, { "epoch": 5.6896, "grad_norm": 0.7632207274436951, "learning_rate": 4.1126450580232094e-05, "loss": 0.6524, "step": 4445 }, { "epoch": 5.69088, "grad_norm": 0.7333277463912964, "learning_rate": 4.1124449779911966e-05, "loss": 0.6461, "step": 4446 }, { "epoch": 5.69216, "grad_norm": 0.7459308505058289, "learning_rate": 4.112244897959184e-05, "loss": 0.6702, "step": 4447 }, { "epoch": 5.69344, "grad_norm": 0.7271379232406616, "learning_rate": 4.112044817927171e-05, "loss": 0.6392, "step": 4448 }, { "epoch": 5.69472, "grad_norm": 0.7518323659896851, "learning_rate": 4.111844737895158e-05, "loss": 0.6591, "step": 4449 }, { "epoch": 5.696, "grad_norm": 0.7861558794975281, "learning_rate": 4.111644657863146e-05, "loss": 0.6533, "step": 4450 }, { "epoch": 5.69728, "grad_norm": 0.7315780520439148, "learning_rate": 4.1114445778311325e-05, "loss": 0.6908, "step": 4451 }, { "epoch": 5.69856, "grad_norm": 0.7502129673957825, "learning_rate": 4.11124449779912e-05, "loss": 0.6766, "step": 4452 }, { "epoch": 5.69984, "grad_norm": 0.7338371872901917, "learning_rate": 4.111044417767107e-05, "loss": 0.6115, "step": 4453 }, { "epoch": 5.7011199999999995, "grad_norm": 0.7228209972381592, "learning_rate": 4.110844337735094e-05, "loss": 0.6202, "step": 
4454 }, { "epoch": 5.7024, "grad_norm": 0.7461691498756409, "learning_rate": 4.110644257703081e-05, "loss": 0.6349, "step": 4455 }, { "epoch": 5.70368, "grad_norm": 0.7166462540626526, "learning_rate": 4.1104441776710684e-05, "loss": 0.6712, "step": 4456 }, { "epoch": 5.70496, "grad_norm": 0.7871848940849304, "learning_rate": 4.110244097639056e-05, "loss": 0.6437, "step": 4457 }, { "epoch": 5.70624, "grad_norm": 0.7596803307533264, "learning_rate": 4.1100440176070435e-05, "loss": 0.6566, "step": 4458 }, { "epoch": 5.70752, "grad_norm": 0.7356720566749573, "learning_rate": 4.10984393757503e-05, "loss": 0.6138, "step": 4459 }, { "epoch": 5.7088, "grad_norm": 0.7193202376365662, "learning_rate": 4.109643857543017e-05, "loss": 0.6187, "step": 4460 }, { "epoch": 5.71008, "grad_norm": 0.7889663577079773, "learning_rate": 4.1094437775110043e-05, "loss": 0.6713, "step": 4461 }, { "epoch": 5.71136, "grad_norm": 0.7016068696975708, "learning_rate": 4.1092436974789915e-05, "loss": 0.5817, "step": 4462 }, { "epoch": 5.71264, "grad_norm": 0.7515783905982971, "learning_rate": 4.109043617446979e-05, "loss": 0.6497, "step": 4463 }, { "epoch": 5.71392, "grad_norm": 0.7173634171485901, "learning_rate": 4.1088435374149666e-05, "loss": 0.5583, "step": 4464 }, { "epoch": 5.7152, "grad_norm": 0.7629589438438416, "learning_rate": 4.108643457382954e-05, "loss": 0.6278, "step": 4465 }, { "epoch": 5.71648, "grad_norm": 0.7493870854377747, "learning_rate": 4.108443377350941e-05, "loss": 0.6373, "step": 4466 }, { "epoch": 5.71776, "grad_norm": 0.7296896576881409, "learning_rate": 4.1082432973189275e-05, "loss": 0.6011, "step": 4467 }, { "epoch": 5.71904, "grad_norm": 0.7143272757530212, "learning_rate": 4.1080432172869146e-05, "loss": 0.6313, "step": 4468 }, { "epoch": 5.72032, "grad_norm": 0.7367825508117676, "learning_rate": 4.107843137254902e-05, "loss": 0.6217, "step": 4469 }, { "epoch": 5.7216000000000005, "grad_norm": 0.7655727863311768, "learning_rate": 4.107643057222889e-05, "loss": 
0.6714, "step": 4470 }, { "epoch": 5.72288, "grad_norm": 0.7177988290786743, "learning_rate": 4.107442977190877e-05, "loss": 0.5762, "step": 4471 }, { "epoch": 5.72416, "grad_norm": 0.7632842063903809, "learning_rate": 4.107242897158864e-05, "loss": 0.6587, "step": 4472 }, { "epoch": 5.72544, "grad_norm": 0.7221323251724243, "learning_rate": 4.107042817126851e-05, "loss": 0.6062, "step": 4473 }, { "epoch": 5.72672, "grad_norm": 0.7250041365623474, "learning_rate": 4.1068427370948384e-05, "loss": 0.609, "step": 4474 }, { "epoch": 5.728, "grad_norm": 0.7287482619285583, "learning_rate": 4.106642657062825e-05, "loss": 0.6225, "step": 4475 }, { "epoch": 5.72928, "grad_norm": 0.7629007697105408, "learning_rate": 4.106442577030812e-05, "loss": 0.6546, "step": 4476 }, { "epoch": 5.73056, "grad_norm": 0.7124512791633606, "learning_rate": 4.106242496998799e-05, "loss": 0.6365, "step": 4477 }, { "epoch": 5.73184, "grad_norm": 0.7133152484893799, "learning_rate": 4.106042416966787e-05, "loss": 0.6063, "step": 4478 }, { "epoch": 5.7331199999999995, "grad_norm": 0.7545233964920044, "learning_rate": 4.1058423369347744e-05, "loss": 0.6563, "step": 4479 }, { "epoch": 5.7344, "grad_norm": 0.755072832107544, "learning_rate": 4.1056422569027615e-05, "loss": 0.6586, "step": 4480 }, { "epoch": 5.73568, "grad_norm": 0.7463574409484863, "learning_rate": 4.105442176870749e-05, "loss": 0.6427, "step": 4481 }, { "epoch": 5.73696, "grad_norm": 0.7250651121139526, "learning_rate": 4.105242096838736e-05, "loss": 0.6494, "step": 4482 }, { "epoch": 5.73824, "grad_norm": 0.7188123464584351, "learning_rate": 4.1050420168067224e-05, "loss": 0.6417, "step": 4483 }, { "epoch": 5.73952, "grad_norm": 0.758231520652771, "learning_rate": 4.1048419367747096e-05, "loss": 0.6194, "step": 4484 }, { "epoch": 5.7408, "grad_norm": 0.7206960320472717, "learning_rate": 4.1046418567426975e-05, "loss": 0.6222, "step": 4485 }, { "epoch": 5.74208, "grad_norm": 0.7939612865447998, "learning_rate": 
4.1044417767106847e-05, "loss": 0.6451, "step": 4486 }, { "epoch": 5.74336, "grad_norm": 0.7785453796386719, "learning_rate": 4.104241696678672e-05, "loss": 0.6497, "step": 4487 }, { "epoch": 5.74464, "grad_norm": 0.7556130290031433, "learning_rate": 4.104041616646659e-05, "loss": 0.6284, "step": 4488 }, { "epoch": 5.74592, "grad_norm": 0.6738141775131226, "learning_rate": 4.103841536614646e-05, "loss": 0.6161, "step": 4489 }, { "epoch": 5.7472, "grad_norm": 0.7317538857460022, "learning_rate": 4.1036414565826334e-05, "loss": 0.6297, "step": 4490 }, { "epoch": 5.74848, "grad_norm": 0.7428886890411377, "learning_rate": 4.10344137655062e-05, "loss": 0.5963, "step": 4491 }, { "epoch": 5.74976, "grad_norm": 0.7947216629981995, "learning_rate": 4.103241296518608e-05, "loss": 0.695, "step": 4492 }, { "epoch": 5.75104, "grad_norm": 0.8010162115097046, "learning_rate": 4.103041216486595e-05, "loss": 0.7321, "step": 4493 }, { "epoch": 5.75232, "grad_norm": 0.7321368455886841, "learning_rate": 4.102841136454582e-05, "loss": 0.6789, "step": 4494 }, { "epoch": 5.7536000000000005, "grad_norm": 0.7535264492034912, "learning_rate": 4.102641056422569e-05, "loss": 0.6474, "step": 4495 }, { "epoch": 5.75488, "grad_norm": 0.7041172385215759, "learning_rate": 4.1024409763905565e-05, "loss": 0.5921, "step": 4496 }, { "epoch": 5.75616, "grad_norm": 0.7587586641311646, "learning_rate": 4.102240896358544e-05, "loss": 0.6006, "step": 4497 }, { "epoch": 5.75744, "grad_norm": 0.7653579115867615, "learning_rate": 4.102040816326531e-05, "loss": 0.6526, "step": 4498 }, { "epoch": 5.75872, "grad_norm": 0.7497640252113342, "learning_rate": 4.101840736294518e-05, "loss": 0.6286, "step": 4499 }, { "epoch": 5.76, "grad_norm": 0.7529040575027466, "learning_rate": 4.101640656262505e-05, "loss": 0.6229, "step": 4500 }, { "epoch": 5.76128, "grad_norm": 0.7931720018386841, "learning_rate": 4.1014405762304924e-05, "loss": 0.6423, "step": 4501 }, { "epoch": 5.76256, "grad_norm": 0.7556605935096741, 
"learning_rate": 4.1012404961984796e-05, "loss": 0.5925, "step": 4502 }, { "epoch": 5.76384, "grad_norm": 0.7954467535018921, "learning_rate": 4.101040416166467e-05, "loss": 0.6246, "step": 4503 }, { "epoch": 5.76512, "grad_norm": 0.7651516199111938, "learning_rate": 4.100840336134454e-05, "loss": 0.6147, "step": 4504 }, { "epoch": 5.7664, "grad_norm": 0.758766770362854, "learning_rate": 4.100640256102441e-05, "loss": 0.6635, "step": 4505 }, { "epoch": 5.76768, "grad_norm": 0.7414392828941345, "learning_rate": 4.1004401760704284e-05, "loss": 0.6027, "step": 4506 }, { "epoch": 5.76896, "grad_norm": 0.7388911247253418, "learning_rate": 4.1002400960384156e-05, "loss": 0.6582, "step": 4507 }, { "epoch": 5.77024, "grad_norm": 0.7597686052322388, "learning_rate": 4.100040016006403e-05, "loss": 0.6223, "step": 4508 }, { "epoch": 5.77152, "grad_norm": 0.7913456559181213, "learning_rate": 4.09983993597439e-05, "loss": 0.6457, "step": 4509 }, { "epoch": 5.7728, "grad_norm": 0.7759582996368408, "learning_rate": 4.099639855942377e-05, "loss": 0.6386, "step": 4510 }, { "epoch": 5.77408, "grad_norm": 0.7403820753097534, "learning_rate": 4.099439775910364e-05, "loss": 0.6217, "step": 4511 }, { "epoch": 5.77536, "grad_norm": 0.7305609583854675, "learning_rate": 4.0992396958783515e-05, "loss": 0.6371, "step": 4512 }, { "epoch": 5.77664, "grad_norm": 0.7057348489761353, "learning_rate": 4.099039615846339e-05, "loss": 0.62, "step": 4513 }, { "epoch": 5.77792, "grad_norm": 0.7573639750480652, "learning_rate": 4.098839535814326e-05, "loss": 0.639, "step": 4514 }, { "epoch": 5.7792, "grad_norm": 0.7069675326347351, "learning_rate": 4.098639455782313e-05, "loss": 0.5887, "step": 4515 }, { "epoch": 5.78048, "grad_norm": 0.7522568702697754, "learning_rate": 4.0984393757503e-05, "loss": 0.6604, "step": 4516 }, { "epoch": 5.78176, "grad_norm": 0.7098150253295898, "learning_rate": 4.0982392957182874e-05, "loss": 0.6172, "step": 4517 }, { "epoch": 5.78304, "grad_norm": 0.7658361792564392, 
"learning_rate": 4.0980392156862746e-05, "loss": 0.7318, "step": 4518 }, { "epoch": 5.78432, "grad_norm": 0.7216033935546875, "learning_rate": 4.097839135654262e-05, "loss": 0.6195, "step": 4519 }, { "epoch": 5.7856, "grad_norm": 0.7166069149971008, "learning_rate": 4.0976390556222497e-05, "loss": 0.5746, "step": 4520 }, { "epoch": 5.78688, "grad_norm": 0.7510432004928589, "learning_rate": 4.097438975590236e-05, "loss": 0.6373, "step": 4521 }, { "epoch": 5.7881599999999995, "grad_norm": 0.700448751449585, "learning_rate": 4.0972388955582233e-05, "loss": 0.5698, "step": 4522 }, { "epoch": 5.78944, "grad_norm": 0.7070586681365967, "learning_rate": 4.0970388155262105e-05, "loss": 0.6247, "step": 4523 }, { "epoch": 5.79072, "grad_norm": 0.6846281886100769, "learning_rate": 4.096838735494198e-05, "loss": 0.625, "step": 4524 }, { "epoch": 5.792, "grad_norm": 0.745806872844696, "learning_rate": 4.096638655462185e-05, "loss": 0.6629, "step": 4525 }, { "epoch": 5.79328, "grad_norm": 0.7132704854011536, "learning_rate": 4.096438575430172e-05, "loss": 0.6222, "step": 4526 }, { "epoch": 5.79456, "grad_norm": 0.751122236251831, "learning_rate": 4.09623849539816e-05, "loss": 0.6406, "step": 4527 }, { "epoch": 5.79584, "grad_norm": 0.7574059367179871, "learning_rate": 4.096038415366147e-05, "loss": 0.6449, "step": 4528 }, { "epoch": 5.79712, "grad_norm": 0.6792533993721008, "learning_rate": 4.0958383353341336e-05, "loss": 0.5987, "step": 4529 }, { "epoch": 5.7984, "grad_norm": 0.6878370046615601, "learning_rate": 4.095638255302121e-05, "loss": 0.5901, "step": 4530 }, { "epoch": 5.79968, "grad_norm": 0.7586196660995483, "learning_rate": 4.095438175270108e-05, "loss": 0.6344, "step": 4531 }, { "epoch": 5.80096, "grad_norm": 0.7338054180145264, "learning_rate": 4.095238095238095e-05, "loss": 0.6223, "step": 4532 }, { "epoch": 5.80224, "grad_norm": 0.7303552627563477, "learning_rate": 4.0950380152060824e-05, "loss": 0.5915, "step": 4533 }, { "epoch": 5.80352, "grad_norm": 
0.7648298740386963, "learning_rate": 4.0948379351740696e-05, "loss": 0.615, "step": 4534 }, { "epoch": 5.8048, "grad_norm": 0.8030759692192078, "learning_rate": 4.0946378551420574e-05, "loss": 0.6604, "step": 4535 }, { "epoch": 5.80608, "grad_norm": 0.8049156069755554, "learning_rate": 4.0944377751100446e-05, "loss": 0.7182, "step": 4536 }, { "epoch": 5.80736, "grad_norm": 0.7284945845603943, "learning_rate": 4.094237695078031e-05, "loss": 0.6061, "step": 4537 }, { "epoch": 5.8086400000000005, "grad_norm": 0.6970748901367188, "learning_rate": 4.094037615046018e-05, "loss": 0.6016, "step": 4538 }, { "epoch": 5.80992, "grad_norm": 0.7719080448150635, "learning_rate": 4.0938375350140055e-05, "loss": 0.7058, "step": 4539 }, { "epoch": 5.8112, "grad_norm": 0.7578266859054565, "learning_rate": 4.093637454981993e-05, "loss": 0.6276, "step": 4540 }, { "epoch": 5.81248, "grad_norm": 0.7241922616958618, "learning_rate": 4.09343737494998e-05, "loss": 0.6167, "step": 4541 }, { "epoch": 5.81376, "grad_norm": 0.8004947304725647, "learning_rate": 4.093237294917968e-05, "loss": 0.7253, "step": 4542 }, { "epoch": 5.81504, "grad_norm": 0.7189851999282837, "learning_rate": 4.093037214885955e-05, "loss": 0.6462, "step": 4543 }, { "epoch": 5.81632, "grad_norm": 0.7319324016571045, "learning_rate": 4.092837134853942e-05, "loss": 0.6575, "step": 4544 }, { "epoch": 5.8176, "grad_norm": 0.7121530175209045, "learning_rate": 4.0926370548219286e-05, "loss": 0.6186, "step": 4545 }, { "epoch": 5.81888, "grad_norm": 0.7266112565994263, "learning_rate": 4.092436974789916e-05, "loss": 0.6121, "step": 4546 }, { "epoch": 5.82016, "grad_norm": 0.7524951696395874, "learning_rate": 4.092236894757903e-05, "loss": 0.6035, "step": 4547 }, { "epoch": 5.82144, "grad_norm": 0.7526006698608398, "learning_rate": 4.09203681472589e-05, "loss": 0.6494, "step": 4548 }, { "epoch": 5.82272, "grad_norm": 0.7238689064979553, "learning_rate": 4.091836734693878e-05, "loss": 0.6186, "step": 4549 }, { "epoch": 5.824, 
"grad_norm": 0.7418443560600281, "learning_rate": 4.091636654661865e-05, "loss": 0.6611, "step": 4550 }, { "epoch": 5.82528, "grad_norm": 0.7511699795722961, "learning_rate": 4.0914365746298524e-05, "loss": 0.6564, "step": 4551 }, { "epoch": 5.82656, "grad_norm": 0.6964154243469238, "learning_rate": 4.0912364945978396e-05, "loss": 0.6026, "step": 4552 }, { "epoch": 5.82784, "grad_norm": 0.7538017630577087, "learning_rate": 4.091036414565826e-05, "loss": 0.7241, "step": 4553 }, { "epoch": 5.82912, "grad_norm": 0.7111532092094421, "learning_rate": 4.090836334533813e-05, "loss": 0.6199, "step": 4554 }, { "epoch": 5.8304, "grad_norm": 0.7004513740539551, "learning_rate": 4.0906362545018005e-05, "loss": 0.6281, "step": 4555 }, { "epoch": 5.83168, "grad_norm": 0.743412971496582, "learning_rate": 4.090436174469788e-05, "loss": 0.6546, "step": 4556 }, { "epoch": 5.83296, "grad_norm": 0.7199406623840332, "learning_rate": 4.0902360944377755e-05, "loss": 0.6265, "step": 4557 }, { "epoch": 5.83424, "grad_norm": 0.7323152422904968, "learning_rate": 4.090036014405763e-05, "loss": 0.6174, "step": 4558 }, { "epoch": 5.83552, "grad_norm": 0.7763957381248474, "learning_rate": 4.08983593437375e-05, "loss": 0.6362, "step": 4559 }, { "epoch": 5.8368, "grad_norm": 0.7239270806312561, "learning_rate": 4.089635854341737e-05, "loss": 0.594, "step": 4560 }, { "epoch": 5.83808, "grad_norm": 0.7253373265266418, "learning_rate": 4.0894357743097236e-05, "loss": 0.6342, "step": 4561 }, { "epoch": 5.83936, "grad_norm": 0.7696877717971802, "learning_rate": 4.089235694277711e-05, "loss": 0.6545, "step": 4562 }, { "epoch": 5.8406400000000005, "grad_norm": 0.7727090716362, "learning_rate": 4.0890356142456986e-05, "loss": 0.6611, "step": 4563 }, { "epoch": 5.84192, "grad_norm": 0.6735355854034424, "learning_rate": 4.088835534213686e-05, "loss": 0.5948, "step": 4564 }, { "epoch": 5.8431999999999995, "grad_norm": 0.7272473573684692, "learning_rate": 4.088635454181673e-05, "loss": 0.6286, "step": 4565 }, 
{ "epoch": 5.84448, "grad_norm": 0.760036289691925, "learning_rate": 4.08843537414966e-05, "loss": 0.6463, "step": 4566 }, { "epoch": 5.84576, "grad_norm": 0.7171865701675415, "learning_rate": 4.0882352941176474e-05, "loss": 0.6485, "step": 4567 }, { "epoch": 5.84704, "grad_norm": 0.7382311820983887, "learning_rate": 4.0880352140856346e-05, "loss": 0.6391, "step": 4568 }, { "epoch": 5.84832, "grad_norm": 0.7612113356590271, "learning_rate": 4.087835134053621e-05, "loss": 0.6734, "step": 4569 }, { "epoch": 5.8496, "grad_norm": 0.7614792585372925, "learning_rate": 4.087635054021609e-05, "loss": 0.635, "step": 4570 }, { "epoch": 5.85088, "grad_norm": 0.8073767423629761, "learning_rate": 4.087434973989596e-05, "loss": 0.6494, "step": 4571 }, { "epoch": 5.85216, "grad_norm": 0.7585006952285767, "learning_rate": 4.087234893957583e-05, "loss": 0.6522, "step": 4572 }, { "epoch": 5.85344, "grad_norm": 0.7650215029716492, "learning_rate": 4.0870348139255705e-05, "loss": 0.6874, "step": 4573 }, { "epoch": 5.85472, "grad_norm": 0.7307962775230408, "learning_rate": 4.086834733893558e-05, "loss": 0.6017, "step": 4574 }, { "epoch": 5.856, "grad_norm": 0.7022853493690491, "learning_rate": 4.086634653861545e-05, "loss": 0.6255, "step": 4575 }, { "epoch": 5.85728, "grad_norm": 0.7452864050865173, "learning_rate": 4.086434573829532e-05, "loss": 0.6482, "step": 4576 }, { "epoch": 5.85856, "grad_norm": 0.686421811580658, "learning_rate": 4.086234493797519e-05, "loss": 0.5918, "step": 4577 }, { "epoch": 5.85984, "grad_norm": 0.7287219166755676, "learning_rate": 4.0860344137655064e-05, "loss": 0.6691, "step": 4578 }, { "epoch": 5.86112, "grad_norm": 0.7395564913749695, "learning_rate": 4.0858343337334936e-05, "loss": 0.6626, "step": 4579 }, { "epoch": 5.8624, "grad_norm": 0.727815568447113, "learning_rate": 4.085634253701481e-05, "loss": 0.6524, "step": 4580 }, { "epoch": 5.8636800000000004, "grad_norm": 0.7862719893455505, "learning_rate": 4.085434173669468e-05, "loss": 0.6451, "step": 
4581 }, { "epoch": 5.86496, "grad_norm": 0.7924978137016296, "learning_rate": 4.085234093637455e-05, "loss": 0.6689, "step": 4582 }, { "epoch": 5.86624, "grad_norm": 0.7216591835021973, "learning_rate": 4.0850340136054423e-05, "loss": 0.6297, "step": 4583 }, { "epoch": 5.86752, "grad_norm": 0.7293557524681091, "learning_rate": 4.0848339335734295e-05, "loss": 0.65, "step": 4584 }, { "epoch": 5.8688, "grad_norm": 0.7389594912528992, "learning_rate": 4.084633853541417e-05, "loss": 0.6201, "step": 4585 }, { "epoch": 5.87008, "grad_norm": 0.7208465337753296, "learning_rate": 4.084433773509404e-05, "loss": 0.5968, "step": 4586 }, { "epoch": 5.87136, "grad_norm": 0.7426589131355286, "learning_rate": 4.084233693477391e-05, "loss": 0.6313, "step": 4587 }, { "epoch": 5.87264, "grad_norm": 0.7646098732948303, "learning_rate": 4.084033613445378e-05, "loss": 0.7042, "step": 4588 }, { "epoch": 5.87392, "grad_norm": 0.7568240165710449, "learning_rate": 4.0838335334133655e-05, "loss": 0.6097, "step": 4589 }, { "epoch": 5.8751999999999995, "grad_norm": 0.7447047829627991, "learning_rate": 4.0836334533813526e-05, "loss": 0.6462, "step": 4590 }, { "epoch": 5.87648, "grad_norm": 0.7372913360595703, "learning_rate": 4.0834333733493405e-05, "loss": 0.6434, "step": 4591 }, { "epoch": 5.87776, "grad_norm": 0.7013605833053589, "learning_rate": 4.083233293317327e-05, "loss": 0.6086, "step": 4592 }, { "epoch": 5.87904, "grad_norm": 0.7706699967384338, "learning_rate": 4.083033213285314e-05, "loss": 0.6739, "step": 4593 }, { "epoch": 5.88032, "grad_norm": 0.7354162931442261, "learning_rate": 4.0828331332533014e-05, "loss": 0.6037, "step": 4594 }, { "epoch": 5.8816, "grad_norm": 0.7437216639518738, "learning_rate": 4.0826330532212886e-05, "loss": 0.6416, "step": 4595 }, { "epoch": 5.88288, "grad_norm": 0.7383934259414673, "learning_rate": 4.082432973189276e-05, "loss": 0.6556, "step": 4596 }, { "epoch": 5.88416, "grad_norm": 0.7448809146881104, "learning_rate": 4.082232893157263e-05, "loss": 
0.6923, "step": 4597 }, { "epoch": 5.88544, "grad_norm": 0.7986266613006592, "learning_rate": 4.082032813125251e-05, "loss": 0.6663, "step": 4598 }, { "epoch": 5.88672, "grad_norm": 0.7175683975219727, "learning_rate": 4.081832733093238e-05, "loss": 0.6453, "step": 4599 }, { "epoch": 5.888, "grad_norm": 0.7115585803985596, "learning_rate": 4.0816326530612245e-05, "loss": 0.5905, "step": 4600 }, { "epoch": 5.88928, "grad_norm": 0.6863852739334106, "learning_rate": 4.081432573029212e-05, "loss": 0.5642, "step": 4601 }, { "epoch": 5.89056, "grad_norm": 0.6771440505981445, "learning_rate": 4.081232492997199e-05, "loss": 0.565, "step": 4602 }, { "epoch": 5.89184, "grad_norm": 0.6936319470405579, "learning_rate": 4.081032412965186e-05, "loss": 0.5993, "step": 4603 }, { "epoch": 5.89312, "grad_norm": 0.7660918831825256, "learning_rate": 4.080832332933173e-05, "loss": 0.6337, "step": 4604 }, { "epoch": 5.8944, "grad_norm": 0.7964367270469666, "learning_rate": 4.080632252901161e-05, "loss": 0.6357, "step": 4605 }, { "epoch": 5.8956800000000005, "grad_norm": 0.7397758364677429, "learning_rate": 4.080432172869148e-05, "loss": 0.5938, "step": 4606 }, { "epoch": 5.89696, "grad_norm": 0.7152984738349915, "learning_rate": 4.0802320928371355e-05, "loss": 0.5801, "step": 4607 }, { "epoch": 5.89824, "grad_norm": 0.7661724090576172, "learning_rate": 4.080032012805122e-05, "loss": 0.6988, "step": 4608 }, { "epoch": 5.89952, "grad_norm": 0.7536041140556335, "learning_rate": 4.079831932773109e-05, "loss": 0.6331, "step": 4609 }, { "epoch": 5.9008, "grad_norm": 0.7481626868247986, "learning_rate": 4.0796318527410964e-05, "loss": 0.6484, "step": 4610 }, { "epoch": 5.90208, "grad_norm": 0.7638240456581116, "learning_rate": 4.0794317727090835e-05, "loss": 0.6959, "step": 4611 }, { "epoch": 5.90336, "grad_norm": 0.7237250804901123, "learning_rate": 4.0792316926770714e-05, "loss": 0.605, "step": 4612 }, { "epoch": 5.90464, "grad_norm": 0.7751877903938293, "learning_rate": 
4.0790316126450586e-05, "loss": 0.6425, "step": 4613 }, { "epoch": 5.90592, "grad_norm": 0.7242938280105591, "learning_rate": 4.078831532613046e-05, "loss": 0.616, "step": 4614 }, { "epoch": 5.9072, "grad_norm": 0.7469364404678345, "learning_rate": 4.078631452581033e-05, "loss": 0.6072, "step": 4615 }, { "epoch": 5.90848, "grad_norm": 0.728795051574707, "learning_rate": 4.0784313725490195e-05, "loss": 0.6552, "step": 4616 }, { "epoch": 5.90976, "grad_norm": 0.7057138085365295, "learning_rate": 4.078231292517007e-05, "loss": 0.6434, "step": 4617 }, { "epoch": 5.91104, "grad_norm": 0.7345725893974304, "learning_rate": 4.078031212484994e-05, "loss": 0.6371, "step": 4618 }, { "epoch": 5.91232, "grad_norm": 0.7135785818099976, "learning_rate": 4.077831132452982e-05, "loss": 0.6094, "step": 4619 }, { "epoch": 5.9136, "grad_norm": 0.7003117799758911, "learning_rate": 4.077631052420969e-05, "loss": 0.6538, "step": 4620 }, { "epoch": 5.91488, "grad_norm": 0.7230803966522217, "learning_rate": 4.077430972388956e-05, "loss": 0.6399, "step": 4621 }, { "epoch": 5.91616, "grad_norm": 0.7477660179138184, "learning_rate": 4.077230892356943e-05, "loss": 0.6334, "step": 4622 }, { "epoch": 5.91744, "grad_norm": 0.7172017693519592, "learning_rate": 4.0770308123249305e-05, "loss": 0.6714, "step": 4623 }, { "epoch": 5.91872, "grad_norm": 0.7592531442642212, "learning_rate": 4.076830732292917e-05, "loss": 0.6453, "step": 4624 }, { "epoch": 5.92, "grad_norm": 0.6898649334907532, "learning_rate": 4.076630652260904e-05, "loss": 0.6417, "step": 4625 }, { "epoch": 5.92128, "grad_norm": 0.7369380593299866, "learning_rate": 4.076430572228892e-05, "loss": 0.6391, "step": 4626 }, { "epoch": 5.92256, "grad_norm": 0.7180305123329163, "learning_rate": 4.076230492196879e-05, "loss": 0.5914, "step": 4627 }, { "epoch": 5.92384, "grad_norm": 0.7108614444732666, "learning_rate": 4.0760304121648664e-05, "loss": 0.629, "step": 4628 }, { "epoch": 5.92512, "grad_norm": 0.753671407699585, "learning_rate": 
4.0758303321328536e-05, "loss": 0.6227, "step": 4629 }, { "epoch": 5.9264, "grad_norm": 0.7389430403709412, "learning_rate": 4.075630252100841e-05, "loss": 0.6287, "step": 4630 }, { "epoch": 5.92768, "grad_norm": 0.7096794843673706, "learning_rate": 4.075430172068828e-05, "loss": 0.6025, "step": 4631 }, { "epoch": 5.92896, "grad_norm": 0.7167031764984131, "learning_rate": 4.0752300920368144e-05, "loss": 0.5903, "step": 4632 }, { "epoch": 5.9302399999999995, "grad_norm": 0.7445734143257141, "learning_rate": 4.075030012004802e-05, "loss": 0.6246, "step": 4633 }, { "epoch": 5.93152, "grad_norm": 0.7259752750396729, "learning_rate": 4.0748299319727895e-05, "loss": 0.6677, "step": 4634 }, { "epoch": 5.9328, "grad_norm": 0.7165055871009827, "learning_rate": 4.074629851940777e-05, "loss": 0.6135, "step": 4635 }, { "epoch": 5.93408, "grad_norm": 0.7381249666213989, "learning_rate": 4.074429771908764e-05, "loss": 0.616, "step": 4636 }, { "epoch": 5.93536, "grad_norm": 0.7970700263977051, "learning_rate": 4.074229691876751e-05, "loss": 0.7101, "step": 4637 }, { "epoch": 5.93664, "grad_norm": 0.7447617053985596, "learning_rate": 4.074029611844738e-05, "loss": 0.6006, "step": 4638 }, { "epoch": 5.93792, "grad_norm": 0.7397119402885437, "learning_rate": 4.0738295318127254e-05, "loss": 0.6462, "step": 4639 }, { "epoch": 5.9392, "grad_norm": 0.6990648508071899, "learning_rate": 4.073629451780712e-05, "loss": 0.6027, "step": 4640 }, { "epoch": 5.94048, "grad_norm": 0.739801824092865, "learning_rate": 4.0734293717487e-05, "loss": 0.6539, "step": 4641 }, { "epoch": 5.94176, "grad_norm": 0.7326043248176575, "learning_rate": 4.073229291716687e-05, "loss": 0.6548, "step": 4642 }, { "epoch": 5.94304, "grad_norm": 0.733765184879303, "learning_rate": 4.073029211684674e-05, "loss": 0.6622, "step": 4643 }, { "epoch": 5.94432, "grad_norm": 0.7281367778778076, "learning_rate": 4.0728291316526614e-05, "loss": 0.6045, "step": 4644 }, { "epoch": 5.9456, "grad_norm": 0.7234017848968506, 
"learning_rate": 4.0726290516206485e-05, "loss": 0.5911, "step": 4645 }, { "epoch": 5.94688, "grad_norm": 0.7438223958015442, "learning_rate": 4.072428971588636e-05, "loss": 0.6348, "step": 4646 }, { "epoch": 5.94816, "grad_norm": 0.7301828861236572, "learning_rate": 4.072228891556623e-05, "loss": 0.6204, "step": 4647 }, { "epoch": 5.94944, "grad_norm": 0.7404515743255615, "learning_rate": 4.07202881152461e-05, "loss": 0.6279, "step": 4648 }, { "epoch": 5.9507200000000005, "grad_norm": 0.6974015831947327, "learning_rate": 4.071828731492597e-05, "loss": 0.6196, "step": 4649 }, { "epoch": 5.952, "grad_norm": 0.7563602924346924, "learning_rate": 4.0716286514605845e-05, "loss": 0.6058, "step": 4650 }, { "epoch": 5.95328, "grad_norm": 0.7107272744178772, "learning_rate": 4.0714285714285717e-05, "loss": 0.6388, "step": 4651 }, { "epoch": 5.95456, "grad_norm": 0.7473395466804504, "learning_rate": 4.071228491396559e-05, "loss": 0.586, "step": 4652 }, { "epoch": 5.95584, "grad_norm": 0.6987488865852356, "learning_rate": 4.071028411364546e-05, "loss": 0.5951, "step": 4653 }, { "epoch": 5.95712, "grad_norm": 0.7390433549880981, "learning_rate": 4.070828331332533e-05, "loss": 0.6402, "step": 4654 }, { "epoch": 5.9584, "grad_norm": 0.7470793128013611, "learning_rate": 4.0706282513005204e-05, "loss": 0.6361, "step": 4655 }, { "epoch": 5.95968, "grad_norm": 0.7746595740318298, "learning_rate": 4.0704281712685076e-05, "loss": 0.667, "step": 4656 }, { "epoch": 5.96096, "grad_norm": 0.7226885557174683, "learning_rate": 4.070228091236495e-05, "loss": 0.6213, "step": 4657 }, { "epoch": 5.9622399999999995, "grad_norm": 0.7263903021812439, "learning_rate": 4.070028011204482e-05, "loss": 0.6263, "step": 4658 }, { "epoch": 5.96352, "grad_norm": 0.7590324282646179, "learning_rate": 4.069827931172469e-05, "loss": 0.6146, "step": 4659 }, { "epoch": 5.9648, "grad_norm": 0.7753758430480957, "learning_rate": 4.069627851140456e-05, "loss": 0.6689, "step": 4660 }, { "epoch": 5.96608, "grad_norm": 
0.7328972220420837, "learning_rate": 4.0694277711084435e-05, "loss": 0.6338, "step": 4661 }, { "epoch": 5.96736, "grad_norm": 0.7797688841819763, "learning_rate": 4.069227691076431e-05, "loss": 0.6802, "step": 4662 }, { "epoch": 5.96864, "grad_norm": 0.741576075553894, "learning_rate": 4.069027611044418e-05, "loss": 0.6109, "step": 4663 }, { "epoch": 5.96992, "grad_norm": 0.7838340997695923, "learning_rate": 4.068827531012405e-05, "loss": 0.6586, "step": 4664 }, { "epoch": 5.9712, "grad_norm": 0.7517712712287903, "learning_rate": 4.068627450980392e-05, "loss": 0.6261, "step": 4665 }, { "epoch": 5.97248, "grad_norm": 0.803371787071228, "learning_rate": 4.0684273709483794e-05, "loss": 0.6969, "step": 4666 }, { "epoch": 5.97376, "grad_norm": 0.7221397161483765, "learning_rate": 4.0682272909163666e-05, "loss": 0.6129, "step": 4667 }, { "epoch": 5.97504, "grad_norm": 0.7079233527183533, "learning_rate": 4.068027210884354e-05, "loss": 0.6356, "step": 4668 }, { "epoch": 5.97632, "grad_norm": 0.6837298274040222, "learning_rate": 4.067827130852342e-05, "loss": 0.6188, "step": 4669 }, { "epoch": 5.9776, "grad_norm": 0.7204573154449463, "learning_rate": 4.067627050820328e-05, "loss": 0.6068, "step": 4670 }, { "epoch": 5.97888, "grad_norm": 0.7004908919334412, "learning_rate": 4.0674269707883154e-05, "loss": 0.5853, "step": 4671 }, { "epoch": 5.98016, "grad_norm": 0.6996192932128906, "learning_rate": 4.0672268907563026e-05, "loss": 0.5958, "step": 4672 }, { "epoch": 5.98144, "grad_norm": 0.7411110401153564, "learning_rate": 4.06702681072429e-05, "loss": 0.5999, "step": 4673 }, { "epoch": 5.9827200000000005, "grad_norm": 0.7705451250076294, "learning_rate": 4.066826730692277e-05, "loss": 0.7017, "step": 4674 }, { "epoch": 5.984, "grad_norm": 0.7225644588470459, "learning_rate": 4.066626650660264e-05, "loss": 0.5662, "step": 4675 }, { "epoch": 5.98528, "grad_norm": 0.7521904706954956, "learning_rate": 4.066426570628252e-05, "loss": 0.6083, "step": 4676 }, { "epoch": 5.98656, 
"grad_norm": 0.756615161895752, "learning_rate": 4.066226490596239e-05, "loss": 0.6185, "step": 4677 }, { "epoch": 5.98784, "grad_norm": 0.733359158039093, "learning_rate": 4.066026410564226e-05, "loss": 0.6312, "step": 4678 }, { "epoch": 5.98912, "grad_norm": 0.774350643157959, "learning_rate": 4.065826330532213e-05, "loss": 0.6328, "step": 4679 }, { "epoch": 5.9904, "grad_norm": 0.7676503658294678, "learning_rate": 4.0656262505002e-05, "loss": 0.6359, "step": 4680 }, { "epoch": 5.99168, "grad_norm": 0.7357990741729736, "learning_rate": 4.065426170468187e-05, "loss": 0.6147, "step": 4681 }, { "epoch": 5.99296, "grad_norm": 0.7393483519554138, "learning_rate": 4.0652260904361744e-05, "loss": 0.6331, "step": 4682 }, { "epoch": 5.99424, "grad_norm": 0.7861780524253845, "learning_rate": 4.065026010404162e-05, "loss": 0.6724, "step": 4683 }, { "epoch": 5.99552, "grad_norm": 0.739687979221344, "learning_rate": 4.0648259303721495e-05, "loss": 0.6299, "step": 4684 }, { "epoch": 5.9968, "grad_norm": 0.7430374622344971, "learning_rate": 4.0646258503401366e-05, "loss": 0.645, "step": 4685 }, { "epoch": 5.99808, "grad_norm": 0.7406571507453918, "learning_rate": 4.064425770308123e-05, "loss": 0.6604, "step": 4686 }, { "epoch": 5.99936, "grad_norm": 0.729308009147644, "learning_rate": 4.06422569027611e-05, "loss": 0.6399, "step": 4687 }, { "epoch": 6.00064, "grad_norm": 1.4821006059646606, "learning_rate": 4.0640256102440975e-05, "loss": 1.1371, "step": 4688 }, { "epoch": 6.00192, "grad_norm": 0.7121221423149109, "learning_rate": 4.063825530212085e-05, "loss": 0.6295, "step": 4689 }, { "epoch": 6.0032, "grad_norm": 0.7151453495025635, "learning_rate": 4.0636254501800726e-05, "loss": 0.6071, "step": 4690 }, { "epoch": 6.00448, "grad_norm": 0.7339667677879333, "learning_rate": 4.06342537014806e-05, "loss": 0.6585, "step": 4691 }, { "epoch": 6.00576, "grad_norm": 0.7404779195785522, "learning_rate": 4.063225290116047e-05, "loss": 0.6038, "step": 4692 }, { "epoch": 6.00704, 
"grad_norm": 0.7343531250953674, "learning_rate": 4.063025210084034e-05, "loss": 0.6758, "step": 4693 }, { "epoch": 6.00832, "grad_norm": 0.7415891885757446, "learning_rate": 4.0628251300520206e-05, "loss": 0.6148, "step": 4694 }, { "epoch": 6.0096, "grad_norm": 0.7926338911056519, "learning_rate": 4.062625050020008e-05, "loss": 0.6918, "step": 4695 }, { "epoch": 6.01088, "grad_norm": 0.7479919791221619, "learning_rate": 4.062424969987995e-05, "loss": 0.6289, "step": 4696 }, { "epoch": 6.01216, "grad_norm": 0.7507800459861755, "learning_rate": 4.062224889955983e-05, "loss": 0.6093, "step": 4697 }, { "epoch": 6.01344, "grad_norm": 0.7076855301856995, "learning_rate": 4.06202480992397e-05, "loss": 0.5928, "step": 4698 }, { "epoch": 6.01472, "grad_norm": 0.7334170341491699, "learning_rate": 4.061824729891957e-05, "loss": 0.6195, "step": 4699 }, { "epoch": 6.016, "grad_norm": 0.7351385951042175, "learning_rate": 4.0616246498599444e-05, "loss": 0.6166, "step": 4700 }, { "epoch": 6.01728, "grad_norm": 0.7694382071495056, "learning_rate": 4.0614245698279316e-05, "loss": 0.6092, "step": 4701 }, { "epoch": 6.01856, "grad_norm": 0.7713647484779358, "learning_rate": 4.061224489795918e-05, "loss": 0.6331, "step": 4702 }, { "epoch": 6.01984, "grad_norm": 0.7566975355148315, "learning_rate": 4.061024409763905e-05, "loss": 0.6276, "step": 4703 }, { "epoch": 6.02112, "grad_norm": 0.6920115351676941, "learning_rate": 4.060824329731893e-05, "loss": 0.5527, "step": 4704 }, { "epoch": 6.0224, "grad_norm": 0.7560464143753052, "learning_rate": 4.0606242496998804e-05, "loss": 0.6171, "step": 4705 }, { "epoch": 6.02368, "grad_norm": 0.8139127492904663, "learning_rate": 4.0604241696678675e-05, "loss": 0.6635, "step": 4706 }, { "epoch": 6.02496, "grad_norm": 0.7818084955215454, "learning_rate": 4.060224089635855e-05, "loss": 0.6318, "step": 4707 }, { "epoch": 6.02624, "grad_norm": 0.7569645047187805, "learning_rate": 4.060024009603842e-05, "loss": 0.6523, "step": 4708 }, { "epoch": 6.02752, 
"grad_norm": 0.7755677103996277, "learning_rate": 4.059823929571829e-05, "loss": 0.6469, "step": 4709 }, { "epoch": 6.0288, "grad_norm": 0.762385904788971, "learning_rate": 4.0596238495398156e-05, "loss": 0.616, "step": 4710 }, { "epoch": 6.03008, "grad_norm": 0.7784088253974915, "learning_rate": 4.0594237695078035e-05, "loss": 0.6624, "step": 4711 }, { "epoch": 6.03136, "grad_norm": 0.7360923886299133, "learning_rate": 4.0592236894757907e-05, "loss": 0.5984, "step": 4712 }, { "epoch": 6.03264, "grad_norm": 0.7154642343521118, "learning_rate": 4.059023609443778e-05, "loss": 0.6125, "step": 4713 }, { "epoch": 6.03392, "grad_norm": 0.7827834486961365, "learning_rate": 4.058823529411765e-05, "loss": 0.5754, "step": 4714 }, { "epoch": 6.0352, "grad_norm": 0.7146974205970764, "learning_rate": 4.058623449379752e-05, "loss": 0.584, "step": 4715 }, { "epoch": 6.03648, "grad_norm": 0.7260125875473022, "learning_rate": 4.0584233693477394e-05, "loss": 0.6057, "step": 4716 }, { "epoch": 6.03776, "grad_norm": 0.7503035068511963, "learning_rate": 4.0582232893157266e-05, "loss": 0.6242, "step": 4717 }, { "epoch": 6.03904, "grad_norm": 0.7332813143730164, "learning_rate": 4.058023209283714e-05, "loss": 0.5681, "step": 4718 }, { "epoch": 6.04032, "grad_norm": 0.7520899772644043, "learning_rate": 4.057823129251701e-05, "loss": 0.6185, "step": 4719 }, { "epoch": 6.0416, "grad_norm": 0.7898109555244446, "learning_rate": 4.057623049219688e-05, "loss": 0.6065, "step": 4720 }, { "epoch": 6.04288, "grad_norm": 0.7639850378036499, "learning_rate": 4.057422969187675e-05, "loss": 0.6498, "step": 4721 }, { "epoch": 6.04416, "grad_norm": 0.7639596462249756, "learning_rate": 4.0572228891556625e-05, "loss": 0.6272, "step": 4722 }, { "epoch": 6.04544, "grad_norm": 0.7639127969741821, "learning_rate": 4.05702280912365e-05, "loss": 0.6034, "step": 4723 }, { "epoch": 6.04672, "grad_norm": 0.7917043566703796, "learning_rate": 4.056822729091637e-05, "loss": 0.6657, "step": 4724 }, { "epoch": 6.048, 
"grad_norm": 0.7585735321044922, "learning_rate": 4.056622649059624e-05, "loss": 0.5913, "step": 4725 }, { "epoch": 6.04928, "grad_norm": 0.7555089592933655, "learning_rate": 4.056422569027611e-05, "loss": 0.6295, "step": 4726 }, { "epoch": 6.05056, "grad_norm": 0.7650047540664673, "learning_rate": 4.0562224889955984e-05, "loss": 0.5904, "step": 4727 }, { "epoch": 6.05184, "grad_norm": 0.7548428177833557, "learning_rate": 4.0560224089635856e-05, "loss": 0.6117, "step": 4728 }, { "epoch": 6.05312, "grad_norm": 0.7310490012168884, "learning_rate": 4.055822328931573e-05, "loss": 0.6228, "step": 4729 }, { "epoch": 6.0544, "grad_norm": 0.7640831470489502, "learning_rate": 4.05562224889956e-05, "loss": 0.6002, "step": 4730 }, { "epoch": 6.05568, "grad_norm": 0.7306190133094788, "learning_rate": 4.055422168867547e-05, "loss": 0.5617, "step": 4731 }, { "epoch": 6.05696, "grad_norm": 0.767707109451294, "learning_rate": 4.0552220888355344e-05, "loss": 0.6073, "step": 4732 }, { "epoch": 6.05824, "grad_norm": 0.7210080027580261, "learning_rate": 4.0550220088035216e-05, "loss": 0.5746, "step": 4733 }, { "epoch": 6.05952, "grad_norm": 0.7745203971862793, "learning_rate": 4.054821928771509e-05, "loss": 0.6033, "step": 4734 }, { "epoch": 6.0608, "grad_norm": 0.7430739998817444, "learning_rate": 4.054621848739496e-05, "loss": 0.6069, "step": 4735 }, { "epoch": 6.06208, "grad_norm": 0.7837331891059875, "learning_rate": 4.054421768707483e-05, "loss": 0.6397, "step": 4736 }, { "epoch": 6.06336, "grad_norm": 0.7566797137260437, "learning_rate": 4.05422168867547e-05, "loss": 0.6293, "step": 4737 }, { "epoch": 6.06464, "grad_norm": 0.7480584979057312, "learning_rate": 4.0540216086434575e-05, "loss": 0.621, "step": 4738 }, { "epoch": 6.06592, "grad_norm": 0.797978401184082, "learning_rate": 4.0538215286114453e-05, "loss": 0.6476, "step": 4739 }, { "epoch": 6.0672, "grad_norm": 0.7844875454902649, "learning_rate": 4.053621448579432e-05, "loss": 0.6381, "step": 4740 }, { "epoch": 6.06848, 
"grad_norm": 0.7457473278045654, "learning_rate": 4.053421368547419e-05, "loss": 0.6175, "step": 4741 }, { "epoch": 6.06976, "grad_norm": 0.7767608761787415, "learning_rate": 4.053221288515406e-05, "loss": 0.6053, "step": 4742 }, { "epoch": 6.07104, "grad_norm": 0.7705170512199402, "learning_rate": 4.0530212084833934e-05, "loss": 0.6364, "step": 4743 }, { "epoch": 6.07232, "grad_norm": 0.7489408254623413, "learning_rate": 4.0528211284513806e-05, "loss": 0.6041, "step": 4744 }, { "epoch": 6.0736, "grad_norm": 0.7729488611221313, "learning_rate": 4.052621048419368e-05, "loss": 0.6407, "step": 4745 }, { "epoch": 6.07488, "grad_norm": 0.7681214213371277, "learning_rate": 4.0524209683873556e-05, "loss": 0.6271, "step": 4746 }, { "epoch": 6.07616, "grad_norm": 0.7444661855697632, "learning_rate": 4.052220888355343e-05, "loss": 0.6305, "step": 4747 }, { "epoch": 6.07744, "grad_norm": 0.7598939538002014, "learning_rate": 4.0520208083233293e-05, "loss": 0.6313, "step": 4748 }, { "epoch": 6.07872, "grad_norm": 0.7359374761581421, "learning_rate": 4.0518207282913165e-05, "loss": 0.5767, "step": 4749 }, { "epoch": 6.08, "grad_norm": 0.711103081703186, "learning_rate": 4.051620648259304e-05, "loss": 0.5656, "step": 4750 }, { "epoch": 6.08128, "grad_norm": 0.7639884352684021, "learning_rate": 4.051420568227291e-05, "loss": 0.6345, "step": 4751 }, { "epoch": 6.08256, "grad_norm": 0.772283673286438, "learning_rate": 4.051220488195278e-05, "loss": 0.6508, "step": 4752 }, { "epoch": 6.08384, "grad_norm": 0.7561479210853577, "learning_rate": 4.051020408163265e-05, "loss": 0.6016, "step": 4753 }, { "epoch": 6.08512, "grad_norm": 0.7787504196166992, "learning_rate": 4.050820328131253e-05, "loss": 0.6232, "step": 4754 }, { "epoch": 6.0864, "grad_norm": 0.7278993129730225, "learning_rate": 4.05062024809924e-05, "loss": 0.6724, "step": 4755 }, { "epoch": 6.08768, "grad_norm": 0.7617911100387573, "learning_rate": 4.050420168067227e-05, "loss": 0.6296, "step": 4756 }, { "epoch": 6.08896, 
"grad_norm": 0.7467544674873352, "learning_rate": 4.050220088035214e-05, "loss": 0.5833, "step": 4757 }, { "epoch": 6.09024, "grad_norm": 0.8187177777290344, "learning_rate": 4.050020008003201e-05, "loss": 0.6642, "step": 4758 }, { "epoch": 6.09152, "grad_norm": 0.7508170008659363, "learning_rate": 4.0498199279711884e-05, "loss": 0.5794, "step": 4759 }, { "epoch": 6.0928, "grad_norm": 0.7760382294654846, "learning_rate": 4.0496198479391756e-05, "loss": 0.6437, "step": 4760 }, { "epoch": 6.09408, "grad_norm": 0.762019693851471, "learning_rate": 4.0494197679071634e-05, "loss": 0.5662, "step": 4761 }, { "epoch": 6.09536, "grad_norm": 0.7687118053436279, "learning_rate": 4.0492196878751506e-05, "loss": 0.6324, "step": 4762 }, { "epoch": 6.09664, "grad_norm": 0.7786553502082825, "learning_rate": 4.049019607843138e-05, "loss": 0.5897, "step": 4763 }, { "epoch": 6.09792, "grad_norm": 0.7534022927284241, "learning_rate": 4.048819527811124e-05, "loss": 0.5713, "step": 4764 }, { "epoch": 6.0992, "grad_norm": 0.786078929901123, "learning_rate": 4.0486194477791115e-05, "loss": 0.5774, "step": 4765 }, { "epoch": 6.10048, "grad_norm": 0.7434951663017273, "learning_rate": 4.048419367747099e-05, "loss": 0.6195, "step": 4766 }, { "epoch": 6.10176, "grad_norm": 0.7410392761230469, "learning_rate": 4.048219287715086e-05, "loss": 0.6466, "step": 4767 }, { "epoch": 6.10304, "grad_norm": 0.7867603302001953, "learning_rate": 4.048019207683074e-05, "loss": 0.6098, "step": 4768 }, { "epoch": 6.10432, "grad_norm": 0.7912724614143372, "learning_rate": 4.047819127651061e-05, "loss": 0.66, "step": 4769 }, { "epoch": 6.1056, "grad_norm": 0.6925088167190552, "learning_rate": 4.047619047619048e-05, "loss": 0.5844, "step": 4770 }, { "epoch": 6.10688, "grad_norm": 0.7406015992164612, "learning_rate": 4.047418967587035e-05, "loss": 0.6517, "step": 4771 }, { "epoch": 6.10816, "grad_norm": 0.7543079257011414, "learning_rate": 4.047218887555022e-05, "loss": 0.5988, "step": 4772 }, { "epoch": 6.10944, 
"grad_norm": 0.7825755476951599, "learning_rate": 4.047018807523009e-05, "loss": 0.6316, "step": 4773 }, { "epoch": 6.11072, "grad_norm": 0.7861376404762268, "learning_rate": 4.046818727490996e-05, "loss": 0.6427, "step": 4774 }, { "epoch": 6.112, "grad_norm": 0.7529568672180176, "learning_rate": 4.046618647458984e-05, "loss": 0.6156, "step": 4775 }, { "epoch": 6.11328, "grad_norm": 0.7438963055610657, "learning_rate": 4.046418567426971e-05, "loss": 0.6108, "step": 4776 }, { "epoch": 6.11456, "grad_norm": 0.7456843852996826, "learning_rate": 4.0462184873949584e-05, "loss": 0.6296, "step": 4777 }, { "epoch": 6.11584, "grad_norm": 0.7504041194915771, "learning_rate": 4.0460184073629456e-05, "loss": 0.5838, "step": 4778 }, { "epoch": 6.11712, "grad_norm": 0.7819758653640747, "learning_rate": 4.045818327330933e-05, "loss": 0.5966, "step": 4779 }, { "epoch": 6.1184, "grad_norm": 0.7668342590332031, "learning_rate": 4.045618247298919e-05, "loss": 0.6158, "step": 4780 }, { "epoch": 6.11968, "grad_norm": 0.7753564715385437, "learning_rate": 4.0454181672669065e-05, "loss": 0.642, "step": 4781 }, { "epoch": 6.12096, "grad_norm": 0.8022623062133789, "learning_rate": 4.045218087234894e-05, "loss": 0.6358, "step": 4782 }, { "epoch": 6.12224, "grad_norm": 0.7484129667282104, "learning_rate": 4.0450180072028815e-05, "loss": 0.5627, "step": 4783 }, { "epoch": 6.12352, "grad_norm": 0.7918726205825806, "learning_rate": 4.044817927170869e-05, "loss": 0.6377, "step": 4784 }, { "epoch": 6.1248, "grad_norm": 0.7575948238372803, "learning_rate": 4.044617847138856e-05, "loss": 0.589, "step": 4785 }, { "epoch": 6.12608, "grad_norm": 0.7408061623573303, "learning_rate": 4.044417767106843e-05, "loss": 0.6352, "step": 4786 }, { "epoch": 6.12736, "grad_norm": 0.7478961944580078, "learning_rate": 4.04421768707483e-05, "loss": 0.5852, "step": 4787 }, { "epoch": 6.12864, "grad_norm": 0.7341578006744385, "learning_rate": 4.044017607042817e-05, "loss": 0.6114, "step": 4788 }, { "epoch": 6.12992, 
"grad_norm": 0.7750809192657471, "learning_rate": 4.0438175270108046e-05, "loss": 0.6121, "step": 4789 }, { "epoch": 6.1312, "grad_norm": 0.7869114875793457, "learning_rate": 4.043617446978792e-05, "loss": 0.6637, "step": 4790 }, { "epoch": 6.13248, "grad_norm": 0.7512006163597107, "learning_rate": 4.043417366946779e-05, "loss": 0.5679, "step": 4791 }, { "epoch": 6.13376, "grad_norm": 0.7822096347808838, "learning_rate": 4.043217286914766e-05, "loss": 0.6191, "step": 4792 }, { "epoch": 6.13504, "grad_norm": 0.7540210485458374, "learning_rate": 4.0430172068827534e-05, "loss": 0.6011, "step": 4793 }, { "epoch": 6.1363199999999996, "grad_norm": 0.7164133191108704, "learning_rate": 4.0428171268507406e-05, "loss": 0.5114, "step": 4794 }, { "epoch": 6.1376, "grad_norm": 0.8122145533561707, "learning_rate": 4.042617046818728e-05, "loss": 0.6451, "step": 4795 }, { "epoch": 6.13888, "grad_norm": 0.8163143396377563, "learning_rate": 4.042416966786715e-05, "loss": 0.6086, "step": 4796 }, { "epoch": 6.14016, "grad_norm": 0.7786663770675659, "learning_rate": 4.042216886754702e-05, "loss": 0.6447, "step": 4797 }, { "epoch": 6.14144, "grad_norm": 0.7091884016990662, "learning_rate": 4.042016806722689e-05, "loss": 0.5588, "step": 4798 }, { "epoch": 6.14272, "grad_norm": 0.7768360376358032, "learning_rate": 4.0418167266906765e-05, "loss": 0.6151, "step": 4799 }, { "epoch": 6.144, "grad_norm": 0.7751429677009583, "learning_rate": 4.041616646658664e-05, "loss": 0.6076, "step": 4800 }, { "epoch": 6.14528, "grad_norm": 0.7756529450416565, "learning_rate": 4.041416566626651e-05, "loss": 0.6237, "step": 4801 }, { "epoch": 6.14656, "grad_norm": 0.7572968006134033, "learning_rate": 4.041216486594638e-05, "loss": 0.6407, "step": 4802 }, { "epoch": 6.14784, "grad_norm": 0.7175725698471069, "learning_rate": 4.041016406562625e-05, "loss": 0.588, "step": 4803 }, { "epoch": 6.14912, "grad_norm": 0.7753421068191528, "learning_rate": 4.0408163265306124e-05, "loss": 0.628, "step": 4804 }, { 
"epoch": 6.1504, "grad_norm": 0.777458131313324, "learning_rate": 4.0406162464985996e-05, "loss": 0.5963, "step": 4805 }, { "epoch": 6.15168, "grad_norm": 0.7486628890037537, "learning_rate": 4.040416166466587e-05, "loss": 0.5623, "step": 4806 }, { "epoch": 6.15296, "grad_norm": 0.7569031715393066, "learning_rate": 4.040216086434574e-05, "loss": 0.6062, "step": 4807 }, { "epoch": 6.15424, "grad_norm": 0.7664810419082642, "learning_rate": 4.040016006402561e-05, "loss": 0.6069, "step": 4808 }, { "epoch": 6.15552, "grad_norm": 0.7901382446289062, "learning_rate": 4.0398159263705483e-05, "loss": 0.6281, "step": 4809 }, { "epoch": 6.1568, "grad_norm": 0.7850843667984009, "learning_rate": 4.0396158463385355e-05, "loss": 0.6222, "step": 4810 }, { "epoch": 6.15808, "grad_norm": 0.77830570936203, "learning_rate": 4.039415766306523e-05, "loss": 0.6369, "step": 4811 }, { "epoch": 6.15936, "grad_norm": 0.7659288048744202, "learning_rate": 4.03921568627451e-05, "loss": 0.6099, "step": 4812 }, { "epoch": 6.16064, "grad_norm": 0.7957006096839905, "learning_rate": 4.039015606242497e-05, "loss": 0.6153, "step": 4813 }, { "epoch": 6.16192, "grad_norm": 0.7597688436508179, "learning_rate": 4.038815526210484e-05, "loss": 0.632, "step": 4814 }, { "epoch": 6.1632, "grad_norm": 0.7524228692054749, "learning_rate": 4.0386154461784715e-05, "loss": 0.5995, "step": 4815 }, { "epoch": 6.16448, "grad_norm": 0.7459036111831665, "learning_rate": 4.0384153661464586e-05, "loss": 0.6228, "step": 4816 }, { "epoch": 6.16576, "grad_norm": 0.7470423579216003, "learning_rate": 4.0382152861144465e-05, "loss": 0.6378, "step": 4817 }, { "epoch": 6.16704, "grad_norm": 0.7385801076889038, "learning_rate": 4.038015206082433e-05, "loss": 0.6017, "step": 4818 }, { "epoch": 6.16832, "grad_norm": 0.7643359303474426, "learning_rate": 4.03781512605042e-05, "loss": 0.6293, "step": 4819 }, { "epoch": 6.1696, "grad_norm": 0.7111765146255493, "learning_rate": 4.0376150460184074e-05, "loss": 0.582, "step": 4820 }, { 
"epoch": 6.17088, "grad_norm": 0.780404806137085, "learning_rate": 4.0374149659863946e-05, "loss": 0.6365, "step": 4821 }, { "epoch": 6.17216, "grad_norm": 0.7903060913085938, "learning_rate": 4.037214885954382e-05, "loss": 0.7153, "step": 4822 }, { "epoch": 6.17344, "grad_norm": 0.7531589865684509, "learning_rate": 4.037014805922369e-05, "loss": 0.5813, "step": 4823 }, { "epoch": 6.17472, "grad_norm": 0.7753871083259583, "learning_rate": 4.036814725890357e-05, "loss": 0.6235, "step": 4824 }, { "epoch": 6.176, "grad_norm": 0.7611346244812012, "learning_rate": 4.036614645858344e-05, "loss": 0.6133, "step": 4825 }, { "epoch": 6.17728, "grad_norm": 0.7633965611457825, "learning_rate": 4.0364145658263305e-05, "loss": 0.591, "step": 4826 }, { "epoch": 6.17856, "grad_norm": 0.8028787970542908, "learning_rate": 4.036214485794318e-05, "loss": 0.6118, "step": 4827 }, { "epoch": 6.17984, "grad_norm": 0.7600660920143127, "learning_rate": 4.036014405762305e-05, "loss": 0.6283, "step": 4828 }, { "epoch": 6.18112, "grad_norm": 0.754428505897522, "learning_rate": 4.035814325730292e-05, "loss": 0.6278, "step": 4829 }, { "epoch": 6.1824, "grad_norm": 0.7525807619094849, "learning_rate": 4.035614245698279e-05, "loss": 0.5935, "step": 4830 }, { "epoch": 6.18368, "grad_norm": 0.7541505694389343, "learning_rate": 4.035414165666267e-05, "loss": 0.6048, "step": 4831 }, { "epoch": 6.18496, "grad_norm": 0.7696326375007629, "learning_rate": 4.035214085634254e-05, "loss": 0.6422, "step": 4832 }, { "epoch": 6.18624, "grad_norm": 0.7631934881210327, "learning_rate": 4.0350140056022415e-05, "loss": 0.6121, "step": 4833 }, { "epoch": 6.18752, "grad_norm": 0.7651000022888184, "learning_rate": 4.034813925570228e-05, "loss": 0.5951, "step": 4834 }, { "epoch": 6.1888, "grad_norm": 0.7359318137168884, "learning_rate": 4.034613845538215e-05, "loss": 0.5828, "step": 4835 }, { "epoch": 6.19008, "grad_norm": 0.7644410729408264, "learning_rate": 4.0344137655062024e-05, "loss": 0.6218, "step": 4836 }, { 
"epoch": 6.19136, "grad_norm": 0.7283311486244202, "learning_rate": 4.0342136854741895e-05, "loss": 0.5594, "step": 4837 }, { "epoch": 6.19264, "grad_norm": 0.7427007555961609, "learning_rate": 4.0340136054421774e-05, "loss": 0.5902, "step": 4838 }, { "epoch": 6.19392, "grad_norm": 0.7666735649108887, "learning_rate": 4.0338135254101646e-05, "loss": 0.6229, "step": 4839 }, { "epoch": 6.1952, "grad_norm": 0.7470300197601318, "learning_rate": 4.033613445378152e-05, "loss": 0.6134, "step": 4840 }, { "epoch": 6.19648, "grad_norm": 0.7347720265388489, "learning_rate": 4.033413365346139e-05, "loss": 0.5755, "step": 4841 }, { "epoch": 6.19776, "grad_norm": 0.7179660797119141, "learning_rate": 4.0332132853141255e-05, "loss": 0.54, "step": 4842 }, { "epoch": 6.19904, "grad_norm": 0.7514517903327942, "learning_rate": 4.0330132052821127e-05, "loss": 0.6363, "step": 4843 }, { "epoch": 6.20032, "grad_norm": 0.7676318883895874, "learning_rate": 4.0328131252501e-05, "loss": 0.6533, "step": 4844 }, { "epoch": 6.2016, "grad_norm": 0.830500602722168, "learning_rate": 4.032613045218088e-05, "loss": 0.7654, "step": 4845 }, { "epoch": 6.20288, "grad_norm": 0.7603785395622253, "learning_rate": 4.032412965186075e-05, "loss": 0.5547, "step": 4846 }, { "epoch": 6.20416, "grad_norm": 0.7514224648475647, "learning_rate": 4.032212885154062e-05, "loss": 0.5948, "step": 4847 }, { "epoch": 6.20544, "grad_norm": 0.757417619228363, "learning_rate": 4.032012805122049e-05, "loss": 0.5691, "step": 4848 }, { "epoch": 6.20672, "grad_norm": 0.719901978969574, "learning_rate": 4.0318127250900364e-05, "loss": 0.6315, "step": 4849 }, { "epoch": 6.208, "grad_norm": 0.7711414098739624, "learning_rate": 4.031612645058023e-05, "loss": 0.5801, "step": 4850 }, { "epoch": 6.20928, "grad_norm": 0.7761551737785339, "learning_rate": 4.03141256502601e-05, "loss": 0.5992, "step": 4851 }, { "epoch": 6.21056, "grad_norm": 0.7462658286094666, "learning_rate": 4.031212484993998e-05, "loss": 0.6378, "step": 4852 }, { 
"epoch": 6.21184, "grad_norm": 0.7466690540313721, "learning_rate": 4.031012404961985e-05, "loss": 0.6151, "step": 4853 }, { "epoch": 6.21312, "grad_norm": 0.8062450885772705, "learning_rate": 4.0308123249299724e-05, "loss": 0.6893, "step": 4854 }, { "epoch": 6.2144, "grad_norm": 0.8179064989089966, "learning_rate": 4.0306122448979596e-05, "loss": 0.6184, "step": 4855 }, { "epoch": 6.21568, "grad_norm": 0.7624944448471069, "learning_rate": 4.030412164865947e-05, "loss": 0.6016, "step": 4856 }, { "epoch": 6.21696, "grad_norm": 0.7305824756622314, "learning_rate": 4.030212084833934e-05, "loss": 0.6336, "step": 4857 }, { "epoch": 6.21824, "grad_norm": 0.7892305850982666, "learning_rate": 4.0300120048019204e-05, "loss": 0.6435, "step": 4858 }, { "epoch": 6.21952, "grad_norm": 0.7659081220626831, "learning_rate": 4.029811924769908e-05, "loss": 0.6336, "step": 4859 }, { "epoch": 6.2208, "grad_norm": 0.729543149471283, "learning_rate": 4.0296118447378955e-05, "loss": 0.6063, "step": 4860 }, { "epoch": 6.22208, "grad_norm": 0.7288227677345276, "learning_rate": 4.029411764705883e-05, "loss": 0.5951, "step": 4861 }, { "epoch": 6.22336, "grad_norm": 0.7727043032646179, "learning_rate": 4.02921168467387e-05, "loss": 0.6412, "step": 4862 }, { "epoch": 6.22464, "grad_norm": 0.7801682949066162, "learning_rate": 4.029011604641857e-05, "loss": 0.6209, "step": 4863 }, { "epoch": 6.22592, "grad_norm": 0.7641760110855103, "learning_rate": 4.028811524609844e-05, "loss": 0.6342, "step": 4864 }, { "epoch": 6.2272, "grad_norm": 0.7689356803894043, "learning_rate": 4.0286114445778314e-05, "loss": 0.6232, "step": 4865 }, { "epoch": 6.22848, "grad_norm": 0.7414539456367493, "learning_rate": 4.028411364545818e-05, "loss": 0.6114, "step": 4866 }, { "epoch": 6.22976, "grad_norm": 0.7376709580421448, "learning_rate": 4.028211284513806e-05, "loss": 0.5968, "step": 4867 }, { "epoch": 6.23104, "grad_norm": 0.785620391368866, "learning_rate": 4.028011204481793e-05, "loss": 0.6389, "step": 4868 }, { 
"epoch": 6.23232, "grad_norm": 0.768951416015625, "learning_rate": 4.02781112444978e-05, "loss": 0.5874, "step": 4869 }, { "epoch": 6.2336, "grad_norm": 0.7968980669975281, "learning_rate": 4.0276110444177673e-05, "loss": 0.6834, "step": 4870 }, { "epoch": 6.23488, "grad_norm": 0.769282341003418, "learning_rate": 4.0274109643857545e-05, "loss": 0.6257, "step": 4871 }, { "epoch": 6.23616, "grad_norm": 0.7746549844741821, "learning_rate": 4.027210884353742e-05, "loss": 0.6179, "step": 4872 }, { "epoch": 6.23744, "grad_norm": 0.7487301230430603, "learning_rate": 4.027010804321729e-05, "loss": 0.6463, "step": 4873 }, { "epoch": 6.23872, "grad_norm": 0.7296451330184937, "learning_rate": 4.026810724289716e-05, "loss": 0.6646, "step": 4874 }, { "epoch": 6.24, "grad_norm": 0.7273462414741516, "learning_rate": 4.026610644257703e-05, "loss": 0.5974, "step": 4875 }, { "epoch": 6.24128, "grad_norm": 0.7543179392814636, "learning_rate": 4.0264105642256905e-05, "loss": 0.6767, "step": 4876 }, { "epoch": 6.24256, "grad_norm": 0.7834622859954834, "learning_rate": 4.0262104841936776e-05, "loss": 0.6277, "step": 4877 }, { "epoch": 6.24384, "grad_norm": 0.7536554932594299, "learning_rate": 4.026010404161665e-05, "loss": 0.5714, "step": 4878 }, { "epoch": 6.24512, "grad_norm": 0.7422727942466736, "learning_rate": 4.025810324129652e-05, "loss": 0.5911, "step": 4879 }, { "epoch": 6.2464, "grad_norm": 0.7820923924446106, "learning_rate": 4.025610244097639e-05, "loss": 0.6481, "step": 4880 }, { "epoch": 6.24768, "grad_norm": 0.6983485221862793, "learning_rate": 4.0254101640656264e-05, "loss": 0.5551, "step": 4881 }, { "epoch": 6.24896, "grad_norm": 0.7593510746955872, "learning_rate": 4.0252100840336136e-05, "loss": 0.6049, "step": 4882 }, { "epoch": 6.25024, "grad_norm": 0.803278386592865, "learning_rate": 4.025010004001601e-05, "loss": 0.6087, "step": 4883 }, { "epoch": 6.25152, "grad_norm": 0.7537515163421631, "learning_rate": 4.024809923969588e-05, "loss": 0.5856, "step": 4884 }, { 
"epoch": 6.2528, "grad_norm": 0.7161216735839844, "learning_rate": 4.024609843937575e-05, "loss": 0.5604, "step": 4885 }, { "epoch": 6.25408, "grad_norm": 0.7833333015441895, "learning_rate": 4.024409763905562e-05, "loss": 0.6248, "step": 4886 }, { "epoch": 6.25536, "grad_norm": 0.7813183069229126, "learning_rate": 4.0242096838735495e-05, "loss": 0.6205, "step": 4887 }, { "epoch": 6.25664, "grad_norm": 0.8205659985542297, "learning_rate": 4.024009603841537e-05, "loss": 0.582, "step": 4888 }, { "epoch": 6.25792, "grad_norm": 0.7965737581253052, "learning_rate": 4.023809523809524e-05, "loss": 0.6541, "step": 4889 }, { "epoch": 6.2592, "grad_norm": 0.7237341403961182, "learning_rate": 4.023609443777511e-05, "loss": 0.6384, "step": 4890 }, { "epoch": 6.26048, "grad_norm": 0.7347778081893921, "learning_rate": 4.023409363745498e-05, "loss": 0.6078, "step": 4891 }, { "epoch": 6.26176, "grad_norm": 0.7442294359207153, "learning_rate": 4.0232092837134854e-05, "loss": 0.6534, "step": 4892 }, { "epoch": 6.26304, "grad_norm": 0.7912835478782654, "learning_rate": 4.0230092036814726e-05, "loss": 0.5893, "step": 4893 }, { "epoch": 6.26432, "grad_norm": 0.7610131502151489, "learning_rate": 4.02280912364946e-05, "loss": 0.6303, "step": 4894 }, { "epoch": 6.2656, "grad_norm": 0.7433664798736572, "learning_rate": 4.022609043617448e-05, "loss": 0.6046, "step": 4895 }, { "epoch": 6.2668800000000005, "grad_norm": 0.7865737080574036, "learning_rate": 4.022408963585434e-05, "loss": 0.6401, "step": 4896 }, { "epoch": 6.26816, "grad_norm": 0.7448906898498535, "learning_rate": 4.0222088835534214e-05, "loss": 0.6217, "step": 4897 }, { "epoch": 6.26944, "grad_norm": 0.7785893678665161, "learning_rate": 4.0220088035214085e-05, "loss": 0.6827, "step": 4898 }, { "epoch": 6.27072, "grad_norm": 0.7872070670127869, "learning_rate": 4.021808723489396e-05, "loss": 0.6266, "step": 4899 }, { "epoch": 6.272, "grad_norm": 0.7371118068695068, "learning_rate": 4.021608643457383e-05, "loss": 0.6029, "step": 
4900 }, { "epoch": 6.27328, "grad_norm": 0.7747698426246643, "learning_rate": 4.02140856342537e-05, "loss": 0.6047, "step": 4901 }, { "epoch": 6.27456, "grad_norm": 0.7733140587806702, "learning_rate": 4.021208483393358e-05, "loss": 0.6443, "step": 4902 }, { "epoch": 6.27584, "grad_norm": 0.7372586727142334, "learning_rate": 4.021008403361345e-05, "loss": 0.6, "step": 4903 }, { "epoch": 6.27712, "grad_norm": 0.807420015335083, "learning_rate": 4.020808323329332e-05, "loss": 0.6279, "step": 4904 }, { "epoch": 6.2783999999999995, "grad_norm": 0.7645445466041565, "learning_rate": 4.020608243297319e-05, "loss": 0.6336, "step": 4905 }, { "epoch": 6.27968, "grad_norm": 0.7852516770362854, "learning_rate": 4.020408163265306e-05, "loss": 0.6219, "step": 4906 }, { "epoch": 6.28096, "grad_norm": 0.7741815447807312, "learning_rate": 4.020208083233293e-05, "loss": 0.6526, "step": 4907 }, { "epoch": 6.28224, "grad_norm": 0.7737951278686523, "learning_rate": 4.0200080032012804e-05, "loss": 0.6533, "step": 4908 }, { "epoch": 6.28352, "grad_norm": 0.7810575366020203, "learning_rate": 4.019807923169268e-05, "loss": 0.6677, "step": 4909 }, { "epoch": 6.2848, "grad_norm": 0.7242299914360046, "learning_rate": 4.0196078431372555e-05, "loss": 0.5908, "step": 4910 }, { "epoch": 6.28608, "grad_norm": 0.7727022767066956, "learning_rate": 4.0194077631052426e-05, "loss": 0.592, "step": 4911 }, { "epoch": 6.28736, "grad_norm": 0.7893999814987183, "learning_rate": 4.019207683073229e-05, "loss": 0.6288, "step": 4912 }, { "epoch": 6.28864, "grad_norm": 0.7878124713897705, "learning_rate": 4.019007603041216e-05, "loss": 0.6547, "step": 4913 }, { "epoch": 6.28992, "grad_norm": 0.7988905906677246, "learning_rate": 4.0188075230092035e-05, "loss": 0.6392, "step": 4914 }, { "epoch": 6.2912, "grad_norm": 0.7619364261627197, "learning_rate": 4.018607442977191e-05, "loss": 0.6155, "step": 4915 }, { "epoch": 6.29248, "grad_norm": 0.8226683735847473, "learning_rate": 4.0184073629451786e-05, "loss": 0.6236, 
"step": 4916 }, { "epoch": 6.29376, "grad_norm": 0.77449631690979, "learning_rate": 4.018207282913166e-05, "loss": 0.5779, "step": 4917 }, { "epoch": 6.29504, "grad_norm": 0.7637079358100891, "learning_rate": 4.018007202881153e-05, "loss": 0.5976, "step": 4918 }, { "epoch": 6.29632, "grad_norm": 0.7683913707733154, "learning_rate": 4.01780712284914e-05, "loss": 0.6022, "step": 4919 }, { "epoch": 6.2976, "grad_norm": 0.7907748818397522, "learning_rate": 4.0176070428171266e-05, "loss": 0.5968, "step": 4920 }, { "epoch": 6.29888, "grad_norm": 0.7320314049720764, "learning_rate": 4.017406962785114e-05, "loss": 0.5732, "step": 4921 }, { "epoch": 6.30016, "grad_norm": 0.8165443539619446, "learning_rate": 4.017206882753101e-05, "loss": 0.6384, "step": 4922 }, { "epoch": 6.30144, "grad_norm": 0.8002688884735107, "learning_rate": 4.017006802721089e-05, "loss": 0.5705, "step": 4923 }, { "epoch": 6.30272, "grad_norm": 0.7913237810134888, "learning_rate": 4.016806722689076e-05, "loss": 0.6072, "step": 4924 }, { "epoch": 6.304, "grad_norm": 0.7377355098724365, "learning_rate": 4.016606642657063e-05, "loss": 0.6112, "step": 4925 }, { "epoch": 6.30528, "grad_norm": 0.680272102355957, "learning_rate": 4.0164065626250504e-05, "loss": 0.5723, "step": 4926 }, { "epoch": 6.30656, "grad_norm": 0.68758225440979, "learning_rate": 4.0162064825930376e-05, "loss": 0.5122, "step": 4927 }, { "epoch": 6.30784, "grad_norm": 0.8090822100639343, "learning_rate": 4.016006402561024e-05, "loss": 0.6358, "step": 4928 }, { "epoch": 6.30912, "grad_norm": 0.7640751600265503, "learning_rate": 4.015806322529011e-05, "loss": 0.6516, "step": 4929 }, { "epoch": 6.3104, "grad_norm": 0.7689977288246155, "learning_rate": 4.015606242496999e-05, "loss": 0.602, "step": 4930 }, { "epoch": 6.31168, "grad_norm": 0.7796737551689148, "learning_rate": 4.0154061624649864e-05, "loss": 0.5984, "step": 4931 }, { "epoch": 6.31296, "grad_norm": 0.7571960687637329, "learning_rate": 4.0152060824329735e-05, "loss": 0.6333, 
"step": 4932 }, { "epoch": 6.31424, "grad_norm": 0.773962676525116, "learning_rate": 4.015006002400961e-05, "loss": 0.5782, "step": 4933 }, { "epoch": 6.31552, "grad_norm": 0.8556650280952454, "learning_rate": 4.014805922368948e-05, "loss": 0.692, "step": 4934 }, { "epoch": 6.3168, "grad_norm": 0.7910232543945312, "learning_rate": 4.014605842336935e-05, "loss": 0.6353, "step": 4935 }, { "epoch": 6.31808, "grad_norm": 0.7409533858299255, "learning_rate": 4.0144057623049216e-05, "loss": 0.5861, "step": 4936 }, { "epoch": 6.31936, "grad_norm": 0.7902359366416931, "learning_rate": 4.0142056822729095e-05, "loss": 0.6325, "step": 4937 }, { "epoch": 6.32064, "grad_norm": 0.7420294880867004, "learning_rate": 4.0140056022408967e-05, "loss": 0.5833, "step": 4938 }, { "epoch": 6.32192, "grad_norm": 0.7837445139884949, "learning_rate": 4.013805522208884e-05, "loss": 0.6377, "step": 4939 }, { "epoch": 6.3232, "grad_norm": 0.7776581645011902, "learning_rate": 4.013605442176871e-05, "loss": 0.6328, "step": 4940 }, { "epoch": 6.32448, "grad_norm": 0.7739065885543823, "learning_rate": 4.013405362144858e-05, "loss": 0.5716, "step": 4941 }, { "epoch": 6.32576, "grad_norm": 0.7609522342681885, "learning_rate": 4.0132052821128454e-05, "loss": 0.5454, "step": 4942 }, { "epoch": 6.32704, "grad_norm": 0.7803019285202026, "learning_rate": 4.0130052020808326e-05, "loss": 0.6351, "step": 4943 }, { "epoch": 6.32832, "grad_norm": 0.7924274206161499, "learning_rate": 4.01280512204882e-05, "loss": 0.6234, "step": 4944 }, { "epoch": 6.3296, "grad_norm": 0.7934367060661316, "learning_rate": 4.012605042016807e-05, "loss": 0.632, "step": 4945 }, { "epoch": 6.33088, "grad_norm": 0.7212319374084473, "learning_rate": 4.012404961984794e-05, "loss": 0.6216, "step": 4946 }, { "epoch": 6.33216, "grad_norm": 0.7968099117279053, "learning_rate": 4.012204881952781e-05, "loss": 0.6441, "step": 4947 }, { "epoch": 6.33344, "grad_norm": 0.7849327921867371, "learning_rate": 4.0120048019207685e-05, "loss": 0.6567, 
"step": 4948 }, { "epoch": 6.33472, "grad_norm": 0.7716954350471497, "learning_rate": 4.011804721888756e-05, "loss": 0.6313, "step": 4949 }, { "epoch": 6.336, "grad_norm": 0.7645626664161682, "learning_rate": 4.011604641856743e-05, "loss": 0.6403, "step": 4950 }, { "epoch": 6.33728, "grad_norm": 0.7575722932815552, "learning_rate": 4.01140456182473e-05, "loss": 0.5777, "step": 4951 }, { "epoch": 6.33856, "grad_norm": 0.7596458196640015, "learning_rate": 4.011204481792717e-05, "loss": 0.6296, "step": 4952 }, { "epoch": 6.33984, "grad_norm": 0.7410492897033691, "learning_rate": 4.0110044017607044e-05, "loss": 0.5836, "step": 4953 }, { "epoch": 6.34112, "grad_norm": 0.8087933659553528, "learning_rate": 4.0108043217286916e-05, "loss": 0.6035, "step": 4954 }, { "epoch": 6.3424, "grad_norm": 0.7586066126823425, "learning_rate": 4.010604241696679e-05, "loss": 0.6121, "step": 4955 }, { "epoch": 6.34368, "grad_norm": 0.7876436114311218, "learning_rate": 4.010404161664666e-05, "loss": 0.6206, "step": 4956 }, { "epoch": 6.34496, "grad_norm": 0.7594673037528992, "learning_rate": 4.010204081632653e-05, "loss": 0.6085, "step": 4957 }, { "epoch": 6.34624, "grad_norm": 0.7009496092796326, "learning_rate": 4.010004001600641e-05, "loss": 0.5725, "step": 4958 }, { "epoch": 6.34752, "grad_norm": 0.7607222199440002, "learning_rate": 4.0098039215686276e-05, "loss": 0.6079, "step": 4959 }, { "epoch": 6.3488, "grad_norm": 0.8146457076072693, "learning_rate": 4.009603841536615e-05, "loss": 0.6636, "step": 4960 }, { "epoch": 6.35008, "grad_norm": 0.757946252822876, "learning_rate": 4.009403761504602e-05, "loss": 0.6275, "step": 4961 }, { "epoch": 6.35136, "grad_norm": 0.7005823850631714, "learning_rate": 4.009203681472589e-05, "loss": 0.547, "step": 4962 }, { "epoch": 6.35264, "grad_norm": 0.7344463467597961, "learning_rate": 4.009003601440576e-05, "loss": 0.6197, "step": 4963 }, { "epoch": 6.3539200000000005, "grad_norm": 0.7441943883895874, "learning_rate": 4.0088035214085635e-05, "loss": 
0.6031, "step": 4964 }, { "epoch": 6.3552, "grad_norm": 0.7756435871124268, "learning_rate": 4.0086034413765513e-05, "loss": 0.6007, "step": 4965 }, { "epoch": 6.35648, "grad_norm": 0.771628737449646, "learning_rate": 4.0084033613445385e-05, "loss": 0.6, "step": 4966 }, { "epoch": 6.35776, "grad_norm": 0.7456242442131042, "learning_rate": 4.008203281312525e-05, "loss": 0.5856, "step": 4967 }, { "epoch": 6.35904, "grad_norm": 0.7749770879745483, "learning_rate": 4.008003201280512e-05, "loss": 0.6364, "step": 4968 }, { "epoch": 6.36032, "grad_norm": 0.7612026929855347, "learning_rate": 4.0078031212484994e-05, "loss": 0.5996, "step": 4969 }, { "epoch": 6.3616, "grad_norm": 0.7761407494544983, "learning_rate": 4.0076030412164866e-05, "loss": 0.6143, "step": 4970 }, { "epoch": 6.36288, "grad_norm": 0.7732192873954773, "learning_rate": 4.007402961184474e-05, "loss": 0.6296, "step": 4971 }, { "epoch": 6.36416, "grad_norm": 0.7865023016929626, "learning_rate": 4.0072028811524616e-05, "loss": 0.6159, "step": 4972 }, { "epoch": 6.3654399999999995, "grad_norm": 0.7385578155517578, "learning_rate": 4.007002801120449e-05, "loss": 0.6024, "step": 4973 }, { "epoch": 6.36672, "grad_norm": 0.7962034344673157, "learning_rate": 4.006802721088436e-05, "loss": 0.6887, "step": 4974 }, { "epoch": 6.368, "grad_norm": 0.7819473147392273, "learning_rate": 4.0066026410564225e-05, "loss": 0.6156, "step": 4975 }, { "epoch": 6.36928, "grad_norm": 0.7631332278251648, "learning_rate": 4.00640256102441e-05, "loss": 0.6126, "step": 4976 }, { "epoch": 6.37056, "grad_norm": 0.7788332104682922, "learning_rate": 4.006202480992397e-05, "loss": 0.6718, "step": 4977 }, { "epoch": 6.37184, "grad_norm": 0.7646488547325134, "learning_rate": 4.006002400960384e-05, "loss": 0.6265, "step": 4978 }, { "epoch": 6.37312, "grad_norm": 0.7828549742698669, "learning_rate": 4.005802320928371e-05, "loss": 0.6718, "step": 4979 }, { "epoch": 6.3744, "grad_norm": 0.7134932279586792, "learning_rate": 4.005602240896359e-05, 
"loss": 0.5275, "step": 4980 }, { "epoch": 6.37568, "grad_norm": 0.7558833360671997, "learning_rate": 4.005402160864346e-05, "loss": 0.6531, "step": 4981 }, { "epoch": 6.37696, "grad_norm": 0.7762933373451233, "learning_rate": 4.0052020808323335e-05, "loss": 0.5951, "step": 4982 }, { "epoch": 6.37824, "grad_norm": 0.7445576786994934, "learning_rate": 4.00500200080032e-05, "loss": 0.5966, "step": 4983 }, { "epoch": 6.37952, "grad_norm": 0.7995836734771729, "learning_rate": 4.004801920768307e-05, "loss": 0.6623, "step": 4984 }, { "epoch": 6.3808, "grad_norm": 0.7452435493469238, "learning_rate": 4.0046018407362944e-05, "loss": 0.5924, "step": 4985 }, { "epoch": 6.38208, "grad_norm": 0.8057966828346252, "learning_rate": 4.0044017607042816e-05, "loss": 0.6282, "step": 4986 }, { "epoch": 6.38336, "grad_norm": 0.8227620124816895, "learning_rate": 4.0042016806722694e-05, "loss": 0.6227, "step": 4987 }, { "epoch": 6.38464, "grad_norm": 0.8170056343078613, "learning_rate": 4.0040016006402566e-05, "loss": 0.6289, "step": 4988 }, { "epoch": 6.38592, "grad_norm": 0.7389938831329346, "learning_rate": 4.003801520608244e-05, "loss": 0.639, "step": 4989 }, { "epoch": 6.3872, "grad_norm": 0.7633137106895447, "learning_rate": 4.003601440576231e-05, "loss": 0.6226, "step": 4990 }, { "epoch": 6.38848, "grad_norm": 0.7744930982589722, "learning_rate": 4.0034013605442175e-05, "loss": 0.6296, "step": 4991 }, { "epoch": 6.38976, "grad_norm": 0.7420474886894226, "learning_rate": 4.003201280512205e-05, "loss": 0.6408, "step": 4992 }, { "epoch": 6.39104, "grad_norm": 0.7508412003517151, "learning_rate": 4.003001200480192e-05, "loss": 0.5994, "step": 4993 }, { "epoch": 6.39232, "grad_norm": 0.783890962600708, "learning_rate": 4.00280112044818e-05, "loss": 0.6633, "step": 4994 }, { "epoch": 6.3936, "grad_norm": 0.7705927491188049, "learning_rate": 4.002601040416167e-05, "loss": 0.6623, "step": 4995 }, { "epoch": 6.39488, "grad_norm": 0.7970103621482849, "learning_rate": 4.002400960384154e-05, 
"loss": 0.6653, "step": 4996 }, { "epoch": 6.39616, "grad_norm": 0.8254691958427429, "learning_rate": 4.002200880352141e-05, "loss": 0.6138, "step": 4997 }, { "epoch": 6.39744, "grad_norm": 0.7935686707496643, "learning_rate": 4.0020008003201285e-05, "loss": 0.6288, "step": 4998 }, { "epoch": 6.39872, "grad_norm": 0.7537709474563599, "learning_rate": 4.001800720288115e-05, "loss": 0.5739, "step": 4999 }, { "epoch": 6.4, "grad_norm": 0.7888927459716797, "learning_rate": 4.001600640256102e-05, "loss": 0.6501, "step": 5000 }, { "epoch": 6.40128, "grad_norm": 0.7110080718994141, "learning_rate": 4.00140056022409e-05, "loss": 0.5607, "step": 5001 }, { "epoch": 6.40256, "grad_norm": 0.7363867163658142, "learning_rate": 4.001200480192077e-05, "loss": 0.6134, "step": 5002 }, { "epoch": 6.40384, "grad_norm": 0.7256652116775513, "learning_rate": 4.0010004001600644e-05, "loss": 0.5868, "step": 5003 }, { "epoch": 6.40512, "grad_norm": 0.7670110464096069, "learning_rate": 4.0008003201280516e-05, "loss": 0.5823, "step": 5004 }, { "epoch": 6.4064, "grad_norm": 0.778918445110321, "learning_rate": 4.000600240096039e-05, "loss": 0.6436, "step": 5005 }, { "epoch": 6.40768, "grad_norm": 0.7200184464454651, "learning_rate": 4.000400160064026e-05, "loss": 0.5635, "step": 5006 }, { "epoch": 6.40896, "grad_norm": 0.7693450450897217, "learning_rate": 4.0002000800320125e-05, "loss": 0.6088, "step": 5007 }, { "epoch": 6.41024, "grad_norm": 0.7648208737373352, "learning_rate": 4e-05, "loss": 0.6234, "step": 5008 }, { "epoch": 6.41152, "grad_norm": 0.784186840057373, "learning_rate": 3.9997999199679875e-05, "loss": 0.6424, "step": 5009 }, { "epoch": 6.4128, "grad_norm": 0.7577515244483948, "learning_rate": 3.999599839935975e-05, "loss": 0.6087, "step": 5010 }, { "epoch": 6.41408, "grad_norm": 0.7444120645523071, "learning_rate": 3.999399759903962e-05, "loss": 0.5809, "step": 5011 }, { "epoch": 6.41536, "grad_norm": 0.7742969393730164, "learning_rate": 3.999199679871949e-05, "loss": 0.6226, 
"step": 5012 }, { "epoch": 6.41664, "grad_norm": 0.7459928393363953, "learning_rate": 3.998999599839936e-05, "loss": 0.5572, "step": 5013 }, { "epoch": 6.41792, "grad_norm": 0.7630308270454407, "learning_rate": 3.9987995198079234e-05, "loss": 0.6129, "step": 5014 }, { "epoch": 6.4192, "grad_norm": 0.7499691843986511, "learning_rate": 3.9985994397759106e-05, "loss": 0.5652, "step": 5015 }, { "epoch": 6.42048, "grad_norm": 0.7551511526107788, "learning_rate": 3.998399359743898e-05, "loss": 0.6122, "step": 5016 }, { "epoch": 6.42176, "grad_norm": 0.8000392913818359, "learning_rate": 3.998199279711885e-05, "loss": 0.6467, "step": 5017 }, { "epoch": 6.42304, "grad_norm": 0.7956501245498657, "learning_rate": 3.997999199679872e-05, "loss": 0.6459, "step": 5018 }, { "epoch": 6.42432, "grad_norm": 0.7600113153457642, "learning_rate": 3.9977991196478594e-05, "loss": 0.6714, "step": 5019 }, { "epoch": 6.4256, "grad_norm": 0.7289563417434692, "learning_rate": 3.9975990396158466e-05, "loss": 0.5878, "step": 5020 }, { "epoch": 6.42688, "grad_norm": 0.7440637350082397, "learning_rate": 3.997398959583834e-05, "loss": 0.6087, "step": 5021 }, { "epoch": 6.42816, "grad_norm": 0.8092576265335083, "learning_rate": 3.997198879551821e-05, "loss": 0.6633, "step": 5022 }, { "epoch": 6.42944, "grad_norm": 0.7994837164878845, "learning_rate": 3.996998799519808e-05, "loss": 0.5933, "step": 5023 }, { "epoch": 6.43072, "grad_norm": 0.7767707109451294, "learning_rate": 3.996798719487795e-05, "loss": 0.653, "step": 5024 }, { "epoch": 6.432, "grad_norm": 0.780889093875885, "learning_rate": 3.9965986394557825e-05, "loss": 0.7127, "step": 5025 }, { "epoch": 6.43328, "grad_norm": 0.7731380462646484, "learning_rate": 3.99639855942377e-05, "loss": 0.6146, "step": 5026 }, { "epoch": 6.43456, "grad_norm": 0.7348530292510986, "learning_rate": 3.996198479391757e-05, "loss": 0.5598, "step": 5027 }, { "epoch": 6.43584, "grad_norm": 0.7755183577537537, "learning_rate": 3.995998399359744e-05, "loss": 0.6158, 
"step": 5028 }, { "epoch": 6.43712, "grad_norm": 0.7527555227279663, "learning_rate": 3.995798319327731e-05, "loss": 0.6186, "step": 5029 }, { "epoch": 6.4384, "grad_norm": 0.7150756120681763, "learning_rate": 3.9955982392957184e-05, "loss": 0.5473, "step": 5030 }, { "epoch": 6.43968, "grad_norm": 0.7636802792549133, "learning_rate": 3.9953981592637056e-05, "loss": 0.6463, "step": 5031 }, { "epoch": 6.4409600000000005, "grad_norm": 0.8033157587051392, "learning_rate": 3.995198079231693e-05, "loss": 0.6095, "step": 5032 }, { "epoch": 6.44224, "grad_norm": 0.7744354009628296, "learning_rate": 3.99499799919968e-05, "loss": 0.6034, "step": 5033 }, { "epoch": 6.44352, "grad_norm": 0.7521213293075562, "learning_rate": 3.994797919167667e-05, "loss": 0.6075, "step": 5034 }, { "epoch": 6.4448, "grad_norm": 0.7580350041389465, "learning_rate": 3.9945978391356543e-05, "loss": 0.6328, "step": 5035 }, { "epoch": 6.44608, "grad_norm": 0.7581676244735718, "learning_rate": 3.994397759103642e-05, "loss": 0.5721, "step": 5036 }, { "epoch": 6.44736, "grad_norm": 0.7432277202606201, "learning_rate": 3.994197679071629e-05, "loss": 0.5626, "step": 5037 }, { "epoch": 6.44864, "grad_norm": 0.7694210410118103, "learning_rate": 3.993997599039616e-05, "loss": 0.6688, "step": 5038 }, { "epoch": 6.44992, "grad_norm": 0.7114573121070862, "learning_rate": 3.993797519007603e-05, "loss": 0.5948, "step": 5039 }, { "epoch": 6.4512, "grad_norm": 0.7279742360115051, "learning_rate": 3.99359743897559e-05, "loss": 0.5615, "step": 5040 }, { "epoch": 6.4524799999999995, "grad_norm": 0.7480107545852661, "learning_rate": 3.9933973589435775e-05, "loss": 0.605, "step": 5041 }, { "epoch": 6.45376, "grad_norm": 0.7534326314926147, "learning_rate": 3.9931972789115646e-05, "loss": 0.5966, "step": 5042 }, { "epoch": 6.45504, "grad_norm": 0.7341785430908203, "learning_rate": 3.9929971988795525e-05, "loss": 0.5677, "step": 5043 }, { "epoch": 6.45632, "grad_norm": 0.7698660492897034, "learning_rate": 
3.99279711884754e-05, "loss": 0.6088, "step": 5044 }, { "epoch": 6.4576, "grad_norm": 0.777713418006897, "learning_rate": 3.992597038815526e-05, "loss": 0.6248, "step": 5045 }, { "epoch": 6.45888, "grad_norm": 0.7565256357192993, "learning_rate": 3.9923969587835134e-05, "loss": 0.6141, "step": 5046 }, { "epoch": 6.46016, "grad_norm": 0.7639655470848083, "learning_rate": 3.9921968787515006e-05, "loss": 0.6114, "step": 5047 }, { "epoch": 6.46144, "grad_norm": 0.7447236776351929, "learning_rate": 3.991996798719488e-05, "loss": 0.6114, "step": 5048 }, { "epoch": 6.46272, "grad_norm": 0.7928009033203125, "learning_rate": 3.991796718687475e-05, "loss": 0.6378, "step": 5049 }, { "epoch": 6.464, "grad_norm": 0.8144562244415283, "learning_rate": 3.991596638655463e-05, "loss": 0.6235, "step": 5050 }, { "epoch": 6.46528, "grad_norm": 0.6955716609954834, "learning_rate": 3.99139655862345e-05, "loss": 0.5668, "step": 5051 }, { "epoch": 6.46656, "grad_norm": 0.756885290145874, "learning_rate": 3.991196478591437e-05, "loss": 0.6575, "step": 5052 }, { "epoch": 6.46784, "grad_norm": 0.7933226227760315, "learning_rate": 3.990996398559424e-05, "loss": 0.6359, "step": 5053 }, { "epoch": 6.46912, "grad_norm": 0.764266848564148, "learning_rate": 3.990796318527411e-05, "loss": 0.6044, "step": 5054 }, { "epoch": 6.4704, "grad_norm": 0.770219087600708, "learning_rate": 3.990596238495398e-05, "loss": 0.6511, "step": 5055 }, { "epoch": 6.47168, "grad_norm": 0.8092508912086487, "learning_rate": 3.990396158463385e-05, "loss": 0.6976, "step": 5056 }, { "epoch": 6.47296, "grad_norm": 0.7517983913421631, "learning_rate": 3.990196078431373e-05, "loss": 0.5846, "step": 5057 }, { "epoch": 6.47424, "grad_norm": 0.7917564511299133, "learning_rate": 3.98999599839936e-05, "loss": 0.652, "step": 5058 }, { "epoch": 6.47552, "grad_norm": 0.7756854295730591, "learning_rate": 3.9897959183673475e-05, "loss": 0.5847, "step": 5059 }, { "epoch": 6.4768, "grad_norm": 0.7966377139091492, "learning_rate": 
3.9895958383353347e-05, "loss": 0.5864, "step": 5060 }, { "epoch": 6.47808, "grad_norm": 0.793876051902771, "learning_rate": 3.989395758303321e-05, "loss": 0.5858, "step": 5061 }, { "epoch": 6.47936, "grad_norm": 0.7717666029930115, "learning_rate": 3.9891956782713084e-05, "loss": 0.6202, "step": 5062 }, { "epoch": 6.48064, "grad_norm": 0.7384968400001526, "learning_rate": 3.9889955982392955e-05, "loss": 0.6262, "step": 5063 }, { "epoch": 6.48192, "grad_norm": 0.7273770570755005, "learning_rate": 3.9887955182072834e-05, "loss": 0.5539, "step": 5064 }, { "epoch": 6.4832, "grad_norm": 0.7145727276802063, "learning_rate": 3.9885954381752706e-05, "loss": 0.5845, "step": 5065 }, { "epoch": 6.48448, "grad_norm": 0.7780152559280396, "learning_rate": 3.988395358143258e-05, "loss": 0.596, "step": 5066 }, { "epoch": 6.48576, "grad_norm": 0.8011901378631592, "learning_rate": 3.988195278111245e-05, "loss": 0.6526, "step": 5067 }, { "epoch": 6.48704, "grad_norm": 0.780985414981842, "learning_rate": 3.987995198079232e-05, "loss": 0.5757, "step": 5068 }, { "epoch": 6.48832, "grad_norm": 0.7850869297981262, "learning_rate": 3.9877951180472187e-05, "loss": 0.639, "step": 5069 }, { "epoch": 6.4896, "grad_norm": 0.7978724837303162, "learning_rate": 3.987595038015206e-05, "loss": 0.5953, "step": 5070 }, { "epoch": 6.49088, "grad_norm": 0.7666551470756531, "learning_rate": 3.987394957983194e-05, "loss": 0.5911, "step": 5071 }, { "epoch": 6.49216, "grad_norm": 0.8040878176689148, "learning_rate": 3.987194877951181e-05, "loss": 0.6198, "step": 5072 }, { "epoch": 6.49344, "grad_norm": 0.7172399759292603, "learning_rate": 3.986994797919168e-05, "loss": 0.6161, "step": 5073 }, { "epoch": 6.49472, "grad_norm": 0.7347850203514099, "learning_rate": 3.986794717887155e-05, "loss": 0.6082, "step": 5074 }, { "epoch": 6.496, "grad_norm": 0.7927334904670715, "learning_rate": 3.9865946378551424e-05, "loss": 0.6269, "step": 5075 }, { "epoch": 6.49728, "grad_norm": 0.7884413003921509, "learning_rate": 
3.9863945578231296e-05, "loss": 0.6417, "step": 5076 }, { "epoch": 6.49856, "grad_norm": 0.7497519850730896, "learning_rate": 3.986194477791116e-05, "loss": 0.5945, "step": 5077 }, { "epoch": 6.49984, "grad_norm": 0.8451282382011414, "learning_rate": 3.985994397759104e-05, "loss": 0.6606, "step": 5078 }, { "epoch": 6.50112, "grad_norm": 0.8098703026771545, "learning_rate": 3.985794317727091e-05, "loss": 0.6289, "step": 5079 }, { "epoch": 6.5024, "grad_norm": 0.8050368428230286, "learning_rate": 3.9855942376950784e-05, "loss": 0.6251, "step": 5080 }, { "epoch": 6.50368, "grad_norm": 0.7607022523880005, "learning_rate": 3.9853941576630656e-05, "loss": 0.5769, "step": 5081 }, { "epoch": 6.50496, "grad_norm": 0.7871085405349731, "learning_rate": 3.985194077631053e-05, "loss": 0.6361, "step": 5082 }, { "epoch": 6.50624, "grad_norm": 0.7968281507492065, "learning_rate": 3.98499399759904e-05, "loss": 0.6802, "step": 5083 }, { "epoch": 6.5075199999999995, "grad_norm": 0.7657204270362854, "learning_rate": 3.984793917567027e-05, "loss": 0.6098, "step": 5084 }, { "epoch": 6.5088, "grad_norm": 0.7545149922370911, "learning_rate": 3.984593837535014e-05, "loss": 0.6096, "step": 5085 }, { "epoch": 6.51008, "grad_norm": 0.7787616848945618, "learning_rate": 3.9843937575030015e-05, "loss": 0.6258, "step": 5086 }, { "epoch": 6.51136, "grad_norm": 0.795444905757904, "learning_rate": 3.984193677470989e-05, "loss": 0.6293, "step": 5087 }, { "epoch": 6.51264, "grad_norm": 0.7523403763771057, "learning_rate": 3.983993597438976e-05, "loss": 0.6098, "step": 5088 }, { "epoch": 6.51392, "grad_norm": 0.7614408731460571, "learning_rate": 3.983793517406963e-05, "loss": 0.5827, "step": 5089 }, { "epoch": 6.5152, "grad_norm": 0.7404950857162476, "learning_rate": 3.98359343737495e-05, "loss": 0.6255, "step": 5090 }, { "epoch": 6.51648, "grad_norm": 0.7416658997535706, "learning_rate": 3.9833933573429374e-05, "loss": 0.6169, "step": 5091 }, { "epoch": 6.51776, "grad_norm": 0.7457707524299622, 
"learning_rate": 3.9831932773109246e-05, "loss": 0.5626, "step": 5092 }, { "epoch": 6.51904, "grad_norm": 0.7906910181045532, "learning_rate": 3.982993197278912e-05, "loss": 0.6424, "step": 5093 }, { "epoch": 6.52032, "grad_norm": 0.798050045967102, "learning_rate": 3.982793117246899e-05, "loss": 0.6044, "step": 5094 }, { "epoch": 6.5216, "grad_norm": 0.7886822819709778, "learning_rate": 3.982593037214886e-05, "loss": 0.637, "step": 5095 }, { "epoch": 6.52288, "grad_norm": 0.7528558373451233, "learning_rate": 3.9823929571828733e-05, "loss": 0.5701, "step": 5096 }, { "epoch": 6.52416, "grad_norm": 0.78963303565979, "learning_rate": 3.9821928771508605e-05, "loss": 0.6763, "step": 5097 }, { "epoch": 6.52544, "grad_norm": 0.7659367918968201, "learning_rate": 3.981992797118848e-05, "loss": 0.6181, "step": 5098 }, { "epoch": 6.52672, "grad_norm": 0.7585764527320862, "learning_rate": 3.981792717086835e-05, "loss": 0.609, "step": 5099 }, { "epoch": 6.5280000000000005, "grad_norm": 0.7747802138328552, "learning_rate": 3.981592637054822e-05, "loss": 0.6033, "step": 5100 }, { "epoch": 6.52928, "grad_norm": 0.7253979444503784, "learning_rate": 3.981392557022809e-05, "loss": 0.5512, "step": 5101 }, { "epoch": 6.53056, "grad_norm": 0.7542280554771423, "learning_rate": 3.9811924769907965e-05, "loss": 0.6486, "step": 5102 }, { "epoch": 6.53184, "grad_norm": 0.7517480254173279, "learning_rate": 3.9809923969587836e-05, "loss": 0.6327, "step": 5103 }, { "epoch": 6.53312, "grad_norm": 0.7435446381568909, "learning_rate": 3.980792316926771e-05, "loss": 0.6079, "step": 5104 }, { "epoch": 6.5344, "grad_norm": 0.7595201730728149, "learning_rate": 3.980592236894758e-05, "loss": 0.604, "step": 5105 }, { "epoch": 6.53568, "grad_norm": 0.7278428673744202, "learning_rate": 3.980392156862745e-05, "loss": 0.6111, "step": 5106 }, { "epoch": 6.53696, "grad_norm": 0.7786182165145874, "learning_rate": 3.9801920768307324e-05, "loss": 0.6558, "step": 5107 }, { "epoch": 6.53824, "grad_norm": 
0.8050516247749329, "learning_rate": 3.9799919967987196e-05, "loss": 0.6563, "step": 5108 }, { "epoch": 6.5395199999999996, "grad_norm": 0.7350043058395386, "learning_rate": 3.979791916766707e-05, "loss": 0.6049, "step": 5109 }, { "epoch": 6.5408, "grad_norm": 0.7202991843223572, "learning_rate": 3.979591836734694e-05, "loss": 0.5954, "step": 5110 }, { "epoch": 6.54208, "grad_norm": 0.7836583256721497, "learning_rate": 3.979391756702681e-05, "loss": 0.6103, "step": 5111 }, { "epoch": 6.54336, "grad_norm": 0.7699684500694275, "learning_rate": 3.979191676670668e-05, "loss": 0.6243, "step": 5112 }, { "epoch": 6.54464, "grad_norm": 0.8052793145179749, "learning_rate": 3.9789915966386555e-05, "loss": 0.6234, "step": 5113 }, { "epoch": 6.54592, "grad_norm": 0.7986263632774353, "learning_rate": 3.9787915166066434e-05, "loss": 0.6337, "step": 5114 }, { "epoch": 6.5472, "grad_norm": 0.7898523807525635, "learning_rate": 3.97859143657463e-05, "loss": 0.6222, "step": 5115 }, { "epoch": 6.54848, "grad_norm": 0.7434815764427185, "learning_rate": 3.978391356542617e-05, "loss": 0.6167, "step": 5116 }, { "epoch": 6.54976, "grad_norm": 0.7909148931503296, "learning_rate": 3.978191276510604e-05, "loss": 0.6231, "step": 5117 }, { "epoch": 6.55104, "grad_norm": 0.7848829030990601, "learning_rate": 3.9779911964785914e-05, "loss": 0.6086, "step": 5118 }, { "epoch": 6.55232, "grad_norm": 0.7340389490127563, "learning_rate": 3.9777911164465786e-05, "loss": 0.5672, "step": 5119 }, { "epoch": 6.5536, "grad_norm": 0.8030784726142883, "learning_rate": 3.977591036414566e-05, "loss": 0.5965, "step": 5120 }, { "epoch": 6.55488, "grad_norm": 0.783450186252594, "learning_rate": 3.977390956382554e-05, "loss": 0.6128, "step": 5121 }, { "epoch": 6.55616, "grad_norm": 0.7899596095085144, "learning_rate": 3.977190876350541e-05, "loss": 0.6482, "step": 5122 }, { "epoch": 6.55744, "grad_norm": 0.7937018275260925, "learning_rate": 3.9769907963185274e-05, "loss": 0.6245, "step": 5123 }, { "epoch": 6.55872, 
"grad_norm": 0.7522284388542175, "learning_rate": 3.9767907162865145e-05, "loss": 0.5998, "step": 5124 }, { "epoch": 6.5600000000000005, "grad_norm": 0.7983690500259399, "learning_rate": 3.976590636254502e-05, "loss": 0.6299, "step": 5125 }, { "epoch": 6.56128, "grad_norm": 0.7660322785377502, "learning_rate": 3.976390556222489e-05, "loss": 0.6138, "step": 5126 }, { "epoch": 6.5625599999999995, "grad_norm": 0.8127145767211914, "learning_rate": 3.976190476190476e-05, "loss": 0.5622, "step": 5127 }, { "epoch": 6.56384, "grad_norm": 0.8016258478164673, "learning_rate": 3.975990396158464e-05, "loss": 0.6375, "step": 5128 }, { "epoch": 6.56512, "grad_norm": 0.756418764591217, "learning_rate": 3.975790316126451e-05, "loss": 0.6423, "step": 5129 }, { "epoch": 6.5664, "grad_norm": 0.7691940069198608, "learning_rate": 3.975590236094438e-05, "loss": 0.6323, "step": 5130 }, { "epoch": 6.56768, "grad_norm": 0.7534066438674927, "learning_rate": 3.975390156062425e-05, "loss": 0.6174, "step": 5131 }, { "epoch": 6.56896, "grad_norm": 0.8069169521331787, "learning_rate": 3.975190076030412e-05, "loss": 0.6324, "step": 5132 }, { "epoch": 6.57024, "grad_norm": 0.7579085230827332, "learning_rate": 3.974989995998399e-05, "loss": 0.6137, "step": 5133 }, { "epoch": 6.57152, "grad_norm": 0.759647011756897, "learning_rate": 3.9747899159663864e-05, "loss": 0.597, "step": 5134 }, { "epoch": 6.5728, "grad_norm": 0.7449114918708801, "learning_rate": 3.974589835934374e-05, "loss": 0.5865, "step": 5135 }, { "epoch": 6.57408, "grad_norm": 0.7468611001968384, "learning_rate": 3.9743897559023614e-05, "loss": 0.5255, "step": 5136 }, { "epoch": 6.57536, "grad_norm": 0.8116476535797119, "learning_rate": 3.9741896758703486e-05, "loss": 0.6977, "step": 5137 }, { "epoch": 6.57664, "grad_norm": 0.7768375873565674, "learning_rate": 3.973989595838336e-05, "loss": 0.6375, "step": 5138 }, { "epoch": 6.57792, "grad_norm": 0.751890242099762, "learning_rate": 3.973789515806322e-05, "loss": 0.5899, "step": 5139 }, 
{ "epoch": 6.5792, "grad_norm": 0.7473777532577515, "learning_rate": 3.9735894357743095e-05, "loss": 0.5912, "step": 5140 }, { "epoch": 6.58048, "grad_norm": 0.7794525623321533, "learning_rate": 3.973389355742297e-05, "loss": 0.6331, "step": 5141 }, { "epoch": 6.58176, "grad_norm": 0.7751368880271912, "learning_rate": 3.9731892757102846e-05, "loss": 0.627, "step": 5142 }, { "epoch": 6.5830400000000004, "grad_norm": 0.7366233468055725, "learning_rate": 3.972989195678272e-05, "loss": 0.6061, "step": 5143 }, { "epoch": 6.58432, "grad_norm": 0.7520456910133362, "learning_rate": 3.972789115646259e-05, "loss": 0.6021, "step": 5144 }, { "epoch": 6.5856, "grad_norm": 0.7733432650566101, "learning_rate": 3.972589035614246e-05, "loss": 0.5873, "step": 5145 }, { "epoch": 6.58688, "grad_norm": 0.7415776252746582, "learning_rate": 3.972388955582233e-05, "loss": 0.5845, "step": 5146 }, { "epoch": 6.58816, "grad_norm": 0.7746995091438293, "learning_rate": 3.97218887555022e-05, "loss": 0.5892, "step": 5147 }, { "epoch": 6.58944, "grad_norm": 0.736659049987793, "learning_rate": 3.971988795518207e-05, "loss": 0.5841, "step": 5148 }, { "epoch": 6.59072, "grad_norm": 0.7600561380386353, "learning_rate": 3.971788715486195e-05, "loss": 0.5765, "step": 5149 }, { "epoch": 6.592, "grad_norm": 0.7675446271896362, "learning_rate": 3.971588635454182e-05, "loss": 0.6319, "step": 5150 }, { "epoch": 6.59328, "grad_norm": 0.7852813601493835, "learning_rate": 3.971388555422169e-05, "loss": 0.6026, "step": 5151 }, { "epoch": 6.5945599999999995, "grad_norm": 0.7649344801902771, "learning_rate": 3.9711884753901564e-05, "loss": 0.6173, "step": 5152 }, { "epoch": 6.59584, "grad_norm": 0.7568708062171936, "learning_rate": 3.9709883953581436e-05, "loss": 0.6124, "step": 5153 }, { "epoch": 6.59712, "grad_norm": 0.7136748433113098, "learning_rate": 3.970788315326131e-05, "loss": 0.6117, "step": 5154 }, { "epoch": 6.5984, "grad_norm": 0.8341229557991028, "learning_rate": 3.970588235294117e-05, "loss": 
0.6245, "step": 5155 }, { "epoch": 6.59968, "grad_norm": 0.8061261773109436, "learning_rate": 3.970388155262105e-05, "loss": 0.6325, "step": 5156 }, { "epoch": 6.60096, "grad_norm": 0.7360426187515259, "learning_rate": 3.9701880752300923e-05, "loss": 0.6087, "step": 5157 }, { "epoch": 6.60224, "grad_norm": 0.7481448650360107, "learning_rate": 3.9699879951980795e-05, "loss": 0.5674, "step": 5158 }, { "epoch": 6.60352, "grad_norm": 0.7844505310058594, "learning_rate": 3.969787915166067e-05, "loss": 0.6471, "step": 5159 }, { "epoch": 6.6048, "grad_norm": 0.8189229965209961, "learning_rate": 3.969587835134054e-05, "loss": 0.6488, "step": 5160 }, { "epoch": 6.60608, "grad_norm": 0.7833296656608582, "learning_rate": 3.969387755102041e-05, "loss": 0.6595, "step": 5161 }, { "epoch": 6.60736, "grad_norm": 0.7521294951438904, "learning_rate": 3.969187675070028e-05, "loss": 0.6137, "step": 5162 }, { "epoch": 6.60864, "grad_norm": 0.7876133918762207, "learning_rate": 3.9689875950380155e-05, "loss": 0.6154, "step": 5163 }, { "epoch": 6.60992, "grad_norm": 0.7582443952560425, "learning_rate": 3.9687875150060026e-05, "loss": 0.5877, "step": 5164 }, { "epoch": 6.6112, "grad_norm": 0.7658904790878296, "learning_rate": 3.96858743497399e-05, "loss": 0.647, "step": 5165 }, { "epoch": 6.61248, "grad_norm": 0.7374891638755798, "learning_rate": 3.968387354941977e-05, "loss": 0.6219, "step": 5166 }, { "epoch": 6.61376, "grad_norm": 0.7132728695869446, "learning_rate": 3.968187274909964e-05, "loss": 0.5317, "step": 5167 }, { "epoch": 6.6150400000000005, "grad_norm": 0.7678626775741577, "learning_rate": 3.9679871948779514e-05, "loss": 0.5931, "step": 5168 }, { "epoch": 6.61632, "grad_norm": 0.8119401335716248, "learning_rate": 3.9677871148459386e-05, "loss": 0.6636, "step": 5169 }, { "epoch": 6.6176, "grad_norm": 0.7630581259727478, "learning_rate": 3.967587034813926e-05, "loss": 0.6401, "step": 5170 }, { "epoch": 6.61888, "grad_norm": 0.7518944144248962, "learning_rate": 
3.967386954781913e-05, "loss": 0.6174, "step": 5171 }, { "epoch": 6.62016, "grad_norm": 0.7786079049110413, "learning_rate": 3.9671868747499e-05, "loss": 0.6526, "step": 5172 }, { "epoch": 6.62144, "grad_norm": 0.8287822604179382, "learning_rate": 3.966986794717887e-05, "loss": 0.713, "step": 5173 }, { "epoch": 6.62272, "grad_norm": 0.7201914191246033, "learning_rate": 3.9667867146858745e-05, "loss": 0.5696, "step": 5174 }, { "epoch": 6.624, "grad_norm": 0.7631100416183472, "learning_rate": 3.966586634653862e-05, "loss": 0.6302, "step": 5175 }, { "epoch": 6.62528, "grad_norm": 0.769008994102478, "learning_rate": 3.966386554621849e-05, "loss": 0.5921, "step": 5176 }, { "epoch": 6.62656, "grad_norm": 0.8017969727516174, "learning_rate": 3.966186474589836e-05, "loss": 0.6579, "step": 5177 }, { "epoch": 6.62784, "grad_norm": 0.7824108600616455, "learning_rate": 3.965986394557823e-05, "loss": 0.6594, "step": 5178 }, { "epoch": 6.62912, "grad_norm": 0.7433846592903137, "learning_rate": 3.9657863145258104e-05, "loss": 0.624, "step": 5179 }, { "epoch": 6.6304, "grad_norm": 0.7518701553344727, "learning_rate": 3.9655862344937976e-05, "loss": 0.6058, "step": 5180 }, { "epoch": 6.63168, "grad_norm": 0.7567682266235352, "learning_rate": 3.965386154461785e-05, "loss": 0.6081, "step": 5181 }, { "epoch": 6.63296, "grad_norm": 0.7529020309448242, "learning_rate": 3.965186074429772e-05, "loss": 0.6016, "step": 5182 }, { "epoch": 6.63424, "grad_norm": 0.7778772115707397, "learning_rate": 3.964985994397759e-05, "loss": 0.6077, "step": 5183 }, { "epoch": 6.63552, "grad_norm": 0.7256116271018982, "learning_rate": 3.964785914365747e-05, "loss": 0.5214, "step": 5184 }, { "epoch": 6.6368, "grad_norm": 0.820444643497467, "learning_rate": 3.9645858343337335e-05, "loss": 0.6515, "step": 5185 }, { "epoch": 6.63808, "grad_norm": 0.7796686291694641, "learning_rate": 3.964385754301721e-05, "loss": 0.6436, "step": 5186 }, { "epoch": 6.63936, "grad_norm": 0.7803254127502441, "learning_rate": 
3.964185674269708e-05, "loss": 0.6102, "step": 5187 }, { "epoch": 6.64064, "grad_norm": 0.7486667037010193, "learning_rate": 3.963985594237695e-05, "loss": 0.65, "step": 5188 }, { "epoch": 6.64192, "grad_norm": 0.7695333361625671, "learning_rate": 3.963785514205682e-05, "loss": 0.6523, "step": 5189 }, { "epoch": 6.6432, "grad_norm": 0.7668389678001404, "learning_rate": 3.9635854341736695e-05, "loss": 0.6391, "step": 5190 }, { "epoch": 6.64448, "grad_norm": 0.7639618515968323, "learning_rate": 3.963385354141657e-05, "loss": 0.6011, "step": 5191 }, { "epoch": 6.64576, "grad_norm": 0.7803028225898743, "learning_rate": 3.9631852741096445e-05, "loss": 0.6203, "step": 5192 }, { "epoch": 6.64704, "grad_norm": 0.7438964247703552, "learning_rate": 3.962985194077631e-05, "loss": 0.5911, "step": 5193 }, { "epoch": 6.64832, "grad_norm": 0.7312869429588318, "learning_rate": 3.962785114045618e-05, "loss": 0.5731, "step": 5194 }, { "epoch": 6.6495999999999995, "grad_norm": 0.7577333450317383, "learning_rate": 3.9625850340136054e-05, "loss": 0.6012, "step": 5195 }, { "epoch": 6.65088, "grad_norm": 0.7712530493736267, "learning_rate": 3.9623849539815926e-05, "loss": 0.6512, "step": 5196 }, { "epoch": 6.65216, "grad_norm": 0.7147469520568848, "learning_rate": 3.96218487394958e-05, "loss": 0.6153, "step": 5197 }, { "epoch": 6.65344, "grad_norm": 0.7817697525024414, "learning_rate": 3.961984793917567e-05, "loss": 0.6256, "step": 5198 }, { "epoch": 6.65472, "grad_norm": 0.7692977786064148, "learning_rate": 3.961784713885555e-05, "loss": 0.5991, "step": 5199 }, { "epoch": 6.656, "grad_norm": 0.7339797616004944, "learning_rate": 3.961584633853542e-05, "loss": 0.5563, "step": 5200 }, { "epoch": 6.65728, "grad_norm": 0.7555981278419495, "learning_rate": 3.9613845538215285e-05, "loss": 0.6133, "step": 5201 }, { "epoch": 6.65856, "grad_norm": 0.7478959560394287, "learning_rate": 3.961184473789516e-05, "loss": 0.614, "step": 5202 }, { "epoch": 6.65984, "grad_norm": 0.7598313689231873, 
"learning_rate": 3.960984393757503e-05, "loss": 0.6197, "step": 5203 }, { "epoch": 6.66112, "grad_norm": 0.7408673763275146, "learning_rate": 3.96078431372549e-05, "loss": 0.5736, "step": 5204 }, { "epoch": 6.6624, "grad_norm": 0.8013281226158142, "learning_rate": 3.960584233693477e-05, "loss": 0.6783, "step": 5205 }, { "epoch": 6.66368, "grad_norm": 0.7539663910865784, "learning_rate": 3.960384153661465e-05, "loss": 0.5824, "step": 5206 }, { "epoch": 6.66496, "grad_norm": 0.7687118649482727, "learning_rate": 3.960184073629452e-05, "loss": 0.6185, "step": 5207 }, { "epoch": 6.66624, "grad_norm": 0.803480327129364, "learning_rate": 3.9599839935974395e-05, "loss": 0.5955, "step": 5208 }, { "epoch": 6.66752, "grad_norm": 0.7527867555618286, "learning_rate": 3.959783913565426e-05, "loss": 0.6128, "step": 5209 }, { "epoch": 6.6688, "grad_norm": 0.7492735981941223, "learning_rate": 3.959583833533413e-05, "loss": 0.6112, "step": 5210 }, { "epoch": 6.6700800000000005, "grad_norm": 0.757946252822876, "learning_rate": 3.9593837535014004e-05, "loss": 0.57, "step": 5211 }, { "epoch": 6.67136, "grad_norm": 0.7378002405166626, "learning_rate": 3.9591836734693876e-05, "loss": 0.5691, "step": 5212 }, { "epoch": 6.67264, "grad_norm": 0.7848967909812927, "learning_rate": 3.9589835934373754e-05, "loss": 0.6498, "step": 5213 }, { "epoch": 6.67392, "grad_norm": 0.7497094869613647, "learning_rate": 3.9587835134053626e-05, "loss": 0.5919, "step": 5214 }, { "epoch": 6.6752, "grad_norm": 0.7710285782814026, "learning_rate": 3.95858343337335e-05, "loss": 0.623, "step": 5215 }, { "epoch": 6.67648, "grad_norm": 0.7714881300926208, "learning_rate": 3.958383353341337e-05, "loss": 0.6167, "step": 5216 }, { "epoch": 6.67776, "grad_norm": 0.8070691823959351, "learning_rate": 3.9581832733093235e-05, "loss": 0.627, "step": 5217 }, { "epoch": 6.67904, "grad_norm": 0.7932981848716736, "learning_rate": 3.957983193277311e-05, "loss": 0.6538, "step": 5218 }, { "epoch": 6.68032, "grad_norm": 
0.7265870571136475, "learning_rate": 3.957783113245298e-05, "loss": 0.5791, "step": 5219 }, { "epoch": 6.6815999999999995, "grad_norm": 0.7722101211547852, "learning_rate": 3.957583033213286e-05, "loss": 0.6188, "step": 5220 }, { "epoch": 6.68288, "grad_norm": 0.7940667271614075, "learning_rate": 3.957382953181273e-05, "loss": 0.5663, "step": 5221 }, { "epoch": 6.68416, "grad_norm": 0.7684674263000488, "learning_rate": 3.95718287314926e-05, "loss": 0.6121, "step": 5222 }, { "epoch": 6.68544, "grad_norm": 0.7839207053184509, "learning_rate": 3.956982793117247e-05, "loss": 0.6378, "step": 5223 }, { "epoch": 6.68672, "grad_norm": 0.6971110701560974, "learning_rate": 3.9567827130852345e-05, "loss": 0.5632, "step": 5224 }, { "epoch": 6.688, "grad_norm": 0.7478116750717163, "learning_rate": 3.956582633053221e-05, "loss": 0.6001, "step": 5225 }, { "epoch": 6.68928, "grad_norm": 0.7529265880584717, "learning_rate": 3.956382553021208e-05, "loss": 0.647, "step": 5226 }, { "epoch": 6.69056, "grad_norm": 0.7169869542121887, "learning_rate": 3.956182472989196e-05, "loss": 0.609, "step": 5227 }, { "epoch": 6.69184, "grad_norm": 0.7155503034591675, "learning_rate": 3.955982392957183e-05, "loss": 0.5816, "step": 5228 }, { "epoch": 6.69312, "grad_norm": 0.7647979855537415, "learning_rate": 3.9557823129251704e-05, "loss": 0.6386, "step": 5229 }, { "epoch": 6.6944, "grad_norm": 0.717627227306366, "learning_rate": 3.9555822328931576e-05, "loss": 0.6124, "step": 5230 }, { "epoch": 6.69568, "grad_norm": 0.7740793824195862, "learning_rate": 3.955382152861145e-05, "loss": 0.6819, "step": 5231 }, { "epoch": 6.69696, "grad_norm": 0.7482873797416687, "learning_rate": 3.955182072829132e-05, "loss": 0.5816, "step": 5232 }, { "epoch": 6.69824, "grad_norm": 0.8073453903198242, "learning_rate": 3.9549819927971185e-05, "loss": 0.6147, "step": 5233 }, { "epoch": 6.69952, "grad_norm": 0.775709867477417, "learning_rate": 3.954781912765106e-05, "loss": 0.6217, "step": 5234 }, { "epoch": 6.7008, 
"grad_norm": 0.8085676431655884, "learning_rate": 3.9545818327330935e-05, "loss": 0.6666, "step": 5235 }, { "epoch": 6.7020800000000005, "grad_norm": 0.7595890164375305, "learning_rate": 3.954381752701081e-05, "loss": 0.64, "step": 5236 }, { "epoch": 6.70336, "grad_norm": 0.7835769653320312, "learning_rate": 3.954181672669068e-05, "loss": 0.663, "step": 5237 }, { "epoch": 6.70464, "grad_norm": 0.797363817691803, "learning_rate": 3.953981592637055e-05, "loss": 0.6874, "step": 5238 }, { "epoch": 6.70592, "grad_norm": 0.7739813923835754, "learning_rate": 3.953781512605042e-05, "loss": 0.5998, "step": 5239 }, { "epoch": 6.7072, "grad_norm": 0.7902897000312805, "learning_rate": 3.9535814325730294e-05, "loss": 0.6097, "step": 5240 }, { "epoch": 6.70848, "grad_norm": 0.756549060344696, "learning_rate": 3.9533813525410166e-05, "loss": 0.6184, "step": 5241 }, { "epoch": 6.70976, "grad_norm": 0.73802250623703, "learning_rate": 3.953181272509004e-05, "loss": 0.5682, "step": 5242 }, { "epoch": 6.71104, "grad_norm": 0.8189078569412231, "learning_rate": 3.952981192476991e-05, "loss": 0.651, "step": 5243 }, { "epoch": 6.71232, "grad_norm": 0.7824820280075073, "learning_rate": 3.952781112444978e-05, "loss": 0.6341, "step": 5244 }, { "epoch": 6.7136, "grad_norm": 0.788250207901001, "learning_rate": 3.9525810324129654e-05, "loss": 0.5819, "step": 5245 }, { "epoch": 6.71488, "grad_norm": 0.7493855953216553, "learning_rate": 3.9523809523809526e-05, "loss": 0.6157, "step": 5246 }, { "epoch": 6.71616, "grad_norm": 0.7709637880325317, "learning_rate": 3.95218087234894e-05, "loss": 0.6511, "step": 5247 }, { "epoch": 6.71744, "grad_norm": 0.7652736306190491, "learning_rate": 3.951980792316927e-05, "loss": 0.5601, "step": 5248 }, { "epoch": 6.71872, "grad_norm": 0.8294394612312317, "learning_rate": 3.951780712284914e-05, "loss": 0.6263, "step": 5249 }, { "epoch": 6.72, "grad_norm": 0.7636884450912476, "learning_rate": 3.951580632252901e-05, "loss": 0.5633, "step": 5250 }, { "epoch": 
6.72128, "grad_norm": 0.7744940519332886, "learning_rate": 3.9513805522208885e-05, "loss": 0.6129, "step": 5251 }, { "epoch": 6.72256, "grad_norm": 0.8224226236343384, "learning_rate": 3.951180472188876e-05, "loss": 0.6453, "step": 5252 }, { "epoch": 6.72384, "grad_norm": 0.7555708885192871, "learning_rate": 3.950980392156863e-05, "loss": 0.6153, "step": 5253 }, { "epoch": 6.72512, "grad_norm": 0.7619531750679016, "learning_rate": 3.95078031212485e-05, "loss": 0.6237, "step": 5254 }, { "epoch": 6.7264, "grad_norm": 0.7484665513038635, "learning_rate": 3.950580232092838e-05, "loss": 0.5713, "step": 5255 }, { "epoch": 6.72768, "grad_norm": 0.8010841608047485, "learning_rate": 3.9503801520608244e-05, "loss": 0.6829, "step": 5256 }, { "epoch": 6.72896, "grad_norm": 0.7087874412536621, "learning_rate": 3.9501800720288116e-05, "loss": 0.6001, "step": 5257 }, { "epoch": 6.73024, "grad_norm": 0.7901442050933838, "learning_rate": 3.949979991996799e-05, "loss": 0.6637, "step": 5258 }, { "epoch": 6.73152, "grad_norm": 0.78165602684021, "learning_rate": 3.949779911964786e-05, "loss": 0.654, "step": 5259 }, { "epoch": 6.7328, "grad_norm": 0.768568754196167, "learning_rate": 3.949579831932773e-05, "loss": 0.6469, "step": 5260 }, { "epoch": 6.73408, "grad_norm": 0.7904379963874817, "learning_rate": 3.94937975190076e-05, "loss": 0.6348, "step": 5261 }, { "epoch": 6.73536, "grad_norm": 0.7367870211601257, "learning_rate": 3.949179671868748e-05, "loss": 0.5939, "step": 5262 }, { "epoch": 6.7366399999999995, "grad_norm": 0.7719533443450928, "learning_rate": 3.9489795918367354e-05, "loss": 0.6251, "step": 5263 }, { "epoch": 6.73792, "grad_norm": 0.7805905938148499, "learning_rate": 3.948779511804722e-05, "loss": 0.5785, "step": 5264 }, { "epoch": 6.7392, "grad_norm": 0.7767636775970459, "learning_rate": 3.948579431772709e-05, "loss": 0.5854, "step": 5265 }, { "epoch": 6.74048, "grad_norm": 0.7528958916664124, "learning_rate": 3.948379351740696e-05, "loss": 0.5972, "step": 5266 }, { 
"epoch": 6.74176, "grad_norm": 0.7544768452644348, "learning_rate": 3.9481792717086835e-05, "loss": 0.6143, "step": 5267 }, { "epoch": 6.74304, "grad_norm": 0.8174731731414795, "learning_rate": 3.9479791916766706e-05, "loss": 0.6503, "step": 5268 }, { "epoch": 6.74432, "grad_norm": 0.7854768633842468, "learning_rate": 3.9477791116446585e-05, "loss": 0.6503, "step": 5269 }, { "epoch": 6.7456, "grad_norm": 0.7965456247329712, "learning_rate": 3.947579031612646e-05, "loss": 0.6077, "step": 5270 }, { "epoch": 6.74688, "grad_norm": 0.7633908987045288, "learning_rate": 3.947378951580633e-05, "loss": 0.6433, "step": 5271 }, { "epoch": 6.74816, "grad_norm": 0.7727089524269104, "learning_rate": 3.9471788715486194e-05, "loss": 0.6058, "step": 5272 }, { "epoch": 6.74944, "grad_norm": 0.7607753276824951, "learning_rate": 3.9469787915166066e-05, "loss": 0.6032, "step": 5273 }, { "epoch": 6.75072, "grad_norm": 0.7452098727226257, "learning_rate": 3.946778711484594e-05, "loss": 0.581, "step": 5274 }, { "epoch": 6.752, "grad_norm": 0.7961545586585999, "learning_rate": 3.946578631452581e-05, "loss": 0.6433, "step": 5275 }, { "epoch": 6.75328, "grad_norm": 0.7878684401512146, "learning_rate": 3.946378551420569e-05, "loss": 0.6343, "step": 5276 }, { "epoch": 6.75456, "grad_norm": 0.7796621322631836, "learning_rate": 3.946178471388556e-05, "loss": 0.6007, "step": 5277 }, { "epoch": 6.75584, "grad_norm": 0.7304415106773376, "learning_rate": 3.945978391356543e-05, "loss": 0.5637, "step": 5278 }, { "epoch": 6.7571200000000005, "grad_norm": 0.8092875480651855, "learning_rate": 3.9457783113245304e-05, "loss": 0.6487, "step": 5279 }, { "epoch": 6.7584, "grad_norm": 0.7983285188674927, "learning_rate": 3.945578231292517e-05, "loss": 0.6291, "step": 5280 }, { "epoch": 6.75968, "grad_norm": 0.7523446679115295, "learning_rate": 3.945378151260504e-05, "loss": 0.5886, "step": 5281 }, { "epoch": 6.76096, "grad_norm": 0.7880832552909851, "learning_rate": 3.945178071228491e-05, "loss": 0.6386, 
"step": 5282 }, { "epoch": 6.76224, "grad_norm": 0.7898958921432495, "learning_rate": 3.944977991196479e-05, "loss": 0.616, "step": 5283 }, { "epoch": 6.76352, "grad_norm": 0.7464529275894165, "learning_rate": 3.944777911164466e-05, "loss": 0.5919, "step": 5284 }, { "epoch": 6.7648, "grad_norm": 0.7837912440299988, "learning_rate": 3.9445778311324535e-05, "loss": 0.6378, "step": 5285 }, { "epoch": 6.76608, "grad_norm": 0.7559735774993896, "learning_rate": 3.9443777511004407e-05, "loss": 0.6033, "step": 5286 }, { "epoch": 6.76736, "grad_norm": 0.7761905193328857, "learning_rate": 3.944177671068428e-05, "loss": 0.6288, "step": 5287 }, { "epoch": 6.7686399999999995, "grad_norm": 0.7161477208137512, "learning_rate": 3.9439775910364143e-05, "loss": 0.595, "step": 5288 }, { "epoch": 6.76992, "grad_norm": 0.7817723155021667, "learning_rate": 3.9437775110044015e-05, "loss": 0.6474, "step": 5289 }, { "epoch": 6.7712, "grad_norm": 0.7308269739151001, "learning_rate": 3.9435774309723894e-05, "loss": 0.6068, "step": 5290 }, { "epoch": 6.77248, "grad_norm": 0.8274186253547668, "learning_rate": 3.9433773509403766e-05, "loss": 0.6928, "step": 5291 }, { "epoch": 6.77376, "grad_norm": 0.7699143886566162, "learning_rate": 3.943177270908364e-05, "loss": 0.636, "step": 5292 }, { "epoch": 6.77504, "grad_norm": 0.7616050243377686, "learning_rate": 3.942977190876351e-05, "loss": 0.6021, "step": 5293 }, { "epoch": 6.77632, "grad_norm": 0.7531759142875671, "learning_rate": 3.942777110844338e-05, "loss": 0.5899, "step": 5294 }, { "epoch": 6.7776, "grad_norm": 0.7871730327606201, "learning_rate": 3.942577030812325e-05, "loss": 0.6186, "step": 5295 }, { "epoch": 6.77888, "grad_norm": 0.7265611290931702, "learning_rate": 3.942376950780312e-05, "loss": 0.5459, "step": 5296 }, { "epoch": 6.78016, "grad_norm": 0.7594027519226074, "learning_rate": 3.9421768707483e-05, "loss": 0.6046, "step": 5297 }, { "epoch": 6.78144, "grad_norm": 0.792248010635376, "learning_rate": 3.941976790716287e-05, "loss": 
0.6421, "step": 5298 }, { "epoch": 6.78272, "grad_norm": 0.8279687166213989, "learning_rate": 3.941776710684274e-05, "loss": 0.6959, "step": 5299 }, { "epoch": 6.784, "grad_norm": 0.764121413230896, "learning_rate": 3.941576630652261e-05, "loss": 0.6368, "step": 5300 }, { "epoch": 6.78528, "grad_norm": 0.7647132873535156, "learning_rate": 3.9413765506202484e-05, "loss": 0.6331, "step": 5301 }, { "epoch": 6.78656, "grad_norm": 0.7814575433731079, "learning_rate": 3.9411764705882356e-05, "loss": 0.647, "step": 5302 }, { "epoch": 6.78784, "grad_norm": 0.7337337732315063, "learning_rate": 3.940976390556223e-05, "loss": 0.5798, "step": 5303 }, { "epoch": 6.7891200000000005, "grad_norm": 0.7567817568778992, "learning_rate": 3.94077631052421e-05, "loss": 0.6858, "step": 5304 }, { "epoch": 6.7904, "grad_norm": 0.7290568351745605, "learning_rate": 3.940576230492197e-05, "loss": 0.6001, "step": 5305 }, { "epoch": 6.79168, "grad_norm": 0.7724208235740662, "learning_rate": 3.9403761504601844e-05, "loss": 0.6107, "step": 5306 }, { "epoch": 6.79296, "grad_norm": 0.7930365800857544, "learning_rate": 3.9401760704281716e-05, "loss": 0.6773, "step": 5307 }, { "epoch": 6.79424, "grad_norm": 0.7742063999176025, "learning_rate": 3.939975990396159e-05, "loss": 0.6405, "step": 5308 }, { "epoch": 6.79552, "grad_norm": 0.808784008026123, "learning_rate": 3.939775910364146e-05, "loss": 0.6853, "step": 5309 }, { "epoch": 6.7968, "grad_norm": 0.7806289196014404, "learning_rate": 3.939575830332133e-05, "loss": 0.6544, "step": 5310 }, { "epoch": 6.79808, "grad_norm": 0.7165932655334473, "learning_rate": 3.93937575030012e-05, "loss": 0.5988, "step": 5311 }, { "epoch": 6.79936, "grad_norm": 0.7418404221534729, "learning_rate": 3.9391756702681075e-05, "loss": 0.6256, "step": 5312 }, { "epoch": 6.80064, "grad_norm": 0.7737796902656555, "learning_rate": 3.938975590236095e-05, "loss": 0.5891, "step": 5313 }, { "epoch": 6.80192, "grad_norm": 0.7775099873542786, "learning_rate": 3.938775510204082e-05, 
"loss": 0.6219, "step": 5314 }, { "epoch": 6.8032, "grad_norm": 0.7535067796707153, "learning_rate": 3.938575430172069e-05, "loss": 0.5918, "step": 5315 }, { "epoch": 6.80448, "grad_norm": 0.7628635764122009, "learning_rate": 3.938375350140056e-05, "loss": 0.5637, "step": 5316 }, { "epoch": 6.80576, "grad_norm": 0.7356747984886169, "learning_rate": 3.9381752701080434e-05, "loss": 0.5817, "step": 5317 }, { "epoch": 6.80704, "grad_norm": 0.7762408256530762, "learning_rate": 3.9379751900760306e-05, "loss": 0.6193, "step": 5318 }, { "epoch": 6.80832, "grad_norm": 0.74337238073349, "learning_rate": 3.937775110044018e-05, "loss": 0.6156, "step": 5319 }, { "epoch": 6.8096, "grad_norm": 0.7793139815330505, "learning_rate": 3.937575030012005e-05, "loss": 0.6768, "step": 5320 }, { "epoch": 6.81088, "grad_norm": 0.7772572040557861, "learning_rate": 3.937374949979992e-05, "loss": 0.6463, "step": 5321 }, { "epoch": 6.81216, "grad_norm": 0.7629466652870178, "learning_rate": 3.9371748699479793e-05, "loss": 0.6277, "step": 5322 }, { "epoch": 6.81344, "grad_norm": 0.7419812083244324, "learning_rate": 3.9369747899159665e-05, "loss": 0.5655, "step": 5323 }, { "epoch": 6.81472, "grad_norm": 0.7378270030021667, "learning_rate": 3.936774709883954e-05, "loss": 0.5853, "step": 5324 }, { "epoch": 6.816, "grad_norm": 0.7700802683830261, "learning_rate": 3.936574629851941e-05, "loss": 0.6331, "step": 5325 }, { "epoch": 6.81728, "grad_norm": 0.7285279631614685, "learning_rate": 3.936374549819928e-05, "loss": 0.6299, "step": 5326 }, { "epoch": 6.81856, "grad_norm": 0.7254111766815186, "learning_rate": 3.936174469787915e-05, "loss": 0.6142, "step": 5327 }, { "epoch": 6.81984, "grad_norm": 0.7612648606300354, "learning_rate": 3.9359743897559025e-05, "loss": 0.6056, "step": 5328 }, { "epoch": 6.82112, "grad_norm": 0.7452360391616821, "learning_rate": 3.9357743097238896e-05, "loss": 0.6107, "step": 5329 }, { "epoch": 6.8224, "grad_norm": 0.7449943423271179, "learning_rate": 3.935574229691877e-05, 
"loss": 0.6078, "step": 5330 }, { "epoch": 6.8236799999999995, "grad_norm": 0.728469729423523, "learning_rate": 3.935374149659864e-05, "loss": 0.593, "step": 5331 }, { "epoch": 6.82496, "grad_norm": 0.8360829949378967, "learning_rate": 3.935174069627851e-05, "loss": 0.6543, "step": 5332 }, { "epoch": 6.82624, "grad_norm": 0.8084598779678345, "learning_rate": 3.934973989595839e-05, "loss": 0.662, "step": 5333 }, { "epoch": 6.82752, "grad_norm": 0.758673369884491, "learning_rate": 3.9347739095638256e-05, "loss": 0.6426, "step": 5334 }, { "epoch": 6.8288, "grad_norm": 0.7647375464439392, "learning_rate": 3.934573829531813e-05, "loss": 0.5502, "step": 5335 }, { "epoch": 6.83008, "grad_norm": 0.7250184416770935, "learning_rate": 3.9343737494998e-05, "loss": 0.6173, "step": 5336 }, { "epoch": 6.83136, "grad_norm": 0.7183948159217834, "learning_rate": 3.934173669467787e-05, "loss": 0.5485, "step": 5337 }, { "epoch": 6.83264, "grad_norm": 0.7605459690093994, "learning_rate": 3.933973589435774e-05, "loss": 0.5859, "step": 5338 }, { "epoch": 6.83392, "grad_norm": 0.7369337677955627, "learning_rate": 3.9337735094037615e-05, "loss": 0.6079, "step": 5339 }, { "epoch": 6.8352, "grad_norm": 0.7497186660766602, "learning_rate": 3.9335734293717494e-05, "loss": 0.5758, "step": 5340 }, { "epoch": 6.83648, "grad_norm": 0.76474928855896, "learning_rate": 3.9333733493397365e-05, "loss": 0.645, "step": 5341 }, { "epoch": 6.83776, "grad_norm": 0.7687287926673889, "learning_rate": 3.933173269307723e-05, "loss": 0.6172, "step": 5342 }, { "epoch": 6.83904, "grad_norm": 0.7535450458526611, "learning_rate": 3.93297318927571e-05, "loss": 0.5974, "step": 5343 }, { "epoch": 6.84032, "grad_norm": 0.7823262214660645, "learning_rate": 3.9327731092436974e-05, "loss": 0.5917, "step": 5344 }, { "epoch": 6.8416, "grad_norm": 0.765173077583313, "learning_rate": 3.9325730292116846e-05, "loss": 0.67, "step": 5345 }, { "epoch": 6.84288, "grad_norm": 0.7360157370567322, "learning_rate": 
3.932372949179672e-05, "loss": 0.6097, "step": 5346 }, { "epoch": 6.8441600000000005, "grad_norm": 0.7717430591583252, "learning_rate": 3.9321728691476597e-05, "loss": 0.6088, "step": 5347 }, { "epoch": 6.84544, "grad_norm": 0.7661362886428833, "learning_rate": 3.931972789115647e-05, "loss": 0.6418, "step": 5348 }, { "epoch": 6.84672, "grad_norm": 0.7785105109214783, "learning_rate": 3.931772709083634e-05, "loss": 0.603, "step": 5349 }, { "epoch": 6.848, "grad_norm": 0.7660150527954102, "learning_rate": 3.9315726290516205e-05, "loss": 0.6095, "step": 5350 }, { "epoch": 6.84928, "grad_norm": 0.7388876676559448, "learning_rate": 3.931372549019608e-05, "loss": 0.5756, "step": 5351 }, { "epoch": 6.85056, "grad_norm": 0.7814899682998657, "learning_rate": 3.931172468987595e-05, "loss": 0.6265, "step": 5352 }, { "epoch": 6.85184, "grad_norm": 0.7103585600852966, "learning_rate": 3.930972388955582e-05, "loss": 0.5408, "step": 5353 }, { "epoch": 6.85312, "grad_norm": 0.7650427222251892, "learning_rate": 3.93077230892357e-05, "loss": 0.6405, "step": 5354 }, { "epoch": 6.8544, "grad_norm": 0.7595013380050659, "learning_rate": 3.930572228891557e-05, "loss": 0.6052, "step": 5355 }, { "epoch": 6.8556799999999996, "grad_norm": 0.7787649631500244, "learning_rate": 3.930372148859544e-05, "loss": 0.6362, "step": 5356 }, { "epoch": 6.85696, "grad_norm": 0.7371984720230103, "learning_rate": 3.9301720688275315e-05, "loss": 0.6212, "step": 5357 }, { "epoch": 6.85824, "grad_norm": 0.755862295627594, "learning_rate": 3.929971988795518e-05, "loss": 0.5907, "step": 5358 }, { "epoch": 6.85952, "grad_norm": 0.7287248969078064, "learning_rate": 3.929771908763505e-05, "loss": 0.5912, "step": 5359 }, { "epoch": 6.8608, "grad_norm": 0.7712188363075256, "learning_rate": 3.9295718287314924e-05, "loss": 0.6261, "step": 5360 }, { "epoch": 6.86208, "grad_norm": 0.8140777945518494, "learning_rate": 3.92937174869948e-05, "loss": 0.6052, "step": 5361 }, { "epoch": 6.86336, "grad_norm": 
0.7377832531929016, "learning_rate": 3.9291716686674674e-05, "loss": 0.5631, "step": 5362 }, { "epoch": 6.86464, "grad_norm": 0.75379878282547, "learning_rate": 3.9289715886354546e-05, "loss": 0.6071, "step": 5363 }, { "epoch": 6.86592, "grad_norm": 0.7823106646537781, "learning_rate": 3.928771508603442e-05, "loss": 0.6651, "step": 5364 }, { "epoch": 6.8672, "grad_norm": 0.7541218400001526, "learning_rate": 3.928571428571429e-05, "loss": 0.6412, "step": 5365 }, { "epoch": 6.86848, "grad_norm": 0.7208373546600342, "learning_rate": 3.9283713485394155e-05, "loss": 0.605, "step": 5366 }, { "epoch": 6.86976, "grad_norm": 0.7709495425224304, "learning_rate": 3.928171268507403e-05, "loss": 0.6041, "step": 5367 }, { "epoch": 6.87104, "grad_norm": 0.8122734427452087, "learning_rate": 3.9279711884753906e-05, "loss": 0.6647, "step": 5368 }, { "epoch": 6.87232, "grad_norm": 0.7816949486732483, "learning_rate": 3.927771108443378e-05, "loss": 0.6798, "step": 5369 }, { "epoch": 6.8736, "grad_norm": 0.7514051198959351, "learning_rate": 3.927571028411365e-05, "loss": 0.5785, "step": 5370 }, { "epoch": 6.87488, "grad_norm": 0.8315825462341309, "learning_rate": 3.927370948379352e-05, "loss": 0.6399, "step": 5371 }, { "epoch": 6.8761600000000005, "grad_norm": 0.7676405310630798, "learning_rate": 3.927170868347339e-05, "loss": 0.6142, "step": 5372 }, { "epoch": 6.87744, "grad_norm": 0.7865943312644958, "learning_rate": 3.9269707883153265e-05, "loss": 0.6319, "step": 5373 }, { "epoch": 6.8787199999999995, "grad_norm": 0.7144403457641602, "learning_rate": 3.926770708283313e-05, "loss": 0.5566, "step": 5374 }, { "epoch": 6.88, "grad_norm": 0.8031983971595764, "learning_rate": 3.926570628251301e-05, "loss": 0.6461, "step": 5375 }, { "epoch": 6.88128, "grad_norm": 0.761948823928833, "learning_rate": 3.926370548219288e-05, "loss": 0.6207, "step": 5376 }, { "epoch": 6.88256, "grad_norm": 0.7399935126304626, "learning_rate": 3.926170468187275e-05, "loss": 0.6265, "step": 5377 }, { "epoch": 
6.88384, "grad_norm": 0.7543573975563049, "learning_rate": 3.9259703881552624e-05, "loss": 0.6648, "step": 5378 }, { "epoch": 6.88512, "grad_norm": 0.7618049383163452, "learning_rate": 3.9257703081232496e-05, "loss": 0.6135, "step": 5379 }, { "epoch": 6.8864, "grad_norm": 0.7822875380516052, "learning_rate": 3.925570228091237e-05, "loss": 0.6238, "step": 5380 }, { "epoch": 6.88768, "grad_norm": 0.7895389795303345, "learning_rate": 3.925370148059224e-05, "loss": 0.613, "step": 5381 }, { "epoch": 6.88896, "grad_norm": 0.7635522484779358, "learning_rate": 3.925170068027211e-05, "loss": 0.581, "step": 5382 }, { "epoch": 6.89024, "grad_norm": 0.761661171913147, "learning_rate": 3.9249699879951983e-05, "loss": 0.5799, "step": 5383 }, { "epoch": 6.89152, "grad_norm": 0.7741929888725281, "learning_rate": 3.9247699079631855e-05, "loss": 0.6283, "step": 5384 }, { "epoch": 6.8928, "grad_norm": 0.7595140933990479, "learning_rate": 3.924569827931173e-05, "loss": 0.6398, "step": 5385 }, { "epoch": 6.89408, "grad_norm": 0.7731903195381165, "learning_rate": 3.92436974789916e-05, "loss": 0.5852, "step": 5386 }, { "epoch": 6.89536, "grad_norm": 0.7807347774505615, "learning_rate": 3.924169667867147e-05, "loss": 0.6452, "step": 5387 }, { "epoch": 6.89664, "grad_norm": 0.7398995757102966, "learning_rate": 3.923969587835134e-05, "loss": 0.6283, "step": 5388 }, { "epoch": 6.89792, "grad_norm": 0.7703590393066406, "learning_rate": 3.9237695078031215e-05, "loss": 0.5941, "step": 5389 }, { "epoch": 6.8992, "grad_norm": 0.8060065507888794, "learning_rate": 3.9235694277711086e-05, "loss": 0.6347, "step": 5390 }, { "epoch": 6.90048, "grad_norm": 0.7907054424285889, "learning_rate": 3.923369347739096e-05, "loss": 0.6734, "step": 5391 }, { "epoch": 6.90176, "grad_norm": 0.7742048501968384, "learning_rate": 3.923169267707083e-05, "loss": 0.6362, "step": 5392 }, { "epoch": 6.90304, "grad_norm": 0.7606136798858643, "learning_rate": 3.92296918767507e-05, "loss": 0.6501, "step": 5393 }, { "epoch": 
6.90432, "grad_norm": 0.7519256472587585, "learning_rate": 3.9227691076430574e-05, "loss": 0.6411, "step": 5394 }, { "epoch": 6.9056, "grad_norm": 0.7524474859237671, "learning_rate": 3.9225690276110446e-05, "loss": 0.6171, "step": 5395 }, { "epoch": 6.90688, "grad_norm": 0.8051052093505859, "learning_rate": 3.922368947579032e-05, "loss": 0.6251, "step": 5396 }, { "epoch": 6.90816, "grad_norm": 0.7711687684059143, "learning_rate": 3.922168867547019e-05, "loss": 0.6428, "step": 5397 }, { "epoch": 6.90944, "grad_norm": 0.7528000473976135, "learning_rate": 3.921968787515006e-05, "loss": 0.6054, "step": 5398 }, { "epoch": 6.9107199999999995, "grad_norm": 0.7490524649620056, "learning_rate": 3.921768707482993e-05, "loss": 0.5886, "step": 5399 }, { "epoch": 6.912, "grad_norm": 0.7919133901596069, "learning_rate": 3.9215686274509805e-05, "loss": 0.5964, "step": 5400 }, { "epoch": 6.91328, "grad_norm": 0.8205950856208801, "learning_rate": 3.921368547418968e-05, "loss": 0.6063, "step": 5401 }, { "epoch": 6.91456, "grad_norm": 0.7822876572608948, "learning_rate": 3.921168467386955e-05, "loss": 0.5971, "step": 5402 }, { "epoch": 6.91584, "grad_norm": 0.7439231276512146, "learning_rate": 3.920968387354943e-05, "loss": 0.5844, "step": 5403 }, { "epoch": 6.91712, "grad_norm": 0.7648040056228638, "learning_rate": 3.920768307322929e-05, "loss": 0.6627, "step": 5404 }, { "epoch": 6.9184, "grad_norm": 0.7816339135169983, "learning_rate": 3.9205682272909164e-05, "loss": 0.6154, "step": 5405 }, { "epoch": 6.91968, "grad_norm": 0.7725468873977661, "learning_rate": 3.9203681472589036e-05, "loss": 0.6032, "step": 5406 }, { "epoch": 6.92096, "grad_norm": 0.7589234709739685, "learning_rate": 3.920168067226891e-05, "loss": 0.6349, "step": 5407 }, { "epoch": 6.92224, "grad_norm": 0.7441187500953674, "learning_rate": 3.919967987194878e-05, "loss": 0.5901, "step": 5408 }, { "epoch": 6.92352, "grad_norm": 0.8013135194778442, "learning_rate": 3.919767907162865e-05, "loss": 0.6455, "step": 5409 
}, { "epoch": 6.9248, "grad_norm": 0.7458381652832031, "learning_rate": 3.919567827130853e-05, "loss": 0.589, "step": 5410 }, { "epoch": 6.92608, "grad_norm": 0.7533714771270752, "learning_rate": 3.91936774709884e-05, "loss": 0.5634, "step": 5411 }, { "epoch": 6.92736, "grad_norm": 0.7798910140991211, "learning_rate": 3.919167667066827e-05, "loss": 0.6227, "step": 5412 }, { "epoch": 6.92864, "grad_norm": 0.7700549960136414, "learning_rate": 3.918967587034814e-05, "loss": 0.6561, "step": 5413 }, { "epoch": 6.92992, "grad_norm": 0.7472692728042603, "learning_rate": 3.918767507002801e-05, "loss": 0.6014, "step": 5414 }, { "epoch": 6.9312000000000005, "grad_norm": 0.7239599227905273, "learning_rate": 3.918567426970788e-05, "loss": 0.5716, "step": 5415 }, { "epoch": 6.93248, "grad_norm": 0.7691959738731384, "learning_rate": 3.9183673469387755e-05, "loss": 0.6603, "step": 5416 }, { "epoch": 6.93376, "grad_norm": 0.8300781846046448, "learning_rate": 3.918167266906763e-05, "loss": 0.6178, "step": 5417 }, { "epoch": 6.93504, "grad_norm": 0.7766384482383728, "learning_rate": 3.9179671868747505e-05, "loss": 0.6278, "step": 5418 }, { "epoch": 6.93632, "grad_norm": 0.7828888297080994, "learning_rate": 3.917767106842738e-05, "loss": 0.6175, "step": 5419 }, { "epoch": 6.9376, "grad_norm": 0.7322559356689453, "learning_rate": 3.917567026810724e-05, "loss": 0.5805, "step": 5420 }, { "epoch": 6.93888, "grad_norm": 0.7959531545639038, "learning_rate": 3.9173669467787114e-05, "loss": 0.6813, "step": 5421 }, { "epoch": 6.94016, "grad_norm": 0.7419981360435486, "learning_rate": 3.9171668667466986e-05, "loss": 0.6288, "step": 5422 }, { "epoch": 6.94144, "grad_norm": 0.7409440875053406, "learning_rate": 3.916966786714686e-05, "loss": 0.5965, "step": 5423 }, { "epoch": 6.94272, "grad_norm": 0.7452877759933472, "learning_rate": 3.916766706682673e-05, "loss": 0.6342, "step": 5424 }, { "epoch": 6.944, "grad_norm": 0.7688354849815369, "learning_rate": 3.916566626650661e-05, "loss": 0.611, 
"step": 5425 }, { "epoch": 6.94528, "grad_norm": 0.7577010989189148, "learning_rate": 3.916366546618648e-05, "loss": 0.6594, "step": 5426 }, { "epoch": 6.94656, "grad_norm": 0.7698047161102295, "learning_rate": 3.916166466586635e-05, "loss": 0.6134, "step": 5427 }, { "epoch": 6.94784, "grad_norm": 0.7376213073730469, "learning_rate": 3.915966386554622e-05, "loss": 0.6034, "step": 5428 }, { "epoch": 6.94912, "grad_norm": 0.7267906665802002, "learning_rate": 3.915766306522609e-05, "loss": 0.5529, "step": 5429 }, { "epoch": 6.9504, "grad_norm": 0.7915275692939758, "learning_rate": 3.915566226490596e-05, "loss": 0.6388, "step": 5430 }, { "epoch": 6.95168, "grad_norm": 0.8539730906486511, "learning_rate": 3.915366146458583e-05, "loss": 0.6788, "step": 5431 }, { "epoch": 6.95296, "grad_norm": 0.808468222618103, "learning_rate": 3.915166066426571e-05, "loss": 0.6558, "step": 5432 }, { "epoch": 6.95424, "grad_norm": 0.8144415616989136, "learning_rate": 3.914965986394558e-05, "loss": 0.6735, "step": 5433 }, { "epoch": 6.95552, "grad_norm": 0.7447432279586792, "learning_rate": 3.9147659063625455e-05, "loss": 0.5727, "step": 5434 }, { "epoch": 6.9568, "grad_norm": 0.7698988914489746, "learning_rate": 3.914565826330533e-05, "loss": 0.6138, "step": 5435 }, { "epoch": 6.95808, "grad_norm": 0.8165004849433899, "learning_rate": 3.914365746298519e-05, "loss": 0.6303, "step": 5436 }, { "epoch": 6.95936, "grad_norm": 0.771243155002594, "learning_rate": 3.9141656662665064e-05, "loss": 0.6554, "step": 5437 }, { "epoch": 6.96064, "grad_norm": 0.7581053376197815, "learning_rate": 3.9139655862344936e-05, "loss": 0.6171, "step": 5438 }, { "epoch": 6.96192, "grad_norm": 0.7608379125595093, "learning_rate": 3.9137655062024814e-05, "loss": 0.5761, "step": 5439 }, { "epoch": 6.9632, "grad_norm": 0.7522656321525574, "learning_rate": 3.9135654261704686e-05, "loss": 0.5646, "step": 5440 }, { "epoch": 6.96448, "grad_norm": 0.7819010019302368, "learning_rate": 3.913365346138456e-05, "loss": 0.6509, 
"step": 5441 }, { "epoch": 6.9657599999999995, "grad_norm": 0.783852756023407, "learning_rate": 3.913165266106443e-05, "loss": 0.6482, "step": 5442 }, { "epoch": 6.96704, "grad_norm": 0.7638618350028992, "learning_rate": 3.91296518607443e-05, "loss": 0.6488, "step": 5443 }, { "epoch": 6.96832, "grad_norm": 0.7872533798217773, "learning_rate": 3.912765106042417e-05, "loss": 0.6425, "step": 5444 }, { "epoch": 6.9696, "grad_norm": 0.8017598986625671, "learning_rate": 3.912565026010404e-05, "loss": 0.6332, "step": 5445 }, { "epoch": 6.97088, "grad_norm": 0.784069836139679, "learning_rate": 3.912364945978392e-05, "loss": 0.6307, "step": 5446 }, { "epoch": 6.97216, "grad_norm": 0.8013083338737488, "learning_rate": 3.912164865946379e-05, "loss": 0.6808, "step": 5447 }, { "epoch": 6.97344, "grad_norm": 0.7447139024734497, "learning_rate": 3.911964785914366e-05, "loss": 0.6048, "step": 5448 }, { "epoch": 6.97472, "grad_norm": 0.7573011517524719, "learning_rate": 3.911764705882353e-05, "loss": 0.5808, "step": 5449 }, { "epoch": 6.976, "grad_norm": 0.7706106901168823, "learning_rate": 3.9115646258503405e-05, "loss": 0.5773, "step": 5450 }, { "epoch": 6.97728, "grad_norm": 0.8079164624214172, "learning_rate": 3.9113645458183276e-05, "loss": 0.6569, "step": 5451 }, { "epoch": 6.97856, "grad_norm": 0.720331609249115, "learning_rate": 3.911164465786314e-05, "loss": 0.6154, "step": 5452 }, { "epoch": 6.97984, "grad_norm": 0.7962987422943115, "learning_rate": 3.910964385754302e-05, "loss": 0.6097, "step": 5453 }, { "epoch": 6.98112, "grad_norm": 0.7449235916137695, "learning_rate": 3.910764305722289e-05, "loss": 0.585, "step": 5454 }, { "epoch": 6.9824, "grad_norm": 0.7487616539001465, "learning_rate": 3.9105642256902764e-05, "loss": 0.6123, "step": 5455 }, { "epoch": 6.98368, "grad_norm": 0.775963544845581, "learning_rate": 3.9103641456582636e-05, "loss": 0.6661, "step": 5456 }, { "epoch": 6.98496, "grad_norm": 0.710671603679657, "learning_rate": 3.910164065626251e-05, "loss": 
0.5817, "step": 5457 }, { "epoch": 6.9862400000000004, "grad_norm": 0.7331653237342834, "learning_rate": 3.909963985594238e-05, "loss": 0.5741, "step": 5458 }, { "epoch": 6.98752, "grad_norm": 0.7193625569343567, "learning_rate": 3.909763905562225e-05, "loss": 0.5515, "step": 5459 }, { "epoch": 6.9888, "grad_norm": 0.7541481852531433, "learning_rate": 3.909563825530212e-05, "loss": 0.595, "step": 5460 }, { "epoch": 6.99008, "grad_norm": 0.7617225646972656, "learning_rate": 3.9093637454981995e-05, "loss": 0.6112, "step": 5461 }, { "epoch": 6.99136, "grad_norm": 0.7736883163452148, "learning_rate": 3.909163665466187e-05, "loss": 0.6292, "step": 5462 }, { "epoch": 6.99264, "grad_norm": 0.796480655670166, "learning_rate": 3.908963585434174e-05, "loss": 0.6122, "step": 5463 }, { "epoch": 6.99392, "grad_norm": 0.7853350043296814, "learning_rate": 3.908763505402161e-05, "loss": 0.6188, "step": 5464 }, { "epoch": 6.9952, "grad_norm": 0.7956370115280151, "learning_rate": 3.908563425370148e-05, "loss": 0.6699, "step": 5465 }, { "epoch": 6.99648, "grad_norm": 0.7397189736366272, "learning_rate": 3.9083633453381354e-05, "loss": 0.647, "step": 5466 }, { "epoch": 6.9977599999999995, "grad_norm": 0.6985254287719727, "learning_rate": 3.9081632653061226e-05, "loss": 0.5586, "step": 5467 }, { "epoch": 6.99904, "grad_norm": 0.8187921047210693, "learning_rate": 3.90796318527411e-05, "loss": 0.6705, "step": 5468 }, { "epoch": 7.00032, "grad_norm": 1.61716628074646, "learning_rate": 3.907763105242097e-05, "loss": 1.0948, "step": 5469 }, { "epoch": 7.0016, "grad_norm": 0.7572510838508606, "learning_rate": 3.907563025210084e-05, "loss": 0.5787, "step": 5470 }, { "epoch": 7.00288, "grad_norm": 0.7337214350700378, "learning_rate": 3.9073629451780714e-05, "loss": 0.579, "step": 5471 }, { "epoch": 7.00416, "grad_norm": 0.7118316888809204, "learning_rate": 3.9071628651460585e-05, "loss": 0.5866, "step": 5472 }, { "epoch": 7.00544, "grad_norm": 0.7408485412597656, "learning_rate": 
3.906962785114046e-05, "loss": 0.6048, "step": 5473 }, { "epoch": 7.00672, "grad_norm": 0.7442348003387451, "learning_rate": 3.906762705082033e-05, "loss": 0.5916, "step": 5474 }, { "epoch": 7.008, "grad_norm": 0.7323760390281677, "learning_rate": 3.90656262505002e-05, "loss": 0.6146, "step": 5475 }, { "epoch": 7.00928, "grad_norm": 0.7773112654685974, "learning_rate": 3.906362545018007e-05, "loss": 0.6156, "step": 5476 }, { "epoch": 7.01056, "grad_norm": 0.7394047975540161, "learning_rate": 3.9061624649859945e-05, "loss": 0.5846, "step": 5477 }, { "epoch": 7.01184, "grad_norm": 0.7753980755805969, "learning_rate": 3.9059623849539817e-05, "loss": 0.6151, "step": 5478 }, { "epoch": 7.01312, "grad_norm": 0.7161932587623596, "learning_rate": 3.905762304921969e-05, "loss": 0.5535, "step": 5479 }, { "epoch": 7.0144, "grad_norm": 0.7652511596679688, "learning_rate": 3.905562224889956e-05, "loss": 0.6409, "step": 5480 }, { "epoch": 7.01568, "grad_norm": 0.7910155653953552, "learning_rate": 3.905362144857944e-05, "loss": 0.6257, "step": 5481 }, { "epoch": 7.01696, "grad_norm": 0.7763440012931824, "learning_rate": 3.9051620648259304e-05, "loss": 0.5843, "step": 5482 }, { "epoch": 7.01824, "grad_norm": 0.806786298751831, "learning_rate": 3.9049619847939176e-05, "loss": 0.5773, "step": 5483 }, { "epoch": 7.01952, "grad_norm": 0.7779539227485657, "learning_rate": 3.904761904761905e-05, "loss": 0.6031, "step": 5484 }, { "epoch": 7.0208, "grad_norm": 0.7639070153236389, "learning_rate": 3.904561824729892e-05, "loss": 0.5415, "step": 5485 }, { "epoch": 7.02208, "grad_norm": 0.7557491064071655, "learning_rate": 3.904361744697879e-05, "loss": 0.5762, "step": 5486 }, { "epoch": 7.02336, "grad_norm": 0.7613850235939026, "learning_rate": 3.904161664665866e-05, "loss": 0.6029, "step": 5487 }, { "epoch": 7.02464, "grad_norm": 0.8083841800689697, "learning_rate": 3.903961584633854e-05, "loss": 0.6372, "step": 5488 }, { "epoch": 7.02592, "grad_norm": 0.7833201885223389, "learning_rate": 
3.9037615046018414e-05, "loss": 0.6152, "step": 5489 }, { "epoch": 7.0272, "grad_norm": 0.7935197949409485, "learning_rate": 3.903561424569828e-05, "loss": 0.5762, "step": 5490 }, { "epoch": 7.02848, "grad_norm": 0.7885215878486633, "learning_rate": 3.903361344537815e-05, "loss": 0.6361, "step": 5491 }, { "epoch": 7.02976, "grad_norm": 0.770488440990448, "learning_rate": 3.903161264505802e-05, "loss": 0.6102, "step": 5492 }, { "epoch": 7.03104, "grad_norm": 0.7559887170791626, "learning_rate": 3.9029611844737894e-05, "loss": 0.601, "step": 5493 }, { "epoch": 7.03232, "grad_norm": 0.8430263996124268, "learning_rate": 3.9027611044417766e-05, "loss": 0.6871, "step": 5494 }, { "epoch": 7.0336, "grad_norm": 0.729887068271637, "learning_rate": 3.9025610244097645e-05, "loss": 0.5806, "step": 5495 }, { "epoch": 7.03488, "grad_norm": 0.7851121425628662, "learning_rate": 3.902360944377752e-05, "loss": 0.5748, "step": 5496 }, { "epoch": 7.03616, "grad_norm": 0.7651329040527344, "learning_rate": 3.902160864345739e-05, "loss": 0.6004, "step": 5497 }, { "epoch": 7.03744, "grad_norm": 0.7575582265853882, "learning_rate": 3.9019607843137254e-05, "loss": 0.6509, "step": 5498 }, { "epoch": 7.03872, "grad_norm": 0.7376059293746948, "learning_rate": 3.9017607042817126e-05, "loss": 0.5799, "step": 5499 }, { "epoch": 7.04, "grad_norm": 0.723619282245636, "learning_rate": 3.9015606242497e-05, "loss": 0.6004, "step": 5500 }, { "epoch": 7.04128, "grad_norm": 0.803107738494873, "learning_rate": 3.901360544217687e-05, "loss": 0.5998, "step": 5501 }, { "epoch": 7.04256, "grad_norm": 0.8073145151138306, "learning_rate": 3.901160464185675e-05, "loss": 0.5843, "step": 5502 }, { "epoch": 7.04384, "grad_norm": 0.8166995644569397, "learning_rate": 3.900960384153662e-05, "loss": 0.6536, "step": 5503 }, { "epoch": 7.04512, "grad_norm": 0.7663634419441223, "learning_rate": 3.900760304121649e-05, "loss": 0.6081, "step": 5504 }, { "epoch": 7.0464, "grad_norm": 0.8213377594947815, "learning_rate": 
3.9005602240896364e-05, "loss": 0.5922, "step": 5505 }, { "epoch": 7.04768, "grad_norm": 0.7828342318534851, "learning_rate": 3.900360144057623e-05, "loss": 0.6146, "step": 5506 }, { "epoch": 7.04896, "grad_norm": 0.8070378303527832, "learning_rate": 3.90016006402561e-05, "loss": 0.6231, "step": 5507 }, { "epoch": 7.05024, "grad_norm": 0.7503546476364136, "learning_rate": 3.899959983993597e-05, "loss": 0.5776, "step": 5508 }, { "epoch": 7.05152, "grad_norm": 0.8065857291221619, "learning_rate": 3.899759903961585e-05, "loss": 0.6244, "step": 5509 }, { "epoch": 7.0528, "grad_norm": 0.7999621629714966, "learning_rate": 3.899559823929572e-05, "loss": 0.5803, "step": 5510 }, { "epoch": 7.05408, "grad_norm": 0.7979943752288818, "learning_rate": 3.8993597438975595e-05, "loss": 0.5895, "step": 5511 }, { "epoch": 7.05536, "grad_norm": 0.8131271004676819, "learning_rate": 3.8991596638655467e-05, "loss": 0.615, "step": 5512 }, { "epoch": 7.05664, "grad_norm": 0.7797995209693909, "learning_rate": 3.898959583833534e-05, "loss": 0.5712, "step": 5513 }, { "epoch": 7.05792, "grad_norm": 0.8302449584007263, "learning_rate": 3.8987595038015203e-05, "loss": 0.6598, "step": 5514 }, { "epoch": 7.0592, "grad_norm": 0.817923367023468, "learning_rate": 3.8985594237695075e-05, "loss": 0.627, "step": 5515 }, { "epoch": 7.06048, "grad_norm": 0.8435928821563721, "learning_rate": 3.8983593437374954e-05, "loss": 0.6078, "step": 5516 }, { "epoch": 7.06176, "grad_norm": 0.7841606736183167, "learning_rate": 3.8981592637054826e-05, "loss": 0.6069, "step": 5517 }, { "epoch": 7.06304, "grad_norm": 0.7932061553001404, "learning_rate": 3.89795918367347e-05, "loss": 0.5993, "step": 5518 }, { "epoch": 7.06432, "grad_norm": 0.8158665895462036, "learning_rate": 3.897759103641457e-05, "loss": 0.6412, "step": 5519 }, { "epoch": 7.0656, "grad_norm": 0.7837883830070496, "learning_rate": 3.897559023609444e-05, "loss": 0.6011, "step": 5520 }, { "epoch": 7.06688, "grad_norm": 0.8409795761108398, "learning_rate": 
3.897358943577431e-05, "loss": 0.5954, "step": 5521 }, { "epoch": 7.06816, "grad_norm": 0.818547785282135, "learning_rate": 3.897158863545418e-05, "loss": 0.6266, "step": 5522 }, { "epoch": 7.06944, "grad_norm": 0.8386947512626648, "learning_rate": 3.896958783513406e-05, "loss": 0.6724, "step": 5523 }, { "epoch": 7.07072, "grad_norm": 0.7939981818199158, "learning_rate": 3.896758703481393e-05, "loss": 0.571, "step": 5524 }, { "epoch": 7.072, "grad_norm": 0.8104544281959534, "learning_rate": 3.89655862344938e-05, "loss": 0.5906, "step": 5525 }, { "epoch": 7.07328, "grad_norm": 0.805988609790802, "learning_rate": 3.896358543417367e-05, "loss": 0.6188, "step": 5526 }, { "epoch": 7.07456, "grad_norm": 0.7390158176422119, "learning_rate": 3.8961584633853544e-05, "loss": 0.5718, "step": 5527 }, { "epoch": 7.07584, "grad_norm": 0.798747181892395, "learning_rate": 3.8959583833533416e-05, "loss": 0.5949, "step": 5528 }, { "epoch": 7.07712, "grad_norm": 0.8065986633300781, "learning_rate": 3.895758303321329e-05, "loss": 0.6211, "step": 5529 }, { "epoch": 7.0784, "grad_norm": 0.8075457215309143, "learning_rate": 3.895558223289316e-05, "loss": 0.5692, "step": 5530 }, { "epoch": 7.07968, "grad_norm": 0.8485049605369568, "learning_rate": 3.895358143257303e-05, "loss": 0.6845, "step": 5531 }, { "epoch": 7.08096, "grad_norm": 0.7971506118774414, "learning_rate": 3.8951580632252904e-05, "loss": 0.615, "step": 5532 }, { "epoch": 7.08224, "grad_norm": 0.7371413707733154, "learning_rate": 3.8949579831932776e-05, "loss": 0.5338, "step": 5533 }, { "epoch": 7.08352, "grad_norm": 0.7684740424156189, "learning_rate": 3.894757903161265e-05, "loss": 0.5948, "step": 5534 }, { "epoch": 7.0848, "grad_norm": 0.8026983737945557, "learning_rate": 3.894557823129252e-05, "loss": 0.5749, "step": 5535 }, { "epoch": 7.08608, "grad_norm": 0.8087561726570129, "learning_rate": 3.894357743097239e-05, "loss": 0.604, "step": 5536 }, { "epoch": 7.08736, "grad_norm": 0.7562788724899292, "learning_rate": 
3.894157663065226e-05, "loss": 0.5552, "step": 5537 }, { "epoch": 7.08864, "grad_norm": 0.8130525946617126, "learning_rate": 3.8939575830332135e-05, "loss": 0.6516, "step": 5538 }, { "epoch": 7.08992, "grad_norm": 0.7703153491020203, "learning_rate": 3.893757503001201e-05, "loss": 0.5952, "step": 5539 }, { "epoch": 7.0912, "grad_norm": 0.8727670311927795, "learning_rate": 3.893557422969188e-05, "loss": 0.6897, "step": 5540 }, { "epoch": 7.09248, "grad_norm": 0.7629865407943726, "learning_rate": 3.893357342937175e-05, "loss": 0.6019, "step": 5541 }, { "epoch": 7.09376, "grad_norm": 0.7899230122566223, "learning_rate": 3.893157262905162e-05, "loss": 0.6197, "step": 5542 }, { "epoch": 7.09504, "grad_norm": 0.7999109625816345, "learning_rate": 3.8929571828731494e-05, "loss": 0.6019, "step": 5543 }, { "epoch": 7.09632, "grad_norm": 0.7331894040107727, "learning_rate": 3.8927571028411366e-05, "loss": 0.5198, "step": 5544 }, { "epoch": 7.0976, "grad_norm": 0.8195477724075317, "learning_rate": 3.892557022809124e-05, "loss": 0.6371, "step": 5545 }, { "epoch": 7.09888, "grad_norm": 0.8432417511940002, "learning_rate": 3.892356942777111e-05, "loss": 0.6299, "step": 5546 }, { "epoch": 7.10016, "grad_norm": 0.8083561062812805, "learning_rate": 3.892156862745098e-05, "loss": 0.587, "step": 5547 }, { "epoch": 7.10144, "grad_norm": 0.8173826932907104, "learning_rate": 3.891956782713085e-05, "loss": 0.6131, "step": 5548 }, { "epoch": 7.10272, "grad_norm": 0.786679208278656, "learning_rate": 3.8917567026810725e-05, "loss": 0.5952, "step": 5549 }, { "epoch": 7.104, "grad_norm": 0.7349452376365662, "learning_rate": 3.89155662264906e-05, "loss": 0.5611, "step": 5550 }, { "epoch": 7.10528, "grad_norm": 0.7959557771682739, "learning_rate": 3.891356542617047e-05, "loss": 0.6117, "step": 5551 }, { "epoch": 7.10656, "grad_norm": 0.7580567002296448, "learning_rate": 3.891156462585034e-05, "loss": 0.6021, "step": 5552 }, { "epoch": 7.10784, "grad_norm": 0.797226071357727, "learning_rate": 
3.890956382553021e-05, "loss": 0.6083, "step": 5553 }, { "epoch": 7.10912, "grad_norm": 0.8558824062347412, "learning_rate": 3.8907563025210084e-05, "loss": 0.6534, "step": 5554 }, { "epoch": 7.1104, "grad_norm": 0.7972661256790161, "learning_rate": 3.8905562224889956e-05, "loss": 0.6106, "step": 5555 }, { "epoch": 7.11168, "grad_norm": 0.8266356587409973, "learning_rate": 3.890356142456983e-05, "loss": 0.6189, "step": 5556 }, { "epoch": 7.11296, "grad_norm": 0.8113858103752136, "learning_rate": 3.89015606242497e-05, "loss": 0.5916, "step": 5557 }, { "epoch": 7.11424, "grad_norm": 0.7940702438354492, "learning_rate": 3.889955982392957e-05, "loss": 0.6358, "step": 5558 }, { "epoch": 7.11552, "grad_norm": 0.8410483598709106, "learning_rate": 3.889755902360945e-05, "loss": 0.6397, "step": 5559 }, { "epoch": 7.1168, "grad_norm": 0.897203266620636, "learning_rate": 3.8895558223289316e-05, "loss": 0.6482, "step": 5560 }, { "epoch": 7.11808, "grad_norm": 0.781025230884552, "learning_rate": 3.889355742296919e-05, "loss": 0.5471, "step": 5561 }, { "epoch": 7.11936, "grad_norm": 0.7567470073699951, "learning_rate": 3.889155662264906e-05, "loss": 0.5391, "step": 5562 }, { "epoch": 7.12064, "grad_norm": 0.8244337439537048, "learning_rate": 3.888955582232893e-05, "loss": 0.6146, "step": 5563 }, { "epoch": 7.12192, "grad_norm": 0.8109177947044373, "learning_rate": 3.88875550220088e-05, "loss": 0.5691, "step": 5564 }, { "epoch": 7.1232, "grad_norm": 0.7812331914901733, "learning_rate": 3.8885554221688675e-05, "loss": 0.6075, "step": 5565 }, { "epoch": 7.12448, "grad_norm": 0.7723196148872375, "learning_rate": 3.8883553421368554e-05, "loss": 0.6153, "step": 5566 }, { "epoch": 7.12576, "grad_norm": 0.790398120880127, "learning_rate": 3.8881552621048425e-05, "loss": 0.6043, "step": 5567 }, { "epoch": 7.12704, "grad_norm": 0.7210956811904907, "learning_rate": 3.887955182072829e-05, "loss": 0.5861, "step": 5568 }, { "epoch": 7.12832, "grad_norm": 0.7791348695755005, "learning_rate": 
3.887755102040816e-05, "loss": 0.618, "step": 5569 }, { "epoch": 7.1296, "grad_norm": 0.793732225894928, "learning_rate": 3.8875550220088034e-05, "loss": 0.6127, "step": 5570 }, { "epoch": 7.13088, "grad_norm": 0.7834117412567139, "learning_rate": 3.8873549419767906e-05, "loss": 0.5885, "step": 5571 }, { "epoch": 7.13216, "grad_norm": 0.8224747180938721, "learning_rate": 3.887154861944778e-05, "loss": 0.6073, "step": 5572 }, { "epoch": 7.13344, "grad_norm": 0.7827395796775818, "learning_rate": 3.8869547819127657e-05, "loss": 0.5888, "step": 5573 }, { "epoch": 7.13472, "grad_norm": 0.7737107872962952, "learning_rate": 3.886754701880753e-05, "loss": 0.6167, "step": 5574 }, { "epoch": 7.136, "grad_norm": 0.8110948801040649, "learning_rate": 3.88655462184874e-05, "loss": 0.6296, "step": 5575 }, { "epoch": 7.13728, "grad_norm": 0.7978016138076782, "learning_rate": 3.8863545418167265e-05, "loss": 0.6459, "step": 5576 }, { "epoch": 7.13856, "grad_norm": 0.7949130535125732, "learning_rate": 3.886154461784714e-05, "loss": 0.622, "step": 5577 }, { "epoch": 7.13984, "grad_norm": 0.7578536868095398, "learning_rate": 3.885954381752701e-05, "loss": 0.5487, "step": 5578 }, { "epoch": 7.14112, "grad_norm": 0.778209388256073, "learning_rate": 3.885754301720688e-05, "loss": 0.6221, "step": 5579 }, { "epoch": 7.1424, "grad_norm": 0.7351183891296387, "learning_rate": 3.885554221688676e-05, "loss": 0.5419, "step": 5580 }, { "epoch": 7.14368, "grad_norm": 0.811617374420166, "learning_rate": 3.885354141656663e-05, "loss": 0.6545, "step": 5581 }, { "epoch": 7.14496, "grad_norm": 0.7293868064880371, "learning_rate": 3.88515406162465e-05, "loss": 0.5763, "step": 5582 }, { "epoch": 7.14624, "grad_norm": 0.7808999419212341, "learning_rate": 3.8849539815926375e-05, "loss": 0.5435, "step": 5583 }, { "epoch": 7.14752, "grad_norm": 0.7816929221153259, "learning_rate": 3.884753901560624e-05, "loss": 0.6055, "step": 5584 }, { "epoch": 7.1488, "grad_norm": 0.8008720278739929, "learning_rate": 
3.884553821528611e-05, "loss": 0.6337, "step": 5585 }, { "epoch": 7.15008, "grad_norm": 0.7841701507568359, "learning_rate": 3.8843537414965984e-05, "loss": 0.6371, "step": 5586 }, { "epoch": 7.15136, "grad_norm": 0.7534654140472412, "learning_rate": 3.884153661464586e-05, "loss": 0.6057, "step": 5587 }, { "epoch": 7.15264, "grad_norm": 0.7844194769859314, "learning_rate": 3.8839535814325734e-05, "loss": 0.6094, "step": 5588 }, { "epoch": 7.15392, "grad_norm": 0.8300028443336487, "learning_rate": 3.8837535014005606e-05, "loss": 0.6388, "step": 5589 }, { "epoch": 7.1552, "grad_norm": 0.7904089093208313, "learning_rate": 3.883553421368548e-05, "loss": 0.601, "step": 5590 }, { "epoch": 7.15648, "grad_norm": 0.7863451838493347, "learning_rate": 3.883353341336535e-05, "loss": 0.6357, "step": 5591 }, { "epoch": 7.15776, "grad_norm": 0.7628374695777893, "learning_rate": 3.8831532613045215e-05, "loss": 0.5687, "step": 5592 }, { "epoch": 7.15904, "grad_norm": 0.8106046915054321, "learning_rate": 3.882953181272509e-05, "loss": 0.642, "step": 5593 }, { "epoch": 7.16032, "grad_norm": 0.7651321887969971, "learning_rate": 3.8827531012404966e-05, "loss": 0.608, "step": 5594 }, { "epoch": 7.1616, "grad_norm": 0.7789268493652344, "learning_rate": 3.882553021208484e-05, "loss": 0.6187, "step": 5595 }, { "epoch": 7.16288, "grad_norm": 0.7903891801834106, "learning_rate": 3.882352941176471e-05, "loss": 0.5915, "step": 5596 }, { "epoch": 7.16416, "grad_norm": 0.7863123416900635, "learning_rate": 3.882152861144458e-05, "loss": 0.6097, "step": 5597 }, { "epoch": 7.16544, "grad_norm": 0.7845132350921631, "learning_rate": 3.881952781112445e-05, "loss": 0.5963, "step": 5598 }, { "epoch": 7.16672, "grad_norm": 0.7682058811187744, "learning_rate": 3.8817527010804325e-05, "loss": 0.5804, "step": 5599 }, { "epoch": 7.168, "grad_norm": 0.7563507556915283, "learning_rate": 3.881552621048419e-05, "loss": 0.5514, "step": 5600 }, { "epoch": 7.16928, "grad_norm": 0.7621744275093079, "learning_rate": 
3.881352541016407e-05, "loss": 0.5595, "step": 5601 }, { "epoch": 7.17056, "grad_norm": 0.773902416229248, "learning_rate": 3.881152460984394e-05, "loss": 0.5483, "step": 5602 }, { "epoch": 7.1718399999999995, "grad_norm": 0.7652880549430847, "learning_rate": 3.880952380952381e-05, "loss": 0.5439, "step": 5603 }, { "epoch": 7.17312, "grad_norm": 0.8013029098510742, "learning_rate": 3.8807523009203684e-05, "loss": 0.583, "step": 5604 }, { "epoch": 7.1744, "grad_norm": 0.7590806484222412, "learning_rate": 3.8805522208883556e-05, "loss": 0.569, "step": 5605 }, { "epoch": 7.17568, "grad_norm": 0.8396809697151184, "learning_rate": 3.880352140856343e-05, "loss": 0.6549, "step": 5606 }, { "epoch": 7.17696, "grad_norm": 0.796314537525177, "learning_rate": 3.88015206082433e-05, "loss": 0.6438, "step": 5607 }, { "epoch": 7.17824, "grad_norm": 0.7749351859092712, "learning_rate": 3.879951980792317e-05, "loss": 0.5674, "step": 5608 }, { "epoch": 7.17952, "grad_norm": 0.7739863395690918, "learning_rate": 3.8797519007603043e-05, "loss": 0.5771, "step": 5609 }, { "epoch": 7.1808, "grad_norm": 0.808387279510498, "learning_rate": 3.8795518207282915e-05, "loss": 0.5839, "step": 5610 }, { "epoch": 7.18208, "grad_norm": 0.7463328838348389, "learning_rate": 3.879351740696279e-05, "loss": 0.5557, "step": 5611 }, { "epoch": 7.18336, "grad_norm": 0.7860255837440491, "learning_rate": 3.879151660664266e-05, "loss": 0.5787, "step": 5612 }, { "epoch": 7.18464, "grad_norm": 0.841917097568512, "learning_rate": 3.878951580632253e-05, "loss": 0.6375, "step": 5613 }, { "epoch": 7.18592, "grad_norm": 0.8427509665489197, "learning_rate": 3.87875150060024e-05, "loss": 0.6356, "step": 5614 }, { "epoch": 7.1872, "grad_norm": 0.8131973147392273, "learning_rate": 3.8785514205682275e-05, "loss": 0.6143, "step": 5615 }, { "epoch": 7.18848, "grad_norm": 0.8563538193702698, "learning_rate": 3.8783513405362146e-05, "loss": 0.6376, "step": 5616 }, { "epoch": 7.18976, "grad_norm": 0.8263671398162842, 
"learning_rate": 3.878151260504202e-05, "loss": 0.5873, "step": 5617 }, { "epoch": 7.19104, "grad_norm": 0.7478400468826294, "learning_rate": 3.877951180472189e-05, "loss": 0.5703, "step": 5618 }, { "epoch": 7.19232, "grad_norm": 0.7577512860298157, "learning_rate": 3.877751100440176e-05, "loss": 0.5557, "step": 5619 }, { "epoch": 7.1936, "grad_norm": 0.7820968627929688, "learning_rate": 3.8775510204081634e-05, "loss": 0.5891, "step": 5620 }, { "epoch": 7.19488, "grad_norm": 0.7701216340065002, "learning_rate": 3.8773509403761506e-05, "loss": 0.6057, "step": 5621 }, { "epoch": 7.19616, "grad_norm": 0.7920276522636414, "learning_rate": 3.8771508603441384e-05, "loss": 0.5919, "step": 5622 }, { "epoch": 7.19744, "grad_norm": 0.8216814398765564, "learning_rate": 3.876950780312125e-05, "loss": 0.6177, "step": 5623 }, { "epoch": 7.19872, "grad_norm": 0.8043842911720276, "learning_rate": 3.876750700280112e-05, "loss": 0.5935, "step": 5624 }, { "epoch": 7.2, "grad_norm": 0.7516394853591919, "learning_rate": 3.876550620248099e-05, "loss": 0.5912, "step": 5625 }, { "epoch": 7.20128, "grad_norm": 0.7623422145843506, "learning_rate": 3.8763505402160865e-05, "loss": 0.5907, "step": 5626 }, { "epoch": 7.20256, "grad_norm": 0.8073017597198486, "learning_rate": 3.876150460184074e-05, "loss": 0.6344, "step": 5627 }, { "epoch": 7.20384, "grad_norm": 0.7998178005218506, "learning_rate": 3.875950380152061e-05, "loss": 0.6338, "step": 5628 }, { "epoch": 7.20512, "grad_norm": 0.8303821086883545, "learning_rate": 3.875750300120049e-05, "loss": 0.6152, "step": 5629 }, { "epoch": 7.2064, "grad_norm": 0.758705735206604, "learning_rate": 3.875550220088036e-05, "loss": 0.5706, "step": 5630 }, { "epoch": 7.20768, "grad_norm": 0.7329617738723755, "learning_rate": 3.8753501400560224e-05, "loss": 0.5728, "step": 5631 }, { "epoch": 7.20896, "grad_norm": 0.7714441418647766, "learning_rate": 3.8751500600240096e-05, "loss": 0.6087, "step": 5632 }, { "epoch": 7.21024, "grad_norm": 0.8715619444847107, 
"learning_rate": 3.874949979991997e-05, "loss": 0.6635, "step": 5633 }, { "epoch": 7.21152, "grad_norm": 0.7906916737556458, "learning_rate": 3.874749899959984e-05, "loss": 0.5903, "step": 5634 }, { "epoch": 7.2128, "grad_norm": 0.7834184765815735, "learning_rate": 3.874549819927971e-05, "loss": 0.6234, "step": 5635 }, { "epoch": 7.21408, "grad_norm": 0.7641972899436951, "learning_rate": 3.874349739895959e-05, "loss": 0.5475, "step": 5636 }, { "epoch": 7.21536, "grad_norm": 0.7720842957496643, "learning_rate": 3.874149659863946e-05, "loss": 0.5907, "step": 5637 }, { "epoch": 7.21664, "grad_norm": 0.8000057339668274, "learning_rate": 3.8739495798319334e-05, "loss": 0.6073, "step": 5638 }, { "epoch": 7.21792, "grad_norm": 0.7984986901283264, "learning_rate": 3.87374949979992e-05, "loss": 0.5555, "step": 5639 }, { "epoch": 7.2192, "grad_norm": 0.7842923998832703, "learning_rate": 3.873549419767907e-05, "loss": 0.5406, "step": 5640 }, { "epoch": 7.22048, "grad_norm": 0.7952711582183838, "learning_rate": 3.873349339735894e-05, "loss": 0.6319, "step": 5641 }, { "epoch": 7.22176, "grad_norm": 0.7931072115898132, "learning_rate": 3.8731492597038815e-05, "loss": 0.623, "step": 5642 }, { "epoch": 7.22304, "grad_norm": 0.8495784997940063, "learning_rate": 3.872949179671869e-05, "loss": 0.6935, "step": 5643 }, { "epoch": 7.22432, "grad_norm": 0.7872684001922607, "learning_rate": 3.8727490996398565e-05, "loss": 0.608, "step": 5644 }, { "epoch": 7.2256, "grad_norm": 0.7937207221984863, "learning_rate": 3.872549019607844e-05, "loss": 0.6136, "step": 5645 }, { "epoch": 7.22688, "grad_norm": 0.8002104163169861, "learning_rate": 3.872348939575831e-05, "loss": 0.6445, "step": 5646 }, { "epoch": 7.22816, "grad_norm": 0.7604816555976868, "learning_rate": 3.8721488595438174e-05, "loss": 0.5837, "step": 5647 }, { "epoch": 7.22944, "grad_norm": 0.804574728012085, "learning_rate": 3.8719487795118046e-05, "loss": 0.6667, "step": 5648 }, { "epoch": 7.23072, "grad_norm": 0.7907571792602539, 
"learning_rate": 3.871748699479792e-05, "loss": 0.6271, "step": 5649 }, { "epoch": 7.232, "grad_norm": 0.7569406628608704, "learning_rate": 3.871548619447779e-05, "loss": 0.5681, "step": 5650 }, { "epoch": 7.23328, "grad_norm": 0.7395563721656799, "learning_rate": 3.871348539415767e-05, "loss": 0.5615, "step": 5651 }, { "epoch": 7.23456, "grad_norm": 0.7570566534996033, "learning_rate": 3.871148459383754e-05, "loss": 0.5757, "step": 5652 }, { "epoch": 7.23584, "grad_norm": 0.7983739376068115, "learning_rate": 3.870948379351741e-05, "loss": 0.5978, "step": 5653 }, { "epoch": 7.23712, "grad_norm": 0.787769079208374, "learning_rate": 3.8707482993197284e-05, "loss": 0.5987, "step": 5654 }, { "epoch": 7.2384, "grad_norm": 0.8369766473770142, "learning_rate": 3.870548219287715e-05, "loss": 0.6217, "step": 5655 }, { "epoch": 7.23968, "grad_norm": 0.7803592681884766, "learning_rate": 3.870348139255702e-05, "loss": 0.5908, "step": 5656 }, { "epoch": 7.24096, "grad_norm": 0.7458162307739258, "learning_rate": 3.870148059223689e-05, "loss": 0.5794, "step": 5657 }, { "epoch": 7.24224, "grad_norm": 0.7700108885765076, "learning_rate": 3.869947979191677e-05, "loss": 0.6299, "step": 5658 }, { "epoch": 7.24352, "grad_norm": 0.7748832702636719, "learning_rate": 3.869747899159664e-05, "loss": 0.5857, "step": 5659 }, { "epoch": 7.2448, "grad_norm": 0.7574285864830017, "learning_rate": 3.8695478191276515e-05, "loss": 0.5838, "step": 5660 }, { "epoch": 7.24608, "grad_norm": 0.8172944188117981, "learning_rate": 3.869347739095639e-05, "loss": 0.6803, "step": 5661 }, { "epoch": 7.24736, "grad_norm": 0.79927659034729, "learning_rate": 3.869147659063626e-05, "loss": 0.5985, "step": 5662 }, { "epoch": 7.24864, "grad_norm": 0.7518450617790222, "learning_rate": 3.8689475790316124e-05, "loss": 0.5637, "step": 5663 }, { "epoch": 7.24992, "grad_norm": 0.7577493190765381, "learning_rate": 3.8687474989995996e-05, "loss": 0.5303, "step": 5664 }, { "epoch": 7.2512, "grad_norm": 0.771763801574707, 
"learning_rate": 3.8685474189675874e-05, "loss": 0.5408, "step": 5665 }, { "epoch": 7.25248, "grad_norm": 0.8028543591499329, "learning_rate": 3.8683473389355746e-05, "loss": 0.5607, "step": 5666 }, { "epoch": 7.25376, "grad_norm": 0.8177222013473511, "learning_rate": 3.868147258903562e-05, "loss": 0.5871, "step": 5667 }, { "epoch": 7.25504, "grad_norm": 0.772655189037323, "learning_rate": 3.867947178871549e-05, "loss": 0.5901, "step": 5668 }, { "epoch": 7.25632, "grad_norm": 0.8063568472862244, "learning_rate": 3.867747098839536e-05, "loss": 0.5927, "step": 5669 }, { "epoch": 7.2576, "grad_norm": 0.8081983327865601, "learning_rate": 3.8675470188075233e-05, "loss": 0.6183, "step": 5670 }, { "epoch": 7.2588799999999996, "grad_norm": 0.7956660985946655, "learning_rate": 3.86734693877551e-05, "loss": 0.5789, "step": 5671 }, { "epoch": 7.26016, "grad_norm": 0.7728344798088074, "learning_rate": 3.867146858743498e-05, "loss": 0.5848, "step": 5672 }, { "epoch": 7.26144, "grad_norm": 0.7772632837295532, "learning_rate": 3.866946778711485e-05, "loss": 0.5777, "step": 5673 }, { "epoch": 7.26272, "grad_norm": 0.7768100500106812, "learning_rate": 3.866746698679472e-05, "loss": 0.5562, "step": 5674 }, { "epoch": 7.264, "grad_norm": 0.7781060934066772, "learning_rate": 3.866546618647459e-05, "loss": 0.5728, "step": 5675 }, { "epoch": 7.26528, "grad_norm": 0.810567319393158, "learning_rate": 3.8663465386154465e-05, "loss": 0.5866, "step": 5676 }, { "epoch": 7.26656, "grad_norm": 0.8131520748138428, "learning_rate": 3.8661464585834336e-05, "loss": 0.6461, "step": 5677 }, { "epoch": 7.26784, "grad_norm": 0.8413848876953125, "learning_rate": 3.865946378551421e-05, "loss": 0.6273, "step": 5678 }, { "epoch": 7.26912, "grad_norm": 0.7927761673927307, "learning_rate": 3.865746298519408e-05, "loss": 0.5788, "step": 5679 }, { "epoch": 7.2704, "grad_norm": 0.7904192805290222, "learning_rate": 3.865546218487395e-05, "loss": 0.5934, "step": 5680 }, { "epoch": 7.27168, "grad_norm": 
0.8133504986763, "learning_rate": 3.8653461384553824e-05, "loss": 0.5641, "step": 5681 }, { "epoch": 7.27296, "grad_norm": 0.8121780157089233, "learning_rate": 3.8651460584233696e-05, "loss": 0.6126, "step": 5682 }, { "epoch": 7.27424, "grad_norm": 0.850824773311615, "learning_rate": 3.864945978391357e-05, "loss": 0.6447, "step": 5683 }, { "epoch": 7.27552, "grad_norm": 0.7701993584632874, "learning_rate": 3.864745898359344e-05, "loss": 0.5702, "step": 5684 }, { "epoch": 7.2768, "grad_norm": 0.8420401811599731, "learning_rate": 3.864545818327331e-05, "loss": 0.6527, "step": 5685 }, { "epoch": 7.27808, "grad_norm": 0.7517603039741516, "learning_rate": 3.864345738295318e-05, "loss": 0.5792, "step": 5686 }, { "epoch": 7.27936, "grad_norm": 0.877324640750885, "learning_rate": 3.8641456582633055e-05, "loss": 0.6342, "step": 5687 }, { "epoch": 7.28064, "grad_norm": 0.7491464614868164, "learning_rate": 3.863945578231293e-05, "loss": 0.5504, "step": 5688 }, { "epoch": 7.28192, "grad_norm": 0.7788039445877075, "learning_rate": 3.86374549819928e-05, "loss": 0.6107, "step": 5689 }, { "epoch": 7.2832, "grad_norm": 0.8021331429481506, "learning_rate": 3.863545418167267e-05, "loss": 0.6289, "step": 5690 }, { "epoch": 7.28448, "grad_norm": 0.7916176915168762, "learning_rate": 3.863345338135254e-05, "loss": 0.5962, "step": 5691 }, { "epoch": 7.28576, "grad_norm": 0.7986366152763367, "learning_rate": 3.8631452581032414e-05, "loss": 0.5942, "step": 5692 }, { "epoch": 7.28704, "grad_norm": 0.8029093742370605, "learning_rate": 3.8629451780712286e-05, "loss": 0.6185, "step": 5693 }, { "epoch": 7.28832, "grad_norm": 0.7949596047401428, "learning_rate": 3.862745098039216e-05, "loss": 0.6337, "step": 5694 }, { "epoch": 7.2896, "grad_norm": 0.7814388871192932, "learning_rate": 3.862545018007203e-05, "loss": 0.6188, "step": 5695 }, { "epoch": 7.29088, "grad_norm": 0.8712118268013, "learning_rate": 3.86234493797519e-05, "loss": 0.6191, "step": 5696 }, { "epoch": 7.29216, "grad_norm": 
0.769758939743042, "learning_rate": 3.8621448579431774e-05, "loss": 0.5759, "step": 5697 }, { "epoch": 7.29344, "grad_norm": 0.7669387459754944, "learning_rate": 3.8619447779111645e-05, "loss": 0.5509, "step": 5698 }, { "epoch": 7.29472, "grad_norm": 0.7945777177810669, "learning_rate": 3.861744697879152e-05, "loss": 0.6417, "step": 5699 }, { "epoch": 7.296, "grad_norm": 0.764313280582428, "learning_rate": 3.8615446178471396e-05, "loss": 0.5851, "step": 5700 }, { "epoch": 7.29728, "grad_norm": 0.7636333107948303, "learning_rate": 3.861344537815126e-05, "loss": 0.5799, "step": 5701 }, { "epoch": 7.29856, "grad_norm": 0.7744512557983398, "learning_rate": 3.861144457783113e-05, "loss": 0.5415, "step": 5702 }, { "epoch": 7.29984, "grad_norm": 0.7718966007232666, "learning_rate": 3.8609443777511005e-05, "loss": 0.6007, "step": 5703 }, { "epoch": 7.30112, "grad_norm": 0.7986489534378052, "learning_rate": 3.8607442977190877e-05, "loss": 0.6244, "step": 5704 }, { "epoch": 7.3024000000000004, "grad_norm": 0.7697789072990417, "learning_rate": 3.860544217687075e-05, "loss": 0.6394, "step": 5705 }, { "epoch": 7.30368, "grad_norm": 0.781538188457489, "learning_rate": 3.860344137655062e-05, "loss": 0.6007, "step": 5706 }, { "epoch": 7.30496, "grad_norm": 0.7859370112419128, "learning_rate": 3.86014405762305e-05, "loss": 0.6103, "step": 5707 }, { "epoch": 7.30624, "grad_norm": 0.8621463775634766, "learning_rate": 3.859943977591037e-05, "loss": 0.6401, "step": 5708 }, { "epoch": 7.30752, "grad_norm": 0.8109675645828247, "learning_rate": 3.8597438975590236e-05, "loss": 0.611, "step": 5709 }, { "epoch": 7.3088, "grad_norm": 0.7787231206893921, "learning_rate": 3.859543817527011e-05, "loss": 0.5673, "step": 5710 }, { "epoch": 7.31008, "grad_norm": 0.7826237678527832, "learning_rate": 3.859343737494998e-05, "loss": 0.6092, "step": 5711 }, { "epoch": 7.31136, "grad_norm": 0.8043519258499146, "learning_rate": 3.859143657462985e-05, "loss": 0.6405, "step": 5712 }, { "epoch": 7.31264, 
"grad_norm": 0.7601256370544434, "learning_rate": 3.858943577430972e-05, "loss": 0.5687, "step": 5713 }, { "epoch": 7.3139199999999995, "grad_norm": 0.8053227663040161, "learning_rate": 3.85874349739896e-05, "loss": 0.6097, "step": 5714 }, { "epoch": 7.3152, "grad_norm": 0.7957972288131714, "learning_rate": 3.8585434173669474e-05, "loss": 0.6215, "step": 5715 }, { "epoch": 7.31648, "grad_norm": 0.7682824730873108, "learning_rate": 3.8583433373349346e-05, "loss": 0.6002, "step": 5716 }, { "epoch": 7.31776, "grad_norm": 0.7858462333679199, "learning_rate": 3.858143257302921e-05, "loss": 0.6155, "step": 5717 }, { "epoch": 7.31904, "grad_norm": 0.7957653999328613, "learning_rate": 3.857943177270908e-05, "loss": 0.5728, "step": 5718 }, { "epoch": 7.32032, "grad_norm": 0.7708908915519714, "learning_rate": 3.8577430972388954e-05, "loss": 0.6013, "step": 5719 }, { "epoch": 7.3216, "grad_norm": 0.7898252010345459, "learning_rate": 3.8575430172068826e-05, "loss": 0.6068, "step": 5720 }, { "epoch": 7.32288, "grad_norm": 0.8056557178497314, "learning_rate": 3.8573429371748705e-05, "loss": 0.6074, "step": 5721 }, { "epoch": 7.32416, "grad_norm": 0.7734809517860413, "learning_rate": 3.857142857142858e-05, "loss": 0.6006, "step": 5722 }, { "epoch": 7.32544, "grad_norm": 0.8016178607940674, "learning_rate": 3.856942777110845e-05, "loss": 0.6086, "step": 5723 }, { "epoch": 7.32672, "grad_norm": 0.8026044964790344, "learning_rate": 3.856742697078832e-05, "loss": 0.6075, "step": 5724 }, { "epoch": 7.328, "grad_norm": 0.7688097953796387, "learning_rate": 3.8565426170468186e-05, "loss": 0.5736, "step": 5725 }, { "epoch": 7.32928, "grad_norm": 0.7733714580535889, "learning_rate": 3.856342537014806e-05, "loss": 0.5723, "step": 5726 }, { "epoch": 7.33056, "grad_norm": 0.7920982837677002, "learning_rate": 3.856142456982793e-05, "loss": 0.6015, "step": 5727 }, { "epoch": 7.33184, "grad_norm": 0.8239418864250183, "learning_rate": 3.855942376950781e-05, "loss": 0.5999, "step": 5728 }, { 
"epoch": 7.33312, "grad_norm": 0.7626153826713562, "learning_rate": 3.855742296918768e-05, "loss": 0.5769, "step": 5729 }, { "epoch": 7.3344, "grad_norm": 0.7836630344390869, "learning_rate": 3.855542216886755e-05, "loss": 0.5713, "step": 5730 }, { "epoch": 7.33568, "grad_norm": 0.8354891538619995, "learning_rate": 3.8553421368547423e-05, "loss": 0.5959, "step": 5731 }, { "epoch": 7.33696, "grad_norm": 0.8115443587303162, "learning_rate": 3.8551420568227295e-05, "loss": 0.581, "step": 5732 }, { "epoch": 7.33824, "grad_norm": 0.787783682346344, "learning_rate": 3.854941976790716e-05, "loss": 0.6043, "step": 5733 }, { "epoch": 7.33952, "grad_norm": 0.7985721230506897, "learning_rate": 3.854741896758703e-05, "loss": 0.6119, "step": 5734 }, { "epoch": 7.3408, "grad_norm": 0.8209099173545837, "learning_rate": 3.854541816726691e-05, "loss": 0.6216, "step": 5735 }, { "epoch": 7.34208, "grad_norm": 0.7716638445854187, "learning_rate": 3.854341736694678e-05, "loss": 0.5866, "step": 5736 }, { "epoch": 7.34336, "grad_norm": 0.7463488578796387, "learning_rate": 3.8541416566626655e-05, "loss": 0.5584, "step": 5737 }, { "epoch": 7.34464, "grad_norm": 0.7812283635139465, "learning_rate": 3.8539415766306526e-05, "loss": 0.6046, "step": 5738 }, { "epoch": 7.34592, "grad_norm": 0.830808699131012, "learning_rate": 3.85374149659864e-05, "loss": 0.6134, "step": 5739 }, { "epoch": 7.3472, "grad_norm": 0.802952766418457, "learning_rate": 3.853541416566627e-05, "loss": 0.6095, "step": 5740 }, { "epoch": 7.34848, "grad_norm": 0.8335705995559692, "learning_rate": 3.8533413365346135e-05, "loss": 0.6473, "step": 5741 }, { "epoch": 7.34976, "grad_norm": 0.8194312453269958, "learning_rate": 3.8531412565026014e-05, "loss": 0.6392, "step": 5742 }, { "epoch": 7.35104, "grad_norm": 0.8065541386604309, "learning_rate": 3.8529411764705886e-05, "loss": 0.5786, "step": 5743 }, { "epoch": 7.35232, "grad_norm": 0.8424248695373535, "learning_rate": 3.852741096438576e-05, "loss": 0.6275, "step": 5744 }, { 
"epoch": 7.3536, "grad_norm": 0.8039935827255249, "learning_rate": 3.852541016406563e-05, "loss": 0.595, "step": 5745 }, { "epoch": 7.35488, "grad_norm": 0.8151816129684448, "learning_rate": 3.85234093637455e-05, "loss": 0.6375, "step": 5746 }, { "epoch": 7.35616, "grad_norm": 0.7974145412445068, "learning_rate": 3.852140856342537e-05, "loss": 0.605, "step": 5747 }, { "epoch": 7.35744, "grad_norm": 0.8081220984458923, "learning_rate": 3.8519407763105245e-05, "loss": 0.6031, "step": 5748 }, { "epoch": 7.35872, "grad_norm": 0.7766823768615723, "learning_rate": 3.851740696278512e-05, "loss": 0.6233, "step": 5749 }, { "epoch": 7.36, "grad_norm": 0.7977601885795593, "learning_rate": 3.851540616246499e-05, "loss": 0.6105, "step": 5750 }, { "epoch": 7.36128, "grad_norm": 0.7957237958908081, "learning_rate": 3.851340536214486e-05, "loss": 0.6086, "step": 5751 }, { "epoch": 7.36256, "grad_norm": 0.7621792554855347, "learning_rate": 3.851140456182473e-05, "loss": 0.6113, "step": 5752 }, { "epoch": 7.36384, "grad_norm": 0.7617015242576599, "learning_rate": 3.8509403761504604e-05, "loss": 0.5529, "step": 5753 }, { "epoch": 7.36512, "grad_norm": 0.7769597768783569, "learning_rate": 3.8507402961184476e-05, "loss": 0.6458, "step": 5754 }, { "epoch": 7.3664, "grad_norm": 0.7622930407524109, "learning_rate": 3.850540216086435e-05, "loss": 0.5758, "step": 5755 }, { "epoch": 7.36768, "grad_norm": 0.8526008129119873, "learning_rate": 3.850340136054422e-05, "loss": 0.6412, "step": 5756 }, { "epoch": 7.36896, "grad_norm": 0.8075965046882629, "learning_rate": 3.850140056022409e-05, "loss": 0.598, "step": 5757 }, { "epoch": 7.37024, "grad_norm": 0.859012246131897, "learning_rate": 3.8499399759903964e-05, "loss": 0.6844, "step": 5758 }, { "epoch": 7.37152, "grad_norm": 0.7848984003067017, "learning_rate": 3.8497398959583835e-05, "loss": 0.5879, "step": 5759 }, { "epoch": 7.3728, "grad_norm": 0.7643441557884216, "learning_rate": 3.849539815926371e-05, "loss": 0.5704, "step": 5760 }, { 
"epoch": 7.37408, "grad_norm": 0.8113600015640259, "learning_rate": 3.849339735894358e-05, "loss": 0.6399, "step": 5761 }, { "epoch": 7.37536, "grad_norm": 0.7590932846069336, "learning_rate": 3.849139655862345e-05, "loss": 0.5771, "step": 5762 }, { "epoch": 7.37664, "grad_norm": 0.7780318260192871, "learning_rate": 3.848939575830332e-05, "loss": 0.6144, "step": 5763 }, { "epoch": 7.37792, "grad_norm": 0.8140581846237183, "learning_rate": 3.8487394957983195e-05, "loss": 0.6571, "step": 5764 }, { "epoch": 7.3792, "grad_norm": 0.7549552321434021, "learning_rate": 3.8485394157663067e-05, "loss": 0.5123, "step": 5765 }, { "epoch": 7.38048, "grad_norm": 0.7963429093360901, "learning_rate": 3.848339335734294e-05, "loss": 0.5821, "step": 5766 }, { "epoch": 7.38176, "grad_norm": 0.8283103704452515, "learning_rate": 3.848139255702281e-05, "loss": 0.627, "step": 5767 }, { "epoch": 7.38304, "grad_norm": 0.7662546038627625, "learning_rate": 3.847939175670268e-05, "loss": 0.5702, "step": 5768 }, { "epoch": 7.38432, "grad_norm": 0.780431866645813, "learning_rate": 3.8477390956382554e-05, "loss": 0.6072, "step": 5769 }, { "epoch": 7.3856, "grad_norm": 0.8025130033493042, "learning_rate": 3.8475390156062426e-05, "loss": 0.5984, "step": 5770 }, { "epoch": 7.38688, "grad_norm": 0.7883024215698242, "learning_rate": 3.84733893557423e-05, "loss": 0.5951, "step": 5771 }, { "epoch": 7.38816, "grad_norm": 0.8061680197715759, "learning_rate": 3.847138855542217e-05, "loss": 0.6058, "step": 5772 }, { "epoch": 7.3894400000000005, "grad_norm": 0.7774277925491333, "learning_rate": 3.846938775510204e-05, "loss": 0.5983, "step": 5773 }, { "epoch": 7.39072, "grad_norm": 0.787751317024231, "learning_rate": 3.846738695478191e-05, "loss": 0.5822, "step": 5774 }, { "epoch": 7.392, "grad_norm": 0.830544650554657, "learning_rate": 3.8465386154461785e-05, "loss": 0.6172, "step": 5775 }, { "epoch": 7.39328, "grad_norm": 0.8001280426979065, "learning_rate": 3.846338535414166e-05, "loss": 0.6249, "step": 
5776 }, { "epoch": 7.39456, "grad_norm": 0.7862173914909363, "learning_rate": 3.846138455382153e-05, "loss": 0.5628, "step": 5777 }, { "epoch": 7.39584, "grad_norm": 0.8081876039505005, "learning_rate": 3.845938375350141e-05, "loss": 0.6007, "step": 5778 }, { "epoch": 7.39712, "grad_norm": 0.7967897057533264, "learning_rate": 3.845738295318127e-05, "loss": 0.5906, "step": 5779 }, { "epoch": 7.3984, "grad_norm": 0.8136133551597595, "learning_rate": 3.8455382152861144e-05, "loss": 0.5849, "step": 5780 }, { "epoch": 7.39968, "grad_norm": 0.7975330948829651, "learning_rate": 3.8453381352541016e-05, "loss": 0.6159, "step": 5781 }, { "epoch": 7.4009599999999995, "grad_norm": 0.8437215089797974, "learning_rate": 3.845138055222089e-05, "loss": 0.6324, "step": 5782 }, { "epoch": 7.40224, "grad_norm": 0.8381403684616089, "learning_rate": 3.844937975190076e-05, "loss": 0.6193, "step": 5783 }, { "epoch": 7.40352, "grad_norm": 0.7790165543556213, "learning_rate": 3.844737895158063e-05, "loss": 0.6107, "step": 5784 }, { "epoch": 7.4048, "grad_norm": 0.8007838726043701, "learning_rate": 3.844537815126051e-05, "loss": 0.6304, "step": 5785 }, { "epoch": 7.40608, "grad_norm": 0.8209007978439331, "learning_rate": 3.844337735094038e-05, "loss": 0.6089, "step": 5786 }, { "epoch": 7.40736, "grad_norm": 0.7682439684867859, "learning_rate": 3.844137655062025e-05, "loss": 0.5974, "step": 5787 }, { "epoch": 7.40864, "grad_norm": 0.7677670121192932, "learning_rate": 3.843937575030012e-05, "loss": 0.5678, "step": 5788 }, { "epoch": 7.40992, "grad_norm": 0.8111715912818909, "learning_rate": 3.843737494997999e-05, "loss": 0.5916, "step": 5789 }, { "epoch": 7.4112, "grad_norm": 0.7806997895240784, "learning_rate": 3.843537414965986e-05, "loss": 0.6144, "step": 5790 }, { "epoch": 7.41248, "grad_norm": 0.7777438759803772, "learning_rate": 3.8433373349339735e-05, "loss": 0.5683, "step": 5791 }, { "epoch": 7.41376, "grad_norm": 0.751512885093689, "learning_rate": 3.8431372549019614e-05, "loss": 
0.5775, "step": 5792 }, { "epoch": 7.41504, "grad_norm": 0.7952166199684143, "learning_rate": 3.8429371748699485e-05, "loss": 0.5878, "step": 5793 }, { "epoch": 7.41632, "grad_norm": 0.7829790711402893, "learning_rate": 3.842737094837936e-05, "loss": 0.6195, "step": 5794 }, { "epoch": 7.4176, "grad_norm": 0.7563794851303101, "learning_rate": 3.842537014805922e-05, "loss": 0.5918, "step": 5795 }, { "epoch": 7.41888, "grad_norm": 0.816536009311676, "learning_rate": 3.8423369347739094e-05, "loss": 0.6219, "step": 5796 }, { "epoch": 7.42016, "grad_norm": 0.7779079079627991, "learning_rate": 3.8421368547418966e-05, "loss": 0.6168, "step": 5797 }, { "epoch": 7.42144, "grad_norm": 0.8121761083602905, "learning_rate": 3.841936774709884e-05, "loss": 0.625, "step": 5798 }, { "epoch": 7.42272, "grad_norm": 0.7565649151802063, "learning_rate": 3.8417366946778717e-05, "loss": 0.5407, "step": 5799 }, { "epoch": 7.424, "grad_norm": 0.7441564798355103, "learning_rate": 3.841536614645859e-05, "loss": 0.5511, "step": 5800 }, { "epoch": 7.42528, "grad_norm": 0.8287448883056641, "learning_rate": 3.841336534613846e-05, "loss": 0.6299, "step": 5801 }, { "epoch": 7.42656, "grad_norm": 0.771754801273346, "learning_rate": 3.841136454581833e-05, "loss": 0.5718, "step": 5802 }, { "epoch": 7.42784, "grad_norm": 0.7989311814308167, "learning_rate": 3.84093637454982e-05, "loss": 0.6068, "step": 5803 }, { "epoch": 7.42912, "grad_norm": 0.7287749648094177, "learning_rate": 3.840736294517807e-05, "loss": 0.5552, "step": 5804 }, { "epoch": 7.4304, "grad_norm": 0.8139730095863342, "learning_rate": 3.840536214485794e-05, "loss": 0.623, "step": 5805 }, { "epoch": 7.43168, "grad_norm": 0.8090022206306458, "learning_rate": 3.840336134453782e-05, "loss": 0.5875, "step": 5806 }, { "epoch": 7.43296, "grad_norm": 0.793850302696228, "learning_rate": 3.840136054421769e-05, "loss": 0.5948, "step": 5807 }, { "epoch": 7.43424, "grad_norm": 0.8325986862182617, "learning_rate": 3.839935974389756e-05, "loss": 
0.6497, "step": 5808 }, { "epoch": 7.43552, "grad_norm": 0.7771223783493042, "learning_rate": 3.8397358943577435e-05, "loss": 0.584, "step": 5809 }, { "epoch": 7.4368, "grad_norm": 0.8267020583152771, "learning_rate": 3.839535814325731e-05, "loss": 0.6556, "step": 5810 }, { "epoch": 7.43808, "grad_norm": 0.8165448904037476, "learning_rate": 3.839335734293717e-05, "loss": 0.6132, "step": 5811 }, { "epoch": 7.43936, "grad_norm": 0.8215618133544922, "learning_rate": 3.8391356542617044e-05, "loss": 0.6034, "step": 5812 }, { "epoch": 7.44064, "grad_norm": 0.8251504302024841, "learning_rate": 3.838935574229692e-05, "loss": 0.6194, "step": 5813 }, { "epoch": 7.44192, "grad_norm": 0.803627073764801, "learning_rate": 3.8387354941976794e-05, "loss": 0.5718, "step": 5814 }, { "epoch": 7.4432, "grad_norm": 0.7754432559013367, "learning_rate": 3.8385354141656666e-05, "loss": 0.5652, "step": 5815 }, { "epoch": 7.44448, "grad_norm": 0.7755488753318787, "learning_rate": 3.838335334133654e-05, "loss": 0.6237, "step": 5816 }, { "epoch": 7.44576, "grad_norm": 0.8018819689750671, "learning_rate": 3.838135254101641e-05, "loss": 0.615, "step": 5817 }, { "epoch": 7.44704, "grad_norm": 0.7852720618247986, "learning_rate": 3.837935174069628e-05, "loss": 0.6045, "step": 5818 }, { "epoch": 7.44832, "grad_norm": 0.784041166305542, "learning_rate": 3.837735094037615e-05, "loss": 0.6095, "step": 5819 }, { "epoch": 7.4496, "grad_norm": 0.8439889550209045, "learning_rate": 3.8375350140056026e-05, "loss": 0.7184, "step": 5820 }, { "epoch": 7.45088, "grad_norm": 0.8502903580665588, "learning_rate": 3.83733493397359e-05, "loss": 0.6358, "step": 5821 }, { "epoch": 7.45216, "grad_norm": 0.7948433756828308, "learning_rate": 3.837134853941577e-05, "loss": 0.5715, "step": 5822 }, { "epoch": 7.45344, "grad_norm": 0.7365151047706604, "learning_rate": 3.836934773909564e-05, "loss": 0.567, "step": 5823 }, { "epoch": 7.45472, "grad_norm": 0.8041279315948486, "learning_rate": 3.836734693877551e-05, "loss": 
0.6025, "step": 5824 }, { "epoch": 7.456, "grad_norm": 0.7834358215332031, "learning_rate": 3.8365346138455385e-05, "loss": 0.5941, "step": 5825 }, { "epoch": 7.45728, "grad_norm": 0.7606781721115112, "learning_rate": 3.836334533813526e-05, "loss": 0.5979, "step": 5826 }, { "epoch": 7.45856, "grad_norm": 0.7791493535041809, "learning_rate": 3.836134453781513e-05, "loss": 0.5739, "step": 5827 }, { "epoch": 7.45984, "grad_norm": 0.8152048587799072, "learning_rate": 3.8359343737495e-05, "loss": 0.6035, "step": 5828 }, { "epoch": 7.46112, "grad_norm": 0.8163161277770996, "learning_rate": 3.835734293717487e-05, "loss": 0.6061, "step": 5829 }, { "epoch": 7.4624, "grad_norm": 0.7904027104377747, "learning_rate": 3.8355342136854744e-05, "loss": 0.6001, "step": 5830 }, { "epoch": 7.46368, "grad_norm": 0.8345395922660828, "learning_rate": 3.8353341336534616e-05, "loss": 0.6474, "step": 5831 }, { "epoch": 7.46496, "grad_norm": 0.764450192451477, "learning_rate": 3.835134053621449e-05, "loss": 0.5939, "step": 5832 }, { "epoch": 7.46624, "grad_norm": 0.7856200933456421, "learning_rate": 3.834933973589436e-05, "loss": 0.5863, "step": 5833 }, { "epoch": 7.46752, "grad_norm": 0.756513237953186, "learning_rate": 3.834733893557423e-05, "loss": 0.5699, "step": 5834 }, { "epoch": 7.4688, "grad_norm": 0.8095502257347107, "learning_rate": 3.83453381352541e-05, "loss": 0.6145, "step": 5835 }, { "epoch": 7.47008, "grad_norm": 0.7780006527900696, "learning_rate": 3.8343337334933975e-05, "loss": 0.622, "step": 5836 }, { "epoch": 7.47136, "grad_norm": 0.7881510257720947, "learning_rate": 3.834133653461385e-05, "loss": 0.5873, "step": 5837 }, { "epoch": 7.47264, "grad_norm": 0.7660511136054993, "learning_rate": 3.833933573429372e-05, "loss": 0.6096, "step": 5838 }, { "epoch": 7.47392, "grad_norm": 0.7153475284576416, "learning_rate": 3.833733493397359e-05, "loss": 0.5051, "step": 5839 }, { "epoch": 7.4752, "grad_norm": 0.7922013401985168, "learning_rate": 3.833533413365346e-05, "loss": 
0.6159, "step": 5840 }, { "epoch": 7.4764800000000005, "grad_norm": 0.7256450057029724, "learning_rate": 3.8333333333333334e-05, "loss": 0.5558, "step": 5841 }, { "epoch": 7.47776, "grad_norm": 0.7810547351837158, "learning_rate": 3.8331332533013206e-05, "loss": 0.6134, "step": 5842 }, { "epoch": 7.47904, "grad_norm": 0.7948379516601562, "learning_rate": 3.832933173269308e-05, "loss": 0.6025, "step": 5843 }, { "epoch": 7.48032, "grad_norm": 0.7824628353118896, "learning_rate": 3.832733093237295e-05, "loss": 0.5658, "step": 5844 }, { "epoch": 7.4816, "grad_norm": 0.8361453413963318, "learning_rate": 3.832533013205282e-05, "loss": 0.6285, "step": 5845 }, { "epoch": 7.48288, "grad_norm": 0.8043081164360046, "learning_rate": 3.8323329331732694e-05, "loss": 0.6019, "step": 5846 }, { "epoch": 7.48416, "grad_norm": 0.7815614342689514, "learning_rate": 3.8321328531412566e-05, "loss": 0.5788, "step": 5847 }, { "epoch": 7.48544, "grad_norm": 0.8125666379928589, "learning_rate": 3.8319327731092444e-05, "loss": 0.5843, "step": 5848 }, { "epoch": 7.48672, "grad_norm": 0.8060123920440674, "learning_rate": 3.831732693077231e-05, "loss": 0.605, "step": 5849 }, { "epoch": 7.4879999999999995, "grad_norm": 0.7951981425285339, "learning_rate": 3.831532613045218e-05, "loss": 0.5823, "step": 5850 }, { "epoch": 7.48928, "grad_norm": 0.767137885093689, "learning_rate": 3.831332533013205e-05, "loss": 0.6463, "step": 5851 }, { "epoch": 7.49056, "grad_norm": 0.7987186312675476, "learning_rate": 3.8311324529811925e-05, "loss": 0.5696, "step": 5852 }, { "epoch": 7.49184, "grad_norm": 0.8147026896476746, "learning_rate": 3.83093237294918e-05, "loss": 0.6369, "step": 5853 }, { "epoch": 7.49312, "grad_norm": 0.7695271968841553, "learning_rate": 3.830732292917167e-05, "loss": 0.6022, "step": 5854 }, { "epoch": 7.4944, "grad_norm": 0.7904877066612244, "learning_rate": 3.830532212885155e-05, "loss": 0.5893, "step": 5855 }, { "epoch": 7.49568, "grad_norm": 0.7861725687980652, "learning_rate": 
3.830332132853142e-05, "loss": 0.6368, "step": 5856 }, { "epoch": 7.49696, "grad_norm": 0.7913157343864441, "learning_rate": 3.8301320528211284e-05, "loss": 0.6459, "step": 5857 }, { "epoch": 7.49824, "grad_norm": 0.7918976545333862, "learning_rate": 3.8299319727891156e-05, "loss": 0.5793, "step": 5858 }, { "epoch": 7.49952, "grad_norm": 0.8110979199409485, "learning_rate": 3.829731892757103e-05, "loss": 0.589, "step": 5859 }, { "epoch": 7.5008, "grad_norm": 0.8010942339897156, "learning_rate": 3.82953181272509e-05, "loss": 0.5936, "step": 5860 }, { "epoch": 7.50208, "grad_norm": 0.8619551062583923, "learning_rate": 3.829331732693077e-05, "loss": 0.6526, "step": 5861 }, { "epoch": 7.50336, "grad_norm": 0.7884719371795654, "learning_rate": 3.829131652661065e-05, "loss": 0.5931, "step": 5862 }, { "epoch": 7.50464, "grad_norm": 0.7682931423187256, "learning_rate": 3.828931572629052e-05, "loss": 0.5891, "step": 5863 }, { "epoch": 7.50592, "grad_norm": 0.7763561606407166, "learning_rate": 3.8287314925970394e-05, "loss": 0.5551, "step": 5864 }, { "epoch": 7.5072, "grad_norm": 0.7990860342979431, "learning_rate": 3.828531412565026e-05, "loss": 0.586, "step": 5865 }, { "epoch": 7.5084800000000005, "grad_norm": 0.7618961334228516, "learning_rate": 3.828331332533013e-05, "loss": 0.5892, "step": 5866 }, { "epoch": 7.50976, "grad_norm": 0.8227397203445435, "learning_rate": 3.828131252501e-05, "loss": 0.5808, "step": 5867 }, { "epoch": 7.51104, "grad_norm": 0.7972336411476135, "learning_rate": 3.8279311724689875e-05, "loss": 0.5937, "step": 5868 }, { "epoch": 7.51232, "grad_norm": 0.7725977301597595, "learning_rate": 3.8277310924369746e-05, "loss": 0.5742, "step": 5869 }, { "epoch": 7.5136, "grad_norm": 0.7537288665771484, "learning_rate": 3.8275310124049625e-05, "loss": 0.5535, "step": 5870 }, { "epoch": 7.51488, "grad_norm": 0.8171656131744385, "learning_rate": 3.82733093237295e-05, "loss": 0.5987, "step": 5871 }, { "epoch": 7.51616, "grad_norm": 0.8476085662841797, 
"learning_rate": 3.827130852340937e-05, "loss": 0.6341, "step": 5872 }, { "epoch": 7.51744, "grad_norm": 0.7884910106658936, "learning_rate": 3.8269307723089234e-05, "loss": 0.5655, "step": 5873 }, { "epoch": 7.51872, "grad_norm": 0.7917491793632507, "learning_rate": 3.8267306922769106e-05, "loss": 0.5747, "step": 5874 }, { "epoch": 7.52, "grad_norm": 0.7862417697906494, "learning_rate": 3.826530612244898e-05, "loss": 0.5739, "step": 5875 }, { "epoch": 7.52128, "grad_norm": 0.7860782146453857, "learning_rate": 3.826330532212885e-05, "loss": 0.5958, "step": 5876 }, { "epoch": 7.52256, "grad_norm": 0.7811517715454102, "learning_rate": 3.826130452180873e-05, "loss": 0.5845, "step": 5877 }, { "epoch": 7.52384, "grad_norm": 0.7601850628852844, "learning_rate": 3.82593037214886e-05, "loss": 0.5989, "step": 5878 }, { "epoch": 7.52512, "grad_norm": 0.727415919303894, "learning_rate": 3.825730292116847e-05, "loss": 0.5449, "step": 5879 }, { "epoch": 7.5264, "grad_norm": 0.8012895584106445, "learning_rate": 3.8255302120848344e-05, "loss": 0.5999, "step": 5880 }, { "epoch": 7.52768, "grad_norm": 0.7879264950752258, "learning_rate": 3.825330132052821e-05, "loss": 0.5992, "step": 5881 }, { "epoch": 7.52896, "grad_norm": 0.7692047953605652, "learning_rate": 3.825130052020808e-05, "loss": 0.5393, "step": 5882 }, { "epoch": 7.53024, "grad_norm": 0.8411558270454407, "learning_rate": 3.824929971988795e-05, "loss": 0.593, "step": 5883 }, { "epoch": 7.53152, "grad_norm": 0.8363029956817627, "learning_rate": 3.824729891956783e-05, "loss": 0.6034, "step": 5884 }, { "epoch": 7.5328, "grad_norm": 0.7915286421775818, "learning_rate": 3.82452981192477e-05, "loss": 0.5933, "step": 5885 }, { "epoch": 7.53408, "grad_norm": 0.8054776787757874, "learning_rate": 3.8243297318927575e-05, "loss": 0.6009, "step": 5886 }, { "epoch": 7.53536, "grad_norm": 0.7464303970336914, "learning_rate": 3.824129651860745e-05, "loss": 0.5387, "step": 5887 }, { "epoch": 7.53664, "grad_norm": 0.7987693548202515, 
"learning_rate": 3.823929571828732e-05, "loss": 0.5608, "step": 5888 }, { "epoch": 7.53792, "grad_norm": 0.7968281507492065, "learning_rate": 3.8237294917967184e-05, "loss": 0.605, "step": 5889 }, { "epoch": 7.5392, "grad_norm": 0.7655385136604309, "learning_rate": 3.8235294117647055e-05, "loss": 0.5751, "step": 5890 }, { "epoch": 7.54048, "grad_norm": 0.8068759441375732, "learning_rate": 3.8233293317326934e-05, "loss": 0.6387, "step": 5891 }, { "epoch": 7.54176, "grad_norm": 0.7618523240089417, "learning_rate": 3.8231292517006806e-05, "loss": 0.5853, "step": 5892 }, { "epoch": 7.5430399999999995, "grad_norm": 0.7742576599121094, "learning_rate": 3.822929171668668e-05, "loss": 0.5768, "step": 5893 }, { "epoch": 7.54432, "grad_norm": 0.7480475902557373, "learning_rate": 3.822729091636655e-05, "loss": 0.5423, "step": 5894 }, { "epoch": 7.5456, "grad_norm": 0.780965268611908, "learning_rate": 3.822529011604642e-05, "loss": 0.6088, "step": 5895 }, { "epoch": 7.54688, "grad_norm": 0.8063462376594543, "learning_rate": 3.8223289315726293e-05, "loss": 0.5745, "step": 5896 }, { "epoch": 7.54816, "grad_norm": 0.8213686347007751, "learning_rate": 3.822128851540616e-05, "loss": 0.6227, "step": 5897 }, { "epoch": 7.54944, "grad_norm": 0.8014224767684937, "learning_rate": 3.821928771508604e-05, "loss": 0.6174, "step": 5898 }, { "epoch": 7.55072, "grad_norm": 0.8072211146354675, "learning_rate": 3.821728691476591e-05, "loss": 0.6356, "step": 5899 }, { "epoch": 7.552, "grad_norm": 0.8300227522850037, "learning_rate": 3.821528611444578e-05, "loss": 0.62, "step": 5900 }, { "epoch": 7.55328, "grad_norm": 0.7829582095146179, "learning_rate": 3.821328531412565e-05, "loss": 0.5797, "step": 5901 }, { "epoch": 7.55456, "grad_norm": 0.8026710152626038, "learning_rate": 3.8211284513805525e-05, "loss": 0.573, "step": 5902 }, { "epoch": 7.55584, "grad_norm": 0.8413091897964478, "learning_rate": 3.8209283713485396e-05, "loss": 0.6193, "step": 5903 }, { "epoch": 7.55712, "grad_norm": 
0.7783622741699219, "learning_rate": 3.820728291316527e-05, "loss": 0.6419, "step": 5904 }, { "epoch": 7.5584, "grad_norm": 0.804766058921814, "learning_rate": 3.820528211284514e-05, "loss": 0.5698, "step": 5905 }, { "epoch": 7.55968, "grad_norm": 0.7991184592247009, "learning_rate": 3.820328131252501e-05, "loss": 0.6626, "step": 5906 }, { "epoch": 7.56096, "grad_norm": 0.8455173969268799, "learning_rate": 3.8201280512204884e-05, "loss": 0.6492, "step": 5907 }, { "epoch": 7.56224, "grad_norm": 0.8010911345481873, "learning_rate": 3.8199279711884756e-05, "loss": 0.625, "step": 5908 }, { "epoch": 7.5635200000000005, "grad_norm": 0.7779704928398132, "learning_rate": 3.819727891156463e-05, "loss": 0.5656, "step": 5909 }, { "epoch": 7.5648, "grad_norm": 0.7678871750831604, "learning_rate": 3.81952781112445e-05, "loss": 0.5712, "step": 5910 }, { "epoch": 7.56608, "grad_norm": 0.7485666871070862, "learning_rate": 3.819327731092437e-05, "loss": 0.5766, "step": 5911 }, { "epoch": 7.56736, "grad_norm": 0.8102645874023438, "learning_rate": 3.819127651060424e-05, "loss": 0.6139, "step": 5912 }, { "epoch": 7.56864, "grad_norm": 0.8365371227264404, "learning_rate": 3.8189275710284115e-05, "loss": 0.6378, "step": 5913 }, { "epoch": 7.56992, "grad_norm": 0.7460724115371704, "learning_rate": 3.818727490996399e-05, "loss": 0.5879, "step": 5914 }, { "epoch": 7.5712, "grad_norm": 0.8165127038955688, "learning_rate": 3.818527410964386e-05, "loss": 0.5859, "step": 5915 }, { "epoch": 7.57248, "grad_norm": 0.8666216135025024, "learning_rate": 3.818327330932373e-05, "loss": 0.6287, "step": 5916 }, { "epoch": 7.57376, "grad_norm": 0.8045700788497925, "learning_rate": 3.81812725090036e-05, "loss": 0.6372, "step": 5917 }, { "epoch": 7.5750399999999996, "grad_norm": 0.7294300198554993, "learning_rate": 3.8179271708683474e-05, "loss": 0.5485, "step": 5918 }, { "epoch": 7.57632, "grad_norm": 0.7684528827667236, "learning_rate": 3.817727090836335e-05, "loss": 0.5719, "step": 5919 }, { "epoch": 
7.5776, "grad_norm": 0.8162046670913696, "learning_rate": 3.817527010804322e-05, "loss": 0.6237, "step": 5920 }, { "epoch": 7.57888, "grad_norm": 0.7951498031616211, "learning_rate": 3.817326930772309e-05, "loss": 0.6499, "step": 5921 }, { "epoch": 7.58016, "grad_norm": 0.7112811803817749, "learning_rate": 3.817126850740296e-05, "loss": 0.5304, "step": 5922 }, { "epoch": 7.58144, "grad_norm": 0.794484555721283, "learning_rate": 3.8169267707082834e-05, "loss": 0.6157, "step": 5923 }, { "epoch": 7.58272, "grad_norm": 0.769756019115448, "learning_rate": 3.8167266906762705e-05, "loss": 0.5664, "step": 5924 }, { "epoch": 7.584, "grad_norm": 0.8022031784057617, "learning_rate": 3.816526610644258e-05, "loss": 0.6011, "step": 5925 }, { "epoch": 7.58528, "grad_norm": 0.8301845788955688, "learning_rate": 3.8163265306122456e-05, "loss": 0.6288, "step": 5926 }, { "epoch": 7.58656, "grad_norm": 0.7498910427093506, "learning_rate": 3.816126450580233e-05, "loss": 0.5681, "step": 5927 }, { "epoch": 7.58784, "grad_norm": 0.8285461068153381, "learning_rate": 3.815926370548219e-05, "loss": 0.6179, "step": 5928 }, { "epoch": 7.58912, "grad_norm": 0.795591413974762, "learning_rate": 3.8157262905162065e-05, "loss": 0.6189, "step": 5929 }, { "epoch": 7.5904, "grad_norm": 0.7645912170410156, "learning_rate": 3.8155262104841937e-05, "loss": 0.558, "step": 5930 }, { "epoch": 7.59168, "grad_norm": 0.7344127297401428, "learning_rate": 3.815326130452181e-05, "loss": 0.5886, "step": 5931 }, { "epoch": 7.59296, "grad_norm": 0.7792043685913086, "learning_rate": 3.815126050420168e-05, "loss": 0.5778, "step": 5932 }, { "epoch": 7.59424, "grad_norm": 0.7682183980941772, "learning_rate": 3.814925970388156e-05, "loss": 0.6103, "step": 5933 }, { "epoch": 7.5955200000000005, "grad_norm": 0.8293752670288086, "learning_rate": 3.814725890356143e-05, "loss": 0.6615, "step": 5934 }, { "epoch": 7.5968, "grad_norm": 0.8193676471710205, "learning_rate": 3.81452581032413e-05, "loss": 0.6054, "step": 5935 }, { 
"epoch": 7.5980799999999995, "grad_norm": 0.799968957901001, "learning_rate": 3.814325730292117e-05, "loss": 0.5967, "step": 5936 }, { "epoch": 7.59936, "grad_norm": 0.7881608605384827, "learning_rate": 3.814125650260104e-05, "loss": 0.5734, "step": 5937 }, { "epoch": 7.60064, "grad_norm": 0.7928145527839661, "learning_rate": 3.813925570228091e-05, "loss": 0.62, "step": 5938 }, { "epoch": 7.60192, "grad_norm": 0.8052318096160889, "learning_rate": 3.813725490196078e-05, "loss": 0.6097, "step": 5939 }, { "epoch": 7.6032, "grad_norm": 0.7783374786376953, "learning_rate": 3.813525410164066e-05, "loss": 0.624, "step": 5940 }, { "epoch": 7.60448, "grad_norm": 0.8174735307693481, "learning_rate": 3.8133253301320534e-05, "loss": 0.5862, "step": 5941 }, { "epoch": 7.60576, "grad_norm": 0.8126026391983032, "learning_rate": 3.8131252501000406e-05, "loss": 0.6185, "step": 5942 }, { "epoch": 7.60704, "grad_norm": 0.796207070350647, "learning_rate": 3.812925170068028e-05, "loss": 0.5721, "step": 5943 }, { "epoch": 7.60832, "grad_norm": 0.8204118013381958, "learning_rate": 3.812725090036014e-05, "loss": 0.6101, "step": 5944 }, { "epoch": 7.6096, "grad_norm": 0.8275149464607239, "learning_rate": 3.8125250100040014e-05, "loss": 0.6486, "step": 5945 }, { "epoch": 7.61088, "grad_norm": 0.8166235089302063, "learning_rate": 3.8123249299719886e-05, "loss": 0.5877, "step": 5946 }, { "epoch": 7.61216, "grad_norm": 0.7479138970375061, "learning_rate": 3.8121248499399765e-05, "loss": 0.5492, "step": 5947 }, { "epoch": 7.61344, "grad_norm": 0.8284672498703003, "learning_rate": 3.811924769907964e-05, "loss": 0.6257, "step": 5948 }, { "epoch": 7.61472, "grad_norm": 0.7393089532852173, "learning_rate": 3.811724689875951e-05, "loss": 0.5608, "step": 5949 }, { "epoch": 7.616, "grad_norm": 0.8117731213569641, "learning_rate": 3.811524609843938e-05, "loss": 0.6299, "step": 5950 }, { "epoch": 7.61728, "grad_norm": 0.7665390968322754, "learning_rate": 3.811324529811925e-05, "loss": 0.5933, "step": 
5951 }, { "epoch": 7.61856, "grad_norm": 0.7984145879745483, "learning_rate": 3.811124449779912e-05, "loss": 0.6006, "step": 5952 }, { "epoch": 7.61984, "grad_norm": 0.7704722881317139, "learning_rate": 3.810924369747899e-05, "loss": 0.5863, "step": 5953 }, { "epoch": 7.62112, "grad_norm": 0.845035970211029, "learning_rate": 3.810724289715887e-05, "loss": 0.6554, "step": 5954 }, { "epoch": 7.6224, "grad_norm": 0.8365921378135681, "learning_rate": 3.810524209683874e-05, "loss": 0.6329, "step": 5955 }, { "epoch": 7.62368, "grad_norm": 0.8249934911727905, "learning_rate": 3.810324129651861e-05, "loss": 0.5811, "step": 5956 }, { "epoch": 7.62496, "grad_norm": 0.7940043807029724, "learning_rate": 3.8101240496198483e-05, "loss": 0.5887, "step": 5957 }, { "epoch": 7.62624, "grad_norm": 0.8075708746910095, "learning_rate": 3.8099239695878355e-05, "loss": 0.6427, "step": 5958 }, { "epoch": 7.62752, "grad_norm": 0.7509844303131104, "learning_rate": 3.809723889555823e-05, "loss": 0.5248, "step": 5959 }, { "epoch": 7.6288, "grad_norm": 0.8051660060882568, "learning_rate": 3.809523809523809e-05, "loss": 0.591, "step": 5960 }, { "epoch": 7.6300799999999995, "grad_norm": 0.8326472640037537, "learning_rate": 3.809323729491797e-05, "loss": 0.6136, "step": 5961 }, { "epoch": 7.63136, "grad_norm": 0.792871356010437, "learning_rate": 3.809123649459784e-05, "loss": 0.5931, "step": 5962 }, { "epoch": 7.63264, "grad_norm": 0.7740362882614136, "learning_rate": 3.8089235694277715e-05, "loss": 0.5771, "step": 5963 }, { "epoch": 7.63392, "grad_norm": 0.8323248624801636, "learning_rate": 3.8087234893957586e-05, "loss": 0.6383, "step": 5964 }, { "epoch": 7.6352, "grad_norm": 0.8095336556434631, "learning_rate": 3.808523409363746e-05, "loss": 0.5937, "step": 5965 }, { "epoch": 7.63648, "grad_norm": 0.8045695424079895, "learning_rate": 3.808323329331733e-05, "loss": 0.5883, "step": 5966 }, { "epoch": 7.63776, "grad_norm": 0.825339138507843, "learning_rate": 3.80812324929972e-05, "loss": 0.5761, 
"step": 5967 }, { "epoch": 7.63904, "grad_norm": 0.8067010641098022, "learning_rate": 3.8079231692677074e-05, "loss": 0.6219, "step": 5968 }, { "epoch": 7.64032, "grad_norm": 0.7915658950805664, "learning_rate": 3.8077230892356946e-05, "loss": 0.5803, "step": 5969 }, { "epoch": 7.6416, "grad_norm": 0.7723389267921448, "learning_rate": 3.807523009203682e-05, "loss": 0.5954, "step": 5970 }, { "epoch": 7.64288, "grad_norm": 0.8201357126235962, "learning_rate": 3.807322929171669e-05, "loss": 0.627, "step": 5971 }, { "epoch": 7.64416, "grad_norm": 0.8042327761650085, "learning_rate": 3.807122849139656e-05, "loss": 0.6212, "step": 5972 }, { "epoch": 7.64544, "grad_norm": 0.8056513667106628, "learning_rate": 3.806922769107643e-05, "loss": 0.5762, "step": 5973 }, { "epoch": 7.64672, "grad_norm": 0.7721224427223206, "learning_rate": 3.8067226890756305e-05, "loss": 0.6134, "step": 5974 }, { "epoch": 7.648, "grad_norm": 0.8109544515609741, "learning_rate": 3.806522609043618e-05, "loss": 0.6003, "step": 5975 }, { "epoch": 7.64928, "grad_norm": 0.7777767181396484, "learning_rate": 3.806322529011605e-05, "loss": 0.6004, "step": 5976 }, { "epoch": 7.6505600000000005, "grad_norm": 0.8008784651756287, "learning_rate": 3.806122448979592e-05, "loss": 0.6076, "step": 5977 }, { "epoch": 7.65184, "grad_norm": 0.8032917976379395, "learning_rate": 3.805922368947579e-05, "loss": 0.595, "step": 5978 }, { "epoch": 7.65312, "grad_norm": 0.7758605480194092, "learning_rate": 3.8057222889155664e-05, "loss": 0.5175, "step": 5979 }, { "epoch": 7.6544, "grad_norm": 0.7716826796531677, "learning_rate": 3.8055222088835536e-05, "loss": 0.603, "step": 5980 }, { "epoch": 7.65568, "grad_norm": 0.7985824346542358, "learning_rate": 3.805322128851541e-05, "loss": 0.602, "step": 5981 }, { "epoch": 7.65696, "grad_norm": 0.8194901347160339, "learning_rate": 3.805122048819528e-05, "loss": 0.6157, "step": 5982 }, { "epoch": 7.65824, "grad_norm": 0.8596216440200806, "learning_rate": 3.804921968787515e-05, "loss": 
0.6033, "step": 5983 }, { "epoch": 7.65952, "grad_norm": 0.8513696193695068, "learning_rate": 3.8047218887555024e-05, "loss": 0.6309, "step": 5984 }, { "epoch": 7.6608, "grad_norm": 0.8196579813957214, "learning_rate": 3.8045218087234895e-05, "loss": 0.6085, "step": 5985 }, { "epoch": 7.66208, "grad_norm": 0.7808190584182739, "learning_rate": 3.804321728691477e-05, "loss": 0.6266, "step": 5986 }, { "epoch": 7.66336, "grad_norm": 0.792535662651062, "learning_rate": 3.804121648659464e-05, "loss": 0.5974, "step": 5987 }, { "epoch": 7.66464, "grad_norm": 0.7937912940979004, "learning_rate": 3.803921568627451e-05, "loss": 0.6269, "step": 5988 }, { "epoch": 7.66592, "grad_norm": 0.7414507269859314, "learning_rate": 3.803721488595438e-05, "loss": 0.575, "step": 5989 }, { "epoch": 7.6672, "grad_norm": 0.8128142952919006, "learning_rate": 3.8035214085634255e-05, "loss": 0.5785, "step": 5990 }, { "epoch": 7.66848, "grad_norm": 0.8728705644607544, "learning_rate": 3.8033213285314127e-05, "loss": 0.6354, "step": 5991 }, { "epoch": 7.66976, "grad_norm": 0.7969766855239868, "learning_rate": 3.8031212484994e-05, "loss": 0.6223, "step": 5992 }, { "epoch": 7.67104, "grad_norm": 0.8199662566184998, "learning_rate": 3.802921168467387e-05, "loss": 0.6278, "step": 5993 }, { "epoch": 7.67232, "grad_norm": 0.7900199890136719, "learning_rate": 3.802721088435374e-05, "loss": 0.5949, "step": 5994 }, { "epoch": 7.6736, "grad_norm": 0.7966949939727783, "learning_rate": 3.8025210084033614e-05, "loss": 0.6385, "step": 5995 }, { "epoch": 7.67488, "grad_norm": 0.7564438581466675, "learning_rate": 3.8023209283713486e-05, "loss": 0.5726, "step": 5996 }, { "epoch": 7.67616, "grad_norm": 0.8227134943008423, "learning_rate": 3.8021208483393364e-05, "loss": 0.6368, "step": 5997 }, { "epoch": 7.67744, "grad_norm": 0.8298145532608032, "learning_rate": 3.801920768307323e-05, "loss": 0.6483, "step": 5998 }, { "epoch": 7.67872, "grad_norm": 0.8414880037307739, "learning_rate": 3.80172068827531e-05, "loss": 
0.6564, "step": 5999 }, { "epoch": 7.68, "grad_norm": 0.7608658075332642, "learning_rate": 3.801520608243297e-05, "loss": 0.5326, "step": 6000 }, { "epoch": 7.68128, "grad_norm": 0.816177487373352, "learning_rate": 3.8013205282112845e-05, "loss": 0.5995, "step": 6001 }, { "epoch": 7.68256, "grad_norm": 0.7969012260437012, "learning_rate": 3.801120448179272e-05, "loss": 0.576, "step": 6002 }, { "epoch": 7.68384, "grad_norm": 0.786906361579895, "learning_rate": 3.800920368147259e-05, "loss": 0.6084, "step": 6003 }, { "epoch": 7.6851199999999995, "grad_norm": 0.764916718006134, "learning_rate": 3.800720288115247e-05, "loss": 0.6095, "step": 6004 }, { "epoch": 7.6864, "grad_norm": 0.7754150629043579, "learning_rate": 3.800520208083234e-05, "loss": 0.6024, "step": 6005 }, { "epoch": 7.68768, "grad_norm": 0.7677636742591858, "learning_rate": 3.8003201280512204e-05, "loss": 0.5886, "step": 6006 }, { "epoch": 7.68896, "grad_norm": 0.8274985551834106, "learning_rate": 3.8001200480192076e-05, "loss": 0.6117, "step": 6007 }, { "epoch": 7.69024, "grad_norm": 0.8422303199768066, "learning_rate": 3.799919967987195e-05, "loss": 0.6012, "step": 6008 }, { "epoch": 7.69152, "grad_norm": 0.7773991227149963, "learning_rate": 3.799719887955182e-05, "loss": 0.6188, "step": 6009 }, { "epoch": 7.6928, "grad_norm": 0.8241961002349854, "learning_rate": 3.799519807923169e-05, "loss": 0.6102, "step": 6010 }, { "epoch": 7.69408, "grad_norm": 0.7824380397796631, "learning_rate": 3.799319727891157e-05, "loss": 0.6254, "step": 6011 }, { "epoch": 7.69536, "grad_norm": 0.7920189499855042, "learning_rate": 3.799119647859144e-05, "loss": 0.687, "step": 6012 }, { "epoch": 7.69664, "grad_norm": 0.7902247309684753, "learning_rate": 3.7989195678271314e-05, "loss": 0.5669, "step": 6013 }, { "epoch": 7.69792, "grad_norm": 0.8157680630683899, "learning_rate": 3.798719487795118e-05, "loss": 0.6307, "step": 6014 }, { "epoch": 7.6992, "grad_norm": 0.8133694529533386, "learning_rate": 3.798519407763105e-05, 
"loss": 0.6101, "step": 6015 }, { "epoch": 7.70048, "grad_norm": 0.8677835464477539, "learning_rate": 3.798319327731092e-05, "loss": 0.6809, "step": 6016 }, { "epoch": 7.70176, "grad_norm": 0.7828230261802673, "learning_rate": 3.7981192476990795e-05, "loss": 0.5994, "step": 6017 }, { "epoch": 7.70304, "grad_norm": 0.8306964039802551, "learning_rate": 3.7979191676670673e-05, "loss": 0.6098, "step": 6018 }, { "epoch": 7.70432, "grad_norm": 0.7977209687232971, "learning_rate": 3.7977190876350545e-05, "loss": 0.6301, "step": 6019 }, { "epoch": 7.7056000000000004, "grad_norm": 0.7888930439949036, "learning_rate": 3.797519007603042e-05, "loss": 0.6073, "step": 6020 }, { "epoch": 7.70688, "grad_norm": 0.813720166683197, "learning_rate": 3.797318927571029e-05, "loss": 0.6056, "step": 6021 }, { "epoch": 7.70816, "grad_norm": 0.7712581157684326, "learning_rate": 3.7971188475390154e-05, "loss": 0.6198, "step": 6022 }, { "epoch": 7.70944, "grad_norm": 0.8599300384521484, "learning_rate": 3.7969187675070026e-05, "loss": 0.6541, "step": 6023 }, { "epoch": 7.71072, "grad_norm": 0.8106684684753418, "learning_rate": 3.79671868747499e-05, "loss": 0.625, "step": 6024 }, { "epoch": 7.712, "grad_norm": 0.8189576268196106, "learning_rate": 3.7965186074429776e-05, "loss": 0.6379, "step": 6025 }, { "epoch": 7.71328, "grad_norm": 0.8133044838905334, "learning_rate": 3.796318527410965e-05, "loss": 0.6289, "step": 6026 }, { "epoch": 7.71456, "grad_norm": 0.7700027227401733, "learning_rate": 3.796118447378952e-05, "loss": 0.5445, "step": 6027 }, { "epoch": 7.71584, "grad_norm": 0.7435338497161865, "learning_rate": 3.795918367346939e-05, "loss": 0.5761, "step": 6028 }, { "epoch": 7.7171199999999995, "grad_norm": 0.7466886639595032, "learning_rate": 3.7957182873149264e-05, "loss": 0.563, "step": 6029 }, { "epoch": 7.7184, "grad_norm": 0.7899182438850403, "learning_rate": 3.795518207282913e-05, "loss": 0.5442, "step": 6030 }, { "epoch": 7.71968, "grad_norm": 0.8155211806297302, "learning_rate": 
3.7953181272509e-05, "loss": 0.6445, "step": 6031 }, { "epoch": 7.72096, "grad_norm": 0.8087602257728577, "learning_rate": 3.795118047218888e-05, "loss": 0.6062, "step": 6032 }, { "epoch": 7.72224, "grad_norm": 0.7604132890701294, "learning_rate": 3.794917967186875e-05, "loss": 0.6147, "step": 6033 }, { "epoch": 7.72352, "grad_norm": 0.8204428553581238, "learning_rate": 3.794717887154862e-05, "loss": 0.5717, "step": 6034 }, { "epoch": 7.7248, "grad_norm": 0.7669458985328674, "learning_rate": 3.7945178071228495e-05, "loss": 0.5662, "step": 6035 }, { "epoch": 7.72608, "grad_norm": 0.765567421913147, "learning_rate": 3.794317727090837e-05, "loss": 0.5558, "step": 6036 }, { "epoch": 7.72736, "grad_norm": 0.8454002141952515, "learning_rate": 3.794117647058824e-05, "loss": 0.6204, "step": 6037 }, { "epoch": 7.72864, "grad_norm": 0.7641090750694275, "learning_rate": 3.7939175670268104e-05, "loss": 0.5612, "step": 6038 }, { "epoch": 7.72992, "grad_norm": 0.8416698575019836, "learning_rate": 3.793717486994798e-05, "loss": 0.63, "step": 6039 }, { "epoch": 7.7312, "grad_norm": 0.8227651119232178, "learning_rate": 3.7935174069627854e-05, "loss": 0.6666, "step": 6040 }, { "epoch": 7.73248, "grad_norm": 0.8543378114700317, "learning_rate": 3.7933173269307726e-05, "loss": 0.6023, "step": 6041 }, { "epoch": 7.73376, "grad_norm": 0.761553943157196, "learning_rate": 3.79311724689876e-05, "loss": 0.583, "step": 6042 }, { "epoch": 7.73504, "grad_norm": 0.8009664416313171, "learning_rate": 3.792917166866747e-05, "loss": 0.6192, "step": 6043 }, { "epoch": 7.73632, "grad_norm": 0.7856703400611877, "learning_rate": 3.792717086834734e-05, "loss": 0.5523, "step": 6044 }, { "epoch": 7.7376000000000005, "grad_norm": 0.7818490862846375, "learning_rate": 3.7925170068027214e-05, "loss": 0.6144, "step": 6045 }, { "epoch": 7.73888, "grad_norm": 0.7923960089683533, "learning_rate": 3.7923169267707085e-05, "loss": 0.6022, "step": 6046 }, { "epoch": 7.74016, "grad_norm": 0.769059956073761, 
"learning_rate": 3.792116846738696e-05, "loss": 0.5452, "step": 6047 }, { "epoch": 7.74144, "grad_norm": 0.7857373952865601, "learning_rate": 3.791916766706683e-05, "loss": 0.6226, "step": 6048 }, { "epoch": 7.74272, "grad_norm": 0.7994586825370789, "learning_rate": 3.79171668667467e-05, "loss": 0.5875, "step": 6049 }, { "epoch": 7.744, "grad_norm": 0.7892863154411316, "learning_rate": 3.791516606642657e-05, "loss": 0.6333, "step": 6050 }, { "epoch": 7.74528, "grad_norm": 0.8037270903587341, "learning_rate": 3.7913165266106445e-05, "loss": 0.5936, "step": 6051 }, { "epoch": 7.74656, "grad_norm": 0.7748731374740601, "learning_rate": 3.7911164465786317e-05, "loss": 0.5943, "step": 6052 }, { "epoch": 7.74784, "grad_norm": 0.7597405910491943, "learning_rate": 3.790916366546619e-05, "loss": 0.5837, "step": 6053 }, { "epoch": 7.74912, "grad_norm": 0.8196899890899658, "learning_rate": 3.790716286514606e-05, "loss": 0.6103, "step": 6054 }, { "epoch": 7.7504, "grad_norm": 0.7854442000389099, "learning_rate": 3.790516206482593e-05, "loss": 0.6141, "step": 6055 }, { "epoch": 7.75168, "grad_norm": 0.7270790934562683, "learning_rate": 3.7903161264505804e-05, "loss": 0.4887, "step": 6056 }, { "epoch": 7.75296, "grad_norm": 0.75218266248703, "learning_rate": 3.7901160464185676e-05, "loss": 0.5902, "step": 6057 }, { "epoch": 7.75424, "grad_norm": 0.8306524753570557, "learning_rate": 3.789915966386555e-05, "loss": 0.6816, "step": 6058 }, { "epoch": 7.75552, "grad_norm": 0.7545936703681946, "learning_rate": 3.789715886354542e-05, "loss": 0.5668, "step": 6059 }, { "epoch": 7.7568, "grad_norm": 0.7960179448127747, "learning_rate": 3.789515806322529e-05, "loss": 0.5391, "step": 6060 }, { "epoch": 7.75808, "grad_norm": 0.755833625793457, "learning_rate": 3.789315726290516e-05, "loss": 0.5984, "step": 6061 }, { "epoch": 7.75936, "grad_norm": 0.8086370229721069, "learning_rate": 3.7891156462585035e-05, "loss": 0.6226, "step": 6062 }, { "epoch": 7.76064, "grad_norm": 0.8106130361557007, 
"learning_rate": 3.788915566226491e-05, "loss": 0.6215, "step": 6063 }, { "epoch": 7.76192, "grad_norm": 0.7895246744155884, "learning_rate": 3.788715486194478e-05, "loss": 0.6468, "step": 6064 }, { "epoch": 7.7632, "grad_norm": 0.8080568313598633, "learning_rate": 3.788515406162465e-05, "loss": 0.6082, "step": 6065 }, { "epoch": 7.76448, "grad_norm": 0.7731059193611145, "learning_rate": 3.788315326130452e-05, "loss": 0.6086, "step": 6066 }, { "epoch": 7.76576, "grad_norm": 0.8263415694236755, "learning_rate": 3.78811524609844e-05, "loss": 0.6548, "step": 6067 }, { "epoch": 7.76704, "grad_norm": 0.7245672345161438, "learning_rate": 3.7879151660664266e-05, "loss": 0.5606, "step": 6068 }, { "epoch": 7.76832, "grad_norm": 0.764879047870636, "learning_rate": 3.787715086034414e-05, "loss": 0.6432, "step": 6069 }, { "epoch": 7.7696, "grad_norm": 0.7973184585571289, "learning_rate": 3.787515006002401e-05, "loss": 0.6108, "step": 6070 }, { "epoch": 7.77088, "grad_norm": 0.7751659750938416, "learning_rate": 3.787314925970388e-05, "loss": 0.6245, "step": 6071 }, { "epoch": 7.7721599999999995, "grad_norm": 0.76836758852005, "learning_rate": 3.7871148459383754e-05, "loss": 0.5633, "step": 6072 }, { "epoch": 7.77344, "grad_norm": 0.7604205012321472, "learning_rate": 3.7869147659063626e-05, "loss": 0.6017, "step": 6073 }, { "epoch": 7.77472, "grad_norm": 0.8240510821342468, "learning_rate": 3.7867146858743504e-05, "loss": 0.5819, "step": 6074 }, { "epoch": 7.776, "grad_norm": 0.7578047513961792, "learning_rate": 3.7865146058423376e-05, "loss": 0.562, "step": 6075 }, { "epoch": 7.77728, "grad_norm": 0.7977784872055054, "learning_rate": 3.786314525810324e-05, "loss": 0.6071, "step": 6076 }, { "epoch": 7.77856, "grad_norm": 0.7755118608474731, "learning_rate": 3.786114445778311e-05, "loss": 0.5861, "step": 6077 }, { "epoch": 7.77984, "grad_norm": 0.7646934390068054, "learning_rate": 3.7859143657462985e-05, "loss": 0.6142, "step": 6078 }, { "epoch": 7.78112, "grad_norm": 
0.8070874214172363, "learning_rate": 3.785714285714286e-05, "loss": 0.6089, "step": 6079 }, { "epoch": 7.7824, "grad_norm": 0.7948157787322998, "learning_rate": 3.785514205682273e-05, "loss": 0.6112, "step": 6080 }, { "epoch": 7.78368, "grad_norm": 0.7674957513809204, "learning_rate": 3.785314125650261e-05, "loss": 0.5461, "step": 6081 }, { "epoch": 7.78496, "grad_norm": 0.7721199989318848, "learning_rate": 3.785114045618248e-05, "loss": 0.5872, "step": 6082 }, { "epoch": 7.78624, "grad_norm": 0.8199625015258789, "learning_rate": 3.784913965586235e-05, "loss": 0.6066, "step": 6083 }, { "epoch": 7.78752, "grad_norm": 0.8024269342422485, "learning_rate": 3.7847138855542216e-05, "loss": 0.5541, "step": 6084 }, { "epoch": 7.7888, "grad_norm": 0.7629589438438416, "learning_rate": 3.784513805522209e-05, "loss": 0.5699, "step": 6085 }, { "epoch": 7.79008, "grad_norm": 0.7849573493003845, "learning_rate": 3.784313725490196e-05, "loss": 0.5958, "step": 6086 }, { "epoch": 7.79136, "grad_norm": 0.8116906881332397, "learning_rate": 3.784113645458183e-05, "loss": 0.6552, "step": 6087 }, { "epoch": 7.7926400000000005, "grad_norm": 0.7979661822319031, "learning_rate": 3.783913565426171e-05, "loss": 0.6307, "step": 6088 }, { "epoch": 7.79392, "grad_norm": 0.8299638628959656, "learning_rate": 3.783713485394158e-05, "loss": 0.6451, "step": 6089 }, { "epoch": 7.7952, "grad_norm": 0.787670910358429, "learning_rate": 3.7835134053621454e-05, "loss": 0.6323, "step": 6090 }, { "epoch": 7.79648, "grad_norm": 0.8025368452072144, "learning_rate": 3.7833133253301326e-05, "loss": 0.6199, "step": 6091 }, { "epoch": 7.79776, "grad_norm": 0.7752863168716431, "learning_rate": 3.783113245298119e-05, "loss": 0.5975, "step": 6092 }, { "epoch": 7.79904, "grad_norm": 0.7841124534606934, "learning_rate": 3.782913165266106e-05, "loss": 0.5705, "step": 6093 }, { "epoch": 7.80032, "grad_norm": 0.7986131310462952, "learning_rate": 3.7827130852340935e-05, "loss": 0.5626, "step": 6094 }, { "epoch": 7.8016, 
"grad_norm": 0.7659592032432556, "learning_rate": 3.7825130052020806e-05, "loss": 0.582, "step": 6095 }, { "epoch": 7.80288, "grad_norm": 0.7751227617263794, "learning_rate": 3.7823129251700685e-05, "loss": 0.589, "step": 6096 }, { "epoch": 7.8041599999999995, "grad_norm": 0.810896635055542, "learning_rate": 3.782112845138056e-05, "loss": 0.5888, "step": 6097 }, { "epoch": 7.80544, "grad_norm": 0.8425770998001099, "learning_rate": 3.781912765106043e-05, "loss": 0.6118, "step": 6098 }, { "epoch": 7.80672, "grad_norm": 0.81880784034729, "learning_rate": 3.78171268507403e-05, "loss": 0.6555, "step": 6099 }, { "epoch": 7.808, "grad_norm": 0.8141629099845886, "learning_rate": 3.7815126050420166e-05, "loss": 0.6628, "step": 6100 }, { "epoch": 7.80928, "grad_norm": 0.8063963651657104, "learning_rate": 3.781312525010004e-05, "loss": 0.5981, "step": 6101 }, { "epoch": 7.81056, "grad_norm": 0.8052300214767456, "learning_rate": 3.781112444977991e-05, "loss": 0.5995, "step": 6102 }, { "epoch": 7.81184, "grad_norm": 0.804269552230835, "learning_rate": 3.780912364945979e-05, "loss": 0.6108, "step": 6103 }, { "epoch": 7.81312, "grad_norm": 0.7562627196311951, "learning_rate": 3.780712284913966e-05, "loss": 0.5164, "step": 6104 }, { "epoch": 7.8144, "grad_norm": 0.800515353679657, "learning_rate": 3.780512204881953e-05, "loss": 0.5988, "step": 6105 }, { "epoch": 7.81568, "grad_norm": 0.8121452331542969, "learning_rate": 3.7803121248499404e-05, "loss": 0.632, "step": 6106 }, { "epoch": 7.81696, "grad_norm": 0.8428505063056946, "learning_rate": 3.7801120448179275e-05, "loss": 0.6044, "step": 6107 }, { "epoch": 7.81824, "grad_norm": 0.8227953314781189, "learning_rate": 3.779911964785914e-05, "loss": 0.6519, "step": 6108 }, { "epoch": 7.81952, "grad_norm": 0.7917014360427856, "learning_rate": 3.779711884753901e-05, "loss": 0.5866, "step": 6109 }, { "epoch": 7.8208, "grad_norm": 0.8045591115951538, "learning_rate": 3.779511804721889e-05, "loss": 0.6472, "step": 6110 }, { "epoch": 
7.82208, "grad_norm": 0.772802472114563, "learning_rate": 3.779311724689876e-05, "loss": 0.5495, "step": 6111 }, { "epoch": 7.82336, "grad_norm": 0.8186236023902893, "learning_rate": 3.7791116446578635e-05, "loss": 0.664, "step": 6112 }, { "epoch": 7.8246400000000005, "grad_norm": 0.806842029094696, "learning_rate": 3.778911564625851e-05, "loss": 0.6216, "step": 6113 }, { "epoch": 7.82592, "grad_norm": 0.7762079238891602, "learning_rate": 3.778711484593838e-05, "loss": 0.5853, "step": 6114 }, { "epoch": 7.8272, "grad_norm": 0.8283485770225525, "learning_rate": 3.778511404561825e-05, "loss": 0.5947, "step": 6115 }, { "epoch": 7.82848, "grad_norm": 0.8360929489135742, "learning_rate": 3.7783113245298115e-05, "loss": 0.6088, "step": 6116 }, { "epoch": 7.82976, "grad_norm": 0.7732667922973633, "learning_rate": 3.7781112444977994e-05, "loss": 0.6161, "step": 6117 }, { "epoch": 7.83104, "grad_norm": 0.8080599308013916, "learning_rate": 3.7779111644657866e-05, "loss": 0.6314, "step": 6118 }, { "epoch": 7.83232, "grad_norm": 0.8000563979148865, "learning_rate": 3.777711084433774e-05, "loss": 0.6548, "step": 6119 }, { "epoch": 7.8336, "grad_norm": 0.8152042031288147, "learning_rate": 3.777511004401761e-05, "loss": 0.6637, "step": 6120 }, { "epoch": 7.83488, "grad_norm": 0.8046169877052307, "learning_rate": 3.777310924369748e-05, "loss": 0.6306, "step": 6121 }, { "epoch": 7.83616, "grad_norm": 0.7880290746688843, "learning_rate": 3.777110844337735e-05, "loss": 0.6418, "step": 6122 }, { "epoch": 7.83744, "grad_norm": 0.8029178380966187, "learning_rate": 3.7769107643057225e-05, "loss": 0.6183, "step": 6123 }, { "epoch": 7.83872, "grad_norm": 0.7654139995574951, "learning_rate": 3.77671068427371e-05, "loss": 0.5793, "step": 6124 }, { "epoch": 7.84, "grad_norm": 0.8131774067878723, "learning_rate": 3.776510604241697e-05, "loss": 0.6485, "step": 6125 }, { "epoch": 7.84128, "grad_norm": 0.7340885996818542, "learning_rate": 3.776310524209684e-05, "loss": 0.5605, "step": 6126 }, { 
"epoch": 7.84256, "grad_norm": 0.7948605418205261, "learning_rate": 3.776110444177671e-05, "loss": 0.588, "step": 6127 }, { "epoch": 7.84384, "grad_norm": 0.823373019695282, "learning_rate": 3.7759103641456584e-05, "loss": 0.5925, "step": 6128 }, { "epoch": 7.84512, "grad_norm": 0.8136553764343262, "learning_rate": 3.7757102841136456e-05, "loss": 0.6336, "step": 6129 }, { "epoch": 7.8464, "grad_norm": 0.7955484390258789, "learning_rate": 3.775510204081633e-05, "loss": 0.5678, "step": 6130 }, { "epoch": 7.84768, "grad_norm": 0.7093358039855957, "learning_rate": 3.77531012404962e-05, "loss": 0.5829, "step": 6131 }, { "epoch": 7.84896, "grad_norm": 0.7424338459968567, "learning_rate": 3.775110044017607e-05, "loss": 0.5677, "step": 6132 }, { "epoch": 7.85024, "grad_norm": 0.7559502720832825, "learning_rate": 3.7749099639855944e-05, "loss": 0.5777, "step": 6133 }, { "epoch": 7.85152, "grad_norm": 0.8359054923057556, "learning_rate": 3.7747098839535816e-05, "loss": 0.5884, "step": 6134 }, { "epoch": 7.8528, "grad_norm": 0.8114975094795227, "learning_rate": 3.774509803921569e-05, "loss": 0.5916, "step": 6135 }, { "epoch": 7.85408, "grad_norm": 0.8038936853408813, "learning_rate": 3.774309723889556e-05, "loss": 0.6401, "step": 6136 }, { "epoch": 7.85536, "grad_norm": 0.7915340065956116, "learning_rate": 3.774109643857543e-05, "loss": 0.574, "step": 6137 }, { "epoch": 7.85664, "grad_norm": 0.8028757572174072, "learning_rate": 3.77390956382553e-05, "loss": 0.5954, "step": 6138 }, { "epoch": 7.85792, "grad_norm": 0.7789997458457947, "learning_rate": 3.7737094837935175e-05, "loss": 0.5468, "step": 6139 }, { "epoch": 7.8591999999999995, "grad_norm": 0.8444523215293884, "learning_rate": 3.773509403761505e-05, "loss": 0.6584, "step": 6140 }, { "epoch": 7.86048, "grad_norm": 0.7982633709907532, "learning_rate": 3.773309323729492e-05, "loss": 0.6439, "step": 6141 }, { "epoch": 7.86176, "grad_norm": 0.8487026691436768, "learning_rate": 3.773109243697479e-05, "loss": 0.6307, "step": 
6142 }, { "epoch": 7.86304, "grad_norm": 0.8426850438117981, "learning_rate": 3.772909163665466e-05, "loss": 0.5936, "step": 6143 }, { "epoch": 7.86432, "grad_norm": 0.82231205701828, "learning_rate": 3.7727090836334534e-05, "loss": 0.5931, "step": 6144 }, { "epoch": 7.8656, "grad_norm": 0.8080248832702637, "learning_rate": 3.772509003601441e-05, "loss": 0.5993, "step": 6145 }, { "epoch": 7.86688, "grad_norm": 0.7820031046867371, "learning_rate": 3.772308923569428e-05, "loss": 0.5974, "step": 6146 }, { "epoch": 7.86816, "grad_norm": 0.7480020523071289, "learning_rate": 3.772108843537415e-05, "loss": 0.5778, "step": 6147 }, { "epoch": 7.86944, "grad_norm": 0.7739498019218445, "learning_rate": 3.771908763505402e-05, "loss": 0.6069, "step": 6148 }, { "epoch": 7.87072, "grad_norm": 0.79982590675354, "learning_rate": 3.7717086834733893e-05, "loss": 0.6673, "step": 6149 }, { "epoch": 7.872, "grad_norm": 0.7504255175590515, "learning_rate": 3.7715086034413765e-05, "loss": 0.5946, "step": 6150 }, { "epoch": 7.87328, "grad_norm": 0.7930676341056824, "learning_rate": 3.771308523409364e-05, "loss": 0.6112, "step": 6151 }, { "epoch": 7.87456, "grad_norm": 0.7735934853553772, "learning_rate": 3.7711084433773516e-05, "loss": 0.5914, "step": 6152 }, { "epoch": 7.87584, "grad_norm": 0.8146105408668518, "learning_rate": 3.770908363345339e-05, "loss": 0.6164, "step": 6153 }, { "epoch": 7.87712, "grad_norm": 0.7409805655479431, "learning_rate": 3.770708283313325e-05, "loss": 0.4994, "step": 6154 }, { "epoch": 7.8784, "grad_norm": 0.8767794370651245, "learning_rate": 3.7705082032813125e-05, "loss": 0.6723, "step": 6155 }, { "epoch": 7.8796800000000005, "grad_norm": 0.7842835187911987, "learning_rate": 3.7703081232492996e-05, "loss": 0.5496, "step": 6156 }, { "epoch": 7.88096, "grad_norm": 0.8183816075325012, "learning_rate": 3.770108043217287e-05, "loss": 0.5986, "step": 6157 }, { "epoch": 7.88224, "grad_norm": 0.7943567037582397, "learning_rate": 3.769907963185274e-05, "loss": 
0.6196, "step": 6158 }, { "epoch": 7.88352, "grad_norm": 0.7995247840881348, "learning_rate": 3.769707883153262e-05, "loss": 0.6269, "step": 6159 }, { "epoch": 7.8848, "grad_norm": 0.798160970211029, "learning_rate": 3.769507803121249e-05, "loss": 0.5612, "step": 6160 }, { "epoch": 7.88608, "grad_norm": 0.7991229295730591, "learning_rate": 3.769307723089236e-05, "loss": 0.5785, "step": 6161 }, { "epoch": 7.88736, "grad_norm": 0.7615501880645752, "learning_rate": 3.769107643057223e-05, "loss": 0.5869, "step": 6162 }, { "epoch": 7.88864, "grad_norm": 0.7592645287513733, "learning_rate": 3.76890756302521e-05, "loss": 0.5602, "step": 6163 }, { "epoch": 7.88992, "grad_norm": 0.8296216726303101, "learning_rate": 3.768707482993197e-05, "loss": 0.6587, "step": 6164 }, { "epoch": 7.8911999999999995, "grad_norm": 0.7715473175048828, "learning_rate": 3.768507402961184e-05, "loss": 0.6282, "step": 6165 }, { "epoch": 7.89248, "grad_norm": 0.786847710609436, "learning_rate": 3.768307322929172e-05, "loss": 0.6401, "step": 6166 }, { "epoch": 7.89376, "grad_norm": 0.7830649018287659, "learning_rate": 3.7681072428971594e-05, "loss": 0.6306, "step": 6167 }, { "epoch": 7.89504, "grad_norm": 0.8743398189544678, "learning_rate": 3.7679071628651466e-05, "loss": 0.6588, "step": 6168 }, { "epoch": 7.89632, "grad_norm": 0.8453099131584167, "learning_rate": 3.767707082833134e-05, "loss": 0.6599, "step": 6169 }, { "epoch": 7.8976, "grad_norm": 0.8115950226783752, "learning_rate": 3.76750700280112e-05, "loss": 0.6126, "step": 6170 }, { "epoch": 7.89888, "grad_norm": 0.8021664619445801, "learning_rate": 3.7673069227691074e-05, "loss": 0.5736, "step": 6171 }, { "epoch": 7.90016, "grad_norm": 0.7566850781440735, "learning_rate": 3.7671068427370946e-05, "loss": 0.5289, "step": 6172 }, { "epoch": 7.90144, "grad_norm": 0.8024584054946899, "learning_rate": 3.7669067627050825e-05, "loss": 0.6521, "step": 6173 }, { "epoch": 7.90272, "grad_norm": 0.7810555100440979, "learning_rate": 
3.76670668267307e-05, "loss": 0.6171, "step": 6174 }, { "epoch": 7.904, "grad_norm": 0.839141309261322, "learning_rate": 3.766506602641057e-05, "loss": 0.6384, "step": 6175 }, { "epoch": 7.90528, "grad_norm": 0.8140023350715637, "learning_rate": 3.766306522609044e-05, "loss": 0.6113, "step": 6176 }, { "epoch": 7.90656, "grad_norm": 0.7578498721122742, "learning_rate": 3.766106442577031e-05, "loss": 0.5981, "step": 6177 }, { "epoch": 7.90784, "grad_norm": 0.850180447101593, "learning_rate": 3.765906362545018e-05, "loss": 0.6304, "step": 6178 }, { "epoch": 7.90912, "grad_norm": 0.7836924195289612, "learning_rate": 3.765706282513005e-05, "loss": 0.6016, "step": 6179 }, { "epoch": 7.9104, "grad_norm": 0.7858523726463318, "learning_rate": 3.765506202480993e-05, "loss": 0.5774, "step": 6180 }, { "epoch": 7.9116800000000005, "grad_norm": 0.8169428706169128, "learning_rate": 3.76530612244898e-05, "loss": 0.6141, "step": 6181 }, { "epoch": 7.91296, "grad_norm": 0.779033362865448, "learning_rate": 3.765106042416967e-05, "loss": 0.5847, "step": 6182 }, { "epoch": 7.91424, "grad_norm": 0.8012449741363525, "learning_rate": 3.764905962384954e-05, "loss": 0.6393, "step": 6183 }, { "epoch": 7.91552, "grad_norm": 0.7956286668777466, "learning_rate": 3.7647058823529415e-05, "loss": 0.5944, "step": 6184 }, { "epoch": 7.9168, "grad_norm": 0.79158616065979, "learning_rate": 3.764505802320929e-05, "loss": 0.5891, "step": 6185 }, { "epoch": 7.91808, "grad_norm": 0.7679545879364014, "learning_rate": 3.764305722288915e-05, "loss": 0.5964, "step": 6186 }, { "epoch": 7.91936, "grad_norm": 0.7634429931640625, "learning_rate": 3.764105642256903e-05, "loss": 0.6025, "step": 6187 }, { "epoch": 7.92064, "grad_norm": 0.840979278087616, "learning_rate": 3.76390556222489e-05, "loss": 0.6005, "step": 6188 }, { "epoch": 7.92192, "grad_norm": 0.8021059632301331, "learning_rate": 3.7637054821928775e-05, "loss": 0.6507, "step": 6189 }, { "epoch": 7.9232, "grad_norm": 0.7973845601081848, "learning_rate": 
3.7635054021608646e-05, "loss": 0.6245, "step": 6190 }, { "epoch": 7.92448, "grad_norm": 0.8019742369651794, "learning_rate": 3.763305322128852e-05, "loss": 0.6052, "step": 6191 }, { "epoch": 7.92576, "grad_norm": 0.8176249265670776, "learning_rate": 3.763105242096839e-05, "loss": 0.6816, "step": 6192 }, { "epoch": 7.92704, "grad_norm": 0.799094557762146, "learning_rate": 3.762905162064826e-05, "loss": 0.5972, "step": 6193 }, { "epoch": 7.92832, "grad_norm": 0.7592592835426331, "learning_rate": 3.7627050820328134e-05, "loss": 0.5614, "step": 6194 }, { "epoch": 7.9296, "grad_norm": 0.7643021941184998, "learning_rate": 3.7625050020008006e-05, "loss": 0.618, "step": 6195 }, { "epoch": 7.93088, "grad_norm": 0.8433973789215088, "learning_rate": 3.762304921968788e-05, "loss": 0.6484, "step": 6196 }, { "epoch": 7.93216, "grad_norm": 0.8006365299224854, "learning_rate": 3.762104841936775e-05, "loss": 0.5503, "step": 6197 }, { "epoch": 7.93344, "grad_norm": 0.8090243935585022, "learning_rate": 3.761904761904762e-05, "loss": 0.6048, "step": 6198 }, { "epoch": 7.93472, "grad_norm": 0.8174999356269836, "learning_rate": 3.761704681872749e-05, "loss": 0.6092, "step": 6199 }, { "epoch": 7.936, "grad_norm": 0.7761614322662354, "learning_rate": 3.7615046018407365e-05, "loss": 0.5421, "step": 6200 }, { "epoch": 7.93728, "grad_norm": 0.7439830303192139, "learning_rate": 3.761304521808724e-05, "loss": 0.6109, "step": 6201 }, { "epoch": 7.93856, "grad_norm": 0.8538080453872681, "learning_rate": 3.761104441776711e-05, "loss": 0.6092, "step": 6202 }, { "epoch": 7.93984, "grad_norm": 0.7983587980270386, "learning_rate": 3.760904361744698e-05, "loss": 0.6157, "step": 6203 }, { "epoch": 7.94112, "grad_norm": 0.8033997416496277, "learning_rate": 3.760704281712685e-05, "loss": 0.6363, "step": 6204 }, { "epoch": 7.9424, "grad_norm": 0.8110709190368652, "learning_rate": 3.7605042016806724e-05, "loss": 0.5945, "step": 6205 }, { "epoch": 7.94368, "grad_norm": 0.8630302548408508, "learning_rate": 
3.7603041216486596e-05, "loss": 0.5869, "step": 6206 }, { "epoch": 7.94496, "grad_norm": 0.865012526512146, "learning_rate": 3.760104041616647e-05, "loss": 0.6861, "step": 6207 }, { "epoch": 7.9462399999999995, "grad_norm": 0.8047817945480347, "learning_rate": 3.759903961584634e-05, "loss": 0.6484, "step": 6208 }, { "epoch": 7.94752, "grad_norm": 0.7712967395782471, "learning_rate": 3.759703881552621e-05, "loss": 0.6387, "step": 6209 }, { "epoch": 7.9488, "grad_norm": 0.7860550284385681, "learning_rate": 3.7595038015206084e-05, "loss": 0.6384, "step": 6210 }, { "epoch": 7.95008, "grad_norm": 0.7760273218154907, "learning_rate": 3.7593037214885955e-05, "loss": 0.602, "step": 6211 }, { "epoch": 7.95136, "grad_norm": 0.7923306822776794, "learning_rate": 3.759103641456583e-05, "loss": 0.6, "step": 6212 }, { "epoch": 7.95264, "grad_norm": 0.7732347846031189, "learning_rate": 3.75890356142457e-05, "loss": 0.5843, "step": 6213 }, { "epoch": 7.95392, "grad_norm": 0.7786732912063599, "learning_rate": 3.758703481392557e-05, "loss": 0.6319, "step": 6214 }, { "epoch": 7.9552, "grad_norm": 0.822283148765564, "learning_rate": 3.758503401360544e-05, "loss": 0.638, "step": 6215 }, { "epoch": 7.95648, "grad_norm": 0.8353981375694275, "learning_rate": 3.7583033213285315e-05, "loss": 0.6462, "step": 6216 }, { "epoch": 7.95776, "grad_norm": 0.7998156547546387, "learning_rate": 3.7581032412965187e-05, "loss": 0.6271, "step": 6217 }, { "epoch": 7.95904, "grad_norm": 0.8036489486694336, "learning_rate": 3.757903161264506e-05, "loss": 0.6486, "step": 6218 }, { "epoch": 7.96032, "grad_norm": 0.8148389458656311, "learning_rate": 3.757703081232493e-05, "loss": 0.5962, "step": 6219 }, { "epoch": 7.9616, "grad_norm": 0.7913573980331421, "learning_rate": 3.75750300120048e-05, "loss": 0.5983, "step": 6220 }, { "epoch": 7.96288, "grad_norm": 0.7523571848869324, "learning_rate": 3.7573029211684674e-05, "loss": 0.6503, "step": 6221 }, { "epoch": 7.96416, "grad_norm": 0.7958295941352844, 
"learning_rate": 3.7571028411364546e-05, "loss": 0.5914, "step": 6222 }, { "epoch": 7.96544, "grad_norm": 0.7768007516860962, "learning_rate": 3.7569027611044424e-05, "loss": 0.617, "step": 6223 }, { "epoch": 7.9667200000000005, "grad_norm": 0.7701081037521362, "learning_rate": 3.756702681072429e-05, "loss": 0.5931, "step": 6224 }, { "epoch": 7.968, "grad_norm": 0.7834358811378479, "learning_rate": 3.756502601040416e-05, "loss": 0.5555, "step": 6225 }, { "epoch": 7.96928, "grad_norm": 0.7959411144256592, "learning_rate": 3.756302521008403e-05, "loss": 0.63, "step": 6226 }, { "epoch": 7.97056, "grad_norm": 0.7836350202560425, "learning_rate": 3.7561024409763905e-05, "loss": 0.5953, "step": 6227 }, { "epoch": 7.97184, "grad_norm": 0.7769935131072998, "learning_rate": 3.755902360944378e-05, "loss": 0.6272, "step": 6228 }, { "epoch": 7.97312, "grad_norm": 0.7422361373901367, "learning_rate": 3.755702280912365e-05, "loss": 0.5554, "step": 6229 }, { "epoch": 7.9744, "grad_norm": 0.8061965703964233, "learning_rate": 3.755502200880353e-05, "loss": 0.5511, "step": 6230 }, { "epoch": 7.97568, "grad_norm": 0.7815845608711243, "learning_rate": 3.75530212084834e-05, "loss": 0.5608, "step": 6231 }, { "epoch": 7.97696, "grad_norm": 0.7946850657463074, "learning_rate": 3.7551020408163264e-05, "loss": 0.5856, "step": 6232 }, { "epoch": 7.9782399999999996, "grad_norm": 0.738746702671051, "learning_rate": 3.7549019607843136e-05, "loss": 0.5789, "step": 6233 }, { "epoch": 7.97952, "grad_norm": 0.7725175619125366, "learning_rate": 3.754701880752301e-05, "loss": 0.6236, "step": 6234 }, { "epoch": 7.9808, "grad_norm": 0.8103924989700317, "learning_rate": 3.754501800720288e-05, "loss": 0.6221, "step": 6235 }, { "epoch": 7.98208, "grad_norm": 0.8287813067436218, "learning_rate": 3.754301720688275e-05, "loss": 0.6418, "step": 6236 }, { "epoch": 7.98336, "grad_norm": 0.8044477701187134, "learning_rate": 3.754101640656263e-05, "loss": 0.5968, "step": 6237 }, { "epoch": 7.98464, "grad_norm": 
0.7894521951675415, "learning_rate": 3.75390156062425e-05, "loss": 0.5945, "step": 6238 }, { "epoch": 7.98592, "grad_norm": 0.8011317253112793, "learning_rate": 3.7537014805922374e-05, "loss": 0.6376, "step": 6239 }, { "epoch": 7.9872, "grad_norm": 0.7666401863098145, "learning_rate": 3.753501400560224e-05, "loss": 0.5611, "step": 6240 }, { "epoch": 7.98848, "grad_norm": 0.8295350670814514, "learning_rate": 3.753301320528211e-05, "loss": 0.6115, "step": 6241 }, { "epoch": 7.98976, "grad_norm": 0.7832409739494324, "learning_rate": 3.753101240496198e-05, "loss": 0.599, "step": 6242 }, { "epoch": 7.99104, "grad_norm": 0.8104324340820312, "learning_rate": 3.7529011604641855e-05, "loss": 0.6137, "step": 6243 }, { "epoch": 7.99232, "grad_norm": 0.7831165790557861, "learning_rate": 3.7527010804321733e-05, "loss": 0.5828, "step": 6244 }, { "epoch": 7.9936, "grad_norm": 0.8100852370262146, "learning_rate": 3.7525010004001605e-05, "loss": 0.5893, "step": 6245 }, { "epoch": 7.99488, "grad_norm": 0.8356633186340332, "learning_rate": 3.752300920368148e-05, "loss": 0.6308, "step": 6246 }, { "epoch": 7.99616, "grad_norm": 0.85598224401474, "learning_rate": 3.752100840336135e-05, "loss": 0.6509, "step": 6247 }, { "epoch": 7.99744, "grad_norm": 0.8315650224685669, "learning_rate": 3.7519007603041214e-05, "loss": 0.6497, "step": 6248 }, { "epoch": 7.9987200000000005, "grad_norm": 0.7793347835540771, "learning_rate": 3.7517006802721086e-05, "loss": 0.5806, "step": 6249 }, { "epoch": 8.0, "grad_norm": 1.704264521598816, "learning_rate": 3.751500600240096e-05, "loss": 1.0745, "step": 6250 }, { "epoch": 8.00128, "grad_norm": 0.7346014976501465, "learning_rate": 3.7513005202080836e-05, "loss": 0.5634, "step": 6251 }, { "epoch": 8.00256, "grad_norm": 0.7524464130401611, "learning_rate": 3.751100440176071e-05, "loss": 0.5499, "step": 6252 }, { "epoch": 8.00384, "grad_norm": 0.8172950148582458, "learning_rate": 3.750900360144058e-05, "loss": 0.5984, "step": 6253 }, { "epoch": 8.00512, 
"grad_norm": 0.8188893795013428, "learning_rate": 3.750700280112045e-05, "loss": 0.6099, "step": 6254 }, { "epoch": 8.0064, "grad_norm": 0.825077474117279, "learning_rate": 3.7505002000800324e-05, "loss": 0.6207, "step": 6255 }, { "epoch": 8.00768, "grad_norm": 0.7807846069335938, "learning_rate": 3.750300120048019e-05, "loss": 0.5735, "step": 6256 }, { "epoch": 8.00896, "grad_norm": 0.7946782112121582, "learning_rate": 3.750100040016006e-05, "loss": 0.6036, "step": 6257 }, { "epoch": 8.01024, "grad_norm": 0.8482866883277893, "learning_rate": 3.749899959983994e-05, "loss": 0.5998, "step": 6258 }, { "epoch": 8.01152, "grad_norm": 0.7960243821144104, "learning_rate": 3.749699879951981e-05, "loss": 0.6042, "step": 6259 }, { "epoch": 8.0128, "grad_norm": 0.830877959728241, "learning_rate": 3.749499799919968e-05, "loss": 0.586, "step": 6260 }, { "epoch": 8.01408, "grad_norm": 0.782637357711792, "learning_rate": 3.7492997198879555e-05, "loss": 0.5895, "step": 6261 }, { "epoch": 8.01536, "grad_norm": 0.8307430148124695, "learning_rate": 3.749099639855943e-05, "loss": 0.5734, "step": 6262 }, { "epoch": 8.01664, "grad_norm": 0.832324206829071, "learning_rate": 3.74889955982393e-05, "loss": 0.5546, "step": 6263 }, { "epoch": 8.01792, "grad_norm": 0.8269922733306885, "learning_rate": 3.7486994797919164e-05, "loss": 0.5858, "step": 6264 }, { "epoch": 8.0192, "grad_norm": 0.8416856527328491, "learning_rate": 3.748499399759904e-05, "loss": 0.6297, "step": 6265 }, { "epoch": 8.02048, "grad_norm": 0.806425929069519, "learning_rate": 3.7482993197278914e-05, "loss": 0.5737, "step": 6266 }, { "epoch": 8.02176, "grad_norm": 0.8288019895553589, "learning_rate": 3.7480992396958786e-05, "loss": 0.5856, "step": 6267 }, { "epoch": 8.02304, "grad_norm": 0.8416863679885864, "learning_rate": 3.747899159663866e-05, "loss": 0.589, "step": 6268 }, { "epoch": 8.02432, "grad_norm": 0.8179052472114563, "learning_rate": 3.747699079631853e-05, "loss": 0.6037, "step": 6269 }, { "epoch": 8.0256, 
"grad_norm": 0.7979174256324768, "learning_rate": 3.74749899959984e-05, "loss": 0.5805, "step": 6270 }, { "epoch": 8.02688, "grad_norm": 0.7354505658149719, "learning_rate": 3.7472989195678274e-05, "loss": 0.5747, "step": 6271 }, { "epoch": 8.02816, "grad_norm": 0.8041462898254395, "learning_rate": 3.7470988395358145e-05, "loss": 0.5907, "step": 6272 }, { "epoch": 8.02944, "grad_norm": 0.7784780263900757, "learning_rate": 3.746898759503802e-05, "loss": 0.5735, "step": 6273 }, { "epoch": 8.03072, "grad_norm": 0.8290993571281433, "learning_rate": 3.746698679471789e-05, "loss": 0.5509, "step": 6274 }, { "epoch": 8.032, "grad_norm": 0.7944415807723999, "learning_rate": 3.746498599439776e-05, "loss": 0.5914, "step": 6275 }, { "epoch": 8.03328, "grad_norm": 0.7763034701347351, "learning_rate": 3.746298519407763e-05, "loss": 0.5719, "step": 6276 }, { "epoch": 8.03456, "grad_norm": 0.8510763645172119, "learning_rate": 3.7460984393757505e-05, "loss": 0.6297, "step": 6277 }, { "epoch": 8.03584, "grad_norm": 0.7973195314407349, "learning_rate": 3.7458983593437377e-05, "loss": 0.5668, "step": 6278 }, { "epoch": 8.03712, "grad_norm": 0.7961564064025879, "learning_rate": 3.745698279311725e-05, "loss": 0.5721, "step": 6279 }, { "epoch": 8.0384, "grad_norm": 0.7879326343536377, "learning_rate": 3.745498199279712e-05, "loss": 0.5706, "step": 6280 }, { "epoch": 8.03968, "grad_norm": 0.8085858225822449, "learning_rate": 3.745298119247699e-05, "loss": 0.5476, "step": 6281 }, { "epoch": 8.04096, "grad_norm": 0.8712663054466248, "learning_rate": 3.7450980392156864e-05, "loss": 0.6085, "step": 6282 }, { "epoch": 8.04224, "grad_norm": 0.7817235589027405, "learning_rate": 3.7448979591836736e-05, "loss": 0.5623, "step": 6283 }, { "epoch": 8.043520000000001, "grad_norm": 0.8824650049209595, "learning_rate": 3.744697879151661e-05, "loss": 0.612, "step": 6284 }, { "epoch": 8.0448, "grad_norm": 0.8118883371353149, "learning_rate": 3.744497799119648e-05, "loss": 0.5937, "step": 6285 }, { 
"epoch": 8.04608, "grad_norm": 0.8654536604881287, "learning_rate": 3.744297719087636e-05, "loss": 0.6412, "step": 6286 }, { "epoch": 8.04736, "grad_norm": 0.8161172866821289, "learning_rate": 3.744097639055622e-05, "loss": 0.6114, "step": 6287 }, { "epoch": 8.04864, "grad_norm": 0.8604544997215271, "learning_rate": 3.7438975590236095e-05, "loss": 0.6085, "step": 6288 }, { "epoch": 8.04992, "grad_norm": 0.816491425037384, "learning_rate": 3.743697478991597e-05, "loss": 0.6371, "step": 6289 }, { "epoch": 8.0512, "grad_norm": 0.8112639784812927, "learning_rate": 3.743497398959584e-05, "loss": 0.5633, "step": 6290 }, { "epoch": 8.05248, "grad_norm": 0.8095521330833435, "learning_rate": 3.743297318927571e-05, "loss": 0.6115, "step": 6291 }, { "epoch": 8.05376, "grad_norm": 0.8249873518943787, "learning_rate": 3.743097238895558e-05, "loss": 0.5975, "step": 6292 }, { "epoch": 8.05504, "grad_norm": 0.7858548164367676, "learning_rate": 3.742897158863546e-05, "loss": 0.5539, "step": 6293 }, { "epoch": 8.05632, "grad_norm": 0.8200925588607788, "learning_rate": 3.742697078831533e-05, "loss": 0.5892, "step": 6294 }, { "epoch": 8.0576, "grad_norm": 0.7779000997543335, "learning_rate": 3.74249699879952e-05, "loss": 0.5372, "step": 6295 }, { "epoch": 8.05888, "grad_norm": 0.7932630181312561, "learning_rate": 3.742296918767507e-05, "loss": 0.5852, "step": 6296 }, { "epoch": 8.06016, "grad_norm": 0.8097670078277588, "learning_rate": 3.742096838735494e-05, "loss": 0.6018, "step": 6297 }, { "epoch": 8.06144, "grad_norm": 0.7957705855369568, "learning_rate": 3.7418967587034814e-05, "loss": 0.5866, "step": 6298 }, { "epoch": 8.06272, "grad_norm": 0.7967826128005981, "learning_rate": 3.7416966786714686e-05, "loss": 0.5785, "step": 6299 }, { "epoch": 8.064, "grad_norm": 0.7749722003936768, "learning_rate": 3.7414965986394564e-05, "loss": 0.5758, "step": 6300 }, { "epoch": 8.06528, "grad_norm": 0.8013430237770081, "learning_rate": 3.7412965186074436e-05, "loss": 0.5362, "step": 6301 }, { 
"epoch": 8.06656, "grad_norm": 0.8903826475143433, "learning_rate": 3.741096438575431e-05, "loss": 0.6127, "step": 6302 }, { "epoch": 8.06784, "grad_norm": 0.7794708013534546, "learning_rate": 3.740896358543417e-05, "loss": 0.5785, "step": 6303 }, { "epoch": 8.06912, "grad_norm": 0.8395883440971375, "learning_rate": 3.7406962785114045e-05, "loss": 0.612, "step": 6304 }, { "epoch": 8.0704, "grad_norm": 0.7724868655204773, "learning_rate": 3.740496198479392e-05, "loss": 0.5437, "step": 6305 }, { "epoch": 8.07168, "grad_norm": 0.8095210790634155, "learning_rate": 3.740296118447379e-05, "loss": 0.6093, "step": 6306 }, { "epoch": 8.07296, "grad_norm": 0.7821049690246582, "learning_rate": 3.740096038415367e-05, "loss": 0.5682, "step": 6307 }, { "epoch": 8.07424, "grad_norm": 0.8228663802146912, "learning_rate": 3.739895958383354e-05, "loss": 0.5655, "step": 6308 }, { "epoch": 8.07552, "grad_norm": 0.7802860140800476, "learning_rate": 3.739695878351341e-05, "loss": 0.5646, "step": 6309 }, { "epoch": 8.0768, "grad_norm": 0.8123772740364075, "learning_rate": 3.739495798319328e-05, "loss": 0.5666, "step": 6310 }, { "epoch": 8.07808, "grad_norm": 0.8669785857200623, "learning_rate": 3.739295718287315e-05, "loss": 0.5712, "step": 6311 }, { "epoch": 8.07936, "grad_norm": 0.798360288143158, "learning_rate": 3.739095638255302e-05, "loss": 0.5568, "step": 6312 }, { "epoch": 8.08064, "grad_norm": 0.8681429624557495, "learning_rate": 3.738895558223289e-05, "loss": 0.6515, "step": 6313 }, { "epoch": 8.08192, "grad_norm": 0.8483299612998962, "learning_rate": 3.7386954781912763e-05, "loss": 0.6093, "step": 6314 }, { "epoch": 8.0832, "grad_norm": 0.8004496693611145, "learning_rate": 3.738495398159264e-05, "loss": 0.5589, "step": 6315 }, { "epoch": 8.08448, "grad_norm": 0.8437052369117737, "learning_rate": 3.7382953181272514e-05, "loss": 0.6215, "step": 6316 }, { "epoch": 8.08576, "grad_norm": 0.7914602756500244, "learning_rate": 3.7380952380952386e-05, "loss": 0.5765, "step": 6317 }, { 
"epoch": 8.08704, "grad_norm": 0.8129740357398987, "learning_rate": 3.737895158063226e-05, "loss": 0.5463, "step": 6318 }, { "epoch": 8.08832, "grad_norm": 0.7785910964012146, "learning_rate": 3.737695078031212e-05, "loss": 0.5845, "step": 6319 }, { "epoch": 8.0896, "grad_norm": 0.781987726688385, "learning_rate": 3.7374949979991995e-05, "loss": 0.5644, "step": 6320 }, { "epoch": 8.09088, "grad_norm": 0.7904685139656067, "learning_rate": 3.7372949179671866e-05, "loss": 0.5591, "step": 6321 }, { "epoch": 8.09216, "grad_norm": 0.7658714652061462, "learning_rate": 3.7370948379351745e-05, "loss": 0.5768, "step": 6322 }, { "epoch": 8.09344, "grad_norm": 0.8255518674850464, "learning_rate": 3.736894757903162e-05, "loss": 0.6142, "step": 6323 }, { "epoch": 8.09472, "grad_norm": 0.7947357296943665, "learning_rate": 3.736694677871149e-05, "loss": 0.5057, "step": 6324 }, { "epoch": 8.096, "grad_norm": 0.8527635335922241, "learning_rate": 3.736494597839136e-05, "loss": 0.5636, "step": 6325 }, { "epoch": 8.09728, "grad_norm": 0.8318933248519897, "learning_rate": 3.736294517807123e-05, "loss": 0.6075, "step": 6326 }, { "epoch": 8.09856, "grad_norm": 0.8209018707275391, "learning_rate": 3.73609443777511e-05, "loss": 0.5532, "step": 6327 }, { "epoch": 8.09984, "grad_norm": 0.7837770581245422, "learning_rate": 3.735894357743097e-05, "loss": 0.576, "step": 6328 }, { "epoch": 8.10112, "grad_norm": 0.7998737096786499, "learning_rate": 3.735694277711085e-05, "loss": 0.5571, "step": 6329 }, { "epoch": 8.1024, "grad_norm": 0.7599712014198303, "learning_rate": 3.735494197679072e-05, "loss": 0.5275, "step": 6330 }, { "epoch": 8.10368, "grad_norm": 0.8159705400466919, "learning_rate": 3.735294117647059e-05, "loss": 0.5565, "step": 6331 }, { "epoch": 8.10496, "grad_norm": 0.8160362243652344, "learning_rate": 3.7350940376150464e-05, "loss": 0.5994, "step": 6332 }, { "epoch": 8.10624, "grad_norm": 0.7920660972595215, "learning_rate": 3.7348939575830335e-05, "loss": 0.6277, "step": 6333 }, { 
"epoch": 8.10752, "grad_norm": 0.8129349946975708, "learning_rate": 3.734693877551021e-05, "loss": 0.5953, "step": 6334 }, { "epoch": 8.1088, "grad_norm": 0.8045061826705933, "learning_rate": 3.734493797519007e-05, "loss": 0.5593, "step": 6335 }, { "epoch": 8.11008, "grad_norm": 0.8104987144470215, "learning_rate": 3.734293717486995e-05, "loss": 0.587, "step": 6336 }, { "epoch": 8.11136, "grad_norm": 0.8352326154708862, "learning_rate": 3.734093637454982e-05, "loss": 0.6288, "step": 6337 }, { "epoch": 8.11264, "grad_norm": 0.7948821187019348, "learning_rate": 3.7338935574229695e-05, "loss": 0.5477, "step": 6338 }, { "epoch": 8.11392, "grad_norm": 0.8230301141738892, "learning_rate": 3.7336934773909567e-05, "loss": 0.5805, "step": 6339 }, { "epoch": 8.1152, "grad_norm": 0.7984503507614136, "learning_rate": 3.733493397358944e-05, "loss": 0.5385, "step": 6340 }, { "epoch": 8.11648, "grad_norm": 0.817011833190918, "learning_rate": 3.733293317326931e-05, "loss": 0.6042, "step": 6341 }, { "epoch": 8.11776, "grad_norm": 0.7629779577255249, "learning_rate": 3.733093237294918e-05, "loss": 0.5647, "step": 6342 }, { "epoch": 8.11904, "grad_norm": 0.7868363261222839, "learning_rate": 3.7328931572629054e-05, "loss": 0.551, "step": 6343 }, { "epoch": 8.12032, "grad_norm": 0.8329203128814697, "learning_rate": 3.7326930772308926e-05, "loss": 0.6125, "step": 6344 }, { "epoch": 8.1216, "grad_norm": 0.835198700428009, "learning_rate": 3.73249299719888e-05, "loss": 0.6188, "step": 6345 }, { "epoch": 8.12288, "grad_norm": 0.8320775032043457, "learning_rate": 3.732292917166867e-05, "loss": 0.5952, "step": 6346 }, { "epoch": 8.12416, "grad_norm": 0.8890385627746582, "learning_rate": 3.732092837134854e-05, "loss": 0.5802, "step": 6347 }, { "epoch": 8.12544, "grad_norm": 0.9071861505508423, "learning_rate": 3.731892757102841e-05, "loss": 0.6534, "step": 6348 }, { "epoch": 8.12672, "grad_norm": 0.8464532494544983, "learning_rate": 3.7316926770708285e-05, "loss": 0.5867, "step": 6349 }, { 
"epoch": 8.128, "grad_norm": 0.8280791640281677, "learning_rate": 3.731492597038816e-05, "loss": 0.5995, "step": 6350 }, { "epoch": 8.12928, "grad_norm": 0.7893093228340149, "learning_rate": 3.731292517006803e-05, "loss": 0.5685, "step": 6351 }, { "epoch": 8.13056, "grad_norm": 0.8106672167778015, "learning_rate": 3.73109243697479e-05, "loss": 0.5615, "step": 6352 }, { "epoch": 8.13184, "grad_norm": 0.8697341084480286, "learning_rate": 3.730892356942777e-05, "loss": 0.6196, "step": 6353 }, { "epoch": 8.13312, "grad_norm": 0.837616503238678, "learning_rate": 3.7306922769107644e-05, "loss": 0.5308, "step": 6354 }, { "epoch": 8.1344, "grad_norm": 0.7826257944107056, "learning_rate": 3.7304921968787516e-05, "loss": 0.5694, "step": 6355 }, { "epoch": 8.13568, "grad_norm": 0.8352533578872681, "learning_rate": 3.730292116846739e-05, "loss": 0.6239, "step": 6356 }, { "epoch": 8.13696, "grad_norm": 0.835431694984436, "learning_rate": 3.730092036814726e-05, "loss": 0.6214, "step": 6357 }, { "epoch": 8.13824, "grad_norm": 0.8025604486465454, "learning_rate": 3.729891956782713e-05, "loss": 0.579, "step": 6358 }, { "epoch": 8.13952, "grad_norm": 0.7842774987220764, "learning_rate": 3.7296918767507004e-05, "loss": 0.5335, "step": 6359 }, { "epoch": 8.1408, "grad_norm": 0.8423367142677307, "learning_rate": 3.7294917967186876e-05, "loss": 0.6108, "step": 6360 }, { "epoch": 8.14208, "grad_norm": 0.8245922327041626, "learning_rate": 3.729291716686675e-05, "loss": 0.5721, "step": 6361 }, { "epoch": 8.14336, "grad_norm": 0.8257554173469543, "learning_rate": 3.729091636654662e-05, "loss": 0.5574, "step": 6362 }, { "epoch": 8.14464, "grad_norm": 0.8311666250228882, "learning_rate": 3.728891556622649e-05, "loss": 0.5973, "step": 6363 }, { "epoch": 8.14592, "grad_norm": 0.814927339553833, "learning_rate": 3.728691476590637e-05, "loss": 0.6139, "step": 6364 }, { "epoch": 8.1472, "grad_norm": 0.8726287484169006, "learning_rate": 3.7284913965586235e-05, "loss": 0.6423, "step": 6365 }, { 
"epoch": 8.14848, "grad_norm": 0.8697568774223328, "learning_rate": 3.728291316526611e-05, "loss": 0.556, "step": 6366 }, { "epoch": 8.14976, "grad_norm": 0.7923082113265991, "learning_rate": 3.728091236494598e-05, "loss": 0.5776, "step": 6367 }, { "epoch": 8.15104, "grad_norm": 0.8061732053756714, "learning_rate": 3.727891156462585e-05, "loss": 0.6374, "step": 6368 }, { "epoch": 8.15232, "grad_norm": 0.8172518610954285, "learning_rate": 3.727691076430572e-05, "loss": 0.5489, "step": 6369 }, { "epoch": 8.1536, "grad_norm": 0.7999323606491089, "learning_rate": 3.7274909963985594e-05, "loss": 0.556, "step": 6370 }, { "epoch": 8.15488, "grad_norm": 0.8566176891326904, "learning_rate": 3.727290916366547e-05, "loss": 0.6124, "step": 6371 }, { "epoch": 8.15616, "grad_norm": 0.8322476148605347, "learning_rate": 3.7270908363345345e-05, "loss": 0.5665, "step": 6372 }, { "epoch": 8.15744, "grad_norm": 0.8234203457832336, "learning_rate": 3.726890756302521e-05, "loss": 0.5859, "step": 6373 }, { "epoch": 8.15872, "grad_norm": 0.7753047347068787, "learning_rate": 3.726690676270508e-05, "loss": 0.5526, "step": 6374 }, { "epoch": 8.16, "grad_norm": 0.8442295789718628, "learning_rate": 3.7264905962384953e-05, "loss": 0.5812, "step": 6375 }, { "epoch": 8.16128, "grad_norm": 0.7989637851715088, "learning_rate": 3.7262905162064825e-05, "loss": 0.5454, "step": 6376 }, { "epoch": 8.16256, "grad_norm": 0.8231421113014221, "learning_rate": 3.72609043617447e-05, "loss": 0.5522, "step": 6377 }, { "epoch": 8.16384, "grad_norm": 0.8021591901779175, "learning_rate": 3.7258903561424576e-05, "loss": 0.5667, "step": 6378 }, { "epoch": 8.16512, "grad_norm": 0.8049442768096924, "learning_rate": 3.725690276110445e-05, "loss": 0.5194, "step": 6379 }, { "epoch": 8.1664, "grad_norm": 0.8534220457077026, "learning_rate": 3.725490196078432e-05, "loss": 0.6475, "step": 6380 }, { "epoch": 8.16768, "grad_norm": 0.7750808596611023, "learning_rate": 3.7252901160464185e-05, "loss": 0.5171, "step": 6381 }, { 
"epoch": 8.16896, "grad_norm": 0.7859979867935181, "learning_rate": 3.7250900360144056e-05, "loss": 0.5755, "step": 6382 }, { "epoch": 8.17024, "grad_norm": 0.9099715948104858, "learning_rate": 3.724889955982393e-05, "loss": 0.5664, "step": 6383 }, { "epoch": 8.17152, "grad_norm": 0.7696498036384583, "learning_rate": 3.72468987595038e-05, "loss": 0.5575, "step": 6384 }, { "epoch": 8.1728, "grad_norm": 0.8566391468048096, "learning_rate": 3.724489795918368e-05, "loss": 0.6486, "step": 6385 }, { "epoch": 8.17408, "grad_norm": 0.7947507500648499, "learning_rate": 3.724289715886355e-05, "loss": 0.5506, "step": 6386 }, { "epoch": 8.17536, "grad_norm": 0.8360593318939209, "learning_rate": 3.724089635854342e-05, "loss": 0.6098, "step": 6387 }, { "epoch": 8.17664, "grad_norm": 0.8018707036972046, "learning_rate": 3.7238895558223294e-05, "loss": 0.585, "step": 6388 }, { "epoch": 8.17792, "grad_norm": 0.8566110134124756, "learning_rate": 3.723689475790316e-05, "loss": 0.5698, "step": 6389 }, { "epoch": 8.1792, "grad_norm": 0.7939445376396179, "learning_rate": 3.723489395758303e-05, "loss": 0.5424, "step": 6390 }, { "epoch": 8.18048, "grad_norm": 0.816756010055542, "learning_rate": 3.72328931572629e-05, "loss": 0.563, "step": 6391 }, { "epoch": 8.18176, "grad_norm": 0.8309502601623535, "learning_rate": 3.723089235694278e-05, "loss": 0.5777, "step": 6392 }, { "epoch": 8.18304, "grad_norm": 0.8525409698486328, "learning_rate": 3.7228891556622654e-05, "loss": 0.6131, "step": 6393 }, { "epoch": 8.18432, "grad_norm": 0.8380247354507446, "learning_rate": 3.7226890756302525e-05, "loss": 0.5809, "step": 6394 }, { "epoch": 8.1856, "grad_norm": 0.7954679131507874, "learning_rate": 3.72248899559824e-05, "loss": 0.5846, "step": 6395 }, { "epoch": 8.18688, "grad_norm": 0.8677021265029907, "learning_rate": 3.722288915566227e-05, "loss": 0.6068, "step": 6396 }, { "epoch": 8.18816, "grad_norm": 0.8796229362487793, "learning_rate": 3.7220888355342134e-05, "loss": 0.637, "step": 6397 }, { 
"epoch": 8.18944, "grad_norm": 0.7766791582107544, "learning_rate": 3.7218887555022006e-05, "loss": 0.5808, "step": 6398 }, { "epoch": 8.19072, "grad_norm": 0.8018509149551392, "learning_rate": 3.7216886754701885e-05, "loss": 0.5721, "step": 6399 }, { "epoch": 8.192, "grad_norm": 0.8333677053451538, "learning_rate": 3.721488595438176e-05, "loss": 0.5966, "step": 6400 }, { "epoch": 8.19328, "grad_norm": 0.8443406224250793, "learning_rate": 3.721288515406163e-05, "loss": 0.5973, "step": 6401 }, { "epoch": 8.19456, "grad_norm": 0.8139026761054993, "learning_rate": 3.72108843537415e-05, "loss": 0.5496, "step": 6402 }, { "epoch": 8.19584, "grad_norm": 0.846799373626709, "learning_rate": 3.720888355342137e-05, "loss": 0.5896, "step": 6403 }, { "epoch": 8.19712, "grad_norm": 0.8184740543365479, "learning_rate": 3.7206882753101244e-05, "loss": 0.5586, "step": 6404 }, { "epoch": 8.1984, "grad_norm": 0.8557119965553284, "learning_rate": 3.720488195278111e-05, "loss": 0.5687, "step": 6405 }, { "epoch": 8.19968, "grad_norm": 0.7910146713256836, "learning_rate": 3.720288115246099e-05, "loss": 0.5474, "step": 6406 }, { "epoch": 8.20096, "grad_norm": 0.8577878475189209, "learning_rate": 3.720088035214086e-05, "loss": 0.6, "step": 6407 }, { "epoch": 8.20224, "grad_norm": 0.8686122894287109, "learning_rate": 3.719887955182073e-05, "loss": 0.6026, "step": 6408 }, { "epoch": 8.20352, "grad_norm": 0.7906977534294128, "learning_rate": 3.71968787515006e-05, "loss": 0.5427, "step": 6409 }, { "epoch": 8.2048, "grad_norm": 0.8151067495346069, "learning_rate": 3.7194877951180475e-05, "loss": 0.5795, "step": 6410 }, { "epoch": 8.20608, "grad_norm": 0.8321256041526794, "learning_rate": 3.719287715086035e-05, "loss": 0.6022, "step": 6411 }, { "epoch": 8.20736, "grad_norm": 0.7891202569007874, "learning_rate": 3.719087635054022e-05, "loss": 0.5335, "step": 6412 }, { "epoch": 8.20864, "grad_norm": 0.836283802986145, "learning_rate": 3.718887555022009e-05, "loss": 0.6082, "step": 6413 }, { 
"epoch": 8.20992, "grad_norm": 0.7881807684898376, "learning_rate": 3.718687474989996e-05, "loss": 0.562, "step": 6414 }, { "epoch": 8.2112, "grad_norm": 0.7665523886680603, "learning_rate": 3.7184873949579834e-05, "loss": 0.571, "step": 6415 }, { "epoch": 8.21248, "grad_norm": 0.8602042198181152, "learning_rate": 3.7182873149259706e-05, "loss": 0.6157, "step": 6416 }, { "epoch": 8.21376, "grad_norm": 0.8061181902885437, "learning_rate": 3.718087234893958e-05, "loss": 0.5852, "step": 6417 }, { "epoch": 8.21504, "grad_norm": 0.7839178442955017, "learning_rate": 3.717887154861945e-05, "loss": 0.5376, "step": 6418 }, { "epoch": 8.21632, "grad_norm": 0.8473336696624756, "learning_rate": 3.717687074829932e-05, "loss": 0.6365, "step": 6419 }, { "epoch": 8.2176, "grad_norm": 0.8579205870628357, "learning_rate": 3.7174869947979194e-05, "loss": 0.6029, "step": 6420 }, { "epoch": 8.21888, "grad_norm": 0.8451679348945618, "learning_rate": 3.7172869147659066e-05, "loss": 0.6307, "step": 6421 }, { "epoch": 8.22016, "grad_norm": 0.7627133131027222, "learning_rate": 3.717086834733894e-05, "loss": 0.5673, "step": 6422 }, { "epoch": 8.22144, "grad_norm": 0.8411123156547546, "learning_rate": 3.716886754701881e-05, "loss": 0.5622, "step": 6423 }, { "epoch": 8.22272, "grad_norm": 0.8919776678085327, "learning_rate": 3.716686674669868e-05, "loss": 0.6082, "step": 6424 }, { "epoch": 8.224, "grad_norm": 0.7615383267402649, "learning_rate": 3.716486594637855e-05, "loss": 0.5266, "step": 6425 }, { "epoch": 8.22528, "grad_norm": 0.8244317770004272, "learning_rate": 3.7162865146058425e-05, "loss": 0.5582, "step": 6426 }, { "epoch": 8.22656, "grad_norm": 0.8274043202400208, "learning_rate": 3.71608643457383e-05, "loss": 0.5784, "step": 6427 }, { "epoch": 8.22784, "grad_norm": 0.8274574279785156, "learning_rate": 3.715886354541817e-05, "loss": 0.5993, "step": 6428 }, { "epoch": 8.22912, "grad_norm": 0.7799541354179382, "learning_rate": 3.715686274509804e-05, "loss": 0.6209, "step": 6429 }, { 
"epoch": 8.2304, "grad_norm": 0.8560000658035278, "learning_rate": 3.715486194477791e-05, "loss": 0.6404, "step": 6430 }, { "epoch": 8.23168, "grad_norm": 0.8321433663368225, "learning_rate": 3.7152861144457784e-05, "loss": 0.6115, "step": 6431 }, { "epoch": 8.23296, "grad_norm": 0.7989788055419922, "learning_rate": 3.7150860344137656e-05, "loss": 0.5221, "step": 6432 }, { "epoch": 8.23424, "grad_norm": 0.8330096006393433, "learning_rate": 3.714885954381753e-05, "loss": 0.6045, "step": 6433 }, { "epoch": 8.23552, "grad_norm": 0.7904120683670044, "learning_rate": 3.71468587434974e-05, "loss": 0.5978, "step": 6434 }, { "epoch": 8.2368, "grad_norm": 0.8202242255210876, "learning_rate": 3.714485794317727e-05, "loss": 0.5845, "step": 6435 }, { "epoch": 8.23808, "grad_norm": 0.8055812120437622, "learning_rate": 3.7142857142857143e-05, "loss": 0.5653, "step": 6436 }, { "epoch": 8.23936, "grad_norm": 0.8505423665046692, "learning_rate": 3.7140856342537015e-05, "loss": 0.6106, "step": 6437 }, { "epoch": 8.24064, "grad_norm": 0.8569924235343933, "learning_rate": 3.713885554221689e-05, "loss": 0.5948, "step": 6438 }, { "epoch": 8.24192, "grad_norm": 0.785232663154602, "learning_rate": 3.713685474189676e-05, "loss": 0.5768, "step": 6439 }, { "epoch": 8.2432, "grad_norm": 0.8176567554473877, "learning_rate": 3.713485394157663e-05, "loss": 0.5744, "step": 6440 }, { "epoch": 8.24448, "grad_norm": 0.8164046406745911, "learning_rate": 3.71328531412565e-05, "loss": 0.5559, "step": 6441 }, { "epoch": 8.24576, "grad_norm": 0.7764423489570618, "learning_rate": 3.713085234093638e-05, "loss": 0.5796, "step": 6442 }, { "epoch": 8.24704, "grad_norm": 0.8372679352760315, "learning_rate": 3.7128851540616246e-05, "loss": 0.5921, "step": 6443 }, { "epoch": 8.24832, "grad_norm": 0.8438205122947693, "learning_rate": 3.712685074029612e-05, "loss": 0.6257, "step": 6444 }, { "epoch": 8.2496, "grad_norm": 0.810208261013031, "learning_rate": 3.712484993997599e-05, "loss": 0.5474, "step": 6445 }, { 
"epoch": 8.25088, "grad_norm": 0.866226315498352, "learning_rate": 3.712284913965586e-05, "loss": 0.5989, "step": 6446 }, { "epoch": 8.25216, "grad_norm": 0.8516311645507812, "learning_rate": 3.7120848339335734e-05, "loss": 0.6025, "step": 6447 }, { "epoch": 8.25344, "grad_norm": 0.8606278896331787, "learning_rate": 3.7118847539015606e-05, "loss": 0.5967, "step": 6448 }, { "epoch": 8.25472, "grad_norm": 0.8150187134742737, "learning_rate": 3.7116846738695484e-05, "loss": 0.5981, "step": 6449 }, { "epoch": 8.256, "grad_norm": 0.7826385498046875, "learning_rate": 3.7114845938375356e-05, "loss": 0.5887, "step": 6450 }, { "epoch": 8.25728, "grad_norm": 0.8195633888244629, "learning_rate": 3.711284513805522e-05, "loss": 0.587, "step": 6451 }, { "epoch": 8.25856, "grad_norm": 0.9077224135398865, "learning_rate": 3.711084433773509e-05, "loss": 0.6068, "step": 6452 }, { "epoch": 8.25984, "grad_norm": 0.8052554130554199, "learning_rate": 3.7108843537414965e-05, "loss": 0.5859, "step": 6453 }, { "epoch": 8.26112, "grad_norm": 0.8076302409172058, "learning_rate": 3.710684273709484e-05, "loss": 0.576, "step": 6454 }, { "epoch": 8.2624, "grad_norm": 0.8576489090919495, "learning_rate": 3.710484193677471e-05, "loss": 0.6078, "step": 6455 }, { "epoch": 8.26368, "grad_norm": 0.8286393880844116, "learning_rate": 3.710284113645459e-05, "loss": 0.6022, "step": 6456 }, { "epoch": 8.26496, "grad_norm": 0.8481602072715759, "learning_rate": 3.710084033613446e-05, "loss": 0.5954, "step": 6457 }, { "epoch": 8.26624, "grad_norm": 0.8346137404441833, "learning_rate": 3.709883953581433e-05, "loss": 0.5913, "step": 6458 }, { "epoch": 8.26752, "grad_norm": 0.8080378770828247, "learning_rate": 3.7096838735494196e-05, "loss": 0.6009, "step": 6459 }, { "epoch": 8.2688, "grad_norm": 0.8473795652389526, "learning_rate": 3.709483793517407e-05, "loss": 0.6306, "step": 6460 }, { "epoch": 8.27008, "grad_norm": 0.8970816731452942, "learning_rate": 3.709283713485394e-05, "loss": 0.5936, "step": 6461 }, { 
"epoch": 8.27136, "grad_norm": 0.8006930947303772, "learning_rate": 3.709083633453381e-05, "loss": 0.5833, "step": 6462 }, { "epoch": 8.272639999999999, "grad_norm": 0.800212562084198, "learning_rate": 3.708883553421369e-05, "loss": 0.5658, "step": 6463 }, { "epoch": 8.27392, "grad_norm": 0.8020644783973694, "learning_rate": 3.708683473389356e-05, "loss": 0.6084, "step": 6464 }, { "epoch": 8.2752, "grad_norm": 0.8160932660102844, "learning_rate": 3.7084833933573434e-05, "loss": 0.5927, "step": 6465 }, { "epoch": 8.27648, "grad_norm": 0.8528776168823242, "learning_rate": 3.7082833133253306e-05, "loss": 0.6092, "step": 6466 }, { "epoch": 8.27776, "grad_norm": 0.8029847145080566, "learning_rate": 3.708083233293317e-05, "loss": 0.6283, "step": 6467 }, { "epoch": 8.27904, "grad_norm": 0.7798078060150146, "learning_rate": 3.707883153261304e-05, "loss": 0.5935, "step": 6468 }, { "epoch": 8.28032, "grad_norm": 0.7831089496612549, "learning_rate": 3.7076830732292915e-05, "loss": 0.5904, "step": 6469 }, { "epoch": 8.2816, "grad_norm": 0.9106951951980591, "learning_rate": 3.707482993197279e-05, "loss": 0.6896, "step": 6470 }, { "epoch": 8.28288, "grad_norm": 0.8110149502754211, "learning_rate": 3.7072829131652665e-05, "loss": 0.5925, "step": 6471 }, { "epoch": 8.28416, "grad_norm": 0.7867901921272278, "learning_rate": 3.707082833133254e-05, "loss": 0.5726, "step": 6472 }, { "epoch": 8.28544, "grad_norm": 0.7895092368125916, "learning_rate": 3.706882753101241e-05, "loss": 0.5712, "step": 6473 }, { "epoch": 8.28672, "grad_norm": 0.8702477216720581, "learning_rate": 3.706682673069228e-05, "loss": 0.6259, "step": 6474 }, { "epoch": 8.288, "grad_norm": 0.8143934011459351, "learning_rate": 3.7064825930372146e-05, "loss": 0.5903, "step": 6475 }, { "epoch": 8.28928, "grad_norm": 0.8529263138771057, "learning_rate": 3.706282513005202e-05, "loss": 0.6155, "step": 6476 }, { "epoch": 8.29056, "grad_norm": 0.7962032556533813, "learning_rate": 3.7060824329731896e-05, "loss": 0.5951, 
"step": 6477 }, { "epoch": 8.29184, "grad_norm": 0.7777064442634583, "learning_rate": 3.705882352941177e-05, "loss": 0.5678, "step": 6478 }, { "epoch": 8.29312, "grad_norm": 0.8295832872390747, "learning_rate": 3.705682272909164e-05, "loss": 0.5486, "step": 6479 }, { "epoch": 8.2944, "grad_norm": 0.7815911769866943, "learning_rate": 3.705482192877151e-05, "loss": 0.5732, "step": 6480 }, { "epoch": 8.29568, "grad_norm": 0.7912229895591736, "learning_rate": 3.7052821128451384e-05, "loss": 0.5611, "step": 6481 }, { "epoch": 8.29696, "grad_norm": 0.8012306690216064, "learning_rate": 3.7050820328131256e-05, "loss": 0.5652, "step": 6482 }, { "epoch": 8.29824, "grad_norm": 0.7731996178627014, "learning_rate": 3.704881952781112e-05, "loss": 0.511, "step": 6483 }, { "epoch": 8.29952, "grad_norm": 0.858241081237793, "learning_rate": 3.7046818727491e-05, "loss": 0.5636, "step": 6484 }, { "epoch": 8.3008, "grad_norm": 0.816175639629364, "learning_rate": 3.704481792717087e-05, "loss": 0.5677, "step": 6485 }, { "epoch": 8.30208, "grad_norm": 0.7947445511817932, "learning_rate": 3.704281712685074e-05, "loss": 0.5523, "step": 6486 }, { "epoch": 8.30336, "grad_norm": 0.8075773119926453, "learning_rate": 3.7040816326530615e-05, "loss": 0.566, "step": 6487 }, { "epoch": 8.30464, "grad_norm": 0.8670647740364075, "learning_rate": 3.703881552621049e-05, "loss": 0.6355, "step": 6488 }, { "epoch": 8.30592, "grad_norm": 0.8332636952400208, "learning_rate": 3.703681472589036e-05, "loss": 0.6226, "step": 6489 }, { "epoch": 8.3072, "grad_norm": 0.8159798979759216, "learning_rate": 3.703481392557023e-05, "loss": 0.5558, "step": 6490 }, { "epoch": 8.30848, "grad_norm": 0.8338848948478699, "learning_rate": 3.70328131252501e-05, "loss": 0.5839, "step": 6491 }, { "epoch": 8.30976, "grad_norm": 0.8652037382125854, "learning_rate": 3.7030812324929974e-05, "loss": 0.6033, "step": 6492 }, { "epoch": 8.31104, "grad_norm": 0.8078513145446777, "learning_rate": 3.7028811524609846e-05, "loss": 0.5618, 
"step": 6493 }, { "epoch": 8.31232, "grad_norm": 0.8094210028648376, "learning_rate": 3.702681072428972e-05, "loss": 0.5781, "step": 6494 }, { "epoch": 8.3136, "grad_norm": 0.7953526973724365, "learning_rate": 3.702480992396959e-05, "loss": 0.6069, "step": 6495 }, { "epoch": 8.31488, "grad_norm": 0.8320873379707336, "learning_rate": 3.702280912364946e-05, "loss": 0.5769, "step": 6496 }, { "epoch": 8.31616, "grad_norm": 0.8492526412010193, "learning_rate": 3.7020808323329334e-05, "loss": 0.6435, "step": 6497 }, { "epoch": 8.31744, "grad_norm": 0.8726949691772461, "learning_rate": 3.7018807523009205e-05, "loss": 0.5645, "step": 6498 }, { "epoch": 8.31872, "grad_norm": 0.8547096252441406, "learning_rate": 3.701680672268908e-05, "loss": 0.5968, "step": 6499 }, { "epoch": 8.32, "grad_norm": 0.8142231702804565, "learning_rate": 3.701480592236895e-05, "loss": 0.6064, "step": 6500 }, { "epoch": 8.32128, "grad_norm": 0.8083314895629883, "learning_rate": 3.701280512204882e-05, "loss": 0.5691, "step": 6501 }, { "epoch": 8.32256, "grad_norm": 0.7994301319122314, "learning_rate": 3.701080432172869e-05, "loss": 0.6027, "step": 6502 }, { "epoch": 8.32384, "grad_norm": 0.8659411072731018, "learning_rate": 3.7008803521408565e-05, "loss": 0.6085, "step": 6503 }, { "epoch": 8.32512, "grad_norm": 0.8234634399414062, "learning_rate": 3.7006802721088437e-05, "loss": 0.593, "step": 6504 }, { "epoch": 8.3264, "grad_norm": 0.8921228051185608, "learning_rate": 3.700480192076831e-05, "loss": 0.6614, "step": 6505 }, { "epoch": 8.32768, "grad_norm": 0.7711195945739746, "learning_rate": 3.700280112044818e-05, "loss": 0.5886, "step": 6506 }, { "epoch": 8.32896, "grad_norm": 0.8125150203704834, "learning_rate": 3.700080032012805e-05, "loss": 0.58, "step": 6507 }, { "epoch": 8.33024, "grad_norm": 0.8450069427490234, "learning_rate": 3.6998799519807924e-05, "loss": 0.6523, "step": 6508 }, { "epoch": 8.33152, "grad_norm": 0.8026819229125977, "learning_rate": 3.6996798719487796e-05, "loss": 0.5938, 
"step": 6509 }, { "epoch": 8.3328, "grad_norm": 0.7969563603401184, "learning_rate": 3.699479791916767e-05, "loss": 0.591, "step": 6510 }, { "epoch": 8.33408, "grad_norm": 0.8721278309822083, "learning_rate": 3.699279711884754e-05, "loss": 0.636, "step": 6511 }, { "epoch": 8.33536, "grad_norm": 0.81974196434021, "learning_rate": 3.699079631852742e-05, "loss": 0.6409, "step": 6512 }, { "epoch": 8.33664, "grad_norm": 0.7834908366203308, "learning_rate": 3.698879551820728e-05, "loss": 0.5799, "step": 6513 }, { "epoch": 8.33792, "grad_norm": 0.815528929233551, "learning_rate": 3.6986794717887155e-05, "loss": 0.5999, "step": 6514 }, { "epoch": 8.3392, "grad_norm": 0.8319604992866516, "learning_rate": 3.698479391756703e-05, "loss": 0.6098, "step": 6515 }, { "epoch": 8.34048, "grad_norm": 0.7863122820854187, "learning_rate": 3.69827931172469e-05, "loss": 0.5897, "step": 6516 }, { "epoch": 8.34176, "grad_norm": 0.8292329907417297, "learning_rate": 3.698079231692677e-05, "loss": 0.5612, "step": 6517 }, { "epoch": 8.34304, "grad_norm": 0.8444699645042419, "learning_rate": 3.697879151660664e-05, "loss": 0.6062, "step": 6518 }, { "epoch": 8.34432, "grad_norm": 0.8435462117195129, "learning_rate": 3.697679071628652e-05, "loss": 0.5952, "step": 6519 }, { "epoch": 8.3456, "grad_norm": 0.8049376606941223, "learning_rate": 3.697478991596639e-05, "loss": 0.5783, "step": 6520 }, { "epoch": 8.34688, "grad_norm": 0.8144257068634033, "learning_rate": 3.697278911564626e-05, "loss": 0.6279, "step": 6521 }, { "epoch": 8.34816, "grad_norm": 0.8180856108665466, "learning_rate": 3.697078831532613e-05, "loss": 0.5911, "step": 6522 }, { "epoch": 8.34944, "grad_norm": 0.8317684531211853, "learning_rate": 3.6968787515006e-05, "loss": 0.6043, "step": 6523 }, { "epoch": 8.35072, "grad_norm": 0.8124802112579346, "learning_rate": 3.6966786714685874e-05, "loss": 0.6082, "step": 6524 }, { "epoch": 8.352, "grad_norm": 0.8340176939964294, "learning_rate": 3.6964785914365746e-05, "loss": 0.6172, "step": 
6525 }, { "epoch": 8.35328, "grad_norm": 0.8047380447387695, "learning_rate": 3.6962785114045624e-05, "loss": 0.5722, "step": 6526 }, { "epoch": 8.35456, "grad_norm": 0.8233848810195923, "learning_rate": 3.6960784313725496e-05, "loss": 0.6046, "step": 6527 }, { "epoch": 8.35584, "grad_norm": 0.7907252311706543, "learning_rate": 3.695878351340537e-05, "loss": 0.5304, "step": 6528 }, { "epoch": 8.35712, "grad_norm": 0.8269102573394775, "learning_rate": 3.695678271308523e-05, "loss": 0.5938, "step": 6529 }, { "epoch": 8.3584, "grad_norm": 0.8250780701637268, "learning_rate": 3.6954781912765105e-05, "loss": 0.5838, "step": 6530 }, { "epoch": 8.35968, "grad_norm": 0.7899469137191772, "learning_rate": 3.695278111244498e-05, "loss": 0.5508, "step": 6531 }, { "epoch": 8.36096, "grad_norm": 0.8494967818260193, "learning_rate": 3.695078031212485e-05, "loss": 0.6586, "step": 6532 }, { "epoch": 8.36224, "grad_norm": 0.775540828704834, "learning_rate": 3.694877951180473e-05, "loss": 0.5521, "step": 6533 }, { "epoch": 8.36352, "grad_norm": 0.7813871502876282, "learning_rate": 3.69467787114846e-05, "loss": 0.5531, "step": 6534 }, { "epoch": 8.3648, "grad_norm": 0.7743489742279053, "learning_rate": 3.694477791116447e-05, "loss": 0.5521, "step": 6535 }, { "epoch": 8.36608, "grad_norm": 0.8797667026519775, "learning_rate": 3.694277711084434e-05, "loss": 0.5972, "step": 6536 }, { "epoch": 8.36736, "grad_norm": 0.8481217622756958, "learning_rate": 3.694077631052421e-05, "loss": 0.6185, "step": 6537 }, { "epoch": 8.36864, "grad_norm": 0.8754853010177612, "learning_rate": 3.693877551020408e-05, "loss": 0.6194, "step": 6538 }, { "epoch": 8.36992, "grad_norm": 0.8245212435722351, "learning_rate": 3.693677470988395e-05, "loss": 0.5823, "step": 6539 }, { "epoch": 8.3712, "grad_norm": 0.8101980090141296, "learning_rate": 3.693477390956382e-05, "loss": 0.5779, "step": 6540 }, { "epoch": 8.37248, "grad_norm": 0.8348204493522644, "learning_rate": 3.69327731092437e-05, "loss": 0.5794, "step": 
6541 }, { "epoch": 8.37376, "grad_norm": 0.84105384349823, "learning_rate": 3.6930772308923574e-05, "loss": 0.5751, "step": 6542 }, { "epoch": 8.37504, "grad_norm": 0.8668518662452698, "learning_rate": 3.6928771508603446e-05, "loss": 0.5879, "step": 6543 }, { "epoch": 8.37632, "grad_norm": 0.8224269151687622, "learning_rate": 3.692677070828332e-05, "loss": 0.5835, "step": 6544 }, { "epoch": 8.3776, "grad_norm": 0.8665353059768677, "learning_rate": 3.692476990796318e-05, "loss": 0.5756, "step": 6545 }, { "epoch": 8.37888, "grad_norm": 0.8154611587524414, "learning_rate": 3.6922769107643054e-05, "loss": 0.6244, "step": 6546 }, { "epoch": 8.38016, "grad_norm": 0.7815151810646057, "learning_rate": 3.6920768307322926e-05, "loss": 0.5823, "step": 6547 }, { "epoch": 8.38144, "grad_norm": 0.8650956749916077, "learning_rate": 3.6918767507002805e-05, "loss": 0.5881, "step": 6548 }, { "epoch": 8.38272, "grad_norm": 0.8099015355110168, "learning_rate": 3.691676670668268e-05, "loss": 0.6132, "step": 6549 }, { "epoch": 8.384, "grad_norm": 0.8544344902038574, "learning_rate": 3.691476590636255e-05, "loss": 0.5674, "step": 6550 }, { "epoch": 8.38528, "grad_norm": 0.8150829672813416, "learning_rate": 3.691276510604242e-05, "loss": 0.6013, "step": 6551 }, { "epoch": 8.38656, "grad_norm": 0.8252568244934082, "learning_rate": 3.691076430572229e-05, "loss": 0.5952, "step": 6552 }, { "epoch": 8.38784, "grad_norm": 0.8224411010742188, "learning_rate": 3.690876350540216e-05, "loss": 0.613, "step": 6553 }, { "epoch": 8.38912, "grad_norm": 0.8442636728286743, "learning_rate": 3.690676270508203e-05, "loss": 0.6177, "step": 6554 }, { "epoch": 8.3904, "grad_norm": 0.8492255210876465, "learning_rate": 3.690476190476191e-05, "loss": 0.5842, "step": 6555 }, { "epoch": 8.39168, "grad_norm": 0.8563710451126099, "learning_rate": 3.690276110444178e-05, "loss": 0.6015, "step": 6556 }, { "epoch": 8.39296, "grad_norm": 0.8145362138748169, "learning_rate": 3.690076030412165e-05, "loss": 0.5714, "step": 
6557 }, { "epoch": 8.39424, "grad_norm": 0.8347020745277405, "learning_rate": 3.6898759503801524e-05, "loss": 0.6105, "step": 6558 }, { "epoch": 8.39552, "grad_norm": 0.8055539131164551, "learning_rate": 3.6896758703481395e-05, "loss": 0.6214, "step": 6559 }, { "epoch": 8.3968, "grad_norm": 0.7828962802886963, "learning_rate": 3.689475790316127e-05, "loss": 0.6206, "step": 6560 }, { "epoch": 8.39808, "grad_norm": 0.8174715042114258, "learning_rate": 3.689275710284113e-05, "loss": 0.536, "step": 6561 }, { "epoch": 8.39936, "grad_norm": 0.7967883944511414, "learning_rate": 3.689075630252101e-05, "loss": 0.6092, "step": 6562 }, { "epoch": 8.40064, "grad_norm": 0.8208482265472412, "learning_rate": 3.688875550220088e-05, "loss": 0.5818, "step": 6563 }, { "epoch": 8.40192, "grad_norm": 0.7751407623291016, "learning_rate": 3.6886754701880755e-05, "loss": 0.5362, "step": 6564 }, { "epoch": 8.4032, "grad_norm": 0.8539714217185974, "learning_rate": 3.6884753901560627e-05, "loss": 0.5734, "step": 6565 }, { "epoch": 8.40448, "grad_norm": 0.8336576819419861, "learning_rate": 3.68827531012405e-05, "loss": 0.5931, "step": 6566 }, { "epoch": 8.40576, "grad_norm": 0.7995145320892334, "learning_rate": 3.688075230092037e-05, "loss": 0.6048, "step": 6567 }, { "epoch": 8.40704, "grad_norm": 0.8343474268913269, "learning_rate": 3.687875150060024e-05, "loss": 0.5621, "step": 6568 }, { "epoch": 8.40832, "grad_norm": 0.8455119132995605, "learning_rate": 3.6876750700280114e-05, "loss": 0.597, "step": 6569 }, { "epoch": 8.4096, "grad_norm": 0.839838445186615, "learning_rate": 3.6874749899959986e-05, "loss": 0.5993, "step": 6570 }, { "epoch": 8.41088, "grad_norm": 0.8005172610282898, "learning_rate": 3.687274909963986e-05, "loss": 0.5688, "step": 6571 }, { "epoch": 8.41216, "grad_norm": 0.8667073249816895, "learning_rate": 3.687074829931973e-05, "loss": 0.5971, "step": 6572 }, { "epoch": 8.41344, "grad_norm": 0.7542425990104675, "learning_rate": 3.68687474989996e-05, "loss": 0.5329, "step": 
6573 }, { "epoch": 8.414719999999999, "grad_norm": 0.8643616437911987, "learning_rate": 3.686674669867947e-05, "loss": 0.6388, "step": 6574 }, { "epoch": 8.416, "grad_norm": 0.807478666305542, "learning_rate": 3.6864745898359345e-05, "loss": 0.6263, "step": 6575 }, { "epoch": 8.41728, "grad_norm": 0.8404189944267273, "learning_rate": 3.686274509803922e-05, "loss": 0.6011, "step": 6576 }, { "epoch": 8.41856, "grad_norm": 0.8093534111976624, "learning_rate": 3.686074429771909e-05, "loss": 0.5659, "step": 6577 }, { "epoch": 8.41984, "grad_norm": 0.8560016751289368, "learning_rate": 3.685874349739896e-05, "loss": 0.6221, "step": 6578 }, { "epoch": 8.42112, "grad_norm": 0.836397647857666, "learning_rate": 3.685674269707883e-05, "loss": 0.6274, "step": 6579 }, { "epoch": 8.4224, "grad_norm": 0.8117139935493469, "learning_rate": 3.6854741896758704e-05, "loss": 0.6296, "step": 6580 }, { "epoch": 8.42368, "grad_norm": 0.8044874668121338, "learning_rate": 3.6852741096438576e-05, "loss": 0.5879, "step": 6581 }, { "epoch": 8.42496, "grad_norm": 0.7988526225090027, "learning_rate": 3.685074029611845e-05, "loss": 0.5576, "step": 6582 }, { "epoch": 8.42624, "grad_norm": 0.7902975678443909, "learning_rate": 3.684873949579833e-05, "loss": 0.5694, "step": 6583 }, { "epoch": 8.42752, "grad_norm": 0.8768497109413147, "learning_rate": 3.684673869547819e-05, "loss": 0.6101, "step": 6584 }, { "epoch": 8.4288, "grad_norm": 0.806703507900238, "learning_rate": 3.6844737895158064e-05, "loss": 0.576, "step": 6585 }, { "epoch": 8.43008, "grad_norm": 0.8013177514076233, "learning_rate": 3.6842737094837936e-05, "loss": 0.6118, "step": 6586 }, { "epoch": 8.43136, "grad_norm": 0.8201990723609924, "learning_rate": 3.684073629451781e-05, "loss": 0.5902, "step": 6587 }, { "epoch": 8.43264, "grad_norm": 0.8329010009765625, "learning_rate": 3.683873549419768e-05, "loss": 0.5995, "step": 6588 }, { "epoch": 8.43392, "grad_norm": 0.7939520478248596, "learning_rate": 3.683673469387755e-05, "loss": 0.5849, 
"step": 6589 }, { "epoch": 8.4352, "grad_norm": 0.8510729670524597, "learning_rate": 3.683473389355743e-05, "loss": 0.6381, "step": 6590 }, { "epoch": 8.43648, "grad_norm": 0.8639024496078491, "learning_rate": 3.68327330932373e-05, "loss": 0.6025, "step": 6591 }, { "epoch": 8.43776, "grad_norm": 0.81855708360672, "learning_rate": 3.683073229291717e-05, "loss": 0.5719, "step": 6592 }, { "epoch": 8.43904, "grad_norm": 0.8242548108100891, "learning_rate": 3.682873149259704e-05, "loss": 0.5777, "step": 6593 }, { "epoch": 8.44032, "grad_norm": 0.8355400562286377, "learning_rate": 3.682673069227691e-05, "loss": 0.5496, "step": 6594 }, { "epoch": 8.4416, "grad_norm": 0.8287314772605896, "learning_rate": 3.682472989195678e-05, "loss": 0.6298, "step": 6595 }, { "epoch": 8.44288, "grad_norm": 0.8493402600288391, "learning_rate": 3.6822729091636654e-05, "loss": 0.624, "step": 6596 }, { "epoch": 8.44416, "grad_norm": 0.786516547203064, "learning_rate": 3.682072829131653e-05, "loss": 0.5663, "step": 6597 }, { "epoch": 8.44544, "grad_norm": 0.7651713490486145, "learning_rate": 3.6818727490996405e-05, "loss": 0.5248, "step": 6598 }, { "epoch": 8.44672, "grad_norm": 0.8335928320884705, "learning_rate": 3.6816726690676276e-05, "loss": 0.5722, "step": 6599 }, { "epoch": 8.448, "grad_norm": 0.8091786503791809, "learning_rate": 3.681472589035614e-05, "loss": 0.554, "step": 6600 }, { "epoch": 8.44928, "grad_norm": 0.846765398979187, "learning_rate": 3.6812725090036013e-05, "loss": 0.663, "step": 6601 }, { "epoch": 8.45056, "grad_norm": 0.8616847991943359, "learning_rate": 3.6810724289715885e-05, "loss": 0.6325, "step": 6602 }, { "epoch": 8.45184, "grad_norm": 0.7922678589820862, "learning_rate": 3.680872348939576e-05, "loss": 0.5701, "step": 6603 }, { "epoch": 8.45312, "grad_norm": 0.8290618658065796, "learning_rate": 3.6806722689075636e-05, "loss": 0.6524, "step": 6604 }, { "epoch": 8.4544, "grad_norm": 0.8202276825904846, "learning_rate": 3.680472188875551e-05, "loss": 0.5703, 
"step": 6605 }, { "epoch": 8.45568, "grad_norm": 0.8039751052856445, "learning_rate": 3.680272108843538e-05, "loss": 0.5763, "step": 6606 }, { "epoch": 8.45696, "grad_norm": 0.7898495197296143, "learning_rate": 3.680072028811525e-05, "loss": 0.5615, "step": 6607 }, { "epoch": 8.45824, "grad_norm": 0.833078145980835, "learning_rate": 3.6798719487795116e-05, "loss": 0.5745, "step": 6608 }, { "epoch": 8.45952, "grad_norm": 0.908738911151886, "learning_rate": 3.679671868747499e-05, "loss": 0.5853, "step": 6609 }, { "epoch": 8.4608, "grad_norm": 0.8550048470497131, "learning_rate": 3.679471788715486e-05, "loss": 0.5834, "step": 6610 }, { "epoch": 8.46208, "grad_norm": 0.8882399201393127, "learning_rate": 3.679271708683474e-05, "loss": 0.6226, "step": 6611 }, { "epoch": 8.46336, "grad_norm": 0.832423746585846, "learning_rate": 3.679071628651461e-05, "loss": 0.5573, "step": 6612 }, { "epoch": 8.46464, "grad_norm": 0.842068076133728, "learning_rate": 3.678871548619448e-05, "loss": 0.6053, "step": 6613 }, { "epoch": 8.46592, "grad_norm": 0.8376506567001343, "learning_rate": 3.6786714685874354e-05, "loss": 0.5527, "step": 6614 }, { "epoch": 8.4672, "grad_norm": 0.8572778701782227, "learning_rate": 3.6784713885554226e-05, "loss": 0.6259, "step": 6615 }, { "epoch": 8.46848, "grad_norm": 0.817713737487793, "learning_rate": 3.678271308523409e-05, "loss": 0.5689, "step": 6616 }, { "epoch": 8.46976, "grad_norm": 0.8326448202133179, "learning_rate": 3.678071228491396e-05, "loss": 0.5906, "step": 6617 }, { "epoch": 8.47104, "grad_norm": 0.8092232942581177, "learning_rate": 3.677871148459384e-05, "loss": 0.5749, "step": 6618 }, { "epoch": 8.47232, "grad_norm": 0.8321169018745422, "learning_rate": 3.6776710684273714e-05, "loss": 0.5773, "step": 6619 }, { "epoch": 8.4736, "grad_norm": 0.8475345373153687, "learning_rate": 3.6774709883953585e-05, "loss": 0.6028, "step": 6620 }, { "epoch": 8.47488, "grad_norm": 0.8301557898521423, "learning_rate": 3.677270908363346e-05, "loss": 0.5961, 
"step": 6621 }, { "epoch": 8.47616, "grad_norm": 0.8141891360282898, "learning_rate": 3.677070828331333e-05, "loss": 0.6512, "step": 6622 }, { "epoch": 8.47744, "grad_norm": 0.7891356348991394, "learning_rate": 3.67687074829932e-05, "loss": 0.539, "step": 6623 }, { "epoch": 8.47872, "grad_norm": 0.7641344666481018, "learning_rate": 3.6766706682673066e-05, "loss": 0.5497, "step": 6624 }, { "epoch": 8.48, "grad_norm": 0.766279935836792, "learning_rate": 3.6764705882352945e-05, "loss": 0.5496, "step": 6625 }, { "epoch": 8.48128, "grad_norm": 0.836593508720398, "learning_rate": 3.6762705082032817e-05, "loss": 0.6149, "step": 6626 }, { "epoch": 8.48256, "grad_norm": 0.8140498399734497, "learning_rate": 3.676070428171269e-05, "loss": 0.5873, "step": 6627 }, { "epoch": 8.48384, "grad_norm": 0.8106237053871155, "learning_rate": 3.675870348139256e-05, "loss": 0.5868, "step": 6628 }, { "epoch": 8.48512, "grad_norm": 0.7953768968582153, "learning_rate": 3.675670268107243e-05, "loss": 0.5578, "step": 6629 }, { "epoch": 8.4864, "grad_norm": 0.8248597383499146, "learning_rate": 3.6754701880752304e-05, "loss": 0.6027, "step": 6630 }, { "epoch": 8.48768, "grad_norm": 0.8090713024139404, "learning_rate": 3.6752701080432176e-05, "loss": 0.5848, "step": 6631 }, { "epoch": 8.48896, "grad_norm": 0.8814820647239685, "learning_rate": 3.675070028011205e-05, "loss": 0.6117, "step": 6632 }, { "epoch": 8.49024, "grad_norm": 0.8482004404067993, "learning_rate": 3.674869947979192e-05, "loss": 0.6126, "step": 6633 }, { "epoch": 8.49152, "grad_norm": 0.8183587789535522, "learning_rate": 3.674669867947179e-05, "loss": 0.5719, "step": 6634 }, { "epoch": 8.4928, "grad_norm": 0.8571783304214478, "learning_rate": 3.674469787915166e-05, "loss": 0.6063, "step": 6635 }, { "epoch": 8.49408, "grad_norm": 0.802609384059906, "learning_rate": 3.6742697078831535e-05, "loss": 0.625, "step": 6636 }, { "epoch": 8.49536, "grad_norm": 0.8341289758682251, "learning_rate": 3.674069627851141e-05, "loss": 0.5994, 
"step": 6637 }, { "epoch": 8.49664, "grad_norm": 0.8001972436904907, "learning_rate": 3.673869547819128e-05, "loss": 0.5502, "step": 6638 }, { "epoch": 8.49792, "grad_norm": 0.8287181258201599, "learning_rate": 3.673669467787115e-05, "loss": 0.602, "step": 6639 }, { "epoch": 8.4992, "grad_norm": 0.7981618046760559, "learning_rate": 3.673469387755102e-05, "loss": 0.6023, "step": 6640 }, { "epoch": 8.50048, "grad_norm": 0.8224805593490601, "learning_rate": 3.6732693077230894e-05, "loss": 0.5563, "step": 6641 }, { "epoch": 8.50176, "grad_norm": 0.7999238967895508, "learning_rate": 3.6730692276910766e-05, "loss": 0.5549, "step": 6642 }, { "epoch": 8.50304, "grad_norm": 0.8282129764556885, "learning_rate": 3.672869147659064e-05, "loss": 0.6004, "step": 6643 }, { "epoch": 8.50432, "grad_norm": 0.8010920882225037, "learning_rate": 3.672669067627051e-05, "loss": 0.5956, "step": 6644 }, { "epoch": 8.5056, "grad_norm": 0.7640748023986816, "learning_rate": 3.672468987595038e-05, "loss": 0.5638, "step": 6645 }, { "epoch": 8.50688, "grad_norm": 0.8032206296920776, "learning_rate": 3.6722689075630254e-05, "loss": 0.5797, "step": 6646 }, { "epoch": 8.50816, "grad_norm": 0.8864914774894714, "learning_rate": 3.6720688275310126e-05, "loss": 0.5798, "step": 6647 }, { "epoch": 8.50944, "grad_norm": 0.8176901340484619, "learning_rate": 3.671868747499e-05, "loss": 0.6298, "step": 6648 }, { "epoch": 8.51072, "grad_norm": 0.8001031279563904, "learning_rate": 3.671668667466987e-05, "loss": 0.6211, "step": 6649 }, { "epoch": 8.512, "grad_norm": 0.8112642168998718, "learning_rate": 3.671468587434974e-05, "loss": 0.6003, "step": 6650 }, { "epoch": 8.51328, "grad_norm": 0.8111798763275146, "learning_rate": 3.671268507402961e-05, "loss": 0.603, "step": 6651 }, { "epoch": 8.51456, "grad_norm": 0.8160041570663452, "learning_rate": 3.6710684273709485e-05, "loss": 0.5565, "step": 6652 }, { "epoch": 8.51584, "grad_norm": 0.7813338041305542, "learning_rate": 3.670868347338936e-05, "loss": 0.5783, 
"step": 6653 }, { "epoch": 8.51712, "grad_norm": 0.8111140131950378, "learning_rate": 3.670668267306923e-05, "loss": 0.6126, "step": 6654 }, { "epoch": 8.5184, "grad_norm": 0.8105128407478333, "learning_rate": 3.67046818727491e-05, "loss": 0.5711, "step": 6655 }, { "epoch": 8.51968, "grad_norm": 0.8445085287094116, "learning_rate": 3.670268107242897e-05, "loss": 0.64, "step": 6656 }, { "epoch": 8.52096, "grad_norm": 0.8055734634399414, "learning_rate": 3.6700680272108844e-05, "loss": 0.5938, "step": 6657 }, { "epoch": 8.52224, "grad_norm": 0.8156753182411194, "learning_rate": 3.6698679471788716e-05, "loss": 0.5672, "step": 6658 }, { "epoch": 8.52352, "grad_norm": 0.7987210154533386, "learning_rate": 3.669667867146859e-05, "loss": 0.5924, "step": 6659 }, { "epoch": 8.5248, "grad_norm": 0.878676176071167, "learning_rate": 3.669467787114846e-05, "loss": 0.6441, "step": 6660 }, { "epoch": 8.52608, "grad_norm": 0.8461551666259766, "learning_rate": 3.669267707082834e-05, "loss": 0.5701, "step": 6661 }, { "epoch": 8.52736, "grad_norm": 0.7994030117988586, "learning_rate": 3.6690676270508203e-05, "loss": 0.5331, "step": 6662 }, { "epoch": 8.52864, "grad_norm": 0.8361437320709229, "learning_rate": 3.6688675470188075e-05, "loss": 0.6438, "step": 6663 }, { "epoch": 8.52992, "grad_norm": 0.8350645899772644, "learning_rate": 3.668667466986795e-05, "loss": 0.5887, "step": 6664 }, { "epoch": 8.5312, "grad_norm": 0.8255120515823364, "learning_rate": 3.668467386954782e-05, "loss": 0.5805, "step": 6665 }, { "epoch": 8.53248, "grad_norm": 0.8830844759941101, "learning_rate": 3.668267306922769e-05, "loss": 0.6237, "step": 6666 }, { "epoch": 8.533760000000001, "grad_norm": 0.8089183568954468, "learning_rate": 3.668067226890756e-05, "loss": 0.5825, "step": 6667 }, { "epoch": 8.53504, "grad_norm": 0.7940279245376587, "learning_rate": 3.667867146858744e-05, "loss": 0.5835, "step": 6668 }, { "epoch": 8.53632, "grad_norm": 0.8011153340339661, "learning_rate": 3.667667066826731e-05, "loss": 
0.5635, "step": 6669 }, { "epoch": 8.5376, "grad_norm": 0.8229313492774963, "learning_rate": 3.667466986794718e-05, "loss": 0.553, "step": 6670 }, { "epoch": 8.53888, "grad_norm": 0.8413478136062622, "learning_rate": 3.667266906762705e-05, "loss": 0.5636, "step": 6671 }, { "epoch": 8.54016, "grad_norm": 0.8253154754638672, "learning_rate": 3.667066826730692e-05, "loss": 0.6305, "step": 6672 }, { "epoch": 8.54144, "grad_norm": 0.8086980581283569, "learning_rate": 3.6668667466986794e-05, "loss": 0.6359, "step": 6673 }, { "epoch": 8.54272, "grad_norm": 0.8422557711601257, "learning_rate": 3.6666666666666666e-05, "loss": 0.5517, "step": 6674 }, { "epoch": 8.544, "grad_norm": 0.7937127947807312, "learning_rate": 3.6664665866346544e-05, "loss": 0.6099, "step": 6675 }, { "epoch": 8.54528, "grad_norm": 0.8104080557823181, "learning_rate": 3.6662665066026416e-05, "loss": 0.647, "step": 6676 }, { "epoch": 8.54656, "grad_norm": 0.7860154509544373, "learning_rate": 3.666066426570629e-05, "loss": 0.5656, "step": 6677 }, { "epoch": 8.54784, "grad_norm": 0.8154928684234619, "learning_rate": 3.665866346538615e-05, "loss": 0.5811, "step": 6678 }, { "epoch": 8.54912, "grad_norm": 0.8200475573539734, "learning_rate": 3.6656662665066025e-05, "loss": 0.5985, "step": 6679 }, { "epoch": 8.5504, "grad_norm": 0.7977676391601562, "learning_rate": 3.66546618647459e-05, "loss": 0.535, "step": 6680 }, { "epoch": 8.55168, "grad_norm": 0.7876496315002441, "learning_rate": 3.665266106442577e-05, "loss": 0.582, "step": 6681 }, { "epoch": 8.55296, "grad_norm": 0.7595927119255066, "learning_rate": 3.665066026410565e-05, "loss": 0.5749, "step": 6682 }, { "epoch": 8.55424, "grad_norm": 0.8560596108436584, "learning_rate": 3.664865946378552e-05, "loss": 0.6061, "step": 6683 }, { "epoch": 8.55552, "grad_norm": 0.7883579134941101, "learning_rate": 3.664665866346539e-05, "loss": 0.5673, "step": 6684 }, { "epoch": 8.556799999999999, "grad_norm": 0.8544105887413025, "learning_rate": 3.664465786314526e-05, 
"loss": 0.644, "step": 6685 }, { "epoch": 8.55808, "grad_norm": 0.8229860663414001, "learning_rate": 3.664265706282513e-05, "loss": 0.5651, "step": 6686 }, { "epoch": 8.55936, "grad_norm": 0.8209177851676941, "learning_rate": 3.6640656262505e-05, "loss": 0.6064, "step": 6687 }, { "epoch": 8.56064, "grad_norm": 0.867435097694397, "learning_rate": 3.663865546218487e-05, "loss": 0.568, "step": 6688 }, { "epoch": 8.56192, "grad_norm": 0.8385619521141052, "learning_rate": 3.663665466186475e-05, "loss": 0.5947, "step": 6689 }, { "epoch": 8.5632, "grad_norm": 0.7875993847846985, "learning_rate": 3.663465386154462e-05, "loss": 0.5768, "step": 6690 }, { "epoch": 8.56448, "grad_norm": 0.8218605518341064, "learning_rate": 3.6632653061224494e-05, "loss": 0.5482, "step": 6691 }, { "epoch": 8.565760000000001, "grad_norm": 0.7455148696899414, "learning_rate": 3.6630652260904366e-05, "loss": 0.5475, "step": 6692 }, { "epoch": 8.56704, "grad_norm": 0.7991039156913757, "learning_rate": 3.662865146058424e-05, "loss": 0.6022, "step": 6693 }, { "epoch": 8.56832, "grad_norm": 0.8934693932533264, "learning_rate": 3.66266506602641e-05, "loss": 0.6481, "step": 6694 }, { "epoch": 8.5696, "grad_norm": 0.8251892328262329, "learning_rate": 3.6624649859943975e-05, "loss": 0.6115, "step": 6695 }, { "epoch": 8.57088, "grad_norm": 0.7520719766616821, "learning_rate": 3.662264905962385e-05, "loss": 0.5618, "step": 6696 }, { "epoch": 8.57216, "grad_norm": 0.7765500545501709, "learning_rate": 3.6620648259303725e-05, "loss": 0.5914, "step": 6697 }, { "epoch": 8.57344, "grad_norm": 0.8249855637550354, "learning_rate": 3.66186474589836e-05, "loss": 0.5604, "step": 6698 }, { "epoch": 8.57472, "grad_norm": 0.8163955211639404, "learning_rate": 3.661664665866347e-05, "loss": 0.5495, "step": 6699 }, { "epoch": 8.576, "grad_norm": 0.7878378033638, "learning_rate": 3.661464585834334e-05, "loss": 0.5725, "step": 6700 }, { "epoch": 8.57728, "grad_norm": 0.8135743141174316, "learning_rate": 3.661264505802321e-05, 
"loss": 0.5855, "step": 6701 }, { "epoch": 8.57856, "grad_norm": 0.8749912977218628, "learning_rate": 3.661064425770308e-05, "loss": 0.6184, "step": 6702 }, { "epoch": 8.57984, "grad_norm": 0.7891753315925598, "learning_rate": 3.6608643457382956e-05, "loss": 0.5873, "step": 6703 }, { "epoch": 8.58112, "grad_norm": 0.8346560001373291, "learning_rate": 3.660664265706283e-05, "loss": 0.5899, "step": 6704 }, { "epoch": 8.5824, "grad_norm": 0.874178946018219, "learning_rate": 3.66046418567427e-05, "loss": 0.6579, "step": 6705 }, { "epoch": 8.58368, "grad_norm": 0.7875180244445801, "learning_rate": 3.660264105642257e-05, "loss": 0.5685, "step": 6706 }, { "epoch": 8.58496, "grad_norm": 0.8282900452613831, "learning_rate": 3.6600640256102444e-05, "loss": 0.5997, "step": 6707 }, { "epoch": 8.58624, "grad_norm": 0.8672759532928467, "learning_rate": 3.6598639455782316e-05, "loss": 0.5734, "step": 6708 }, { "epoch": 8.58752, "grad_norm": 0.8106910586357117, "learning_rate": 3.659663865546219e-05, "loss": 0.6093, "step": 6709 }, { "epoch": 8.588799999999999, "grad_norm": 0.8382178544998169, "learning_rate": 3.659463785514206e-05, "loss": 0.6188, "step": 6710 }, { "epoch": 8.59008, "grad_norm": 0.8015230298042297, "learning_rate": 3.659263705482193e-05, "loss": 0.5521, "step": 6711 }, { "epoch": 8.59136, "grad_norm": 0.8438718914985657, "learning_rate": 3.65906362545018e-05, "loss": 0.6076, "step": 6712 }, { "epoch": 8.59264, "grad_norm": 0.8490085601806641, "learning_rate": 3.6588635454181675e-05, "loss": 0.6349, "step": 6713 }, { "epoch": 8.59392, "grad_norm": 0.7863012552261353, "learning_rate": 3.658663465386155e-05, "loss": 0.5578, "step": 6714 }, { "epoch": 8.5952, "grad_norm": 0.7872239947319031, "learning_rate": 3.658463385354142e-05, "loss": 0.5816, "step": 6715 }, { "epoch": 8.59648, "grad_norm": 0.7982747554779053, "learning_rate": 3.658263305322129e-05, "loss": 0.5745, "step": 6716 }, { "epoch": 8.59776, "grad_norm": 0.7870101928710938, "learning_rate": 
3.658063225290116e-05, "loss": 0.5575, "step": 6717 }, { "epoch": 8.59904, "grad_norm": 0.8285995125770569, "learning_rate": 3.6578631452581034e-05, "loss": 0.5857, "step": 6718 }, { "epoch": 8.60032, "grad_norm": 0.8226214051246643, "learning_rate": 3.6576630652260906e-05, "loss": 0.5893, "step": 6719 }, { "epoch": 8.6016, "grad_norm": 0.8384042382240295, "learning_rate": 3.657462985194078e-05, "loss": 0.5664, "step": 6720 }, { "epoch": 8.60288, "grad_norm": 0.8193085789680481, "learning_rate": 3.657262905162065e-05, "loss": 0.5688, "step": 6721 }, { "epoch": 8.60416, "grad_norm": 0.8433443307876587, "learning_rate": 3.657062825130052e-05, "loss": 0.6191, "step": 6722 }, { "epoch": 8.60544, "grad_norm": 0.7860081791877747, "learning_rate": 3.6568627450980393e-05, "loss": 0.547, "step": 6723 }, { "epoch": 8.60672, "grad_norm": 0.7903274893760681, "learning_rate": 3.6566626650660265e-05, "loss": 0.5653, "step": 6724 }, { "epoch": 8.608, "grad_norm": 0.8736501336097717, "learning_rate": 3.656462585034014e-05, "loss": 0.5936, "step": 6725 }, { "epoch": 8.60928, "grad_norm": 0.8456231355667114, "learning_rate": 3.656262505002001e-05, "loss": 0.5829, "step": 6726 }, { "epoch": 8.61056, "grad_norm": 0.7985215187072754, "learning_rate": 3.656062424969988e-05, "loss": 0.5847, "step": 6727 }, { "epoch": 8.61184, "grad_norm": 0.8238279223442078, "learning_rate": 3.655862344937975e-05, "loss": 0.5796, "step": 6728 }, { "epoch": 8.61312, "grad_norm": 0.826208233833313, "learning_rate": 3.6556622649059625e-05, "loss": 0.5839, "step": 6729 }, { "epoch": 8.6144, "grad_norm": 0.7756603956222534, "learning_rate": 3.6554621848739496e-05, "loss": 0.544, "step": 6730 }, { "epoch": 8.61568, "grad_norm": 0.7803246974945068, "learning_rate": 3.6552621048419375e-05, "loss": 0.5523, "step": 6731 }, { "epoch": 8.61696, "grad_norm": 0.8132696747779846, "learning_rate": 3.655062024809924e-05, "loss": 0.5963, "step": 6732 }, { "epoch": 8.61824, "grad_norm": 0.8277651071548462, "learning_rate": 
3.654861944777911e-05, "loss": 0.6203, "step": 6733 }, { "epoch": 8.61952, "grad_norm": 0.8409605622291565, "learning_rate": 3.6546618647458984e-05, "loss": 0.6087, "step": 6734 }, { "epoch": 8.6208, "grad_norm": 0.8315775990486145, "learning_rate": 3.6544617847138856e-05, "loss": 0.5775, "step": 6735 }, { "epoch": 8.62208, "grad_norm": 0.8047201037406921, "learning_rate": 3.654261704681873e-05, "loss": 0.6142, "step": 6736 }, { "epoch": 8.62336, "grad_norm": 0.7707719802856445, "learning_rate": 3.65406162464986e-05, "loss": 0.5723, "step": 6737 }, { "epoch": 8.62464, "grad_norm": 0.7818657755851746, "learning_rate": 3.653861544617848e-05, "loss": 0.6173, "step": 6738 }, { "epoch": 8.62592, "grad_norm": 0.8295494914054871, "learning_rate": 3.653661464585835e-05, "loss": 0.598, "step": 6739 }, { "epoch": 8.6272, "grad_norm": 0.8243419528007507, "learning_rate": 3.6534613845538215e-05, "loss": 0.5859, "step": 6740 }, { "epoch": 8.62848, "grad_norm": 0.8508108258247375, "learning_rate": 3.653261304521809e-05, "loss": 0.6116, "step": 6741 }, { "epoch": 8.62976, "grad_norm": 0.8881233334541321, "learning_rate": 3.653061224489796e-05, "loss": 0.6642, "step": 6742 }, { "epoch": 8.63104, "grad_norm": 0.8283036351203918, "learning_rate": 3.652861144457783e-05, "loss": 0.5926, "step": 6743 }, { "epoch": 8.63232, "grad_norm": 0.7696643471717834, "learning_rate": 3.65266106442577e-05, "loss": 0.5894, "step": 6744 }, { "epoch": 8.6336, "grad_norm": 0.8470257520675659, "learning_rate": 3.652460984393758e-05, "loss": 0.5801, "step": 6745 }, { "epoch": 8.63488, "grad_norm": 0.8444271087646484, "learning_rate": 3.652260904361745e-05, "loss": 0.5986, "step": 6746 }, { "epoch": 8.63616, "grad_norm": 0.7664961814880371, "learning_rate": 3.6520608243297325e-05, "loss": 0.553, "step": 6747 }, { "epoch": 8.63744, "grad_norm": 0.8508731722831726, "learning_rate": 3.651860744297719e-05, "loss": 0.6113, "step": 6748 }, { "epoch": 8.63872, "grad_norm": 0.7583442330360413, "learning_rate": 
3.651660664265706e-05, "loss": 0.533, "step": 6749 }, { "epoch": 8.64, "grad_norm": 0.812756359577179, "learning_rate": 3.6514605842336934e-05, "loss": 0.5829, "step": 6750 }, { "epoch": 8.64128, "grad_norm": 0.8092144727706909, "learning_rate": 3.6512605042016805e-05, "loss": 0.5675, "step": 6751 }, { "epoch": 8.64256, "grad_norm": 0.8396515846252441, "learning_rate": 3.6510604241696684e-05, "loss": 0.6065, "step": 6752 }, { "epoch": 8.64384, "grad_norm": 0.7741784453392029, "learning_rate": 3.6508603441376556e-05, "loss": 0.5881, "step": 6753 }, { "epoch": 8.64512, "grad_norm": 0.8222891092300415, "learning_rate": 3.650660264105643e-05, "loss": 0.6125, "step": 6754 }, { "epoch": 8.6464, "grad_norm": 0.8462512493133545, "learning_rate": 3.65046018407363e-05, "loss": 0.6372, "step": 6755 }, { "epoch": 8.64768, "grad_norm": 0.8327202796936035, "learning_rate": 3.6502601040416165e-05, "loss": 0.6419, "step": 6756 }, { "epoch": 8.64896, "grad_norm": 0.7695246338844299, "learning_rate": 3.6500600240096037e-05, "loss": 0.5385, "step": 6757 }, { "epoch": 8.65024, "grad_norm": 0.8170539736747742, "learning_rate": 3.649859943977591e-05, "loss": 0.5818, "step": 6758 }, { "epoch": 8.65152, "grad_norm": 0.8147903680801392, "learning_rate": 3.649659863945579e-05, "loss": 0.616, "step": 6759 }, { "epoch": 8.6528, "grad_norm": 0.7922413945198059, "learning_rate": 3.649459783913566e-05, "loss": 0.6042, "step": 6760 }, { "epoch": 8.65408, "grad_norm": 0.7604812979698181, "learning_rate": 3.649259703881553e-05, "loss": 0.5312, "step": 6761 }, { "epoch": 8.65536, "grad_norm": 0.855582594871521, "learning_rate": 3.64905962384954e-05, "loss": 0.6038, "step": 6762 }, { "epoch": 8.65664, "grad_norm": 0.8385626077651978, "learning_rate": 3.6488595438175275e-05, "loss": 0.569, "step": 6763 }, { "epoch": 8.65792, "grad_norm": 0.8010948300361633, "learning_rate": 3.648659463785514e-05, "loss": 0.5846, "step": 6764 }, { "epoch": 8.6592, "grad_norm": 0.8321779370307922, "learning_rate": 
3.648459383753501e-05, "loss": 0.6176, "step": 6765 }, { "epoch": 8.66048, "grad_norm": 0.8671482801437378, "learning_rate": 3.648259303721488e-05, "loss": 0.6062, "step": 6766 }, { "epoch": 8.66176, "grad_norm": 0.8177146911621094, "learning_rate": 3.648059223689476e-05, "loss": 0.5906, "step": 6767 }, { "epoch": 8.66304, "grad_norm": 0.8458163738250732, "learning_rate": 3.6478591436574634e-05, "loss": 0.6042, "step": 6768 }, { "epoch": 8.66432, "grad_norm": 0.7744457125663757, "learning_rate": 3.6476590636254506e-05, "loss": 0.5768, "step": 6769 }, { "epoch": 8.6656, "grad_norm": 0.8034748435020447, "learning_rate": 3.647458983593438e-05, "loss": 0.5837, "step": 6770 }, { "epoch": 8.66688, "grad_norm": 0.7622728943824768, "learning_rate": 3.647258903561425e-05, "loss": 0.5629, "step": 6771 }, { "epoch": 8.66816, "grad_norm": 0.8260266780853271, "learning_rate": 3.6470588235294114e-05, "loss": 0.5742, "step": 6772 }, { "epoch": 8.66944, "grad_norm": 0.8134649395942688, "learning_rate": 3.6468587434973986e-05, "loss": 0.5833, "step": 6773 }, { "epoch": 8.67072, "grad_norm": 0.827939510345459, "learning_rate": 3.6466586634653865e-05, "loss": 0.5361, "step": 6774 }, { "epoch": 8.672, "grad_norm": 0.8437283635139465, "learning_rate": 3.646458583433374e-05, "loss": 0.6519, "step": 6775 }, { "epoch": 8.67328, "grad_norm": 0.836875319480896, "learning_rate": 3.646258503401361e-05, "loss": 0.605, "step": 6776 }, { "epoch": 8.67456, "grad_norm": 0.8690876364707947, "learning_rate": 3.646058423369348e-05, "loss": 0.6817, "step": 6777 }, { "epoch": 8.67584, "grad_norm": 0.80400550365448, "learning_rate": 3.645858343337335e-05, "loss": 0.5805, "step": 6778 }, { "epoch": 8.67712, "grad_norm": 0.7860267162322998, "learning_rate": 3.6456582633053224e-05, "loss": 0.5937, "step": 6779 }, { "epoch": 8.6784, "grad_norm": 0.8096144199371338, "learning_rate": 3.645458183273309e-05, "loss": 0.5387, "step": 6780 }, { "epoch": 8.67968, "grad_norm": 0.7942869067192078, "learning_rate": 
3.645258103241297e-05, "loss": 0.5842, "step": 6781 }, { "epoch": 8.68096, "grad_norm": 0.768929123878479, "learning_rate": 3.645058023209284e-05, "loss": 0.6049, "step": 6782 }, { "epoch": 8.68224, "grad_norm": 0.8190042972564697, "learning_rate": 3.644857943177271e-05, "loss": 0.5947, "step": 6783 }, { "epoch": 8.68352, "grad_norm": 0.847685694694519, "learning_rate": 3.6446578631452584e-05, "loss": 0.6109, "step": 6784 }, { "epoch": 8.6848, "grad_norm": 0.7972142696380615, "learning_rate": 3.6444577831132455e-05, "loss": 0.5764, "step": 6785 }, { "epoch": 8.68608, "grad_norm": 0.8239948153495789, "learning_rate": 3.644257703081233e-05, "loss": 0.6277, "step": 6786 }, { "epoch": 8.68736, "grad_norm": 0.7374285459518433, "learning_rate": 3.64405762304922e-05, "loss": 0.5331, "step": 6787 }, { "epoch": 8.68864, "grad_norm": 0.7998077273368835, "learning_rate": 3.643857543017207e-05, "loss": 0.5555, "step": 6788 }, { "epoch": 8.68992, "grad_norm": 0.8306496739387512, "learning_rate": 3.643657462985194e-05, "loss": 0.5906, "step": 6789 }, { "epoch": 8.6912, "grad_norm": 0.8808903694152832, "learning_rate": 3.6434573829531815e-05, "loss": 0.6105, "step": 6790 }, { "epoch": 8.69248, "grad_norm": 0.7989248037338257, "learning_rate": 3.6432573029211687e-05, "loss": 0.5822, "step": 6791 }, { "epoch": 8.69376, "grad_norm": 0.7795215845108032, "learning_rate": 3.643057222889156e-05, "loss": 0.5552, "step": 6792 }, { "epoch": 8.69504, "grad_norm": 0.8287959098815918, "learning_rate": 3.642857142857143e-05, "loss": 0.6018, "step": 6793 }, { "epoch": 8.69632, "grad_norm": 0.8223719000816345, "learning_rate": 3.64265706282513e-05, "loss": 0.5816, "step": 6794 }, { "epoch": 8.6976, "grad_norm": 0.8280214667320251, "learning_rate": 3.6424569827931174e-05, "loss": 0.6346, "step": 6795 }, { "epoch": 8.698879999999999, "grad_norm": 0.795192539691925, "learning_rate": 3.6422569027611046e-05, "loss": 0.6239, "step": 6796 }, { "epoch": 8.70016, "grad_norm": 0.7667868137359619, 
"learning_rate": 3.642056822729092e-05, "loss": 0.5148, "step": 6797 }, { "epoch": 8.70144, "grad_norm": 0.8211344480514526, "learning_rate": 3.641856742697079e-05, "loss": 0.5715, "step": 6798 }, { "epoch": 8.70272, "grad_norm": 0.8203229904174805, "learning_rate": 3.641656662665066e-05, "loss": 0.6021, "step": 6799 }, { "epoch": 8.704, "grad_norm": 0.8265122175216675, "learning_rate": 3.641456582633053e-05, "loss": 0.6179, "step": 6800 }, { "epoch": 8.70528, "grad_norm": 0.9294067025184631, "learning_rate": 3.6412565026010405e-05, "loss": 0.6312, "step": 6801 }, { "epoch": 8.70656, "grad_norm": 0.8427034020423889, "learning_rate": 3.641056422569028e-05, "loss": 0.5626, "step": 6802 }, { "epoch": 8.707840000000001, "grad_norm": 0.8450182676315308, "learning_rate": 3.640856342537015e-05, "loss": 0.6435, "step": 6803 }, { "epoch": 8.70912, "grad_norm": 0.8336526155471802, "learning_rate": 3.640656262505002e-05, "loss": 0.5936, "step": 6804 }, { "epoch": 8.7104, "grad_norm": 0.8691603541374207, "learning_rate": 3.640456182472989e-05, "loss": 0.605, "step": 6805 }, { "epoch": 8.71168, "grad_norm": 0.7679282426834106, "learning_rate": 3.6402561024409764e-05, "loss": 0.5439, "step": 6806 }, { "epoch": 8.71296, "grad_norm": 0.8385738730430603, "learning_rate": 3.6400560224089636e-05, "loss": 0.6184, "step": 6807 }, { "epoch": 8.71424, "grad_norm": 0.8129993677139282, "learning_rate": 3.639855942376951e-05, "loss": 0.5656, "step": 6808 }, { "epoch": 8.71552, "grad_norm": 0.7901842594146729, "learning_rate": 3.639655862344939e-05, "loss": 0.573, "step": 6809 }, { "epoch": 8.7168, "grad_norm": 0.8075759410858154, "learning_rate": 3.639455782312925e-05, "loss": 0.5685, "step": 6810 }, { "epoch": 8.71808, "grad_norm": 0.7919057011604309, "learning_rate": 3.6392557022809124e-05, "loss": 0.58, "step": 6811 }, { "epoch": 8.71936, "grad_norm": 0.8212895393371582, "learning_rate": 3.6390556222488995e-05, "loss": 0.5962, "step": 6812 }, { "epoch": 8.72064, "grad_norm": 
0.8056111931800842, "learning_rate": 3.638855542216887e-05, "loss": 0.5887, "step": 6813 }, { "epoch": 8.72192, "grad_norm": 0.794323742389679, "learning_rate": 3.638655462184874e-05, "loss": 0.5535, "step": 6814 }, { "epoch": 8.7232, "grad_norm": 0.8696872591972351, "learning_rate": 3.638455382152861e-05, "loss": 0.5984, "step": 6815 }, { "epoch": 8.72448, "grad_norm": 0.8847552537918091, "learning_rate": 3.638255302120849e-05, "loss": 0.6501, "step": 6816 }, { "epoch": 8.72576, "grad_norm": 0.8174288272857666, "learning_rate": 3.638055222088836e-05, "loss": 0.6027, "step": 6817 }, { "epoch": 8.72704, "grad_norm": 0.8479911088943481, "learning_rate": 3.637855142056823e-05, "loss": 0.6216, "step": 6818 }, { "epoch": 8.72832, "grad_norm": 0.8264278173446655, "learning_rate": 3.63765506202481e-05, "loss": 0.615, "step": 6819 }, { "epoch": 8.7296, "grad_norm": 0.8488075137138367, "learning_rate": 3.637454981992797e-05, "loss": 0.5891, "step": 6820 }, { "epoch": 8.730879999999999, "grad_norm": 0.8699356317520142, "learning_rate": 3.637254901960784e-05, "loss": 0.6251, "step": 6821 }, { "epoch": 8.73216, "grad_norm": 0.8555402755737305, "learning_rate": 3.6370548219287714e-05, "loss": 0.635, "step": 6822 }, { "epoch": 8.73344, "grad_norm": 0.7735991477966309, "learning_rate": 3.636854741896759e-05, "loss": 0.5566, "step": 6823 }, { "epoch": 8.73472, "grad_norm": 0.8107903599739075, "learning_rate": 3.6366546618647465e-05, "loss": 0.5747, "step": 6824 }, { "epoch": 8.736, "grad_norm": 0.7944985032081604, "learning_rate": 3.6364545818327336e-05, "loss": 0.5683, "step": 6825 }, { "epoch": 8.73728, "grad_norm": 0.8392530083656311, "learning_rate": 3.63625450180072e-05, "loss": 0.5869, "step": 6826 }, { "epoch": 8.73856, "grad_norm": 0.8549109101295471, "learning_rate": 3.636054421768707e-05, "loss": 0.5996, "step": 6827 }, { "epoch": 8.739840000000001, "grad_norm": 0.8116100430488586, "learning_rate": 3.6358543417366945e-05, "loss": 0.5885, "step": 6828 }, { "epoch": 
8.74112, "grad_norm": 0.81255042552948, "learning_rate": 3.635654261704682e-05, "loss": 0.5747, "step": 6829 }, { "epoch": 8.7424, "grad_norm": 0.8412413597106934, "learning_rate": 3.6354541816726696e-05, "loss": 0.595, "step": 6830 }, { "epoch": 8.74368, "grad_norm": 0.8014543056488037, "learning_rate": 3.635254101640657e-05, "loss": 0.5669, "step": 6831 }, { "epoch": 8.74496, "grad_norm": 0.7754011750221252, "learning_rate": 3.635054021608644e-05, "loss": 0.5427, "step": 6832 }, { "epoch": 8.74624, "grad_norm": 0.8052383065223694, "learning_rate": 3.634853941576631e-05, "loss": 0.576, "step": 6833 }, { "epoch": 8.74752, "grad_norm": 0.8288300633430481, "learning_rate": 3.6346538615446176e-05, "loss": 0.6057, "step": 6834 }, { "epoch": 8.7488, "grad_norm": 0.8074236512184143, "learning_rate": 3.634453781512605e-05, "loss": 0.5821, "step": 6835 }, { "epoch": 8.75008, "grad_norm": 0.7861261963844299, "learning_rate": 3.634253701480592e-05, "loss": 0.5665, "step": 6836 }, { "epoch": 8.75136, "grad_norm": 0.8360334634780884, "learning_rate": 3.63405362144858e-05, "loss": 0.6376, "step": 6837 }, { "epoch": 8.75264, "grad_norm": 0.7639442086219788, "learning_rate": 3.633853541416567e-05, "loss": 0.5622, "step": 6838 }, { "epoch": 8.75392, "grad_norm": 0.7803179025650024, "learning_rate": 3.633653461384554e-05, "loss": 0.5281, "step": 6839 }, { "epoch": 8.7552, "grad_norm": 0.7837743163108826, "learning_rate": 3.6334533813525414e-05, "loss": 0.5903, "step": 6840 }, { "epoch": 8.75648, "grad_norm": 0.7992691397666931, "learning_rate": 3.6332533013205286e-05, "loss": 0.5786, "step": 6841 }, { "epoch": 8.75776, "grad_norm": 0.8223332762718201, "learning_rate": 3.633053221288515e-05, "loss": 0.5554, "step": 6842 }, { "epoch": 8.75904, "grad_norm": 0.8041775226593018, "learning_rate": 3.632853141256502e-05, "loss": 0.5898, "step": 6843 }, { "epoch": 8.76032, "grad_norm": 0.8420320153236389, "learning_rate": 3.63265306122449e-05, "loss": 0.5924, "step": 6844 }, { "epoch": 
8.7616, "grad_norm": 0.7990514039993286, "learning_rate": 3.6324529811924774e-05, "loss": 0.6265, "step": 6845 }, { "epoch": 8.76288, "grad_norm": 0.7773391008377075, "learning_rate": 3.6322529011604645e-05, "loss": 0.5407, "step": 6846 }, { "epoch": 8.76416, "grad_norm": 0.8004972338676453, "learning_rate": 3.632052821128452e-05, "loss": 0.6135, "step": 6847 }, { "epoch": 8.76544, "grad_norm": 0.7751970887184143, "learning_rate": 3.631852741096439e-05, "loss": 0.5591, "step": 6848 }, { "epoch": 8.76672, "grad_norm": 0.7655860185623169, "learning_rate": 3.631652661064426e-05, "loss": 0.5736, "step": 6849 }, { "epoch": 8.768, "grad_norm": 0.8234750628471375, "learning_rate": 3.6314525810324126e-05, "loss": 0.5844, "step": 6850 }, { "epoch": 8.76928, "grad_norm": 0.8051683306694031, "learning_rate": 3.6312525010004005e-05, "loss": 0.5872, "step": 6851 }, { "epoch": 8.77056, "grad_norm": 0.9070225954055786, "learning_rate": 3.6310524209683877e-05, "loss": 0.6276, "step": 6852 }, { "epoch": 8.77184, "grad_norm": 0.8074741959571838, "learning_rate": 3.630852340936375e-05, "loss": 0.5962, "step": 6853 }, { "epoch": 8.77312, "grad_norm": 0.8040851354598999, "learning_rate": 3.630652260904362e-05, "loss": 0.5674, "step": 6854 }, { "epoch": 8.7744, "grad_norm": 0.8116294741630554, "learning_rate": 3.630452180872349e-05, "loss": 0.5863, "step": 6855 }, { "epoch": 8.77568, "grad_norm": 0.9247410297393799, "learning_rate": 3.6302521008403364e-05, "loss": 0.6572, "step": 6856 }, { "epoch": 8.77696, "grad_norm": 0.8132933378219604, "learning_rate": 3.6300520208083236e-05, "loss": 0.5931, "step": 6857 }, { "epoch": 8.77824, "grad_norm": 0.8136230707168579, "learning_rate": 3.629851940776311e-05, "loss": 0.579, "step": 6858 }, { "epoch": 8.77952, "grad_norm": 0.8221474885940552, "learning_rate": 3.629651860744298e-05, "loss": 0.6021, "step": 6859 }, { "epoch": 8.7808, "grad_norm": 0.8838080763816833, "learning_rate": 3.629451780712285e-05, "loss": 0.5825, "step": 6860 }, { 
"epoch": 8.78208, "grad_norm": 0.8534195423126221, "learning_rate": 3.629251700680272e-05, "loss": 0.6262, "step": 6861 }, { "epoch": 8.78336, "grad_norm": 0.8286737203598022, "learning_rate": 3.6290516206482595e-05, "loss": 0.5569, "step": 6862 }, { "epoch": 8.78464, "grad_norm": 0.7863860130310059, "learning_rate": 3.628851540616247e-05, "loss": 0.5827, "step": 6863 }, { "epoch": 8.78592, "grad_norm": 0.8157966136932373, "learning_rate": 3.628651460584234e-05, "loss": 0.6109, "step": 6864 }, { "epoch": 8.7872, "grad_norm": 0.7991543412208557, "learning_rate": 3.628451380552221e-05, "loss": 0.5703, "step": 6865 }, { "epoch": 8.78848, "grad_norm": 0.7719929814338684, "learning_rate": 3.628251300520208e-05, "loss": 0.5925, "step": 6866 }, { "epoch": 8.78976, "grad_norm": 0.8264296054840088, "learning_rate": 3.6280512204881954e-05, "loss": 0.584, "step": 6867 }, { "epoch": 8.79104, "grad_norm": 0.8640742897987366, "learning_rate": 3.6278511404561826e-05, "loss": 0.6214, "step": 6868 }, { "epoch": 8.79232, "grad_norm": 0.7543335556983948, "learning_rate": 3.62765106042417e-05, "loss": 0.5316, "step": 6869 }, { "epoch": 8.7936, "grad_norm": 0.8252272605895996, "learning_rate": 3.627450980392157e-05, "loss": 0.6209, "step": 6870 }, { "epoch": 8.79488, "grad_norm": 0.7842090129852295, "learning_rate": 3.627250900360144e-05, "loss": 0.5349, "step": 6871 }, { "epoch": 8.79616, "grad_norm": 0.794221818447113, "learning_rate": 3.6270508203281314e-05, "loss": 0.6018, "step": 6872 }, { "epoch": 8.79744, "grad_norm": 0.7998900413513184, "learning_rate": 3.6268507402961186e-05, "loss": 0.5694, "step": 6873 }, { "epoch": 8.79872, "grad_norm": 0.8280250430107117, "learning_rate": 3.626650660264106e-05, "loss": 0.5735, "step": 6874 }, { "epoch": 8.8, "grad_norm": 0.8256487846374512, "learning_rate": 3.626450580232093e-05, "loss": 0.5985, "step": 6875 }, { "epoch": 8.80128, "grad_norm": 0.86505526304245, "learning_rate": 3.62625050020008e-05, "loss": 0.6511, "step": 6876 }, { 
"epoch": 8.80256, "grad_norm": 0.7898805141448975, "learning_rate": 3.626050420168067e-05, "loss": 0.5708, "step": 6877 }, { "epoch": 8.80384, "grad_norm": 0.8345022797584534, "learning_rate": 3.6258503401360545e-05, "loss": 0.5572, "step": 6878 }, { "epoch": 8.80512, "grad_norm": 0.8210886120796204, "learning_rate": 3.625650260104042e-05, "loss": 0.581, "step": 6879 }, { "epoch": 8.8064, "grad_norm": 0.7753137350082397, "learning_rate": 3.625450180072029e-05, "loss": 0.6006, "step": 6880 }, { "epoch": 8.80768, "grad_norm": 0.8361926078796387, "learning_rate": 3.625250100040016e-05, "loss": 0.5687, "step": 6881 }, { "epoch": 8.80896, "grad_norm": 0.8121229410171509, "learning_rate": 3.625050020008003e-05, "loss": 0.5764, "step": 6882 }, { "epoch": 8.81024, "grad_norm": 0.8940417766571045, "learning_rate": 3.6248499399759904e-05, "loss": 0.6222, "step": 6883 }, { "epoch": 8.81152, "grad_norm": 0.7658796906471252, "learning_rate": 3.6246498599439776e-05, "loss": 0.5626, "step": 6884 }, { "epoch": 8.8128, "grad_norm": 0.8226578831672668, "learning_rate": 3.624449779911965e-05, "loss": 0.5823, "step": 6885 }, { "epoch": 8.81408, "grad_norm": 0.8413184881210327, "learning_rate": 3.624249699879952e-05, "loss": 0.6244, "step": 6886 }, { "epoch": 8.81536, "grad_norm": 0.8740143775939941, "learning_rate": 3.62404961984794e-05, "loss": 0.6014, "step": 6887 }, { "epoch": 8.81664, "grad_norm": 0.7891989946365356, "learning_rate": 3.6238495398159263e-05, "loss": 0.5616, "step": 6888 }, { "epoch": 8.81792, "grad_norm": 0.8135474920272827, "learning_rate": 3.6236494597839135e-05, "loss": 0.5674, "step": 6889 }, { "epoch": 8.8192, "grad_norm": 0.7830061316490173, "learning_rate": 3.623449379751901e-05, "loss": 0.5726, "step": 6890 }, { "epoch": 8.82048, "grad_norm": 0.8667339086532593, "learning_rate": 3.623249299719888e-05, "loss": 0.5673, "step": 6891 }, { "epoch": 8.82176, "grad_norm": 0.7781888246536255, "learning_rate": 3.623049219687875e-05, "loss": 0.5379, "step": 6892 }, { 
"epoch": 8.82304, "grad_norm": 0.8315311670303345, "learning_rate": 3.622849139655862e-05, "loss": 0.5652, "step": 6893 }, { "epoch": 8.82432, "grad_norm": 0.8577895164489746, "learning_rate": 3.62264905962385e-05, "loss": 0.631, "step": 6894 }, { "epoch": 8.8256, "grad_norm": 0.8391826152801514, "learning_rate": 3.622448979591837e-05, "loss": 0.6021, "step": 6895 }, { "epoch": 8.82688, "grad_norm": 0.8212698698043823, "learning_rate": 3.622248899559824e-05, "loss": 0.6771, "step": 6896 }, { "epoch": 8.82816, "grad_norm": 0.7723982334136963, "learning_rate": 3.622048819527811e-05, "loss": 0.5674, "step": 6897 }, { "epoch": 8.82944, "grad_norm": 0.807265043258667, "learning_rate": 3.621848739495798e-05, "loss": 0.6185, "step": 6898 }, { "epoch": 8.83072, "grad_norm": 0.8213419318199158, "learning_rate": 3.6216486594637854e-05, "loss": 0.6491, "step": 6899 }, { "epoch": 8.832, "grad_norm": 0.8153632283210754, "learning_rate": 3.6214485794317726e-05, "loss": 0.6082, "step": 6900 }, { "epoch": 8.83328, "grad_norm": 0.80569988489151, "learning_rate": 3.6212484993997604e-05, "loss": 0.6263, "step": 6901 }, { "epoch": 8.83456, "grad_norm": 0.8200774192810059, "learning_rate": 3.6210484193677476e-05, "loss": 0.6107, "step": 6902 }, { "epoch": 8.83584, "grad_norm": 0.783087432384491, "learning_rate": 3.620848339335735e-05, "loss": 0.5873, "step": 6903 }, { "epoch": 8.83712, "grad_norm": 0.8208640813827515, "learning_rate": 3.620648259303721e-05, "loss": 0.6105, "step": 6904 }, { "epoch": 8.8384, "grad_norm": 0.8038390278816223, "learning_rate": 3.6204481792717085e-05, "loss": 0.6263, "step": 6905 }, { "epoch": 8.83968, "grad_norm": 0.7997908592224121, "learning_rate": 3.620248099239696e-05, "loss": 0.631, "step": 6906 }, { "epoch": 8.84096, "grad_norm": 0.8349906802177429, "learning_rate": 3.620048019207683e-05, "loss": 0.5978, "step": 6907 }, { "epoch": 8.84224, "grad_norm": 0.8827357292175293, "learning_rate": 3.619847939175671e-05, "loss": 0.6295, "step": 6908 }, { 
"epoch": 8.84352, "grad_norm": 0.8000686168670654, "learning_rate": 3.619647859143658e-05, "loss": 0.6055, "step": 6909 }, { "epoch": 8.8448, "grad_norm": 0.8262300491333008, "learning_rate": 3.619447779111645e-05, "loss": 0.5929, "step": 6910 }, { "epoch": 8.84608, "grad_norm": 0.8049215078353882, "learning_rate": 3.619247699079632e-05, "loss": 0.5946, "step": 6911 }, { "epoch": 8.84736, "grad_norm": 0.8154301643371582, "learning_rate": 3.619047619047619e-05, "loss": 0.5801, "step": 6912 }, { "epoch": 8.84864, "grad_norm": 0.8478993773460388, "learning_rate": 3.618847539015606e-05, "loss": 0.6509, "step": 6913 }, { "epoch": 8.849920000000001, "grad_norm": 0.8522371649742126, "learning_rate": 3.618647458983593e-05, "loss": 0.5853, "step": 6914 }, { "epoch": 8.8512, "grad_norm": 0.8328931331634521, "learning_rate": 3.618447378951581e-05, "loss": 0.6368, "step": 6915 }, { "epoch": 8.85248, "grad_norm": 0.7752423882484436, "learning_rate": 3.618247298919568e-05, "loss": 0.558, "step": 6916 }, { "epoch": 8.85376, "grad_norm": 0.7981879115104675, "learning_rate": 3.6180472188875554e-05, "loss": 0.5696, "step": 6917 }, { "epoch": 8.85504, "grad_norm": 0.837726354598999, "learning_rate": 3.6178471388555426e-05, "loss": 0.6013, "step": 6918 }, { "epoch": 8.85632, "grad_norm": 0.8261789679527283, "learning_rate": 3.61764705882353e-05, "loss": 0.6242, "step": 6919 }, { "epoch": 8.8576, "grad_norm": 0.8679850697517395, "learning_rate": 3.617446978791516e-05, "loss": 0.6112, "step": 6920 }, { "epoch": 8.85888, "grad_norm": 0.8656749129295349, "learning_rate": 3.6172468987595035e-05, "loss": 0.6052, "step": 6921 }, { "epoch": 8.86016, "grad_norm": 0.7777904272079468, "learning_rate": 3.617046818727491e-05, "loss": 0.5814, "step": 6922 }, { "epoch": 8.86144, "grad_norm": 0.8450097441673279, "learning_rate": 3.6168467386954785e-05, "loss": 0.5964, "step": 6923 }, { "epoch": 8.86272, "grad_norm": 0.865833580493927, "learning_rate": 3.616646658663466e-05, "loss": 0.6279, "step": 
6924 }, { "epoch": 8.864, "grad_norm": 0.8091690540313721, "learning_rate": 3.616446578631453e-05, "loss": 0.5425, "step": 6925 }, { "epoch": 8.86528, "grad_norm": 0.8185449838638306, "learning_rate": 3.61624649859944e-05, "loss": 0.606, "step": 6926 }, { "epoch": 8.86656, "grad_norm": 0.8065361976623535, "learning_rate": 3.616046418567427e-05, "loss": 0.5938, "step": 6927 }, { "epoch": 8.86784, "grad_norm": 0.8017386198043823, "learning_rate": 3.615846338535414e-05, "loss": 0.5727, "step": 6928 }, { "epoch": 8.86912, "grad_norm": 0.8515564799308777, "learning_rate": 3.6156462585034016e-05, "loss": 0.6308, "step": 6929 }, { "epoch": 8.8704, "grad_norm": 0.837009608745575, "learning_rate": 3.615446178471389e-05, "loss": 0.6016, "step": 6930 }, { "epoch": 8.87168, "grad_norm": 0.8977416157722473, "learning_rate": 3.615246098439376e-05, "loss": 0.6323, "step": 6931 }, { "epoch": 8.872959999999999, "grad_norm": 0.8525516390800476, "learning_rate": 3.615046018407363e-05, "loss": 0.627, "step": 6932 }, { "epoch": 8.87424, "grad_norm": 0.838636577129364, "learning_rate": 3.6148459383753504e-05, "loss": 0.6056, "step": 6933 }, { "epoch": 8.87552, "grad_norm": 0.8404485583305359, "learning_rate": 3.6146458583433376e-05, "loss": 0.6425, "step": 6934 }, { "epoch": 8.8768, "grad_norm": 0.8780763745307922, "learning_rate": 3.614445778311325e-05, "loss": 0.677, "step": 6935 }, { "epoch": 8.87808, "grad_norm": 0.7928383350372314, "learning_rate": 3.614245698279312e-05, "loss": 0.584, "step": 6936 }, { "epoch": 8.87936, "grad_norm": 0.8395443558692932, "learning_rate": 3.614045618247299e-05, "loss": 0.6165, "step": 6937 }, { "epoch": 8.88064, "grad_norm": 0.8272031545639038, "learning_rate": 3.613845538215286e-05, "loss": 0.5808, "step": 6938 }, { "epoch": 8.881920000000001, "grad_norm": 0.8448715806007385, "learning_rate": 3.6136454581832735e-05, "loss": 0.6293, "step": 6939 }, { "epoch": 8.8832, "grad_norm": 0.8742750287055969, "learning_rate": 3.613445378151261e-05, "loss": 
0.5869, "step": 6940 }, { "epoch": 8.88448, "grad_norm": 0.7730932235717773, "learning_rate": 3.613245298119248e-05, "loss": 0.5513, "step": 6941 }, { "epoch": 8.88576, "grad_norm": 0.83644700050354, "learning_rate": 3.613045218087235e-05, "loss": 0.6132, "step": 6942 }, { "epoch": 8.88704, "grad_norm": 0.8572961688041687, "learning_rate": 3.612845138055222e-05, "loss": 0.6198, "step": 6943 }, { "epoch": 8.88832, "grad_norm": 0.8267669081687927, "learning_rate": 3.6126450580232094e-05, "loss": 0.5856, "step": 6944 }, { "epoch": 8.8896, "grad_norm": 0.7987276315689087, "learning_rate": 3.6124449779911966e-05, "loss": 0.5852, "step": 6945 }, { "epoch": 8.89088, "grad_norm": 0.8039761781692505, "learning_rate": 3.612244897959184e-05, "loss": 0.5562, "step": 6946 }, { "epoch": 8.89216, "grad_norm": 0.8594339489936829, "learning_rate": 3.612044817927171e-05, "loss": 0.5928, "step": 6947 }, { "epoch": 8.89344, "grad_norm": 0.7759758234024048, "learning_rate": 3.611844737895158e-05, "loss": 0.5363, "step": 6948 }, { "epoch": 8.89472, "grad_norm": 0.7741097211837769, "learning_rate": 3.6116446578631453e-05, "loss": 0.5417, "step": 6949 }, { "epoch": 8.896, "grad_norm": 0.800906777381897, "learning_rate": 3.611444577831133e-05, "loss": 0.5737, "step": 6950 }, { "epoch": 8.89728, "grad_norm": 0.8281027674674988, "learning_rate": 3.61124449779912e-05, "loss": 0.5661, "step": 6951 }, { "epoch": 8.89856, "grad_norm": 0.7662954926490784, "learning_rate": 3.611044417767107e-05, "loss": 0.559, "step": 6952 }, { "epoch": 8.89984, "grad_norm": 0.8090856671333313, "learning_rate": 3.610844337735094e-05, "loss": 0.5599, "step": 6953 }, { "epoch": 8.90112, "grad_norm": 0.8325416445732117, "learning_rate": 3.610644257703081e-05, "loss": 0.5671, "step": 6954 }, { "epoch": 8.9024, "grad_norm": 0.8077730536460876, "learning_rate": 3.6104441776710685e-05, "loss": 0.6039, "step": 6955 }, { "epoch": 8.90368, "grad_norm": 0.8278130888938904, "learning_rate": 3.6102440976390556e-05, "loss": 
0.5771, "step": 6956 }, { "epoch": 8.904959999999999, "grad_norm": 0.7988932132720947, "learning_rate": 3.6100440176070435e-05, "loss": 0.6022, "step": 6957 }, { "epoch": 8.90624, "grad_norm": 0.8389017581939697, "learning_rate": 3.609843937575031e-05, "loss": 0.6001, "step": 6958 }, { "epoch": 8.90752, "grad_norm": 0.896360456943512, "learning_rate": 3.609643857543017e-05, "loss": 0.6543, "step": 6959 }, { "epoch": 8.9088, "grad_norm": 0.8242748379707336, "learning_rate": 3.6094437775110044e-05, "loss": 0.5962, "step": 6960 }, { "epoch": 8.91008, "grad_norm": 0.8101686835289001, "learning_rate": 3.6092436974789916e-05, "loss": 0.6084, "step": 6961 }, { "epoch": 8.91136, "grad_norm": 0.8416367769241333, "learning_rate": 3.609043617446979e-05, "loss": 0.5865, "step": 6962 }, { "epoch": 8.91264, "grad_norm": 0.8185626864433289, "learning_rate": 3.608843537414966e-05, "loss": 0.6199, "step": 6963 }, { "epoch": 8.91392, "grad_norm": 0.8309803009033203, "learning_rate": 3.608643457382954e-05, "loss": 0.6395, "step": 6964 }, { "epoch": 8.9152, "grad_norm": 0.7879054546356201, "learning_rate": 3.608443377350941e-05, "loss": 0.5667, "step": 6965 }, { "epoch": 8.91648, "grad_norm": 0.8321192860603333, "learning_rate": 3.608243297318928e-05, "loss": 0.6035, "step": 6966 }, { "epoch": 8.91776, "grad_norm": 0.8073901534080505, "learning_rate": 3.608043217286915e-05, "loss": 0.5635, "step": 6967 }, { "epoch": 8.91904, "grad_norm": 0.805634081363678, "learning_rate": 3.607843137254902e-05, "loss": 0.5788, "step": 6968 }, { "epoch": 8.92032, "grad_norm": 0.7749924063682556, "learning_rate": 3.607643057222889e-05, "loss": 0.5697, "step": 6969 }, { "epoch": 8.9216, "grad_norm": 0.7843794226646423, "learning_rate": 3.607442977190876e-05, "loss": 0.5763, "step": 6970 }, { "epoch": 8.92288, "grad_norm": 0.8274489641189575, "learning_rate": 3.607242897158864e-05, "loss": 0.5843, "step": 6971 }, { "epoch": 8.92416, "grad_norm": 0.834439754486084, "learning_rate": 3.607042817126851e-05, 
"loss": 0.5806, "step": 6972 }, { "epoch": 8.92544, "grad_norm": 0.8098054528236389, "learning_rate": 3.6068427370948385e-05, "loss": 0.5754, "step": 6973 }, { "epoch": 8.92672, "grad_norm": 0.8164894580841064, "learning_rate": 3.606642657062826e-05, "loss": 0.6191, "step": 6974 }, { "epoch": 8.928, "grad_norm": 0.7646953463554382, "learning_rate": 3.606442577030812e-05, "loss": 0.5733, "step": 6975 }, { "epoch": 8.92928, "grad_norm": 0.7695797085762024, "learning_rate": 3.6062424969987994e-05, "loss": 0.5586, "step": 6976 }, { "epoch": 8.93056, "grad_norm": 0.8515230417251587, "learning_rate": 3.6060424169667865e-05, "loss": 0.6997, "step": 6977 }, { "epoch": 8.93184, "grad_norm": 0.7551551461219788, "learning_rate": 3.6058423369347744e-05, "loss": 0.5588, "step": 6978 }, { "epoch": 8.93312, "grad_norm": 0.8263706564903259, "learning_rate": 3.6056422569027616e-05, "loss": 0.6269, "step": 6979 }, { "epoch": 8.9344, "grad_norm": 0.819164514541626, "learning_rate": 3.605442176870749e-05, "loss": 0.5782, "step": 6980 }, { "epoch": 8.93568, "grad_norm": 0.875464916229248, "learning_rate": 3.605242096838736e-05, "loss": 0.6204, "step": 6981 }, { "epoch": 8.93696, "grad_norm": 0.807060718536377, "learning_rate": 3.605042016806723e-05, "loss": 0.5681, "step": 6982 }, { "epoch": 8.93824, "grad_norm": 0.824044942855835, "learning_rate": 3.6048419367747097e-05, "loss": 0.6358, "step": 6983 }, { "epoch": 8.93952, "grad_norm": 0.7487238645553589, "learning_rate": 3.604641856742697e-05, "loss": 0.5667, "step": 6984 }, { "epoch": 8.9408, "grad_norm": 0.8306474685668945, "learning_rate": 3.604441776710684e-05, "loss": 0.5704, "step": 6985 }, { "epoch": 8.94208, "grad_norm": 0.7934936285018921, "learning_rate": 3.604241696678672e-05, "loss": 0.5734, "step": 6986 }, { "epoch": 8.94336, "grad_norm": 0.8018743991851807, "learning_rate": 3.604041616646659e-05, "loss": 0.5555, "step": 6987 }, { "epoch": 8.94464, "grad_norm": 0.8124516606330872, "learning_rate": 3.603841536614646e-05, 
"loss": 0.5972, "step": 6988 }, { "epoch": 8.94592, "grad_norm": 0.8345234394073486, "learning_rate": 3.6036414565826334e-05, "loss": 0.6231, "step": 6989 }, { "epoch": 8.9472, "grad_norm": 0.8359727263450623, "learning_rate": 3.6034413765506206e-05, "loss": 0.6152, "step": 6990 }, { "epoch": 8.94848, "grad_norm": 0.7563595771789551, "learning_rate": 3.603241296518607e-05, "loss": 0.5544, "step": 6991 }, { "epoch": 8.94976, "grad_norm": 0.8067274689674377, "learning_rate": 3.603041216486594e-05, "loss": 0.5755, "step": 6992 }, { "epoch": 8.95104, "grad_norm": 0.8166893720626831, "learning_rate": 3.602841136454582e-05, "loss": 0.6486, "step": 6993 }, { "epoch": 8.95232, "grad_norm": 0.7941920161247253, "learning_rate": 3.6026410564225694e-05, "loss": 0.6155, "step": 6994 }, { "epoch": 8.9536, "grad_norm": 0.7989978790283203, "learning_rate": 3.6024409763905566e-05, "loss": 0.5686, "step": 6995 }, { "epoch": 8.95488, "grad_norm": 0.8766310214996338, "learning_rate": 3.602240896358544e-05, "loss": 0.6189, "step": 6996 }, { "epoch": 8.95616, "grad_norm": 0.8384754061698914, "learning_rate": 3.602040816326531e-05, "loss": 0.6313, "step": 6997 }, { "epoch": 8.95744, "grad_norm": 0.8599010705947876, "learning_rate": 3.601840736294518e-05, "loss": 0.6274, "step": 6998 }, { "epoch": 8.95872, "grad_norm": 0.8240278363227844, "learning_rate": 3.6016406562625046e-05, "loss": 0.6005, "step": 6999 }, { "epoch": 8.96, "grad_norm": 0.8268716335296631, "learning_rate": 3.6014405762304925e-05, "loss": 0.6159, "step": 7000 }, { "epoch": 8.96128, "grad_norm": 0.8283771872520447, "learning_rate": 3.60124049619848e-05, "loss": 0.6447, "step": 7001 }, { "epoch": 8.96256, "grad_norm": 0.8202353119850159, "learning_rate": 3.601040416166467e-05, "loss": 0.5554, "step": 7002 }, { "epoch": 8.96384, "grad_norm": 0.8381731510162354, "learning_rate": 3.600840336134454e-05, "loss": 0.6646, "step": 7003 }, { "epoch": 8.96512, "grad_norm": 0.8112981915473938, "learning_rate": 3.600640256102441e-05, 
"loss": 0.5944, "step": 7004 }, { "epoch": 8.9664, "grad_norm": 0.8326748013496399, "learning_rate": 3.6004401760704284e-05, "loss": 0.6004, "step": 7005 }, { "epoch": 8.96768, "grad_norm": 0.8164408206939697, "learning_rate": 3.6002400960384156e-05, "loss": 0.5891, "step": 7006 }, { "epoch": 8.96896, "grad_norm": 0.7985873818397522, "learning_rate": 3.600040016006403e-05, "loss": 0.5516, "step": 7007 }, { "epoch": 8.97024, "grad_norm": 0.8269280195236206, "learning_rate": 3.59983993597439e-05, "loss": 0.5673, "step": 7008 }, { "epoch": 8.97152, "grad_norm": 0.7575619220733643, "learning_rate": 3.599639855942377e-05, "loss": 0.5324, "step": 7009 }, { "epoch": 8.9728, "grad_norm": 0.8653445243835449, "learning_rate": 3.5994397759103643e-05, "loss": 0.6027, "step": 7010 }, { "epoch": 8.97408, "grad_norm": 0.8530913591384888, "learning_rate": 3.5992396958783515e-05, "loss": 0.6428, "step": 7011 }, { "epoch": 8.97536, "grad_norm": 0.8212682604789734, "learning_rate": 3.599039615846339e-05, "loss": 0.5799, "step": 7012 }, { "epoch": 8.97664, "grad_norm": 0.8439937233924866, "learning_rate": 3.598839535814326e-05, "loss": 0.6195, "step": 7013 }, { "epoch": 8.97792, "grad_norm": 0.8312199711799622, "learning_rate": 3.598639455782313e-05, "loss": 0.5808, "step": 7014 }, { "epoch": 8.9792, "grad_norm": 0.8415351510047913, "learning_rate": 3.5984393757503e-05, "loss": 0.6143, "step": 7015 }, { "epoch": 8.98048, "grad_norm": 0.8534009456634521, "learning_rate": 3.5982392957182875e-05, "loss": 0.6133, "step": 7016 }, { "epoch": 8.98176, "grad_norm": 0.819942057132721, "learning_rate": 3.5980392156862746e-05, "loss": 0.62, "step": 7017 }, { "epoch": 8.98304, "grad_norm": 0.8175826668739319, "learning_rate": 3.597839135654262e-05, "loss": 0.5802, "step": 7018 }, { "epoch": 8.98432, "grad_norm": 0.8342677354812622, "learning_rate": 3.597639055622249e-05, "loss": 0.5873, "step": 7019 }, { "epoch": 8.9856, "grad_norm": 0.7667478919029236, "learning_rate": 3.597438975590236e-05, 
"loss": 0.557, "step": 7020 }, { "epoch": 8.98688, "grad_norm": 0.7987193465232849, "learning_rate": 3.5972388955582234e-05, "loss": 0.565, "step": 7021 }, { "epoch": 8.98816, "grad_norm": 0.8105794787406921, "learning_rate": 3.5970388155262106e-05, "loss": 0.6025, "step": 7022 }, { "epoch": 8.98944, "grad_norm": 0.8362847566604614, "learning_rate": 3.596838735494198e-05, "loss": 0.5946, "step": 7023 }, { "epoch": 8.99072, "grad_norm": 0.7905120253562927, "learning_rate": 3.596638655462185e-05, "loss": 0.5964, "step": 7024 }, { "epoch": 8.992, "grad_norm": 0.8283672332763672, "learning_rate": 3.596438575430172e-05, "loss": 0.596, "step": 7025 }, { "epoch": 8.99328, "grad_norm": 0.8208228945732117, "learning_rate": 3.596238495398159e-05, "loss": 0.6088, "step": 7026 }, { "epoch": 8.99456, "grad_norm": 0.7455883026123047, "learning_rate": 3.5960384153661465e-05, "loss": 0.5508, "step": 7027 }, { "epoch": 8.99584, "grad_norm": 0.8352407217025757, "learning_rate": 3.5958383353341344e-05, "loss": 0.628, "step": 7028 }, { "epoch": 8.99712, "grad_norm": 0.8079842925071716, "learning_rate": 3.595638255302121e-05, "loss": 0.596, "step": 7029 }, { "epoch": 8.9984, "grad_norm": 0.8588855862617493, "learning_rate": 3.595438175270108e-05, "loss": 0.6203, "step": 7030 }, { "epoch": 8.99968, "grad_norm": 0.8400841355323792, "learning_rate": 3.595238095238095e-05, "loss": 0.5663, "step": 7031 }, { "epoch": 9.00096, "grad_norm": 1.7468935251235962, "learning_rate": 3.5950380152060824e-05, "loss": 1.0437, "step": 7032 }, { "epoch": 9.00224, "grad_norm": 0.7875801920890808, "learning_rate": 3.5948379351740696e-05, "loss": 0.556, "step": 7033 }, { "epoch": 9.00352, "grad_norm": 0.8308430910110474, "learning_rate": 3.594637855142057e-05, "loss": 0.5821, "step": 7034 }, { "epoch": 9.0048, "grad_norm": 0.7866562008857727, "learning_rate": 3.594437775110045e-05, "loss": 0.529, "step": 7035 }, { "epoch": 9.00608, "grad_norm": 0.8387976884841919, "learning_rate": 3.594237695078032e-05, 
"loss": 0.5738, "step": 7036 }, { "epoch": 9.00736, "grad_norm": 0.8457949757575989, "learning_rate": 3.5940376150460184e-05, "loss": 0.5637, "step": 7037 }, { "epoch": 9.00864, "grad_norm": 0.8400401473045349, "learning_rate": 3.5938375350140055e-05, "loss": 0.6053, "step": 7038 }, { "epoch": 9.00992, "grad_norm": 0.8000286221504211, "learning_rate": 3.593637454981993e-05, "loss": 0.5456, "step": 7039 }, { "epoch": 9.0112, "grad_norm": 0.8358906507492065, "learning_rate": 3.59343737494998e-05, "loss": 0.5632, "step": 7040 }, { "epoch": 9.01248, "grad_norm": 0.8368620872497559, "learning_rate": 3.593237294917967e-05, "loss": 0.5464, "step": 7041 }, { "epoch": 9.01376, "grad_norm": 0.8233303427696228, "learning_rate": 3.593037214885955e-05, "loss": 0.5641, "step": 7042 }, { "epoch": 9.01504, "grad_norm": 0.8248091340065002, "learning_rate": 3.592837134853942e-05, "loss": 0.5642, "step": 7043 }, { "epoch": 9.01632, "grad_norm": 0.7816410660743713, "learning_rate": 3.592637054821929e-05, "loss": 0.557, "step": 7044 }, { "epoch": 9.0176, "grad_norm": 0.8148441314697266, "learning_rate": 3.592436974789916e-05, "loss": 0.5754, "step": 7045 }, { "epoch": 9.01888, "grad_norm": 0.846493661403656, "learning_rate": 3.592236894757903e-05, "loss": 0.5328, "step": 7046 }, { "epoch": 9.02016, "grad_norm": 0.7609879374504089, "learning_rate": 3.59203681472589e-05, "loss": 0.5445, "step": 7047 }, { "epoch": 9.02144, "grad_norm": 0.7646995186805725, "learning_rate": 3.5918367346938774e-05, "loss": 0.5248, "step": 7048 }, { "epoch": 9.02272, "grad_norm": 0.9177650809288025, "learning_rate": 3.591636654661865e-05, "loss": 0.6154, "step": 7049 }, { "epoch": 9.024, "grad_norm": 0.8574452996253967, "learning_rate": 3.5914365746298525e-05, "loss": 0.5763, "step": 7050 }, { "epoch": 9.02528, "grad_norm": 0.845577597618103, "learning_rate": 3.5912364945978396e-05, "loss": 0.5505, "step": 7051 }, { "epoch": 9.02656, "grad_norm": 0.8447266817092896, "learning_rate": 3.591036414565827e-05, 
"loss": 0.5625, "step": 7052 }, { "epoch": 9.02784, "grad_norm": 0.8364947438240051, "learning_rate": 3.590836334533813e-05, "loss": 0.6222, "step": 7053 }, { "epoch": 9.02912, "grad_norm": 0.7808526158332825, "learning_rate": 3.5906362545018005e-05, "loss": 0.5635, "step": 7054 }, { "epoch": 9.0304, "grad_norm": 0.8134317994117737, "learning_rate": 3.590436174469788e-05, "loss": 0.5632, "step": 7055 }, { "epoch": 9.03168, "grad_norm": 0.833970844745636, "learning_rate": 3.5902360944377756e-05, "loss": 0.5108, "step": 7056 }, { "epoch": 9.03296, "grad_norm": 0.8089005947113037, "learning_rate": 3.590036014405763e-05, "loss": 0.548, "step": 7057 }, { "epoch": 9.03424, "grad_norm": 0.8224313855171204, "learning_rate": 3.58983593437375e-05, "loss": 0.5738, "step": 7058 }, { "epoch": 9.03552, "grad_norm": 0.8225283026695251, "learning_rate": 3.589635854341737e-05, "loss": 0.5791, "step": 7059 }, { "epoch": 9.0368, "grad_norm": 0.8561593890190125, "learning_rate": 3.589435774309724e-05, "loss": 0.6185, "step": 7060 }, { "epoch": 9.03808, "grad_norm": 0.803023636341095, "learning_rate": 3.589235694277711e-05, "loss": 0.5655, "step": 7061 }, { "epoch": 9.03936, "grad_norm": 0.8006875514984131, "learning_rate": 3.589035614245698e-05, "loss": 0.5293, "step": 7062 }, { "epoch": 9.04064, "grad_norm": 0.7813231348991394, "learning_rate": 3.588835534213686e-05, "loss": 0.5739, "step": 7063 }, { "epoch": 9.04192, "grad_norm": 0.7644641995429993, "learning_rate": 3.588635454181673e-05, "loss": 0.524, "step": 7064 }, { "epoch": 9.0432, "grad_norm": 0.8366073369979858, "learning_rate": 3.58843537414966e-05, "loss": 0.5816, "step": 7065 }, { "epoch": 9.04448, "grad_norm": 0.8413146138191223, "learning_rate": 3.5882352941176474e-05, "loss": 0.575, "step": 7066 }, { "epoch": 9.04576, "grad_norm": 0.868990957736969, "learning_rate": 3.5880352140856346e-05, "loss": 0.5821, "step": 7067 }, { "epoch": 9.04704, "grad_norm": 0.8294727206230164, "learning_rate": 3.587835134053622e-05, 
"loss": 0.5557, "step": 7068 }, { "epoch": 9.04832, "grad_norm": 0.8696639537811279, "learning_rate": 3.587635054021608e-05, "loss": 0.6255, "step": 7069 }, { "epoch": 9.0496, "grad_norm": 0.8515434861183167, "learning_rate": 3.587434973989596e-05, "loss": 0.625, "step": 7070 }, { "epoch": 9.05088, "grad_norm": 0.8410640358924866, "learning_rate": 3.5872348939575834e-05, "loss": 0.6161, "step": 7071 }, { "epoch": 9.05216, "grad_norm": 0.8619793653488159, "learning_rate": 3.5870348139255705e-05, "loss": 0.5977, "step": 7072 }, { "epoch": 9.05344, "grad_norm": 0.8171403408050537, "learning_rate": 3.586834733893558e-05, "loss": 0.5569, "step": 7073 }, { "epoch": 9.05472, "grad_norm": 0.823085367679596, "learning_rate": 3.586634653861545e-05, "loss": 0.5687, "step": 7074 }, { "epoch": 9.056, "grad_norm": 0.8016168475151062, "learning_rate": 3.586434573829532e-05, "loss": 0.5622, "step": 7075 }, { "epoch": 9.05728, "grad_norm": 0.8238281607627869, "learning_rate": 3.586234493797519e-05, "loss": 0.5507, "step": 7076 }, { "epoch": 9.05856, "grad_norm": 0.859733521938324, "learning_rate": 3.5860344137655065e-05, "loss": 0.581, "step": 7077 }, { "epoch": 9.05984, "grad_norm": 0.8221707940101624, "learning_rate": 3.5858343337334937e-05, "loss": 0.5824, "step": 7078 }, { "epoch": 9.06112, "grad_norm": 0.818545401096344, "learning_rate": 3.585634253701481e-05, "loss": 0.5601, "step": 7079 }, { "epoch": 9.0624, "grad_norm": 0.8852583765983582, "learning_rate": 3.585434173669468e-05, "loss": 0.584, "step": 7080 }, { "epoch": 9.06368, "grad_norm": 0.8168684244155884, "learning_rate": 3.585234093637455e-05, "loss": 0.5746, "step": 7081 }, { "epoch": 9.06496, "grad_norm": 0.77977454662323, "learning_rate": 3.5850340136054424e-05, "loss": 0.5343, "step": 7082 }, { "epoch": 9.06624, "grad_norm": 0.8777265548706055, "learning_rate": 3.5848339335734296e-05, "loss": 0.6393, "step": 7083 }, { "epoch": 9.06752, "grad_norm": 0.8020537495613098, "learning_rate": 3.584633853541417e-05, 
"loss": 0.5444, "step": 7084 }, { "epoch": 9.0688, "grad_norm": 0.7844178080558777, "learning_rate": 3.584433773509404e-05, "loss": 0.5409, "step": 7085 }, { "epoch": 9.07008, "grad_norm": 0.8328168392181396, "learning_rate": 3.584233693477391e-05, "loss": 0.5712, "step": 7086 }, { "epoch": 9.07136, "grad_norm": 0.8295521140098572, "learning_rate": 3.584033613445378e-05, "loss": 0.5724, "step": 7087 }, { "epoch": 9.07264, "grad_norm": 0.8165435791015625, "learning_rate": 3.5838335334133655e-05, "loss": 0.5741, "step": 7088 }, { "epoch": 9.07392, "grad_norm": 0.7974668145179749, "learning_rate": 3.583633453381353e-05, "loss": 0.5751, "step": 7089 }, { "epoch": 9.0752, "grad_norm": 0.8819810152053833, "learning_rate": 3.58343337334934e-05, "loss": 0.5696, "step": 7090 }, { "epoch": 9.07648, "grad_norm": 0.8275978565216064, "learning_rate": 3.583233293317327e-05, "loss": 0.5667, "step": 7091 }, { "epoch": 9.07776, "grad_norm": 0.8336565494537354, "learning_rate": 3.583033213285314e-05, "loss": 0.5487, "step": 7092 }, { "epoch": 9.079039999999999, "grad_norm": 0.8257220983505249, "learning_rate": 3.5828331332533014e-05, "loss": 0.5878, "step": 7093 }, { "epoch": 9.08032, "grad_norm": 0.8719106316566467, "learning_rate": 3.5826330532212886e-05, "loss": 0.5884, "step": 7094 }, { "epoch": 9.0816, "grad_norm": 0.822557270526886, "learning_rate": 3.582432973189276e-05, "loss": 0.5495, "step": 7095 }, { "epoch": 9.08288, "grad_norm": 0.8632369637489319, "learning_rate": 3.582232893157263e-05, "loss": 0.631, "step": 7096 }, { "epoch": 9.08416, "grad_norm": 0.7848061919212341, "learning_rate": 3.58203281312525e-05, "loss": 0.5497, "step": 7097 }, { "epoch": 9.08544, "grad_norm": 0.8202498555183411, "learning_rate": 3.5818327330932374e-05, "loss": 0.5686, "step": 7098 }, { "epoch": 9.08672, "grad_norm": 0.8682575225830078, "learning_rate": 3.5816326530612245e-05, "loss": 0.5808, "step": 7099 }, { "epoch": 9.088, "grad_norm": 0.8932950496673584, "learning_rate": 
3.581432573029212e-05, "loss": 0.5941, "step": 7100 }, { "epoch": 9.08928, "grad_norm": 0.8102815747261047, "learning_rate": 3.581232492997199e-05, "loss": 0.5704, "step": 7101 }, { "epoch": 9.09056, "grad_norm": 0.848307192325592, "learning_rate": 3.581032412965186e-05, "loss": 0.5954, "step": 7102 }, { "epoch": 9.09184, "grad_norm": 0.8251248598098755, "learning_rate": 3.580832332933173e-05, "loss": 0.599, "step": 7103 }, { "epoch": 9.09312, "grad_norm": 0.8306258916854858, "learning_rate": 3.5806322529011605e-05, "loss": 0.5746, "step": 7104 }, { "epoch": 9.0944, "grad_norm": 0.8358504176139832, "learning_rate": 3.580432172869148e-05, "loss": 0.5811, "step": 7105 }, { "epoch": 9.09568, "grad_norm": 0.8394206762313843, "learning_rate": 3.5802320928371355e-05, "loss": 0.5633, "step": 7106 }, { "epoch": 9.09696, "grad_norm": 0.7698180079460144, "learning_rate": 3.580032012805122e-05, "loss": 0.5287, "step": 7107 }, { "epoch": 9.09824, "grad_norm": 0.8235006928443909, "learning_rate": 3.579831932773109e-05, "loss": 0.575, "step": 7108 }, { "epoch": 9.09952, "grad_norm": 0.8552541136741638, "learning_rate": 3.5796318527410964e-05, "loss": 0.5888, "step": 7109 }, { "epoch": 9.1008, "grad_norm": 0.7659354209899902, "learning_rate": 3.5794317727090836e-05, "loss": 0.5523, "step": 7110 }, { "epoch": 9.10208, "grad_norm": 0.8290457129478455, "learning_rate": 3.579231692677071e-05, "loss": 0.5812, "step": 7111 }, { "epoch": 9.10336, "grad_norm": 0.8734808564186096, "learning_rate": 3.579031612645058e-05, "loss": 0.5864, "step": 7112 }, { "epoch": 9.10464, "grad_norm": 0.8360257148742676, "learning_rate": 3.578831532613046e-05, "loss": 0.6079, "step": 7113 }, { "epoch": 9.10592, "grad_norm": 0.8263709545135498, "learning_rate": 3.578631452581033e-05, "loss": 0.6076, "step": 7114 }, { "epoch": 9.1072, "grad_norm": 0.8358266949653625, "learning_rate": 3.5784313725490195e-05, "loss": 0.5644, "step": 7115 }, { "epoch": 9.10848, "grad_norm": 0.9020827412605286, "learning_rate": 
3.578231292517007e-05, "loss": 0.6079, "step": 7116 }, { "epoch": 9.10976, "grad_norm": 0.8776836395263672, "learning_rate": 3.578031212484994e-05, "loss": 0.5905, "step": 7117 }, { "epoch": 9.11104, "grad_norm": 0.8753827214241028, "learning_rate": 3.577831132452981e-05, "loss": 0.5962, "step": 7118 }, { "epoch": 9.11232, "grad_norm": 0.8712731599807739, "learning_rate": 3.577631052420968e-05, "loss": 0.5731, "step": 7119 }, { "epoch": 9.1136, "grad_norm": 0.8524981141090393, "learning_rate": 3.577430972388956e-05, "loss": 0.5583, "step": 7120 }, { "epoch": 9.11488, "grad_norm": 0.8630044460296631, "learning_rate": 3.577230892356943e-05, "loss": 0.5547, "step": 7121 }, { "epoch": 9.11616, "grad_norm": 0.7815329432487488, "learning_rate": 3.5770308123249305e-05, "loss": 0.5294, "step": 7122 }, { "epoch": 9.11744, "grad_norm": 0.8376869559288025, "learning_rate": 3.576830732292917e-05, "loss": 0.599, "step": 7123 }, { "epoch": 9.11872, "grad_norm": 0.8631175756454468, "learning_rate": 3.576630652260904e-05, "loss": 0.5887, "step": 7124 }, { "epoch": 9.12, "grad_norm": 0.8864824175834656, "learning_rate": 3.5764305722288914e-05, "loss": 0.571, "step": 7125 }, { "epoch": 9.12128, "grad_norm": 0.8389583230018616, "learning_rate": 3.5762304921968786e-05, "loss": 0.5336, "step": 7126 }, { "epoch": 9.12256, "grad_norm": 0.838886022567749, "learning_rate": 3.5760304121648664e-05, "loss": 0.5679, "step": 7127 }, { "epoch": 9.12384, "grad_norm": 0.850913941860199, "learning_rate": 3.5758303321328536e-05, "loss": 0.5632, "step": 7128 }, { "epoch": 9.12512, "grad_norm": 0.8992368578910828, "learning_rate": 3.575630252100841e-05, "loss": 0.598, "step": 7129 }, { "epoch": 9.1264, "grad_norm": 0.8634032011032104, "learning_rate": 3.575430172068828e-05, "loss": 0.6198, "step": 7130 }, { "epoch": 9.12768, "grad_norm": 0.8508159518241882, "learning_rate": 3.5752300920368145e-05, "loss": 0.5757, "step": 7131 }, { "epoch": 9.12896, "grad_norm": 0.8230028748512268, "learning_rate": 
3.575030012004802e-05, "loss": 0.5853, "step": 7132 }, { "epoch": 9.13024, "grad_norm": 0.8228382468223572, "learning_rate": 3.574829931972789e-05, "loss": 0.5304, "step": 7133 }, { "epoch": 9.13152, "grad_norm": 0.8506357669830322, "learning_rate": 3.574629851940777e-05, "loss": 0.5977, "step": 7134 }, { "epoch": 9.1328, "grad_norm": 0.8166884779930115, "learning_rate": 3.574429771908764e-05, "loss": 0.5696, "step": 7135 }, { "epoch": 9.13408, "grad_norm": 0.7951632142066956, "learning_rate": 3.574229691876751e-05, "loss": 0.5402, "step": 7136 }, { "epoch": 9.13536, "grad_norm": 0.8789847493171692, "learning_rate": 3.574029611844738e-05, "loss": 0.5845, "step": 7137 }, { "epoch": 9.13664, "grad_norm": 0.8529048562049866, "learning_rate": 3.5738295318127255e-05, "loss": 0.584, "step": 7138 }, { "epoch": 9.13792, "grad_norm": 0.8400517702102661, "learning_rate": 3.573629451780712e-05, "loss": 0.585, "step": 7139 }, { "epoch": 9.1392, "grad_norm": 0.8045387268066406, "learning_rate": 3.573429371748699e-05, "loss": 0.576, "step": 7140 }, { "epoch": 9.14048, "grad_norm": 0.8386081457138062, "learning_rate": 3.573229291716687e-05, "loss": 0.6016, "step": 7141 }, { "epoch": 9.14176, "grad_norm": 0.8390840291976929, "learning_rate": 3.573029211684674e-05, "loss": 0.5581, "step": 7142 }, { "epoch": 9.14304, "grad_norm": 0.8972001671791077, "learning_rate": 3.5728291316526614e-05, "loss": 0.593, "step": 7143 }, { "epoch": 9.14432, "grad_norm": 0.8241517543792725, "learning_rate": 3.5726290516206486e-05, "loss": 0.5142, "step": 7144 }, { "epoch": 9.1456, "grad_norm": 0.8427436947822571, "learning_rate": 3.572428971588636e-05, "loss": 0.6082, "step": 7145 }, { "epoch": 9.14688, "grad_norm": 0.8868518471717834, "learning_rate": 3.572228891556623e-05, "loss": 0.5601, "step": 7146 }, { "epoch": 9.14816, "grad_norm": 0.8616020679473877, "learning_rate": 3.5720288115246095e-05, "loss": 0.589, "step": 7147 }, { "epoch": 9.14944, "grad_norm": 0.8238210082054138, "learning_rate": 
3.571828731492597e-05, "loss": 0.5611, "step": 7148 }, { "epoch": 9.15072, "grad_norm": 0.7928246259689331, "learning_rate": 3.5716286514605845e-05, "loss": 0.5399, "step": 7149 }, { "epoch": 9.152, "grad_norm": 0.8270377516746521, "learning_rate": 3.571428571428572e-05, "loss": 0.5782, "step": 7150 }, { "epoch": 9.15328, "grad_norm": 0.8852272033691406, "learning_rate": 3.571228491396559e-05, "loss": 0.5984, "step": 7151 }, { "epoch": 9.15456, "grad_norm": 0.8285141587257385, "learning_rate": 3.571028411364546e-05, "loss": 0.5448, "step": 7152 }, { "epoch": 9.15584, "grad_norm": 0.8800240159034729, "learning_rate": 3.570828331332533e-05, "loss": 0.6208, "step": 7153 }, { "epoch": 9.15712, "grad_norm": 0.8323574662208557, "learning_rate": 3.5706282513005204e-05, "loss": 0.5727, "step": 7154 }, { "epoch": 9.1584, "grad_norm": 0.798857569694519, "learning_rate": 3.5704281712685076e-05, "loss": 0.5719, "step": 7155 }, { "epoch": 9.15968, "grad_norm": 0.8088046312332153, "learning_rate": 3.570228091236495e-05, "loss": 0.545, "step": 7156 }, { "epoch": 9.16096, "grad_norm": 0.8132175207138062, "learning_rate": 3.570028011204482e-05, "loss": 0.5349, "step": 7157 }, { "epoch": 9.16224, "grad_norm": 0.8076733946800232, "learning_rate": 3.569827931172469e-05, "loss": 0.549, "step": 7158 }, { "epoch": 9.16352, "grad_norm": 0.8983336687088013, "learning_rate": 3.5696278511404564e-05, "loss": 0.5902, "step": 7159 }, { "epoch": 9.1648, "grad_norm": 0.8289273381233215, "learning_rate": 3.5694277711084436e-05, "loss": 0.5484, "step": 7160 }, { "epoch": 9.166080000000001, "grad_norm": 0.8771588206291199, "learning_rate": 3.569227691076431e-05, "loss": 0.5911, "step": 7161 }, { "epoch": 9.16736, "grad_norm": 0.8836865425109863, "learning_rate": 3.569027611044418e-05, "loss": 0.5916, "step": 7162 }, { "epoch": 9.16864, "grad_norm": 0.8692944049835205, "learning_rate": 3.568827531012405e-05, "loss": 0.6283, "step": 7163 }, { "epoch": 9.16992, "grad_norm": 0.8750563263893127, 
"learning_rate": 3.568627450980392e-05, "loss": 0.5961, "step": 7164 }, { "epoch": 9.1712, "grad_norm": 0.9316475987434387, "learning_rate": 3.5684273709483795e-05, "loss": 0.6031, "step": 7165 }, { "epoch": 9.17248, "grad_norm": 0.9066860675811768, "learning_rate": 3.568227290916367e-05, "loss": 0.5978, "step": 7166 }, { "epoch": 9.17376, "grad_norm": 0.8408805727958679, "learning_rate": 3.568027210884354e-05, "loss": 0.5614, "step": 7167 }, { "epoch": 9.17504, "grad_norm": 0.9087552428245544, "learning_rate": 3.567827130852341e-05, "loss": 0.6356, "step": 7168 }, { "epoch": 9.17632, "grad_norm": 0.8625233769416809, "learning_rate": 3.567627050820328e-05, "loss": 0.5493, "step": 7169 }, { "epoch": 9.1776, "grad_norm": 0.9364339709281921, "learning_rate": 3.5674269707883154e-05, "loss": 0.5999, "step": 7170 }, { "epoch": 9.17888, "grad_norm": 0.895353376865387, "learning_rate": 3.5672268907563026e-05, "loss": 0.5865, "step": 7171 }, { "epoch": 9.18016, "grad_norm": 0.8568089008331299, "learning_rate": 3.56702681072429e-05, "loss": 0.5691, "step": 7172 }, { "epoch": 9.18144, "grad_norm": 0.9034045934677124, "learning_rate": 3.566826730692277e-05, "loss": 0.6174, "step": 7173 }, { "epoch": 9.18272, "grad_norm": 0.8543916344642639, "learning_rate": 3.566626650660264e-05, "loss": 0.5903, "step": 7174 }, { "epoch": 9.184, "grad_norm": 0.7896568775177002, "learning_rate": 3.566426570628251e-05, "loss": 0.5389, "step": 7175 }, { "epoch": 9.18528, "grad_norm": 0.8801631927490234, "learning_rate": 3.566226490596239e-05, "loss": 0.5925, "step": 7176 }, { "epoch": 9.18656, "grad_norm": 0.8208610415458679, "learning_rate": 3.566026410564226e-05, "loss": 0.5339, "step": 7177 }, { "epoch": 9.18784, "grad_norm": 0.8739574551582336, "learning_rate": 3.565826330532213e-05, "loss": 0.5946, "step": 7178 }, { "epoch": 9.18912, "grad_norm": 0.8522581458091736, "learning_rate": 3.5656262505002e-05, "loss": 0.604, "step": 7179 }, { "epoch": 9.1904, "grad_norm": 0.8527406454086304, 
"learning_rate": 3.565426170468187e-05, "loss": 0.5349, "step": 7180 }, { "epoch": 9.19168, "grad_norm": 0.796159029006958, "learning_rate": 3.5652260904361745e-05, "loss": 0.5852, "step": 7181 }, { "epoch": 9.19296, "grad_norm": 0.8322233557701111, "learning_rate": 3.5650260104041616e-05, "loss": 0.5491, "step": 7182 }, { "epoch": 9.19424, "grad_norm": 0.8693201541900635, "learning_rate": 3.5648259303721495e-05, "loss": 0.6308, "step": 7183 }, { "epoch": 9.19552, "grad_norm": 0.8879520297050476, "learning_rate": 3.564625850340137e-05, "loss": 0.5455, "step": 7184 }, { "epoch": 9.1968, "grad_norm": 0.8827615976333618, "learning_rate": 3.564425770308123e-05, "loss": 0.5712, "step": 7185 }, { "epoch": 9.19808, "grad_norm": 0.8380709886550903, "learning_rate": 3.5642256902761104e-05, "loss": 0.5884, "step": 7186 }, { "epoch": 9.19936, "grad_norm": 0.8361532092094421, "learning_rate": 3.5640256102440976e-05, "loss": 0.5282, "step": 7187 }, { "epoch": 9.20064, "grad_norm": 0.8010978102684021, "learning_rate": 3.563825530212085e-05, "loss": 0.5372, "step": 7188 }, { "epoch": 9.20192, "grad_norm": 0.8207326531410217, "learning_rate": 3.563625450180072e-05, "loss": 0.5496, "step": 7189 }, { "epoch": 9.2032, "grad_norm": 0.9393238425254822, "learning_rate": 3.56342537014806e-05, "loss": 0.6691, "step": 7190 }, { "epoch": 9.20448, "grad_norm": 0.9167293310165405, "learning_rate": 3.563225290116047e-05, "loss": 0.577, "step": 7191 }, { "epoch": 9.20576, "grad_norm": 0.8757715821266174, "learning_rate": 3.563025210084034e-05, "loss": 0.56, "step": 7192 }, { "epoch": 9.20704, "grad_norm": 0.8311654329299927, "learning_rate": 3.562825130052021e-05, "loss": 0.5641, "step": 7193 }, { "epoch": 9.20832, "grad_norm": 0.8445587754249573, "learning_rate": 3.562625050020008e-05, "loss": 0.5876, "step": 7194 }, { "epoch": 9.2096, "grad_norm": 0.805767834186554, "learning_rate": 3.562424969987995e-05, "loss": 0.5486, "step": 7195 }, { "epoch": 9.21088, "grad_norm": 0.8612878918647766, 
"learning_rate": 3.562224889955982e-05, "loss": 0.5913, "step": 7196 }, { "epoch": 9.21216, "grad_norm": 0.7950903177261353, "learning_rate": 3.56202480992397e-05, "loss": 0.5245, "step": 7197 }, { "epoch": 9.21344, "grad_norm": 0.8570892214775085, "learning_rate": 3.561824729891957e-05, "loss": 0.5713, "step": 7198 }, { "epoch": 9.21472, "grad_norm": 0.8819652795791626, "learning_rate": 3.5616246498599445e-05, "loss": 0.6082, "step": 7199 }, { "epoch": 9.216, "grad_norm": 0.8352935910224915, "learning_rate": 3.5614245698279317e-05, "loss": 0.5274, "step": 7200 }, { "epoch": 9.21728, "grad_norm": 0.8423967957496643, "learning_rate": 3.561224489795918e-05, "loss": 0.5569, "step": 7201 }, { "epoch": 9.21856, "grad_norm": 0.8680324554443359, "learning_rate": 3.5610244097639054e-05, "loss": 0.5971, "step": 7202 }, { "epoch": 9.21984, "grad_norm": 0.8493099808692932, "learning_rate": 3.5608243297318925e-05, "loss": 0.5817, "step": 7203 }, { "epoch": 9.22112, "grad_norm": 0.8210243582725525, "learning_rate": 3.5606242496998804e-05, "loss": 0.5623, "step": 7204 }, { "epoch": 9.2224, "grad_norm": 0.8306945562362671, "learning_rate": 3.5604241696678676e-05, "loss": 0.5609, "step": 7205 }, { "epoch": 9.22368, "grad_norm": 0.8076730370521545, "learning_rate": 3.560224089635855e-05, "loss": 0.5814, "step": 7206 }, { "epoch": 9.22496, "grad_norm": 0.8856160640716553, "learning_rate": 3.560024009603842e-05, "loss": 0.614, "step": 7207 }, { "epoch": 9.22624, "grad_norm": 0.8176262974739075, "learning_rate": 3.559823929571829e-05, "loss": 0.5156, "step": 7208 }, { "epoch": 9.22752, "grad_norm": 0.8237152099609375, "learning_rate": 3.5596238495398157e-05, "loss": 0.6039, "step": 7209 }, { "epoch": 9.2288, "grad_norm": 0.8996145129203796, "learning_rate": 3.559423769507803e-05, "loss": 0.5799, "step": 7210 }, { "epoch": 9.23008, "grad_norm": 0.8621578216552734, "learning_rate": 3.55922368947579e-05, "loss": 0.5983, "step": 7211 }, { "epoch": 9.23136, "grad_norm": 0.834199845790863, 
"learning_rate": 3.559023609443778e-05, "loss": 0.584, "step": 7212 }, { "epoch": 9.23264, "grad_norm": 0.8422051072120667, "learning_rate": 3.558823529411765e-05, "loss": 0.6015, "step": 7213 }, { "epoch": 9.23392, "grad_norm": 0.8835949301719666, "learning_rate": 3.558623449379752e-05, "loss": 0.6181, "step": 7214 }, { "epoch": 9.2352, "grad_norm": 0.8236482739448547, "learning_rate": 3.5584233693477394e-05, "loss": 0.5114, "step": 7215 }, { "epoch": 9.23648, "grad_norm": 0.8392600417137146, "learning_rate": 3.5582232893157266e-05, "loss": 0.5606, "step": 7216 }, { "epoch": 9.23776, "grad_norm": 0.8492880463600159, "learning_rate": 3.558023209283713e-05, "loss": 0.5595, "step": 7217 }, { "epoch": 9.23904, "grad_norm": 0.8469056487083435, "learning_rate": 3.5578231292517e-05, "loss": 0.5619, "step": 7218 }, { "epoch": 9.24032, "grad_norm": 0.8622716069221497, "learning_rate": 3.557623049219688e-05, "loss": 0.5764, "step": 7219 }, { "epoch": 9.2416, "grad_norm": 0.8697147369384766, "learning_rate": 3.5574229691876754e-05, "loss": 0.6487, "step": 7220 }, { "epoch": 9.24288, "grad_norm": 0.8444170355796814, "learning_rate": 3.5572228891556626e-05, "loss": 0.5976, "step": 7221 }, { "epoch": 9.24416, "grad_norm": 0.9171527028083801, "learning_rate": 3.55702280912365e-05, "loss": 0.5975, "step": 7222 }, { "epoch": 9.24544, "grad_norm": 0.8756519556045532, "learning_rate": 3.556822729091637e-05, "loss": 0.6635, "step": 7223 }, { "epoch": 9.24672, "grad_norm": 0.8335146903991699, "learning_rate": 3.556622649059624e-05, "loss": 0.5707, "step": 7224 }, { "epoch": 9.248, "grad_norm": 0.8439629673957825, "learning_rate": 3.5564225690276106e-05, "loss": 0.5443, "step": 7225 }, { "epoch": 9.24928, "grad_norm": 0.8886339664459229, "learning_rate": 3.5562224889955985e-05, "loss": 0.6433, "step": 7226 }, { "epoch": 9.25056, "grad_norm": 0.8498263359069824, "learning_rate": 3.556022408963586e-05, "loss": 0.5432, "step": 7227 }, { "epoch": 9.25184, "grad_norm": 0.8150326609611511, 
"learning_rate": 3.555822328931573e-05, "loss": 0.5489, "step": 7228 }, { "epoch": 9.25312, "grad_norm": 0.8487164974212646, "learning_rate": 3.55562224889956e-05, "loss": 0.6123, "step": 7229 }, { "epoch": 9.2544, "grad_norm": 0.8371261358261108, "learning_rate": 3.555422168867547e-05, "loss": 0.586, "step": 7230 }, { "epoch": 9.25568, "grad_norm": 0.7906522154808044, "learning_rate": 3.5552220888355344e-05, "loss": 0.5475, "step": 7231 }, { "epoch": 9.25696, "grad_norm": 0.8361690640449524, "learning_rate": 3.5550220088035216e-05, "loss": 0.5587, "step": 7232 }, { "epoch": 9.25824, "grad_norm": 0.8770825266838074, "learning_rate": 3.554821928771509e-05, "loss": 0.5292, "step": 7233 }, { "epoch": 9.25952, "grad_norm": 0.8710092306137085, "learning_rate": 3.554621848739496e-05, "loss": 0.5879, "step": 7234 }, { "epoch": 9.2608, "grad_norm": 0.9107195734977722, "learning_rate": 3.554421768707483e-05, "loss": 0.6258, "step": 7235 }, { "epoch": 9.26208, "grad_norm": 0.8514677286148071, "learning_rate": 3.5542216886754703e-05, "loss": 0.5595, "step": 7236 }, { "epoch": 9.26336, "grad_norm": 0.806038498878479, "learning_rate": 3.5540216086434575e-05, "loss": 0.5441, "step": 7237 }, { "epoch": 9.26464, "grad_norm": 0.8175530433654785, "learning_rate": 3.553821528611445e-05, "loss": 0.5597, "step": 7238 }, { "epoch": 9.26592, "grad_norm": 0.8429561257362366, "learning_rate": 3.553621448579432e-05, "loss": 0.6029, "step": 7239 }, { "epoch": 9.2672, "grad_norm": 0.7659432888031006, "learning_rate": 3.553421368547419e-05, "loss": 0.4918, "step": 7240 }, { "epoch": 9.26848, "grad_norm": 0.8414883613586426, "learning_rate": 3.553221288515406e-05, "loss": 0.5624, "step": 7241 }, { "epoch": 9.26976, "grad_norm": 0.8417079448699951, "learning_rate": 3.5530212084833935e-05, "loss": 0.5585, "step": 7242 }, { "epoch": 9.27104, "grad_norm": 0.8089073300361633, "learning_rate": 3.5528211284513806e-05, "loss": 0.5623, "step": 7243 }, { "epoch": 9.27232, "grad_norm": 0.8453049063682556, 
"learning_rate": 3.552621048419368e-05, "loss": 0.5725, "step": 7244 }, { "epoch": 9.2736, "grad_norm": 0.8788660168647766, "learning_rate": 3.552420968387355e-05, "loss": 0.6243, "step": 7245 }, { "epoch": 9.27488, "grad_norm": 0.8520131707191467, "learning_rate": 3.552220888355342e-05, "loss": 0.5707, "step": 7246 }, { "epoch": 9.27616, "grad_norm": 0.8138136863708496, "learning_rate": 3.55202080832333e-05, "loss": 0.5788, "step": 7247 }, { "epoch": 9.27744, "grad_norm": 0.8127238750457764, "learning_rate": 3.5518207282913166e-05, "loss": 0.5925, "step": 7248 }, { "epoch": 9.27872, "grad_norm": 0.7981357574462891, "learning_rate": 3.551620648259304e-05, "loss": 0.5755, "step": 7249 }, { "epoch": 9.28, "grad_norm": 0.8368809223175049, "learning_rate": 3.551420568227291e-05, "loss": 0.5962, "step": 7250 }, { "epoch": 9.28128, "grad_norm": 0.832737386226654, "learning_rate": 3.551220488195278e-05, "loss": 0.5504, "step": 7251 }, { "epoch": 9.28256, "grad_norm": 0.9122929573059082, "learning_rate": 3.551020408163265e-05, "loss": 0.6593, "step": 7252 }, { "epoch": 9.28384, "grad_norm": 0.8156841397285461, "learning_rate": 3.5508203281312525e-05, "loss": 0.5478, "step": 7253 }, { "epoch": 9.28512, "grad_norm": 0.8393061757087708, "learning_rate": 3.5506202480992404e-05, "loss": 0.5744, "step": 7254 }, { "epoch": 9.2864, "grad_norm": 0.8752142786979675, "learning_rate": 3.5504201680672275e-05, "loss": 0.6099, "step": 7255 }, { "epoch": 9.28768, "grad_norm": 0.8940780162811279, "learning_rate": 3.550220088035214e-05, "loss": 0.6174, "step": 7256 }, { "epoch": 9.28896, "grad_norm": 0.8913589119911194, "learning_rate": 3.550020008003201e-05, "loss": 0.5515, "step": 7257 }, { "epoch": 9.29024, "grad_norm": 0.8833445310592651, "learning_rate": 3.5498199279711884e-05, "loss": 0.6179, "step": 7258 }, { "epoch": 9.29152, "grad_norm": 0.8273428678512573, "learning_rate": 3.5496198479391756e-05, "loss": 0.5577, "step": 7259 }, { "epoch": 9.2928, "grad_norm": 0.8162719011306763, 
"learning_rate": 3.549419767907163e-05, "loss": 0.5771, "step": 7260 }, { "epoch": 9.29408, "grad_norm": 0.846421480178833, "learning_rate": 3.549219687875151e-05, "loss": 0.5642, "step": 7261 }, { "epoch": 9.29536, "grad_norm": 0.8472880125045776, "learning_rate": 3.549019607843138e-05, "loss": 0.6061, "step": 7262 }, { "epoch": 9.29664, "grad_norm": 0.8349382877349854, "learning_rate": 3.548819527811125e-05, "loss": 0.6095, "step": 7263 }, { "epoch": 9.29792, "grad_norm": 0.8894979953765869, "learning_rate": 3.5486194477791115e-05, "loss": 0.6172, "step": 7264 }, { "epoch": 9.2992, "grad_norm": 0.8571387529373169, "learning_rate": 3.548419367747099e-05, "loss": 0.5449, "step": 7265 }, { "epoch": 9.30048, "grad_norm": 0.898956835269928, "learning_rate": 3.548219287715086e-05, "loss": 0.64, "step": 7266 }, { "epoch": 9.30176, "grad_norm": 0.8103107213973999, "learning_rate": 3.548019207683073e-05, "loss": 0.5698, "step": 7267 }, { "epoch": 9.30304, "grad_norm": 0.8312423825263977, "learning_rate": 3.547819127651061e-05, "loss": 0.612, "step": 7268 }, { "epoch": 9.30432, "grad_norm": 0.7823015451431274, "learning_rate": 3.547619047619048e-05, "loss": 0.5439, "step": 7269 }, { "epoch": 9.3056, "grad_norm": 0.8299819231033325, "learning_rate": 3.547418967587035e-05, "loss": 0.5612, "step": 7270 }, { "epoch": 9.30688, "grad_norm": 0.8400328159332275, "learning_rate": 3.5472188875550225e-05, "loss": 0.5443, "step": 7271 }, { "epoch": 9.30816, "grad_norm": 0.7800350189208984, "learning_rate": 3.547018807523009e-05, "loss": 0.5261, "step": 7272 }, { "epoch": 9.30944, "grad_norm": 0.847671389579773, "learning_rate": 3.546818727490996e-05, "loss": 0.5819, "step": 7273 }, { "epoch": 9.31072, "grad_norm": 0.8879785537719727, "learning_rate": 3.5466186474589834e-05, "loss": 0.5905, "step": 7274 }, { "epoch": 9.312, "grad_norm": 0.8334875702857971, "learning_rate": 3.546418567426971e-05, "loss": 0.5622, "step": 7275 }, { "epoch": 9.31328, "grad_norm": 0.8510679006576538, 
"learning_rate": 3.5462184873949584e-05, "loss": 0.5825, "step": 7276 }, { "epoch": 9.31456, "grad_norm": 0.8725741505622864, "learning_rate": 3.5460184073629456e-05, "loss": 0.6079, "step": 7277 }, { "epoch": 9.31584, "grad_norm": 0.8539466261863708, "learning_rate": 3.545818327330933e-05, "loss": 0.5574, "step": 7278 }, { "epoch": 9.31712, "grad_norm": 0.8324747681617737, "learning_rate": 3.54561824729892e-05, "loss": 0.5647, "step": 7279 }, { "epoch": 9.3184, "grad_norm": 0.8533186912536621, "learning_rate": 3.5454181672669065e-05, "loss": 0.527, "step": 7280 }, { "epoch": 9.31968, "grad_norm": 0.8378028273582458, "learning_rate": 3.545218087234894e-05, "loss": 0.5808, "step": 7281 }, { "epoch": 9.32096, "grad_norm": 0.8582174777984619, "learning_rate": 3.5450180072028816e-05, "loss": 0.6096, "step": 7282 }, { "epoch": 9.32224, "grad_norm": 0.8167418837547302, "learning_rate": 3.544817927170869e-05, "loss": 0.5209, "step": 7283 }, { "epoch": 9.32352, "grad_norm": 0.8674497604370117, "learning_rate": 3.544617847138856e-05, "loss": 0.5823, "step": 7284 }, { "epoch": 9.3248, "grad_norm": 0.8791519999504089, "learning_rate": 3.544417767106843e-05, "loss": 0.5943, "step": 7285 }, { "epoch": 9.32608, "grad_norm": 0.8264080882072449, "learning_rate": 3.54421768707483e-05, "loss": 0.5731, "step": 7286 }, { "epoch": 9.32736, "grad_norm": 0.8929938673973083, "learning_rate": 3.5440176070428175e-05, "loss": 0.606, "step": 7287 }, { "epoch": 9.32864, "grad_norm": 0.8631495237350464, "learning_rate": 3.543817527010804e-05, "loss": 0.5336, "step": 7288 }, { "epoch": 9.32992, "grad_norm": 0.8779585957527161, "learning_rate": 3.543617446978792e-05, "loss": 0.5883, "step": 7289 }, { "epoch": 9.3312, "grad_norm": 0.8582602739334106, "learning_rate": 3.543417366946779e-05, "loss": 0.5969, "step": 7290 }, { "epoch": 9.33248, "grad_norm": 0.9014308452606201, "learning_rate": 3.543217286914766e-05, "loss": 0.5873, "step": 7291 }, { "epoch": 9.33376, "grad_norm": 0.8432881832122803, 
"learning_rate": 3.5430172068827534e-05, "loss": 0.5814, "step": 7292 }, { "epoch": 9.33504, "grad_norm": 0.8336278200149536, "learning_rate": 3.5428171268507406e-05, "loss": 0.528, "step": 7293 }, { "epoch": 9.33632, "grad_norm": 0.8923518657684326, "learning_rate": 3.542617046818728e-05, "loss": 0.6088, "step": 7294 }, { "epoch": 9.3376, "grad_norm": 0.843570351600647, "learning_rate": 3.542416966786715e-05, "loss": 0.5867, "step": 7295 }, { "epoch": 9.33888, "grad_norm": 0.8679114580154419, "learning_rate": 3.542216886754702e-05, "loss": 0.5836, "step": 7296 }, { "epoch": 9.340160000000001, "grad_norm": 0.8755765557289124, "learning_rate": 3.5420168067226893e-05, "loss": 0.6218, "step": 7297 }, { "epoch": 9.34144, "grad_norm": 0.8723339438438416, "learning_rate": 3.5418167266906765e-05, "loss": 0.5534, "step": 7298 }, { "epoch": 9.34272, "grad_norm": 0.8771824240684509, "learning_rate": 3.541616646658664e-05, "loss": 0.6106, "step": 7299 }, { "epoch": 9.344, "grad_norm": 0.7724134922027588, "learning_rate": 3.541416566626651e-05, "loss": 0.5511, "step": 7300 }, { "epoch": 9.34528, "grad_norm": 0.8475801348686218, "learning_rate": 3.541216486594638e-05, "loss": 0.5752, "step": 7301 }, { "epoch": 9.34656, "grad_norm": 0.8831236958503723, "learning_rate": 3.541016406562625e-05, "loss": 0.6072, "step": 7302 }, { "epoch": 9.34784, "grad_norm": 0.8372252583503723, "learning_rate": 3.5408163265306125e-05, "loss": 0.5337, "step": 7303 }, { "epoch": 9.34912, "grad_norm": 0.810483992099762, "learning_rate": 3.5406162464985996e-05, "loss": 0.6066, "step": 7304 }, { "epoch": 9.3504, "grad_norm": 0.8280951380729675, "learning_rate": 3.540416166466587e-05, "loss": 0.5439, "step": 7305 }, { "epoch": 9.35168, "grad_norm": 0.8466097712516785, "learning_rate": 3.540216086434574e-05, "loss": 0.6007, "step": 7306 }, { "epoch": 9.35296, "grad_norm": 0.8468445539474487, "learning_rate": 3.540016006402561e-05, "loss": 0.5211, "step": 7307 }, { "epoch": 9.35424, "grad_norm": 
0.8270046710968018, "learning_rate": 3.5398159263705484e-05, "loss": 0.6036, "step": 7308 }, { "epoch": 9.35552, "grad_norm": 0.8338344693183899, "learning_rate": 3.5396158463385356e-05, "loss": 0.5664, "step": 7309 }, { "epoch": 9.3568, "grad_norm": 0.8763896822929382, "learning_rate": 3.539415766306523e-05, "loss": 0.5601, "step": 7310 }, { "epoch": 9.35808, "grad_norm": 0.8396816849708557, "learning_rate": 3.53921568627451e-05, "loss": 0.5643, "step": 7311 }, { "epoch": 9.35936, "grad_norm": 0.8239136338233948, "learning_rate": 3.539015606242497e-05, "loss": 0.5644, "step": 7312 }, { "epoch": 9.36064, "grad_norm": 0.8570470809936523, "learning_rate": 3.538815526210484e-05, "loss": 0.5655, "step": 7313 }, { "epoch": 9.36192, "grad_norm": 0.8580249547958374, "learning_rate": 3.5386154461784715e-05, "loss": 0.6384, "step": 7314 }, { "epoch": 9.3632, "grad_norm": 0.8450345993041992, "learning_rate": 3.538415366146459e-05, "loss": 0.5922, "step": 7315 }, { "epoch": 9.36448, "grad_norm": 0.8073413968086243, "learning_rate": 3.538215286114446e-05, "loss": 0.5208, "step": 7316 }, { "epoch": 9.36576, "grad_norm": 0.8269434571266174, "learning_rate": 3.538015206082434e-05, "loss": 0.5674, "step": 7317 }, { "epoch": 9.36704, "grad_norm": 0.831274688243866, "learning_rate": 3.53781512605042e-05, "loss": 0.5477, "step": 7318 }, { "epoch": 9.36832, "grad_norm": 0.8366057872772217, "learning_rate": 3.5376150460184074e-05, "loss": 0.561, "step": 7319 }, { "epoch": 9.3696, "grad_norm": 0.8297872543334961, "learning_rate": 3.5374149659863946e-05, "loss": 0.5819, "step": 7320 }, { "epoch": 9.37088, "grad_norm": 0.8322229981422424, "learning_rate": 3.537214885954382e-05, "loss": 0.5315, "step": 7321 }, { "epoch": 9.37216, "grad_norm": 0.8755327463150024, "learning_rate": 3.537014805922369e-05, "loss": 0.5705, "step": 7322 }, { "epoch": 9.37344, "grad_norm": 0.8848744630813599, "learning_rate": 3.536814725890356e-05, "loss": 0.6089, "step": 7323 }, { "epoch": 9.37472, "grad_norm": 
0.7823454737663269, "learning_rate": 3.5366146458583434e-05, "loss": 0.5527, "step": 7324 }, { "epoch": 9.376, "grad_norm": 0.8572680950164795, "learning_rate": 3.536414565826331e-05, "loss": 0.6162, "step": 7325 }, { "epoch": 9.37728, "grad_norm": 0.8322939872741699, "learning_rate": 3.536214485794318e-05, "loss": 0.5732, "step": 7326 }, { "epoch": 9.37856, "grad_norm": 0.87436842918396, "learning_rate": 3.536014405762305e-05, "loss": 0.6265, "step": 7327 }, { "epoch": 9.37984, "grad_norm": 0.8274515867233276, "learning_rate": 3.535814325730292e-05, "loss": 0.5288, "step": 7328 }, { "epoch": 9.38112, "grad_norm": 0.9130957126617432, "learning_rate": 3.535614245698279e-05, "loss": 0.6091, "step": 7329 }, { "epoch": 9.3824, "grad_norm": 0.8239238858222961, "learning_rate": 3.5354141656662665e-05, "loss": 0.5281, "step": 7330 }, { "epoch": 9.38368, "grad_norm": 0.8236826658248901, "learning_rate": 3.5352140856342537e-05, "loss": 0.5264, "step": 7331 }, { "epoch": 9.38496, "grad_norm": 0.8105927109718323, "learning_rate": 3.5350140056022415e-05, "loss": 0.5647, "step": 7332 }, { "epoch": 9.38624, "grad_norm": 0.8803057670593262, "learning_rate": 3.534813925570229e-05, "loss": 0.5641, "step": 7333 }, { "epoch": 9.38752, "grad_norm": 0.8564363121986389, "learning_rate": 3.534613845538215e-05, "loss": 0.5985, "step": 7334 }, { "epoch": 9.3888, "grad_norm": 0.8307512998580933, "learning_rate": 3.5344137655062024e-05, "loss": 0.5664, "step": 7335 }, { "epoch": 9.39008, "grad_norm": 0.9065113067626953, "learning_rate": 3.5342136854741896e-05, "loss": 0.633, "step": 7336 }, { "epoch": 9.39136, "grad_norm": 0.8214498162269592, "learning_rate": 3.534013605442177e-05, "loss": 0.5621, "step": 7337 }, { "epoch": 9.39264, "grad_norm": 0.8665810823440552, "learning_rate": 3.533813525410164e-05, "loss": 0.6223, "step": 7338 }, { "epoch": 9.39392, "grad_norm": 0.9425195455551147, "learning_rate": 3.533613445378152e-05, "loss": 0.6163, "step": 7339 }, { "epoch": 9.395199999999999, 
"grad_norm": 0.8307021260261536, "learning_rate": 3.533413365346139e-05, "loss": 0.5604, "step": 7340 }, { "epoch": 9.39648, "grad_norm": 0.8683647513389587, "learning_rate": 3.533213285314126e-05, "loss": 0.5811, "step": 7341 }, { "epoch": 9.39776, "grad_norm": 0.8393630385398865, "learning_rate": 3.533013205282113e-05, "loss": 0.5444, "step": 7342 }, { "epoch": 9.39904, "grad_norm": 0.809565544128418, "learning_rate": 3.5328131252501e-05, "loss": 0.5328, "step": 7343 }, { "epoch": 9.40032, "grad_norm": 0.8294006586074829, "learning_rate": 3.532613045218087e-05, "loss": 0.5563, "step": 7344 }, { "epoch": 9.4016, "grad_norm": 0.8536190390586853, "learning_rate": 3.532412965186074e-05, "loss": 0.5752, "step": 7345 }, { "epoch": 9.40288, "grad_norm": 0.8586856722831726, "learning_rate": 3.532212885154062e-05, "loss": 0.6095, "step": 7346 }, { "epoch": 9.40416, "grad_norm": 0.9051749110221863, "learning_rate": 3.532012805122049e-05, "loss": 0.6253, "step": 7347 }, { "epoch": 9.40544, "grad_norm": 0.8567177057266235, "learning_rate": 3.5318127250900365e-05, "loss": 0.6243, "step": 7348 }, { "epoch": 9.40672, "grad_norm": 0.8297619819641113, "learning_rate": 3.531612645058024e-05, "loss": 0.6051, "step": 7349 }, { "epoch": 9.408, "grad_norm": 0.8204346895217896, "learning_rate": 3.53141256502601e-05, "loss": 0.5123, "step": 7350 }, { "epoch": 9.40928, "grad_norm": 0.8355170488357544, "learning_rate": 3.5312124849939974e-05, "loss": 0.5724, "step": 7351 }, { "epoch": 9.41056, "grad_norm": 0.7993974089622498, "learning_rate": 3.5310124049619846e-05, "loss": 0.5733, "step": 7352 }, { "epoch": 9.41184, "grad_norm": 0.846606969833374, "learning_rate": 3.5308123249299724e-05, "loss": 0.5533, "step": 7353 }, { "epoch": 9.41312, "grad_norm": 0.8249667882919312, "learning_rate": 3.5306122448979596e-05, "loss": 0.5561, "step": 7354 }, { "epoch": 9.4144, "grad_norm": 0.8175135850906372, "learning_rate": 3.530412164865947e-05, "loss": 0.557, "step": 7355 }, { "epoch": 9.41568, 
"grad_norm": 0.855964183807373, "learning_rate": 3.530212084833934e-05, "loss": 0.5509, "step": 7356 }, { "epoch": 9.41696, "grad_norm": 0.8171284198760986, "learning_rate": 3.530012004801921e-05, "loss": 0.5227, "step": 7357 }, { "epoch": 9.41824, "grad_norm": 0.8054320812225342, "learning_rate": 3.529811924769908e-05, "loss": 0.6215, "step": 7358 }, { "epoch": 9.41952, "grad_norm": 0.8339996933937073, "learning_rate": 3.529611844737895e-05, "loss": 0.5524, "step": 7359 }, { "epoch": 9.4208, "grad_norm": 0.8672724366188049, "learning_rate": 3.529411764705883e-05, "loss": 0.5946, "step": 7360 }, { "epoch": 9.42208, "grad_norm": 0.799502968788147, "learning_rate": 3.52921168467387e-05, "loss": 0.5152, "step": 7361 }, { "epoch": 9.42336, "grad_norm": 0.8360862731933594, "learning_rate": 3.529011604641857e-05, "loss": 0.5564, "step": 7362 }, { "epoch": 9.42464, "grad_norm": 0.8385033011436462, "learning_rate": 3.528811524609844e-05, "loss": 0.5551, "step": 7363 }, { "epoch": 9.42592, "grad_norm": 0.8228257894515991, "learning_rate": 3.5286114445778315e-05, "loss": 0.5351, "step": 7364 }, { "epoch": 9.4272, "grad_norm": 0.8925015330314636, "learning_rate": 3.5284113645458186e-05, "loss": 0.6315, "step": 7365 }, { "epoch": 9.42848, "grad_norm": 0.9042237997055054, "learning_rate": 3.528211284513805e-05, "loss": 0.6171, "step": 7366 }, { "epoch": 9.42976, "grad_norm": 0.8735190033912659, "learning_rate": 3.528011204481793e-05, "loss": 0.581, "step": 7367 }, { "epoch": 9.43104, "grad_norm": 0.8328735828399658, "learning_rate": 3.52781112444978e-05, "loss": 0.5541, "step": 7368 }, { "epoch": 9.43232, "grad_norm": 0.8481590151786804, "learning_rate": 3.5276110444177674e-05, "loss": 0.5868, "step": 7369 }, { "epoch": 9.4336, "grad_norm": 0.7884271144866943, "learning_rate": 3.5274109643857546e-05, "loss": 0.5346, "step": 7370 }, { "epoch": 9.43488, "grad_norm": 0.8754736185073853, "learning_rate": 3.527210884353742e-05, "loss": 0.6171, "step": 7371 }, { "epoch": 9.43616, 
"grad_norm": 0.8703823685646057, "learning_rate": 3.527010804321729e-05, "loss": 0.5973, "step": 7372 }, { "epoch": 9.43744, "grad_norm": 0.869755208492279, "learning_rate": 3.526810724289716e-05, "loss": 0.5851, "step": 7373 }, { "epoch": 9.43872, "grad_norm": 0.8053287267684937, "learning_rate": 3.526610644257703e-05, "loss": 0.5132, "step": 7374 }, { "epoch": 9.44, "grad_norm": 0.853288471698761, "learning_rate": 3.5264105642256905e-05, "loss": 0.5325, "step": 7375 }, { "epoch": 9.44128, "grad_norm": 0.8669641613960266, "learning_rate": 3.526210484193678e-05, "loss": 0.5699, "step": 7376 }, { "epoch": 9.44256, "grad_norm": 0.8685311079025269, "learning_rate": 3.526010404161665e-05, "loss": 0.6092, "step": 7377 }, { "epoch": 9.44384, "grad_norm": 0.8137316703796387, "learning_rate": 3.525810324129652e-05, "loss": 0.5549, "step": 7378 }, { "epoch": 9.44512, "grad_norm": 0.8705141544342041, "learning_rate": 3.525610244097639e-05, "loss": 0.5925, "step": 7379 }, { "epoch": 9.4464, "grad_norm": 0.7994008660316467, "learning_rate": 3.5254101640656264e-05, "loss": 0.5114, "step": 7380 }, { "epoch": 9.44768, "grad_norm": 0.8135007619857788, "learning_rate": 3.5252100840336136e-05, "loss": 0.5546, "step": 7381 }, { "epoch": 9.44896, "grad_norm": 0.8146666288375854, "learning_rate": 3.525010004001601e-05, "loss": 0.5699, "step": 7382 }, { "epoch": 9.45024, "grad_norm": 0.8331019878387451, "learning_rate": 3.524809923969588e-05, "loss": 0.6037, "step": 7383 }, { "epoch": 9.45152, "grad_norm": 0.8365830183029175, "learning_rate": 3.524609843937575e-05, "loss": 0.5718, "step": 7384 }, { "epoch": 9.4528, "grad_norm": 0.8438559770584106, "learning_rate": 3.5244097639055624e-05, "loss": 0.5881, "step": 7385 }, { "epoch": 9.45408, "grad_norm": 0.8334947228431702, "learning_rate": 3.5242096838735495e-05, "loss": 0.5361, "step": 7386 }, { "epoch": 9.45536, "grad_norm": 0.8178591132164001, "learning_rate": 3.524009603841537e-05, "loss": 0.5467, "step": 7387 }, { "epoch": 9.45664, 
"grad_norm": 0.8228579759597778, "learning_rate": 3.523809523809524e-05, "loss": 0.549, "step": 7388 }, { "epoch": 9.45792, "grad_norm": 0.8397416472434998, "learning_rate": 3.523609443777511e-05, "loss": 0.574, "step": 7389 }, { "epoch": 9.4592, "grad_norm": 0.8239879012107849, "learning_rate": 3.523409363745498e-05, "loss": 0.5415, "step": 7390 }, { "epoch": 9.46048, "grad_norm": 0.8690317273139954, "learning_rate": 3.5232092837134855e-05, "loss": 0.6039, "step": 7391 }, { "epoch": 9.46176, "grad_norm": 0.7893775105476379, "learning_rate": 3.523009203681473e-05, "loss": 0.555, "step": 7392 }, { "epoch": 9.46304, "grad_norm": 0.8786347508430481, "learning_rate": 3.52280912364946e-05, "loss": 0.5882, "step": 7393 }, { "epoch": 9.46432, "grad_norm": 0.8203712105751038, "learning_rate": 3.522609043617447e-05, "loss": 0.5633, "step": 7394 }, { "epoch": 9.4656, "grad_norm": 0.8264157176017761, "learning_rate": 3.522408963585435e-05, "loss": 0.548, "step": 7395 }, { "epoch": 9.46688, "grad_norm": 0.7980100512504578, "learning_rate": 3.5222088835534214e-05, "loss": 0.5698, "step": 7396 }, { "epoch": 9.46816, "grad_norm": 0.8063759803771973, "learning_rate": 3.5220088035214086e-05, "loss": 0.5541, "step": 7397 }, { "epoch": 9.46944, "grad_norm": 0.8606681227684021, "learning_rate": 3.521808723489396e-05, "loss": 0.5892, "step": 7398 }, { "epoch": 9.47072, "grad_norm": 0.8731750249862671, "learning_rate": 3.521608643457383e-05, "loss": 0.6104, "step": 7399 }, { "epoch": 9.472, "grad_norm": 0.8435729742050171, "learning_rate": 3.52140856342537e-05, "loss": 0.5474, "step": 7400 }, { "epoch": 9.47328, "grad_norm": 0.8424503207206726, "learning_rate": 3.521208483393357e-05, "loss": 0.6079, "step": 7401 }, { "epoch": 9.47456, "grad_norm": 0.83389812707901, "learning_rate": 3.521008403361345e-05, "loss": 0.5787, "step": 7402 }, { "epoch": 9.47584, "grad_norm": 0.8542918562889099, "learning_rate": 3.5208083233293324e-05, "loss": 0.5622, "step": 7403 }, { "epoch": 9.47712, 
"grad_norm": 0.8500902652740479, "learning_rate": 3.520608243297319e-05, "loss": 0.5805, "step": 7404 }, { "epoch": 9.4784, "grad_norm": 0.8857991099357605, "learning_rate": 3.520408163265306e-05, "loss": 0.5924, "step": 7405 }, { "epoch": 9.47968, "grad_norm": 0.8328712582588196, "learning_rate": 3.520208083233293e-05, "loss": 0.5427, "step": 7406 }, { "epoch": 9.48096, "grad_norm": 0.8790886998176575, "learning_rate": 3.5200080032012804e-05, "loss": 0.6283, "step": 7407 }, { "epoch": 9.482240000000001, "grad_norm": 0.8296170234680176, "learning_rate": 3.5198079231692676e-05, "loss": 0.5678, "step": 7408 }, { "epoch": 9.48352, "grad_norm": 0.8877446055412292, "learning_rate": 3.5196078431372555e-05, "loss": 0.5913, "step": 7409 }, { "epoch": 9.4848, "grad_norm": 0.8364286422729492, "learning_rate": 3.519407763105243e-05, "loss": 0.5662, "step": 7410 }, { "epoch": 9.48608, "grad_norm": 0.8386290669441223, "learning_rate": 3.51920768307323e-05, "loss": 0.5586, "step": 7411 }, { "epoch": 9.48736, "grad_norm": 0.8200079798698425, "learning_rate": 3.5190076030412164e-05, "loss": 0.5446, "step": 7412 }, { "epoch": 9.48864, "grad_norm": 0.8406325578689575, "learning_rate": 3.5188075230092036e-05, "loss": 0.5378, "step": 7413 }, { "epoch": 9.48992, "grad_norm": 0.8740529417991638, "learning_rate": 3.518607442977191e-05, "loss": 0.557, "step": 7414 }, { "epoch": 9.4912, "grad_norm": 0.8621671199798584, "learning_rate": 3.518407362945178e-05, "loss": 0.5946, "step": 7415 }, { "epoch": 9.49248, "grad_norm": 0.812969446182251, "learning_rate": 3.518207282913166e-05, "loss": 0.5282, "step": 7416 }, { "epoch": 9.49376, "grad_norm": 0.8162618279457092, "learning_rate": 3.518007202881153e-05, "loss": 0.5539, "step": 7417 }, { "epoch": 9.49504, "grad_norm": 0.879939079284668, "learning_rate": 3.51780712284914e-05, "loss": 0.5933, "step": 7418 }, { "epoch": 9.49632, "grad_norm": 0.8177329897880554, "learning_rate": 3.5176070428171274e-05, "loss": 0.5697, "step": 7419 }, { "epoch": 
9.4976, "grad_norm": 0.7988497018814087, "learning_rate": 3.517406962785114e-05, "loss": 0.4975, "step": 7420 }, { "epoch": 9.49888, "grad_norm": 0.8101351261138916, "learning_rate": 3.517206882753101e-05, "loss": 0.5755, "step": 7421 }, { "epoch": 9.50016, "grad_norm": 0.9145906567573547, "learning_rate": 3.517006802721088e-05, "loss": 0.6038, "step": 7422 }, { "epoch": 9.50144, "grad_norm": 0.8204926252365112, "learning_rate": 3.516806722689076e-05, "loss": 0.536, "step": 7423 }, { "epoch": 9.50272, "grad_norm": 0.8429908156394958, "learning_rate": 3.516606642657063e-05, "loss": 0.5637, "step": 7424 }, { "epoch": 9.504, "grad_norm": 0.8640322685241699, "learning_rate": 3.5164065626250505e-05, "loss": 0.6141, "step": 7425 }, { "epoch": 9.505279999999999, "grad_norm": 0.9030240774154663, "learning_rate": 3.5162064825930377e-05, "loss": 0.5929, "step": 7426 }, { "epoch": 9.50656, "grad_norm": 0.8568662405014038, "learning_rate": 3.516006402561025e-05, "loss": 0.5649, "step": 7427 }, { "epoch": 9.50784, "grad_norm": 0.8583548069000244, "learning_rate": 3.5158063225290113e-05, "loss": 0.5824, "step": 7428 }, { "epoch": 9.50912, "grad_norm": 0.8739598989486694, "learning_rate": 3.5156062424969985e-05, "loss": 0.5895, "step": 7429 }, { "epoch": 9.5104, "grad_norm": 0.9026753306388855, "learning_rate": 3.515406162464986e-05, "loss": 0.6056, "step": 7430 }, { "epoch": 9.51168, "grad_norm": 0.8631276488304138, "learning_rate": 3.5152060824329736e-05, "loss": 0.5616, "step": 7431 }, { "epoch": 9.51296, "grad_norm": 0.833394467830658, "learning_rate": 3.515006002400961e-05, "loss": 0.5742, "step": 7432 }, { "epoch": 9.514240000000001, "grad_norm": 0.8503471612930298, "learning_rate": 3.514805922368948e-05, "loss": 0.5484, "step": 7433 }, { "epoch": 9.51552, "grad_norm": 0.8320133686065674, "learning_rate": 3.514605842336935e-05, "loss": 0.5725, "step": 7434 }, { "epoch": 9.5168, "grad_norm": 0.8564555048942566, "learning_rate": 3.514405762304922e-05, "loss": 0.5475, "step": 
7435 }, { "epoch": 9.51808, "grad_norm": 0.8562284708023071, "learning_rate": 3.514205682272909e-05, "loss": 0.6121, "step": 7436 }, { "epoch": 9.51936, "grad_norm": 0.8364126682281494, "learning_rate": 3.514005602240896e-05, "loss": 0.57, "step": 7437 }, { "epoch": 9.52064, "grad_norm": 0.8533650636672974, "learning_rate": 3.513805522208884e-05, "loss": 0.6145, "step": 7438 }, { "epoch": 9.52192, "grad_norm": 0.8603209257125854, "learning_rate": 3.513605442176871e-05, "loss": 0.5859, "step": 7439 }, { "epoch": 9.5232, "grad_norm": 0.8335322141647339, "learning_rate": 3.513405362144858e-05, "loss": 0.5862, "step": 7440 }, { "epoch": 9.52448, "grad_norm": 0.848399817943573, "learning_rate": 3.5132052821128454e-05, "loss": 0.572, "step": 7441 }, { "epoch": 9.52576, "grad_norm": 0.7791329026222229, "learning_rate": 3.5130052020808326e-05, "loss": 0.5761, "step": 7442 }, { "epoch": 9.52704, "grad_norm": 0.8725833892822266, "learning_rate": 3.51280512204882e-05, "loss": 0.6316, "step": 7443 }, { "epoch": 9.52832, "grad_norm": 0.8655521869659424, "learning_rate": 3.512605042016806e-05, "loss": 0.5992, "step": 7444 }, { "epoch": 9.5296, "grad_norm": 0.862616777420044, "learning_rate": 3.512404961984794e-05, "loss": 0.5501, "step": 7445 }, { "epoch": 9.53088, "grad_norm": 0.8571741580963135, "learning_rate": 3.5122048819527814e-05, "loss": 0.5958, "step": 7446 }, { "epoch": 9.53216, "grad_norm": 0.8498180508613586, "learning_rate": 3.5120048019207686e-05, "loss": 0.5891, "step": 7447 }, { "epoch": 9.53344, "grad_norm": 0.829552412033081, "learning_rate": 3.511804721888756e-05, "loss": 0.5575, "step": 7448 }, { "epoch": 9.53472, "grad_norm": 0.8693800568580627, "learning_rate": 3.511604641856743e-05, "loss": 0.6149, "step": 7449 }, { "epoch": 9.536, "grad_norm": 0.7874047160148621, "learning_rate": 3.51140456182473e-05, "loss": 0.5286, "step": 7450 }, { "epoch": 9.537279999999999, "grad_norm": 0.8254282474517822, "learning_rate": 3.511204481792717e-05, "loss": 0.581, 
"step": 7451 }, { "epoch": 9.53856, "grad_norm": 0.8394695520401001, "learning_rate": 3.5110044017607045e-05, "loss": 0.5244, "step": 7452 }, { "epoch": 9.53984, "grad_norm": 0.8637860417366028, "learning_rate": 3.510804321728692e-05, "loss": 0.6142, "step": 7453 }, { "epoch": 9.54112, "grad_norm": 0.8084328770637512, "learning_rate": 3.510604241696679e-05, "loss": 0.529, "step": 7454 }, { "epoch": 9.5424, "grad_norm": 0.8361181616783142, "learning_rate": 3.510404161664666e-05, "loss": 0.5863, "step": 7455 }, { "epoch": 9.54368, "grad_norm": 0.8431722521781921, "learning_rate": 3.510204081632653e-05, "loss": 0.5477, "step": 7456 }, { "epoch": 9.54496, "grad_norm": 0.868948221206665, "learning_rate": 3.5100040016006404e-05, "loss": 0.6217, "step": 7457 }, { "epoch": 9.54624, "grad_norm": 0.8233850002288818, "learning_rate": 3.5098039215686276e-05, "loss": 0.5369, "step": 7458 }, { "epoch": 9.54752, "grad_norm": 0.8040074110031128, "learning_rate": 3.509603841536615e-05, "loss": 0.5127, "step": 7459 }, { "epoch": 9.5488, "grad_norm": 0.8325415849685669, "learning_rate": 3.509403761504602e-05, "loss": 0.5572, "step": 7460 }, { "epoch": 9.55008, "grad_norm": 0.8861798644065857, "learning_rate": 3.509203681472589e-05, "loss": 0.6159, "step": 7461 }, { "epoch": 9.55136, "grad_norm": 0.8803839087486267, "learning_rate": 3.509003601440576e-05, "loss": 0.586, "step": 7462 }, { "epoch": 9.55264, "grad_norm": 0.8698328733444214, "learning_rate": 3.5088035214085635e-05, "loss": 0.57, "step": 7463 }, { "epoch": 9.55392, "grad_norm": 0.8476325273513794, "learning_rate": 3.508603441376551e-05, "loss": 0.5661, "step": 7464 }, { "epoch": 9.5552, "grad_norm": 0.789214015007019, "learning_rate": 3.508403361344538e-05, "loss": 0.5412, "step": 7465 }, { "epoch": 9.55648, "grad_norm": 0.851634681224823, "learning_rate": 3.508203281312525e-05, "loss": 0.5722, "step": 7466 }, { "epoch": 9.55776, "grad_norm": 0.8242769241333008, "learning_rate": 3.508003201280512e-05, "loss": 0.5649, 
"step": 7467 }, { "epoch": 9.55904, "grad_norm": 0.8163190484046936, "learning_rate": 3.5078031212484995e-05, "loss": 0.5694, "step": 7468 }, { "epoch": 9.56032, "grad_norm": 0.802607536315918, "learning_rate": 3.5076030412164866e-05, "loss": 0.5583, "step": 7469 }, { "epoch": 9.5616, "grad_norm": 0.8320533633232117, "learning_rate": 3.507402961184474e-05, "loss": 0.5443, "step": 7470 }, { "epoch": 9.56288, "grad_norm": 0.8149722218513489, "learning_rate": 3.507202881152461e-05, "loss": 0.5603, "step": 7471 }, { "epoch": 9.56416, "grad_norm": 0.8735612630844116, "learning_rate": 3.507002801120448e-05, "loss": 0.5986, "step": 7472 }, { "epoch": 9.56544, "grad_norm": 0.8785853385925293, "learning_rate": 3.506802721088436e-05, "loss": 0.6058, "step": 7473 }, { "epoch": 9.56672, "grad_norm": 0.8740175366401672, "learning_rate": 3.5066026410564226e-05, "loss": 0.5921, "step": 7474 }, { "epoch": 9.568, "grad_norm": 0.8479311466217041, "learning_rate": 3.50640256102441e-05, "loss": 0.5683, "step": 7475 }, { "epoch": 9.56928, "grad_norm": 0.786037266254425, "learning_rate": 3.506202480992397e-05, "loss": 0.5639, "step": 7476 }, { "epoch": 9.57056, "grad_norm": 0.8478996753692627, "learning_rate": 3.506002400960384e-05, "loss": 0.5613, "step": 7477 }, { "epoch": 9.57184, "grad_norm": 0.8028008341789246, "learning_rate": 3.505802320928371e-05, "loss": 0.4897, "step": 7478 }, { "epoch": 9.57312, "grad_norm": 0.844845175743103, "learning_rate": 3.5056022408963585e-05, "loss": 0.5955, "step": 7479 }, { "epoch": 9.5744, "grad_norm": 0.8595391511917114, "learning_rate": 3.5054021608643464e-05, "loss": 0.5998, "step": 7480 }, { "epoch": 9.57568, "grad_norm": 0.860565185546875, "learning_rate": 3.5052020808323335e-05, "loss": 0.5901, "step": 7481 }, { "epoch": 9.57696, "grad_norm": 0.8473759293556213, "learning_rate": 3.50500200080032e-05, "loss": 0.5797, "step": 7482 }, { "epoch": 9.57824, "grad_norm": 0.8636260032653809, "learning_rate": 3.504801920768307e-05, "loss": 0.5983, 
"step": 7483 }, { "epoch": 9.57952, "grad_norm": 0.8650203943252563, "learning_rate": 3.5046018407362944e-05, "loss": 0.5942, "step": 7484 }, { "epoch": 9.5808, "grad_norm": 0.9003608226776123, "learning_rate": 3.5044017607042816e-05, "loss": 0.5908, "step": 7485 }, { "epoch": 9.58208, "grad_norm": 0.8681960105895996, "learning_rate": 3.504201680672269e-05, "loss": 0.6542, "step": 7486 }, { "epoch": 9.58336, "grad_norm": 0.8483841419219971, "learning_rate": 3.5040016006402567e-05, "loss": 0.5657, "step": 7487 }, { "epoch": 9.58464, "grad_norm": 0.894481897354126, "learning_rate": 3.503801520608244e-05, "loss": 0.6173, "step": 7488 }, { "epoch": 9.58592, "grad_norm": 0.7843658328056335, "learning_rate": 3.503601440576231e-05, "loss": 0.527, "step": 7489 }, { "epoch": 9.5872, "grad_norm": 0.8325011134147644, "learning_rate": 3.5034013605442175e-05, "loss": 0.5687, "step": 7490 }, { "epoch": 9.58848, "grad_norm": 0.8706384301185608, "learning_rate": 3.503201280512205e-05, "loss": 0.6229, "step": 7491 }, { "epoch": 9.58976, "grad_norm": 0.8439401388168335, "learning_rate": 3.503001200480192e-05, "loss": 0.565, "step": 7492 }, { "epoch": 9.59104, "grad_norm": 0.8194774985313416, "learning_rate": 3.502801120448179e-05, "loss": 0.5327, "step": 7493 }, { "epoch": 9.59232, "grad_norm": 0.8847520351409912, "learning_rate": 3.502601040416167e-05, "loss": 0.6109, "step": 7494 }, { "epoch": 9.5936, "grad_norm": 0.8577123284339905, "learning_rate": 3.502400960384154e-05, "loss": 0.6078, "step": 7495 }, { "epoch": 9.59488, "grad_norm": 0.8352939486503601, "learning_rate": 3.502200880352141e-05, "loss": 0.5655, "step": 7496 }, { "epoch": 9.59616, "grad_norm": 0.8935215473175049, "learning_rate": 3.5020008003201285e-05, "loss": 0.6405, "step": 7497 }, { "epoch": 9.59744, "grad_norm": 0.8329689502716064, "learning_rate": 3.501800720288115e-05, "loss": 0.5859, "step": 7498 }, { "epoch": 9.59872, "grad_norm": 0.8347263932228088, "learning_rate": 3.501600640256102e-05, "loss": 0.5564, 
"step": 7499 }, { "epoch": 9.6, "grad_norm": 0.8732603192329407, "learning_rate": 3.5014005602240894e-05, "loss": 0.6078, "step": 7500 }, { "epoch": 9.60128, "grad_norm": 0.8412352800369263, "learning_rate": 3.501200480192077e-05, "loss": 0.5728, "step": 7501 }, { "epoch": 9.60256, "grad_norm": 0.8667293787002563, "learning_rate": 3.5010004001600644e-05, "loss": 0.5968, "step": 7502 }, { "epoch": 9.60384, "grad_norm": 0.8245180249214172, "learning_rate": 3.5008003201280516e-05, "loss": 0.5847, "step": 7503 }, { "epoch": 9.60512, "grad_norm": 0.8028945326805115, "learning_rate": 3.500600240096039e-05, "loss": 0.5471, "step": 7504 }, { "epoch": 9.6064, "grad_norm": 0.844236433506012, "learning_rate": 3.500400160064026e-05, "loss": 0.5414, "step": 7505 }, { "epoch": 9.60768, "grad_norm": 0.8215577006340027, "learning_rate": 3.5002000800320125e-05, "loss": 0.5612, "step": 7506 }, { "epoch": 9.60896, "grad_norm": 0.8602308034896851, "learning_rate": 3.5e-05, "loss": 0.6125, "step": 7507 }, { "epoch": 9.61024, "grad_norm": 0.8574222922325134, "learning_rate": 3.4997999199679876e-05, "loss": 0.6139, "step": 7508 }, { "epoch": 9.61152, "grad_norm": 0.8892394304275513, "learning_rate": 3.499599839935975e-05, "loss": 0.6059, "step": 7509 }, { "epoch": 9.6128, "grad_norm": 0.8644453287124634, "learning_rate": 3.499399759903962e-05, "loss": 0.6204, "step": 7510 }, { "epoch": 9.61408, "grad_norm": 0.8173637390136719, "learning_rate": 3.499199679871949e-05, "loss": 0.5308, "step": 7511 }, { "epoch": 9.61536, "grad_norm": 0.8753309845924377, "learning_rate": 3.498999599839936e-05, "loss": 0.6066, "step": 7512 }, { "epoch": 9.61664, "grad_norm": 0.8645505309104919, "learning_rate": 3.4987995198079235e-05, "loss": 0.5772, "step": 7513 }, { "epoch": 9.61792, "grad_norm": 0.8549172878265381, "learning_rate": 3.49859943977591e-05, "loss": 0.5876, "step": 7514 }, { "epoch": 9.6192, "grad_norm": 0.8866432309150696, "learning_rate": 3.498399359743898e-05, "loss": 0.5682, "step": 7515 }, 
{ "epoch": 9.62048, "grad_norm": 0.8328559398651123, "learning_rate": 3.498199279711885e-05, "loss": 0.568, "step": 7516 }, { "epoch": 9.62176, "grad_norm": 0.8284956216812134, "learning_rate": 3.497999199679872e-05, "loss": 0.5592, "step": 7517 }, { "epoch": 9.62304, "grad_norm": 0.8239821195602417, "learning_rate": 3.4977991196478594e-05, "loss": 0.6051, "step": 7518 }, { "epoch": 9.62432, "grad_norm": 0.8453178405761719, "learning_rate": 3.4975990396158466e-05, "loss": 0.5632, "step": 7519 }, { "epoch": 9.6256, "grad_norm": 0.8655393123626709, "learning_rate": 3.497398959583834e-05, "loss": 0.6542, "step": 7520 }, { "epoch": 9.62688, "grad_norm": 0.8494473099708557, "learning_rate": 3.497198879551821e-05, "loss": 0.584, "step": 7521 }, { "epoch": 9.62816, "grad_norm": 0.8517120480537415, "learning_rate": 3.496998799519808e-05, "loss": 0.5515, "step": 7522 }, { "epoch": 9.62944, "grad_norm": 0.8242863416671753, "learning_rate": 3.4967987194877953e-05, "loss": 0.5267, "step": 7523 }, { "epoch": 9.63072, "grad_norm": 0.8850992321968079, "learning_rate": 3.4965986394557825e-05, "loss": 0.6003, "step": 7524 }, { "epoch": 9.632, "grad_norm": 0.8448317050933838, "learning_rate": 3.49639855942377e-05, "loss": 0.6187, "step": 7525 }, { "epoch": 9.63328, "grad_norm": 0.8723029494285583, "learning_rate": 3.496198479391757e-05, "loss": 0.6129, "step": 7526 }, { "epoch": 9.63456, "grad_norm": 0.8665981292724609, "learning_rate": 3.495998399359744e-05, "loss": 0.5949, "step": 7527 }, { "epoch": 9.63584, "grad_norm": 0.8512018322944641, "learning_rate": 3.495798319327731e-05, "loss": 0.5528, "step": 7528 }, { "epoch": 9.63712, "grad_norm": 0.8611388802528381, "learning_rate": 3.4955982392957185e-05, "loss": 0.5936, "step": 7529 }, { "epoch": 9.6384, "grad_norm": 0.865940272808075, "learning_rate": 3.4953981592637056e-05, "loss": 0.6168, "step": 7530 }, { "epoch": 9.63968, "grad_norm": 0.8968199491500854, "learning_rate": 3.495198079231693e-05, "loss": 0.6163, "step": 7531 }, { 
"epoch": 9.64096, "grad_norm": 0.8725362420082092, "learning_rate": 3.49499799919968e-05, "loss": 0.6209, "step": 7532 }, { "epoch": 9.64224, "grad_norm": 0.8303427696228027, "learning_rate": 3.494797919167667e-05, "loss": 0.5585, "step": 7533 }, { "epoch": 9.64352, "grad_norm": 0.8281154036521912, "learning_rate": 3.4945978391356544e-05, "loss": 0.5885, "step": 7534 }, { "epoch": 9.6448, "grad_norm": 0.8495305776596069, "learning_rate": 3.4943977591036416e-05, "loss": 0.6193, "step": 7535 }, { "epoch": 9.64608, "grad_norm": 0.8367983102798462, "learning_rate": 3.494197679071629e-05, "loss": 0.5942, "step": 7536 }, { "epoch": 9.64736, "grad_norm": 0.8438671231269836, "learning_rate": 3.493997599039616e-05, "loss": 0.5856, "step": 7537 }, { "epoch": 9.64864, "grad_norm": 0.8848779797554016, "learning_rate": 3.493797519007603e-05, "loss": 0.6449, "step": 7538 }, { "epoch": 9.64992, "grad_norm": 0.7978121042251587, "learning_rate": 3.49359743897559e-05, "loss": 0.573, "step": 7539 }, { "epoch": 9.6512, "grad_norm": 0.807237446308136, "learning_rate": 3.4933973589435775e-05, "loss": 0.5753, "step": 7540 }, { "epoch": 9.65248, "grad_norm": 0.7662572264671326, "learning_rate": 3.493197278911565e-05, "loss": 0.5361, "step": 7541 }, { "epoch": 9.65376, "grad_norm": 0.8328047394752502, "learning_rate": 3.492997198879552e-05, "loss": 0.5792, "step": 7542 }, { "epoch": 9.65504, "grad_norm": 0.7806389331817627, "learning_rate": 3.492797118847539e-05, "loss": 0.5287, "step": 7543 }, { "epoch": 9.656320000000001, "grad_norm": 0.8597953915596008, "learning_rate": 3.492597038815526e-05, "loss": 0.5512, "step": 7544 }, { "epoch": 9.6576, "grad_norm": 0.8092467784881592, "learning_rate": 3.4923969587835134e-05, "loss": 0.557, "step": 7545 }, { "epoch": 9.65888, "grad_norm": 0.8465959429740906, "learning_rate": 3.4921968787515006e-05, "loss": 0.5901, "step": 7546 }, { "epoch": 9.66016, "grad_norm": 0.8790938258171082, "learning_rate": 3.491996798719488e-05, "loss": 0.5985, "step": 
7547 }, { "epoch": 9.66144, "grad_norm": 0.8606868982315063, "learning_rate": 3.491796718687475e-05, "loss": 0.6082, "step": 7548 }, { "epoch": 9.66272, "grad_norm": 0.8462215065956116, "learning_rate": 3.491596638655462e-05, "loss": 0.6326, "step": 7549 }, { "epoch": 9.664, "grad_norm": 0.8208281993865967, "learning_rate": 3.4913965586234494e-05, "loss": 0.5717, "step": 7550 }, { "epoch": 9.66528, "grad_norm": 0.8237935900688171, "learning_rate": 3.491196478591437e-05, "loss": 0.5861, "step": 7551 }, { "epoch": 9.66656, "grad_norm": 0.8763028979301453, "learning_rate": 3.490996398559424e-05, "loss": 0.5965, "step": 7552 }, { "epoch": 9.66784, "grad_norm": 0.7981544733047485, "learning_rate": 3.490796318527411e-05, "loss": 0.51, "step": 7553 }, { "epoch": 9.66912, "grad_norm": 0.8098160624504089, "learning_rate": 3.490596238495398e-05, "loss": 0.5858, "step": 7554 }, { "epoch": 9.6704, "grad_norm": 0.8530492782592773, "learning_rate": 3.490396158463385e-05, "loss": 0.5727, "step": 7555 }, { "epoch": 9.67168, "grad_norm": 0.8604266047477722, "learning_rate": 3.4901960784313725e-05, "loss": 0.6063, "step": 7556 }, { "epoch": 9.67296, "grad_norm": 0.8214526772499084, "learning_rate": 3.4899959983993597e-05, "loss": 0.549, "step": 7557 }, { "epoch": 9.67424, "grad_norm": 0.7836440205574036, "learning_rate": 3.4897959183673475e-05, "loss": 0.5446, "step": 7558 }, { "epoch": 9.67552, "grad_norm": 0.8219870924949646, "learning_rate": 3.489595838335335e-05, "loss": 0.5766, "step": 7559 }, { "epoch": 9.6768, "grad_norm": 0.8548493385314941, "learning_rate": 3.489395758303321e-05, "loss": 0.5894, "step": 7560 }, { "epoch": 9.67808, "grad_norm": 0.8581690788269043, "learning_rate": 3.4891956782713084e-05, "loss": 0.549, "step": 7561 }, { "epoch": 9.679359999999999, "grad_norm": 0.858836829662323, "learning_rate": 3.4889955982392956e-05, "loss": 0.5783, "step": 7562 }, { "epoch": 9.68064, "grad_norm": 0.840040385723114, "learning_rate": 3.488795518207283e-05, "loss": 0.6081, 
"step": 7563 }, { "epoch": 9.68192, "grad_norm": 0.8569283485412598, "learning_rate": 3.48859543817527e-05, "loss": 0.61, "step": 7564 }, { "epoch": 9.6832, "grad_norm": 0.843917191028595, "learning_rate": 3.488395358143258e-05, "loss": 0.5821, "step": 7565 }, { "epoch": 9.68448, "grad_norm": 0.8357688784599304, "learning_rate": 3.488195278111245e-05, "loss": 0.5624, "step": 7566 }, { "epoch": 9.68576, "grad_norm": 0.8503354787826538, "learning_rate": 3.487995198079232e-05, "loss": 0.6566, "step": 7567 }, { "epoch": 9.68704, "grad_norm": 0.8254701495170593, "learning_rate": 3.487795118047219e-05, "loss": 0.5381, "step": 7568 }, { "epoch": 9.688320000000001, "grad_norm": 0.899823784828186, "learning_rate": 3.487595038015206e-05, "loss": 0.578, "step": 7569 }, { "epoch": 9.6896, "grad_norm": 0.8426713943481445, "learning_rate": 3.487394957983193e-05, "loss": 0.5818, "step": 7570 }, { "epoch": 9.69088, "grad_norm": 0.846393346786499, "learning_rate": 3.48719487795118e-05, "loss": 0.5821, "step": 7571 }, { "epoch": 9.69216, "grad_norm": 0.8248345255851746, "learning_rate": 3.486994797919168e-05, "loss": 0.5701, "step": 7572 }, { "epoch": 9.69344, "grad_norm": 0.8395203948020935, "learning_rate": 3.486794717887155e-05, "loss": 0.5862, "step": 7573 }, { "epoch": 9.69472, "grad_norm": 0.8293047547340393, "learning_rate": 3.4865946378551425e-05, "loss": 0.5585, "step": 7574 }, { "epoch": 9.696, "grad_norm": 0.848100483417511, "learning_rate": 3.48639455782313e-05, "loss": 0.5687, "step": 7575 }, { "epoch": 9.69728, "grad_norm": 0.8797847032546997, "learning_rate": 3.486194477791116e-05, "loss": 0.6331, "step": 7576 }, { "epoch": 9.69856, "grad_norm": 0.7995453476905823, "learning_rate": 3.4859943977591034e-05, "loss": 0.5394, "step": 7577 }, { "epoch": 9.69984, "grad_norm": 0.8336317539215088, "learning_rate": 3.4857943177270906e-05, "loss": 0.6146, "step": 7578 }, { "epoch": 9.70112, "grad_norm": 0.8199133276939392, "learning_rate": 3.4855942376950784e-05, "loss": 0.596, 
"step": 7579 }, { "epoch": 9.7024, "grad_norm": 0.8550775647163391, "learning_rate": 3.4853941576630656e-05, "loss": 0.5881, "step": 7580 }, { "epoch": 9.70368, "grad_norm": 0.8737037777900696, "learning_rate": 3.485194077631053e-05, "loss": 0.6125, "step": 7581 }, { "epoch": 9.70496, "grad_norm": 0.8511414527893066, "learning_rate": 3.48499399759904e-05, "loss": 0.5995, "step": 7582 }, { "epoch": 9.70624, "grad_norm": 0.8362718224525452, "learning_rate": 3.484793917567027e-05, "loss": 0.5418, "step": 7583 }, { "epoch": 9.70752, "grad_norm": 0.8965070247650146, "learning_rate": 3.484593837535014e-05, "loss": 0.6493, "step": 7584 }, { "epoch": 9.7088, "grad_norm": 0.8419719338417053, "learning_rate": 3.484393757503001e-05, "loss": 0.5813, "step": 7585 }, { "epoch": 9.71008, "grad_norm": 0.903337299823761, "learning_rate": 3.484193677470989e-05, "loss": 0.6155, "step": 7586 }, { "epoch": 9.711359999999999, "grad_norm": 0.8451673984527588, "learning_rate": 3.483993597438976e-05, "loss": 0.5438, "step": 7587 }, { "epoch": 9.71264, "grad_norm": 0.8772710561752319, "learning_rate": 3.483793517406963e-05, "loss": 0.5555, "step": 7588 }, { "epoch": 9.71392, "grad_norm": 0.8326647877693176, "learning_rate": 3.48359343737495e-05, "loss": 0.6242, "step": 7589 }, { "epoch": 9.7152, "grad_norm": 0.8710974454879761, "learning_rate": 3.4833933573429375e-05, "loss": 0.6274, "step": 7590 }, { "epoch": 9.71648, "grad_norm": 0.8127773404121399, "learning_rate": 3.4831932773109246e-05, "loss": 0.6033, "step": 7591 }, { "epoch": 9.71776, "grad_norm": 0.7887704372406006, "learning_rate": 3.482993197278911e-05, "loss": 0.5284, "step": 7592 }, { "epoch": 9.71904, "grad_norm": 0.8618784546852112, "learning_rate": 3.482793117246899e-05, "loss": 0.6029, "step": 7593 }, { "epoch": 9.72032, "grad_norm": 0.8521010875701904, "learning_rate": 3.482593037214886e-05, "loss": 0.5912, "step": 7594 }, { "epoch": 9.7216, "grad_norm": 0.8535512685775757, "learning_rate": 3.4823929571828734e-05, "loss": 
0.5922, "step": 7595 }, { "epoch": 9.72288, "grad_norm": 0.8280578255653381, "learning_rate": 3.4821928771508606e-05, "loss": 0.5867, "step": 7596 }, { "epoch": 9.72416, "grad_norm": 0.7792149782180786, "learning_rate": 3.481992797118848e-05, "loss": 0.5363, "step": 7597 }, { "epoch": 9.72544, "grad_norm": 0.7704604864120483, "learning_rate": 3.481792717086835e-05, "loss": 0.5437, "step": 7598 }, { "epoch": 9.72672, "grad_norm": 0.819395899772644, "learning_rate": 3.481592637054822e-05, "loss": 0.5802, "step": 7599 }, { "epoch": 9.728, "grad_norm": 0.83621746301651, "learning_rate": 3.481392557022809e-05, "loss": 0.579, "step": 7600 }, { "epoch": 9.72928, "grad_norm": 0.8944879770278931, "learning_rate": 3.4811924769907965e-05, "loss": 0.6284, "step": 7601 }, { "epoch": 9.73056, "grad_norm": 0.8720524311065674, "learning_rate": 3.480992396958784e-05, "loss": 0.5917, "step": 7602 }, { "epoch": 9.73184, "grad_norm": 0.877223789691925, "learning_rate": 3.480792316926771e-05, "loss": 0.591, "step": 7603 }, { "epoch": 9.73312, "grad_norm": 0.9013481140136719, "learning_rate": 3.480592236894758e-05, "loss": 0.6077, "step": 7604 }, { "epoch": 9.7344, "grad_norm": 0.8513638973236084, "learning_rate": 3.480392156862745e-05, "loss": 0.5542, "step": 7605 }, { "epoch": 9.73568, "grad_norm": 0.862791121006012, "learning_rate": 3.4801920768307324e-05, "loss": 0.5792, "step": 7606 }, { "epoch": 9.73696, "grad_norm": 0.8830035328865051, "learning_rate": 3.4799919967987196e-05, "loss": 0.5655, "step": 7607 }, { "epoch": 9.73824, "grad_norm": 0.8427029252052307, "learning_rate": 3.479791916766707e-05, "loss": 0.5665, "step": 7608 }, { "epoch": 9.73952, "grad_norm": 0.8503885269165039, "learning_rate": 3.479591836734694e-05, "loss": 0.5891, "step": 7609 }, { "epoch": 9.7408, "grad_norm": 0.8855993151664734, "learning_rate": 3.479391756702681e-05, "loss": 0.603, "step": 7610 }, { "epoch": 9.74208, "grad_norm": 0.8236071467399597, "learning_rate": 3.4791916766706684e-05, "loss": 
0.5416, "step": 7611 }, { "epoch": 9.74336, "grad_norm": 0.8632338643074036, "learning_rate": 3.4789915966386555e-05, "loss": 0.572, "step": 7612 }, { "epoch": 9.74464, "grad_norm": 0.8438730239868164, "learning_rate": 3.478791516606643e-05, "loss": 0.5588, "step": 7613 }, { "epoch": 9.74592, "grad_norm": 0.8554427027702332, "learning_rate": 3.4785914365746306e-05, "loss": 0.596, "step": 7614 }, { "epoch": 9.7472, "grad_norm": 0.8517599701881409, "learning_rate": 3.478391356542617e-05, "loss": 0.5597, "step": 7615 }, { "epoch": 9.74848, "grad_norm": 0.8286646604537964, "learning_rate": 3.478191276510604e-05, "loss": 0.5932, "step": 7616 }, { "epoch": 9.74976, "grad_norm": 0.7939943075180054, "learning_rate": 3.4779911964785915e-05, "loss": 0.5925, "step": 7617 }, { "epoch": 9.75104, "grad_norm": 0.8272203803062439, "learning_rate": 3.4777911164465787e-05, "loss": 0.6005, "step": 7618 }, { "epoch": 9.75232, "grad_norm": 0.8649276494979858, "learning_rate": 3.477591036414566e-05, "loss": 0.5847, "step": 7619 }, { "epoch": 9.7536, "grad_norm": 0.7952982783317566, "learning_rate": 3.477390956382553e-05, "loss": 0.5359, "step": 7620 }, { "epoch": 9.75488, "grad_norm": 0.8271358609199524, "learning_rate": 3.477190876350541e-05, "loss": 0.5964, "step": 7621 }, { "epoch": 9.75616, "grad_norm": 0.8045120239257812, "learning_rate": 3.476990796318528e-05, "loss": 0.5676, "step": 7622 }, { "epoch": 9.75744, "grad_norm": 0.7775219082832336, "learning_rate": 3.4767907162865146e-05, "loss": 0.5519, "step": 7623 }, { "epoch": 9.75872, "grad_norm": 0.8189329504966736, "learning_rate": 3.476590636254502e-05, "loss": 0.5834, "step": 7624 }, { "epoch": 9.76, "grad_norm": 0.8617966771125793, "learning_rate": 3.476390556222489e-05, "loss": 0.5863, "step": 7625 }, { "epoch": 9.76128, "grad_norm": 0.8166400790214539, "learning_rate": 3.476190476190476e-05, "loss": 0.5754, "step": 7626 }, { "epoch": 9.76256, "grad_norm": 0.8374921679496765, "learning_rate": 3.475990396158463e-05, "loss": 
0.557, "step": 7627 }, { "epoch": 9.76384, "grad_norm": 0.7682600617408752, "learning_rate": 3.475790316126451e-05, "loss": 0.5077, "step": 7628 }, { "epoch": 9.76512, "grad_norm": 0.8810880780220032, "learning_rate": 3.4755902360944384e-05, "loss": 0.6657, "step": 7629 }, { "epoch": 9.7664, "grad_norm": 0.8713139295578003, "learning_rate": 3.4753901560624256e-05, "loss": 0.6198, "step": 7630 }, { "epoch": 9.76768, "grad_norm": 0.858737051486969, "learning_rate": 3.475190076030412e-05, "loss": 0.5409, "step": 7631 }, { "epoch": 9.76896, "grad_norm": 0.7983132004737854, "learning_rate": 3.474989995998399e-05, "loss": 0.5621, "step": 7632 }, { "epoch": 9.77024, "grad_norm": 0.8178649544715881, "learning_rate": 3.4747899159663864e-05, "loss": 0.5632, "step": 7633 }, { "epoch": 9.77152, "grad_norm": 0.8813498020172119, "learning_rate": 3.4745898359343736e-05, "loss": 0.6399, "step": 7634 }, { "epoch": 9.7728, "grad_norm": 0.8513997197151184, "learning_rate": 3.4743897559023615e-05, "loss": 0.5805, "step": 7635 }, { "epoch": 9.77408, "grad_norm": 0.8140328526496887, "learning_rate": 3.474189675870349e-05, "loss": 0.534, "step": 7636 }, { "epoch": 9.77536, "grad_norm": 0.8354536890983582, "learning_rate": 3.473989595838336e-05, "loss": 0.5771, "step": 7637 }, { "epoch": 9.77664, "grad_norm": 0.8029643297195435, "learning_rate": 3.473789515806323e-05, "loss": 0.527, "step": 7638 }, { "epoch": 9.77792, "grad_norm": 0.8642273545265198, "learning_rate": 3.4735894357743096e-05, "loss": 0.591, "step": 7639 }, { "epoch": 9.7792, "grad_norm": 0.8148753643035889, "learning_rate": 3.473389355742297e-05, "loss": 0.5682, "step": 7640 }, { "epoch": 9.78048, "grad_norm": 0.8324840664863586, "learning_rate": 3.473189275710284e-05, "loss": 0.5479, "step": 7641 }, { "epoch": 9.78176, "grad_norm": 0.911037027835846, "learning_rate": 3.472989195678272e-05, "loss": 0.6622, "step": 7642 }, { "epoch": 9.78304, "grad_norm": 0.847952663898468, "learning_rate": 3.472789115646259e-05, "loss": 
0.5716, "step": 7643 }, { "epoch": 9.78432, "grad_norm": 0.8314871191978455, "learning_rate": 3.472589035614246e-05, "loss": 0.626, "step": 7644 }, { "epoch": 9.7856, "grad_norm": 0.8641082048416138, "learning_rate": 3.4723889555822333e-05, "loss": 0.5465, "step": 7645 }, { "epoch": 9.78688, "grad_norm": 0.834967851638794, "learning_rate": 3.4721888755502205e-05, "loss": 0.5653, "step": 7646 }, { "epoch": 9.78816, "grad_norm": 0.8525474667549133, "learning_rate": 3.471988795518207e-05, "loss": 0.5715, "step": 7647 }, { "epoch": 9.78944, "grad_norm": 0.8522567749023438, "learning_rate": 3.471788715486194e-05, "loss": 0.5699, "step": 7648 }, { "epoch": 9.79072, "grad_norm": 0.8890289068222046, "learning_rate": 3.471588635454182e-05, "loss": 0.6542, "step": 7649 }, { "epoch": 9.792, "grad_norm": 0.8257919549942017, "learning_rate": 3.471388555422169e-05, "loss": 0.5972, "step": 7650 }, { "epoch": 9.79328, "grad_norm": 0.8120242357254028, "learning_rate": 3.4711884753901565e-05, "loss": 0.5841, "step": 7651 }, { "epoch": 9.79456, "grad_norm": 0.8151296973228455, "learning_rate": 3.4709883953581436e-05, "loss": 0.5583, "step": 7652 }, { "epoch": 9.79584, "grad_norm": 0.8686169981956482, "learning_rate": 3.470788315326131e-05, "loss": 0.5816, "step": 7653 }, { "epoch": 9.79712, "grad_norm": 0.871562123298645, "learning_rate": 3.470588235294118e-05, "loss": 0.5792, "step": 7654 }, { "epoch": 9.7984, "grad_norm": 0.8428971767425537, "learning_rate": 3.4703881552621045e-05, "loss": 0.517, "step": 7655 }, { "epoch": 9.79968, "grad_norm": 0.8845090866088867, "learning_rate": 3.470188075230092e-05, "loss": 0.6303, "step": 7656 }, { "epoch": 9.80096, "grad_norm": 0.8464072942733765, "learning_rate": 3.4699879951980796e-05, "loss": 0.5476, "step": 7657 }, { "epoch": 9.80224, "grad_norm": 0.871966540813446, "learning_rate": 3.469787915166067e-05, "loss": 0.5654, "step": 7658 }, { "epoch": 9.80352, "grad_norm": 0.811346173286438, "learning_rate": 3.469587835134054e-05, "loss": 
0.5738, "step": 7659 }, { "epoch": 9.8048, "grad_norm": 0.8650268316268921, "learning_rate": 3.469387755102041e-05, "loss": 0.6266, "step": 7660 }, { "epoch": 9.80608, "grad_norm": 0.7988438010215759, "learning_rate": 3.469187675070028e-05, "loss": 0.5255, "step": 7661 }, { "epoch": 9.80736, "grad_norm": 0.8700106739997864, "learning_rate": 3.4689875950380155e-05, "loss": 0.6087, "step": 7662 }, { "epoch": 9.80864, "grad_norm": 0.8239459991455078, "learning_rate": 3.468787515006002e-05, "loss": 0.5602, "step": 7663 }, { "epoch": 9.80992, "grad_norm": 0.9134540557861328, "learning_rate": 3.46858743497399e-05, "loss": 0.5968, "step": 7664 }, { "epoch": 9.8112, "grad_norm": 0.9253342747688293, "learning_rate": 3.468387354941977e-05, "loss": 0.6128, "step": 7665 }, { "epoch": 9.81248, "grad_norm": 0.7687677145004272, "learning_rate": 3.468187274909964e-05, "loss": 0.5191, "step": 7666 }, { "epoch": 9.81376, "grad_norm": 0.8132002949714661, "learning_rate": 3.4679871948779514e-05, "loss": 0.5736, "step": 7667 }, { "epoch": 9.81504, "grad_norm": 0.8032669425010681, "learning_rate": 3.4677871148459386e-05, "loss": 0.5311, "step": 7668 }, { "epoch": 9.81632, "grad_norm": 0.8530933260917664, "learning_rate": 3.467587034813926e-05, "loss": 0.5699, "step": 7669 }, { "epoch": 9.8176, "grad_norm": 0.8100473880767822, "learning_rate": 3.467386954781913e-05, "loss": 0.5431, "step": 7670 }, { "epoch": 9.81888, "grad_norm": 0.8088340759277344, "learning_rate": 3.4671868747499e-05, "loss": 0.5454, "step": 7671 }, { "epoch": 9.82016, "grad_norm": 0.8359904289245605, "learning_rate": 3.4669867947178874e-05, "loss": 0.567, "step": 7672 }, { "epoch": 9.821439999999999, "grad_norm": 0.8769454956054688, "learning_rate": 3.4667867146858745e-05, "loss": 0.5966, "step": 7673 }, { "epoch": 9.82272, "grad_norm": 0.8174176812171936, "learning_rate": 3.466586634653862e-05, "loss": 0.5452, "step": 7674 }, { "epoch": 9.824, "grad_norm": 0.8501536846160889, "learning_rate": 3.466386554621849e-05, 
"loss": 0.5929, "step": 7675 }, { "epoch": 9.82528, "grad_norm": 0.8151215314865112, "learning_rate": 3.466186474589836e-05, "loss": 0.552, "step": 7676 }, { "epoch": 9.82656, "grad_norm": 0.8890595436096191, "learning_rate": 3.465986394557823e-05, "loss": 0.6142, "step": 7677 }, { "epoch": 9.82784, "grad_norm": 0.8509846329689026, "learning_rate": 3.4657863145258105e-05, "loss": 0.6358, "step": 7678 }, { "epoch": 9.82912, "grad_norm": 0.8409616947174072, "learning_rate": 3.465586234493798e-05, "loss": 0.5594, "step": 7679 }, { "epoch": 9.830400000000001, "grad_norm": 0.8827511072158813, "learning_rate": 3.465386154461785e-05, "loss": 0.6054, "step": 7680 }, { "epoch": 9.83168, "grad_norm": 0.8123102784156799, "learning_rate": 3.465186074429772e-05, "loss": 0.5833, "step": 7681 }, { "epoch": 9.83296, "grad_norm": 0.8210862874984741, "learning_rate": 3.464985994397759e-05, "loss": 0.6082, "step": 7682 }, { "epoch": 9.83424, "grad_norm": 0.8209624886512756, "learning_rate": 3.4647859143657464e-05, "loss": 0.615, "step": 7683 }, { "epoch": 9.83552, "grad_norm": 0.8852274417877197, "learning_rate": 3.4645858343337336e-05, "loss": 0.6337, "step": 7684 }, { "epoch": 9.8368, "grad_norm": 0.8162413239479065, "learning_rate": 3.464385754301721e-05, "loss": 0.5424, "step": 7685 }, { "epoch": 9.83808, "grad_norm": 0.8327953219413757, "learning_rate": 3.464185674269708e-05, "loss": 0.5759, "step": 7686 }, { "epoch": 9.83936, "grad_norm": 0.8537192940711975, "learning_rate": 3.463985594237695e-05, "loss": 0.5714, "step": 7687 }, { "epoch": 9.84064, "grad_norm": 0.8469899892807007, "learning_rate": 3.463785514205682e-05, "loss": 0.6088, "step": 7688 }, { "epoch": 9.84192, "grad_norm": 0.8121939301490784, "learning_rate": 3.4635854341736695e-05, "loss": 0.5807, "step": 7689 }, { "epoch": 9.8432, "grad_norm": 0.8225154280662537, "learning_rate": 3.463385354141657e-05, "loss": 0.5833, "step": 7690 }, { "epoch": 9.84448, "grad_norm": 0.8836968541145325, "learning_rate": 
3.463185274109644e-05, "loss": 0.5574, "step": 7691 }, { "epoch": 9.84576, "grad_norm": 0.8945218920707703, "learning_rate": 3.462985194077632e-05, "loss": 0.6273, "step": 7692 }, { "epoch": 9.84704, "grad_norm": 0.8279548287391663, "learning_rate": 3.462785114045618e-05, "loss": 0.5263, "step": 7693 }, { "epoch": 9.84832, "grad_norm": 0.8554354906082153, "learning_rate": 3.4625850340136054e-05, "loss": 0.5709, "step": 7694 }, { "epoch": 9.8496, "grad_norm": 0.8173097372055054, "learning_rate": 3.4623849539815926e-05, "loss": 0.5876, "step": 7695 }, { "epoch": 9.85088, "grad_norm": 0.8793294429779053, "learning_rate": 3.46218487394958e-05, "loss": 0.627, "step": 7696 }, { "epoch": 9.85216, "grad_norm": 0.7973728775978088, "learning_rate": 3.461984793917567e-05, "loss": 0.5243, "step": 7697 }, { "epoch": 9.853439999999999, "grad_norm": 0.8718621730804443, "learning_rate": 3.461784713885554e-05, "loss": 0.5843, "step": 7698 }, { "epoch": 9.85472, "grad_norm": 0.8487060070037842, "learning_rate": 3.461584633853542e-05, "loss": 0.5407, "step": 7699 }, { "epoch": 9.856, "grad_norm": 0.8882485032081604, "learning_rate": 3.461384553821529e-05, "loss": 0.6204, "step": 7700 }, { "epoch": 9.85728, "grad_norm": 0.8720253705978394, "learning_rate": 3.461184473789516e-05, "loss": 0.5962, "step": 7701 }, { "epoch": 9.85856, "grad_norm": 0.8400206565856934, "learning_rate": 3.460984393757503e-05, "loss": 0.5695, "step": 7702 }, { "epoch": 9.85984, "grad_norm": 0.9012857675552368, "learning_rate": 3.46078431372549e-05, "loss": 0.5909, "step": 7703 }, { "epoch": 9.86112, "grad_norm": 0.8660620450973511, "learning_rate": 3.460584233693477e-05, "loss": 0.5646, "step": 7704 }, { "epoch": 9.862400000000001, "grad_norm": 0.8514848947525024, "learning_rate": 3.4603841536614645e-05, "loss": 0.5869, "step": 7705 }, { "epoch": 9.86368, "grad_norm": 0.8337460160255432, "learning_rate": 3.4601840736294524e-05, "loss": 0.5438, "step": 7706 }, { "epoch": 9.86496, "grad_norm": 
0.8334992527961731, "learning_rate": 3.4599839935974395e-05, "loss": 0.5825, "step": 7707 }, { "epoch": 9.86624, "grad_norm": 0.8814105987548828, "learning_rate": 3.459783913565427e-05, "loss": 0.6023, "step": 7708 }, { "epoch": 9.86752, "grad_norm": 0.9168762564659119, "learning_rate": 3.459583833533413e-05, "loss": 0.6323, "step": 7709 }, { "epoch": 9.8688, "grad_norm": 0.8501449227333069, "learning_rate": 3.4593837535014004e-05, "loss": 0.5978, "step": 7710 }, { "epoch": 9.87008, "grad_norm": 0.8778765201568604, "learning_rate": 3.4591836734693876e-05, "loss": 0.5654, "step": 7711 }, { "epoch": 9.87136, "grad_norm": 0.8818456530570984, "learning_rate": 3.458983593437375e-05, "loss": 0.6219, "step": 7712 }, { "epoch": 9.87264, "grad_norm": 0.8822683095932007, "learning_rate": 3.4587835134053627e-05, "loss": 0.6086, "step": 7713 }, { "epoch": 9.87392, "grad_norm": 0.8153172731399536, "learning_rate": 3.45858343337335e-05, "loss": 0.568, "step": 7714 }, { "epoch": 9.8752, "grad_norm": 0.8170477151870728, "learning_rate": 3.458383353341337e-05, "loss": 0.5734, "step": 7715 }, { "epoch": 9.87648, "grad_norm": 0.8518624901771545, "learning_rate": 3.458183273309324e-05, "loss": 0.5697, "step": 7716 }, { "epoch": 9.87776, "grad_norm": 0.8774121999740601, "learning_rate": 3.457983193277311e-05, "loss": 0.5983, "step": 7717 }, { "epoch": 9.87904, "grad_norm": 0.851138710975647, "learning_rate": 3.457783113245298e-05, "loss": 0.6161, "step": 7718 }, { "epoch": 9.88032, "grad_norm": 0.8621139526367188, "learning_rate": 3.457583033213285e-05, "loss": 0.6147, "step": 7719 }, { "epoch": 9.8816, "grad_norm": 0.8349597454071045, "learning_rate": 3.457382953181273e-05, "loss": 0.5587, "step": 7720 }, { "epoch": 9.88288, "grad_norm": 0.8482023477554321, "learning_rate": 3.45718287314926e-05, "loss": 0.5783, "step": 7721 }, { "epoch": 9.88416, "grad_norm": 0.8426772952079773, "learning_rate": 3.456982793117247e-05, "loss": 0.5994, "step": 7722 }, { "epoch": 9.88544, "grad_norm": 
0.8138001561164856, "learning_rate": 3.4567827130852345e-05, "loss": 0.5165, "step": 7723 }, { "epoch": 9.88672, "grad_norm": 0.838672399520874, "learning_rate": 3.456582633053222e-05, "loss": 0.5827, "step": 7724 }, { "epoch": 9.888, "grad_norm": 0.8754329085350037, "learning_rate": 3.456382553021208e-05, "loss": 0.5673, "step": 7725 }, { "epoch": 9.88928, "grad_norm": 0.856153130531311, "learning_rate": 3.4561824729891954e-05, "loss": 0.5857, "step": 7726 }, { "epoch": 9.89056, "grad_norm": 0.8428956866264343, "learning_rate": 3.455982392957183e-05, "loss": 0.5538, "step": 7727 }, { "epoch": 9.89184, "grad_norm": 0.8644785284996033, "learning_rate": 3.4557823129251704e-05, "loss": 0.5754, "step": 7728 }, { "epoch": 9.89312, "grad_norm": 0.8557632565498352, "learning_rate": 3.4555822328931576e-05, "loss": 0.5598, "step": 7729 }, { "epoch": 9.8944, "grad_norm": 0.84462970495224, "learning_rate": 3.455382152861145e-05, "loss": 0.59, "step": 7730 }, { "epoch": 9.89568, "grad_norm": 0.8369764089584351, "learning_rate": 3.455182072829132e-05, "loss": 0.6021, "step": 7731 }, { "epoch": 9.89696, "grad_norm": 0.8819150328636169, "learning_rate": 3.454981992797119e-05, "loss": 0.6122, "step": 7732 }, { "epoch": 9.89824, "grad_norm": 0.8815730214118958, "learning_rate": 3.454781912765106e-05, "loss": 0.6513, "step": 7733 }, { "epoch": 9.89952, "grad_norm": 0.8620604276657104, "learning_rate": 3.4545818327330936e-05, "loss": 0.5683, "step": 7734 }, { "epoch": 9.9008, "grad_norm": 0.8745002746582031, "learning_rate": 3.454381752701081e-05, "loss": 0.5996, "step": 7735 }, { "epoch": 9.90208, "grad_norm": 0.8810462355613708, "learning_rate": 3.454181672669068e-05, "loss": 0.6145, "step": 7736 }, { "epoch": 9.90336, "grad_norm": 0.8663910031318665, "learning_rate": 3.453981592637055e-05, "loss": 0.5592, "step": 7737 }, { "epoch": 9.90464, "grad_norm": 0.8494516611099243, "learning_rate": 3.453781512605042e-05, "loss": 0.5281, "step": 7738 }, { "epoch": 9.90592, "grad_norm": 
0.8416398763656616, "learning_rate": 3.4535814325730295e-05, "loss": 0.5736, "step": 7739 }, { "epoch": 9.9072, "grad_norm": 0.7921009659767151, "learning_rate": 3.453381352541017e-05, "loss": 0.5327, "step": 7740 }, { "epoch": 9.90848, "grad_norm": 0.8950327038764954, "learning_rate": 3.453181272509004e-05, "loss": 0.6223, "step": 7741 }, { "epoch": 9.90976, "grad_norm": 0.8658446669578552, "learning_rate": 3.452981192476991e-05, "loss": 0.6088, "step": 7742 }, { "epoch": 9.91104, "grad_norm": 0.8409397006034851, "learning_rate": 3.452781112444978e-05, "loss": 0.6228, "step": 7743 }, { "epoch": 9.91232, "grad_norm": 0.873226523399353, "learning_rate": 3.4525810324129654e-05, "loss": 0.6051, "step": 7744 }, { "epoch": 9.9136, "grad_norm": 0.8407812714576721, "learning_rate": 3.4523809523809526e-05, "loss": 0.5404, "step": 7745 }, { "epoch": 9.91488, "grad_norm": 0.8529120087623596, "learning_rate": 3.45218087234894e-05, "loss": 0.6056, "step": 7746 }, { "epoch": 9.91616, "grad_norm": 0.88858562707901, "learning_rate": 3.451980792316927e-05, "loss": 0.5821, "step": 7747 }, { "epoch": 9.91744, "grad_norm": 0.8706713318824768, "learning_rate": 3.451780712284914e-05, "loss": 0.6058, "step": 7748 }, { "epoch": 9.91872, "grad_norm": 0.8645205497741699, "learning_rate": 3.451580632252901e-05, "loss": 0.634, "step": 7749 }, { "epoch": 9.92, "grad_norm": 0.9036076068878174, "learning_rate": 3.4513805522208885e-05, "loss": 0.6029, "step": 7750 }, { "epoch": 9.92128, "grad_norm": 0.8720131516456604, "learning_rate": 3.451180472188876e-05, "loss": 0.587, "step": 7751 }, { "epoch": 9.92256, "grad_norm": 0.8317989110946655, "learning_rate": 3.450980392156863e-05, "loss": 0.5854, "step": 7752 }, { "epoch": 9.92384, "grad_norm": 0.8542910814285278, "learning_rate": 3.45078031212485e-05, "loss": 0.6074, "step": 7753 }, { "epoch": 9.92512, "grad_norm": 0.858959972858429, "learning_rate": 3.450580232092837e-05, "loss": 0.5892, "step": 7754 }, { "epoch": 9.9264, "grad_norm": 
0.8159505128860474, "learning_rate": 3.4503801520608245e-05, "loss": 0.5781, "step": 7755 }, { "epoch": 9.92768, "grad_norm": 0.810766339302063, "learning_rate": 3.4501800720288116e-05, "loss": 0.5439, "step": 7756 }, { "epoch": 9.92896, "grad_norm": 0.8826687335968018, "learning_rate": 3.449979991996799e-05, "loss": 0.6347, "step": 7757 }, { "epoch": 9.93024, "grad_norm": 0.8329678773880005, "learning_rate": 3.449779911964786e-05, "loss": 0.5471, "step": 7758 }, { "epoch": 9.93152, "grad_norm": 0.8527499437332153, "learning_rate": 3.449579831932773e-05, "loss": 0.5741, "step": 7759 }, { "epoch": 9.9328, "grad_norm": 0.8892127871513367, "learning_rate": 3.4493797519007604e-05, "loss": 0.6161, "step": 7760 }, { "epoch": 9.93408, "grad_norm": 0.8425252437591553, "learning_rate": 3.4491796718687476e-05, "loss": 0.6091, "step": 7761 }, { "epoch": 9.93536, "grad_norm": 0.8560076355934143, "learning_rate": 3.4489795918367354e-05, "loss": 0.6142, "step": 7762 }, { "epoch": 9.93664, "grad_norm": 0.8586561679840088, "learning_rate": 3.448779511804722e-05, "loss": 0.6231, "step": 7763 }, { "epoch": 9.93792, "grad_norm": 0.8342392444610596, "learning_rate": 3.448579431772709e-05, "loss": 0.546, "step": 7764 }, { "epoch": 9.9392, "grad_norm": 0.788269579410553, "learning_rate": 3.448379351740696e-05, "loss": 0.5268, "step": 7765 }, { "epoch": 9.94048, "grad_norm": 0.8121354579925537, "learning_rate": 3.4481792717086835e-05, "loss": 0.5221, "step": 7766 }, { "epoch": 9.94176, "grad_norm": 0.8493075966835022, "learning_rate": 3.447979191676671e-05, "loss": 0.6281, "step": 7767 }, { "epoch": 9.94304, "grad_norm": 0.8820042014122009, "learning_rate": 3.447779111644658e-05, "loss": 0.5822, "step": 7768 }, { "epoch": 9.94432, "grad_norm": 0.8810234665870667, "learning_rate": 3.447579031612645e-05, "loss": 0.5933, "step": 7769 }, { "epoch": 9.9456, "grad_norm": 0.8822959065437317, "learning_rate": 3.447378951580633e-05, "loss": 0.6068, "step": 7770 }, { "epoch": 9.94688, "grad_norm": 
0.8374966979026794, "learning_rate": 3.4471788715486194e-05, "loss": 0.5737, "step": 7771 }, { "epoch": 9.94816, "grad_norm": 0.8867483139038086, "learning_rate": 3.4469787915166066e-05, "loss": 0.5899, "step": 7772 }, { "epoch": 9.94944, "grad_norm": 0.8893982172012329, "learning_rate": 3.446778711484594e-05, "loss": 0.5771, "step": 7773 }, { "epoch": 9.95072, "grad_norm": 0.8742989897727966, "learning_rate": 3.446578631452581e-05, "loss": 0.5844, "step": 7774 }, { "epoch": 9.952, "grad_norm": 0.8614414930343628, "learning_rate": 3.446378551420568e-05, "loss": 0.5596, "step": 7775 }, { "epoch": 9.95328, "grad_norm": 0.8370686173439026, "learning_rate": 3.4461784713885554e-05, "loss": 0.5797, "step": 7776 }, { "epoch": 9.95456, "grad_norm": 0.8885643482208252, "learning_rate": 3.445978391356543e-05, "loss": 0.5866, "step": 7777 }, { "epoch": 9.95584, "grad_norm": 0.8820523619651794, "learning_rate": 3.4457783113245304e-05, "loss": 0.5999, "step": 7778 }, { "epoch": 9.95712, "grad_norm": 0.8967301249504089, "learning_rate": 3.445578231292517e-05, "loss": 0.6284, "step": 7779 }, { "epoch": 9.9584, "grad_norm": 0.8450223207473755, "learning_rate": 3.445378151260504e-05, "loss": 0.5917, "step": 7780 }, { "epoch": 9.95968, "grad_norm": 0.8768996596336365, "learning_rate": 3.445178071228491e-05, "loss": 0.5879, "step": 7781 }, { "epoch": 9.96096, "grad_norm": 0.876092255115509, "learning_rate": 3.4449779911964785e-05, "loss": 0.609, "step": 7782 }, { "epoch": 9.96224, "grad_norm": 0.799235463142395, "learning_rate": 3.4447779111644657e-05, "loss": 0.5266, "step": 7783 }, { "epoch": 9.96352, "grad_norm": 0.8335480690002441, "learning_rate": 3.4445778311324535e-05, "loss": 0.579, "step": 7784 }, { "epoch": 9.9648, "grad_norm": 0.8067023158073425, "learning_rate": 3.444377751100441e-05, "loss": 0.5569, "step": 7785 }, { "epoch": 9.96608, "grad_norm": 0.8609577417373657, "learning_rate": 3.444177671068428e-05, "loss": 0.614, "step": 7786 }, { "epoch": 9.96736, "grad_norm": 
0.8286531567573547, "learning_rate": 3.4439775910364144e-05, "loss": 0.582, "step": 7787 }, { "epoch": 9.96864, "grad_norm": 0.8231421113014221, "learning_rate": 3.4437775110044016e-05, "loss": 0.5541, "step": 7788 }, { "epoch": 9.96992, "grad_norm": 0.8715162873268127, "learning_rate": 3.443577430972389e-05, "loss": 0.6195, "step": 7789 }, { "epoch": 9.9712, "grad_norm": 0.9087401032447815, "learning_rate": 3.443377350940376e-05, "loss": 0.593, "step": 7790 }, { "epoch": 9.972480000000001, "grad_norm": 0.8596914410591125, "learning_rate": 3.443177270908364e-05, "loss": 0.5851, "step": 7791 }, { "epoch": 9.97376, "grad_norm": 0.8268222808837891, "learning_rate": 3.442977190876351e-05, "loss": 0.5725, "step": 7792 }, { "epoch": 9.97504, "grad_norm": 0.8293636441230774, "learning_rate": 3.442777110844338e-05, "loss": 0.5897, "step": 7793 }, { "epoch": 9.97632, "grad_norm": 0.8125051259994507, "learning_rate": 3.4425770308123254e-05, "loss": 0.5753, "step": 7794 }, { "epoch": 9.9776, "grad_norm": 0.8295629024505615, "learning_rate": 3.442376950780312e-05, "loss": 0.5983, "step": 7795 }, { "epoch": 9.97888, "grad_norm": 0.828607439994812, "learning_rate": 3.442176870748299e-05, "loss": 0.6585, "step": 7796 }, { "epoch": 9.98016, "grad_norm": 0.8404040932655334, "learning_rate": 3.441976790716286e-05, "loss": 0.5979, "step": 7797 }, { "epoch": 9.98144, "grad_norm": 0.8014954924583435, "learning_rate": 3.441776710684274e-05, "loss": 0.5512, "step": 7798 }, { "epoch": 9.98272, "grad_norm": 0.8161661028862, "learning_rate": 3.441576630652261e-05, "loss": 0.5654, "step": 7799 }, { "epoch": 9.984, "grad_norm": 0.8457411527633667, "learning_rate": 3.4413765506202485e-05, "loss": 0.6289, "step": 7800 }, { "epoch": 9.98528, "grad_norm": 0.8675429224967957, "learning_rate": 3.441176470588236e-05, "loss": 0.5657, "step": 7801 }, { "epoch": 9.98656, "grad_norm": 0.8058546185493469, "learning_rate": 3.440976390556223e-05, "loss": 0.5645, "step": 7802 }, { "epoch": 9.98784, 
"grad_norm": 0.7946271896362305, "learning_rate": 3.4407763105242094e-05, "loss": 0.5822, "step": 7803 }, { "epoch": 9.98912, "grad_norm": 0.7849445939064026, "learning_rate": 3.4405762304921965e-05, "loss": 0.5638, "step": 7804 }, { "epoch": 9.9904, "grad_norm": 0.8515406847000122, "learning_rate": 3.4403761504601844e-05, "loss": 0.5734, "step": 7805 }, { "epoch": 9.99168, "grad_norm": 0.8528926968574524, "learning_rate": 3.4401760704281716e-05, "loss": 0.5722, "step": 7806 }, { "epoch": 9.99296, "grad_norm": 0.851125180721283, "learning_rate": 3.439975990396159e-05, "loss": 0.5603, "step": 7807 }, { "epoch": 9.99424, "grad_norm": 0.8529646396636963, "learning_rate": 3.439775910364146e-05, "loss": 0.6152, "step": 7808 }, { "epoch": 9.995519999999999, "grad_norm": 0.8701699376106262, "learning_rate": 3.439575830332133e-05, "loss": 0.6082, "step": 7809 }, { "epoch": 9.9968, "grad_norm": 0.875981330871582, "learning_rate": 3.4393757503001203e-05, "loss": 0.5978, "step": 7810 }, { "epoch": 9.99808, "grad_norm": 0.8291494250297546, "learning_rate": 3.439175670268107e-05, "loss": 0.5422, "step": 7811 }, { "epoch": 9.99936, "grad_norm": 0.9472531080245972, "learning_rate": 3.438975590236095e-05, "loss": 0.6558, "step": 7812 }, { "epoch": 10.00064, "grad_norm": 1.5498021841049194, "learning_rate": 3.438775510204082e-05, "loss": 0.8758, "step": 7813 }, { "epoch": 10.00192, "grad_norm": 0.8499518632888794, "learning_rate": 3.438575430172069e-05, "loss": 0.5858, "step": 7814 }, { "epoch": 10.0032, "grad_norm": 0.7815194129943848, "learning_rate": 3.438375350140056e-05, "loss": 0.5487, "step": 7815 }, { "epoch": 10.00448, "grad_norm": 0.8824237585067749, "learning_rate": 3.4381752701080435e-05, "loss": 0.65, "step": 7816 }, { "epoch": 10.00576, "grad_norm": 0.7906972169876099, "learning_rate": 3.4379751900760306e-05, "loss": 0.5486, "step": 7817 }, { "epoch": 10.00704, "grad_norm": 0.8036819100379944, "learning_rate": 3.437775110044018e-05, "loss": 0.5068, "step": 7818 }, { 
"epoch": 10.00832, "grad_norm": 0.7495970129966736, "learning_rate": 3.437575030012005e-05, "loss": 0.5221, "step": 7819 }, { "epoch": 10.0096, "grad_norm": 0.8430529832839966, "learning_rate": 3.437374949979992e-05, "loss": 0.5377, "step": 7820 }, { "epoch": 10.01088, "grad_norm": 0.8672014474868774, "learning_rate": 3.4371748699479794e-05, "loss": 0.5683, "step": 7821 }, { "epoch": 10.01216, "grad_norm": 0.904216468334198, "learning_rate": 3.4369747899159666e-05, "loss": 0.6345, "step": 7822 }, { "epoch": 10.01344, "grad_norm": 0.9090774655342102, "learning_rate": 3.436774709883954e-05, "loss": 0.6341, "step": 7823 }, { "epoch": 10.01472, "grad_norm": 0.857877790927887, "learning_rate": 3.436574629851941e-05, "loss": 0.5991, "step": 7824 }, { "epoch": 10.016, "grad_norm": 0.8711366057395935, "learning_rate": 3.436374549819928e-05, "loss": 0.5882, "step": 7825 }, { "epoch": 10.01728, "grad_norm": 0.8916945457458496, "learning_rate": 3.436174469787915e-05, "loss": 0.5846, "step": 7826 }, { "epoch": 10.01856, "grad_norm": 0.8220417499542236, "learning_rate": 3.4359743897559025e-05, "loss": 0.5259, "step": 7827 }, { "epoch": 10.01984, "grad_norm": 0.8267784118652344, "learning_rate": 3.43577430972389e-05, "loss": 0.5125, "step": 7828 }, { "epoch": 10.02112, "grad_norm": 0.8478044271469116, "learning_rate": 3.435574229691877e-05, "loss": 0.564, "step": 7829 }, { "epoch": 10.0224, "grad_norm": 0.8476915955543518, "learning_rate": 3.435374149659864e-05, "loss": 0.514, "step": 7830 }, { "epoch": 10.02368, "grad_norm": 0.8940284252166748, "learning_rate": 3.435174069627851e-05, "loss": 0.6042, "step": 7831 }, { "epoch": 10.02496, "grad_norm": 0.8573498129844666, "learning_rate": 3.4349739895958384e-05, "loss": 0.5649, "step": 7832 }, { "epoch": 10.02624, "grad_norm": 0.8762027621269226, "learning_rate": 3.4347739095638256e-05, "loss": 0.5969, "step": 7833 }, { "epoch": 10.02752, "grad_norm": 0.9091726541519165, "learning_rate": 3.434573829531813e-05, "loss": 0.5614, 
"step": 7834 }, { "epoch": 10.0288, "grad_norm": 0.8687475919723511, "learning_rate": 3.4343737494998e-05, "loss": 0.5659, "step": 7835 }, { "epoch": 10.03008, "grad_norm": 0.8800902366638184, "learning_rate": 3.434173669467787e-05, "loss": 0.5798, "step": 7836 }, { "epoch": 10.03136, "grad_norm": 0.8568121194839478, "learning_rate": 3.4339735894357744e-05, "loss": 0.5399, "step": 7837 }, { "epoch": 10.03264, "grad_norm": 0.8370882272720337, "learning_rate": 3.4337735094037615e-05, "loss": 0.5406, "step": 7838 }, { "epoch": 10.03392, "grad_norm": 0.8754110932350159, "learning_rate": 3.433573429371749e-05, "loss": 0.5842, "step": 7839 }, { "epoch": 10.0352, "grad_norm": 0.8230734467506409, "learning_rate": 3.4333733493397366e-05, "loss": 0.529, "step": 7840 }, { "epoch": 10.03648, "grad_norm": 0.8830897212028503, "learning_rate": 3.433173269307723e-05, "loss": 0.5796, "step": 7841 }, { "epoch": 10.03776, "grad_norm": 0.8082362413406372, "learning_rate": 3.43297318927571e-05, "loss": 0.5195, "step": 7842 }, { "epoch": 10.03904, "grad_norm": 0.8439376354217529, "learning_rate": 3.4327731092436975e-05, "loss": 0.5555, "step": 7843 }, { "epoch": 10.04032, "grad_norm": 0.8426663279533386, "learning_rate": 3.4325730292116847e-05, "loss": 0.5987, "step": 7844 }, { "epoch": 10.0416, "grad_norm": 0.8852523565292358, "learning_rate": 3.432372949179672e-05, "loss": 0.5595, "step": 7845 }, { "epoch": 10.04288, "grad_norm": 0.83525151014328, "learning_rate": 3.432172869147659e-05, "loss": 0.5076, "step": 7846 }, { "epoch": 10.04416, "grad_norm": 0.879931628704071, "learning_rate": 3.431972789115647e-05, "loss": 0.56, "step": 7847 }, { "epoch": 10.04544, "grad_norm": 0.8788473606109619, "learning_rate": 3.431772709083634e-05, "loss": 0.6005, "step": 7848 }, { "epoch": 10.04672, "grad_norm": 0.8380979895591736, "learning_rate": 3.4315726290516206e-05, "loss": 0.5227, "step": 7849 }, { "epoch": 10.048, "grad_norm": 0.8248594999313354, "learning_rate": 3.431372549019608e-05, "loss": 
0.5403, "step": 7850 }, { "epoch": 10.04928, "grad_norm": 0.8815642595291138, "learning_rate": 3.431172468987595e-05, "loss": 0.5672, "step": 7851 }, { "epoch": 10.05056, "grad_norm": 0.8581164479255676, "learning_rate": 3.430972388955582e-05, "loss": 0.5726, "step": 7852 }, { "epoch": 10.05184, "grad_norm": 0.9075201749801636, "learning_rate": 3.430772308923569e-05, "loss": 0.6214, "step": 7853 }, { "epoch": 10.05312, "grad_norm": 0.918493390083313, "learning_rate": 3.430572228891557e-05, "loss": 0.591, "step": 7854 }, { "epoch": 10.0544, "grad_norm": 0.9461652040481567, "learning_rate": 3.4303721488595444e-05, "loss": 0.641, "step": 7855 }, { "epoch": 10.05568, "grad_norm": 0.9013059735298157, "learning_rate": 3.4301720688275316e-05, "loss": 0.548, "step": 7856 }, { "epoch": 10.05696, "grad_norm": 0.8635231256484985, "learning_rate": 3.429971988795518e-05, "loss": 0.5381, "step": 7857 }, { "epoch": 10.05824, "grad_norm": 0.8652742505073547, "learning_rate": 3.429771908763505e-05, "loss": 0.526, "step": 7858 }, { "epoch": 10.05952, "grad_norm": 0.8748012781143188, "learning_rate": 3.4295718287314924e-05, "loss": 0.5851, "step": 7859 }, { "epoch": 10.0608, "grad_norm": 0.8530634641647339, "learning_rate": 3.4293717486994796e-05, "loss": 0.5469, "step": 7860 }, { "epoch": 10.06208, "grad_norm": 0.8816656470298767, "learning_rate": 3.4291716686674675e-05, "loss": 0.5817, "step": 7861 }, { "epoch": 10.06336, "grad_norm": 0.8042013049125671, "learning_rate": 3.428971588635455e-05, "loss": 0.5568, "step": 7862 }, { "epoch": 10.06464, "grad_norm": 0.8870890736579895, "learning_rate": 3.428771508603442e-05, "loss": 0.5667, "step": 7863 }, { "epoch": 10.06592, "grad_norm": 0.9021799564361572, "learning_rate": 3.428571428571429e-05, "loss": 0.5946, "step": 7864 }, { "epoch": 10.0672, "grad_norm": 0.8616315126419067, "learning_rate": 3.4283713485394156e-05, "loss": 0.5375, "step": 7865 }, { "epoch": 10.06848, "grad_norm": 0.8459718823432922, "learning_rate": 
3.428171268507403e-05, "loss": 0.5569, "step": 7866 }, { "epoch": 10.06976, "grad_norm": 0.8223601579666138, "learning_rate": 3.42797118847539e-05, "loss": 0.5202, "step": 7867 }, { "epoch": 10.07104, "grad_norm": 0.8322831392288208, "learning_rate": 3.427771108443378e-05, "loss": 0.5596, "step": 7868 }, { "epoch": 10.07232, "grad_norm": 0.8532194495201111, "learning_rate": 3.427571028411365e-05, "loss": 0.562, "step": 7869 }, { "epoch": 10.0736, "grad_norm": 0.8365527391433716, "learning_rate": 3.427370948379352e-05, "loss": 0.5869, "step": 7870 }, { "epoch": 10.07488, "grad_norm": 0.8544728755950928, "learning_rate": 3.4271708683473393e-05, "loss": 0.562, "step": 7871 }, { "epoch": 10.07616, "grad_norm": 0.8916680216789246, "learning_rate": 3.4269707883153265e-05, "loss": 0.6106, "step": 7872 }, { "epoch": 10.07744, "grad_norm": 0.904686450958252, "learning_rate": 3.426770708283313e-05, "loss": 0.5728, "step": 7873 }, { "epoch": 10.07872, "grad_norm": 0.9149865508079529, "learning_rate": 3.4265706282513e-05, "loss": 0.5738, "step": 7874 }, { "epoch": 10.08, "grad_norm": 0.905720591545105, "learning_rate": 3.426370548219288e-05, "loss": 0.5414, "step": 7875 }, { "epoch": 10.08128, "grad_norm": 0.9203237295150757, "learning_rate": 3.426170468187275e-05, "loss": 0.5853, "step": 7876 }, { "epoch": 10.08256, "grad_norm": 0.8600616455078125, "learning_rate": 3.4259703881552625e-05, "loss": 0.5506, "step": 7877 }, { "epoch": 10.08384, "grad_norm": 0.8730953335762024, "learning_rate": 3.4257703081232496e-05, "loss": 0.5553, "step": 7878 }, { "epoch": 10.08512, "grad_norm": 0.8567367792129517, "learning_rate": 3.425570228091237e-05, "loss": 0.5875, "step": 7879 }, { "epoch": 10.0864, "grad_norm": 0.8998702168464661, "learning_rate": 3.425370148059224e-05, "loss": 0.6049, "step": 7880 }, { "epoch": 10.08768, "grad_norm": 0.88094162940979, "learning_rate": 3.4251700680272105e-05, "loss": 0.5715, "step": 7881 }, { "epoch": 10.08896, "grad_norm": 0.8907934427261353, 
"learning_rate": 3.424969987995198e-05, "loss": 0.5553, "step": 7882 }, { "epoch": 10.09024, "grad_norm": 0.809238076210022, "learning_rate": 3.4247699079631856e-05, "loss": 0.5389, "step": 7883 }, { "epoch": 10.09152, "grad_norm": 0.8654890060424805, "learning_rate": 3.424569827931173e-05, "loss": 0.5753, "step": 7884 }, { "epoch": 10.0928, "grad_norm": 0.8068164587020874, "learning_rate": 3.42436974789916e-05, "loss": 0.5082, "step": 7885 }, { "epoch": 10.09408, "grad_norm": 0.8308097124099731, "learning_rate": 3.424169667867147e-05, "loss": 0.5334, "step": 7886 }, { "epoch": 10.09536, "grad_norm": 0.889793336391449, "learning_rate": 3.423969587835134e-05, "loss": 0.5774, "step": 7887 }, { "epoch": 10.09664, "grad_norm": 0.9080507159233093, "learning_rate": 3.4237695078031215e-05, "loss": 0.5919, "step": 7888 }, { "epoch": 10.09792, "grad_norm": 0.8671941757202148, "learning_rate": 3.423569427771108e-05, "loss": 0.5619, "step": 7889 }, { "epoch": 10.0992, "grad_norm": 0.8323997259140015, "learning_rate": 3.423369347739096e-05, "loss": 0.4927, "step": 7890 }, { "epoch": 10.10048, "grad_norm": 0.8521079421043396, "learning_rate": 3.423169267707083e-05, "loss": 0.5689, "step": 7891 }, { "epoch": 10.10176, "grad_norm": 0.8631630539894104, "learning_rate": 3.42296918767507e-05, "loss": 0.5655, "step": 7892 }, { "epoch": 10.10304, "grad_norm": 0.8886538147926331, "learning_rate": 3.4227691076430574e-05, "loss": 0.5851, "step": 7893 }, { "epoch": 10.10432, "grad_norm": 0.8930172920227051, "learning_rate": 3.4225690276110446e-05, "loss": 0.6251, "step": 7894 }, { "epoch": 10.1056, "grad_norm": 0.8463165760040283, "learning_rate": 3.422368947579032e-05, "loss": 0.4898, "step": 7895 }, { "epoch": 10.10688, "grad_norm": 0.893409252166748, "learning_rate": 3.422168867547019e-05, "loss": 0.5761, "step": 7896 }, { "epoch": 10.10816, "grad_norm": 0.8559973239898682, "learning_rate": 3.421968787515006e-05, "loss": 0.5351, "step": 7897 }, { "epoch": 10.10944, "grad_norm": 
0.8258970379829407, "learning_rate": 3.4217687074829934e-05, "loss": 0.5773, "step": 7898 }, { "epoch": 10.11072, "grad_norm": 0.8773029446601868, "learning_rate": 3.4215686274509805e-05, "loss": 0.547, "step": 7899 }, { "epoch": 10.112, "grad_norm": 0.8930992484092712, "learning_rate": 3.421368547418968e-05, "loss": 0.6053, "step": 7900 }, { "epoch": 10.11328, "grad_norm": 0.872535228729248, "learning_rate": 3.421168467386955e-05, "loss": 0.5813, "step": 7901 }, { "epoch": 10.11456, "grad_norm": 0.8744470477104187, "learning_rate": 3.420968387354942e-05, "loss": 0.5333, "step": 7902 }, { "epoch": 10.11584, "grad_norm": 0.8636131882667542, "learning_rate": 3.420768307322929e-05, "loss": 0.5746, "step": 7903 }, { "epoch": 10.11712, "grad_norm": 0.865611732006073, "learning_rate": 3.4205682272909165e-05, "loss": 0.561, "step": 7904 }, { "epoch": 10.1184, "grad_norm": 0.8732993006706238, "learning_rate": 3.4203681472589037e-05, "loss": 0.6067, "step": 7905 }, { "epoch": 10.11968, "grad_norm": 0.8363077044487, "learning_rate": 3.420168067226891e-05, "loss": 0.5354, "step": 7906 }, { "epoch": 10.12096, "grad_norm": 0.9122660756111145, "learning_rate": 3.419967987194878e-05, "loss": 0.6047, "step": 7907 }, { "epoch": 10.12224, "grad_norm": 0.8874605298042297, "learning_rate": 3.419767907162865e-05, "loss": 0.6021, "step": 7908 }, { "epoch": 10.12352, "grad_norm": 0.9380055665969849, "learning_rate": 3.4195678271308524e-05, "loss": 0.6033, "step": 7909 }, { "epoch": 10.1248, "grad_norm": 0.8816724419593811, "learning_rate": 3.4193677470988396e-05, "loss": 0.5775, "step": 7910 }, { "epoch": 10.12608, "grad_norm": 0.8694825172424316, "learning_rate": 3.4191676670668274e-05, "loss": 0.5716, "step": 7911 }, { "epoch": 10.12736, "grad_norm": 0.9232937693595886, "learning_rate": 3.418967587034814e-05, "loss": 0.6145, "step": 7912 }, { "epoch": 10.12864, "grad_norm": 0.8252090215682983, "learning_rate": 3.418767507002801e-05, "loss": 0.5711, "step": 7913 }, { "epoch": 10.12992, 
"grad_norm": 0.8287693858146667, "learning_rate": 3.418567426970788e-05, "loss": 0.5349, "step": 7914 }, { "epoch": 10.1312, "grad_norm": 0.8445566892623901, "learning_rate": 3.4183673469387755e-05, "loss": 0.5614, "step": 7915 }, { "epoch": 10.13248, "grad_norm": 0.886237382888794, "learning_rate": 3.418167266906763e-05, "loss": 0.5865, "step": 7916 }, { "epoch": 10.13376, "grad_norm": 0.8767721652984619, "learning_rate": 3.41796718687475e-05, "loss": 0.5316, "step": 7917 }, { "epoch": 10.13504, "grad_norm": 0.907900333404541, "learning_rate": 3.417767106842738e-05, "loss": 0.5782, "step": 7918 }, { "epoch": 10.13632, "grad_norm": 0.8434520363807678, "learning_rate": 3.417567026810725e-05, "loss": 0.5359, "step": 7919 }, { "epoch": 10.1376, "grad_norm": 0.8087551593780518, "learning_rate": 3.4173669467787114e-05, "loss": 0.5508, "step": 7920 }, { "epoch": 10.13888, "grad_norm": 0.9005576968193054, "learning_rate": 3.4171668667466986e-05, "loss": 0.5558, "step": 7921 }, { "epoch": 10.14016, "grad_norm": 0.8877005577087402, "learning_rate": 3.416966786714686e-05, "loss": 0.615, "step": 7922 }, { "epoch": 10.14144, "grad_norm": 0.8587065935134888, "learning_rate": 3.416766706682673e-05, "loss": 0.5714, "step": 7923 }, { "epoch": 10.14272, "grad_norm": 0.8844937086105347, "learning_rate": 3.41656662665066e-05, "loss": 0.6407, "step": 7924 }, { "epoch": 10.144, "grad_norm": 0.8781236410140991, "learning_rate": 3.416366546618648e-05, "loss": 0.5428, "step": 7925 }, { "epoch": 10.14528, "grad_norm": 0.8121967315673828, "learning_rate": 3.416166466586635e-05, "loss": 0.5061, "step": 7926 }, { "epoch": 10.14656, "grad_norm": 0.8259449005126953, "learning_rate": 3.4159663865546224e-05, "loss": 0.5297, "step": 7927 }, { "epoch": 10.14784, "grad_norm": 0.9252973794937134, "learning_rate": 3.415766306522609e-05, "loss": 0.6159, "step": 7928 }, { "epoch": 10.14912, "grad_norm": 0.8705994486808777, "learning_rate": 3.415566226490596e-05, "loss": 0.5131, "step": 7929 }, { 
"epoch": 10.1504, "grad_norm": 0.892468273639679, "learning_rate": 3.415366146458583e-05, "loss": 0.551, "step": 7930 }, { "epoch": 10.15168, "grad_norm": 0.8754770755767822, "learning_rate": 3.4151660664265705e-05, "loss": 0.5457, "step": 7931 }, { "epoch": 10.15296, "grad_norm": 0.8621395826339722, "learning_rate": 3.4149659863945583e-05, "loss": 0.5292, "step": 7932 }, { "epoch": 10.15424, "grad_norm": 0.9692301750183105, "learning_rate": 3.4147659063625455e-05, "loss": 0.5985, "step": 7933 }, { "epoch": 10.15552, "grad_norm": 0.9365867376327515, "learning_rate": 3.414565826330533e-05, "loss": 0.5769, "step": 7934 }, { "epoch": 10.1568, "grad_norm": 0.9413188695907593, "learning_rate": 3.41436574629852e-05, "loss": 0.6406, "step": 7935 }, { "epoch": 10.15808, "grad_norm": 0.8697437047958374, "learning_rate": 3.4141656662665064e-05, "loss": 0.5793, "step": 7936 }, { "epoch": 10.15936, "grad_norm": 0.8574464917182922, "learning_rate": 3.4139655862344936e-05, "loss": 0.5234, "step": 7937 }, { "epoch": 10.16064, "grad_norm": 0.8575087785720825, "learning_rate": 3.413765506202481e-05, "loss": 0.5734, "step": 7938 }, { "epoch": 10.16192, "grad_norm": 0.8977473974227905, "learning_rate": 3.4135654261704686e-05, "loss": 0.5894, "step": 7939 }, { "epoch": 10.1632, "grad_norm": 0.8591358661651611, "learning_rate": 3.413365346138456e-05, "loss": 0.527, "step": 7940 }, { "epoch": 10.16448, "grad_norm": 0.8251833915710449, "learning_rate": 3.413165266106443e-05, "loss": 0.5393, "step": 7941 }, { "epoch": 10.16576, "grad_norm": 0.8660216331481934, "learning_rate": 3.41296518607443e-05, "loss": 0.5797, "step": 7942 }, { "epoch": 10.16704, "grad_norm": 0.9057691693305969, "learning_rate": 3.4127651060424174e-05, "loss": 0.5394, "step": 7943 }, { "epoch": 10.16832, "grad_norm": 0.914899468421936, "learning_rate": 3.412565026010404e-05, "loss": 0.5747, "step": 7944 }, { "epoch": 10.1696, "grad_norm": 0.838821291923523, "learning_rate": 3.412364945978391e-05, "loss": 0.5453, 
"step": 7945 }, { "epoch": 10.17088, "grad_norm": 0.8535134792327881, "learning_rate": 3.412164865946379e-05, "loss": 0.5618, "step": 7946 }, { "epoch": 10.17216, "grad_norm": 0.8193386793136597, "learning_rate": 3.411964785914366e-05, "loss": 0.5303, "step": 7947 }, { "epoch": 10.17344, "grad_norm": 0.8792878985404968, "learning_rate": 3.411764705882353e-05, "loss": 0.6145, "step": 7948 }, { "epoch": 10.17472, "grad_norm": 0.9004140496253967, "learning_rate": 3.4115646258503405e-05, "loss": 0.6126, "step": 7949 }, { "epoch": 10.176, "grad_norm": 0.883179247379303, "learning_rate": 3.411364545818328e-05, "loss": 0.5655, "step": 7950 }, { "epoch": 10.17728, "grad_norm": 0.8625328540802002, "learning_rate": 3.411164465786315e-05, "loss": 0.5993, "step": 7951 }, { "epoch": 10.17856, "grad_norm": 0.8304584622383118, "learning_rate": 3.4109643857543014e-05, "loss": 0.5177, "step": 7952 }, { "epoch": 10.17984, "grad_norm": 0.8241145610809326, "learning_rate": 3.410764305722289e-05, "loss": 0.5112, "step": 7953 }, { "epoch": 10.18112, "grad_norm": 0.8981744647026062, "learning_rate": 3.4105642256902764e-05, "loss": 0.5897, "step": 7954 }, { "epoch": 10.1824, "grad_norm": 0.8619903326034546, "learning_rate": 3.4103641456582636e-05, "loss": 0.5543, "step": 7955 }, { "epoch": 10.18368, "grad_norm": 0.8140372037887573, "learning_rate": 3.410164065626251e-05, "loss": 0.5214, "step": 7956 }, { "epoch": 10.18496, "grad_norm": 0.8518925309181213, "learning_rate": 3.409963985594238e-05, "loss": 0.5465, "step": 7957 }, { "epoch": 10.18624, "grad_norm": 0.8702125549316406, "learning_rate": 3.409763905562225e-05, "loss": 0.5621, "step": 7958 }, { "epoch": 10.18752, "grad_norm": 0.8971076011657715, "learning_rate": 3.4095638255302124e-05, "loss": 0.5631, "step": 7959 }, { "epoch": 10.1888, "grad_norm": 0.853391706943512, "learning_rate": 3.4093637454981995e-05, "loss": 0.5682, "step": 7960 }, { "epoch": 10.19008, "grad_norm": 0.8766993284225464, "learning_rate": 3.409163665466187e-05, 
"loss": 0.6276, "step": 7961 }, { "epoch": 10.19136, "grad_norm": 0.8604820370674133, "learning_rate": 3.408963585434174e-05, "loss": 0.5688, "step": 7962 }, { "epoch": 10.19264, "grad_norm": 0.8074341416358948, "learning_rate": 3.408763505402161e-05, "loss": 0.5106, "step": 7963 }, { "epoch": 10.19392, "grad_norm": 0.871636152267456, "learning_rate": 3.408563425370148e-05, "loss": 0.6026, "step": 7964 }, { "epoch": 10.1952, "grad_norm": 0.8580559492111206, "learning_rate": 3.4083633453381355e-05, "loss": 0.5621, "step": 7965 }, { "epoch": 10.19648, "grad_norm": 0.8855502605438232, "learning_rate": 3.408163265306123e-05, "loss": 0.5359, "step": 7966 }, { "epoch": 10.19776, "grad_norm": 0.8627590537071228, "learning_rate": 3.40796318527411e-05, "loss": 0.521, "step": 7967 }, { "epoch": 10.19904, "grad_norm": 0.7877764105796814, "learning_rate": 3.407763105242097e-05, "loss": 0.5293, "step": 7968 }, { "epoch": 10.20032, "grad_norm": 0.8240257501602173, "learning_rate": 3.407563025210084e-05, "loss": 0.5453, "step": 7969 }, { "epoch": 10.2016, "grad_norm": 0.8509385585784912, "learning_rate": 3.4073629451780714e-05, "loss": 0.5745, "step": 7970 }, { "epoch": 10.20288, "grad_norm": 0.8994958400726318, "learning_rate": 3.4071628651460586e-05, "loss": 0.5561, "step": 7971 }, { "epoch": 10.20416, "grad_norm": 0.8885303139686584, "learning_rate": 3.406962785114046e-05, "loss": 0.5537, "step": 7972 }, { "epoch": 10.20544, "grad_norm": 0.9128597378730774, "learning_rate": 3.406762705082033e-05, "loss": 0.5795, "step": 7973 }, { "epoch": 10.20672, "grad_norm": 0.83772212266922, "learning_rate": 3.40656262505002e-05, "loss": 0.5156, "step": 7974 }, { "epoch": 10.208, "grad_norm": 0.822971522808075, "learning_rate": 3.406362545018007e-05, "loss": 0.5237, "step": 7975 }, { "epoch": 10.20928, "grad_norm": 0.898842453956604, "learning_rate": 3.4061624649859945e-05, "loss": 0.5979, "step": 7976 }, { "epoch": 10.21056, "grad_norm": 0.9250447154045105, "learning_rate": 
3.405962384953982e-05, "loss": 0.6017, "step": 7977 }, { "epoch": 10.21184, "grad_norm": 0.8869206309318542, "learning_rate": 3.405762304921969e-05, "loss": 0.5366, "step": 7978 }, { "epoch": 10.21312, "grad_norm": 0.863950788974762, "learning_rate": 3.405562224889956e-05, "loss": 0.5374, "step": 7979 }, { "epoch": 10.2144, "grad_norm": 0.81676185131073, "learning_rate": 3.405362144857943e-05, "loss": 0.5335, "step": 7980 }, { "epoch": 10.21568, "grad_norm": 0.8401233553886414, "learning_rate": 3.405162064825931e-05, "loss": 0.5484, "step": 7981 }, { "epoch": 10.21696, "grad_norm": 0.8678064346313477, "learning_rate": 3.4049619847939176e-05, "loss": 0.5231, "step": 7982 }, { "epoch": 10.21824, "grad_norm": 0.8489272594451904, "learning_rate": 3.404761904761905e-05, "loss": 0.4638, "step": 7983 }, { "epoch": 10.21952, "grad_norm": 0.8029497265815735, "learning_rate": 3.404561824729892e-05, "loss": 0.5081, "step": 7984 }, { "epoch": 10.2208, "grad_norm": 0.8994140625, "learning_rate": 3.404361744697879e-05, "loss": 0.5386, "step": 7985 }, { "epoch": 10.22208, "grad_norm": 0.8933800458908081, "learning_rate": 3.4041616646658664e-05, "loss": 0.5312, "step": 7986 }, { "epoch": 10.22336, "grad_norm": 0.8817566633224487, "learning_rate": 3.4039615846338536e-05, "loss": 0.5578, "step": 7987 }, { "epoch": 10.22464, "grad_norm": 0.8852986693382263, "learning_rate": 3.403761504601841e-05, "loss": 0.5669, "step": 7988 }, { "epoch": 10.22592, "grad_norm": 0.9889851212501526, "learning_rate": 3.4035614245698286e-05, "loss": 0.5101, "step": 7989 }, { "epoch": 10.2272, "grad_norm": 0.8555750846862793, "learning_rate": 3.403361344537815e-05, "loss": 0.551, "step": 7990 }, { "epoch": 10.22848, "grad_norm": 0.838453471660614, "learning_rate": 3.403161264505802e-05, "loss": 0.565, "step": 7991 }, { "epoch": 10.22976, "grad_norm": 0.8447315096855164, "learning_rate": 3.4029611844737895e-05, "loss": 0.5671, "step": 7992 }, { "epoch": 10.23104, "grad_norm": 0.8619872331619263, 
"learning_rate": 3.402761104441777e-05, "loss": 0.5877, "step": 7993 }, { "epoch": 10.23232, "grad_norm": 0.8690614104270935, "learning_rate": 3.402561024409764e-05, "loss": 0.5583, "step": 7994 }, { "epoch": 10.2336, "grad_norm": 0.9469643831253052, "learning_rate": 3.402360944377751e-05, "loss": 0.5224, "step": 7995 }, { "epoch": 10.23488, "grad_norm": 0.8962279558181763, "learning_rate": 3.402160864345739e-05, "loss": 0.5714, "step": 7996 }, { "epoch": 10.23616, "grad_norm": 0.9197521805763245, "learning_rate": 3.401960784313726e-05, "loss": 0.5755, "step": 7997 }, { "epoch": 10.23744, "grad_norm": 0.8115956783294678, "learning_rate": 3.4017607042817126e-05, "loss": 0.5475, "step": 7998 }, { "epoch": 10.23872, "grad_norm": 0.7984319925308228, "learning_rate": 3.4015606242497e-05, "loss": 0.5479, "step": 7999 }, { "epoch": 10.24, "grad_norm": 0.8857104778289795, "learning_rate": 3.401360544217687e-05, "loss": 0.5641, "step": 8000 }, { "epoch": 10.24128, "grad_norm": 0.9119358062744141, "learning_rate": 3.401160464185674e-05, "loss": 0.5903, "step": 8001 }, { "epoch": 10.24256, "grad_norm": 0.8577554821968079, "learning_rate": 3.4009603841536613e-05, "loss": 0.5551, "step": 8002 }, { "epoch": 10.24384, "grad_norm": 0.9047631025314331, "learning_rate": 3.400760304121649e-05, "loss": 0.551, "step": 8003 }, { "epoch": 10.24512, "grad_norm": 0.8967153429985046, "learning_rate": 3.4005602240896364e-05, "loss": 0.5531, "step": 8004 }, { "epoch": 10.2464, "grad_norm": 0.8773406744003296, "learning_rate": 3.4003601440576236e-05, "loss": 0.5648, "step": 8005 }, { "epoch": 10.24768, "grad_norm": 0.903235137462616, "learning_rate": 3.40016006402561e-05, "loss": 0.5552, "step": 8006 }, { "epoch": 10.24896, "grad_norm": 0.8304867148399353, "learning_rate": 3.399959983993597e-05, "loss": 0.5472, "step": 8007 }, { "epoch": 10.25024, "grad_norm": 0.844079852104187, "learning_rate": 3.3997599039615845e-05, "loss": 0.5607, "step": 8008 }, { "epoch": 10.25152, "grad_norm": 
0.8768244981765747, "learning_rate": 3.3995598239295716e-05, "loss": 0.5559, "step": 8009 }, { "epoch": 10.2528, "grad_norm": 0.8504285216331482, "learning_rate": 3.3993597438975595e-05, "loss": 0.5555, "step": 8010 }, { "epoch": 10.25408, "grad_norm": 0.8665524125099182, "learning_rate": 3.399159663865547e-05, "loss": 0.5473, "step": 8011 }, { "epoch": 10.25536, "grad_norm": 0.8851114511489868, "learning_rate": 3.398959583833534e-05, "loss": 0.6117, "step": 8012 }, { "epoch": 10.25664, "grad_norm": 0.8923035860061646, "learning_rate": 3.398759503801521e-05, "loss": 0.5575, "step": 8013 }, { "epoch": 10.25792, "grad_norm": 0.8378768563270569, "learning_rate": 3.3985594237695076e-05, "loss": 0.5484, "step": 8014 }, { "epoch": 10.2592, "grad_norm": 0.8756462931632996, "learning_rate": 3.398359343737495e-05, "loss": 0.562, "step": 8015 }, { "epoch": 10.26048, "grad_norm": 0.8708872199058533, "learning_rate": 3.398159263705482e-05, "loss": 0.5702, "step": 8016 }, { "epoch": 10.26176, "grad_norm": 0.865534245967865, "learning_rate": 3.39795918367347e-05, "loss": 0.5521, "step": 8017 }, { "epoch": 10.26304, "grad_norm": 0.8553348779678345, "learning_rate": 3.397759103641457e-05, "loss": 0.5661, "step": 8018 }, { "epoch": 10.26432, "grad_norm": 0.9413052797317505, "learning_rate": 3.397559023609444e-05, "loss": 0.5919, "step": 8019 }, { "epoch": 10.2656, "grad_norm": 0.8924047350883484, "learning_rate": 3.3973589435774314e-05, "loss": 0.5855, "step": 8020 }, { "epoch": 10.26688, "grad_norm": 0.8545451164245605, "learning_rate": 3.3971588635454186e-05, "loss": 0.5515, "step": 8021 }, { "epoch": 10.26816, "grad_norm": 0.8800926208496094, "learning_rate": 3.396958783513405e-05, "loss": 0.537, "step": 8022 }, { "epoch": 10.26944, "grad_norm": 0.9025660753250122, "learning_rate": 3.396758703481392e-05, "loss": 0.6178, "step": 8023 }, { "epoch": 10.27072, "grad_norm": 0.8830463290214539, "learning_rate": 3.39655862344938e-05, "loss": 0.555, "step": 8024 }, { "epoch": 10.272, 
"grad_norm": 0.9167059659957886, "learning_rate": 3.396358543417367e-05, "loss": 0.5678, "step": 8025 }, { "epoch": 10.27328, "grad_norm": 0.8567641377449036, "learning_rate": 3.3961584633853545e-05, "loss": 0.5473, "step": 8026 }, { "epoch": 10.27456, "grad_norm": 0.8600786328315735, "learning_rate": 3.395958383353342e-05, "loss": 0.5562, "step": 8027 }, { "epoch": 10.27584, "grad_norm": 0.8937509655952454, "learning_rate": 3.395758303321329e-05, "loss": 0.5721, "step": 8028 }, { "epoch": 10.27712, "grad_norm": 0.8415324091911316, "learning_rate": 3.395558223289316e-05, "loss": 0.5219, "step": 8029 }, { "epoch": 10.2784, "grad_norm": 0.888176679611206, "learning_rate": 3.3953581432573025e-05, "loss": 0.5618, "step": 8030 }, { "epoch": 10.27968, "grad_norm": 0.8729084730148315, "learning_rate": 3.3951580632252904e-05, "loss": 0.581, "step": 8031 }, { "epoch": 10.28096, "grad_norm": 0.8753156661987305, "learning_rate": 3.3949579831932776e-05, "loss": 0.5667, "step": 8032 }, { "epoch": 10.28224, "grad_norm": 0.8683957457542419, "learning_rate": 3.394757903161265e-05, "loss": 0.5274, "step": 8033 }, { "epoch": 10.28352, "grad_norm": 0.8913008570671082, "learning_rate": 3.394557823129252e-05, "loss": 0.5799, "step": 8034 }, { "epoch": 10.2848, "grad_norm": 0.8697010278701782, "learning_rate": 3.394357743097239e-05, "loss": 0.56, "step": 8035 }, { "epoch": 10.28608, "grad_norm": 0.8997796177864075, "learning_rate": 3.394157663065226e-05, "loss": 0.6066, "step": 8036 }, { "epoch": 10.28736, "grad_norm": 0.8587310314178467, "learning_rate": 3.3939575830332135e-05, "loss": 0.5758, "step": 8037 }, { "epoch": 10.288640000000001, "grad_norm": 0.8550379276275635, "learning_rate": 3.393757503001201e-05, "loss": 0.5858, "step": 8038 }, { "epoch": 10.28992, "grad_norm": 0.8495590686798096, "learning_rate": 3.393557422969188e-05, "loss": 0.5774, "step": 8039 }, { "epoch": 10.2912, "grad_norm": 0.8640943765640259, "learning_rate": 3.393357342937175e-05, "loss": 0.5839, "step": 8040 
}, { "epoch": 10.29248, "grad_norm": 0.8572250604629517, "learning_rate": 3.393157262905162e-05, "loss": 0.5521, "step": 8041 }, { "epoch": 10.29376, "grad_norm": 0.8335790634155273, "learning_rate": 3.3929571828731495e-05, "loss": 0.5296, "step": 8042 }, { "epoch": 10.29504, "grad_norm": 0.861184298992157, "learning_rate": 3.3927571028411366e-05, "loss": 0.5687, "step": 8043 }, { "epoch": 10.29632, "grad_norm": 0.8524270057678223, "learning_rate": 3.392557022809124e-05, "loss": 0.5577, "step": 8044 }, { "epoch": 10.2976, "grad_norm": 0.9047152996063232, "learning_rate": 3.392356942777111e-05, "loss": 0.5752, "step": 8045 }, { "epoch": 10.29888, "grad_norm": 0.8878473043441772, "learning_rate": 3.392156862745098e-05, "loss": 0.5627, "step": 8046 }, { "epoch": 10.30016, "grad_norm": 0.8869286775588989, "learning_rate": 3.3919567827130854e-05, "loss": 0.587, "step": 8047 }, { "epoch": 10.30144, "grad_norm": 0.8654095530509949, "learning_rate": 3.3917567026810726e-05, "loss": 0.5427, "step": 8048 }, { "epoch": 10.30272, "grad_norm": 0.9054012298583984, "learning_rate": 3.39155662264906e-05, "loss": 0.5907, "step": 8049 }, { "epoch": 10.304, "grad_norm": 0.852177083492279, "learning_rate": 3.391356542617047e-05, "loss": 0.5445, "step": 8050 }, { "epoch": 10.30528, "grad_norm": 0.9118196964263916, "learning_rate": 3.391156462585034e-05, "loss": 0.5557, "step": 8051 }, { "epoch": 10.30656, "grad_norm": 0.9239182472229004, "learning_rate": 3.390956382553021e-05, "loss": 0.5647, "step": 8052 }, { "epoch": 10.30784, "grad_norm": 0.875339925289154, "learning_rate": 3.3907563025210085e-05, "loss": 0.5532, "step": 8053 }, { "epoch": 10.30912, "grad_norm": 0.9014837145805359, "learning_rate": 3.390556222488996e-05, "loss": 0.5569, "step": 8054 }, { "epoch": 10.3104, "grad_norm": 0.9332119226455688, "learning_rate": 3.390356142456983e-05, "loss": 0.5645, "step": 8055 }, { "epoch": 10.31168, "grad_norm": 0.8813993334770203, "learning_rate": 3.39015606242497e-05, "loss": 0.5751, 
"step": 8056 }, { "epoch": 10.31296, "grad_norm": 0.8453924655914307, "learning_rate": 3.389955982392957e-05, "loss": 0.5304, "step": 8057 }, { "epoch": 10.31424, "grad_norm": 0.8781421780586243, "learning_rate": 3.3897559023609444e-05, "loss": 0.5555, "step": 8058 }, { "epoch": 10.31552, "grad_norm": 0.819387674331665, "learning_rate": 3.389555822328932e-05, "loss": 0.5362, "step": 8059 }, { "epoch": 10.3168, "grad_norm": 0.838315486907959, "learning_rate": 3.389355742296919e-05, "loss": 0.5609, "step": 8060 }, { "epoch": 10.31808, "grad_norm": 0.8653327226638794, "learning_rate": 3.389155662264906e-05, "loss": 0.5313, "step": 8061 }, { "epoch": 10.31936, "grad_norm": 0.8249607682228088, "learning_rate": 3.388955582232893e-05, "loss": 0.5308, "step": 8062 }, { "epoch": 10.32064, "grad_norm": 0.8277181386947632, "learning_rate": 3.3887555022008804e-05, "loss": 0.5428, "step": 8063 }, { "epoch": 10.32192, "grad_norm": 0.8770848512649536, "learning_rate": 3.3885554221688675e-05, "loss": 0.536, "step": 8064 }, { "epoch": 10.3232, "grad_norm": 0.8789455890655518, "learning_rate": 3.388355342136855e-05, "loss": 0.5509, "step": 8065 }, { "epoch": 10.32448, "grad_norm": 0.8777965307235718, "learning_rate": 3.3881552621048426e-05, "loss": 0.5601, "step": 8066 }, { "epoch": 10.32576, "grad_norm": 0.846284031867981, "learning_rate": 3.38795518207283e-05, "loss": 0.5322, "step": 8067 }, { "epoch": 10.32704, "grad_norm": 0.820878267288208, "learning_rate": 3.387755102040816e-05, "loss": 0.4972, "step": 8068 }, { "epoch": 10.32832, "grad_norm": 0.8530818819999695, "learning_rate": 3.3875550220088035e-05, "loss": 0.5465, "step": 8069 }, { "epoch": 10.3296, "grad_norm": 0.8610104322433472, "learning_rate": 3.3873549419767907e-05, "loss": 0.5503, "step": 8070 }, { "epoch": 10.33088, "grad_norm": 0.883931040763855, "learning_rate": 3.387154861944778e-05, "loss": 0.5977, "step": 8071 }, { "epoch": 10.33216, "grad_norm": 0.8754240870475769, "learning_rate": 3.386954781912765e-05, 
"loss": 0.5968, "step": 8072 }, { "epoch": 10.33344, "grad_norm": 0.8416239023208618, "learning_rate": 3.386754701880753e-05, "loss": 0.5346, "step": 8073 }, { "epoch": 10.33472, "grad_norm": 0.8367804884910583, "learning_rate": 3.38655462184874e-05, "loss": 0.5026, "step": 8074 }, { "epoch": 10.336, "grad_norm": 0.8462991118431091, "learning_rate": 3.386354541816727e-05, "loss": 0.563, "step": 8075 }, { "epoch": 10.33728, "grad_norm": 0.8666642308235168, "learning_rate": 3.386154461784714e-05, "loss": 0.5549, "step": 8076 }, { "epoch": 10.33856, "grad_norm": 0.8881992101669312, "learning_rate": 3.385954381752701e-05, "loss": 0.5675, "step": 8077 }, { "epoch": 10.33984, "grad_norm": 0.8590701222419739, "learning_rate": 3.385754301720688e-05, "loss": 0.5806, "step": 8078 }, { "epoch": 10.34112, "grad_norm": 0.8979669809341431, "learning_rate": 3.385554221688675e-05, "loss": 0.5928, "step": 8079 }, { "epoch": 10.3424, "grad_norm": 0.8830470442771912, "learning_rate": 3.385354141656663e-05, "loss": 0.5631, "step": 8080 }, { "epoch": 10.343679999999999, "grad_norm": 0.8694333434104919, "learning_rate": 3.3851540616246504e-05, "loss": 0.5683, "step": 8081 }, { "epoch": 10.34496, "grad_norm": 0.8495916128158569, "learning_rate": 3.3849539815926376e-05, "loss": 0.5719, "step": 8082 }, { "epoch": 10.34624, "grad_norm": 0.8509037494659424, "learning_rate": 3.384753901560625e-05, "loss": 0.5438, "step": 8083 }, { "epoch": 10.34752, "grad_norm": 0.8636916875839233, "learning_rate": 3.384553821528611e-05, "loss": 0.5433, "step": 8084 }, { "epoch": 10.3488, "grad_norm": 0.8579983711242676, "learning_rate": 3.3843537414965984e-05, "loss": 0.5244, "step": 8085 }, { "epoch": 10.35008, "grad_norm": 0.8326935172080994, "learning_rate": 3.3841536614645856e-05, "loss": 0.4873, "step": 8086 }, { "epoch": 10.35136, "grad_norm": 0.8900673389434814, "learning_rate": 3.3839535814325735e-05, "loss": 0.6081, "step": 8087 }, { "epoch": 10.35264, "grad_norm": 0.8790429830551147, 
"learning_rate": 3.383753501400561e-05, "loss": 0.5785, "step": 8088 }, { "epoch": 10.35392, "grad_norm": 0.8508909344673157, "learning_rate": 3.383553421368548e-05, "loss": 0.5545, "step": 8089 }, { "epoch": 10.3552, "grad_norm": 0.9315157532691956, "learning_rate": 3.383353341336535e-05, "loss": 0.5911, "step": 8090 }, { "epoch": 10.35648, "grad_norm": 0.8326758146286011, "learning_rate": 3.383153261304522e-05, "loss": 0.5483, "step": 8091 }, { "epoch": 10.35776, "grad_norm": 0.8776389956474304, "learning_rate": 3.382953181272509e-05, "loss": 0.5853, "step": 8092 }, { "epoch": 10.35904, "grad_norm": 0.8722195625305176, "learning_rate": 3.382753101240496e-05, "loss": 0.5785, "step": 8093 }, { "epoch": 10.36032, "grad_norm": 0.9000146389007568, "learning_rate": 3.382553021208484e-05, "loss": 0.5833, "step": 8094 }, { "epoch": 10.3616, "grad_norm": 0.8690416812896729, "learning_rate": 3.382352941176471e-05, "loss": 0.5583, "step": 8095 }, { "epoch": 10.36288, "grad_norm": 0.8674702048301697, "learning_rate": 3.382152861144458e-05, "loss": 0.5792, "step": 8096 }, { "epoch": 10.36416, "grad_norm": 0.9020072221755981, "learning_rate": 3.3819527811124453e-05, "loss": 0.5737, "step": 8097 }, { "epoch": 10.36544, "grad_norm": 0.9401588439941406, "learning_rate": 3.3817527010804325e-05, "loss": 0.6055, "step": 8098 }, { "epoch": 10.36672, "grad_norm": 0.8760440349578857, "learning_rate": 3.38155262104842e-05, "loss": 0.5584, "step": 8099 }, { "epoch": 10.368, "grad_norm": 0.8204819560050964, "learning_rate": 3.381352541016406e-05, "loss": 0.5278, "step": 8100 }, { "epoch": 10.36928, "grad_norm": 0.9087020754814148, "learning_rate": 3.3811524609843934e-05, "loss": 0.5957, "step": 8101 }, { "epoch": 10.37056, "grad_norm": 0.9149252772331238, "learning_rate": 3.380952380952381e-05, "loss": 0.5722, "step": 8102 }, { "epoch": 10.37184, "grad_norm": 0.8594633936882019, "learning_rate": 3.3807523009203685e-05, "loss": 0.5866, "step": 8103 }, { "epoch": 10.37312, "grad_norm": 
0.8941963315010071, "learning_rate": 3.3805522208883556e-05, "loss": 0.566, "step": 8104 }, { "epoch": 10.3744, "grad_norm": 0.9061945676803589, "learning_rate": 3.380352140856343e-05, "loss": 0.5989, "step": 8105 }, { "epoch": 10.37568, "grad_norm": 0.9072801470756531, "learning_rate": 3.38015206082433e-05, "loss": 0.5341, "step": 8106 }, { "epoch": 10.37696, "grad_norm": 0.8797597289085388, "learning_rate": 3.379951980792317e-05, "loss": 0.5658, "step": 8107 }, { "epoch": 10.37824, "grad_norm": 0.8986260294914246, "learning_rate": 3.379751900760304e-05, "loss": 0.5663, "step": 8108 }, { "epoch": 10.37952, "grad_norm": 0.870966374874115, "learning_rate": 3.3795518207282916e-05, "loss": 0.5759, "step": 8109 }, { "epoch": 10.3808, "grad_norm": 0.8527510166168213, "learning_rate": 3.379351740696279e-05, "loss": 0.5566, "step": 8110 }, { "epoch": 10.38208, "grad_norm": 0.8812639117240906, "learning_rate": 3.379151660664266e-05, "loss": 0.5787, "step": 8111 }, { "epoch": 10.38336, "grad_norm": 0.9007368683815002, "learning_rate": 3.378951580632253e-05, "loss": 0.6065, "step": 8112 }, { "epoch": 10.38464, "grad_norm": 0.908555269241333, "learning_rate": 3.37875150060024e-05, "loss": 0.5707, "step": 8113 }, { "epoch": 10.38592, "grad_norm": 0.8893713355064392, "learning_rate": 3.3785514205682275e-05, "loss": 0.6067, "step": 8114 }, { "epoch": 10.3872, "grad_norm": 0.8574310541152954, "learning_rate": 3.378351340536215e-05, "loss": 0.5643, "step": 8115 }, { "epoch": 10.38848, "grad_norm": 0.823165774345398, "learning_rate": 3.378151260504202e-05, "loss": 0.5808, "step": 8116 }, { "epoch": 10.38976, "grad_norm": 0.818540096282959, "learning_rate": 3.377951180472189e-05, "loss": 0.5709, "step": 8117 }, { "epoch": 10.39104, "grad_norm": 0.8618648052215576, "learning_rate": 3.377751100440176e-05, "loss": 0.5565, "step": 8118 }, { "epoch": 10.39232, "grad_norm": 0.869215726852417, "learning_rate": 3.3775510204081634e-05, "loss": 0.5499, "step": 8119 }, { "epoch": 10.3936, 
"grad_norm": 0.9559123516082764, "learning_rate": 3.3773509403761506e-05, "loss": 0.6168, "step": 8120 }, { "epoch": 10.39488, "grad_norm": 0.924443244934082, "learning_rate": 3.377150860344138e-05, "loss": 0.5954, "step": 8121 }, { "epoch": 10.39616, "grad_norm": 0.8861650824546814, "learning_rate": 3.376950780312125e-05, "loss": 0.5793, "step": 8122 }, { "epoch": 10.39744, "grad_norm": 0.9248014092445374, "learning_rate": 3.376750700280112e-05, "loss": 0.5581, "step": 8123 }, { "epoch": 10.39872, "grad_norm": 0.8819634914398193, "learning_rate": 3.3765506202480994e-05, "loss": 0.5263, "step": 8124 }, { "epoch": 10.4, "grad_norm": 0.8656063079833984, "learning_rate": 3.3763505402160865e-05, "loss": 0.5241, "step": 8125 }, { "epoch": 10.40128, "grad_norm": 0.8893786668777466, "learning_rate": 3.376150460184074e-05, "loss": 0.5451, "step": 8126 }, { "epoch": 10.40256, "grad_norm": 0.9260717034339905, "learning_rate": 3.375950380152061e-05, "loss": 0.5505, "step": 8127 }, { "epoch": 10.40384, "grad_norm": 0.912977397441864, "learning_rate": 3.375750300120048e-05, "loss": 0.6363, "step": 8128 }, { "epoch": 10.40512, "grad_norm": 0.8732292652130127, "learning_rate": 3.375550220088035e-05, "loss": 0.5778, "step": 8129 }, { "epoch": 10.4064, "grad_norm": 0.8404425978660583, "learning_rate": 3.3753501400560225e-05, "loss": 0.5693, "step": 8130 }, { "epoch": 10.40768, "grad_norm": 0.9023398160934448, "learning_rate": 3.3751500600240097e-05, "loss": 0.6112, "step": 8131 }, { "epoch": 10.40896, "grad_norm": 0.9250540137290955, "learning_rate": 3.374949979991997e-05, "loss": 0.6067, "step": 8132 }, { "epoch": 10.41024, "grad_norm": 0.9005985856056213, "learning_rate": 3.374749899959984e-05, "loss": 0.5746, "step": 8133 }, { "epoch": 10.41152, "grad_norm": 0.9189587235450745, "learning_rate": 3.374549819927971e-05, "loss": 0.5847, "step": 8134 }, { "epoch": 10.4128, "grad_norm": 0.8882416486740112, "learning_rate": 3.3743497398959584e-05, "loss": 0.6156, "step": 8135 }, { 
"epoch": 10.41408, "grad_norm": 0.845780074596405, "learning_rate": 3.3741496598639456e-05, "loss": 0.549, "step": 8136 }, { "epoch": 10.41536, "grad_norm": 0.9068146347999573, "learning_rate": 3.3739495798319334e-05, "loss": 0.5735, "step": 8137 }, { "epoch": 10.41664, "grad_norm": 0.8547993898391724, "learning_rate": 3.37374949979992e-05, "loss": 0.5472, "step": 8138 }, { "epoch": 10.41792, "grad_norm": 0.8667715191841125, "learning_rate": 3.373549419767907e-05, "loss": 0.5533, "step": 8139 }, { "epoch": 10.4192, "grad_norm": 0.8902264833450317, "learning_rate": 3.373349339735894e-05, "loss": 0.5751, "step": 8140 }, { "epoch": 10.42048, "grad_norm": 0.8540170788764954, "learning_rate": 3.3731492597038815e-05, "loss": 0.5179, "step": 8141 }, { "epoch": 10.42176, "grad_norm": 0.8607949614524841, "learning_rate": 3.372949179671869e-05, "loss": 0.5825, "step": 8142 }, { "epoch": 10.42304, "grad_norm": 0.8747577667236328, "learning_rate": 3.372749099639856e-05, "loss": 0.6228, "step": 8143 }, { "epoch": 10.42432, "grad_norm": 0.8700942993164062, "learning_rate": 3.372549019607844e-05, "loss": 0.5888, "step": 8144 }, { "epoch": 10.4256, "grad_norm": 0.9046246409416199, "learning_rate": 3.372348939575831e-05, "loss": 0.5769, "step": 8145 }, { "epoch": 10.42688, "grad_norm": 0.8095414042472839, "learning_rate": 3.3721488595438174e-05, "loss": 0.5288, "step": 8146 }, { "epoch": 10.42816, "grad_norm": 0.8345249891281128, "learning_rate": 3.3719487795118046e-05, "loss": 0.5943, "step": 8147 }, { "epoch": 10.42944, "grad_norm": 0.9272936582565308, "learning_rate": 3.371748699479792e-05, "loss": 0.5317, "step": 8148 }, { "epoch": 10.43072, "grad_norm": 0.9539486169815063, "learning_rate": 3.371548619447779e-05, "loss": 0.6475, "step": 8149 }, { "epoch": 10.432, "grad_norm": null, "learning_rate": 3.371548619447779e-05, "loss": 0.6468, "step": 8150 }, { "epoch": 10.43328, "grad_norm": 0.8678048253059387, "learning_rate": 3.371348539415766e-05, "loss": 0.5463, "step": 8151
}, { "epoch": 10.43456, "grad_norm": 0.8489457964897156, "learning_rate": 3.371148459383754e-05, "loss": 0.5703, "step": 8152 }, { "epoch": 10.43584, "grad_norm": 0.8788950443267822, "learning_rate": 3.370948379351741e-05, "loss": 0.5346, "step": 8153 }, { "epoch": 10.43712, "grad_norm": 0.853437066078186, "learning_rate": 3.3707482993197284e-05, "loss": 0.5659, "step": 8154 }, { "epoch": 10.4384, "grad_norm": 0.8777585625648499, "learning_rate": 3.370548219287715e-05, "loss": 0.5578, "step": 8155 }, { "epoch": 10.43968, "grad_norm": 0.8524881601333618, "learning_rate": 3.370348139255702e-05, "loss": 0.5656, "step": 8156 }, { "epoch": 10.44096, "grad_norm": 0.8162695169448853, "learning_rate": 3.370148059223689e-05, "loss": 0.5092, "step": 8157 }, { "epoch": 10.44224, "grad_norm": 0.8364541530609131, "learning_rate": 3.3699479791916765e-05, "loss": 0.5346, "step": 8158 }, { "epoch": 10.44352, "grad_norm": 0.9183416962623596, "learning_rate": 3.3697478991596643e-05, "loss": 0.6277, "step": 8159 }, { "epoch": 10.4448, "grad_norm": 0.8532844185829163, "learning_rate": 3.3695478191276515e-05, "loss": 0.5318, "step": 8160 }, { "epoch": 10.44608, "grad_norm": 0.8698902726173401, "learning_rate": 3.369347739095639e-05, "loss": 0.5872, "step": 8161 }, { "epoch": 10.44736, "grad_norm": 0.8348177671432495, "learning_rate": 3.369147659063626e-05, "loss": 0.5381, "step": 8162 }, { "epoch": 10.44864, "grad_norm": 0.8752634525299072, "learning_rate": 3.3689475790316124e-05, "loss": 0.5587, "step": 8163 }, { "epoch": 10.44992, "grad_norm": 0.8772228360176086, "learning_rate": 3.3687474989995996e-05, "loss": 0.5763, "step": 8164 }, { "epoch": 10.4512, "grad_norm": 0.8985121250152588, "learning_rate": 3.368547418967587e-05, "loss": 0.5443, "step": 8165 }, { "epoch": 10.45248, "grad_norm": 0.8824835419654846, "learning_rate": 3.3683473389355746e-05, "loss": 0.5583, "step": 8166 }, { "epoch": 10.45376, "grad_norm": 0.8776131272315979, "learning_rate": 3.368147258903562e-05, "loss": 
0.5331, "step": 8167 }, { "epoch": 10.45504, "grad_norm": 0.9394381046295166, "learning_rate": 3.367947178871549e-05, "loss": 0.6115, "step": 8168 }, { "epoch": 10.45632, "grad_norm": 0.8873891234397888, "learning_rate": 3.367747098839536e-05, "loss": 0.5574, "step": 8169 }, { "epoch": 10.4576, "grad_norm": 0.8328064680099487, "learning_rate": 3.3675470188075234e-05, "loss": 0.5499, "step": 8170 }, { "epoch": 10.45888, "grad_norm": 0.8790708184242249, "learning_rate": 3.36734693877551e-05, "loss": 0.5231, "step": 8171 }, { "epoch": 10.46016, "grad_norm": 0.8684185743331909, "learning_rate": 3.367146858743497e-05, "loss": 0.5494, "step": 8172 }, { "epoch": 10.46144, "grad_norm": 0.9176812767982483, "learning_rate": 3.366946778711485e-05, "loss": 0.5929, "step": 8173 }, { "epoch": 10.462720000000001, "grad_norm": 0.9031935930252075, "learning_rate": 3.366746698679472e-05, "loss": 0.5999, "step": 8174 }, { "epoch": 10.464, "grad_norm": 0.8639822006225586, "learning_rate": 3.366546618647459e-05, "loss": 0.6266, "step": 8175 }, { "epoch": 10.46528, "grad_norm": 0.8309516310691833, "learning_rate": 3.3663465386154465e-05, "loss": 0.5368, "step": 8176 }, { "epoch": 10.46656, "grad_norm": 0.8735124468803406, "learning_rate": 3.366146458583434e-05, "loss": 0.5759, "step": 8177 }, { "epoch": 10.46784, "grad_norm": 0.850908100605011, "learning_rate": 3.365946378551421e-05, "loss": 0.5377, "step": 8178 }, { "epoch": 10.46912, "grad_norm": 0.8890109062194824, "learning_rate": 3.3657462985194074e-05, "loss": 0.5821, "step": 8179 }, { "epoch": 10.4704, "grad_norm": 0.8895400762557983, "learning_rate": 3.365546218487395e-05, "loss": 0.5535, "step": 8180 }, { "epoch": 10.47168, "grad_norm": 0.8601114153862, "learning_rate": 3.3653461384553824e-05, "loss": 0.5324, "step": 8181 }, { "epoch": 10.47296, "grad_norm": 0.8928337693214417, "learning_rate": 3.3651460584233696e-05, "loss": 0.5868, "step": 8182 }, { "epoch": 10.47424, "grad_norm": 0.8805640935897827, "learning_rate": 
3.364945978391357e-05, "loss": 0.5735, "step": 8183 }, { "epoch": 10.47552, "grad_norm": 0.8599642515182495, "learning_rate": 3.364745898359344e-05, "loss": 0.5431, "step": 8184 }, { "epoch": 10.4768, "grad_norm": 0.9143441319465637, "learning_rate": 3.364545818327331e-05, "loss": 0.5903, "step": 8185 }, { "epoch": 10.47808, "grad_norm": 0.8206641674041748, "learning_rate": 3.3643457382953184e-05, "loss": 0.5216, "step": 8186 }, { "epoch": 10.47936, "grad_norm": 0.7796454429626465, "learning_rate": 3.3641456582633055e-05, "loss": 0.4986, "step": 8187 }, { "epoch": 10.48064, "grad_norm": 0.8821196556091309, "learning_rate": 3.363945578231293e-05, "loss": 0.5665, "step": 8188 }, { "epoch": 10.48192, "grad_norm": 0.86763596534729, "learning_rate": 3.36374549819928e-05, "loss": 0.5393, "step": 8189 }, { "epoch": 10.4832, "grad_norm": 0.8729016184806824, "learning_rate": 3.363545418167267e-05, "loss": 0.5357, "step": 8190 }, { "epoch": 10.48448, "grad_norm": 0.9105653166770935, "learning_rate": 3.363345338135254e-05, "loss": 0.6309, "step": 8191 }, { "epoch": 10.48576, "grad_norm": 0.8702036142349243, "learning_rate": 3.3631452581032415e-05, "loss": 0.5913, "step": 8192 }, { "epoch": 10.48704, "grad_norm": 0.8215315937995911, "learning_rate": 3.3629451780712287e-05, "loss": 0.585, "step": 8193 }, { "epoch": 10.48832, "grad_norm": 0.8497202396392822, "learning_rate": 3.362745098039216e-05, "loss": 0.5519, "step": 8194 }, { "epoch": 10.4896, "grad_norm": 0.8402096629142761, "learning_rate": 3.362545018007203e-05, "loss": 0.5624, "step": 8195 }, { "epoch": 10.49088, "grad_norm": 0.8736252784729004, "learning_rate": 3.36234493797519e-05, "loss": 0.5404, "step": 8196 }, { "epoch": 10.49216, "grad_norm": 0.8728154301643372, "learning_rate": 3.3621448579431774e-05, "loss": 0.5412, "step": 8197 }, { "epoch": 10.49344, "grad_norm": 0.8873513340950012, "learning_rate": 3.3619447779111646e-05, "loss": 0.5671, "step": 8198 }, { "epoch": 10.49472, "grad_norm": 0.873191237449646, 
"learning_rate": 3.361744697879152e-05, "loss": 0.5645, "step": 8199 }, { "epoch": 10.496, "grad_norm": 0.8937346339225769, "learning_rate": 3.361544617847139e-05, "loss": 0.604, "step": 8200 }, { "epoch": 10.49728, "grad_norm": 0.8788394927978516, "learning_rate": 3.361344537815127e-05, "loss": 0.6088, "step": 8201 }, { "epoch": 10.49856, "grad_norm": 0.8717678189277649, "learning_rate": 3.361144457783113e-05, "loss": 0.5475, "step": 8202 }, { "epoch": 10.49984, "grad_norm": 0.8855169415473938, "learning_rate": 3.3609443777511005e-05, "loss": 0.6181, "step": 8203 }, { "epoch": 10.50112, "grad_norm": 0.9005426168441772, "learning_rate": 3.360744297719088e-05, "loss": 0.5924, "step": 8204 }, { "epoch": 10.5024, "grad_norm": 0.9078197479248047, "learning_rate": 3.360544217687075e-05, "loss": 0.6005, "step": 8205 }, { "epoch": 10.50368, "grad_norm": 0.9592050909996033, "learning_rate": 3.360344137655062e-05, "loss": 0.6359, "step": 8206 }, { "epoch": 10.50496, "grad_norm": 0.8957341313362122, "learning_rate": 3.360144057623049e-05, "loss": 0.5806, "step": 8207 }, { "epoch": 10.50624, "grad_norm": 0.8797503709793091, "learning_rate": 3.359943977591037e-05, "loss": 0.5798, "step": 8208 }, { "epoch": 10.50752, "grad_norm": 0.8420235514640808, "learning_rate": 3.359743897559024e-05, "loss": 0.5894, "step": 8209 }, { "epoch": 10.5088, "grad_norm": 0.8644899725914001, "learning_rate": 3.359543817527011e-05, "loss": 0.553, "step": 8210 }, { "epoch": 10.51008, "grad_norm": 0.8995040059089661, "learning_rate": 3.359343737494998e-05, "loss": 0.6117, "step": 8211 }, { "epoch": 10.51136, "grad_norm": 0.8395155668258667, "learning_rate": 3.359143657462985e-05, "loss": 0.5678, "step": 8212 }, { "epoch": 10.51264, "grad_norm": 0.8515924215316772, "learning_rate": 3.3589435774309724e-05, "loss": 0.5409, "step": 8213 }, { "epoch": 10.51392, "grad_norm": 0.9288252592086792, "learning_rate": 3.3587434973989596e-05, "loss": 0.6328, "step": 8214 }, { "epoch": 10.5152, "grad_norm": 
0.9206863045692444, "learning_rate": 3.358543417366947e-05, "loss": 0.6071, "step": 8215 }, { "epoch": 10.51648, "grad_norm": 0.875196099281311, "learning_rate": 3.3583433373349346e-05, "loss": 0.5762, "step": 8216 }, { "epoch": 10.517759999999999, "grad_norm": 0.8598700165748596, "learning_rate": 3.358143257302921e-05, "loss": 0.5428, "step": 8217 }, { "epoch": 10.51904, "grad_norm": 0.8348386287689209, "learning_rate": 3.357943177270908e-05, "loss": 0.5135, "step": 8218 }, { "epoch": 10.52032, "grad_norm": 0.8748103380203247, "learning_rate": 3.3577430972388955e-05, "loss": 0.5582, "step": 8219 }, { "epoch": 10.5216, "grad_norm": 0.8388493657112122, "learning_rate": 3.357543017206883e-05, "loss": 0.5594, "step": 8220 }, { "epoch": 10.52288, "grad_norm": 0.8430709838867188, "learning_rate": 3.35734293717487e-05, "loss": 0.5304, "step": 8221 }, { "epoch": 10.52416, "grad_norm": 0.8487758040428162, "learning_rate": 3.357142857142857e-05, "loss": 0.5249, "step": 8222 }, { "epoch": 10.52544, "grad_norm": 0.8581976890563965, "learning_rate": 3.356942777110845e-05, "loss": 0.5209, "step": 8223 }, { "epoch": 10.52672, "grad_norm": 0.9251878261566162, "learning_rate": 3.356742697078832e-05, "loss": 0.5878, "step": 8224 }, { "epoch": 10.528, "grad_norm": 0.8627904057502747, "learning_rate": 3.3565426170468186e-05, "loss": 0.5766, "step": 8225 }, { "epoch": 10.52928, "grad_norm": 0.8664613962173462, "learning_rate": 3.356342537014806e-05, "loss": 0.562, "step": 8226 }, { "epoch": 10.53056, "grad_norm": 0.8714502453804016, "learning_rate": 3.356142456982793e-05, "loss": 0.578, "step": 8227 }, { "epoch": 10.53184, "grad_norm": 0.8915185332298279, "learning_rate": 3.35594237695078e-05, "loss": 0.5542, "step": 8228 }, { "epoch": 10.53312, "grad_norm": 0.9236972332000732, "learning_rate": 3.3557422969187673e-05, "loss": 0.5776, "step": 8229 }, { "epoch": 10.5344, "grad_norm": 0.8826823234558105, "learning_rate": 3.355542216886755e-05, "loss": 0.5879, "step": 8230 }, { "epoch": 
10.53568, "grad_norm": 0.8796812295913696, "learning_rate": 3.3553421368547424e-05, "loss": 0.5992, "step": 8231 }, { "epoch": 10.53696, "grad_norm": 0.8763803243637085, "learning_rate": 3.3551420568227296e-05, "loss": 0.5546, "step": 8232 }, { "epoch": 10.53824, "grad_norm": 0.8898423910140991, "learning_rate": 3.354941976790716e-05, "loss": 0.5755, "step": 8233 }, { "epoch": 10.53952, "grad_norm": 0.8547391295433044, "learning_rate": 3.354741896758703e-05, "loss": 0.5844, "step": 8234 }, { "epoch": 10.5408, "grad_norm": 0.8550713658332825, "learning_rate": 3.3545418167266905e-05, "loss": 0.5609, "step": 8235 }, { "epoch": 10.54208, "grad_norm": 0.8670467138290405, "learning_rate": 3.3543417366946776e-05, "loss": 0.5439, "step": 8236 }, { "epoch": 10.54336, "grad_norm": 0.8298435807228088, "learning_rate": 3.3541416566626655e-05, "loss": 0.5424, "step": 8237 }, { "epoch": 10.54464, "grad_norm": 0.8586260676383972, "learning_rate": 3.353941576630653e-05, "loss": 0.5316, "step": 8238 }, { "epoch": 10.54592, "grad_norm": 0.8656241297721863, "learning_rate": 3.35374149659864e-05, "loss": 0.5515, "step": 8239 }, { "epoch": 10.5472, "grad_norm": 0.8878941535949707, "learning_rate": 3.353541416566627e-05, "loss": 0.5874, "step": 8240 }, { "epoch": 10.54848, "grad_norm": 0.900540828704834, "learning_rate": 3.3533413365346136e-05, "loss": 0.6261, "step": 8241 }, { "epoch": 10.54976, "grad_norm": 0.8868071436882019, "learning_rate": 3.353141256502601e-05, "loss": 0.5749, "step": 8242 }, { "epoch": 10.55104, "grad_norm": 0.9406156539916992, "learning_rate": 3.352941176470588e-05, "loss": 0.6071, "step": 8243 }, { "epoch": 10.55232, "grad_norm": 0.9124932289123535, "learning_rate": 3.352741096438576e-05, "loss": 0.587, "step": 8244 }, { "epoch": 10.5536, "grad_norm": 0.8528494834899902, "learning_rate": 3.352541016406563e-05, "loss": 0.5266, "step": 8245 }, { "epoch": 10.55488, "grad_norm": 0.8507541418075562, "learning_rate": 3.35234093637455e-05, "loss": 0.5906, "step": 
8246 }, { "epoch": 10.55616, "grad_norm": 0.8238681554794312, "learning_rate": 3.3521408563425374e-05, "loss": 0.5421, "step": 8247 }, { "epoch": 10.55744, "grad_norm": 0.8529794812202454, "learning_rate": 3.3519407763105245e-05, "loss": 0.5699, "step": 8248 }, { "epoch": 10.55872, "grad_norm": 0.8948842883110046, "learning_rate": 3.351740696278511e-05, "loss": 0.5987, "step": 8249 }, { "epoch": 10.56, "grad_norm": 0.8481034636497498, "learning_rate": 3.351540616246498e-05, "loss": 0.5355, "step": 8250 }, { "epoch": 10.56128, "grad_norm": 0.900606632232666, "learning_rate": 3.351340536214486e-05, "loss": 0.6198, "step": 8251 }, { "epoch": 10.56256, "grad_norm": 0.8651003241539001, "learning_rate": 3.351140456182473e-05, "loss": 0.5792, "step": 8252 }, { "epoch": 10.56384, "grad_norm": 0.8270814418792725, "learning_rate": 3.3509403761504605e-05, "loss": 0.5138, "step": 8253 }, { "epoch": 10.56512, "grad_norm": 0.8760477900505066, "learning_rate": 3.350740296118448e-05, "loss": 0.5928, "step": 8254 }, { "epoch": 10.5664, "grad_norm": 0.889542818069458, "learning_rate": 3.350540216086435e-05, "loss": 0.6012, "step": 8255 }, { "epoch": 10.56768, "grad_norm": 0.8778030872344971, "learning_rate": 3.350340136054422e-05, "loss": 0.5747, "step": 8256 }, { "epoch": 10.56896, "grad_norm": 0.889215350151062, "learning_rate": 3.3501400560224085e-05, "loss": 0.5577, "step": 8257 }, { "epoch": 10.57024, "grad_norm": 0.8863917589187622, "learning_rate": 3.3499399759903964e-05, "loss": 0.5355, "step": 8258 }, { "epoch": 10.57152, "grad_norm": 0.8980902433395386, "learning_rate": 3.3497398959583836e-05, "loss": 0.5563, "step": 8259 }, { "epoch": 10.5728, "grad_norm": 0.8849428296089172, "learning_rate": 3.349539815926371e-05, "loss": 0.578, "step": 8260 }, { "epoch": 10.57408, "grad_norm": 0.8660392761230469, "learning_rate": 3.349339735894358e-05, "loss": 0.5187, "step": 8261 }, { "epoch": 10.57536, "grad_norm": 0.891211986541748, "learning_rate": 3.349139655862345e-05, "loss": 
0.5778, "step": 8262 }, { "epoch": 10.57664, "grad_norm": 0.8924466967582703, "learning_rate": 3.348939575830332e-05, "loss": 0.5561, "step": 8263 }, { "epoch": 10.57792, "grad_norm": 0.9162275791168213, "learning_rate": 3.3487394957983195e-05, "loss": 0.6179, "step": 8264 }, { "epoch": 10.5792, "grad_norm": 0.9216055870056152, "learning_rate": 3.348539415766307e-05, "loss": 0.5975, "step": 8265 }, { "epoch": 10.58048, "grad_norm": 0.8706624507904053, "learning_rate": 3.348339335734294e-05, "loss": 0.5412, "step": 8266 }, { "epoch": 10.58176, "grad_norm": 0.8571588397026062, "learning_rate": 3.348139255702281e-05, "loss": 0.5807, "step": 8267 }, { "epoch": 10.58304, "grad_norm": 0.7969219088554382, "learning_rate": 3.347939175670268e-05, "loss": 0.5632, "step": 8268 }, { "epoch": 10.58432, "grad_norm": 0.8372544050216675, "learning_rate": 3.3477390956382554e-05, "loss": 0.5497, "step": 8269 }, { "epoch": 10.5856, "grad_norm": 0.8220162987709045, "learning_rate": 3.3475390156062426e-05, "loss": 0.5106, "step": 8270 }, { "epoch": 10.58688, "grad_norm": 0.8827033042907715, "learning_rate": 3.34733893557423e-05, "loss": 0.5885, "step": 8271 }, { "epoch": 10.58816, "grad_norm": 0.8495452404022217, "learning_rate": 3.347138855542217e-05, "loss": 0.565, "step": 8272 }, { "epoch": 10.58944, "grad_norm": 0.8994190692901611, "learning_rate": 3.346938775510204e-05, "loss": 0.5612, "step": 8273 }, { "epoch": 10.59072, "grad_norm": 0.8603847622871399, "learning_rate": 3.3467386954781914e-05, "loss": 0.5775, "step": 8274 }, { "epoch": 10.592, "grad_norm": 0.884907066822052, "learning_rate": 3.3465386154461786e-05, "loss": 0.5231, "step": 8275 }, { "epoch": 10.59328, "grad_norm": 0.9084495306015015, "learning_rate": 3.346338535414166e-05, "loss": 0.5845, "step": 8276 }, { "epoch": 10.59456, "grad_norm": 0.8667944669723511, "learning_rate": 3.346138455382153e-05, "loss": 0.5664, "step": 8277 }, { "epoch": 10.59584, "grad_norm": 0.8516685366630554, "learning_rate": 
3.34593837535014e-05, "loss": 0.5483, "step": 8278 }, { "epoch": 10.59712, "grad_norm": 0.8369115591049194, "learning_rate": 3.345738295318128e-05, "loss": 0.505, "step": 8279 }, { "epoch": 10.5984, "grad_norm": 0.8858165740966797, "learning_rate": 3.3455382152861145e-05, "loss": 0.5714, "step": 8280 }, { "epoch": 10.59968, "grad_norm": 0.858709990978241, "learning_rate": 3.345338135254102e-05, "loss": 0.5921, "step": 8281 }, { "epoch": 10.60096, "grad_norm": 0.8412848114967346, "learning_rate": 3.345138055222089e-05, "loss": 0.5848, "step": 8282 }, { "epoch": 10.60224, "grad_norm": 0.8578768968582153, "learning_rate": 3.344937975190076e-05, "loss": 0.564, "step": 8283 }, { "epoch": 10.60352, "grad_norm": 0.8731163144111633, "learning_rate": 3.344737895158063e-05, "loss": 0.5947, "step": 8284 }, { "epoch": 10.604800000000001, "grad_norm": 0.8939322829246521, "learning_rate": 3.3445378151260504e-05, "loss": 0.6458, "step": 8285 }, { "epoch": 10.60608, "grad_norm": 0.8193719983100891, "learning_rate": 3.344337735094038e-05, "loss": 0.5601, "step": 8286 }, { "epoch": 10.60736, "grad_norm": 0.9018811583518982, "learning_rate": 3.3441376550620255e-05, "loss": 0.5914, "step": 8287 }, { "epoch": 10.60864, "grad_norm": 0.8852116465568542, "learning_rate": 3.343937575030012e-05, "loss": 0.5545, "step": 8288 }, { "epoch": 10.60992, "grad_norm": 0.880398690700531, "learning_rate": 3.343737494997999e-05, "loss": 0.5642, "step": 8289 }, { "epoch": 10.6112, "grad_norm": 0.8978055715560913, "learning_rate": 3.3435374149659863e-05, "loss": 0.5649, "step": 8290 }, { "epoch": 10.61248, "grad_norm": 0.8889503479003906, "learning_rate": 3.3433373349339735e-05, "loss": 0.6023, "step": 8291 }, { "epoch": 10.61376, "grad_norm": 0.8328986167907715, "learning_rate": 3.343137254901961e-05, "loss": 0.5532, "step": 8292 }, { "epoch": 10.61504, "grad_norm": 0.8667716979980469, "learning_rate": 3.3429371748699486e-05, "loss": 0.5735, "step": 8293 }, { "epoch": 10.61632, "grad_norm": 
0.8680749535560608, "learning_rate": 3.342737094837936e-05, "loss": 0.5629, "step": 8294 }, { "epoch": 10.6176, "grad_norm": 0.8346037268638611, "learning_rate": 3.342537014805923e-05, "loss": 0.5411, "step": 8295 }, { "epoch": 10.61888, "grad_norm": 0.8703474402427673, "learning_rate": 3.3423369347739095e-05, "loss": 0.5365, "step": 8296 }, { "epoch": 10.62016, "grad_norm": 0.8694249391555786, "learning_rate": 3.3421368547418966e-05, "loss": 0.5453, "step": 8297 }, { "epoch": 10.62144, "grad_norm": 0.8796177506446838, "learning_rate": 3.341936774709884e-05, "loss": 0.5339, "step": 8298 }, { "epoch": 10.62272, "grad_norm": 0.8828015923500061, "learning_rate": 3.341736694677871e-05, "loss": 0.5838, "step": 8299 }, { "epoch": 10.624, "grad_norm": 0.9480679035186768, "learning_rate": 3.341536614645859e-05, "loss": 0.5491, "step": 8300 }, { "epoch": 10.62528, "grad_norm": 0.8716679811477661, "learning_rate": 3.341336534613846e-05, "loss": 0.5377, "step": 8301 }, { "epoch": 10.62656, "grad_norm": 0.886888861656189, "learning_rate": 3.341136454581833e-05, "loss": 0.5493, "step": 8302 }, { "epoch": 10.627839999999999, "grad_norm": 0.8986707925796509, "learning_rate": 3.3409363745498204e-05, "loss": 0.5805, "step": 8303 }, { "epoch": 10.62912, "grad_norm": 0.8717333078384399, "learning_rate": 3.340736294517807e-05, "loss": 0.5042, "step": 8304 }, { "epoch": 10.6304, "grad_norm": 0.8657734394073486, "learning_rate": 3.340536214485794e-05, "loss": 0.5976, "step": 8305 }, { "epoch": 10.63168, "grad_norm": 0.8697702288627625, "learning_rate": 3.340336134453781e-05, "loss": 0.5653, "step": 8306 }, { "epoch": 10.63296, "grad_norm": 0.9345014691352844, "learning_rate": 3.340136054421769e-05, "loss": 0.6546, "step": 8307 }, { "epoch": 10.63424, "grad_norm": 0.8431648015975952, "learning_rate": 3.3399359743897564e-05, "loss": 0.5652, "step": 8308 }, { "epoch": 10.63552, "grad_norm": 0.8228232264518738, "learning_rate": 3.3397358943577436e-05, "loss": 0.5742, "step": 8309 }, { 
"epoch": 10.636800000000001, "grad_norm": 0.8282391428947449, "learning_rate": 3.339535814325731e-05, "loss": 0.5692, "step": 8310 }, { "epoch": 10.63808, "grad_norm": 0.8612817525863647, "learning_rate": 3.339335734293718e-05, "loss": 0.5897, "step": 8311 }, { "epoch": 10.63936, "grad_norm": 0.8568882942199707, "learning_rate": 3.3391356542617044e-05, "loss": 0.5675, "step": 8312 }, { "epoch": 10.64064, "grad_norm": 0.8818540573120117, "learning_rate": 3.3389355742296916e-05, "loss": 0.5862, "step": 8313 }, { "epoch": 10.64192, "grad_norm": 0.859569787979126, "learning_rate": 3.3387354941976795e-05, "loss": 0.5456, "step": 8314 }, { "epoch": 10.6432, "grad_norm": 0.9021491408348083, "learning_rate": 3.338535414165667e-05, "loss": 0.6043, "step": 8315 }, { "epoch": 10.64448, "grad_norm": 0.8722798228263855, "learning_rate": 3.338335334133654e-05, "loss": 0.5441, "step": 8316 }, { "epoch": 10.64576, "grad_norm": 0.8792587518692017, "learning_rate": 3.338135254101641e-05, "loss": 0.5534, "step": 8317 }, { "epoch": 10.64704, "grad_norm": 0.8752608895301819, "learning_rate": 3.337935174069628e-05, "loss": 0.537, "step": 8318 }, { "epoch": 10.64832, "grad_norm": 0.8951905369758606, "learning_rate": 3.3377350940376154e-05, "loss": 0.6054, "step": 8319 }, { "epoch": 10.6496, "grad_norm": 0.8960387110710144, "learning_rate": 3.337535014005602e-05, "loss": 0.6164, "step": 8320 }, { "epoch": 10.65088, "grad_norm": 0.9029424786567688, "learning_rate": 3.33733493397359e-05, "loss": 0.5642, "step": 8321 }, { "epoch": 10.65216, "grad_norm": 0.8857460618019104, "learning_rate": 3.337134853941577e-05, "loss": 0.5833, "step": 8322 }, { "epoch": 10.65344, "grad_norm": 0.8373080492019653, "learning_rate": 3.336934773909564e-05, "loss": 0.5349, "step": 8323 }, { "epoch": 10.65472, "grad_norm": 0.8566232919692993, "learning_rate": 3.336734693877551e-05, "loss": 0.5648, "step": 8324 }, { "epoch": 10.656, "grad_norm": 0.894591212272644, "learning_rate": 3.3365346138455385e-05, "loss": 
0.5989, "step": 8325 }, { "epoch": 10.65728, "grad_norm": 0.8619808554649353, "learning_rate": 3.336334533813526e-05, "loss": 0.535, "step": 8326 }, { "epoch": 10.65856, "grad_norm": 0.8474307060241699, "learning_rate": 3.336134453781513e-05, "loss": 0.5814, "step": 8327 }, { "epoch": 10.659839999999999, "grad_norm": 0.8520254492759705, "learning_rate": 3.3359343737494994e-05, "loss": 0.522, "step": 8328 }, { "epoch": 10.66112, "grad_norm": 0.8767806887626648, "learning_rate": 3.335734293717487e-05, "loss": 0.5766, "step": 8329 }, { "epoch": 10.6624, "grad_norm": 0.8880060911178589, "learning_rate": 3.3355342136854745e-05, "loss": 0.6238, "step": 8330 }, { "epoch": 10.66368, "grad_norm": 0.8470461964607239, "learning_rate": 3.3353341336534616e-05, "loss": 0.5878, "step": 8331 }, { "epoch": 10.66496, "grad_norm": 0.8690903782844543, "learning_rate": 3.335134053621449e-05, "loss": 0.6082, "step": 8332 }, { "epoch": 10.66624, "grad_norm": 0.8474588990211487, "learning_rate": 3.334933973589436e-05, "loss": 0.5873, "step": 8333 }, { "epoch": 10.66752, "grad_norm": 0.9330595135688782, "learning_rate": 3.334733893557423e-05, "loss": 0.6094, "step": 8334 }, { "epoch": 10.6688, "grad_norm": 0.8823012709617615, "learning_rate": 3.3345338135254104e-05, "loss": 0.5635, "step": 8335 }, { "epoch": 10.67008, "grad_norm": 0.8572724461555481, "learning_rate": 3.3343337334933976e-05, "loss": 0.5932, "step": 8336 }, { "epoch": 10.67136, "grad_norm": 0.8401781916618347, "learning_rate": 3.334133653461385e-05, "loss": 0.595, "step": 8337 }, { "epoch": 10.67264, "grad_norm": 0.841372549533844, "learning_rate": 3.333933573429372e-05, "loss": 0.5606, "step": 8338 }, { "epoch": 10.67392, "grad_norm": 0.8536092042922974, "learning_rate": 3.333733493397359e-05, "loss": 0.5635, "step": 8339 }, { "epoch": 10.6752, "grad_norm": 0.8724260330200195, "learning_rate": 3.333533413365346e-05, "loss": 0.5582, "step": 8340 }, { "epoch": 10.67648, "grad_norm": 0.8848631978034973, "learning_rate": 
3.3333333333333335e-05, "loss": 0.5671, "step": 8341 }, { "epoch": 10.67776, "grad_norm": 0.8863515257835388, "learning_rate": 3.333133253301321e-05, "loss": 0.6179, "step": 8342 }, { "epoch": 10.67904, "grad_norm": 0.8705168962478638, "learning_rate": 3.332933173269308e-05, "loss": 0.5777, "step": 8343 }, { "epoch": 10.68032, "grad_norm": 0.845525324344635, "learning_rate": 3.332733093237295e-05, "loss": 0.5936, "step": 8344 }, { "epoch": 10.6816, "grad_norm": 0.9055159687995911, "learning_rate": 3.332533013205282e-05, "loss": 0.5915, "step": 8345 }, { "epoch": 10.68288, "grad_norm": 0.8923260569572449, "learning_rate": 3.3323329331732694e-05, "loss": 0.5929, "step": 8346 }, { "epoch": 10.68416, "grad_norm": 0.8504287600517273, "learning_rate": 3.3321328531412566e-05, "loss": 0.5632, "step": 8347 }, { "epoch": 10.68544, "grad_norm": 0.8821974992752075, "learning_rate": 3.331932773109244e-05, "loss": 0.5745, "step": 8348 }, { "epoch": 10.68672, "grad_norm": 0.8966748118400574, "learning_rate": 3.331732693077231e-05, "loss": 0.5867, "step": 8349 }, { "epoch": 10.688, "grad_norm": 0.8681886196136475, "learning_rate": 3.331532613045218e-05, "loss": 0.5741, "step": 8350 }, { "epoch": 10.68928, "grad_norm": 0.8703950643539429, "learning_rate": 3.3313325330132053e-05, "loss": 0.5322, "step": 8351 }, { "epoch": 10.69056, "grad_norm": 0.8685113787651062, "learning_rate": 3.3311324529811925e-05, "loss": 0.5438, "step": 8352 }, { "epoch": 10.69184, "grad_norm": 0.8648349642753601, "learning_rate": 3.33093237294918e-05, "loss": 0.5632, "step": 8353 }, { "epoch": 10.69312, "grad_norm": 0.906882107257843, "learning_rate": 3.330732292917167e-05, "loss": 0.5535, "step": 8354 }, { "epoch": 10.6944, "grad_norm": 0.8382284045219421, "learning_rate": 3.330532212885154e-05, "loss": 0.542, "step": 8355 }, { "epoch": 10.69568, "grad_norm": 0.90635746717453, "learning_rate": 3.330332132853141e-05, "loss": 0.588, "step": 8356 }, { "epoch": 10.69696, "grad_norm": 0.8943015336990356, 
"learning_rate": 3.330132052821129e-05, "loss": 0.5763, "step": 8357 }, { "epoch": 10.69824, "grad_norm": 0.8443808555603027, "learning_rate": 3.3299319727891156e-05, "loss": 0.4964, "step": 8358 }, { "epoch": 10.69952, "grad_norm": 0.8682435750961304, "learning_rate": 3.329731892757103e-05, "loss": 0.5687, "step": 8359 }, { "epoch": 10.7008, "grad_norm": 0.856095552444458, "learning_rate": 3.32953181272509e-05, "loss": 0.5713, "step": 8360 }, { "epoch": 10.70208, "grad_norm": 0.8573371171951294, "learning_rate": 3.329331732693077e-05, "loss": 0.5471, "step": 8361 }, { "epoch": 10.70336, "grad_norm": 0.8584128618240356, "learning_rate": 3.3291316526610644e-05, "loss": 0.545, "step": 8362 }, { "epoch": 10.70464, "grad_norm": 0.827085554599762, "learning_rate": 3.3289315726290516e-05, "loss": 0.5662, "step": 8363 }, { "epoch": 10.70592, "grad_norm": 0.852120578289032, "learning_rate": 3.3287314925970394e-05, "loss": 0.5583, "step": 8364 }, { "epoch": 10.7072, "grad_norm": 0.8772450685501099, "learning_rate": 3.3285314125650266e-05, "loss": 0.5958, "step": 8365 }, { "epoch": 10.70848, "grad_norm": 0.7987073063850403, "learning_rate": 3.328331332533013e-05, "loss": 0.5179, "step": 8366 }, { "epoch": 10.70976, "grad_norm": 0.8500874638557434, "learning_rate": 3.328131252501e-05, "loss": 0.5829, "step": 8367 }, { "epoch": 10.71104, "grad_norm": 0.8709688782691956, "learning_rate": 3.3279311724689875e-05, "loss": 0.5892, "step": 8368 }, { "epoch": 10.71232, "grad_norm": 0.8327182531356812, "learning_rate": 3.327731092436975e-05, "loss": 0.5589, "step": 8369 }, { "epoch": 10.7136, "grad_norm": 0.8227571845054626, "learning_rate": 3.327531012404962e-05, "loss": 0.5639, "step": 8370 }, { "epoch": 10.71488, "grad_norm": 0.8822687268257141, "learning_rate": 3.32733093237295e-05, "loss": 0.5801, "step": 8371 }, { "epoch": 10.71616, "grad_norm": 0.8199798464775085, "learning_rate": 3.327130852340937e-05, "loss": 0.5418, "step": 8372 }, { "epoch": 10.71744, "grad_norm": 
0.877302885055542, "learning_rate": 3.326930772308924e-05, "loss": 0.5717, "step": 8373 }, { "epoch": 10.71872, "grad_norm": 0.874665379524231, "learning_rate": 3.3267306922769106e-05, "loss": 0.558, "step": 8374 }, { "epoch": 10.72, "grad_norm": 0.8584824800491333, "learning_rate": 3.326530612244898e-05, "loss": 0.5602, "step": 8375 }, { "epoch": 10.72128, "grad_norm": 0.8538774251937866, "learning_rate": 3.326330532212885e-05, "loss": 0.5614, "step": 8376 }, { "epoch": 10.72256, "grad_norm": 0.8546411991119385, "learning_rate": 3.326130452180872e-05, "loss": 0.6141, "step": 8377 }, { "epoch": 10.72384, "grad_norm": 0.8563506007194519, "learning_rate": 3.32593037214886e-05, "loss": 0.5317, "step": 8378 }, { "epoch": 10.72512, "grad_norm": 0.88761305809021, "learning_rate": 3.325730292116847e-05, "loss": 0.5783, "step": 8379 }, { "epoch": 10.7264, "grad_norm": 0.8833502531051636, "learning_rate": 3.3255302120848344e-05, "loss": 0.5822, "step": 8380 }, { "epoch": 10.72768, "grad_norm": 0.8859793543815613, "learning_rate": 3.3253301320528216e-05, "loss": 0.5652, "step": 8381 }, { "epoch": 10.72896, "grad_norm": 0.855240523815155, "learning_rate": 3.325130052020808e-05, "loss": 0.5449, "step": 8382 }, { "epoch": 10.73024, "grad_norm": 0.8389564156532288, "learning_rate": 3.324929971988795e-05, "loss": 0.5591, "step": 8383 }, { "epoch": 10.73152, "grad_norm": 0.8849216103553772, "learning_rate": 3.3247298919567825e-05, "loss": 0.5663, "step": 8384 }, { "epoch": 10.7328, "grad_norm": 0.8661803007125854, "learning_rate": 3.3245298119247703e-05, "loss": 0.59, "step": 8385 }, { "epoch": 10.73408, "grad_norm": 0.8539361953735352, "learning_rate": 3.3243297318927575e-05, "loss": 0.5407, "step": 8386 }, { "epoch": 10.73536, "grad_norm": 0.9314575791358948, "learning_rate": 3.324129651860745e-05, "loss": 0.6113, "step": 8387 }, { "epoch": 10.73664, "grad_norm": 0.8329246044158936, "learning_rate": 3.323929571828732e-05, "loss": 0.5317, "step": 8388 }, { "epoch": 10.73792, 
"grad_norm": 0.8746315240859985, "learning_rate": 3.323729491796719e-05, "loss": 0.6104, "step": 8389 }, { "epoch": 10.7392, "grad_norm": 0.8568903803825378, "learning_rate": 3.3235294117647056e-05, "loss": 0.6033, "step": 8390 }, { "epoch": 10.74048, "grad_norm": 0.8712742924690247, "learning_rate": 3.323329331732693e-05, "loss": 0.5644, "step": 8391 }, { "epoch": 10.74176, "grad_norm": 0.8397703170776367, "learning_rate": 3.3231292517006806e-05, "loss": 0.5512, "step": 8392 }, { "epoch": 10.74304, "grad_norm": 0.9338214993476868, "learning_rate": 3.322929171668668e-05, "loss": 0.6489, "step": 8393 }, { "epoch": 10.74432, "grad_norm": 0.8752598762512207, "learning_rate": 3.322729091636655e-05, "loss": 0.5417, "step": 8394 }, { "epoch": 10.7456, "grad_norm": 0.8597055077552795, "learning_rate": 3.322529011604642e-05, "loss": 0.5591, "step": 8395 }, { "epoch": 10.74688, "grad_norm": 0.9333181381225586, "learning_rate": 3.3223289315726294e-05, "loss": 0.5923, "step": 8396 }, { "epoch": 10.74816, "grad_norm": 0.8479388356208801, "learning_rate": 3.3221288515406166e-05, "loss": 0.5427, "step": 8397 }, { "epoch": 10.74944, "grad_norm": 0.8617178201675415, "learning_rate": 3.321928771508603e-05, "loss": 0.5473, "step": 8398 }, { "epoch": 10.75072, "grad_norm": 0.9343951344490051, "learning_rate": 3.321728691476591e-05, "loss": 0.6115, "step": 8399 }, { "epoch": 10.752, "grad_norm": 0.9055424928665161, "learning_rate": 3.321528611444578e-05, "loss": 0.5647, "step": 8400 }, { "epoch": 10.75328, "grad_norm": 0.9039073586463928, "learning_rate": 3.321328531412565e-05, "loss": 0.5382, "step": 8401 }, { "epoch": 10.75456, "grad_norm": 0.8732048869132996, "learning_rate": 3.3211284513805525e-05, "loss": 0.5488, "step": 8402 }, { "epoch": 10.75584, "grad_norm": 0.859381914138794, "learning_rate": 3.32092837134854e-05, "loss": 0.6105, "step": 8403 }, { "epoch": 10.75712, "grad_norm": 0.8635976910591125, "learning_rate": 3.320728291316527e-05, "loss": 0.5932, "step": 8404 }, { 
"epoch": 10.7584, "grad_norm": 0.8763898611068726, "learning_rate": 3.320528211284514e-05, "loss": 0.5703, "step": 8405 }, { "epoch": 10.75968, "grad_norm": 0.828618049621582, "learning_rate": 3.320328131252501e-05, "loss": 0.5675, "step": 8406 }, { "epoch": 10.76096, "grad_norm": 0.894950807094574, "learning_rate": 3.3201280512204884e-05, "loss": 0.5832, "step": 8407 }, { "epoch": 10.76224, "grad_norm": 0.8377487659454346, "learning_rate": 3.3199279711884756e-05, "loss": 0.5568, "step": 8408 }, { "epoch": 10.76352, "grad_norm": 0.8179535269737244, "learning_rate": 3.319727891156463e-05, "loss": 0.4754, "step": 8409 }, { "epoch": 10.7648, "grad_norm": 0.9088795185089111, "learning_rate": 3.31952781112445e-05, "loss": 0.6044, "step": 8410 }, { "epoch": 10.76608, "grad_norm": 0.9149488806724548, "learning_rate": 3.319327731092437e-05, "loss": 0.577, "step": 8411 }, { "epoch": 10.76736, "grad_norm": 0.9027832746505737, "learning_rate": 3.3191276510604244e-05, "loss": 0.5866, "step": 8412 }, { "epoch": 10.76864, "grad_norm": 0.86416095495224, "learning_rate": 3.3189275710284115e-05, "loss": 0.6003, "step": 8413 }, { "epoch": 10.76992, "grad_norm": 0.8570460677146912, "learning_rate": 3.318727490996399e-05, "loss": 0.562, "step": 8414 }, { "epoch": 10.7712, "grad_norm": 0.9128470420837402, "learning_rate": 3.318527410964386e-05, "loss": 0.5915, "step": 8415 }, { "epoch": 10.77248, "grad_norm": 0.8856536746025085, "learning_rate": 3.318327330932373e-05, "loss": 0.5406, "step": 8416 }, { "epoch": 10.77376, "grad_norm": 0.8812784552574158, "learning_rate": 3.31812725090036e-05, "loss": 0.5233, "step": 8417 }, { "epoch": 10.77504, "grad_norm": 0.8757975697517395, "learning_rate": 3.3179271708683475e-05, "loss": 0.5947, "step": 8418 }, { "epoch": 10.77632, "grad_norm": 0.8653748035430908, "learning_rate": 3.3177270908363347e-05, "loss": 0.5823, "step": 8419 }, { "epoch": 10.7776, "grad_norm": 0.9042747020721436, "learning_rate": 3.317527010804322e-05, "loss": 0.5583, "step": 
8420 }, { "epoch": 10.778880000000001, "grad_norm": 0.8836710453033447, "learning_rate": 3.317326930772309e-05, "loss": 0.563, "step": 8421 }, { "epoch": 10.78016, "grad_norm": 0.8876301050186157, "learning_rate": 3.317126850740296e-05, "loss": 0.5814, "step": 8422 }, { "epoch": 10.78144, "grad_norm": 0.8692103028297424, "learning_rate": 3.3169267707082834e-05, "loss": 0.6207, "step": 8423 }, { "epoch": 10.78272, "grad_norm": 0.8895595669746399, "learning_rate": 3.3167266906762706e-05, "loss": 0.566, "step": 8424 }, { "epoch": 10.784, "grad_norm": 0.8768266439437866, "learning_rate": 3.316526610644258e-05, "loss": 0.6218, "step": 8425 }, { "epoch": 10.78528, "grad_norm": 0.8848718404769897, "learning_rate": 3.316326530612245e-05, "loss": 0.5943, "step": 8426 }, { "epoch": 10.78656, "grad_norm": 0.9084532260894775, "learning_rate": 3.316126450580233e-05, "loss": 0.6096, "step": 8427 }, { "epoch": 10.78784, "grad_norm": 0.9083433747291565, "learning_rate": 3.315926370548219e-05, "loss": 0.5836, "step": 8428 }, { "epoch": 10.78912, "grad_norm": 0.8622574806213379, "learning_rate": 3.3157262905162065e-05, "loss": 0.5541, "step": 8429 }, { "epoch": 10.7904, "grad_norm": 0.8447574973106384, "learning_rate": 3.315526210484194e-05, "loss": 0.5544, "step": 8430 }, { "epoch": 10.79168, "grad_norm": 0.8831068873405457, "learning_rate": 3.315326130452181e-05, "loss": 0.6255, "step": 8431 }, { "epoch": 10.79296, "grad_norm": 0.8802228569984436, "learning_rate": 3.315126050420168e-05, "loss": 0.6056, "step": 8432 }, { "epoch": 10.79424, "grad_norm": 0.8990511298179626, "learning_rate": 3.314925970388155e-05, "loss": 0.6257, "step": 8433 }, { "epoch": 10.79552, "grad_norm": 0.8636667728424072, "learning_rate": 3.3147258903561424e-05, "loss": 0.5596, "step": 8434 }, { "epoch": 10.7968, "grad_norm": 0.8775346279144287, "learning_rate": 3.31452581032413e-05, "loss": 0.5596, "step": 8435 }, { "epoch": 10.79808, "grad_norm": 0.8895041942596436, "learning_rate": 3.314325730292117e-05, 
"loss": 0.5812, "step": 8436 }, { "epoch": 10.79936, "grad_norm": 0.8645225167274475, "learning_rate": 3.314125650260104e-05, "loss": 0.566, "step": 8437 }, { "epoch": 10.80064, "grad_norm": 0.9078739285469055, "learning_rate": 3.313925570228091e-05, "loss": 0.5976, "step": 8438 }, { "epoch": 10.801919999999999, "grad_norm": 0.8740204572677612, "learning_rate": 3.3137254901960784e-05, "loss": 0.5386, "step": 8439 }, { "epoch": 10.8032, "grad_norm": 0.8575959205627441, "learning_rate": 3.3135254101640656e-05, "loss": 0.5371, "step": 8440 }, { "epoch": 10.80448, "grad_norm": 0.8617302179336548, "learning_rate": 3.313325330132053e-05, "loss": 0.5515, "step": 8441 }, { "epoch": 10.80576, "grad_norm": 0.8753884434700012, "learning_rate": 3.3131252501000406e-05, "loss": 0.5821, "step": 8442 }, { "epoch": 10.80704, "grad_norm": 0.8543848991394043, "learning_rate": 3.312925170068028e-05, "loss": 0.5948, "step": 8443 }, { "epoch": 10.80832, "grad_norm": 0.8967148661613464, "learning_rate": 3.312725090036014e-05, "loss": 0.6083, "step": 8444 }, { "epoch": 10.8096, "grad_norm": 0.8336367011070251, "learning_rate": 3.3125250100040015e-05, "loss": 0.5127, "step": 8445 }, { "epoch": 10.810880000000001, "grad_norm": 0.8468740582466125, "learning_rate": 3.312324929971989e-05, "loss": 0.5522, "step": 8446 }, { "epoch": 10.81216, "grad_norm": 0.8677701354026794, "learning_rate": 3.312124849939976e-05, "loss": 0.5554, "step": 8447 }, { "epoch": 10.81344, "grad_norm": 0.8767905235290527, "learning_rate": 3.311924769907963e-05, "loss": 0.5572, "step": 8448 }, { "epoch": 10.81472, "grad_norm": 0.8894631266593933, "learning_rate": 3.311724689875951e-05, "loss": 0.6002, "step": 8449 }, { "epoch": 10.816, "grad_norm": 0.8528721332550049, "learning_rate": 3.311524609843938e-05, "loss": 0.5644, "step": 8450 }, { "epoch": 10.81728, "grad_norm": 0.8421396017074585, "learning_rate": 3.311324529811925e-05, "loss": 0.5392, "step": 8451 }, { "epoch": 10.81856, "grad_norm": 0.8507756590843201, 
"learning_rate": 3.311124449779912e-05, "loss": 0.5513, "step": 8452 }, { "epoch": 10.81984, "grad_norm": 0.8500514626502991, "learning_rate": 3.310924369747899e-05, "loss": 0.6198, "step": 8453 }, { "epoch": 10.82112, "grad_norm": 0.8615267872810364, "learning_rate": 3.310724289715886e-05, "loss": 0.5162, "step": 8454 }, { "epoch": 10.8224, "grad_norm": 0.8663539290428162, "learning_rate": 3.310524209683873e-05, "loss": 0.5811, "step": 8455 }, { "epoch": 10.82368, "grad_norm": 0.8774354457855225, "learning_rate": 3.310324129651861e-05, "loss": 0.6085, "step": 8456 }, { "epoch": 10.82496, "grad_norm": 0.9009751081466675, "learning_rate": 3.3101240496198484e-05, "loss": 0.5154, "step": 8457 }, { "epoch": 10.82624, "grad_norm": 0.9249484539031982, "learning_rate": 3.3099239695878356e-05, "loss": 0.6029, "step": 8458 }, { "epoch": 10.82752, "grad_norm": 0.8451379537582397, "learning_rate": 3.309723889555823e-05, "loss": 0.5601, "step": 8459 }, { "epoch": 10.8288, "grad_norm": 0.905768871307373, "learning_rate": 3.309523809523809e-05, "loss": 0.5652, "step": 8460 }, { "epoch": 10.83008, "grad_norm": 0.9199037551879883, "learning_rate": 3.3093237294917965e-05, "loss": 0.6089, "step": 8461 }, { "epoch": 10.83136, "grad_norm": 0.8506740927696228, "learning_rate": 3.3091236494597836e-05, "loss": 0.5242, "step": 8462 }, { "epoch": 10.83264, "grad_norm": 0.9114420413970947, "learning_rate": 3.3089235694277715e-05, "loss": 0.6026, "step": 8463 }, { "epoch": 10.833919999999999, "grad_norm": 0.8378769755363464, "learning_rate": 3.308723489395759e-05, "loss": 0.5428, "step": 8464 }, { "epoch": 10.8352, "grad_norm": 0.863136887550354, "learning_rate": 3.308523409363746e-05, "loss": 0.5569, "step": 8465 }, { "epoch": 10.83648, "grad_norm": 0.9558273553848267, "learning_rate": 3.308323329331733e-05, "loss": 0.6102, "step": 8466 }, { "epoch": 10.83776, "grad_norm": 0.860105037689209, "learning_rate": 3.30812324929972e-05, "loss": 0.5872, "step": 8467 }, { "epoch": 10.83904, 
"grad_norm": 0.8709381222724915, "learning_rate": 3.307923169267707e-05, "loss": 0.5548, "step": 8468 }, { "epoch": 10.84032, "grad_norm": 0.8705414533615112, "learning_rate": 3.307723089235694e-05, "loss": 0.5317, "step": 8469 }, { "epoch": 10.8416, "grad_norm": 0.8116998672485352, "learning_rate": 3.307523009203682e-05, "loss": 0.5055, "step": 8470 }, { "epoch": 10.84288, "grad_norm": 0.9043382406234741, "learning_rate": 3.307322929171669e-05, "loss": 0.5995, "step": 8471 }, { "epoch": 10.84416, "grad_norm": 0.9392409920692444, "learning_rate": 3.307122849139656e-05, "loss": 0.6222, "step": 8472 }, { "epoch": 10.84544, "grad_norm": 0.8821365833282471, "learning_rate": 3.3069227691076434e-05, "loss": 0.5192, "step": 8473 }, { "epoch": 10.84672, "grad_norm": 0.885803759098053, "learning_rate": 3.3067226890756305e-05, "loss": 0.6162, "step": 8474 }, { "epoch": 10.848, "grad_norm": 0.8577411770820618, "learning_rate": 3.306522609043618e-05, "loss": 0.5609, "step": 8475 }, { "epoch": 10.84928, "grad_norm": 0.8510453104972839, "learning_rate": 3.306322529011604e-05, "loss": 0.5378, "step": 8476 }, { "epoch": 10.85056, "grad_norm": 0.8527485728263855, "learning_rate": 3.306122448979592e-05, "loss": 0.558, "step": 8477 }, { "epoch": 10.85184, "grad_norm": 0.8647722601890564, "learning_rate": 3.305922368947579e-05, "loss": 0.5802, "step": 8478 }, { "epoch": 10.85312, "grad_norm": 0.8825690746307373, "learning_rate": 3.3057222889155665e-05, "loss": 0.5959, "step": 8479 }, { "epoch": 10.8544, "grad_norm": 0.8674288392066956, "learning_rate": 3.3055222088835537e-05, "loss": 0.5621, "step": 8480 }, { "epoch": 10.85568, "grad_norm": 0.8390064239501953, "learning_rate": 3.305322128851541e-05, "loss": 0.5452, "step": 8481 }, { "epoch": 10.85696, "grad_norm": 0.8398364186286926, "learning_rate": 3.305122048819528e-05, "loss": 0.5649, "step": 8482 }, { "epoch": 10.85824, "grad_norm": 0.7934215664863586, "learning_rate": 3.304921968787515e-05, "loss": 0.5222, "step": 8483 }, { 
"epoch": 10.85952, "grad_norm": 0.8218225240707397, "learning_rate": 3.3047218887555024e-05, "loss": 0.5509, "step": 8484 }, { "epoch": 10.8608, "grad_norm": 0.8352612257003784, "learning_rate": 3.3045218087234896e-05, "loss": 0.564, "step": 8485 }, { "epoch": 10.86208, "grad_norm": 0.8846644759178162, "learning_rate": 3.304321728691477e-05, "loss": 0.5925, "step": 8486 }, { "epoch": 10.86336, "grad_norm": 0.8664666414260864, "learning_rate": 3.304121648659464e-05, "loss": 0.573, "step": 8487 }, { "epoch": 10.86464, "grad_norm": 0.8052683472633362, "learning_rate": 3.303921568627451e-05, "loss": 0.5172, "step": 8488 }, { "epoch": 10.86592, "grad_norm": 0.8650311827659607, "learning_rate": 3.303721488595438e-05, "loss": 0.5449, "step": 8489 }, { "epoch": 10.8672, "grad_norm": 0.8478010296821594, "learning_rate": 3.3035214085634255e-05, "loss": 0.5509, "step": 8490 }, { "epoch": 10.86848, "grad_norm": 0.8610655665397644, "learning_rate": 3.303321328531413e-05, "loss": 0.5669, "step": 8491 }, { "epoch": 10.86976, "grad_norm": 0.8204941153526306, "learning_rate": 3.3031212484994e-05, "loss": 0.5607, "step": 8492 }, { "epoch": 10.87104, "grad_norm": 0.8423011302947998, "learning_rate": 3.302921168467387e-05, "loss": 0.5431, "step": 8493 }, { "epoch": 10.87232, "grad_norm": 0.8661743998527527, "learning_rate": 3.302721088435374e-05, "loss": 0.5609, "step": 8494 }, { "epoch": 10.8736, "grad_norm": 0.8518742918968201, "learning_rate": 3.3025210084033614e-05, "loss": 0.5223, "step": 8495 }, { "epoch": 10.87488, "grad_norm": 0.9045655131340027, "learning_rate": 3.3023209283713486e-05, "loss": 0.6088, "step": 8496 }, { "epoch": 10.87616, "grad_norm": 0.8613651990890503, "learning_rate": 3.302120848339336e-05, "loss": 0.5155, "step": 8497 }, { "epoch": 10.87744, "grad_norm": 0.8599672317504883, "learning_rate": 3.301920768307323e-05, "loss": 0.5743, "step": 8498 }, { "epoch": 10.87872, "grad_norm": 0.8626627922058105, "learning_rate": 3.30172068827531e-05, "loss": 0.5878, 
"step": 8499 }, { "epoch": 10.88, "grad_norm": 0.879513144493103, "learning_rate": 3.3015206082432974e-05, "loss": 0.5954, "step": 8500 }, { "epoch": 10.88128, "grad_norm": 0.8912549614906311, "learning_rate": 3.3013205282112846e-05, "loss": 0.585, "step": 8501 }, { "epoch": 10.88256, "grad_norm": 0.8111445903778076, "learning_rate": 3.301120448179272e-05, "loss": 0.5391, "step": 8502 }, { "epoch": 10.88384, "grad_norm": 0.8916809558868408, "learning_rate": 3.300920368147259e-05, "loss": 0.6075, "step": 8503 }, { "epoch": 10.88512, "grad_norm": 0.8808960318565369, "learning_rate": 3.300720288115246e-05, "loss": 0.5037, "step": 8504 }, { "epoch": 10.8864, "grad_norm": 0.8821715116500854, "learning_rate": 3.300520208083234e-05, "loss": 0.566, "step": 8505 }, { "epoch": 10.88768, "grad_norm": 0.8724703192710876, "learning_rate": 3.3003201280512205e-05, "loss": 0.5715, "step": 8506 }, { "epoch": 10.88896, "grad_norm": 0.8815937638282776, "learning_rate": 3.300120048019208e-05, "loss": 0.5345, "step": 8507 }, { "epoch": 10.89024, "grad_norm": 0.8287563920021057, "learning_rate": 3.299919967987195e-05, "loss": 0.551, "step": 8508 }, { "epoch": 10.89152, "grad_norm": 0.9234015941619873, "learning_rate": 3.299719887955182e-05, "loss": 0.6083, "step": 8509 }, { "epoch": 10.8928, "grad_norm": 0.8626712560653687, "learning_rate": 3.299519807923169e-05, "loss": 0.6007, "step": 8510 }, { "epoch": 10.89408, "grad_norm": 0.8290971517562866, "learning_rate": 3.2993197278911564e-05, "loss": 0.5234, "step": 8511 }, { "epoch": 10.89536, "grad_norm": 0.8788360357284546, "learning_rate": 3.299119647859144e-05, "loss": 0.6131, "step": 8512 }, { "epoch": 10.89664, "grad_norm": 0.8960014581680298, "learning_rate": 3.2989195678271315e-05, "loss": 0.6024, "step": 8513 }, { "epoch": 10.89792, "grad_norm": 0.8566673398017883, "learning_rate": 3.298719487795118e-05, "loss": 0.5515, "step": 8514 }, { "epoch": 10.8992, "grad_norm": 0.8075211048126221, "learning_rate": 3.298519407763105e-05, 
"loss": 0.516, "step": 8515 }, { "epoch": 10.90048, "grad_norm": 0.9277984499931335, "learning_rate": 3.2983193277310923e-05, "loss": 0.6206, "step": 8516 }, { "epoch": 10.90176, "grad_norm": 0.8750211596488953, "learning_rate": 3.2981192476990795e-05, "loss": 0.5697, "step": 8517 }, { "epoch": 10.90304, "grad_norm": 0.8238059282302856, "learning_rate": 3.297919167667067e-05, "loss": 0.541, "step": 8518 }, { "epoch": 10.90432, "grad_norm": 0.8247233629226685, "learning_rate": 3.2977190876350546e-05, "loss": 0.5711, "step": 8519 }, { "epoch": 10.9056, "grad_norm": 0.880142092704773, "learning_rate": 3.297519007603042e-05, "loss": 0.581, "step": 8520 }, { "epoch": 10.90688, "grad_norm": 0.9421783089637756, "learning_rate": 3.297318927571029e-05, "loss": 0.6042, "step": 8521 }, { "epoch": 10.90816, "grad_norm": 0.8348096609115601, "learning_rate": 3.2971188475390155e-05, "loss": 0.5075, "step": 8522 }, { "epoch": 10.90944, "grad_norm": 0.8904414772987366, "learning_rate": 3.2969187675070026e-05, "loss": 0.6436, "step": 8523 }, { "epoch": 10.91072, "grad_norm": 0.927683413028717, "learning_rate": 3.29671868747499e-05, "loss": 0.6266, "step": 8524 }, { "epoch": 10.912, "grad_norm": 0.8911395072937012, "learning_rate": 3.296518607442977e-05, "loss": 0.6085, "step": 8525 }, { "epoch": 10.91328, "grad_norm": 0.8726085424423218, "learning_rate": 3.296318527410965e-05, "loss": 0.5798, "step": 8526 }, { "epoch": 10.91456, "grad_norm": 0.8867003321647644, "learning_rate": 3.296118447378952e-05, "loss": 0.5685, "step": 8527 }, { "epoch": 10.91584, "grad_norm": 0.877424418926239, "learning_rate": 3.295918367346939e-05, "loss": 0.5986, "step": 8528 }, { "epoch": 10.91712, "grad_norm": 0.8679222464561462, "learning_rate": 3.2957182873149264e-05, "loss": 0.5735, "step": 8529 }, { "epoch": 10.9184, "grad_norm": 0.8904789090156555, "learning_rate": 3.295518207282913e-05, "loss": 0.567, "step": 8530 }, { "epoch": 10.91968, "grad_norm": 0.8913958668708801, "learning_rate": 
3.2953181272509e-05, "loss": 0.57, "step": 8531 }, { "epoch": 10.920960000000001, "grad_norm": 0.8957118391990662, "learning_rate": 3.295118047218887e-05, "loss": 0.5692, "step": 8532 }, { "epoch": 10.92224, "grad_norm": 0.8906075358390808, "learning_rate": 3.294917967186875e-05, "loss": 0.5645, "step": 8533 }, { "epoch": 10.92352, "grad_norm": 0.8821467757225037, "learning_rate": 3.2947178871548624e-05, "loss": 0.5799, "step": 8534 }, { "epoch": 10.9248, "grad_norm": 0.8894703984260559, "learning_rate": 3.2945178071228495e-05, "loss": 0.6076, "step": 8535 }, { "epoch": 10.92608, "grad_norm": 0.8252606987953186, "learning_rate": 3.294317727090837e-05, "loss": 0.5762, "step": 8536 }, { "epoch": 10.92736, "grad_norm": 0.8460403680801392, "learning_rate": 3.294117647058824e-05, "loss": 0.5853, "step": 8537 }, { "epoch": 10.92864, "grad_norm": 0.8825834393501282, "learning_rate": 3.2939175670268104e-05, "loss": 0.5628, "step": 8538 }, { "epoch": 10.92992, "grad_norm": 0.8889744877815247, "learning_rate": 3.2937174869947976e-05, "loss": 0.549, "step": 8539 }, { "epoch": 10.9312, "grad_norm": 0.9132182002067566, "learning_rate": 3.2935174069627855e-05, "loss": 0.5821, "step": 8540 }, { "epoch": 10.93248, "grad_norm": 0.8521642684936523, "learning_rate": 3.2933173269307727e-05, "loss": 0.5378, "step": 8541 }, { "epoch": 10.93376, "grad_norm": 0.8864872455596924, "learning_rate": 3.29311724689876e-05, "loss": 0.5589, "step": 8542 }, { "epoch": 10.93504, "grad_norm": 0.8883858919143677, "learning_rate": 3.292917166866747e-05, "loss": 0.5716, "step": 8543 }, { "epoch": 10.93632, "grad_norm": 0.862106442451477, "learning_rate": 3.292717086834734e-05, "loss": 0.5487, "step": 8544 }, { "epoch": 10.9376, "grad_norm": 0.9219768643379211, "learning_rate": 3.2925170068027214e-05, "loss": 0.5911, "step": 8545 }, { "epoch": 10.93888, "grad_norm": 0.9102250337600708, "learning_rate": 3.292316926770708e-05, "loss": 0.5712, "step": 8546 }, { "epoch": 10.94016, "grad_norm": 
0.8525840640068054, "learning_rate": 3.292116846738695e-05, "loss": 0.5885, "step": 8547 }, { "epoch": 10.94144, "grad_norm": 0.907727837562561, "learning_rate": 3.291916766706683e-05, "loss": 0.6011, "step": 8548 }, { "epoch": 10.94272, "grad_norm": 0.8702722191810608, "learning_rate": 3.29171668667467e-05, "loss": 0.564, "step": 8549 }, { "epoch": 10.943999999999999, "grad_norm": 0.8578301668167114, "learning_rate": 3.291516606642657e-05, "loss": 0.5763, "step": 8550 }, { "epoch": 10.94528, "grad_norm": 0.9135550260543823, "learning_rate": 3.2913165266106445e-05, "loss": 0.6035, "step": 8551 }, { "epoch": 10.94656, "grad_norm": 0.8668347001075745, "learning_rate": 3.291116446578632e-05, "loss": 0.5407, "step": 8552 }, { "epoch": 10.94784, "grad_norm": 0.8684234619140625, "learning_rate": 3.290916366546619e-05, "loss": 0.5913, "step": 8553 }, { "epoch": 10.94912, "grad_norm": 0.8825991749763489, "learning_rate": 3.2907162865146054e-05, "loss": 0.6135, "step": 8554 }, { "epoch": 10.9504, "grad_norm": 0.9004281163215637, "learning_rate": 3.290516206482593e-05, "loss": 0.6063, "step": 8555 }, { "epoch": 10.95168, "grad_norm": 0.8196146488189697, "learning_rate": 3.2903161264505804e-05, "loss": 0.5095, "step": 8556 }, { "epoch": 10.952960000000001, "grad_norm": 0.8968302011489868, "learning_rate": 3.2901160464185676e-05, "loss": 0.6126, "step": 8557 }, { "epoch": 10.95424, "grad_norm": 0.8384824991226196, "learning_rate": 3.289915966386555e-05, "loss": 0.5662, "step": 8558 }, { "epoch": 10.95552, "grad_norm": 0.9103649258613586, "learning_rate": 3.289715886354542e-05, "loss": 0.5788, "step": 8559 }, { "epoch": 10.9568, "grad_norm": 0.8952497243881226, "learning_rate": 3.289515806322529e-05, "loss": 0.5809, "step": 8560 }, { "epoch": 10.95808, "grad_norm": 0.8801275491714478, "learning_rate": 3.2893157262905164e-05, "loss": 0.5753, "step": 8561 }, { "epoch": 10.95936, "grad_norm": 0.9225260615348816, "learning_rate": 3.2891156462585036e-05, "loss": 0.601, "step": 8562 
}, { "epoch": 10.96064, "grad_norm": 0.8567471504211426, "learning_rate": 3.288915566226491e-05, "loss": 0.554, "step": 8563 }, { "epoch": 10.96192, "grad_norm": 0.8874462842941284, "learning_rate": 3.288715486194478e-05, "loss": 0.5497, "step": 8564 }, { "epoch": 10.9632, "grad_norm": 0.916522741317749, "learning_rate": 3.288515406162465e-05, "loss": 0.6097, "step": 8565 }, { "epoch": 10.96448, "grad_norm": 0.890692412853241, "learning_rate": 3.288315326130452e-05, "loss": 0.5895, "step": 8566 }, { "epoch": 10.96576, "grad_norm": 0.8528810739517212, "learning_rate": 3.2881152460984395e-05, "loss": 0.5664, "step": 8567 }, { "epoch": 10.96704, "grad_norm": 0.8634024858474731, "learning_rate": 3.287915166066427e-05, "loss": 0.598, "step": 8568 }, { "epoch": 10.96832, "grad_norm": 0.8372824788093567, "learning_rate": 3.287715086034414e-05, "loss": 0.5313, "step": 8569 }, { "epoch": 10.9696, "grad_norm": 0.8880471587181091, "learning_rate": 3.287515006002401e-05, "loss": 0.5818, "step": 8570 }, { "epoch": 10.97088, "grad_norm": 0.8914214372634888, "learning_rate": 3.287314925970388e-05, "loss": 0.5708, "step": 8571 }, { "epoch": 10.97216, "grad_norm": 0.8861146569252014, "learning_rate": 3.2871148459383754e-05, "loss": 0.5029, "step": 8572 }, { "epoch": 10.97344, "grad_norm": 0.9242550730705261, "learning_rate": 3.2869147659063626e-05, "loss": 0.5788, "step": 8573 }, { "epoch": 10.97472, "grad_norm": 0.8695968389511108, "learning_rate": 3.28671468587435e-05, "loss": 0.5828, "step": 8574 }, { "epoch": 10.975999999999999, "grad_norm": 0.8502150177955627, "learning_rate": 3.286514605842337e-05, "loss": 0.5377, "step": 8575 }, { "epoch": 10.97728, "grad_norm": 0.8788812160491943, "learning_rate": 3.286314525810325e-05, "loss": 0.5599, "step": 8576 }, { "epoch": 10.97856, "grad_norm": 0.9098305106163025, "learning_rate": 3.2861144457783113e-05, "loss": 0.5665, "step": 8577 }, { "epoch": 10.97984, "grad_norm": 0.9447456002235413, "learning_rate": 3.2859143657462985e-05, 
"loss": 0.5948, "step": 8578 }, { "epoch": 10.98112, "grad_norm": 0.9016488194465637, "learning_rate": 3.285714285714286e-05, "loss": 0.5681, "step": 8579 }, { "epoch": 10.9824, "grad_norm": 0.8744751811027527, "learning_rate": 3.285514205682273e-05, "loss": 0.5673, "step": 8580 }, { "epoch": 10.98368, "grad_norm": 0.8944303393363953, "learning_rate": 3.28531412565026e-05, "loss": 0.5808, "step": 8581 }, { "epoch": 10.98496, "grad_norm": 0.8822252750396729, "learning_rate": 3.285114045618247e-05, "loss": 0.5607, "step": 8582 }, { "epoch": 10.98624, "grad_norm": 0.8522640466690063, "learning_rate": 3.284913965586235e-05, "loss": 0.5971, "step": 8583 }, { "epoch": 10.98752, "grad_norm": 0.874326765537262, "learning_rate": 3.284713885554222e-05, "loss": 0.6094, "step": 8584 }, { "epoch": 10.9888, "grad_norm": 0.9082483053207397, "learning_rate": 3.284513805522209e-05, "loss": 0.5866, "step": 8585 }, { "epoch": 10.99008, "grad_norm": 0.8725292086601257, "learning_rate": 3.284313725490196e-05, "loss": 0.6204, "step": 8586 }, { "epoch": 10.99136, "grad_norm": 0.8516504168510437, "learning_rate": 3.284113645458183e-05, "loss": 0.5942, "step": 8587 }, { "epoch": 10.99264, "grad_norm": 0.8405760526657104, "learning_rate": 3.2839135654261704e-05, "loss": 0.6035, "step": 8588 }, { "epoch": 10.99392, "grad_norm": 0.8437188863754272, "learning_rate": 3.2837134853941576e-05, "loss": 0.5071, "step": 8589 }, { "epoch": 10.9952, "grad_norm": 0.877285897731781, "learning_rate": 3.2835134053621454e-05, "loss": 0.5761, "step": 8590 }, { "epoch": 10.99648, "grad_norm": 0.8691837191581726, "learning_rate": 3.2833133253301326e-05, "loss": 0.6239, "step": 8591 }, { "epoch": 10.99776, "grad_norm": 0.8213376402854919, "learning_rate": 3.28311324529812e-05, "loss": 0.5429, "step": 8592 }, { "epoch": 10.99904, "grad_norm": 0.8809442520141602, "learning_rate": 3.282913165266106e-05, "loss": 0.5888, "step": 8593 }, { "epoch": 11.00032, "grad_norm": 1.9187315702438354, "learning_rate": 
3.2827130852340935e-05, "loss": 1.0473, "step": 8594 }, { "epoch": 11.0016, "grad_norm": 0.9014028310775757, "learning_rate": 3.282513005202081e-05, "loss": 0.587, "step": 8595 }, { "epoch": 11.00288, "grad_norm": 0.8248893022537231, "learning_rate": 3.282312925170068e-05, "loss": 0.5534, "step": 8596 }, { "epoch": 11.00416, "grad_norm": 0.8243193626403809, "learning_rate": 3.282112845138056e-05, "loss": 0.5334, "step": 8597 }, { "epoch": 11.00544, "grad_norm": 0.8348228931427002, "learning_rate": 3.281912765106043e-05, "loss": 0.5808, "step": 8598 }, { "epoch": 11.00672, "grad_norm": 0.8530436754226685, "learning_rate": 3.28171268507403e-05, "loss": 0.5644, "step": 8599 }, { "epoch": 11.008, "grad_norm": 0.8829576969146729, "learning_rate": 3.281512605042017e-05, "loss": 0.5946, "step": 8600 }, { "epoch": 11.00928, "grad_norm": 0.8608317971229553, "learning_rate": 3.281312525010004e-05, "loss": 0.5479, "step": 8601 }, { "epoch": 11.01056, "grad_norm": 0.8292250037193298, "learning_rate": 3.281112444977991e-05, "loss": 0.5278, "step": 8602 }, { "epoch": 11.01184, "grad_norm": 0.8977212905883789, "learning_rate": 3.280912364945978e-05, "loss": 0.6198, "step": 8603 }, { "epoch": 11.01312, "grad_norm": 0.866463840007782, "learning_rate": 3.280712284913966e-05, "loss": 0.5493, "step": 8604 }, { "epoch": 11.0144, "grad_norm": 0.9034777879714966, "learning_rate": 3.280512204881953e-05, "loss": 0.5457, "step": 8605 }, { "epoch": 11.01568, "grad_norm": 0.9087190628051758, "learning_rate": 3.2803121248499404e-05, "loss": 0.5557, "step": 8606 }, { "epoch": 11.01696, "grad_norm": 0.8566074967384338, "learning_rate": 3.2801120448179276e-05, "loss": 0.5722, "step": 8607 }, { "epoch": 11.01824, "grad_norm": 0.8810505270957947, "learning_rate": 3.279911964785915e-05, "loss": 0.5842, "step": 8608 }, { "epoch": 11.01952, "grad_norm": 0.8587162494659424, "learning_rate": 3.279711884753901e-05, "loss": 0.4996, "step": 8609 }, { "epoch": 11.0208, "grad_norm": 0.8678077459335327, 
"learning_rate": 3.2795118047218885e-05, "loss": 0.5706, "step": 8610 }, { "epoch": 11.02208, "grad_norm": 0.8607105612754822, "learning_rate": 3.279311724689876e-05, "loss": 0.548, "step": 8611 }, { "epoch": 11.02336, "grad_norm": 0.8053077459335327, "learning_rate": 3.2791116446578635e-05, "loss": 0.5258, "step": 8612 }, { "epoch": 11.02464, "grad_norm": 0.8794430494308472, "learning_rate": 3.278911564625851e-05, "loss": 0.5362, "step": 8613 }, { "epoch": 11.02592, "grad_norm": 0.8815926313400269, "learning_rate": 3.278711484593838e-05, "loss": 0.5306, "step": 8614 }, { "epoch": 11.0272, "grad_norm": 0.8498107194900513, "learning_rate": 3.278511404561825e-05, "loss": 0.5102, "step": 8615 }, { "epoch": 11.02848, "grad_norm": 0.907062292098999, "learning_rate": 3.278311324529812e-05, "loss": 0.5752, "step": 8616 }, { "epoch": 11.02976, "grad_norm": 0.8796223998069763, "learning_rate": 3.278111244497799e-05, "loss": 0.5416, "step": 8617 }, { "epoch": 11.03104, "grad_norm": 0.9161748290061951, "learning_rate": 3.2779111644657866e-05, "loss": 0.5676, "step": 8618 }, { "epoch": 11.03232, "grad_norm": 0.8488770127296448, "learning_rate": 3.277711084433774e-05, "loss": 0.5681, "step": 8619 }, { "epoch": 11.0336, "grad_norm": 0.9294846057891846, "learning_rate": 3.277511004401761e-05, "loss": 0.5358, "step": 8620 }, { "epoch": 11.03488, "grad_norm": 0.8868327736854553, "learning_rate": 3.277310924369748e-05, "loss": 0.4881, "step": 8621 }, { "epoch": 11.03616, "grad_norm": 0.8899376392364502, "learning_rate": 3.2771108443377354e-05, "loss": 0.533, "step": 8622 }, { "epoch": 11.03744, "grad_norm": 0.8953622579574585, "learning_rate": 3.2769107643057226e-05, "loss": 0.5389, "step": 8623 }, { "epoch": 11.03872, "grad_norm": 0.9292336106300354, "learning_rate": 3.27671068427371e-05, "loss": 0.606, "step": 8624 }, { "epoch": 11.04, "grad_norm": 0.9286232590675354, "learning_rate": 3.276510604241697e-05, "loss": 0.5897, "step": 8625 }, { "epoch": 11.04128, "grad_norm": 
0.9098997712135315, "learning_rate": 3.276310524209684e-05, "loss": 0.5316, "step": 8626 }, { "epoch": 11.04256, "grad_norm": 0.9626008868217468, "learning_rate": 3.276110444177671e-05, "loss": 0.6035, "step": 8627 }, { "epoch": 11.04384, "grad_norm": 0.8677993416786194, "learning_rate": 3.2759103641456585e-05, "loss": 0.4975, "step": 8628 }, { "epoch": 11.04512, "grad_norm": 0.8999881148338318, "learning_rate": 3.275710284113646e-05, "loss": 0.5494, "step": 8629 }, { "epoch": 11.0464, "grad_norm": 0.8779304623603821, "learning_rate": 3.275510204081633e-05, "loss": 0.5525, "step": 8630 }, { "epoch": 11.04768, "grad_norm": 0.9179341793060303, "learning_rate": 3.27531012404962e-05, "loss": 0.5784, "step": 8631 }, { "epoch": 11.04896, "grad_norm": 0.9072324633598328, "learning_rate": 3.275110044017607e-05, "loss": 0.548, "step": 8632 }, { "epoch": 11.05024, "grad_norm": 0.9047489166259766, "learning_rate": 3.2749099639855944e-05, "loss": 0.5374, "step": 8633 }, { "epoch": 11.05152, "grad_norm": 0.9042777419090271, "learning_rate": 3.2747098839535816e-05, "loss": 0.6008, "step": 8634 }, { "epoch": 11.0528, "grad_norm": 0.913502037525177, "learning_rate": 3.274509803921569e-05, "loss": 0.6139, "step": 8635 }, { "epoch": 11.05408, "grad_norm": 0.8988363146781921, "learning_rate": 3.274309723889556e-05, "loss": 0.5735, "step": 8636 }, { "epoch": 11.05536, "grad_norm": 0.9248592257499695, "learning_rate": 3.274109643857543e-05, "loss": 0.6024, "step": 8637 }, { "epoch": 11.05664, "grad_norm": 0.8595258593559265, "learning_rate": 3.2739095638255303e-05, "loss": 0.5686, "step": 8638 }, { "epoch": 11.05792, "grad_norm": 0.917407751083374, "learning_rate": 3.2737094837935175e-05, "loss": 0.5686, "step": 8639 }, { "epoch": 11.0592, "grad_norm": 0.8839895725250244, "learning_rate": 3.273509403761505e-05, "loss": 0.5334, "step": 8640 }, { "epoch": 11.06048, "grad_norm": 0.8676347136497498, "learning_rate": 3.273309323729492e-05, "loss": 0.5396, "step": 8641 }, { "epoch": 
11.06176, "grad_norm": 0.880967915058136, "learning_rate": 3.273109243697479e-05, "loss": 0.514, "step": 8642 }, { "epoch": 11.06304, "grad_norm": 0.9367587566375732, "learning_rate": 3.272909163665466e-05, "loss": 0.5731, "step": 8643 }, { "epoch": 11.06432, "grad_norm": 0.8796616196632385, "learning_rate": 3.2727090836334535e-05, "loss": 0.5887, "step": 8644 }, { "epoch": 11.0656, "grad_norm": 0.8505174517631531, "learning_rate": 3.2725090036014406e-05, "loss": 0.5534, "step": 8645 }, { "epoch": 11.06688, "grad_norm": 0.8879563808441162, "learning_rate": 3.2723089235694285e-05, "loss": 0.6019, "step": 8646 }, { "epoch": 11.06816, "grad_norm": 0.876809298992157, "learning_rate": 3.272108843537415e-05, "loss": 0.5354, "step": 8647 }, { "epoch": 11.06944, "grad_norm": 0.8911082148551941, "learning_rate": 3.271908763505402e-05, "loss": 0.5154, "step": 8648 }, { "epoch": 11.07072, "grad_norm": 0.9009726643562317, "learning_rate": 3.2717086834733894e-05, "loss": 0.5128, "step": 8649 }, { "epoch": 11.072, "grad_norm": 0.8957163691520691, "learning_rate": 3.2715086034413766e-05, "loss": 0.5322, "step": 8650 }, { "epoch": 11.07328, "grad_norm": 0.866563618183136, "learning_rate": 3.271308523409364e-05, "loss": 0.5464, "step": 8651 }, { "epoch": 11.07456, "grad_norm": 0.9009678959846497, "learning_rate": 3.271108443377351e-05, "loss": 0.536, "step": 8652 }, { "epoch": 11.07584, "grad_norm": 0.9172965288162231, "learning_rate": 3.270908363345339e-05, "loss": 0.5667, "step": 8653 }, { "epoch": 11.07712, "grad_norm": 0.9287314414978027, "learning_rate": 3.270708283313326e-05, "loss": 0.5507, "step": 8654 }, { "epoch": 11.0784, "grad_norm": 0.9205261468887329, "learning_rate": 3.2705082032813125e-05, "loss": 0.5491, "step": 8655 }, { "epoch": 11.07968, "grad_norm": 0.8569768667221069, "learning_rate": 3.2703081232493e-05, "loss": 0.5158, "step": 8656 }, { "epoch": 11.08096, "grad_norm": 0.9120553731918335, "learning_rate": 3.270108043217287e-05, "loss": 0.5894, "step": 8657 }, 
{ "epoch": 11.08224, "grad_norm": 0.9037409424781799, "learning_rate": 3.269907963185274e-05, "loss": 0.5456, "step": 8658 }, { "epoch": 11.08352, "grad_norm": 0.9417985081672668, "learning_rate": 3.269707883153261e-05, "loss": 0.5491, "step": 8659 }, { "epoch": 11.0848, "grad_norm": 0.8706283569335938, "learning_rate": 3.2695078031212484e-05, "loss": 0.5391, "step": 8660 }, { "epoch": 11.08608, "grad_norm": 0.8442224860191345, "learning_rate": 3.269307723089236e-05, "loss": 0.5301, "step": 8661 }, { "epoch": 11.08736, "grad_norm": 0.8892581462860107, "learning_rate": 3.2691076430572235e-05, "loss": 0.5239, "step": 8662 }, { "epoch": 11.08864, "grad_norm": 0.8483731746673584, "learning_rate": 3.26890756302521e-05, "loss": 0.5079, "step": 8663 }, { "epoch": 11.08992, "grad_norm": 0.8920766711235046, "learning_rate": 3.268707482993197e-05, "loss": 0.5682, "step": 8664 }, { "epoch": 11.0912, "grad_norm": 0.8351317048072815, "learning_rate": 3.2685074029611844e-05, "loss": 0.5189, "step": 8665 }, { "epoch": 11.09248, "grad_norm": 0.9149410724639893, "learning_rate": 3.2683073229291715e-05, "loss": 0.5959, "step": 8666 }, { "epoch": 11.09376, "grad_norm": 0.8564448356628418, "learning_rate": 3.268107242897159e-05, "loss": 0.5124, "step": 8667 }, { "epoch": 11.09504, "grad_norm": 0.8573926687240601, "learning_rate": 3.2679071628651466e-05, "loss": 0.5366, "step": 8668 }, { "epoch": 11.09632, "grad_norm": 0.8379302620887756, "learning_rate": 3.267707082833134e-05, "loss": 0.5218, "step": 8669 }, { "epoch": 11.0976, "grad_norm": 0.8685084581375122, "learning_rate": 3.267507002801121e-05, "loss": 0.5322, "step": 8670 }, { "epoch": 11.09888, "grad_norm": 0.8932461738586426, "learning_rate": 3.2673069227691075e-05, "loss": 0.59, "step": 8671 }, { "epoch": 11.10016, "grad_norm": 0.8610022664070129, "learning_rate": 3.267106842737095e-05, "loss": 0.5244, "step": 8672 }, { "epoch": 11.10144, "grad_norm": 0.8831630349159241, "learning_rate": 3.266906762705082e-05, "loss": 0.609, 
"step": 8673 }, { "epoch": 11.10272, "grad_norm": 0.899043619632721, "learning_rate": 3.266706682673069e-05, "loss": 0.552, "step": 8674 }, { "epoch": 11.104, "grad_norm": 0.9248834848403931, "learning_rate": 3.266506602641057e-05, "loss": 0.5851, "step": 8675 }, { "epoch": 11.10528, "grad_norm": 0.8615071177482605, "learning_rate": 3.266306522609044e-05, "loss": 0.5664, "step": 8676 }, { "epoch": 11.10656, "grad_norm": 0.882163405418396, "learning_rate": 3.266106442577031e-05, "loss": 0.5823, "step": 8677 }, { "epoch": 11.10784, "grad_norm": 0.8114548921585083, "learning_rate": 3.2659063625450185e-05, "loss": 0.5001, "step": 8678 }, { "epoch": 11.10912, "grad_norm": 0.9176467657089233, "learning_rate": 3.265706282513005e-05, "loss": 0.566, "step": 8679 }, { "epoch": 11.1104, "grad_norm": 0.867563009262085, "learning_rate": 3.265506202480992e-05, "loss": 0.5233, "step": 8680 }, { "epoch": 11.11168, "grad_norm": 0.8672628402709961, "learning_rate": 3.265306122448979e-05, "loss": 0.5412, "step": 8681 }, { "epoch": 11.11296, "grad_norm": 0.9063531160354614, "learning_rate": 3.265106042416967e-05, "loss": 0.5753, "step": 8682 }, { "epoch": 11.11424, "grad_norm": 0.86628258228302, "learning_rate": 3.2649059623849544e-05, "loss": 0.5739, "step": 8683 }, { "epoch": 11.11552, "grad_norm": 0.8533140420913696, "learning_rate": 3.2647058823529416e-05, "loss": 0.555, "step": 8684 }, { "epoch": 11.1168, "grad_norm": 0.8930099606513977, "learning_rate": 3.264505802320929e-05, "loss": 0.5193, "step": 8685 }, { "epoch": 11.11808, "grad_norm": 0.8562926650047302, "learning_rate": 3.264305722288916e-05, "loss": 0.5209, "step": 8686 }, { "epoch": 11.11936, "grad_norm": 0.8850371241569519, "learning_rate": 3.2641056422569024e-05, "loss": 0.5525, "step": 8687 }, { "epoch": 11.12064, "grad_norm": 0.932697594165802, "learning_rate": 3.2639055622248896e-05, "loss": 0.5613, "step": 8688 }, { "epoch": 11.12192, "grad_norm": 0.8724458813667297, "learning_rate": 3.2637054821928775e-05, 
"loss": 0.5146, "step": 8689 }, { "epoch": 11.1232, "grad_norm": 0.8727782368659973, "learning_rate": 3.263505402160865e-05, "loss": 0.5339, "step": 8690 }, { "epoch": 11.12448, "grad_norm": 0.916933536529541, "learning_rate": 3.263305322128852e-05, "loss": 0.5998, "step": 8691 }, { "epoch": 11.12576, "grad_norm": 0.875407338142395, "learning_rate": 3.263105242096839e-05, "loss": 0.5418, "step": 8692 }, { "epoch": 11.12704, "grad_norm": 0.8921369910240173, "learning_rate": 3.262905162064826e-05, "loss": 0.5805, "step": 8693 }, { "epoch": 11.12832, "grad_norm": 0.9058687090873718, "learning_rate": 3.2627050820328134e-05, "loss": 0.5751, "step": 8694 }, { "epoch": 11.1296, "grad_norm": 0.9492460489273071, "learning_rate": 3.2625050020008e-05, "loss": 0.5572, "step": 8695 }, { "epoch": 11.13088, "grad_norm": 0.9078418016433716, "learning_rate": 3.262304921968788e-05, "loss": 0.5816, "step": 8696 }, { "epoch": 11.13216, "grad_norm": 0.8583430051803589, "learning_rate": 3.262104841936775e-05, "loss": 0.5219, "step": 8697 }, { "epoch": 11.13344, "grad_norm": 0.9013339281082153, "learning_rate": 3.261904761904762e-05, "loss": 0.5521, "step": 8698 }, { "epoch": 11.13472, "grad_norm": 0.9228495955467224, "learning_rate": 3.2617046818727494e-05, "loss": 0.6093, "step": 8699 }, { "epoch": 11.136, "grad_norm": 0.895261287689209, "learning_rate": 3.2615046018407365e-05, "loss": 0.5856, "step": 8700 }, { "epoch": 11.13728, "grad_norm": 0.9044747948646545, "learning_rate": 3.261304521808724e-05, "loss": 0.5621, "step": 8701 }, { "epoch": 11.13856, "grad_norm": 0.9378688335418701, "learning_rate": 3.261104441776711e-05, "loss": 0.6047, "step": 8702 }, { "epoch": 11.13984, "grad_norm": 0.941558301448822, "learning_rate": 3.260904361744698e-05, "loss": 0.5467, "step": 8703 }, { "epoch": 11.14112, "grad_norm": 0.9449127912521362, "learning_rate": 3.260704281712685e-05, "loss": 0.5336, "step": 8704 }, { "epoch": 11.1424, "grad_norm": 0.8693781495094299, "learning_rate": 
3.2605042016806725e-05, "loss": 0.522, "step": 8705 }, { "epoch": 11.14368, "grad_norm": 0.8828169107437134, "learning_rate": 3.2603041216486597e-05, "loss": 0.557, "step": 8706 }, { "epoch": 11.14496, "grad_norm": 0.9226519465446472, "learning_rate": 3.260104041616647e-05, "loss": 0.5246, "step": 8707 }, { "epoch": 11.14624, "grad_norm": 0.9169709086418152, "learning_rate": 3.259903961584634e-05, "loss": 0.58, "step": 8708 }, { "epoch": 11.14752, "grad_norm": 0.92818683385849, "learning_rate": 3.259703881552621e-05, "loss": 0.5937, "step": 8709 }, { "epoch": 11.1488, "grad_norm": 0.9431915879249573, "learning_rate": 3.2595038015206084e-05, "loss": 0.5702, "step": 8710 }, { "epoch": 11.150079999999999, "grad_norm": 0.8843738436698914, "learning_rate": 3.2593037214885956e-05, "loss": 0.5305, "step": 8711 }, { "epoch": 11.15136, "grad_norm": 0.8925315737724304, "learning_rate": 3.259103641456583e-05, "loss": 0.5114, "step": 8712 }, { "epoch": 11.15264, "grad_norm": 0.8767265677452087, "learning_rate": 3.25890356142457e-05, "loss": 0.5199, "step": 8713 }, { "epoch": 11.15392, "grad_norm": 0.8850144743919373, "learning_rate": 3.258703481392557e-05, "loss": 0.5388, "step": 8714 }, { "epoch": 11.1552, "grad_norm": 0.8759915232658386, "learning_rate": 3.258503401360544e-05, "loss": 0.5628, "step": 8715 }, { "epoch": 11.15648, "grad_norm": 0.8618879318237305, "learning_rate": 3.2583033213285315e-05, "loss": 0.5094, "step": 8716 }, { "epoch": 11.15776, "grad_norm": 0.9156239628791809, "learning_rate": 3.258103241296519e-05, "loss": 0.562, "step": 8717 }, { "epoch": 11.15904, "grad_norm": 0.8669613599777222, "learning_rate": 3.257903161264506e-05, "loss": 0.5478, "step": 8718 }, { "epoch": 11.16032, "grad_norm": 0.9046533107757568, "learning_rate": 3.257703081232493e-05, "loss": 0.6049, "step": 8719 }, { "epoch": 11.1616, "grad_norm": 0.9057307243347168, "learning_rate": 3.25750300120048e-05, "loss": 0.5175, "step": 8720 }, { "epoch": 11.16288, "grad_norm": 
0.8702899813652039, "learning_rate": 3.2573029211684674e-05, "loss": 0.5061, "step": 8721 }, { "epoch": 11.16416, "grad_norm": 0.8357275724411011, "learning_rate": 3.2571028411364546e-05, "loss": 0.5112, "step": 8722 }, { "epoch": 11.16544, "grad_norm": 0.9093285799026489, "learning_rate": 3.256902761104442e-05, "loss": 0.5853, "step": 8723 }, { "epoch": 11.16672, "grad_norm": 0.8935590386390686, "learning_rate": 3.25670268107243e-05, "loss": 0.5726, "step": 8724 }, { "epoch": 11.168, "grad_norm": 0.8492559194564819, "learning_rate": 3.256502601040416e-05, "loss": 0.522, "step": 8725 }, { "epoch": 11.16928, "grad_norm": 0.8798608779907227, "learning_rate": 3.2563025210084034e-05, "loss": 0.5661, "step": 8726 }, { "epoch": 11.17056, "grad_norm": 0.8920543193817139, "learning_rate": 3.2561024409763906e-05, "loss": 0.5535, "step": 8727 }, { "epoch": 11.17184, "grad_norm": 0.8616754412651062, "learning_rate": 3.255902360944378e-05, "loss": 0.5294, "step": 8728 }, { "epoch": 11.17312, "grad_norm": 0.8341376185417175, "learning_rate": 3.255702280912365e-05, "loss": 0.4899, "step": 8729 }, { "epoch": 11.1744, "grad_norm": 0.8843587636947632, "learning_rate": 3.255502200880352e-05, "loss": 0.5537, "step": 8730 }, { "epoch": 11.17568, "grad_norm": 0.8609508872032166, "learning_rate": 3.25530212084834e-05, "loss": 0.5028, "step": 8731 }, { "epoch": 11.17696, "grad_norm": 0.8849518299102783, "learning_rate": 3.255102040816327e-05, "loss": 0.5173, "step": 8732 }, { "epoch": 11.17824, "grad_norm": 0.874070942401886, "learning_rate": 3.254901960784314e-05, "loss": 0.5143, "step": 8733 }, { "epoch": 11.17952, "grad_norm": 0.9157163500785828, "learning_rate": 3.254701880752301e-05, "loss": 0.5429, "step": 8734 }, { "epoch": 11.1808, "grad_norm": 0.90493243932724, "learning_rate": 3.254501800720288e-05, "loss": 0.5794, "step": 8735 }, { "epoch": 11.18208, "grad_norm": 0.9423990249633789, "learning_rate": 3.254301720688275e-05, "loss": 0.5889, "step": 8736 }, { "epoch": 11.18336, 
"grad_norm": 0.950400710105896, "learning_rate": 3.2541016406562624e-05, "loss": 0.5909, "step": 8737 }, { "epoch": 11.18464, "grad_norm": 0.946723997592926, "learning_rate": 3.25390156062425e-05, "loss": 0.5764, "step": 8738 }, { "epoch": 11.18592, "grad_norm": 0.9119811058044434, "learning_rate": 3.2537014805922375e-05, "loss": 0.5859, "step": 8739 }, { "epoch": 11.1872, "grad_norm": 0.8999208807945251, "learning_rate": 3.2535014005602246e-05, "loss": 0.5725, "step": 8740 }, { "epoch": 11.18848, "grad_norm": 0.8947361707687378, "learning_rate": 3.253301320528211e-05, "loss": 0.5432, "step": 8741 }, { "epoch": 11.18976, "grad_norm": 0.9105396270751953, "learning_rate": 3.253101240496198e-05, "loss": 0.5896, "step": 8742 }, { "epoch": 11.19104, "grad_norm": 0.9032384753227234, "learning_rate": 3.2529011604641855e-05, "loss": 0.5516, "step": 8743 }, { "epoch": 11.19232, "grad_norm": 0.966087281703949, "learning_rate": 3.252701080432173e-05, "loss": 0.5906, "step": 8744 }, { "epoch": 11.1936, "grad_norm": 0.9482411742210388, "learning_rate": 3.2525010004001606e-05, "loss": 0.597, "step": 8745 }, { "epoch": 11.19488, "grad_norm": 0.8811086416244507, "learning_rate": 3.252300920368148e-05, "loss": 0.54, "step": 8746 }, { "epoch": 11.19616, "grad_norm": 0.8933411240577698, "learning_rate": 3.252100840336135e-05, "loss": 0.5465, "step": 8747 }, { "epoch": 11.19744, "grad_norm": 0.8633106350898743, "learning_rate": 3.251900760304122e-05, "loss": 0.5178, "step": 8748 }, { "epoch": 11.19872, "grad_norm": 0.8998020887374878, "learning_rate": 3.2517006802721086e-05, "loss": 0.5437, "step": 8749 }, { "epoch": 11.2, "grad_norm": 0.9244217276573181, "learning_rate": 3.251500600240096e-05, "loss": 0.6415, "step": 8750 }, { "epoch": 11.20128, "grad_norm": 0.9325177073478699, "learning_rate": 3.251300520208083e-05, "loss": 0.6122, "step": 8751 }, { "epoch": 11.20256, "grad_norm": 0.9503816962242126, "learning_rate": 3.251100440176071e-05, "loss": 0.5848, "step": 8752 }, { "epoch": 
11.20384, "grad_norm": 0.9142863154411316, "learning_rate": 3.250900360144058e-05, "loss": 0.5554, "step": 8753 }, { "epoch": 11.20512, "grad_norm": 0.8959869742393494, "learning_rate": 3.250700280112045e-05, "loss": 0.6131, "step": 8754 }, { "epoch": 11.2064, "grad_norm": 0.9044204354286194, "learning_rate": 3.2505002000800324e-05, "loss": 0.5885, "step": 8755 }, { "epoch": 11.20768, "grad_norm": 0.9072441458702087, "learning_rate": 3.2503001200480196e-05, "loss": 0.5396, "step": 8756 }, { "epoch": 11.20896, "grad_norm": 0.8490614295005798, "learning_rate": 3.250100040016006e-05, "loss": 0.5353, "step": 8757 }, { "epoch": 11.21024, "grad_norm": 0.9115065336227417, "learning_rate": 3.249899959983993e-05, "loss": 0.5137, "step": 8758 }, { "epoch": 11.21152, "grad_norm": 0.8661530017852783, "learning_rate": 3.249699879951981e-05, "loss": 0.4879, "step": 8759 }, { "epoch": 11.2128, "grad_norm": 0.8535701036453247, "learning_rate": 3.2494997999199684e-05, "loss": 0.5335, "step": 8760 }, { "epoch": 11.21408, "grad_norm": 0.9392260313034058, "learning_rate": 3.2492997198879555e-05, "loss": 0.5892, "step": 8761 }, { "epoch": 11.21536, "grad_norm": 0.861599862575531, "learning_rate": 3.249099639855943e-05, "loss": 0.5172, "step": 8762 }, { "epoch": 11.21664, "grad_norm": 0.8953681588172913, "learning_rate": 3.24889955982393e-05, "loss": 0.5388, "step": 8763 }, { "epoch": 11.21792, "grad_norm": 0.9016774892807007, "learning_rate": 3.248699479791917e-05, "loss": 0.5661, "step": 8764 }, { "epoch": 11.2192, "grad_norm": 0.8995947241783142, "learning_rate": 3.2484993997599036e-05, "loss": 0.5247, "step": 8765 }, { "epoch": 11.22048, "grad_norm": 0.9032994508743286, "learning_rate": 3.2482993197278915e-05, "loss": 0.5577, "step": 8766 }, { "epoch": 11.22176, "grad_norm": 0.934908390045166, "learning_rate": 3.2480992396958787e-05, "loss": 0.6034, "step": 8767 }, { "epoch": 11.22304, "grad_norm": 0.8706194758415222, "learning_rate": 3.247899159663866e-05, "loss": 0.5512, "step": 
8768 }, { "epoch": 11.22432, "grad_norm": 0.8422589302062988, "learning_rate": 3.247699079631853e-05, "loss": 0.5348, "step": 8769 }, { "epoch": 11.2256, "grad_norm": 0.8409394025802612, "learning_rate": 3.24749899959984e-05, "loss": 0.545, "step": 8770 }, { "epoch": 11.22688, "grad_norm": 0.9218395948410034, "learning_rate": 3.2472989195678274e-05, "loss": 0.6042, "step": 8771 }, { "epoch": 11.22816, "grad_norm": 0.8771111965179443, "learning_rate": 3.2470988395358146e-05, "loss": 0.5602, "step": 8772 }, { "epoch": 11.22944, "grad_norm": 0.8485012054443359, "learning_rate": 3.246898759503801e-05, "loss": 0.5159, "step": 8773 }, { "epoch": 11.23072, "grad_norm": 0.9108744263648987, "learning_rate": 3.246698679471789e-05, "loss": 0.5771, "step": 8774 }, { "epoch": 11.232, "grad_norm": 0.8793061971664429, "learning_rate": 3.246498599439776e-05, "loss": 0.5697, "step": 8775 }, { "epoch": 11.23328, "grad_norm": 0.8831557631492615, "learning_rate": 3.246298519407763e-05, "loss": 0.5513, "step": 8776 }, { "epoch": 11.23456, "grad_norm": 0.9184591770172119, "learning_rate": 3.2460984393757505e-05, "loss": 0.5967, "step": 8777 }, { "epoch": 11.23584, "grad_norm": 0.9362067580223083, "learning_rate": 3.245898359343738e-05, "loss": 0.5762, "step": 8778 }, { "epoch": 11.23712, "grad_norm": 0.9164170026779175, "learning_rate": 3.245698279311725e-05, "loss": 0.5353, "step": 8779 }, { "epoch": 11.2384, "grad_norm": 0.878761351108551, "learning_rate": 3.245498199279712e-05, "loss": 0.5491, "step": 8780 }, { "epoch": 11.23968, "grad_norm": 0.828332245349884, "learning_rate": 3.245298119247699e-05, "loss": 0.505, "step": 8781 }, { "epoch": 11.24096, "grad_norm": 0.9329909682273865, "learning_rate": 3.2450980392156864e-05, "loss": 0.5247, "step": 8782 }, { "epoch": 11.24224, "grad_norm": 0.9171594977378845, "learning_rate": 3.2448979591836736e-05, "loss": 0.5647, "step": 8783 }, { "epoch": 11.24352, "grad_norm": 0.829479455947876, "learning_rate": 3.244697879151661e-05, "loss": 
0.4752, "step": 8784 }, { "epoch": 11.2448, "grad_norm": 0.8708236813545227, "learning_rate": 3.244497799119648e-05, "loss": 0.5795, "step": 8785 }, { "epoch": 11.24608, "grad_norm": 0.8896415829658508, "learning_rate": 3.244297719087635e-05, "loss": 0.5318, "step": 8786 }, { "epoch": 11.24736, "grad_norm": 0.9375750422477722, "learning_rate": 3.2440976390556224e-05, "loss": 0.612, "step": 8787 }, { "epoch": 11.24864, "grad_norm": 0.9271597862243652, "learning_rate": 3.2438975590236096e-05, "loss": 0.5192, "step": 8788 }, { "epoch": 11.24992, "grad_norm": 0.894335925579071, "learning_rate": 3.243697478991597e-05, "loss": 0.6151, "step": 8789 }, { "epoch": 11.2512, "grad_norm": 0.9000656008720398, "learning_rate": 3.243497398959584e-05, "loss": 0.5774, "step": 8790 }, { "epoch": 11.25248, "grad_norm": 0.8798941969871521, "learning_rate": 3.243297318927571e-05, "loss": 0.5594, "step": 8791 }, { "epoch": 11.25376, "grad_norm": 0.8531872034072876, "learning_rate": 3.243097238895558e-05, "loss": 0.5439, "step": 8792 }, { "epoch": 11.25504, "grad_norm": 0.9348339438438416, "learning_rate": 3.2428971588635455e-05, "loss": 0.5349, "step": 8793 }, { "epoch": 11.25632, "grad_norm": 0.8829602003097534, "learning_rate": 3.242697078831533e-05, "loss": 0.5285, "step": 8794 }, { "epoch": 11.2576, "grad_norm": 0.9161067605018616, "learning_rate": 3.24249699879952e-05, "loss": 0.5868, "step": 8795 }, { "epoch": 11.25888, "grad_norm": 0.8983628153800964, "learning_rate": 3.242296918767507e-05, "loss": 0.5213, "step": 8796 }, { "epoch": 11.26016, "grad_norm": 0.9068379998207092, "learning_rate": 3.242096838735494e-05, "loss": 0.5583, "step": 8797 }, { "epoch": 11.26144, "grad_norm": 0.9481262564659119, "learning_rate": 3.2418967587034814e-05, "loss": 0.5554, "step": 8798 }, { "epoch": 11.26272, "grad_norm": 0.8567753434181213, "learning_rate": 3.2416966786714686e-05, "loss": 0.4905, "step": 8799 }, { "epoch": 11.264, "grad_norm": 0.9208746552467346, "learning_rate": 
3.241496598639456e-05, "loss": 0.5791, "step": 8800 }, { "epoch": 11.26528, "grad_norm": 0.9474934935569763, "learning_rate": 3.241296518607443e-05, "loss": 0.5849, "step": 8801 }, { "epoch": 11.26656, "grad_norm": 0.9702956676483154, "learning_rate": 3.241096438575431e-05, "loss": 0.62, "step": 8802 }, { "epoch": 11.26784, "grad_norm": 0.8868454694747925, "learning_rate": 3.2408963585434173e-05, "loss": 0.5779, "step": 8803 }, { "epoch": 11.269120000000001, "grad_norm": 0.9131936430931091, "learning_rate": 3.2406962785114045e-05, "loss": 0.5787, "step": 8804 }, { "epoch": 11.2704, "grad_norm": 0.8842905163764954, "learning_rate": 3.240496198479392e-05, "loss": 0.5514, "step": 8805 }, { "epoch": 11.27168, "grad_norm": 0.8582538366317749, "learning_rate": 3.240296118447379e-05, "loss": 0.5355, "step": 8806 }, { "epoch": 11.27296, "grad_norm": 0.8297027945518494, "learning_rate": 3.240096038415366e-05, "loss": 0.5481, "step": 8807 }, { "epoch": 11.27424, "grad_norm": 0.9267979264259338, "learning_rate": 3.239895958383353e-05, "loss": 0.5676, "step": 8808 }, { "epoch": 11.27552, "grad_norm": 0.9307857155799866, "learning_rate": 3.239695878351341e-05, "loss": 0.5526, "step": 8809 }, { "epoch": 11.2768, "grad_norm": 0.9614000916481018, "learning_rate": 3.239495798319328e-05, "loss": 0.583, "step": 8810 }, { "epoch": 11.27808, "grad_norm": 0.8482030630111694, "learning_rate": 3.239295718287315e-05, "loss": 0.5473, "step": 8811 }, { "epoch": 11.27936, "grad_norm": 0.8771170377731323, "learning_rate": 3.239095638255302e-05, "loss": 0.5808, "step": 8812 }, { "epoch": 11.28064, "grad_norm": 0.8574709296226501, "learning_rate": 3.238895558223289e-05, "loss": 0.55, "step": 8813 }, { "epoch": 11.28192, "grad_norm": 0.8876118659973145, "learning_rate": 3.2386954781912764e-05, "loss": 0.5451, "step": 8814 }, { "epoch": 11.2832, "grad_norm": 0.874653697013855, "learning_rate": 3.2384953981592636e-05, "loss": 0.5076, "step": 8815 }, { "epoch": 11.28448, "grad_norm": 
0.9255512356758118, "learning_rate": 3.2382953181272514e-05, "loss": 0.5806, "step": 8816 }, { "epoch": 11.28576, "grad_norm": 0.9429183602333069, "learning_rate": 3.2380952380952386e-05, "loss": 0.5884, "step": 8817 }, { "epoch": 11.28704, "grad_norm": 0.9244290590286255, "learning_rate": 3.237895158063226e-05, "loss": 0.5938, "step": 8818 }, { "epoch": 11.28832, "grad_norm": 0.8580917716026306, "learning_rate": 3.237695078031212e-05, "loss": 0.55, "step": 8819 }, { "epoch": 11.2896, "grad_norm": 0.9350975155830383, "learning_rate": 3.2374949979991995e-05, "loss": 0.5704, "step": 8820 }, { "epoch": 11.29088, "grad_norm": 0.913230836391449, "learning_rate": 3.237294917967187e-05, "loss": 0.5657, "step": 8821 }, { "epoch": 11.292159999999999, "grad_norm": 0.8917736411094666, "learning_rate": 3.237094837935174e-05, "loss": 0.5393, "step": 8822 }, { "epoch": 11.29344, "grad_norm": 0.8485799431800842, "learning_rate": 3.236894757903162e-05, "loss": 0.5264, "step": 8823 }, { "epoch": 11.29472, "grad_norm": 0.8522817492485046, "learning_rate": 3.236694677871149e-05, "loss": 0.4902, "step": 8824 }, { "epoch": 11.296, "grad_norm": 0.8695195317268372, "learning_rate": 3.236494597839136e-05, "loss": 0.5453, "step": 8825 }, { "epoch": 11.29728, "grad_norm": 0.9237691164016724, "learning_rate": 3.236294517807123e-05, "loss": 0.5709, "step": 8826 }, { "epoch": 11.29856, "grad_norm": 0.9328168034553528, "learning_rate": 3.23609443777511e-05, "loss": 0.5593, "step": 8827 }, { "epoch": 11.29984, "grad_norm": 0.8827159404754639, "learning_rate": 3.235894357743097e-05, "loss": 0.528, "step": 8828 }, { "epoch": 11.30112, "grad_norm": 0.9112002849578857, "learning_rate": 3.235694277711084e-05, "loss": 0.557, "step": 8829 }, { "epoch": 11.3024, "grad_norm": 0.8474133610725403, "learning_rate": 3.235494197679072e-05, "loss": 0.5448, "step": 8830 }, { "epoch": 11.30368, "grad_norm": 0.9159875512123108, "learning_rate": 3.235294117647059e-05, "loss": 0.5638, "step": 8831 }, { "epoch": 
11.30496, "grad_norm": 0.8455161452293396, "learning_rate": 3.2350940376150464e-05, "loss": 0.5317, "step": 8832 }, { "epoch": 11.30624, "grad_norm": 0.8944229483604431, "learning_rate": 3.2348939575830336e-05, "loss": 0.5455, "step": 8833 }, { "epoch": 11.30752, "grad_norm": 0.9140881896018982, "learning_rate": 3.234693877551021e-05, "loss": 0.565, "step": 8834 }, { "epoch": 11.3088, "grad_norm": 0.9187982082366943, "learning_rate": 3.234493797519007e-05, "loss": 0.5593, "step": 8835 }, { "epoch": 11.31008, "grad_norm": 0.9055234789848328, "learning_rate": 3.2342937174869945e-05, "loss": 0.5166, "step": 8836 }, { "epoch": 11.31136, "grad_norm": 0.9232755303382874, "learning_rate": 3.234093637454982e-05, "loss": 0.5993, "step": 8837 }, { "epoch": 11.31264, "grad_norm": 0.8765431046485901, "learning_rate": 3.2338935574229695e-05, "loss": 0.5717, "step": 8838 }, { "epoch": 11.31392, "grad_norm": 0.848947286605835, "learning_rate": 3.233693477390957e-05, "loss": 0.528, "step": 8839 }, { "epoch": 11.3152, "grad_norm": 0.9066827297210693, "learning_rate": 3.233493397358944e-05, "loss": 0.5882, "step": 8840 }, { "epoch": 11.31648, "grad_norm": 0.9069925546646118, "learning_rate": 3.233293317326931e-05, "loss": 0.5502, "step": 8841 }, { "epoch": 11.31776, "grad_norm": 0.9095439314842224, "learning_rate": 3.233093237294918e-05, "loss": 0.5638, "step": 8842 }, { "epoch": 11.31904, "grad_norm": 0.9278164505958557, "learning_rate": 3.232893157262905e-05, "loss": 0.5834, "step": 8843 }, { "epoch": 11.32032, "grad_norm": 0.8815664052963257, "learning_rate": 3.2326930772308926e-05, "loss": 0.5516, "step": 8844 }, { "epoch": 11.3216, "grad_norm": 0.8748176097869873, "learning_rate": 3.23249299719888e-05, "loss": 0.5715, "step": 8845 }, { "epoch": 11.32288, "grad_norm": 0.8980875015258789, "learning_rate": 3.232292917166867e-05, "loss": 0.5218, "step": 8846 }, { "epoch": 11.32416, "grad_norm": 0.9095767736434937, "learning_rate": 3.232092837134854e-05, "loss": 0.5353, "step": 8847 
}, { "epoch": 11.32544, "grad_norm": 0.9545304775238037, "learning_rate": 3.2318927571028414e-05, "loss": 0.5816, "step": 8848 }, { "epoch": 11.32672, "grad_norm": 0.8944869637489319, "learning_rate": 3.2316926770708286e-05, "loss": 0.5624, "step": 8849 }, { "epoch": 11.328, "grad_norm": 0.891927182674408, "learning_rate": 3.231492597038816e-05, "loss": 0.523, "step": 8850 }, { "epoch": 11.32928, "grad_norm": 0.9130933284759521, "learning_rate": 3.231292517006803e-05, "loss": 0.6285, "step": 8851 }, { "epoch": 11.33056, "grad_norm": 0.8824868202209473, "learning_rate": 3.23109243697479e-05, "loss": 0.5385, "step": 8852 }, { "epoch": 11.33184, "grad_norm": 0.8892396688461304, "learning_rate": 3.230892356942777e-05, "loss": 0.5261, "step": 8853 }, { "epoch": 11.33312, "grad_norm": 0.9578300714492798, "learning_rate": 3.2306922769107645e-05, "loss": 0.635, "step": 8854 }, { "epoch": 11.3344, "grad_norm": 0.9021438956260681, "learning_rate": 3.230492196878752e-05, "loss": 0.5346, "step": 8855 }, { "epoch": 11.33568, "grad_norm": 0.8745192289352417, "learning_rate": 3.230292116846739e-05, "loss": 0.5481, "step": 8856 }, { "epoch": 11.33696, "grad_norm": 0.8894162178039551, "learning_rate": 3.230092036814726e-05, "loss": 0.558, "step": 8857 }, { "epoch": 11.33824, "grad_norm": 0.8988181352615356, "learning_rate": 3.229891956782713e-05, "loss": 0.5633, "step": 8858 }, { "epoch": 11.33952, "grad_norm": 0.8719711899757385, "learning_rate": 3.2296918767507004e-05, "loss": 0.5066, "step": 8859 }, { "epoch": 11.3408, "grad_norm": 0.8456899523735046, "learning_rate": 3.2294917967186876e-05, "loss": 0.5139, "step": 8860 }, { "epoch": 11.34208, "grad_norm": 0.8919529914855957, "learning_rate": 3.229291716686675e-05, "loss": 0.5203, "step": 8861 }, { "epoch": 11.34336, "grad_norm": 0.8657916188240051, "learning_rate": 3.229091636654662e-05, "loss": 0.5298, "step": 8862 }, { "epoch": 11.34464, "grad_norm": 0.8953869938850403, "learning_rate": 3.228891556622649e-05, "loss": 0.5217, 
"step": 8863 }, { "epoch": 11.34592, "grad_norm": 0.9137585759162903, "learning_rate": 3.2286914765906363e-05, "loss": 0.5738, "step": 8864 }, { "epoch": 11.3472, "grad_norm": 0.8777027726173401, "learning_rate": 3.228491396558624e-05, "loss": 0.5342, "step": 8865 }, { "epoch": 11.34848, "grad_norm": 0.9374188780784607, "learning_rate": 3.228291316526611e-05, "loss": 0.6148, "step": 8866 }, { "epoch": 11.34976, "grad_norm": 0.8601318001747131, "learning_rate": 3.228091236494598e-05, "loss": 0.5121, "step": 8867 }, { "epoch": 11.35104, "grad_norm": 0.904672384262085, "learning_rate": 3.227891156462585e-05, "loss": 0.5718, "step": 8868 }, { "epoch": 11.35232, "grad_norm": 0.9223089218139648, "learning_rate": 3.227691076430572e-05, "loss": 0.5815, "step": 8869 }, { "epoch": 11.3536, "grad_norm": 0.8737932443618774, "learning_rate": 3.2274909963985595e-05, "loss": 0.5285, "step": 8870 }, { "epoch": 11.35488, "grad_norm": 0.9372346997261047, "learning_rate": 3.2272909163665466e-05, "loss": 0.5748, "step": 8871 }, { "epoch": 11.35616, "grad_norm": 0.9131039977073669, "learning_rate": 3.2270908363345345e-05, "loss": 0.5606, "step": 8872 }, { "epoch": 11.35744, "grad_norm": 0.9296916127204895, "learning_rate": 3.226890756302522e-05, "loss": 0.576, "step": 8873 }, { "epoch": 11.35872, "grad_norm": 0.9245988130569458, "learning_rate": 3.226690676270508e-05, "loss": 0.5462, "step": 8874 }, { "epoch": 11.36, "grad_norm": 0.934669017791748, "learning_rate": 3.2264905962384954e-05, "loss": 0.555, "step": 8875 }, { "epoch": 11.36128, "grad_norm": 0.8804442882537842, "learning_rate": 3.2262905162064826e-05, "loss": 0.5157, "step": 8876 }, { "epoch": 11.36256, "grad_norm": 0.8725820779800415, "learning_rate": 3.22609043617447e-05, "loss": 0.5829, "step": 8877 }, { "epoch": 11.36384, "grad_norm": 0.9546533823013306, "learning_rate": 3.225890356142457e-05, "loss": 0.5668, "step": 8878 }, { "epoch": 11.36512, "grad_norm": 0.9215765595436096, "learning_rate": 3.225690276110445e-05, 
"loss": 0.546, "step": 8879 }, { "epoch": 11.3664, "grad_norm": 0.9249255061149597, "learning_rate": 3.225490196078432e-05, "loss": 0.5562, "step": 8880 }, { "epoch": 11.36768, "grad_norm": 0.9244142174720764, "learning_rate": 3.225290116046419e-05, "loss": 0.54, "step": 8881 }, { "epoch": 11.36896, "grad_norm": 0.9065011143684387, "learning_rate": 3.225090036014406e-05, "loss": 0.5555, "step": 8882 }, { "epoch": 11.37024, "grad_norm": 0.8759673237800598, "learning_rate": 3.224889955982393e-05, "loss": 0.5506, "step": 8883 }, { "epoch": 11.37152, "grad_norm": 0.9149131774902344, "learning_rate": 3.22468987595038e-05, "loss": 0.5635, "step": 8884 }, { "epoch": 11.3728, "grad_norm": 0.9005359411239624, "learning_rate": 3.224489795918367e-05, "loss": 0.5289, "step": 8885 }, { "epoch": 11.37408, "grad_norm": 0.9132285714149475, "learning_rate": 3.2242897158863544e-05, "loss": 0.5287, "step": 8886 }, { "epoch": 11.37536, "grad_norm": 0.9003539681434631, "learning_rate": 3.224089635854342e-05, "loss": 0.521, "step": 8887 }, { "epoch": 11.37664, "grad_norm": 0.8908035755157471, "learning_rate": 3.2238895558223295e-05, "loss": 0.5668, "step": 8888 }, { "epoch": 11.37792, "grad_norm": 0.9106180667877197, "learning_rate": 3.223689475790317e-05, "loss": 0.5502, "step": 8889 }, { "epoch": 11.3792, "grad_norm": 0.9311418533325195, "learning_rate": 3.223489395758303e-05, "loss": 0.5703, "step": 8890 }, { "epoch": 11.38048, "grad_norm": 0.8639375567436218, "learning_rate": 3.2232893157262904e-05, "loss": 0.5366, "step": 8891 }, { "epoch": 11.38176, "grad_norm": 0.903183102607727, "learning_rate": 3.2230892356942775e-05, "loss": 0.5783, "step": 8892 }, { "epoch": 11.38304, "grad_norm": 0.8958691954612732, "learning_rate": 3.222889155662265e-05, "loss": 0.5197, "step": 8893 }, { "epoch": 11.38432, "grad_norm": 0.95194411277771, "learning_rate": 3.2226890756302526e-05, "loss": 0.6212, "step": 8894 }, { "epoch": 11.3856, "grad_norm": 0.8696224689483643, "learning_rate": 
3.22248899559824e-05, "loss": 0.5376, "step": 8895 }, { "epoch": 11.38688, "grad_norm": 0.925094485282898, "learning_rate": 3.222288915566227e-05, "loss": 0.5839, "step": 8896 }, { "epoch": 11.38816, "grad_norm": 0.8680374026298523, "learning_rate": 3.222088835534214e-05, "loss": 0.4927, "step": 8897 }, { "epoch": 11.38944, "grad_norm": 0.9025076031684875, "learning_rate": 3.2218887555022007e-05, "loss": 0.5683, "step": 8898 }, { "epoch": 11.39072, "grad_norm": 0.8952671885490417, "learning_rate": 3.221688675470188e-05, "loss": 0.5648, "step": 8899 }, { "epoch": 11.392, "grad_norm": 0.916085422039032, "learning_rate": 3.221488595438175e-05, "loss": 0.5825, "step": 8900 }, { "epoch": 11.39328, "grad_norm": 0.8768932223320007, "learning_rate": 3.221288515406163e-05, "loss": 0.5205, "step": 8901 }, { "epoch": 11.39456, "grad_norm": 0.912806510925293, "learning_rate": 3.22108843537415e-05, "loss": 0.5078, "step": 8902 }, { "epoch": 11.39584, "grad_norm": 0.9842191338539124, "learning_rate": 3.220888355342137e-05, "loss": 0.6436, "step": 8903 }, { "epoch": 11.39712, "grad_norm": 0.9304628372192383, "learning_rate": 3.2206882753101244e-05, "loss": 0.5789, "step": 8904 }, { "epoch": 11.3984, "grad_norm": 0.9298278093338013, "learning_rate": 3.2204881952781116e-05, "loss": 0.575, "step": 8905 }, { "epoch": 11.39968, "grad_norm": 0.9112979173660278, "learning_rate": 3.220288115246098e-05, "loss": 0.5536, "step": 8906 }, { "epoch": 11.40096, "grad_norm": 0.9044650793075562, "learning_rate": 3.220088035214085e-05, "loss": 0.5697, "step": 8907 }, { "epoch": 11.40224, "grad_norm": 0.90861576795578, "learning_rate": 3.219887955182073e-05, "loss": 0.5748, "step": 8908 }, { "epoch": 11.40352, "grad_norm": 0.8984891176223755, "learning_rate": 3.2196878751500604e-05, "loss": 0.5191, "step": 8909 }, { "epoch": 11.4048, "grad_norm": 0.9326877593994141, "learning_rate": 3.2194877951180476e-05, "loss": 0.5591, "step": 8910 }, { "epoch": 11.40608, "grad_norm": 0.9329106211662292, 
"learning_rate": 3.219287715086035e-05, "loss": 0.5585, "step": 8911 }, { "epoch": 11.40736, "grad_norm": 0.917129397392273, "learning_rate": 3.219087635054022e-05, "loss": 0.5843, "step": 8912 }, { "epoch": 11.40864, "grad_norm": 0.8364509344100952, "learning_rate": 3.218887555022009e-05, "loss": 0.5127, "step": 8913 }, { "epoch": 11.40992, "grad_norm": 0.8630757927894592, "learning_rate": 3.2186874749899956e-05, "loss": 0.5589, "step": 8914 }, { "epoch": 11.411200000000001, "grad_norm": 0.9007800221443176, "learning_rate": 3.2184873949579835e-05, "loss": 0.6069, "step": 8915 }, { "epoch": 11.41248, "grad_norm": 0.9004875421524048, "learning_rate": 3.218287314925971e-05, "loss": 0.5776, "step": 8916 }, { "epoch": 11.41376, "grad_norm": 0.8987116813659668, "learning_rate": 3.218087234893958e-05, "loss": 0.5216, "step": 8917 }, { "epoch": 11.41504, "grad_norm": 0.8875414133071899, "learning_rate": 3.217887154861945e-05, "loss": 0.5412, "step": 8918 }, { "epoch": 11.41632, "grad_norm": 0.8848508596420288, "learning_rate": 3.217687074829932e-05, "loss": 0.5271, "step": 8919 }, { "epoch": 11.4176, "grad_norm": 0.9203560948371887, "learning_rate": 3.2174869947979194e-05, "loss": 0.5973, "step": 8920 }, { "epoch": 11.41888, "grad_norm": 0.9155365228652954, "learning_rate": 3.2172869147659066e-05, "loss": 0.536, "step": 8921 }, { "epoch": 11.42016, "grad_norm": 0.9187197685241699, "learning_rate": 3.217086834733894e-05, "loss": 0.5452, "step": 8922 }, { "epoch": 11.42144, "grad_norm": 0.927579939365387, "learning_rate": 3.216886754701881e-05, "loss": 0.5534, "step": 8923 }, { "epoch": 11.42272, "grad_norm": 0.9490095376968384, "learning_rate": 3.216686674669868e-05, "loss": 0.5418, "step": 8924 }, { "epoch": 11.424, "grad_norm": 0.9742210507392883, "learning_rate": 3.2164865946378553e-05, "loss": 0.5727, "step": 8925 }, { "epoch": 11.42528, "grad_norm": 0.9028719067573547, "learning_rate": 3.2162865146058425e-05, "loss": 0.5383, "step": 8926 }, { "epoch": 11.42656, 
"grad_norm": 0.9693518280982971, "learning_rate": 3.21608643457383e-05, "loss": 0.6059, "step": 8927 }, { "epoch": 11.42784, "grad_norm": 0.866688072681427, "learning_rate": 3.215886354541817e-05, "loss": 0.5364, "step": 8928 }, { "epoch": 11.42912, "grad_norm": 0.9084386229515076, "learning_rate": 3.215686274509804e-05, "loss": 0.5616, "step": 8929 }, { "epoch": 11.4304, "grad_norm": 0.9357685446739197, "learning_rate": 3.215486194477791e-05, "loss": 0.5707, "step": 8930 }, { "epoch": 11.43168, "grad_norm": 0.8925808072090149, "learning_rate": 3.2152861144457785e-05, "loss": 0.5594, "step": 8931 }, { "epoch": 11.43296, "grad_norm": 0.9576624035835266, "learning_rate": 3.2150860344137656e-05, "loss": 0.5892, "step": 8932 }, { "epoch": 11.43424, "grad_norm": 0.8737156391143799, "learning_rate": 3.214885954381753e-05, "loss": 0.54, "step": 8933 }, { "epoch": 11.43552, "grad_norm": 0.9288681745529175, "learning_rate": 3.21468587434974e-05, "loss": 0.5832, "step": 8934 }, { "epoch": 11.4368, "grad_norm": 0.9747262597084045, "learning_rate": 3.214485794317727e-05, "loss": 0.5935, "step": 8935 }, { "epoch": 11.43808, "grad_norm": 0.8848868012428284, "learning_rate": 3.2142857142857144e-05, "loss": 0.5971, "step": 8936 }, { "epoch": 11.43936, "grad_norm": 0.9349198341369629, "learning_rate": 3.2140856342537016e-05, "loss": 0.5941, "step": 8937 }, { "epoch": 11.44064, "grad_norm": 0.8732218146324158, "learning_rate": 3.213885554221689e-05, "loss": 0.5509, "step": 8938 }, { "epoch": 11.44192, "grad_norm": 0.8781768083572388, "learning_rate": 3.213685474189676e-05, "loss": 0.569, "step": 8939 }, { "epoch": 11.4432, "grad_norm": 0.8788825869560242, "learning_rate": 3.213485394157663e-05, "loss": 0.5946, "step": 8940 }, { "epoch": 11.44448, "grad_norm": 0.8523260354995728, "learning_rate": 3.21328531412565e-05, "loss": 0.5232, "step": 8941 }, { "epoch": 11.44576, "grad_norm": 0.9410714507102966, "learning_rate": 3.2130852340936375e-05, "loss": 0.5787, "step": 8942 }, { 
"epoch": 11.44704, "grad_norm": 0.9099283814430237, "learning_rate": 3.2128851540616254e-05, "loss": 0.5687, "step": 8943 }, { "epoch": 11.44832, "grad_norm": 0.9799555540084839, "learning_rate": 3.212685074029612e-05, "loss": 0.586, "step": 8944 }, { "epoch": 11.4496, "grad_norm": 0.983367383480072, "learning_rate": 3.212484993997599e-05, "loss": 0.5625, "step": 8945 }, { "epoch": 11.45088, "grad_norm": 0.9114019870758057, "learning_rate": 3.212284913965586e-05, "loss": 0.5501, "step": 8946 }, { "epoch": 11.45216, "grad_norm": 0.964597761631012, "learning_rate": 3.2120848339335734e-05, "loss": 0.584, "step": 8947 }, { "epoch": 11.45344, "grad_norm": 0.8593063950538635, "learning_rate": 3.2118847539015606e-05, "loss": 0.5178, "step": 8948 }, { "epoch": 11.45472, "grad_norm": 0.8868183493614197, "learning_rate": 3.211684673869548e-05, "loss": 0.5591, "step": 8949 }, { "epoch": 11.456, "grad_norm": 0.8840005397796631, "learning_rate": 3.211484593837536e-05, "loss": 0.5101, "step": 8950 }, { "epoch": 11.45728, "grad_norm": 0.9012945890426636, "learning_rate": 3.211284513805523e-05, "loss": 0.5854, "step": 8951 }, { "epoch": 11.45856, "grad_norm": 0.8864527940750122, "learning_rate": 3.2110844337735094e-05, "loss": 0.5846, "step": 8952 }, { "epoch": 11.45984, "grad_norm": 0.9185726642608643, "learning_rate": 3.2108843537414965e-05, "loss": 0.5771, "step": 8953 }, { "epoch": 11.46112, "grad_norm": 0.8853892683982849, "learning_rate": 3.210684273709484e-05, "loss": 0.526, "step": 8954 }, { "epoch": 11.4624, "grad_norm": 0.9288807511329651, "learning_rate": 3.210484193677471e-05, "loss": 0.5306, "step": 8955 }, { "epoch": 11.46368, "grad_norm": 0.9466400742530823, "learning_rate": 3.210284113645458e-05, "loss": 0.606, "step": 8956 }, { "epoch": 11.46496, "grad_norm": 0.9334494471549988, "learning_rate": 3.210084033613446e-05, "loss": 0.5746, "step": 8957 }, { "epoch": 11.466239999999999, "grad_norm": 0.9179563522338867, "learning_rate": 3.209883953581433e-05, "loss": 
0.5453, "step": 8958 }, { "epoch": 11.46752, "grad_norm": 0.9171554446220398, "learning_rate": 3.2096838735494203e-05, "loss": 0.5683, "step": 8959 }, { "epoch": 11.4688, "grad_norm": 0.9348716735839844, "learning_rate": 3.209483793517407e-05, "loss": 0.5502, "step": 8960 }, { "epoch": 11.47008, "grad_norm": 0.9068107604980469, "learning_rate": 3.209283713485394e-05, "loss": 0.5924, "step": 8961 }, { "epoch": 11.47136, "grad_norm": 0.9149397611618042, "learning_rate": 3.209083633453381e-05, "loss": 0.5681, "step": 8962 }, { "epoch": 11.47264, "grad_norm": 0.876530110836029, "learning_rate": 3.2088835534213684e-05, "loss": 0.55, "step": 8963 }, { "epoch": 11.47392, "grad_norm": 0.8814985752105713, "learning_rate": 3.208683473389356e-05, "loss": 0.5513, "step": 8964 }, { "epoch": 11.4752, "grad_norm": 0.8350082039833069, "learning_rate": 3.2084833933573435e-05, "loss": 0.5217, "step": 8965 }, { "epoch": 11.47648, "grad_norm": 0.8814594149589539, "learning_rate": 3.2082833133253306e-05, "loss": 0.5626, "step": 8966 }, { "epoch": 11.47776, "grad_norm": 0.8578638434410095, "learning_rate": 3.208083233293318e-05, "loss": 0.5671, "step": 8967 }, { "epoch": 11.47904, "grad_norm": 0.8845701217651367, "learning_rate": 3.207883153261304e-05, "loss": 0.5927, "step": 8968 }, { "epoch": 11.48032, "grad_norm": 0.8991924524307251, "learning_rate": 3.2076830732292915e-05, "loss": 0.5528, "step": 8969 }, { "epoch": 11.4816, "grad_norm": 0.898190975189209, "learning_rate": 3.207482993197279e-05, "loss": 0.5988, "step": 8970 }, { "epoch": 11.48288, "grad_norm": 0.9360696077346802, "learning_rate": 3.2072829131652666e-05, "loss": 0.569, "step": 8971 }, { "epoch": 11.48416, "grad_norm": 0.845306932926178, "learning_rate": 3.207082833133254e-05, "loss": 0.5489, "step": 8972 }, { "epoch": 11.48544, "grad_norm": 0.9122028946876526, "learning_rate": 3.206882753101241e-05, "loss": 0.5735, "step": 8973 }, { "epoch": 11.48672, "grad_norm": 0.9009816646575928, "learning_rate": 
3.206682673069228e-05, "loss": 0.5192, "step": 8974 }, { "epoch": 11.488, "grad_norm": 0.9005442261695862, "learning_rate": 3.206482593037215e-05, "loss": 0.6311, "step": 8975 }, { "epoch": 11.48928, "grad_norm": 0.931171715259552, "learning_rate": 3.206282513005202e-05, "loss": 0.5831, "step": 8976 }, { "epoch": 11.49056, "grad_norm": 0.902475893497467, "learning_rate": 3.206082432973189e-05, "loss": 0.5382, "step": 8977 }, { "epoch": 11.49184, "grad_norm": 0.9351593852043152, "learning_rate": 3.205882352941177e-05, "loss": 0.5586, "step": 8978 }, { "epoch": 11.49312, "grad_norm": 0.9076836705207825, "learning_rate": 3.205682272909164e-05, "loss": 0.5568, "step": 8979 }, { "epoch": 11.4944, "grad_norm": 0.8612614274024963, "learning_rate": 3.205482192877151e-05, "loss": 0.531, "step": 8980 }, { "epoch": 11.49568, "grad_norm": 0.8604639768600464, "learning_rate": 3.2052821128451384e-05, "loss": 0.5539, "step": 8981 }, { "epoch": 11.49696, "grad_norm": 0.8722749352455139, "learning_rate": 3.2050820328131256e-05, "loss": 0.5436, "step": 8982 }, { "epoch": 11.49824, "grad_norm": 0.8463775515556335, "learning_rate": 3.204881952781113e-05, "loss": 0.5086, "step": 8983 }, { "epoch": 11.49952, "grad_norm": 0.9018126130104065, "learning_rate": 3.204681872749099e-05, "loss": 0.6194, "step": 8984 }, { "epoch": 11.5008, "grad_norm": 0.9355940222740173, "learning_rate": 3.204481792717087e-05, "loss": 0.5751, "step": 8985 }, { "epoch": 11.50208, "grad_norm": 0.8555790185928345, "learning_rate": 3.2042817126850744e-05, "loss": 0.5685, "step": 8986 }, { "epoch": 11.50336, "grad_norm": 0.8574039936065674, "learning_rate": 3.2040816326530615e-05, "loss": 0.5265, "step": 8987 }, { "epoch": 11.50464, "grad_norm": 0.898993730545044, "learning_rate": 3.203881552621049e-05, "loss": 0.5406, "step": 8988 }, { "epoch": 11.50592, "grad_norm": 0.9504684805870056, "learning_rate": 3.203681472589036e-05, "loss": 0.6053, "step": 8989 }, { "epoch": 11.5072, "grad_norm": 0.9039958119392395, 
"learning_rate": 3.203481392557023e-05, "loss": 0.577, "step": 8990 }, { "epoch": 11.50848, "grad_norm": 0.9396102428436279, "learning_rate": 3.20328131252501e-05, "loss": 0.5763, "step": 8991 }, { "epoch": 11.50976, "grad_norm": 0.8567662835121155, "learning_rate": 3.203081232492997e-05, "loss": 0.5162, "step": 8992 }, { "epoch": 11.51104, "grad_norm": 0.9210956692695618, "learning_rate": 3.2028811524609847e-05, "loss": 0.579, "step": 8993 }, { "epoch": 11.51232, "grad_norm": 0.8853841423988342, "learning_rate": 3.202681072428972e-05, "loss": 0.5236, "step": 8994 }, { "epoch": 11.5136, "grad_norm": 0.9339121580123901, "learning_rate": 3.202480992396959e-05, "loss": 0.619, "step": 8995 }, { "epoch": 11.51488, "grad_norm": 0.8573284149169922, "learning_rate": 3.202280912364946e-05, "loss": 0.5583, "step": 8996 }, { "epoch": 11.51616, "grad_norm": 0.9069136381149292, "learning_rate": 3.2020808323329334e-05, "loss": 0.5256, "step": 8997 }, { "epoch": 11.51744, "grad_norm": 0.9280446767807007, "learning_rate": 3.2018807523009206e-05, "loss": 0.6084, "step": 8998 }, { "epoch": 11.51872, "grad_norm": 0.8712737560272217, "learning_rate": 3.201680672268908e-05, "loss": 0.5816, "step": 8999 }, { "epoch": 11.52, "grad_norm": 0.8745893836021423, "learning_rate": 3.201480592236895e-05, "loss": 0.5167, "step": 9000 }, { "epoch": 11.52128, "grad_norm": 0.8918792605400085, "learning_rate": 3.201280512204882e-05, "loss": 0.587, "step": 9001 }, { "epoch": 11.52256, "grad_norm": 0.9053367972373962, "learning_rate": 3.201080432172869e-05, "loss": 0.5899, "step": 9002 }, { "epoch": 11.52384, "grad_norm": 0.8964258432388306, "learning_rate": 3.2008803521408565e-05, "loss": 0.5589, "step": 9003 }, { "epoch": 11.52512, "grad_norm": 0.852827250957489, "learning_rate": 3.200680272108844e-05, "loss": 0.5024, "step": 9004 }, { "epoch": 11.5264, "grad_norm": 0.8889139890670776, "learning_rate": 3.200480192076831e-05, "loss": 0.5795, "step": 9005 }, { "epoch": 11.52768, "grad_norm": 
0.8449302315711975, "learning_rate": 3.200280112044818e-05, "loss": 0.5439, "step": 9006 }, { "epoch": 11.52896, "grad_norm": 0.9062175154685974, "learning_rate": 3.200080032012805e-05, "loss": 0.543, "step": 9007 }, { "epoch": 11.53024, "grad_norm": 0.9764111042022705, "learning_rate": 3.1998799519807924e-05, "loss": 0.5982, "step": 9008 }, { "epoch": 11.53152, "grad_norm": 0.8889600038528442, "learning_rate": 3.1996798719487796e-05, "loss": 0.5308, "step": 9009 }, { "epoch": 11.5328, "grad_norm": 0.9224411845207214, "learning_rate": 3.199479791916767e-05, "loss": 0.5809, "step": 9010 }, { "epoch": 11.53408, "grad_norm": 0.8506020307540894, "learning_rate": 3.199279711884754e-05, "loss": 0.5375, "step": 9011 }, { "epoch": 11.53536, "grad_norm": 0.8242222666740417, "learning_rate": 3.199079631852741e-05, "loss": 0.5254, "step": 9012 }, { "epoch": 11.53664, "grad_norm": 0.8847529888153076, "learning_rate": 3.1988795518207284e-05, "loss": 0.5867, "step": 9013 }, { "epoch": 11.53792, "grad_norm": 0.8491979837417603, "learning_rate": 3.1986794717887156e-05, "loss": 0.5463, "step": 9014 }, { "epoch": 11.5392, "grad_norm": 0.8366856575012207, "learning_rate": 3.198479391756703e-05, "loss": 0.5193, "step": 9015 }, { "epoch": 11.54048, "grad_norm": 0.8921111822128296, "learning_rate": 3.19827931172469e-05, "loss": 0.5723, "step": 9016 }, { "epoch": 11.54176, "grad_norm": 0.8784570097923279, "learning_rate": 3.198079231692677e-05, "loss": 0.5305, "step": 9017 }, { "epoch": 11.54304, "grad_norm": 0.9135284423828125, "learning_rate": 3.197879151660664e-05, "loss": 0.5463, "step": 9018 }, { "epoch": 11.54432, "grad_norm": 0.8987352848052979, "learning_rate": 3.1976790716286515e-05, "loss": 0.6218, "step": 9019 }, { "epoch": 11.5456, "grad_norm": 0.857602059841156, "learning_rate": 3.197478991596639e-05, "loss": 0.5085, "step": 9020 }, { "epoch": 11.54688, "grad_norm": 0.8649136424064636, "learning_rate": 3.1972789115646265e-05, "loss": 0.5577, "step": 9021 }, { "epoch": 
11.54816, "grad_norm": 0.8804261684417725, "learning_rate": 3.197078831532613e-05, "loss": 0.5755, "step": 9022 }, { "epoch": 11.54944, "grad_norm": 0.8786762356758118, "learning_rate": 3.1968787515006e-05, "loss": 0.5368, "step": 9023 }, { "epoch": 11.55072, "grad_norm": 0.8894196152687073, "learning_rate": 3.1966786714685874e-05, "loss": 0.5745, "step": 9024 }, { "epoch": 11.552, "grad_norm": 0.9118727445602417, "learning_rate": 3.1964785914365746e-05, "loss": 0.5589, "step": 9025 }, { "epoch": 11.55328, "grad_norm": 0.9220285415649414, "learning_rate": 3.196278511404562e-05, "loss": 0.5938, "step": 9026 }, { "epoch": 11.55456, "grad_norm": 0.9039615392684937, "learning_rate": 3.196078431372549e-05, "loss": 0.5626, "step": 9027 }, { "epoch": 11.55584, "grad_norm": 0.8908433318138123, "learning_rate": 3.195878351340537e-05, "loss": 0.6023, "step": 9028 }, { "epoch": 11.55712, "grad_norm": 0.8978443741798401, "learning_rate": 3.195678271308524e-05, "loss": 0.5491, "step": 9029 }, { "epoch": 11.5584, "grad_norm": 0.8988606929779053, "learning_rate": 3.1954781912765105e-05, "loss": 0.5366, "step": 9030 }, { "epoch": 11.55968, "grad_norm": 0.9168279767036438, "learning_rate": 3.195278111244498e-05, "loss": 0.5961, "step": 9031 }, { "epoch": 11.56096, "grad_norm": 0.8574641942977905, "learning_rate": 3.195078031212485e-05, "loss": 0.5461, "step": 9032 }, { "epoch": 11.56224, "grad_norm": 0.9339374303817749, "learning_rate": 3.194877951180472e-05, "loss": 0.5732, "step": 9033 }, { "epoch": 11.56352, "grad_norm": 0.9759158492088318, "learning_rate": 3.194677871148459e-05, "loss": 0.6018, "step": 9034 }, { "epoch": 11.5648, "grad_norm": 0.9553015232086182, "learning_rate": 3.194477791116447e-05, "loss": 0.5936, "step": 9035 }, { "epoch": 11.56608, "grad_norm": 0.9375491142272949, "learning_rate": 3.194277711084434e-05, "loss": 0.5606, "step": 9036 }, { "epoch": 11.56736, "grad_norm": 0.9261005520820618, "learning_rate": 3.1940776310524215e-05, "loss": 0.61, "step": 9037 
}, { "epoch": 11.56864, "grad_norm": 0.9291247129440308, "learning_rate": 3.193877551020408e-05, "loss": 0.6361, "step": 9038 }, { "epoch": 11.56992, "grad_norm": 0.9398210644721985, "learning_rate": 3.193677470988395e-05, "loss": 0.554, "step": 9039 }, { "epoch": 11.5712, "grad_norm": 0.897376298904419, "learning_rate": 3.1934773909563824e-05, "loss": 0.5887, "step": 9040 }, { "epoch": 11.57248, "grad_norm": 0.8442445397377014, "learning_rate": 3.1932773109243696e-05, "loss": 0.5555, "step": 9041 }, { "epoch": 11.57376, "grad_norm": 0.8896632194519043, "learning_rate": 3.1930772308923574e-05, "loss": 0.5612, "step": 9042 }, { "epoch": 11.57504, "grad_norm": 0.8563700914382935, "learning_rate": 3.1928771508603446e-05, "loss": 0.5588, "step": 9043 }, { "epoch": 11.57632, "grad_norm": 0.8944402933120728, "learning_rate": 3.192677070828332e-05, "loss": 0.5691, "step": 9044 }, { "epoch": 11.5776, "grad_norm": 0.9250562191009521, "learning_rate": 3.192476990796319e-05, "loss": 0.582, "step": 9045 }, { "epoch": 11.57888, "grad_norm": 0.9460017085075378, "learning_rate": 3.1922769107643055e-05, "loss": 0.6296, "step": 9046 }, { "epoch": 11.58016, "grad_norm": 0.9160764217376709, "learning_rate": 3.192076830732293e-05, "loss": 0.5749, "step": 9047 }, { "epoch": 11.58144, "grad_norm": 0.8978300094604492, "learning_rate": 3.19187675070028e-05, "loss": 0.522, "step": 9048 }, { "epoch": 11.58272, "grad_norm": 0.9328131675720215, "learning_rate": 3.191676670668268e-05, "loss": 0.6059, "step": 9049 }, { "epoch": 11.584, "grad_norm": 0.9057248830795288, "learning_rate": 3.191476590636255e-05, "loss": 0.5399, "step": 9050 }, { "epoch": 11.585280000000001, "grad_norm": 0.9383625984191895, "learning_rate": 3.191276510604242e-05, "loss": 0.6006, "step": 9051 }, { "epoch": 11.58656, "grad_norm": 0.9605696797370911, "learning_rate": 3.191076430572229e-05, "loss": 0.6013, "step": 9052 }, { "epoch": 11.58784, "grad_norm": 0.9236608743667603, "learning_rate": 3.1908763505402165e-05, 
"loss": 0.54, "step": 9053 }, { "epoch": 11.58912, "grad_norm": 0.9085046052932739, "learning_rate": 3.190676270508203e-05, "loss": 0.5383, "step": 9054 }, { "epoch": 11.5904, "grad_norm": 0.9364131689071655, "learning_rate": 3.19047619047619e-05, "loss": 0.5888, "step": 9055 }, { "epoch": 11.59168, "grad_norm": 0.9124594330787659, "learning_rate": 3.190276110444178e-05, "loss": 0.5496, "step": 9056 }, { "epoch": 11.59296, "grad_norm": 0.900393009185791, "learning_rate": 3.190076030412165e-05, "loss": 0.5176, "step": 9057 }, { "epoch": 11.59424, "grad_norm": 0.8931260704994202, "learning_rate": 3.1898759503801524e-05, "loss": 0.5582, "step": 9058 }, { "epoch": 11.59552, "grad_norm": 0.8596901297569275, "learning_rate": 3.1896758703481396e-05, "loss": 0.5446, "step": 9059 }, { "epoch": 11.5968, "grad_norm": 0.8517556190490723, "learning_rate": 3.189475790316127e-05, "loss": 0.5244, "step": 9060 }, { "epoch": 11.59808, "grad_norm": 0.9195119142532349, "learning_rate": 3.189275710284114e-05, "loss": 0.6166, "step": 9061 }, { "epoch": 11.59936, "grad_norm": 0.8437806367874146, "learning_rate": 3.1890756302521005e-05, "loss": 0.5334, "step": 9062 }, { "epoch": 11.60064, "grad_norm": 0.8890377283096313, "learning_rate": 3.188875550220088e-05, "loss": 0.5651, "step": 9063 }, { "epoch": 11.60192, "grad_norm": 0.9175596833229065, "learning_rate": 3.1886754701880755e-05, "loss": 0.587, "step": 9064 }, { "epoch": 11.6032, "grad_norm": 0.8746216893196106, "learning_rate": 3.188475390156063e-05, "loss": 0.562, "step": 9065 }, { "epoch": 11.60448, "grad_norm": 0.9140630960464478, "learning_rate": 3.18827531012405e-05, "loss": 0.5781, "step": 9066 }, { "epoch": 11.60576, "grad_norm": 0.8744653463363647, "learning_rate": 3.188075230092037e-05, "loss": 0.5093, "step": 9067 }, { "epoch": 11.60704, "grad_norm": 0.8821300268173218, "learning_rate": 3.187875150060024e-05, "loss": 0.5314, "step": 9068 }, { "epoch": 11.608319999999999, "grad_norm": 0.9396583437919617, "learning_rate": 
3.1876750700280114e-05, "loss": 0.5425, "step": 9069 }, { "epoch": 11.6096, "grad_norm": 0.9217917323112488, "learning_rate": 3.1874749899959986e-05, "loss": 0.5373, "step": 9070 }, { "epoch": 11.61088, "grad_norm": 0.9106178283691406, "learning_rate": 3.187274909963986e-05, "loss": 0.5847, "step": 9071 }, { "epoch": 11.61216, "grad_norm": 0.852333128452301, "learning_rate": 3.187074829931973e-05, "loss": 0.4806, "step": 9072 }, { "epoch": 11.61344, "grad_norm": 0.8371844291687012, "learning_rate": 3.18687474989996e-05, "loss": 0.5455, "step": 9073 }, { "epoch": 11.61472, "grad_norm": 0.8995749950408936, "learning_rate": 3.1866746698679474e-05, "loss": 0.5826, "step": 9074 }, { "epoch": 11.616, "grad_norm": 0.8314163088798523, "learning_rate": 3.1864745898359346e-05, "loss": 0.5006, "step": 9075 }, { "epoch": 11.617280000000001, "grad_norm": 0.920068085193634, "learning_rate": 3.186274509803922e-05, "loss": 0.5947, "step": 9076 }, { "epoch": 11.61856, "grad_norm": 0.9251027703285217, "learning_rate": 3.186074429771909e-05, "loss": 0.5639, "step": 9077 }, { "epoch": 11.61984, "grad_norm": 0.8840105533599854, "learning_rate": 3.185874349739896e-05, "loss": 0.543, "step": 9078 }, { "epoch": 11.62112, "grad_norm": 0.8957288265228271, "learning_rate": 3.185674269707883e-05, "loss": 0.5646, "step": 9079 }, { "epoch": 11.6224, "grad_norm": 0.8777086734771729, "learning_rate": 3.1854741896758705e-05, "loss": 0.554, "step": 9080 }, { "epoch": 11.62368, "grad_norm": 0.8875241875648499, "learning_rate": 3.185274109643858e-05, "loss": 0.5173, "step": 9081 }, { "epoch": 11.62496, "grad_norm": 0.9366897344589233, "learning_rate": 3.185074029611845e-05, "loss": 0.6264, "step": 9082 }, { "epoch": 11.62624, "grad_norm": 0.9162141680717468, "learning_rate": 3.184873949579832e-05, "loss": 0.5632, "step": 9083 }, { "epoch": 11.62752, "grad_norm": 0.9224411249160767, "learning_rate": 3.184673869547819e-05, "loss": 0.5485, "step": 9084 }, { "epoch": 11.6288, "grad_norm": 
0.8495880365371704, "learning_rate": 3.1844737895158064e-05, "loss": 0.4792, "step": 9085 }, { "epoch": 11.63008, "grad_norm": 0.9102917313575745, "learning_rate": 3.1842737094837936e-05, "loss": 0.563, "step": 9086 }, { "epoch": 11.63136, "grad_norm": 0.9147833585739136, "learning_rate": 3.184073629451781e-05, "loss": 0.5744, "step": 9087 }, { "epoch": 11.63264, "grad_norm": 0.897863507270813, "learning_rate": 3.183873549419768e-05, "loss": 0.544, "step": 9088 }, { "epoch": 11.63392, "grad_norm": 0.8683566451072693, "learning_rate": 3.183673469387755e-05, "loss": 0.5532, "step": 9089 }, { "epoch": 11.6352, "grad_norm": 0.8494045734405518, "learning_rate": 3.1834733893557423e-05, "loss": 0.5413, "step": 9090 }, { "epoch": 11.63648, "grad_norm": 0.8431652784347534, "learning_rate": 3.18327330932373e-05, "loss": 0.5293, "step": 9091 }, { "epoch": 11.63776, "grad_norm": 0.8813535571098328, "learning_rate": 3.183073229291717e-05, "loss": 0.5671, "step": 9092 }, { "epoch": 11.63904, "grad_norm": 0.9110351204872131, "learning_rate": 3.182873149259704e-05, "loss": 0.5919, "step": 9093 }, { "epoch": 11.64032, "grad_norm": 0.9060587286949158, "learning_rate": 3.182673069227691e-05, "loss": 0.5494, "step": 9094 }, { "epoch": 11.6416, "grad_norm": 0.8764411211013794, "learning_rate": 3.182472989195678e-05, "loss": 0.5471, "step": 9095 }, { "epoch": 11.64288, "grad_norm": 0.8792155981063843, "learning_rate": 3.1822729091636655e-05, "loss": 0.581, "step": 9096 }, { "epoch": 11.64416, "grad_norm": 0.8817364573478699, "learning_rate": 3.1820728291316526e-05, "loss": 0.5777, "step": 9097 }, { "epoch": 11.64544, "grad_norm": 0.8980505466461182, "learning_rate": 3.1818727490996405e-05, "loss": 0.5925, "step": 9098 }, { "epoch": 11.64672, "grad_norm": 0.9368979930877686, "learning_rate": 3.181672669067628e-05, "loss": 0.5798, "step": 9099 }, { "epoch": 11.648, "grad_norm": 0.9280688762664795, "learning_rate": 3.181472589035614e-05, "loss": 0.559, "step": 9100 }, { "epoch": 11.64928, 
"grad_norm": 0.8853485584259033, "learning_rate": 3.1812725090036014e-05, "loss": 0.5102, "step": 9101 }, { "epoch": 11.65056, "grad_norm": 0.9357476234436035, "learning_rate": 3.1810724289715886e-05, "loss": 0.5616, "step": 9102 }, { "epoch": 11.65184, "grad_norm": 0.8820586800575256, "learning_rate": 3.180872348939576e-05, "loss": 0.5462, "step": 9103 }, { "epoch": 11.65312, "grad_norm": 0.9340028166770935, "learning_rate": 3.180672268907563e-05, "loss": 0.5758, "step": 9104 }, { "epoch": 11.6544, "grad_norm": 0.9392242431640625, "learning_rate": 3.18047218887555e-05, "loss": 0.546, "step": 9105 }, { "epoch": 11.65568, "grad_norm": 0.874309241771698, "learning_rate": 3.180272108843538e-05, "loss": 0.5407, "step": 9106 }, { "epoch": 11.65696, "grad_norm": 0.9422518610954285, "learning_rate": 3.180072028811525e-05, "loss": 0.5634, "step": 9107 }, { "epoch": 11.65824, "grad_norm": 0.8917849659919739, "learning_rate": 3.179871948779512e-05, "loss": 0.5674, "step": 9108 }, { "epoch": 11.65952, "grad_norm": 0.9151961207389832, "learning_rate": 3.179671868747499e-05, "loss": 0.5681, "step": 9109 }, { "epoch": 11.6608, "grad_norm": 0.9490141868591309, "learning_rate": 3.179471788715486e-05, "loss": 0.5851, "step": 9110 }, { "epoch": 11.66208, "grad_norm": 0.9519876837730408, "learning_rate": 3.179271708683473e-05, "loss": 0.5503, "step": 9111 }, { "epoch": 11.66336, "grad_norm": 0.9187465906143188, "learning_rate": 3.1790716286514604e-05, "loss": 0.5978, "step": 9112 }, { "epoch": 11.66464, "grad_norm": 0.8703121542930603, "learning_rate": 3.178871548619448e-05, "loss": 0.5282, "step": 9113 }, { "epoch": 11.66592, "grad_norm": 0.9381796717643738, "learning_rate": 3.1786714685874355e-05, "loss": 0.5909, "step": 9114 }, { "epoch": 11.6672, "grad_norm": 0.8817717432975769, "learning_rate": 3.1784713885554227e-05, "loss": 0.5652, "step": 9115 }, { "epoch": 11.66848, "grad_norm": 0.8601658940315247, "learning_rate": 3.178271308523409e-05, "loss": 0.5071, "step": 9116 }, { 
"epoch": 11.66976, "grad_norm": 0.9270244836807251, "learning_rate": 3.1780712284913964e-05, "loss": 0.5354, "step": 9117 }, { "epoch": 11.67104, "grad_norm": 0.885297954082489, "learning_rate": 3.1778711484593835e-05, "loss": 0.577, "step": 9118 }, { "epoch": 11.67232, "grad_norm": 0.929111659526825, "learning_rate": 3.177671068427371e-05, "loss": 0.5876, "step": 9119 }, { "epoch": 11.6736, "grad_norm": 0.8553233742713928, "learning_rate": 3.1774709883953586e-05, "loss": 0.5356, "step": 9120 }, { "epoch": 11.67488, "grad_norm": 0.9026567339897156, "learning_rate": 3.177270908363346e-05, "loss": 0.5431, "step": 9121 }, { "epoch": 11.67616, "grad_norm": 0.9046716690063477, "learning_rate": 3.177070828331333e-05, "loss": 0.5823, "step": 9122 }, { "epoch": 11.67744, "grad_norm": 0.8545054197311401, "learning_rate": 3.17687074829932e-05, "loss": 0.5063, "step": 9123 }, { "epoch": 11.67872, "grad_norm": 0.9412639737129211, "learning_rate": 3.1766706682673067e-05, "loss": 0.5906, "step": 9124 }, { "epoch": 11.68, "grad_norm": 0.9309195280075073, "learning_rate": 3.176470588235294e-05, "loss": 0.5749, "step": 9125 }, { "epoch": 11.68128, "grad_norm": 0.9051558971405029, "learning_rate": 3.176270508203281e-05, "loss": 0.5678, "step": 9126 }, { "epoch": 11.68256, "grad_norm": 0.927655816078186, "learning_rate": 3.176070428171269e-05, "loss": 0.6117, "step": 9127 }, { "epoch": 11.68384, "grad_norm": 0.9260802865028381, "learning_rate": 3.175870348139256e-05, "loss": 0.5492, "step": 9128 }, { "epoch": 11.68512, "grad_norm": 0.8989552855491638, "learning_rate": 3.175670268107243e-05, "loss": 0.5712, "step": 9129 }, { "epoch": 11.6864, "grad_norm": 0.9030582308769226, "learning_rate": 3.1754701880752304e-05, "loss": 0.5752, "step": 9130 }, { "epoch": 11.68768, "grad_norm": 0.9276950359344482, "learning_rate": 3.1752701080432176e-05, "loss": 0.6009, "step": 9131 }, { "epoch": 11.68896, "grad_norm": 0.8523882031440735, "learning_rate": 3.175070028011204e-05, "loss": 0.5174, 
"step": 9132 }, { "epoch": 11.69024, "grad_norm": 0.9310316443443298, "learning_rate": 3.174869947979191e-05, "loss": 0.6016, "step": 9133 }, { "epoch": 11.69152, "grad_norm": 0.9005877375602722, "learning_rate": 3.174669867947179e-05, "loss": 0.5351, "step": 9134 }, { "epoch": 11.6928, "grad_norm": 0.9501352906227112, "learning_rate": 3.1744697879151664e-05, "loss": 0.5938, "step": 9135 }, { "epoch": 11.69408, "grad_norm": 0.9486740827560425, "learning_rate": 3.1742697078831536e-05, "loss": 0.5906, "step": 9136 }, { "epoch": 11.69536, "grad_norm": 0.9294406771659851, "learning_rate": 3.174069627851141e-05, "loss": 0.592, "step": 9137 }, { "epoch": 11.69664, "grad_norm": 0.8708564043045044, "learning_rate": 3.173869547819128e-05, "loss": 0.5135, "step": 9138 }, { "epoch": 11.69792, "grad_norm": 1.0165308713912964, "learning_rate": 3.173669467787115e-05, "loss": 0.6272, "step": 9139 }, { "epoch": 11.6992, "grad_norm": 0.8839307427406311, "learning_rate": 3.1734693877551016e-05, "loss": 0.5633, "step": 9140 }, { "epoch": 11.70048, "grad_norm": 0.8960999846458435, "learning_rate": 3.1732693077230895e-05, "loss": 0.5565, "step": 9141 }, { "epoch": 11.70176, "grad_norm": 0.9335712790489197, "learning_rate": 3.173069227691077e-05, "loss": 0.5516, "step": 9142 }, { "epoch": 11.70304, "grad_norm": 0.8799536228179932, "learning_rate": 3.172869147659064e-05, "loss": 0.585, "step": 9143 }, { "epoch": 11.70432, "grad_norm": 0.9027646780014038, "learning_rate": 3.172669067627051e-05, "loss": 0.5533, "step": 9144 }, { "epoch": 11.7056, "grad_norm": 0.9302271008491516, "learning_rate": 3.172468987595038e-05, "loss": 0.6, "step": 9145 }, { "epoch": 11.70688, "grad_norm": 0.8945030570030212, "learning_rate": 3.1722689075630254e-05, "loss": 0.5827, "step": 9146 }, { "epoch": 11.70816, "grad_norm": 0.9455578327178955, "learning_rate": 3.1720688275310126e-05, "loss": 0.546, "step": 9147 }, { "epoch": 11.70944, "grad_norm": 0.8383356332778931, "learning_rate": 3.171868747499e-05, 
"loss": 0.5304, "step": 9148 }, { "epoch": 11.71072, "grad_norm": 0.8386917114257812, "learning_rate": 3.171668667466987e-05, "loss": 0.5498, "step": 9149 }, { "epoch": 11.712, "grad_norm": 0.8906264901161194, "learning_rate": 3.171468587434974e-05, "loss": 0.5319, "step": 9150 }, { "epoch": 11.71328, "grad_norm": 0.9018164873123169, "learning_rate": 3.1712685074029613e-05, "loss": 0.5535, "step": 9151 }, { "epoch": 11.71456, "grad_norm": 0.9292755126953125, "learning_rate": 3.1710684273709485e-05, "loss": 0.5615, "step": 9152 }, { "epoch": 11.71584, "grad_norm": 0.947083592414856, "learning_rate": 3.170868347338936e-05, "loss": 0.5326, "step": 9153 }, { "epoch": 11.71712, "grad_norm": 0.872265100479126, "learning_rate": 3.170668267306923e-05, "loss": 0.5514, "step": 9154 }, { "epoch": 11.7184, "grad_norm": 0.8972936272621155, "learning_rate": 3.17046818727491e-05, "loss": 0.555, "step": 9155 }, { "epoch": 11.71968, "grad_norm": 0.8738512992858887, "learning_rate": 3.170268107242897e-05, "loss": 0.5585, "step": 9156 }, { "epoch": 11.72096, "grad_norm": 0.8479512333869934, "learning_rate": 3.1700680272108845e-05, "loss": 0.5217, "step": 9157 }, { "epoch": 11.72224, "grad_norm": 0.853814959526062, "learning_rate": 3.1698679471788716e-05, "loss": 0.5418, "step": 9158 }, { "epoch": 11.72352, "grad_norm": 0.9030658006668091, "learning_rate": 3.169667867146859e-05, "loss": 0.5371, "step": 9159 }, { "epoch": 11.7248, "grad_norm": 0.9065144062042236, "learning_rate": 3.169467787114846e-05, "loss": 0.555, "step": 9160 }, { "epoch": 11.72608, "grad_norm": 0.8621678948402405, "learning_rate": 3.169267707082833e-05, "loss": 0.552, "step": 9161 }, { "epoch": 11.727360000000001, "grad_norm": 0.8453906178474426, "learning_rate": 3.1690676270508204e-05, "loss": 0.5394, "step": 9162 }, { "epoch": 11.72864, "grad_norm": 0.9425848722457886, "learning_rate": 3.1688675470188076e-05, "loss": 0.6404, "step": 9163 }, { "epoch": 11.72992, "grad_norm": 0.9024143815040588, "learning_rate": 
3.168667466986795e-05, "loss": 0.5704, "step": 9164 }, { "epoch": 11.7312, "grad_norm": 0.89797443151474, "learning_rate": 3.168467386954782e-05, "loss": 0.5647, "step": 9165 }, { "epoch": 11.73248, "grad_norm": 0.9100660085678101, "learning_rate": 3.168267306922769e-05, "loss": 0.553, "step": 9166 }, { "epoch": 11.73376, "grad_norm": 0.8714706301689148, "learning_rate": 3.168067226890756e-05, "loss": 0.5353, "step": 9167 }, { "epoch": 11.73504, "grad_norm": 0.8743889331817627, "learning_rate": 3.1678671468587435e-05, "loss": 0.5513, "step": 9168 }, { "epoch": 11.73632, "grad_norm": 0.8862664699554443, "learning_rate": 3.1676670668267314e-05, "loss": 0.522, "step": 9169 }, { "epoch": 11.7376, "grad_norm": 0.8375951647758484, "learning_rate": 3.167466986794718e-05, "loss": 0.5308, "step": 9170 }, { "epoch": 11.73888, "grad_norm": 0.9067046642303467, "learning_rate": 3.167266906762705e-05, "loss": 0.5673, "step": 9171 }, { "epoch": 11.74016, "grad_norm": 0.8670250177383423, "learning_rate": 3.167066826730692e-05, "loss": 0.519, "step": 9172 }, { "epoch": 11.74144, "grad_norm": 0.8583195805549622, "learning_rate": 3.1668667466986794e-05, "loss": 0.532, "step": 9173 }, { "epoch": 11.74272, "grad_norm": 0.9073027968406677, "learning_rate": 3.1666666666666666e-05, "loss": 0.5339, "step": 9174 }, { "epoch": 11.744, "grad_norm": 0.8694731593132019, "learning_rate": 3.166466586634654e-05, "loss": 0.5488, "step": 9175 }, { "epoch": 11.74528, "grad_norm": 0.9093069434165955, "learning_rate": 3.166266506602642e-05, "loss": 0.5305, "step": 9176 }, { "epoch": 11.74656, "grad_norm": 0.8719558119773865, "learning_rate": 3.166066426570629e-05, "loss": 0.5112, "step": 9177 }, { "epoch": 11.74784, "grad_norm": 0.8447220325469971, "learning_rate": 3.1658663465386154e-05, "loss": 0.4831, "step": 9178 }, { "epoch": 11.74912, "grad_norm": 0.911290168762207, "learning_rate": 3.1656662665066025e-05, "loss": 0.5559, "step": 9179 }, { "epoch": 11.750399999999999, "grad_norm": 
0.8428891897201538, "learning_rate": 3.16546618647459e-05, "loss": 0.5264, "step": 9180 }, { "epoch": 11.75168, "grad_norm": 0.9175834059715271, "learning_rate": 3.165266106442577e-05, "loss": 0.5798, "step": 9181 }, { "epoch": 11.75296, "grad_norm": 0.8715541958808899, "learning_rate": 3.165066026410564e-05, "loss": 0.5017, "step": 9182 }, { "epoch": 11.75424, "grad_norm": 0.8270224928855896, "learning_rate": 3.164865946378552e-05, "loss": 0.5368, "step": 9183 }, { "epoch": 11.75552, "grad_norm": 0.9201467037200928, "learning_rate": 3.164665866346539e-05, "loss": 0.5696, "step": 9184 }, { "epoch": 11.7568, "grad_norm": 0.8654879331588745, "learning_rate": 3.164465786314526e-05, "loss": 0.5424, "step": 9185 }, { "epoch": 11.75808, "grad_norm": 0.897590696811676, "learning_rate": 3.164265706282513e-05, "loss": 0.5809, "step": 9186 }, { "epoch": 11.759360000000001, "grad_norm": 0.895999014377594, "learning_rate": 3.1640656262505e-05, "loss": 0.5432, "step": 9187 }, { "epoch": 11.76064, "grad_norm": 0.9101575613021851, "learning_rate": 3.163865546218487e-05, "loss": 0.6348, "step": 9188 }, { "epoch": 11.76192, "grad_norm": 0.8356999754905701, "learning_rate": 3.1636654661864744e-05, "loss": 0.4978, "step": 9189 }, { "epoch": 11.7632, "grad_norm": 0.886934757232666, "learning_rate": 3.163465386154462e-05, "loss": 0.5392, "step": 9190 }, { "epoch": 11.76448, "grad_norm": 0.8999463319778442, "learning_rate": 3.1632653061224494e-05, "loss": 0.5413, "step": 9191 }, { "epoch": 11.76576, "grad_norm": 0.9087318778038025, "learning_rate": 3.1630652260904366e-05, "loss": 0.5776, "step": 9192 }, { "epoch": 11.76704, "grad_norm": 0.8902879357337952, "learning_rate": 3.162865146058424e-05, "loss": 0.5987, "step": 9193 }, { "epoch": 11.76832, "grad_norm": 0.8536181449890137, "learning_rate": 3.16266506602641e-05, "loss": 0.5449, "step": 9194 }, { "epoch": 11.7696, "grad_norm": 0.8617259860038757, "learning_rate": 3.1624649859943975e-05, "loss": 0.5814, "step": 9195 }, { "epoch": 
11.77088, "grad_norm": 0.9288644790649414, "learning_rate": 3.162264905962385e-05, "loss": 0.5716, "step": 9196 }, { "epoch": 11.77216, "grad_norm": 0.9424710869789124, "learning_rate": 3.1620648259303726e-05, "loss": 0.5987, "step": 9197 }, { "epoch": 11.77344, "grad_norm": 0.8956670761108398, "learning_rate": 3.16186474589836e-05, "loss": 0.5165, "step": 9198 }, { "epoch": 11.77472, "grad_norm": 0.9281831383705139, "learning_rate": 3.161664665866347e-05, "loss": 0.572, "step": 9199 }, { "epoch": 11.776, "grad_norm": 0.9560943841934204, "learning_rate": 3.161464585834334e-05, "loss": 0.5877, "step": 9200 }, { "epoch": 11.77728, "grad_norm": 0.8967903852462769, "learning_rate": 3.161264505802321e-05, "loss": 0.5637, "step": 9201 }, { "epoch": 11.77856, "grad_norm": 0.8792325258255005, "learning_rate": 3.161064425770308e-05, "loss": 0.5224, "step": 9202 }, { "epoch": 11.77984, "grad_norm": 0.910875678062439, "learning_rate": 3.160864345738295e-05, "loss": 0.5554, "step": 9203 }, { "epoch": 11.78112, "grad_norm": 0.8686668872833252, "learning_rate": 3.160664265706283e-05, "loss": 0.5578, "step": 9204 }, { "epoch": 11.782399999999999, "grad_norm": 0.8657653331756592, "learning_rate": 3.16046418567427e-05, "loss": 0.5172, "step": 9205 }, { "epoch": 11.78368, "grad_norm": 0.8818771243095398, "learning_rate": 3.160264105642257e-05, "loss": 0.5662, "step": 9206 }, { "epoch": 11.78496, "grad_norm": 0.9204904437065125, "learning_rate": 3.1600640256102444e-05, "loss": 0.5711, "step": 9207 }, { "epoch": 11.78624, "grad_norm": 0.9225419759750366, "learning_rate": 3.1598639455782316e-05, "loss": 0.5677, "step": 9208 }, { "epoch": 11.78752, "grad_norm": 0.8883199095726013, "learning_rate": 3.159663865546219e-05, "loss": 0.5218, "step": 9209 }, { "epoch": 11.7888, "grad_norm": 0.9116802215576172, "learning_rate": 3.159463785514205e-05, "loss": 0.6062, "step": 9210 }, { "epoch": 11.79008, "grad_norm": 0.8611482977867126, "learning_rate": 3.159263705482193e-05, "loss": 0.5445, 
"step": 9211 }, { "epoch": 11.79136, "grad_norm": 0.8924483060836792, "learning_rate": 3.1590636254501803e-05, "loss": 0.602, "step": 9212 }, { "epoch": 11.79264, "grad_norm": 0.8485545516014099, "learning_rate": 3.1588635454181675e-05, "loss": 0.5326, "step": 9213 }, { "epoch": 11.79392, "grad_norm": 0.8666700124740601, "learning_rate": 3.158663465386155e-05, "loss": 0.5149, "step": 9214 }, { "epoch": 11.7952, "grad_norm": 0.9414648413658142, "learning_rate": 3.158463385354142e-05, "loss": 0.5767, "step": 9215 }, { "epoch": 11.79648, "grad_norm": 0.885181188583374, "learning_rate": 3.158263305322129e-05, "loss": 0.5112, "step": 9216 }, { "epoch": 11.79776, "grad_norm": 0.8811646103858948, "learning_rate": 3.158063225290116e-05, "loss": 0.5482, "step": 9217 }, { "epoch": 11.79904, "grad_norm": 0.905148983001709, "learning_rate": 3.157863145258103e-05, "loss": 0.6129, "step": 9218 }, { "epoch": 11.80032, "grad_norm": 0.9198310971260071, "learning_rate": 3.1576630652260906e-05, "loss": 0.5667, "step": 9219 }, { "epoch": 11.8016, "grad_norm": 0.9002284407615662, "learning_rate": 3.157462985194078e-05, "loss": 0.5478, "step": 9220 }, { "epoch": 11.80288, "grad_norm": 0.8892956972122192, "learning_rate": 3.157262905162065e-05, "loss": 0.5446, "step": 9221 }, { "epoch": 11.80416, "grad_norm": 0.8949340581893921, "learning_rate": 3.157062825130052e-05, "loss": 0.5443, "step": 9222 }, { "epoch": 11.80544, "grad_norm": 0.869086742401123, "learning_rate": 3.1568627450980394e-05, "loss": 0.546, "step": 9223 }, { "epoch": 11.80672, "grad_norm": 0.920300304889679, "learning_rate": 3.1566626650660266e-05, "loss": 0.5784, "step": 9224 }, { "epoch": 11.808, "grad_norm": 0.8822394013404846, "learning_rate": 3.156462585034014e-05, "loss": 0.5174, "step": 9225 }, { "epoch": 11.80928, "grad_norm": 0.9120069146156311, "learning_rate": 3.156262505002001e-05, "loss": 0.5333, "step": 9226 }, { "epoch": 11.81056, "grad_norm": 0.8972925543785095, "learning_rate": 3.156062424969988e-05, 
"loss": 0.6007, "step": 9227 }, { "epoch": 11.81184, "grad_norm": 0.8929551839828491, "learning_rate": 3.155862344937975e-05, "loss": 0.5792, "step": 9228 }, { "epoch": 11.81312, "grad_norm": 0.8880111575126648, "learning_rate": 3.1556622649059625e-05, "loss": 0.5315, "step": 9229 }, { "epoch": 11.8144, "grad_norm": 0.9577479362487793, "learning_rate": 3.15546218487395e-05, "loss": 0.6392, "step": 9230 }, { "epoch": 11.81568, "grad_norm": 0.9124792814254761, "learning_rate": 3.155262104841937e-05, "loss": 0.5666, "step": 9231 }, { "epoch": 11.81696, "grad_norm": 0.9073526263237, "learning_rate": 3.155062024809924e-05, "loss": 0.5656, "step": 9232 }, { "epoch": 11.81824, "grad_norm": 0.8945934176445007, "learning_rate": 3.154861944777911e-05, "loss": 0.5644, "step": 9233 }, { "epoch": 11.81952, "grad_norm": 0.8661502599716187, "learning_rate": 3.1546618647458984e-05, "loss": 0.5162, "step": 9234 }, { "epoch": 11.8208, "grad_norm": 0.9258007407188416, "learning_rate": 3.1544617847138856e-05, "loss": 0.542, "step": 9235 }, { "epoch": 11.82208, "grad_norm": 0.859990119934082, "learning_rate": 3.154261704681873e-05, "loss": 0.5686, "step": 9236 }, { "epoch": 11.82336, "grad_norm": 0.8406195044517517, "learning_rate": 3.15406162464986e-05, "loss": 0.5378, "step": 9237 }, { "epoch": 11.82464, "grad_norm": 0.8850319385528564, "learning_rate": 3.153861544617847e-05, "loss": 0.5726, "step": 9238 }, { "epoch": 11.82592, "grad_norm": 0.9233469367027283, "learning_rate": 3.1536614645858344e-05, "loss": 0.5878, "step": 9239 }, { "epoch": 11.8272, "grad_norm": 0.9111922979354858, "learning_rate": 3.153461384553822e-05, "loss": 0.5668, "step": 9240 }, { "epoch": 11.82848, "grad_norm": 0.903139054775238, "learning_rate": 3.153261304521809e-05, "loss": 0.5535, "step": 9241 }, { "epoch": 11.82976, "grad_norm": 0.8692033886909485, "learning_rate": 3.153061224489796e-05, "loss": 0.4958, "step": 9242 }, { "epoch": 11.83104, "grad_norm": 0.8684501051902771, "learning_rate": 
3.152861144457783e-05, "loss": 0.5101, "step": 9243 }, { "epoch": 11.83232, "grad_norm": 0.8597356081008911, "learning_rate": 3.15266106442577e-05, "loss": 0.5575, "step": 9244 }, { "epoch": 11.8336, "grad_norm": 0.8873345851898193, "learning_rate": 3.1524609843937575e-05, "loss": 0.5863, "step": 9245 }, { "epoch": 11.83488, "grad_norm": 0.8959015011787415, "learning_rate": 3.152260904361745e-05, "loss": 0.5667, "step": 9246 }, { "epoch": 11.83616, "grad_norm": 0.9511164426803589, "learning_rate": 3.1520608243297325e-05, "loss": 0.5755, "step": 9247 }, { "epoch": 11.83744, "grad_norm": 0.9274942278862, "learning_rate": 3.15186074429772e-05, "loss": 0.5661, "step": 9248 }, { "epoch": 11.83872, "grad_norm": 0.893008291721344, "learning_rate": 3.151660664265706e-05, "loss": 0.5641, "step": 9249 }, { "epoch": 11.84, "grad_norm": 0.9084400534629822, "learning_rate": 3.1514605842336934e-05, "loss": 0.533, "step": 9250 }, { "epoch": 11.84128, "grad_norm": 0.9191239476203918, "learning_rate": 3.1512605042016806e-05, "loss": 0.557, "step": 9251 }, { "epoch": 11.84256, "grad_norm": 0.933152973651886, "learning_rate": 3.151060424169668e-05, "loss": 0.5457, "step": 9252 }, { "epoch": 11.84384, "grad_norm": 0.9432767629623413, "learning_rate": 3.150860344137655e-05, "loss": 0.6098, "step": 9253 }, { "epoch": 11.84512, "grad_norm": 0.8910885453224182, "learning_rate": 3.150660264105643e-05, "loss": 0.5155, "step": 9254 }, { "epoch": 11.8464, "grad_norm": 0.8926212787628174, "learning_rate": 3.15046018407363e-05, "loss": 0.5236, "step": 9255 }, { "epoch": 11.84768, "grad_norm": 0.8529669046401978, "learning_rate": 3.150260104041617e-05, "loss": 0.5063, "step": 9256 }, { "epoch": 11.84896, "grad_norm": 0.8807107210159302, "learning_rate": 3.150060024009604e-05, "loss": 0.5411, "step": 9257 }, { "epoch": 11.85024, "grad_norm": 0.8540694117546082, "learning_rate": 3.149859943977591e-05, "loss": 0.5482, "step": 9258 }, { "epoch": 11.85152, "grad_norm": 0.8822780847549438, 
"learning_rate": 3.149659863945578e-05, "loss": 0.5256, "step": 9259 }, { "epoch": 11.8528, "grad_norm": 0.8807215094566345, "learning_rate": 3.149459783913565e-05, "loss": 0.5734, "step": 9260 }, { "epoch": 11.85408, "grad_norm": 0.8632503151893616, "learning_rate": 3.149259703881553e-05, "loss": 0.5134, "step": 9261 }, { "epoch": 11.85536, "grad_norm": 0.9131238460540771, "learning_rate": 3.14905962384954e-05, "loss": 0.5878, "step": 9262 }, { "epoch": 11.85664, "grad_norm": 0.862756609916687, "learning_rate": 3.1488595438175275e-05, "loss": 0.5613, "step": 9263 }, { "epoch": 11.85792, "grad_norm": 0.8660969734191895, "learning_rate": 3.148659463785515e-05, "loss": 0.5352, "step": 9264 }, { "epoch": 11.8592, "grad_norm": 0.9220228791236877, "learning_rate": 3.148459383753501e-05, "loss": 0.5738, "step": 9265 }, { "epoch": 11.86048, "grad_norm": 0.872033953666687, "learning_rate": 3.1482593037214884e-05, "loss": 0.5792, "step": 9266 }, { "epoch": 11.86176, "grad_norm": 0.8677129745483398, "learning_rate": 3.1480592236894756e-05, "loss": 0.5334, "step": 9267 }, { "epoch": 11.86304, "grad_norm": 0.8851068615913391, "learning_rate": 3.1478591436574634e-05, "loss": 0.5471, "step": 9268 }, { "epoch": 11.86432, "grad_norm": 0.921118438243866, "learning_rate": 3.1476590636254506e-05, "loss": 0.562, "step": 9269 }, { "epoch": 11.8656, "grad_norm": 0.868963897228241, "learning_rate": 3.147458983593438e-05, "loss": 0.5733, "step": 9270 }, { "epoch": 11.86688, "grad_norm": 0.9390560388565063, "learning_rate": 3.147258903561425e-05, "loss": 0.5904, "step": 9271 }, { "epoch": 11.86816, "grad_norm": 0.9005419611930847, "learning_rate": 3.147058823529412e-05, "loss": 0.5529, "step": 9272 }, { "epoch": 11.86944, "grad_norm": 0.8554105758666992, "learning_rate": 3.146858743497399e-05, "loss": 0.6002, "step": 9273 }, { "epoch": 11.87072, "grad_norm": 0.9244778752326965, "learning_rate": 3.146658663465386e-05, "loss": 0.5658, "step": 9274 }, { "epoch": 11.872, "grad_norm": 
0.8940372467041016, "learning_rate": 3.146458583433374e-05, "loss": 0.5647, "step": 9275 }, { "epoch": 11.87328, "grad_norm": 0.998906672000885, "learning_rate": 3.146258503401361e-05, "loss": 0.6089, "step": 9276 }, { "epoch": 11.87456, "grad_norm": 0.915664792060852, "learning_rate": 3.146058423369348e-05, "loss": 0.5579, "step": 9277 }, { "epoch": 11.87584, "grad_norm": 0.8526196479797363, "learning_rate": 3.145858343337335e-05, "loss": 0.5319, "step": 9278 }, { "epoch": 11.87712, "grad_norm": 0.8531317710876465, "learning_rate": 3.1456582633053225e-05, "loss": 0.5384, "step": 9279 }, { "epoch": 11.8784, "grad_norm": 0.876233696937561, "learning_rate": 3.1454581832733097e-05, "loss": 0.5413, "step": 9280 }, { "epoch": 11.87968, "grad_norm": 0.8565805554389954, "learning_rate": 3.145258103241296e-05, "loss": 0.5441, "step": 9281 }, { "epoch": 11.88096, "grad_norm": 0.8764281272888184, "learning_rate": 3.145058023209284e-05, "loss": 0.5208, "step": 9282 }, { "epoch": 11.88224, "grad_norm": 0.837112545967102, "learning_rate": 3.144857943177271e-05, "loss": 0.5515, "step": 9283 }, { "epoch": 11.88352, "grad_norm": 0.8274785280227661, "learning_rate": 3.1446578631452584e-05, "loss": 0.5302, "step": 9284 }, { "epoch": 11.8848, "grad_norm": 0.8528085350990295, "learning_rate": 3.1444577831132456e-05, "loss": 0.5414, "step": 9285 }, { "epoch": 11.88608, "grad_norm": 0.8616606593132019, "learning_rate": 3.144257703081233e-05, "loss": 0.5315, "step": 9286 }, { "epoch": 11.88736, "grad_norm": 0.9000251293182373, "learning_rate": 3.14405762304922e-05, "loss": 0.6074, "step": 9287 }, { "epoch": 11.88864, "grad_norm": 0.9343587756156921, "learning_rate": 3.143857543017207e-05, "loss": 0.5863, "step": 9288 }, { "epoch": 11.88992, "grad_norm": 0.9544128179550171, "learning_rate": 3.143657462985194e-05, "loss": 0.5909, "step": 9289 }, { "epoch": 11.8912, "grad_norm": 0.8408989310264587, "learning_rate": 3.1434573829531815e-05, "loss": 0.5349, "step": 9290 }, { "epoch": 11.89248, 
"grad_norm": 0.8732945919036865, "learning_rate": 3.143257302921169e-05, "loss": 0.5135, "step": 9291 }, { "epoch": 11.89376, "grad_norm": 0.8831563591957092, "learning_rate": 3.143057222889156e-05, "loss": 0.5941, "step": 9292 }, { "epoch": 11.89504, "grad_norm": 0.7958306074142456, "learning_rate": 3.142857142857143e-05, "loss": 0.5004, "step": 9293 }, { "epoch": 11.89632, "grad_norm": 0.9264546632766724, "learning_rate": 3.14265706282513e-05, "loss": 0.5965, "step": 9294 }, { "epoch": 11.8976, "grad_norm": 0.9189742803573608, "learning_rate": 3.1424569827931174e-05, "loss": 0.5785, "step": 9295 }, { "epoch": 11.89888, "grad_norm": 0.9090092778205872, "learning_rate": 3.1422569027611046e-05, "loss": 0.5715, "step": 9296 }, { "epoch": 11.90016, "grad_norm": 0.9040752053260803, "learning_rate": 3.142056822729092e-05, "loss": 0.5748, "step": 9297 }, { "epoch": 11.901440000000001, "grad_norm": 0.8540357947349548, "learning_rate": 3.141856742697079e-05, "loss": 0.5637, "step": 9298 }, { "epoch": 11.90272, "grad_norm": 0.8528608083724976, "learning_rate": 3.141656662665066e-05, "loss": 0.5238, "step": 9299 }, { "epoch": 11.904, "grad_norm": 0.8939346075057983, "learning_rate": 3.1414565826330534e-05, "loss": 0.5637, "step": 9300 }, { "epoch": 11.90528, "grad_norm": 0.8866257071495056, "learning_rate": 3.1412565026010406e-05, "loss": 0.6088, "step": 9301 }, { "epoch": 11.90656, "grad_norm": 0.9136728644371033, "learning_rate": 3.141056422569028e-05, "loss": 0.5708, "step": 9302 }, { "epoch": 11.90784, "grad_norm": 0.908406138420105, "learning_rate": 3.140856342537015e-05, "loss": 0.5593, "step": 9303 }, { "epoch": 11.90912, "grad_norm": 0.9199392199516296, "learning_rate": 3.140656262505002e-05, "loss": 0.5653, "step": 9304 }, { "epoch": 11.9104, "grad_norm": 0.9563983082771301, "learning_rate": 3.140456182472989e-05, "loss": 0.56, "step": 9305 }, { "epoch": 11.91168, "grad_norm": 0.8945441842079163, "learning_rate": 3.1402561024409765e-05, "loss": 0.5475, "step": 9306 
}, { "epoch": 11.91296, "grad_norm": 0.9007996320724487, "learning_rate": 3.140056022408964e-05, "loss": 0.6062, "step": 9307 }, { "epoch": 11.91424, "grad_norm": 0.9043374061584473, "learning_rate": 3.139855942376951e-05, "loss": 0.5396, "step": 9308 }, { "epoch": 11.91552, "grad_norm": 0.8611972332000732, "learning_rate": 3.139655862344938e-05, "loss": 0.5255, "step": 9309 }, { "epoch": 11.9168, "grad_norm": 0.9283512234687805, "learning_rate": 3.139455782312926e-05, "loss": 0.5369, "step": 9310 }, { "epoch": 11.91808, "grad_norm": 0.9080418944358826, "learning_rate": 3.1392557022809124e-05, "loss": 0.5843, "step": 9311 }, { "epoch": 11.91936, "grad_norm": 0.9004126191139221, "learning_rate": 3.1390556222488996e-05, "loss": 0.5956, "step": 9312 }, { "epoch": 11.92064, "grad_norm": 0.9140704274177551, "learning_rate": 3.138855542216887e-05, "loss": 0.5571, "step": 9313 }, { "epoch": 11.92192, "grad_norm": 0.8815629482269287, "learning_rate": 3.138655462184874e-05, "loss": 0.5591, "step": 9314 }, { "epoch": 11.9232, "grad_norm": 0.9093433618545532, "learning_rate": 3.138455382152861e-05, "loss": 0.5775, "step": 9315 }, { "epoch": 11.924479999999999, "grad_norm": 0.8649457693099976, "learning_rate": 3.138255302120848e-05, "loss": 0.547, "step": 9316 }, { "epoch": 11.92576, "grad_norm": 0.8733769059181213, "learning_rate": 3.138055222088836e-05, "loss": 0.5395, "step": 9317 }, { "epoch": 11.92704, "grad_norm": 0.8359034061431885, "learning_rate": 3.1378551420568234e-05, "loss": 0.5197, "step": 9318 }, { "epoch": 11.92832, "grad_norm": 0.9323531985282898, "learning_rate": 3.13765506202481e-05, "loss": 0.5518, "step": 9319 }, { "epoch": 11.9296, "grad_norm": 0.8930750489234924, "learning_rate": 3.137454981992797e-05, "loss": 0.576, "step": 9320 }, { "epoch": 11.93088, "grad_norm": 0.9025473594665527, "learning_rate": 3.137254901960784e-05, "loss": 0.6194, "step": 9321 }, { "epoch": 11.93216, "grad_norm": 0.8879018425941467, "learning_rate": 3.1370548219287715e-05, 
"loss": 0.5253, "step": 9322 }, { "epoch": 11.933440000000001, "grad_norm": 0.9026443958282471, "learning_rate": 3.1368547418967586e-05, "loss": 0.61, "step": 9323 }, { "epoch": 11.93472, "grad_norm": 0.9492444396018982, "learning_rate": 3.1366546618647465e-05, "loss": 0.5656, "step": 9324 }, { "epoch": 11.936, "grad_norm": 0.913387656211853, "learning_rate": 3.136454581832734e-05, "loss": 0.533, "step": 9325 }, { "epoch": 11.93728, "grad_norm": 0.8534862995147705, "learning_rate": 3.136254501800721e-05, "loss": 0.5176, "step": 9326 }, { "epoch": 11.93856, "grad_norm": 0.8602653741836548, "learning_rate": 3.1360544217687074e-05, "loss": 0.5341, "step": 9327 }, { "epoch": 11.93984, "grad_norm": 0.9074930548667908, "learning_rate": 3.1358543417366946e-05, "loss": 0.539, "step": 9328 }, { "epoch": 11.94112, "grad_norm": 0.9066956639289856, "learning_rate": 3.135654261704682e-05, "loss": 0.5496, "step": 9329 }, { "epoch": 11.9424, "grad_norm": 0.9023146033287048, "learning_rate": 3.135454181672669e-05, "loss": 0.5679, "step": 9330 }, { "epoch": 11.94368, "grad_norm": 0.9154323935508728, "learning_rate": 3.135254101640656e-05, "loss": 0.539, "step": 9331 }, { "epoch": 11.94496, "grad_norm": 0.9193128347396851, "learning_rate": 3.135054021608644e-05, "loss": 0.5444, "step": 9332 }, { "epoch": 11.94624, "grad_norm": 0.9258593916893005, "learning_rate": 3.134853941576631e-05, "loss": 0.5432, "step": 9333 }, { "epoch": 11.94752, "grad_norm": 0.9019486904144287, "learning_rate": 3.1346538615446184e-05, "loss": 0.5888, "step": 9334 }, { "epoch": 11.9488, "grad_norm": 0.9071494340896606, "learning_rate": 3.134453781512605e-05, "loss": 0.5334, "step": 9335 }, { "epoch": 11.95008, "grad_norm": 0.8721327185630798, "learning_rate": 3.134253701480592e-05, "loss": 0.5885, "step": 9336 }, { "epoch": 11.95136, "grad_norm": 0.9365826845169067, "learning_rate": 3.134053621448579e-05, "loss": 0.5799, "step": 9337 }, { "epoch": 11.95264, "grad_norm": 0.9165831804275513, "learning_rate": 
3.1338535414165664e-05, "loss": 0.5104, "step": 9338 }, { "epoch": 11.95392, "grad_norm": 0.8874984979629517, "learning_rate": 3.133653461384554e-05, "loss": 0.5204, "step": 9339 }, { "epoch": 11.9552, "grad_norm": 0.8862015604972839, "learning_rate": 3.1334533813525415e-05, "loss": 0.5502, "step": 9340 }, { "epoch": 11.956479999999999, "grad_norm": 0.9116657376289368, "learning_rate": 3.1332533013205287e-05, "loss": 0.5619, "step": 9341 }, { "epoch": 11.95776, "grad_norm": 0.8775522112846375, "learning_rate": 3.133053221288516e-05, "loss": 0.526, "step": 9342 }, { "epoch": 11.95904, "grad_norm": 0.8832023739814758, "learning_rate": 3.1328531412565023e-05, "loss": 0.5192, "step": 9343 }, { "epoch": 11.96032, "grad_norm": 0.8574889302253723, "learning_rate": 3.1326530612244895e-05, "loss": 0.5594, "step": 9344 }, { "epoch": 11.9616, "grad_norm": 0.8446320295333862, "learning_rate": 3.132452981192477e-05, "loss": 0.5316, "step": 9345 }, { "epoch": 11.96288, "grad_norm": 0.8501649498939514, "learning_rate": 3.1322529011604646e-05, "loss": 0.5666, "step": 9346 }, { "epoch": 11.96416, "grad_norm": 0.9033554196357727, "learning_rate": 3.132052821128452e-05, "loss": 0.5631, "step": 9347 }, { "epoch": 11.96544, "grad_norm": 0.9225219488143921, "learning_rate": 3.131852741096439e-05, "loss": 0.573, "step": 9348 }, { "epoch": 11.96672, "grad_norm": 0.9399309754371643, "learning_rate": 3.131652661064426e-05, "loss": 0.6026, "step": 9349 }, { "epoch": 11.968, "grad_norm": 0.8722439408302307, "learning_rate": 3.131452581032413e-05, "loss": 0.5392, "step": 9350 }, { "epoch": 11.96928, "grad_norm": 0.8984782099723816, "learning_rate": 3.1312525010004e-05, "loss": 0.5778, "step": 9351 }, { "epoch": 11.97056, "grad_norm": 0.9469655156135559, "learning_rate": 3.131052420968387e-05, "loss": 0.6142, "step": 9352 }, { "epoch": 11.97184, "grad_norm": 0.8549695014953613, "learning_rate": 3.130852340936375e-05, "loss": 0.5452, "step": 9353 }, { "epoch": 11.97312, "grad_norm": 
0.913131058216095, "learning_rate": 3.130652260904362e-05, "loss": 0.5255, "step": 9354 }, { "epoch": 11.9744, "grad_norm": 0.8826735615730286, "learning_rate": 3.130452180872349e-05, "loss": 0.5603, "step": 9355 }, { "epoch": 11.97568, "grad_norm": 0.8853660225868225, "learning_rate": 3.1302521008403364e-05, "loss": 0.5223, "step": 9356 }, { "epoch": 11.97696, "grad_norm": 0.9496334791183472, "learning_rate": 3.1300520208083236e-05, "loss": 0.6112, "step": 9357 }, { "epoch": 11.97824, "grad_norm": 0.9710426926612854, "learning_rate": 3.129851940776311e-05, "loss": 0.5996, "step": 9358 }, { "epoch": 11.97952, "grad_norm": 0.9180809855461121, "learning_rate": 3.129651860744297e-05, "loss": 0.5501, "step": 9359 }, { "epoch": 11.9808, "grad_norm": 0.875312089920044, "learning_rate": 3.129451780712285e-05, "loss": 0.5474, "step": 9360 }, { "epoch": 11.98208, "grad_norm": 0.9497184753417969, "learning_rate": 3.1292517006802724e-05, "loss": 0.6125, "step": 9361 }, { "epoch": 11.98336, "grad_norm": 0.9112341403961182, "learning_rate": 3.1290516206482596e-05, "loss": 0.5461, "step": 9362 }, { "epoch": 11.98464, "grad_norm": 0.8672278523445129, "learning_rate": 3.128851540616247e-05, "loss": 0.5453, "step": 9363 }, { "epoch": 11.98592, "grad_norm": 0.853042721748352, "learning_rate": 3.128651460584234e-05, "loss": 0.5445, "step": 9364 }, { "epoch": 11.9872, "grad_norm": 0.936596155166626, "learning_rate": 3.128451380552221e-05, "loss": 0.5467, "step": 9365 }, { "epoch": 11.98848, "grad_norm": 0.901436984539032, "learning_rate": 3.128251300520208e-05, "loss": 0.5904, "step": 9366 }, { "epoch": 11.98976, "grad_norm": 0.9036868214607239, "learning_rate": 3.1280512204881955e-05, "loss": 0.5724, "step": 9367 }, { "epoch": 11.99104, "grad_norm": 0.8857167959213257, "learning_rate": 3.127851140456183e-05, "loss": 0.5707, "step": 9368 }, { "epoch": 11.99232, "grad_norm": 0.8939414024353027, "learning_rate": 3.12765106042417e-05, "loss": 0.6032, "step": 9369 }, { "epoch": 11.9936, 
"grad_norm": 0.9171572327613831, "learning_rate": 3.127450980392157e-05, "loss": 0.5319, "step": 9370 }, { "epoch": 11.99488, "grad_norm": 0.92975914478302, "learning_rate": 3.127250900360144e-05, "loss": 0.6055, "step": 9371 }, { "epoch": 11.99616, "grad_norm": 0.9182210564613342, "learning_rate": 3.1270508203281314e-05, "loss": 0.6125, "step": 9372 }, { "epoch": 11.99744, "grad_norm": 0.8801096081733704, "learning_rate": 3.1268507402961186e-05, "loss": 0.5197, "step": 9373 }, { "epoch": 11.99872, "grad_norm": 0.9054288268089294, "learning_rate": 3.126650660264106e-05, "loss": 0.5574, "step": 9374 }, { "epoch": 12.0, "grad_norm": Infinity, "learning_rate": 3.126650660264106e-05, "loss": 0.9918, "step": 9375 }, { "epoch": 12.00128, "grad_norm": 0.8894946575164795, "learning_rate": 3.126450580232093e-05, "loss": 0.5222, "step": 9376 }, { "epoch": 12.00256, "grad_norm": 0.8508769273757935, "learning_rate": 3.12625050020008e-05, "loss": 0.521, "step": 9377 }, { "epoch": 12.00384, "grad_norm": 0.8415517807006836, "learning_rate": 3.1260504201680673e-05, "loss": 0.5538, "step": 9378 }, { "epoch": 12.00512, "grad_norm": 0.8784507513046265, "learning_rate": 3.1258503401360545e-05, "loss": 0.5281, "step": 9379 }, { "epoch": 12.0064, "grad_norm": 0.8953071236610413, "learning_rate": 3.125650260104042e-05, "loss": 0.5587, "step": 9380 }, { "epoch": 12.00768, "grad_norm": 0.9295496344566345, "learning_rate": 3.125450180072029e-05, "loss": 0.5007, "step": 9381 }, { "epoch": 12.00896, "grad_norm": 0.8452316522598267, "learning_rate": 3.125250100040016e-05, "loss": 0.5127, "step": 9382 }, { "epoch": 12.01024, "grad_norm": 0.9824538826942444, "learning_rate": 3.125050020008003e-05, "loss": 0.6222, "step": 9383 }, { "epoch": 12.01152, "grad_norm": 0.8931326270103455, "learning_rate": 3.1248499399759905e-05, "loss": 0.5504, "step": 9384 }, { "epoch": 12.0128, "grad_norm": 0.8844808340072632, "learning_rate": 3.1246498599439776e-05, "loss": 0.5371, "step": 9385 }, { "epoch": 
12.01408, "grad_norm": 0.8983806371688843, "learning_rate": 3.124449779911965e-05, "loss": 0.5548, "step": 9386 }, { "epoch": 12.01536, "grad_norm": 0.9335402250289917, "learning_rate": 3.124249699879952e-05, "loss": 0.5677, "step": 9387 }, { "epoch": 12.01664, "grad_norm": 0.928325355052948, "learning_rate": 3.124049619847939e-05, "loss": 0.5343, "step": 9388 }, { "epoch": 12.01792, "grad_norm": 0.9786158800125122, "learning_rate": 3.123849539815927e-05, "loss": 0.5708, "step": 9389 }, { "epoch": 12.0192, "grad_norm": 0.8691074252128601, "learning_rate": 3.1236494597839136e-05, "loss": 0.5301, "step": 9390 }, { "epoch": 12.02048, "grad_norm": 0.9225201606750488, "learning_rate": 3.123449379751901e-05, "loss": 0.5683, "step": 9391 }, { "epoch": 12.02176, "grad_norm": 0.9116083383560181, "learning_rate": 3.123249299719888e-05, "loss": 0.5318, "step": 9392 }, { "epoch": 12.02304, "grad_norm": 0.9338945150375366, "learning_rate": 3.123049219687875e-05, "loss": 0.5751, "step": 9393 }, { "epoch": 12.02432, "grad_norm": 0.8985954523086548, "learning_rate": 3.122849139655862e-05, "loss": 0.5471, "step": 9394 }, { "epoch": 12.0256, "grad_norm": 0.9236059188842773, "learning_rate": 3.1226490596238495e-05, "loss": 0.54, "step": 9395 }, { "epoch": 12.02688, "grad_norm": 0.902243971824646, "learning_rate": 3.1224489795918374e-05, "loss": 0.5632, "step": 9396 }, { "epoch": 12.02816, "grad_norm": 0.9559075832366943, "learning_rate": 3.1222488995598245e-05, "loss": 0.5201, "step": 9397 }, { "epoch": 12.02944, "grad_norm": 0.8943519592285156, "learning_rate": 3.122048819527811e-05, "loss": 0.5407, "step": 9398 }, { "epoch": 12.03072, "grad_norm": 0.8215572834014893, "learning_rate": 3.121848739495798e-05, "loss": 0.4806, "step": 9399 }, { "epoch": 12.032, "grad_norm": 0.9161409139633179, "learning_rate": 3.1216486594637854e-05, "loss": 0.5971, "step": 9400 }, { "epoch": 12.03328, "grad_norm": 0.9485968947410583, "learning_rate": 3.1214485794317726e-05, "loss": 0.5852, "step": 9401 
}, { "epoch": 12.03456, "grad_norm": 0.8830835223197937, "learning_rate": 3.12124849939976e-05, "loss": 0.475, "step": 9402 }, { "epoch": 12.03584, "grad_norm": 0.9634863138198853, "learning_rate": 3.1210484193677477e-05, "loss": 0.5458, "step": 9403 }, { "epoch": 12.03712, "grad_norm": 0.9121254086494446, "learning_rate": 3.120848339335735e-05, "loss": 0.5025, "step": 9404 }, { "epoch": 12.0384, "grad_norm": 1.0343906879425049, "learning_rate": 3.120648259303722e-05, "loss": 0.5407, "step": 9405 }, { "epoch": 12.03968, "grad_norm": 0.892846941947937, "learning_rate": 3.1204481792717085e-05, "loss": 0.5607, "step": 9406 }, { "epoch": 12.04096, "grad_norm": 0.8945642113685608, "learning_rate": 3.120248099239696e-05, "loss": 0.5562, "step": 9407 }, { "epoch": 12.04224, "grad_norm": 0.8921273350715637, "learning_rate": 3.120048019207683e-05, "loss": 0.5033, "step": 9408 }, { "epoch": 12.043520000000001, "grad_norm": 0.8849148154258728, "learning_rate": 3.11984793917567e-05, "loss": 0.558, "step": 9409 }, { "epoch": 12.0448, "grad_norm": 0.8761171698570251, "learning_rate": 3.119647859143658e-05, "loss": 0.5437, "step": 9410 }, { "epoch": 12.04608, "grad_norm": 0.9475454092025757, "learning_rate": 3.119447779111645e-05, "loss": 0.5873, "step": 9411 }, { "epoch": 12.04736, "grad_norm": 0.9096229076385498, "learning_rate": 3.119247699079632e-05, "loss": 0.5356, "step": 9412 }, { "epoch": 12.04864, "grad_norm": 0.8909059166908264, "learning_rate": 3.1190476190476195e-05, "loss": 0.5258, "step": 9413 }, { "epoch": 12.04992, "grad_norm": 0.8790292739868164, "learning_rate": 3.118847539015606e-05, "loss": 0.5287, "step": 9414 }, { "epoch": 12.0512, "grad_norm": 0.9126578569412231, "learning_rate": 3.118647458983593e-05, "loss": 0.5081, "step": 9415 }, { "epoch": 12.05248, "grad_norm": 0.9787217378616333, "learning_rate": 3.1184473789515804e-05, "loss": 0.5798, "step": 9416 }, { "epoch": 12.05376, "grad_norm": 0.8833532333374023, "learning_rate": 3.118247298919568e-05, 
"loss": 0.5202, "step": 9417 }, { "epoch": 12.05504, "grad_norm": 0.8925086855888367, "learning_rate": 3.1180472188875554e-05, "loss": 0.5235, "step": 9418 }, { "epoch": 12.05632, "grad_norm": 0.9011042714118958, "learning_rate": 3.1178471388555426e-05, "loss": 0.54, "step": 9419 }, { "epoch": 12.0576, "grad_norm": 0.8985966444015503, "learning_rate": 3.11764705882353e-05, "loss": 0.5078, "step": 9420 }, { "epoch": 12.05888, "grad_norm": 0.9557191729545593, "learning_rate": 3.117446978791517e-05, "loss": 0.5882, "step": 9421 }, { "epoch": 12.06016, "grad_norm": 0.9084435701370239, "learning_rate": 3.1172468987595035e-05, "loss": 0.5736, "step": 9422 }, { "epoch": 12.06144, "grad_norm": 0.8469879627227783, "learning_rate": 3.117046818727491e-05, "loss": 0.5194, "step": 9423 }, { "epoch": 12.06272, "grad_norm": 0.9563634991645813, "learning_rate": 3.1168467386954786e-05, "loss": 0.5507, "step": 9424 }, { "epoch": 12.064, "grad_norm": 0.9139657020568848, "learning_rate": 3.116646658663466e-05, "loss": 0.5972, "step": 9425 }, { "epoch": 12.06528, "grad_norm": 0.8986368775367737, "learning_rate": 3.116446578631453e-05, "loss": 0.5257, "step": 9426 }, { "epoch": 12.06656, "grad_norm": 0.9286632537841797, "learning_rate": 3.11624649859944e-05, "loss": 0.5412, "step": 9427 }, { "epoch": 12.06784, "grad_norm": 0.9259238243103027, "learning_rate": 3.116046418567427e-05, "loss": 0.5729, "step": 9428 }, { "epoch": 12.06912, "grad_norm": 0.8716070652008057, "learning_rate": 3.1158463385354145e-05, "loss": 0.5373, "step": 9429 }, { "epoch": 12.0704, "grad_norm": 0.937944233417511, "learning_rate": 3.115646258503401e-05, "loss": 0.5433, "step": 9430 }, { "epoch": 12.07168, "grad_norm": 0.8506153225898743, "learning_rate": 3.115446178471389e-05, "loss": 0.4806, "step": 9431 }, { "epoch": 12.07296, "grad_norm": 0.9485256671905518, "learning_rate": 3.115246098439376e-05, "loss": 0.5236, "step": 9432 }, { "epoch": 12.07424, "grad_norm": 0.9169779419898987, "learning_rate": 
3.115046018407363e-05, "loss": 0.5357, "step": 9433 }, { "epoch": 12.07552, "grad_norm": 0.9298108816146851, "learning_rate": 3.1148459383753504e-05, "loss": 0.5527, "step": 9434 }, { "epoch": 12.0768, "grad_norm": 0.8851594924926758, "learning_rate": 3.1146458583433376e-05, "loss": 0.4717, "step": 9435 }, { "epoch": 12.07808, "grad_norm": 0.9402860999107361, "learning_rate": 3.114445778311325e-05, "loss": 0.5484, "step": 9436 }, { "epoch": 12.07936, "grad_norm": 0.9027056097984314, "learning_rate": 3.114245698279312e-05, "loss": 0.5506, "step": 9437 }, { "epoch": 12.08064, "grad_norm": 0.9592556953430176, "learning_rate": 3.114045618247299e-05, "loss": 0.5557, "step": 9438 }, { "epoch": 12.08192, "grad_norm": 0.9053078293800354, "learning_rate": 3.1138455382152863e-05, "loss": 0.5468, "step": 9439 }, { "epoch": 12.0832, "grad_norm": 0.941765546798706, "learning_rate": 3.1136454581832735e-05, "loss": 0.5678, "step": 9440 }, { "epoch": 12.08448, "grad_norm": 0.9676002264022827, "learning_rate": 3.113445378151261e-05, "loss": 0.5451, "step": 9441 }, { "epoch": 12.08576, "grad_norm": 0.9788287878036499, "learning_rate": 3.113245298119248e-05, "loss": 0.6131, "step": 9442 }, { "epoch": 12.08704, "grad_norm": 0.9841639399528503, "learning_rate": 3.113045218087235e-05, "loss": 0.5438, "step": 9443 }, { "epoch": 12.08832, "grad_norm": 0.955268144607544, "learning_rate": 3.112845138055222e-05, "loss": 0.5622, "step": 9444 }, { "epoch": 12.0896, "grad_norm": 0.905160129070282, "learning_rate": 3.1126450580232095e-05, "loss": 0.5138, "step": 9445 }, { "epoch": 12.09088, "grad_norm": 0.9529837369918823, "learning_rate": 3.1124449779911966e-05, "loss": 0.5891, "step": 9446 }, { "epoch": 12.09216, "grad_norm": 1.0116820335388184, "learning_rate": 3.112244897959184e-05, "loss": 0.5347, "step": 9447 }, { "epoch": 12.09344, "grad_norm": 0.9079636335372925, "learning_rate": 3.112044817927171e-05, "loss": 0.5178, "step": 9448 }, { "epoch": 12.09472, "grad_norm": 0.9194412231445312, 
"learning_rate": 3.111844737895158e-05, "loss": 0.5488, "step": 9449 }, { "epoch": 12.096, "grad_norm": 0.9513563513755798, "learning_rate": 3.1116446578631454e-05, "loss": 0.5472, "step": 9450 }, { "epoch": 12.09728, "grad_norm": 0.9122597575187683, "learning_rate": 3.1114445778311326e-05, "loss": 0.5887, "step": 9451 }, { "epoch": 12.09856, "grad_norm": 0.9185177683830261, "learning_rate": 3.11124449779912e-05, "loss": 0.5636, "step": 9452 }, { "epoch": 12.09984, "grad_norm": 0.923226535320282, "learning_rate": 3.111044417767107e-05, "loss": 0.5302, "step": 9453 }, { "epoch": 12.10112, "grad_norm": 0.8814284801483154, "learning_rate": 3.110844337735094e-05, "loss": 0.522, "step": 9454 }, { "epoch": 12.1024, "grad_norm": 0.9242017865180969, "learning_rate": 3.110644257703081e-05, "loss": 0.5458, "step": 9455 }, { "epoch": 12.10368, "grad_norm": 0.9658417701721191, "learning_rate": 3.1104441776710685e-05, "loss": 0.5912, "step": 9456 }, { "epoch": 12.10496, "grad_norm": 0.90215003490448, "learning_rate": 3.110244097639056e-05, "loss": 0.5586, "step": 9457 }, { "epoch": 12.10624, "grad_norm": 0.9708035588264465, "learning_rate": 3.110044017607043e-05, "loss": 0.5718, "step": 9458 }, { "epoch": 12.10752, "grad_norm": 0.8893763422966003, "learning_rate": 3.10984393757503e-05, "loss": 0.532, "step": 9459 }, { "epoch": 12.1088, "grad_norm": 0.9912137389183044, "learning_rate": 3.109643857543017e-05, "loss": 0.5976, "step": 9460 }, { "epoch": 12.11008, "grad_norm": 0.89457768201828, "learning_rate": 3.1094437775110044e-05, "loss": 0.5394, "step": 9461 }, { "epoch": 12.11136, "grad_norm": 0.9052734971046448, "learning_rate": 3.1092436974789916e-05, "loss": 0.5318, "step": 9462 }, { "epoch": 12.11264, "grad_norm": 0.9517974257469177, "learning_rate": 3.109043617446979e-05, "loss": 0.5494, "step": 9463 }, { "epoch": 12.11392, "grad_norm": 0.921795666217804, "learning_rate": 3.108843537414966e-05, "loss": 0.4871, "step": 9464 }, { "epoch": 12.1152, "grad_norm": 
0.8893502950668335, "learning_rate": 3.108643457382953e-05, "loss": 0.5022, "step": 9465 }, { "epoch": 12.11648, "grad_norm": 1.020261287689209, "learning_rate": 3.1084433773509404e-05, "loss": 0.6282, "step": 9466 }, { "epoch": 12.11776, "grad_norm": 0.9455602169036865, "learning_rate": 3.108243297318928e-05, "loss": 0.5625, "step": 9467 }, { "epoch": 12.11904, "grad_norm": 0.9229424595832825, "learning_rate": 3.108043217286915e-05, "loss": 0.5478, "step": 9468 }, { "epoch": 12.12032, "grad_norm": 0.9324727654457092, "learning_rate": 3.107843137254902e-05, "loss": 0.5842, "step": 9469 }, { "epoch": 12.1216, "grad_norm": 0.8508306741714478, "learning_rate": 3.107643057222889e-05, "loss": 0.4829, "step": 9470 }, { "epoch": 12.12288, "grad_norm": 0.9504947662353516, "learning_rate": 3.107442977190876e-05, "loss": 0.5494, "step": 9471 }, { "epoch": 12.12416, "grad_norm": 0.8809190988540649, "learning_rate": 3.1072428971588635e-05, "loss": 0.518, "step": 9472 }, { "epoch": 12.12544, "grad_norm": 0.9718751311302185, "learning_rate": 3.1070428171268507e-05, "loss": 0.5083, "step": 9473 }, { "epoch": 12.12672, "grad_norm": 0.9734033346176147, "learning_rate": 3.1068427370948385e-05, "loss": 0.5977, "step": 9474 }, { "epoch": 12.128, "grad_norm": 0.892812967300415, "learning_rate": 3.106642657062826e-05, "loss": 0.5214, "step": 9475 }, { "epoch": 12.12928, "grad_norm": 0.9172582626342773, "learning_rate": 3.106442577030812e-05, "loss": 0.5311, "step": 9476 }, { "epoch": 12.13056, "grad_norm": 0.8705976009368896, "learning_rate": 3.1062424969987994e-05, "loss": 0.4711, "step": 9477 }, { "epoch": 12.13184, "grad_norm": 0.9229205250740051, "learning_rate": 3.1060424169667866e-05, "loss": 0.5198, "step": 9478 }, { "epoch": 12.13312, "grad_norm": 0.9089353680610657, "learning_rate": 3.105842336934774e-05, "loss": 0.5156, "step": 9479 }, { "epoch": 12.1344, "grad_norm": 0.9263928532600403, "learning_rate": 3.105642256902761e-05, "loss": 0.4731, "step": 9480 }, { "epoch": 
12.13568, "grad_norm": 0.8989574313163757, "learning_rate": 3.105442176870749e-05, "loss": 0.5337, "step": 9481 }, { "epoch": 12.13696, "grad_norm": 0.9506709575653076, "learning_rate": 3.105242096838736e-05, "loss": 0.5357, "step": 9482 }, { "epoch": 12.13824, "grad_norm": 0.9234217405319214, "learning_rate": 3.105042016806723e-05, "loss": 0.5471, "step": 9483 }, { "epoch": 12.13952, "grad_norm": 0.8666446805000305, "learning_rate": 3.10484193677471e-05, "loss": 0.5298, "step": 9484 }, { "epoch": 12.1408, "grad_norm": 0.9469799399375916, "learning_rate": 3.104641856742697e-05, "loss": 0.546, "step": 9485 }, { "epoch": 12.14208, "grad_norm": 0.8840956091880798, "learning_rate": 3.104441776710684e-05, "loss": 0.5417, "step": 9486 }, { "epoch": 12.14336, "grad_norm": 0.8798984885215759, "learning_rate": 3.104241696678671e-05, "loss": 0.5403, "step": 9487 }, { "epoch": 12.14464, "grad_norm": 0.9367090463638306, "learning_rate": 3.104041616646659e-05, "loss": 0.562, "step": 9488 }, { "epoch": 12.14592, "grad_norm": 0.8626176118850708, "learning_rate": 3.103841536614646e-05, "loss": 0.5044, "step": 9489 }, { "epoch": 12.1472, "grad_norm": 0.953429102897644, "learning_rate": 3.1036414565826335e-05, "loss": 0.5363, "step": 9490 }, { "epoch": 12.14848, "grad_norm": 0.847478449344635, "learning_rate": 3.103441376550621e-05, "loss": 0.4921, "step": 9491 }, { "epoch": 12.14976, "grad_norm": 0.8839147090911865, "learning_rate": 3.103241296518607e-05, "loss": 0.5174, "step": 9492 }, { "epoch": 12.15104, "grad_norm": 0.952755868434906, "learning_rate": 3.1030412164865944e-05, "loss": 0.5461, "step": 9493 }, { "epoch": 12.15232, "grad_norm": 0.8509531021118164, "learning_rate": 3.1028411364545816e-05, "loss": 0.5218, "step": 9494 }, { "epoch": 12.1536, "grad_norm": 0.9300630688667297, "learning_rate": 3.1026410564225694e-05, "loss": 0.5625, "step": 9495 }, { "epoch": 12.15488, "grad_norm": 0.9531006217002869, "learning_rate": 3.1024409763905566e-05, "loss": 0.5831, "step": 9496 
}, { "epoch": 12.15616, "grad_norm": 0.9329797625541687, "learning_rate": 3.102240896358544e-05, "loss": 0.539, "step": 9497 }, { "epoch": 12.15744, "grad_norm": 0.855829656124115, "learning_rate": 3.102040816326531e-05, "loss": 0.4812, "step": 9498 }, { "epoch": 12.15872, "grad_norm": 0.8939371705055237, "learning_rate": 3.101840736294518e-05, "loss": 0.5208, "step": 9499 }, { "epoch": 12.16, "grad_norm": 0.9188044667243958, "learning_rate": 3.101640656262505e-05, "loss": 0.545, "step": 9500 }, { "epoch": 12.16128, "grad_norm": 0.8524420261383057, "learning_rate": 3.101440576230492e-05, "loss": 0.5028, "step": 9501 }, { "epoch": 12.16256, "grad_norm": 0.906829297542572, "learning_rate": 3.10124049619848e-05, "loss": 0.5557, "step": 9502 }, { "epoch": 12.16384, "grad_norm": 0.904629647731781, "learning_rate": 3.101040416166467e-05, "loss": 0.5265, "step": 9503 }, { "epoch": 12.16512, "grad_norm": 0.930142343044281, "learning_rate": 3.100840336134454e-05, "loss": 0.5353, "step": 9504 }, { "epoch": 12.1664, "grad_norm": 0.9185201525688171, "learning_rate": 3.100640256102441e-05, "loss": 0.5448, "step": 9505 }, { "epoch": 12.16768, "grad_norm": 0.9382092952728271, "learning_rate": 3.1004401760704285e-05, "loss": 0.5498, "step": 9506 }, { "epoch": 12.16896, "grad_norm": 0.9041383862495422, "learning_rate": 3.1002400960384156e-05, "loss": 0.4895, "step": 9507 }, { "epoch": 12.17024, "grad_norm": 0.921959638595581, "learning_rate": 3.100040016006402e-05, "loss": 0.5513, "step": 9508 }, { "epoch": 12.17152, "grad_norm": 0.9161378145217896, "learning_rate": 3.09983993597439e-05, "loss": 0.5745, "step": 9509 }, { "epoch": 12.1728, "grad_norm": 0.9752117991447449, "learning_rate": 3.099639855942377e-05, "loss": 0.5436, "step": 9510 }, { "epoch": 12.17408, "grad_norm": 0.8904070854187012, "learning_rate": 3.0994397759103644e-05, "loss": 0.5351, "step": 9511 }, { "epoch": 12.17536, "grad_norm": 0.8877695202827454, "learning_rate": 3.0992396958783516e-05, "loss": 0.5604, 
"step": 9512 }, { "epoch": 12.17664, "grad_norm": 0.9066382050514221, "learning_rate": 3.099039615846339e-05, "loss": 0.5335, "step": 9513 }, { "epoch": 12.17792, "grad_norm": 0.9197863936424255, "learning_rate": 3.098839535814326e-05, "loss": 0.5328, "step": 9514 }, { "epoch": 12.1792, "grad_norm": 0.9085410237312317, "learning_rate": 3.098639455782313e-05, "loss": 0.4907, "step": 9515 }, { "epoch": 12.18048, "grad_norm": 0.9633046984672546, "learning_rate": 3.0984393757503e-05, "loss": 0.5437, "step": 9516 }, { "epoch": 12.18176, "grad_norm": 0.9514236450195312, "learning_rate": 3.0982392957182875e-05, "loss": 0.5653, "step": 9517 }, { "epoch": 12.18304, "grad_norm": 0.9399169087409973, "learning_rate": 3.098039215686275e-05, "loss": 0.521, "step": 9518 }, { "epoch": 12.18432, "grad_norm": 0.9197844862937927, "learning_rate": 3.097839135654262e-05, "loss": 0.5644, "step": 9519 }, { "epoch": 12.1856, "grad_norm": 0.8995923399925232, "learning_rate": 3.097639055622249e-05, "loss": 0.5357, "step": 9520 }, { "epoch": 12.18688, "grad_norm": 0.9238200783729553, "learning_rate": 3.097438975590236e-05, "loss": 0.5414, "step": 9521 }, { "epoch": 12.18816, "grad_norm": 0.9110772609710693, "learning_rate": 3.0972388955582234e-05, "loss": 0.5164, "step": 9522 }, { "epoch": 12.18944, "grad_norm": 0.9395493865013123, "learning_rate": 3.0970388155262106e-05, "loss": 0.5542, "step": 9523 }, { "epoch": 12.19072, "grad_norm": 0.8886218667030334, "learning_rate": 3.096838735494198e-05, "loss": 0.4889, "step": 9524 }, { "epoch": 12.192, "grad_norm": 0.9301427006721497, "learning_rate": 3.096638655462185e-05, "loss": 0.5434, "step": 9525 }, { "epoch": 12.19328, "grad_norm": 0.956852912902832, "learning_rate": 3.096438575430172e-05, "loss": 0.5286, "step": 9526 }, { "epoch": 12.19456, "grad_norm": 0.9380202293395996, "learning_rate": 3.0962384953981594e-05, "loss": 0.5866, "step": 9527 }, { "epoch": 12.19584, "grad_norm": 0.9663145542144775, "learning_rate": 3.0960384153661465e-05, 
"loss": 0.5784, "step": 9528 }, { "epoch": 12.19712, "grad_norm": 0.9319117069244385, "learning_rate": 3.095838335334134e-05, "loss": 0.5328, "step": 9529 }, { "epoch": 12.1984, "grad_norm": 0.9537164568901062, "learning_rate": 3.0956382553021216e-05, "loss": 0.5598, "step": 9530 }, { "epoch": 12.19968, "grad_norm": 0.9373032450675964, "learning_rate": 3.095438175270108e-05, "loss": 0.5457, "step": 9531 }, { "epoch": 12.20096, "grad_norm": 0.9559725522994995, "learning_rate": 3.095238095238095e-05, "loss": 0.5518, "step": 9532 }, { "epoch": 12.20224, "grad_norm": 0.9501890540122986, "learning_rate": 3.0950380152060825e-05, "loss": 0.5554, "step": 9533 }, { "epoch": 12.20352, "grad_norm": 0.9486271142959595, "learning_rate": 3.0948379351740697e-05, "loss": 0.5774, "step": 9534 }, { "epoch": 12.2048, "grad_norm": 0.8942713737487793, "learning_rate": 3.094637855142057e-05, "loss": 0.539, "step": 9535 }, { "epoch": 12.20608, "grad_norm": 0.9453210234642029, "learning_rate": 3.094437775110044e-05, "loss": 0.5477, "step": 9536 }, { "epoch": 12.20736, "grad_norm": 0.9913842678070068, "learning_rate": 3.094237695078032e-05, "loss": 0.5536, "step": 9537 }, { "epoch": 12.20864, "grad_norm": 0.9225572347640991, "learning_rate": 3.094037615046019e-05, "loss": 0.5175, "step": 9538 }, { "epoch": 12.20992, "grad_norm": 0.8077876567840576, "learning_rate": 3.0938375350140056e-05, "loss": 0.5227, "step": 9539 }, { "epoch": 12.2112, "grad_norm": 0.9560815691947937, "learning_rate": 3.093637454981993e-05, "loss": 0.5408, "step": 9540 }, { "epoch": 12.21248, "grad_norm": 0.9433364868164062, "learning_rate": 3.09343737494998e-05, "loss": 0.5622, "step": 9541 }, { "epoch": 12.21376, "grad_norm": 0.934070885181427, "learning_rate": 3.093237294917967e-05, "loss": 0.5873, "step": 9542 }, { "epoch": 12.21504, "grad_norm": 0.8797191977500916, "learning_rate": 3.093037214885954e-05, "loss": 0.5258, "step": 9543 }, { "epoch": 12.21632, "grad_norm": 0.9075624346733093, "learning_rate": 
3.092837134853942e-05, "loss": 0.5438, "step": 9544 }, { "epoch": 12.2176, "grad_norm": 0.9165863990783691, "learning_rate": 3.0926370548219294e-05, "loss": 0.5279, "step": 9545 }, { "epoch": 12.21888, "grad_norm": 0.9102526307106018, "learning_rate": 3.0924369747899166e-05, "loss": 0.5665, "step": 9546 }, { "epoch": 12.22016, "grad_norm": 0.9222195744514465, "learning_rate": 3.092236894757903e-05, "loss": 0.5457, "step": 9547 }, { "epoch": 12.22144, "grad_norm": 0.9091429710388184, "learning_rate": 3.09203681472589e-05, "loss": 0.5565, "step": 9548 }, { "epoch": 12.22272, "grad_norm": 0.8707777857780457, "learning_rate": 3.0918367346938774e-05, "loss": 0.5086, "step": 9549 }, { "epoch": 12.224, "grad_norm": 0.8751879930496216, "learning_rate": 3.0916366546618646e-05, "loss": 0.4871, "step": 9550 }, { "epoch": 12.22528, "grad_norm": 0.9578078389167786, "learning_rate": 3.091436574629852e-05, "loss": 0.5734, "step": 9551 }, { "epoch": 12.22656, "grad_norm": 0.9274276494979858, "learning_rate": 3.09123649459784e-05, "loss": 0.5629, "step": 9552 }, { "epoch": 12.22784, "grad_norm": 0.9546663761138916, "learning_rate": 3.091036414565827e-05, "loss": 0.5466, "step": 9553 }, { "epoch": 12.22912, "grad_norm": 0.9720085859298706, "learning_rate": 3.090836334533814e-05, "loss": 0.5884, "step": 9554 }, { "epoch": 12.2304, "grad_norm": 0.9668819308280945, "learning_rate": 3.0906362545018006e-05, "loss": 0.5534, "step": 9555 }, { "epoch": 12.23168, "grad_norm": 0.9304522275924683, "learning_rate": 3.090436174469788e-05, "loss": 0.5697, "step": 9556 }, { "epoch": 12.23296, "grad_norm": 0.915131688117981, "learning_rate": 3.090236094437775e-05, "loss": 0.5135, "step": 9557 }, { "epoch": 12.23424, "grad_norm": 0.8442711234092712, "learning_rate": 3.090036014405762e-05, "loss": 0.5244, "step": 9558 }, { "epoch": 12.23552, "grad_norm": 0.8993996381759644, "learning_rate": 3.08983593437375e-05, "loss": 0.5362, "step": 9559 }, { "epoch": 12.2368, "grad_norm": 0.9662063121795654, 
"learning_rate": 3.089635854341737e-05, "loss": 0.5618, "step": 9560 }, { "epoch": 12.23808, "grad_norm": 0.9213639497756958, "learning_rate": 3.0894357743097244e-05, "loss": 0.5064, "step": 9561 }, { "epoch": 12.23936, "grad_norm": 0.9037312269210815, "learning_rate": 3.0892356942777115e-05, "loss": 0.5408, "step": 9562 }, { "epoch": 12.24064, "grad_norm": 0.9527158141136169, "learning_rate": 3.089035614245698e-05, "loss": 0.5756, "step": 9563 }, { "epoch": 12.24192, "grad_norm": 0.886060357093811, "learning_rate": 3.088835534213685e-05, "loss": 0.5351, "step": 9564 }, { "epoch": 12.2432, "grad_norm": 0.9264008402824402, "learning_rate": 3.0886354541816724e-05, "loss": 0.5426, "step": 9565 }, { "epoch": 12.24448, "grad_norm": 0.9923083782196045, "learning_rate": 3.08843537414966e-05, "loss": 0.556, "step": 9566 }, { "epoch": 12.24576, "grad_norm": 0.9164190888404846, "learning_rate": 3.0882352941176475e-05, "loss": 0.5639, "step": 9567 }, { "epoch": 12.24704, "grad_norm": 0.9647570252418518, "learning_rate": 3.0880352140856347e-05, "loss": 0.5686, "step": 9568 }, { "epoch": 12.24832, "grad_norm": 0.9134481549263, "learning_rate": 3.087835134053622e-05, "loss": 0.5295, "step": 9569 }, { "epoch": 12.2496, "grad_norm": 0.8650757670402527, "learning_rate": 3.087635054021609e-05, "loss": 0.5215, "step": 9570 }, { "epoch": 12.25088, "grad_norm": 0.9101705551147461, "learning_rate": 3.0874349739895955e-05, "loss": 0.5681, "step": 9571 }, { "epoch": 12.25216, "grad_norm": 0.9110287427902222, "learning_rate": 3.087234893957583e-05, "loss": 0.5318, "step": 9572 }, { "epoch": 12.25344, "grad_norm": 0.8674918413162231, "learning_rate": 3.0870348139255706e-05, "loss": 0.5185, "step": 9573 }, { "epoch": 12.25472, "grad_norm": 0.9074335694313049, "learning_rate": 3.086834733893558e-05, "loss": 0.5352, "step": 9574 }, { "epoch": 12.256, "grad_norm": 0.9423937797546387, "learning_rate": 3.086634653861545e-05, "loss": 0.4859, "step": 9575 }, { "epoch": 12.25728, "grad_norm": 
0.8732250928878784, "learning_rate": 3.086434573829532e-05, "loss": 0.5237, "step": 9576 }, { "epoch": 12.25856, "grad_norm": 0.9658347964286804, "learning_rate": 3.086234493797519e-05, "loss": 0.5853, "step": 9577 }, { "epoch": 12.25984, "grad_norm": 0.9143103957176208, "learning_rate": 3.0860344137655065e-05, "loss": 0.5277, "step": 9578 }, { "epoch": 12.26112, "grad_norm": 0.8892953395843506, "learning_rate": 3.085834333733493e-05, "loss": 0.5178, "step": 9579 }, { "epoch": 12.2624, "grad_norm": 0.8988597393035889, "learning_rate": 3.085634253701481e-05, "loss": 0.5443, "step": 9580 }, { "epoch": 12.26368, "grad_norm": 0.9245293736457825, "learning_rate": 3.085434173669468e-05, "loss": 0.5107, "step": 9581 }, { "epoch": 12.26496, "grad_norm": 0.9400307536125183, "learning_rate": 3.085234093637455e-05, "loss": 0.5741, "step": 9582 }, { "epoch": 12.26624, "grad_norm": 0.9172420501708984, "learning_rate": 3.0850340136054424e-05, "loss": 0.5751, "step": 9583 }, { "epoch": 12.26752, "grad_norm": 0.8972305059432983, "learning_rate": 3.0848339335734296e-05, "loss": 0.4933, "step": 9584 }, { "epoch": 12.2688, "grad_norm": 0.8898636698722839, "learning_rate": 3.084633853541417e-05, "loss": 0.5589, "step": 9585 }, { "epoch": 12.27008, "grad_norm": 0.9300894141197205, "learning_rate": 3.084433773509404e-05, "loss": 0.5789, "step": 9586 }, { "epoch": 12.27136, "grad_norm": 0.8965767621994019, "learning_rate": 3.084233693477391e-05, "loss": 0.5071, "step": 9587 }, { "epoch": 12.272639999999999, "grad_norm": 0.9093717932701111, "learning_rate": 3.0840336134453784e-05, "loss": 0.5453, "step": 9588 }, { "epoch": 12.27392, "grad_norm": 0.9376620650291443, "learning_rate": 3.0838335334133656e-05, "loss": 0.5392, "step": 9589 }, { "epoch": 12.2752, "grad_norm": 0.9461066126823425, "learning_rate": 3.083633453381353e-05, "loss": 0.5636, "step": 9590 }, { "epoch": 12.27648, "grad_norm": 0.8881168365478516, "learning_rate": 3.08343337334934e-05, "loss": 0.5378, "step": 9591 }, { 
"epoch": 12.27776, "grad_norm": 0.9719573855400085, "learning_rate": 3.083233293317327e-05, "loss": 0.6019, "step": 9592 }, { "epoch": 12.27904, "grad_norm": 0.9166926741600037, "learning_rate": 3.083033213285314e-05, "loss": 0.579, "step": 9593 }, { "epoch": 12.28032, "grad_norm": 0.9417465925216675, "learning_rate": 3.0828331332533015e-05, "loss": 0.5209, "step": 9594 }, { "epoch": 12.2816, "grad_norm": 0.9969757199287415, "learning_rate": 3.082633053221289e-05, "loss": 0.5814, "step": 9595 }, { "epoch": 12.28288, "grad_norm": 0.9610475897789001, "learning_rate": 3.082432973189276e-05, "loss": 0.5997, "step": 9596 }, { "epoch": 12.28416, "grad_norm": 0.8945482969284058, "learning_rate": 3.082232893157263e-05, "loss": 0.5166, "step": 9597 }, { "epoch": 12.28544, "grad_norm": 0.9552536606788635, "learning_rate": 3.08203281312525e-05, "loss": 0.5762, "step": 9598 }, { "epoch": 12.28672, "grad_norm": 0.9500235915184021, "learning_rate": 3.0818327330932374e-05, "loss": 0.5129, "step": 9599 }, { "epoch": 12.288, "grad_norm": 0.8789950609207153, "learning_rate": 3.0816326530612246e-05, "loss": 0.5308, "step": 9600 }, { "epoch": 12.28928, "grad_norm": 0.8765109181404114, "learning_rate": 3.081432573029212e-05, "loss": 0.4918, "step": 9601 }, { "epoch": 12.29056, "grad_norm": 0.9502473473548889, "learning_rate": 3.081232492997199e-05, "loss": 0.5485, "step": 9602 }, { "epoch": 12.29184, "grad_norm": 0.9585152864456177, "learning_rate": 3.081032412965186e-05, "loss": 0.5748, "step": 9603 }, { "epoch": 12.29312, "grad_norm": 0.9246171116828918, "learning_rate": 3.080832332933173e-05, "loss": 0.5394, "step": 9604 }, { "epoch": 12.2944, "grad_norm": 0.9247891902923584, "learning_rate": 3.0806322529011605e-05, "loss": 0.5278, "step": 9605 }, { "epoch": 12.29568, "grad_norm": 0.9395011067390442, "learning_rate": 3.080432172869148e-05, "loss": 0.555, "step": 9606 }, { "epoch": 12.29696, "grad_norm": 0.9174982905387878, "learning_rate": 3.080232092837135e-05, "loss": 0.51, 
"step": 9607 }, { "epoch": 12.29824, "grad_norm": 0.8428568840026855, "learning_rate": 3.080032012805123e-05, "loss": 0.5107, "step": 9608 }, { "epoch": 12.29952, "grad_norm": 0.9542026519775391, "learning_rate": 3.079831932773109e-05, "loss": 0.5491, "step": 9609 }, { "epoch": 12.3008, "grad_norm": 0.9842756390571594, "learning_rate": 3.0796318527410964e-05, "loss": 0.5392, "step": 9610 }, { "epoch": 12.30208, "grad_norm": 0.9100461006164551, "learning_rate": 3.0794317727090836e-05, "loss": 0.5535, "step": 9611 }, { "epoch": 12.30336, "grad_norm": 0.8975451588630676, "learning_rate": 3.079231692677071e-05, "loss": 0.5449, "step": 9612 }, { "epoch": 12.30464, "grad_norm": 0.9020168781280518, "learning_rate": 3.079031612645058e-05, "loss": 0.5142, "step": 9613 }, { "epoch": 12.30592, "grad_norm": 0.9032678604125977, "learning_rate": 3.078831532613045e-05, "loss": 0.5045, "step": 9614 }, { "epoch": 12.3072, "grad_norm": 0.9697889089584351, "learning_rate": 3.078631452581033e-05, "loss": 0.5418, "step": 9615 }, { "epoch": 12.30848, "grad_norm": 0.9151008725166321, "learning_rate": 3.07843137254902e-05, "loss": 0.5524, "step": 9616 }, { "epoch": 12.30976, "grad_norm": 0.9026890993118286, "learning_rate": 3.078231292517007e-05, "loss": 0.5102, "step": 9617 }, { "epoch": 12.31104, "grad_norm": 0.9061116576194763, "learning_rate": 3.078031212484994e-05, "loss": 0.5607, "step": 9618 }, { "epoch": 12.31232, "grad_norm": 0.893916666507721, "learning_rate": 3.077831132452981e-05, "loss": 0.4953, "step": 9619 }, { "epoch": 12.3136, "grad_norm": 0.9334031343460083, "learning_rate": 3.077631052420968e-05, "loss": 0.5265, "step": 9620 }, { "epoch": 12.31488, "grad_norm": 0.8903616070747375, "learning_rate": 3.0774309723889555e-05, "loss": 0.5208, "step": 9621 }, { "epoch": 12.31616, "grad_norm": 0.9477413296699524, "learning_rate": 3.0772308923569434e-05, "loss": 0.5759, "step": 9622 }, { "epoch": 12.31744, "grad_norm": 0.881305456161499, "learning_rate": 3.0770308123249305e-05, 
"loss": 0.531, "step": 9623 }, { "epoch": 12.31872, "grad_norm": 0.9640594124794006, "learning_rate": 3.076830732292918e-05, "loss": 0.5694, "step": 9624 }, { "epoch": 12.32, "grad_norm": 0.888465404510498, "learning_rate": 3.076630652260904e-05, "loss": 0.4961, "step": 9625 }, { "epoch": 12.32128, "grad_norm": 0.8741172552108765, "learning_rate": 3.0764305722288914e-05, "loss": 0.5258, "step": 9626 }, { "epoch": 12.32256, "grad_norm": 0.9086135029792786, "learning_rate": 3.0762304921968786e-05, "loss": 0.545, "step": 9627 }, { "epoch": 12.32384, "grad_norm": 0.8762311935424805, "learning_rate": 3.076030412164866e-05, "loss": 0.538, "step": 9628 }, { "epoch": 12.32512, "grad_norm": 0.9073178172111511, "learning_rate": 3.0758303321328537e-05, "loss": 0.5466, "step": 9629 }, { "epoch": 12.3264, "grad_norm": 0.866665780544281, "learning_rate": 3.075630252100841e-05, "loss": 0.545, "step": 9630 }, { "epoch": 12.32768, "grad_norm": 0.9432623386383057, "learning_rate": 3.075430172068828e-05, "loss": 0.5641, "step": 9631 }, { "epoch": 12.32896, "grad_norm": 0.9183183312416077, "learning_rate": 3.075230092036815e-05, "loss": 0.5232, "step": 9632 }, { "epoch": 12.33024, "grad_norm": 0.9821698069572449, "learning_rate": 3.075030012004802e-05, "loss": 0.5631, "step": 9633 }, { "epoch": 12.33152, "grad_norm": 0.9003911018371582, "learning_rate": 3.074829931972789e-05, "loss": 0.5493, "step": 9634 }, { "epoch": 12.3328, "grad_norm": 0.9370520114898682, "learning_rate": 3.074629851940776e-05, "loss": 0.556, "step": 9635 }, { "epoch": 12.33408, "grad_norm": 0.8950033187866211, "learning_rate": 3.074429771908764e-05, "loss": 0.5258, "step": 9636 }, { "epoch": 12.33536, "grad_norm": 0.9155859351158142, "learning_rate": 3.074229691876751e-05, "loss": 0.5623, "step": 9637 }, { "epoch": 12.33664, "grad_norm": 0.929165244102478, "learning_rate": 3.074029611844738e-05, "loss": 0.5535, "step": 9638 }, { "epoch": 12.33792, "grad_norm": 0.9462549686431885, "learning_rate": 
3.0738295318127255e-05, "loss": 0.5687, "step": 9639 }, { "epoch": 12.3392, "grad_norm": 0.9272847175598145, "learning_rate": 3.073629451780713e-05, "loss": 0.5393, "step": 9640 }, { "epoch": 12.34048, "grad_norm": 0.8985689282417297, "learning_rate": 3.073429371748699e-05, "loss": 0.5385, "step": 9641 }, { "epoch": 12.34176, "grad_norm": 0.9289854764938354, "learning_rate": 3.0732292917166864e-05, "loss": 0.5499, "step": 9642 }, { "epoch": 12.34304, "grad_norm": 0.9333150386810303, "learning_rate": 3.073029211684674e-05, "loss": 0.5588, "step": 9643 }, { "epoch": 12.34432, "grad_norm": 0.9284766316413879, "learning_rate": 3.0728291316526614e-05, "loss": 0.5602, "step": 9644 }, { "epoch": 12.3456, "grad_norm": 0.9135120511054993, "learning_rate": 3.0726290516206486e-05, "loss": 0.5444, "step": 9645 }, { "epoch": 12.34688, "grad_norm": 0.8909054398536682, "learning_rate": 3.072428971588636e-05, "loss": 0.553, "step": 9646 }, { "epoch": 12.34816, "grad_norm": 0.9332317113876343, "learning_rate": 3.072228891556623e-05, "loss": 0.558, "step": 9647 }, { "epoch": 12.34944, "grad_norm": 0.8675878643989563, "learning_rate": 3.07202881152461e-05, "loss": 0.5093, "step": 9648 }, { "epoch": 12.35072, "grad_norm": 0.9375959634780884, "learning_rate": 3.071828731492597e-05, "loss": 0.5277, "step": 9649 }, { "epoch": 12.352, "grad_norm": 0.9183427691459656, "learning_rate": 3.0716286514605846e-05, "loss": 0.5171, "step": 9650 }, { "epoch": 12.35328, "grad_norm": 0.9373577237129211, "learning_rate": 3.071428571428572e-05, "loss": 0.5495, "step": 9651 }, { "epoch": 12.35456, "grad_norm": 0.9106853604316711, "learning_rate": 3.071228491396559e-05, "loss": 0.512, "step": 9652 }, { "epoch": 12.35584, "grad_norm": 0.9207229614257812, "learning_rate": 3.071028411364546e-05, "loss": 0.538, "step": 9653 }, { "epoch": 12.35712, "grad_norm": 0.9344127178192139, "learning_rate": 3.070828331332533e-05, "loss": 0.5688, "step": 9654 }, { "epoch": 12.3584, "grad_norm": 0.9381983280181885, 
"learning_rate": 3.0706282513005205e-05, "loss": 0.5687, "step": 9655 }, { "epoch": 12.35968, "grad_norm": 0.9426577687263489, "learning_rate": 3.070428171268508e-05, "loss": 0.5498, "step": 9656 }, { "epoch": 12.36096, "grad_norm": 0.9524323344230652, "learning_rate": 3.070228091236495e-05, "loss": 0.5394, "step": 9657 }, { "epoch": 12.36224, "grad_norm": 0.9800800681114197, "learning_rate": 3.070028011204482e-05, "loss": 0.5799, "step": 9658 }, { "epoch": 12.36352, "grad_norm": 0.9301595687866211, "learning_rate": 3.069827931172469e-05, "loss": 0.551, "step": 9659 }, { "epoch": 12.3648, "grad_norm": 0.9066020250320435, "learning_rate": 3.0696278511404564e-05, "loss": 0.5535, "step": 9660 }, { "epoch": 12.36608, "grad_norm": 0.9361892938613892, "learning_rate": 3.0694277711084436e-05, "loss": 0.5224, "step": 9661 }, { "epoch": 12.36736, "grad_norm": 0.9013199806213379, "learning_rate": 3.069227691076431e-05, "loss": 0.5531, "step": 9662 }, { "epoch": 12.36864, "grad_norm": 0.9619473814964294, "learning_rate": 3.069027611044418e-05, "loss": 0.5865, "step": 9663 }, { "epoch": 12.36992, "grad_norm": 0.9332448244094849, "learning_rate": 3.068827531012405e-05, "loss": 0.528, "step": 9664 }, { "epoch": 12.3712, "grad_norm": 0.9028695821762085, "learning_rate": 3.0686274509803923e-05, "loss": 0.5707, "step": 9665 }, { "epoch": 12.37248, "grad_norm": 0.9249699115753174, "learning_rate": 3.0684273709483795e-05, "loss": 0.5385, "step": 9666 }, { "epoch": 12.37376, "grad_norm": 0.8583320379257202, "learning_rate": 3.068227290916367e-05, "loss": 0.5264, "step": 9667 }, { "epoch": 12.37504, "grad_norm": 0.8901596069335938, "learning_rate": 3.068027210884354e-05, "loss": 0.5339, "step": 9668 }, { "epoch": 12.37632, "grad_norm": 0.9462403059005737, "learning_rate": 3.067827130852341e-05, "loss": 0.5604, "step": 9669 }, { "epoch": 12.3776, "grad_norm": 0.9234113693237305, "learning_rate": 3.067627050820328e-05, "loss": 0.5289, "step": 9670 }, { "epoch": 12.37888, "grad_norm": 
0.9540828466415405, "learning_rate": 3.0674269707883155e-05, "loss": 0.5867, "step": 9671 }, { "epoch": 12.38016, "grad_norm": 0.936007559299469, "learning_rate": 3.0672268907563026e-05, "loss": 0.5348, "step": 9672 }, { "epoch": 12.38144, "grad_norm": 0.9366984367370605, "learning_rate": 3.06702681072429e-05, "loss": 0.52, "step": 9673 }, { "epoch": 12.38272, "grad_norm": 0.9363959431648254, "learning_rate": 3.066826730692277e-05, "loss": 0.5781, "step": 9674 }, { "epoch": 12.384, "grad_norm": 0.947193443775177, "learning_rate": 3.066626650660264e-05, "loss": 0.5687, "step": 9675 }, { "epoch": 12.38528, "grad_norm": 0.8775532841682434, "learning_rate": 3.0664265706282514e-05, "loss": 0.5269, "step": 9676 }, { "epoch": 12.38656, "grad_norm": 0.8915206789970398, "learning_rate": 3.0662264905962386e-05, "loss": 0.5509, "step": 9677 }, { "epoch": 12.38784, "grad_norm": 0.9614311456680298, "learning_rate": 3.066026410564226e-05, "loss": 0.6082, "step": 9678 }, { "epoch": 12.38912, "grad_norm": 0.9689681529998779, "learning_rate": 3.065826330532213e-05, "loss": 0.5464, "step": 9679 }, { "epoch": 12.3904, "grad_norm": 0.9640049338340759, "learning_rate": 3.0656262505002e-05, "loss": 0.5523, "step": 9680 }, { "epoch": 12.39168, "grad_norm": 0.8928719758987427, "learning_rate": 3.065426170468187e-05, "loss": 0.568, "step": 9681 }, { "epoch": 12.39296, "grad_norm": 0.9739533066749573, "learning_rate": 3.0652260904361745e-05, "loss": 0.5606, "step": 9682 }, { "epoch": 12.39424, "grad_norm": 0.894177258014679, "learning_rate": 3.065026010404162e-05, "loss": 0.5587, "step": 9683 }, { "epoch": 12.39552, "grad_norm": 0.9096843600273132, "learning_rate": 3.064825930372149e-05, "loss": 0.5484, "step": 9684 }, { "epoch": 12.3968, "grad_norm": 0.862076997756958, "learning_rate": 3.064625850340136e-05, "loss": 0.4924, "step": 9685 }, { "epoch": 12.39808, "grad_norm": 0.8842014670372009, "learning_rate": 3.064425770308124e-05, "loss": 0.5408, "step": 9686 }, { "epoch": 12.39936, 
"grad_norm": 0.9338904023170471, "learning_rate": 3.0642256902761104e-05, "loss": 0.556, "step": 9687 }, { "epoch": 12.40064, "grad_norm": 0.9398663640022278, "learning_rate": 3.0640256102440976e-05, "loss": 0.5535, "step": 9688 }, { "epoch": 12.40192, "grad_norm": 0.944598913192749, "learning_rate": 3.063825530212085e-05, "loss": 0.5534, "step": 9689 }, { "epoch": 12.4032, "grad_norm": 0.9525898098945618, "learning_rate": 3.063625450180072e-05, "loss": 0.5571, "step": 9690 }, { "epoch": 12.40448, "grad_norm": 0.9750270247459412, "learning_rate": 3.063425370148059e-05, "loss": 0.6101, "step": 9691 }, { "epoch": 12.40576, "grad_norm": 0.9584131240844727, "learning_rate": 3.0632252901160464e-05, "loss": 0.5429, "step": 9692 }, { "epoch": 12.40704, "grad_norm": 0.8970118165016174, "learning_rate": 3.063025210084034e-05, "loss": 0.537, "step": 9693 }, { "epoch": 12.40832, "grad_norm": 0.9430438876152039, "learning_rate": 3.0628251300520214e-05, "loss": 0.5364, "step": 9694 }, { "epoch": 12.4096, "grad_norm": 0.9645063281059265, "learning_rate": 3.062625050020008e-05, "loss": 0.5354, "step": 9695 }, { "epoch": 12.41088, "grad_norm": 0.9782135486602783, "learning_rate": 3.062424969987995e-05, "loss": 0.551, "step": 9696 }, { "epoch": 12.41216, "grad_norm": 0.9144942760467529, "learning_rate": 3.062224889955982e-05, "loss": 0.5719, "step": 9697 }, { "epoch": 12.41344, "grad_norm": 0.9149807691574097, "learning_rate": 3.0620248099239695e-05, "loss": 0.4937, "step": 9698 }, { "epoch": 12.414719999999999, "grad_norm": 0.9331946969032288, "learning_rate": 3.0618247298919567e-05, "loss": 0.5472, "step": 9699 }, { "epoch": 12.416, "grad_norm": 0.9157038927078247, "learning_rate": 3.0616246498599445e-05, "loss": 0.5839, "step": 9700 }, { "epoch": 12.41728, "grad_norm": 0.955386757850647, "learning_rate": 3.061424569827932e-05, "loss": 0.5573, "step": 9701 }, { "epoch": 12.41856, "grad_norm": 0.9963140487670898, "learning_rate": 3.061224489795919e-05, "loss": 0.5524, "step": 9702 
}, { "epoch": 12.41984, "grad_norm": 0.9035356044769287, "learning_rate": 3.0610244097639054e-05, "loss": 0.5279, "step": 9703 }, { "epoch": 12.42112, "grad_norm": 0.885988712310791, "learning_rate": 3.0608243297318926e-05, "loss": 0.5387, "step": 9704 }, { "epoch": 12.4224, "grad_norm": 0.9754456281661987, "learning_rate": 3.06062424969988e-05, "loss": 0.5724, "step": 9705 }, { "epoch": 12.42368, "grad_norm": 0.8800594806671143, "learning_rate": 3.060424169667867e-05, "loss": 0.503, "step": 9706 }, { "epoch": 12.42496, "grad_norm": 0.9732749462127686, "learning_rate": 3.060224089635855e-05, "loss": 0.5478, "step": 9707 }, { "epoch": 12.42624, "grad_norm": 0.8836948871612549, "learning_rate": 3.060024009603842e-05, "loss": 0.5481, "step": 9708 }, { "epoch": 12.42752, "grad_norm": 0.8984959721565247, "learning_rate": 3.059823929571829e-05, "loss": 0.57, "step": 9709 }, { "epoch": 12.4288, "grad_norm": 0.9155954122543335, "learning_rate": 3.0596238495398164e-05, "loss": 0.5659, "step": 9710 }, { "epoch": 12.43008, "grad_norm": 0.897638201713562, "learning_rate": 3.059423769507803e-05, "loss": 0.5256, "step": 9711 }, { "epoch": 12.43136, "grad_norm": 0.9188939929008484, "learning_rate": 3.05922368947579e-05, "loss": 0.575, "step": 9712 }, { "epoch": 12.43264, "grad_norm": 0.9399385452270508, "learning_rate": 3.059023609443777e-05, "loss": 0.569, "step": 9713 }, { "epoch": 12.43392, "grad_norm": 0.9348775148391724, "learning_rate": 3.058823529411765e-05, "loss": 0.5243, "step": 9714 }, { "epoch": 12.4352, "grad_norm": 0.9413948655128479, "learning_rate": 3.058623449379752e-05, "loss": 0.5641, "step": 9715 }, { "epoch": 12.43648, "grad_norm": 0.9737404584884644, "learning_rate": 3.0584233693477395e-05, "loss": 0.5496, "step": 9716 }, { "epoch": 12.43776, "grad_norm": 0.8885311484336853, "learning_rate": 3.058223289315727e-05, "loss": 0.5279, "step": 9717 }, { "epoch": 12.43904, "grad_norm": 0.9236472249031067, "learning_rate": 3.058023209283714e-05, "loss": 0.5602, 
"step": 9718 }, { "epoch": 12.44032, "grad_norm": 0.9534575343132019, "learning_rate": 3.0578231292517004e-05, "loss": 0.5359, "step": 9719 }, { "epoch": 12.4416, "grad_norm": 0.9509499073028564, "learning_rate": 3.0576230492196876e-05, "loss": 0.5428, "step": 9720 }, { "epoch": 12.44288, "grad_norm": 0.9832822680473328, "learning_rate": 3.0574229691876754e-05, "loss": 0.5792, "step": 9721 }, { "epoch": 12.44416, "grad_norm": 0.8691754937171936, "learning_rate": 3.0572228891556626e-05, "loss": 0.4652, "step": 9722 }, { "epoch": 12.44544, "grad_norm": 0.9270925521850586, "learning_rate": 3.05702280912365e-05, "loss": 0.5903, "step": 9723 }, { "epoch": 12.44672, "grad_norm": 0.9018378853797913, "learning_rate": 3.056822729091637e-05, "loss": 0.5811, "step": 9724 }, { "epoch": 12.448, "grad_norm": 0.8946050405502319, "learning_rate": 3.056622649059624e-05, "loss": 0.5234, "step": 9725 }, { "epoch": 12.44928, "grad_norm": 0.9452985525131226, "learning_rate": 3.0564225690276113e-05, "loss": 0.5267, "step": 9726 }, { "epoch": 12.45056, "grad_norm": 0.9652531147003174, "learning_rate": 3.056222488995598e-05, "loss": 0.582, "step": 9727 }, { "epoch": 12.45184, "grad_norm": 0.9452758431434631, "learning_rate": 3.056022408963586e-05, "loss": 0.6246, "step": 9728 }, { "epoch": 12.45312, "grad_norm": 0.9329007863998413, "learning_rate": 3.055822328931573e-05, "loss": 0.535, "step": 9729 }, { "epoch": 12.4544, "grad_norm": 0.9430539011955261, "learning_rate": 3.05562224889956e-05, "loss": 0.5769, "step": 9730 }, { "epoch": 12.45568, "grad_norm": 0.9002211689949036, "learning_rate": 3.055422168867547e-05, "loss": 0.5709, "step": 9731 }, { "epoch": 12.45696, "grad_norm": 0.8887854814529419, "learning_rate": 3.0552220888355345e-05, "loss": 0.5366, "step": 9732 }, { "epoch": 12.45824, "grad_norm": 0.8703662753105164, "learning_rate": 3.0550220088035216e-05, "loss": 0.4981, "step": 9733 }, { "epoch": 12.45952, "grad_norm": 0.9427342414855957, "learning_rate": 3.054821928771509e-05, 
"loss": 0.5584, "step": 9734 }, { "epoch": 12.4608, "grad_norm": 0.9092150926589966, "learning_rate": 3.054621848739496e-05, "loss": 0.5098, "step": 9735 }, { "epoch": 12.46208, "grad_norm": 0.9247660636901855, "learning_rate": 3.054421768707483e-05, "loss": 0.5255, "step": 9736 }, { "epoch": 12.46336, "grad_norm": 0.8831589221954346, "learning_rate": 3.0542216886754704e-05, "loss": 0.5409, "step": 9737 }, { "epoch": 12.46464, "grad_norm": 0.9176967740058899, "learning_rate": 3.0540216086434576e-05, "loss": 0.5467, "step": 9738 }, { "epoch": 12.46592, "grad_norm": 0.9632626175880432, "learning_rate": 3.053821528611445e-05, "loss": 0.5785, "step": 9739 }, { "epoch": 12.4672, "grad_norm": 0.9407864212989807, "learning_rate": 3.053621448579432e-05, "loss": 0.5921, "step": 9740 }, { "epoch": 12.46848, "grad_norm": 0.9391652941703796, "learning_rate": 3.053421368547419e-05, "loss": 0.5707, "step": 9741 }, { "epoch": 12.46976, "grad_norm": 0.9201903939247131, "learning_rate": 3.053221288515406e-05, "loss": 0.5635, "step": 9742 }, { "epoch": 12.47104, "grad_norm": 0.9038977026939392, "learning_rate": 3.0530212084833935e-05, "loss": 0.5281, "step": 9743 }, { "epoch": 12.47232, "grad_norm": 0.8993562459945679, "learning_rate": 3.052821128451381e-05, "loss": 0.5565, "step": 9744 }, { "epoch": 12.4736, "grad_norm": 0.9167428016662598, "learning_rate": 3.052621048419368e-05, "loss": 0.6111, "step": 9745 }, { "epoch": 12.47488, "grad_norm": 0.9848437905311584, "learning_rate": 3.052420968387355e-05, "loss": 0.54, "step": 9746 }, { "epoch": 12.47616, "grad_norm": 0.937060534954071, "learning_rate": 3.052220888355342e-05, "loss": 0.5115, "step": 9747 }, { "epoch": 12.47744, "grad_norm": 0.9132764935493469, "learning_rate": 3.0520208083233294e-05, "loss": 0.5782, "step": 9748 }, { "epoch": 12.47872, "grad_norm": 0.89909428358078, "learning_rate": 3.0518207282913166e-05, "loss": 0.5177, "step": 9749 }, { "epoch": 12.48, "grad_norm": 1.0129623413085938, "learning_rate": 
3.051620648259304e-05, "loss": 0.6329, "step": 9750 }, { "epoch": 12.48128, "grad_norm": 0.9289687871932983, "learning_rate": 3.0514205682272913e-05, "loss": 0.4931, "step": 9751 }, { "epoch": 12.48256, "grad_norm": 0.8912503123283386, "learning_rate": 3.0512204881952782e-05, "loss": 0.4795, "step": 9752 }, { "epoch": 12.48384, "grad_norm": 0.9053838849067688, "learning_rate": 3.0510204081632654e-05, "loss": 0.5524, "step": 9753 }, { "epoch": 12.48512, "grad_norm": 0.9058827757835388, "learning_rate": 3.0508203281312525e-05, "loss": 0.5323, "step": 9754 }, { "epoch": 12.4864, "grad_norm": 0.9562934041023254, "learning_rate": 3.0506202480992397e-05, "loss": 0.5378, "step": 9755 }, { "epoch": 12.48768, "grad_norm": 0.8920333981513977, "learning_rate": 3.0504201680672273e-05, "loss": 0.516, "step": 9756 }, { "epoch": 12.48896, "grad_norm": 0.909192681312561, "learning_rate": 3.0502200880352144e-05, "loss": 0.5491, "step": 9757 }, { "epoch": 12.49024, "grad_norm": 0.8839040398597717, "learning_rate": 3.0500200080032016e-05, "loss": 0.5202, "step": 9758 }, { "epoch": 12.49152, "grad_norm": 0.9545120000839233, "learning_rate": 3.0498199279711888e-05, "loss": 0.5876, "step": 9759 }, { "epoch": 12.4928, "grad_norm": 0.885185182094574, "learning_rate": 3.0496198479391757e-05, "loss": 0.5223, "step": 9760 }, { "epoch": 12.49408, "grad_norm": 0.9367235898971558, "learning_rate": 3.049419767907163e-05, "loss": 0.5421, "step": 9761 }, { "epoch": 12.49536, "grad_norm": 0.9383306503295898, "learning_rate": 3.04921968787515e-05, "loss": 0.5907, "step": 9762 }, { "epoch": 12.49664, "grad_norm": 0.869220495223999, "learning_rate": 3.0490196078431376e-05, "loss": 0.5031, "step": 9763 }, { "epoch": 12.49792, "grad_norm": 0.9352115392684937, "learning_rate": 3.0488195278111247e-05, "loss": 0.559, "step": 9764 }, { "epoch": 12.4992, "grad_norm": 0.8659606575965881, "learning_rate": 3.048619447779112e-05, "loss": 0.5303, "step": 9765 }, { "epoch": 12.50048, "grad_norm": 
0.921378493309021, "learning_rate": 3.048419367747099e-05, "loss": 0.5447, "step": 9766 }, { "epoch": 12.50176, "grad_norm": 0.8634151816368103, "learning_rate": 3.0482192877150863e-05, "loss": 0.5637, "step": 9767 }, { "epoch": 12.50304, "grad_norm": 0.9479573965072632, "learning_rate": 3.048019207683073e-05, "loss": 0.5185, "step": 9768 }, { "epoch": 12.50432, "grad_norm": 0.9313710927963257, "learning_rate": 3.0478191276510603e-05, "loss": 0.5721, "step": 9769 }, { "epoch": 12.5056, "grad_norm": 0.9208516478538513, "learning_rate": 3.0476190476190482e-05, "loss": 0.5565, "step": 9770 }, { "epoch": 12.50688, "grad_norm": 0.9547169804573059, "learning_rate": 3.047418967587035e-05, "loss": 0.5426, "step": 9771 }, { "epoch": 12.50816, "grad_norm": 0.8872602581977844, "learning_rate": 3.0472188875550222e-05, "loss": 0.5087, "step": 9772 }, { "epoch": 12.50944, "grad_norm": 0.8986743092536926, "learning_rate": 3.0470188075230094e-05, "loss": 0.5241, "step": 9773 }, { "epoch": 12.51072, "grad_norm": 0.9405331015586853, "learning_rate": 3.0468187274909966e-05, "loss": 0.5672, "step": 9774 }, { "epoch": 12.512, "grad_norm": 0.9390310645103455, "learning_rate": 3.0466186474589838e-05, "loss": 0.5732, "step": 9775 }, { "epoch": 12.51328, "grad_norm": 0.994657039642334, "learning_rate": 3.0464185674269706e-05, "loss": 0.5688, "step": 9776 }, { "epoch": 12.51456, "grad_norm": 0.9204804301261902, "learning_rate": 3.0462184873949578e-05, "loss": 0.6019, "step": 9777 }, { "epoch": 12.51584, "grad_norm": 0.9733226299285889, "learning_rate": 3.0460184073629457e-05, "loss": 0.5185, "step": 9778 }, { "epoch": 12.51712, "grad_norm": 0.9087737202644348, "learning_rate": 3.0458183273309325e-05, "loss": 0.5242, "step": 9779 }, { "epoch": 12.5184, "grad_norm": 0.9519286751747131, "learning_rate": 3.0456182472989197e-05, "loss": 0.5675, "step": 9780 }, { "epoch": 12.51968, "grad_norm": 0.899489164352417, "learning_rate": 3.045418167266907e-05, "loss": 0.5307, "step": 9781 }, { "epoch": 
12.52096, "grad_norm": 0.926542341709137, "learning_rate": 3.045218087234894e-05, "loss": 0.5442, "step": 9782 }, { "epoch": 12.52224, "grad_norm": 0.8883079290390015, "learning_rate": 3.0450180072028813e-05, "loss": 0.5385, "step": 9783 }, { "epoch": 12.52352, "grad_norm": 0.9173272252082825, "learning_rate": 3.044817927170868e-05, "loss": 0.5661, "step": 9784 }, { "epoch": 12.5248, "grad_norm": 1.0070183277130127, "learning_rate": 3.044617847138856e-05, "loss": 0.5957, "step": 9785 }, { "epoch": 12.52608, "grad_norm": 0.8930588364601135, "learning_rate": 3.044417767106843e-05, "loss": 0.5212, "step": 9786 }, { "epoch": 12.52736, "grad_norm": 0.883655846118927, "learning_rate": 3.04421768707483e-05, "loss": 0.547, "step": 9787 }, { "epoch": 12.52864, "grad_norm": 0.917133092880249, "learning_rate": 3.0440176070428172e-05, "loss": 0.4803, "step": 9788 }, { "epoch": 12.52992, "grad_norm": 0.9094095826148987, "learning_rate": 3.0438175270108044e-05, "loss": 0.544, "step": 9789 }, { "epoch": 12.5312, "grad_norm": 0.977846622467041, "learning_rate": 3.0436174469787916e-05, "loss": 0.6179, "step": 9790 }, { "epoch": 12.53248, "grad_norm": 0.9368249177932739, "learning_rate": 3.0434173669467788e-05, "loss": 0.5934, "step": 9791 }, { "epoch": 12.533760000000001, "grad_norm": 0.8966599106788635, "learning_rate": 3.0432172869147663e-05, "loss": 0.55, "step": 9792 }, { "epoch": 12.53504, "grad_norm": 0.9707033038139343, "learning_rate": 3.0430172068827535e-05, "loss": 0.5788, "step": 9793 }, { "epoch": 12.53632, "grad_norm": 0.9984316825866699, "learning_rate": 3.0428171268507406e-05, "loss": 0.6074, "step": 9794 }, { "epoch": 12.5376, "grad_norm": 0.9540078043937683, "learning_rate": 3.0426170468187275e-05, "loss": 0.5769, "step": 9795 }, { "epoch": 12.53888, "grad_norm": 0.9404662847518921, "learning_rate": 3.0424169667867147e-05, "loss": 0.5381, "step": 9796 }, { "epoch": 12.54016, "grad_norm": 0.8832229375839233, "learning_rate": 3.042216886754702e-05, "loss": 0.5524, 
"step": 9797 }, { "epoch": 12.54144, "grad_norm": 0.9047516584396362, "learning_rate": 3.042016806722689e-05, "loss": 0.5836, "step": 9798 }, { "epoch": 12.54272, "grad_norm": 0.9186232089996338, "learning_rate": 3.0418167266906766e-05, "loss": 0.5923, "step": 9799 }, { "epoch": 12.544, "grad_norm": 1.0132439136505127, "learning_rate": 3.0416166466586638e-05, "loss": 0.5755, "step": 9800 }, { "epoch": 12.54528, "grad_norm": 0.9297081232070923, "learning_rate": 3.041416566626651e-05, "loss": 0.5604, "step": 9801 }, { "epoch": 12.54656, "grad_norm": 0.9263479113578796, "learning_rate": 3.041216486594638e-05, "loss": 0.5238, "step": 9802 }, { "epoch": 12.54784, "grad_norm": 0.964608371257782, "learning_rate": 3.041016406562625e-05, "loss": 0.5523, "step": 9803 }, { "epoch": 12.54912, "grad_norm": 0.8875142931938171, "learning_rate": 3.040816326530612e-05, "loss": 0.4931, "step": 9804 }, { "epoch": 12.5504, "grad_norm": 0.9739357829093933, "learning_rate": 3.0406162464985994e-05, "loss": 0.5274, "step": 9805 }, { "epoch": 12.55168, "grad_norm": 0.8982617855072021, "learning_rate": 3.040416166466587e-05, "loss": 0.5359, "step": 9806 }, { "epoch": 12.55296, "grad_norm": 0.9386458396911621, "learning_rate": 3.040216086434574e-05, "loss": 0.5505, "step": 9807 }, { "epoch": 12.55424, "grad_norm": 0.9533564448356628, "learning_rate": 3.0400160064025612e-05, "loss": 0.6093, "step": 9808 }, { "epoch": 12.55552, "grad_norm": 0.9058338403701782, "learning_rate": 3.0398159263705484e-05, "loss": 0.5394, "step": 9809 }, { "epoch": 12.556799999999999, "grad_norm": 1.01373291015625, "learning_rate": 3.0396158463385356e-05, "loss": 0.5627, "step": 9810 }, { "epoch": 12.55808, "grad_norm": 1.0229073762893677, "learning_rate": 3.0394157663065225e-05, "loss": 0.5781, "step": 9811 }, { "epoch": 12.55936, "grad_norm": 0.9454696178436279, "learning_rate": 3.0392156862745097e-05, "loss": 0.5293, "step": 9812 }, { "epoch": 12.56064, "grad_norm": 0.9132083058357239, "learning_rate": 
3.0390156062424975e-05, "loss": 0.546, "step": 9813 }, { "epoch": 12.56192, "grad_norm": 0.90849369764328, "learning_rate": 3.0388155262104844e-05, "loss": 0.5221, "step": 9814 }, { "epoch": 12.5632, "grad_norm": 0.9330136179924011, "learning_rate": 3.0386154461784715e-05, "loss": 0.5212, "step": 9815 }, { "epoch": 12.56448, "grad_norm": 0.9285899996757507, "learning_rate": 3.0384153661464587e-05, "loss": 0.6005, "step": 9816 }, { "epoch": 12.565760000000001, "grad_norm": 0.8947268724441528, "learning_rate": 3.038215286114446e-05, "loss": 0.5294, "step": 9817 }, { "epoch": 12.56704, "grad_norm": 0.9373891949653625, "learning_rate": 3.038015206082433e-05, "loss": 0.5463, "step": 9818 }, { "epoch": 12.56832, "grad_norm": 0.9649326205253601, "learning_rate": 3.03781512605042e-05, "loss": 0.5701, "step": 9819 }, { "epoch": 12.5696, "grad_norm": 0.9598551392555237, "learning_rate": 3.0376150460184078e-05, "loss": 0.6032, "step": 9820 }, { "epoch": 12.57088, "grad_norm": 1.0046522617340088, "learning_rate": 3.037414965986395e-05, "loss": 0.6048, "step": 9821 }, { "epoch": 12.57216, "grad_norm": 0.9179285168647766, "learning_rate": 3.037214885954382e-05, "loss": 0.5717, "step": 9822 }, { "epoch": 12.57344, "grad_norm": 0.9317951798439026, "learning_rate": 3.037014805922369e-05, "loss": 0.5137, "step": 9823 }, { "epoch": 12.57472, "grad_norm": 0.9623469114303589, "learning_rate": 3.0368147258903562e-05, "loss": 0.564, "step": 9824 }, { "epoch": 12.576, "grad_norm": 0.9758321046829224, "learning_rate": 3.0366146458583434e-05, "loss": 0.586, "step": 9825 }, { "epoch": 12.57728, "grad_norm": 0.9277058839797974, "learning_rate": 3.0364145658263306e-05, "loss": 0.5431, "step": 9826 }, { "epoch": 12.57856, "grad_norm": 0.9288407564163208, "learning_rate": 3.036214485794318e-05, "loss": 0.5268, "step": 9827 }, { "epoch": 12.57984, "grad_norm": 0.9227582812309265, "learning_rate": 3.0360144057623053e-05, "loss": 0.5604, "step": 9828 }, { "epoch": 12.58112, "grad_norm": 
0.8422914743423462, "learning_rate": 3.0358143257302925e-05, "loss": 0.5244, "step": 9829 }, { "epoch": 12.5824, "grad_norm": 0.9041396975517273, "learning_rate": 3.0356142456982793e-05, "loss": 0.558, "step": 9830 }, { "epoch": 12.58368, "grad_norm": 0.9546091556549072, "learning_rate": 3.0354141656662665e-05, "loss": 0.5733, "step": 9831 }, { "epoch": 12.58496, "grad_norm": 0.9388670325279236, "learning_rate": 3.0352140856342537e-05, "loss": 0.542, "step": 9832 }, { "epoch": 12.58624, "grad_norm": 0.9128877520561218, "learning_rate": 3.035014005602241e-05, "loss": 0.5452, "step": 9833 }, { "epoch": 12.58752, "grad_norm": 0.9351105690002441, "learning_rate": 3.0348139255702284e-05, "loss": 0.5283, "step": 9834 }, { "epoch": 12.588799999999999, "grad_norm": 0.9028121829032898, "learning_rate": 3.0346138455382156e-05, "loss": 0.5479, "step": 9835 }, { "epoch": 12.59008, "grad_norm": 0.9574125409126282, "learning_rate": 3.0344137655062028e-05, "loss": 0.5786, "step": 9836 }, { "epoch": 12.59136, "grad_norm": 0.9389735460281372, "learning_rate": 3.03421368547419e-05, "loss": 0.5688, "step": 9837 }, { "epoch": 12.59264, "grad_norm": 0.889968752861023, "learning_rate": 3.0340136054421768e-05, "loss": 0.5263, "step": 9838 }, { "epoch": 12.59392, "grad_norm": 0.8959866762161255, "learning_rate": 3.033813525410164e-05, "loss": 0.5766, "step": 9839 }, { "epoch": 12.5952, "grad_norm": 0.9044232964515686, "learning_rate": 3.0336134453781512e-05, "loss": 0.536, "step": 9840 }, { "epoch": 12.59648, "grad_norm": 0.896634042263031, "learning_rate": 3.0334133653461387e-05, "loss": 0.5389, "step": 9841 }, { "epoch": 12.59776, "grad_norm": 0.9344484806060791, "learning_rate": 3.033213285314126e-05, "loss": 0.552, "step": 9842 }, { "epoch": 12.59904, "grad_norm": 1.0194035768508911, "learning_rate": 3.033013205282113e-05, "loss": 0.5967, "step": 9843 }, { "epoch": 12.60032, "grad_norm": 0.9549538493156433, "learning_rate": 3.0328131252501003e-05, "loss": 0.5707, "step": 9844 }, { 
"epoch": 12.6016, "grad_norm": 0.9823207259178162, "learning_rate": 3.0326130452180875e-05, "loss": 0.5702, "step": 9845 }, { "epoch": 12.60288, "grad_norm": 0.9882277846336365, "learning_rate": 3.0324129651860743e-05, "loss": 0.6005, "step": 9846 }, { "epoch": 12.60416, "grad_norm": 0.8940028548240662, "learning_rate": 3.0322128851540615e-05, "loss": 0.5271, "step": 9847 }, { "epoch": 12.60544, "grad_norm": 0.9345920085906982, "learning_rate": 3.0320128051220494e-05, "loss": 0.5547, "step": 9848 }, { "epoch": 12.60672, "grad_norm": 0.8393700122833252, "learning_rate": 3.0318127250900362e-05, "loss": 0.5294, "step": 9849 }, { "epoch": 12.608, "grad_norm": 0.9185911417007446, "learning_rate": 3.0316126450580234e-05, "loss": 0.5218, "step": 9850 }, { "epoch": 12.60928, "grad_norm": 0.9599105715751648, "learning_rate": 3.0314125650260106e-05, "loss": 0.5365, "step": 9851 }, { "epoch": 12.61056, "grad_norm": 0.9423932433128357, "learning_rate": 3.0312124849939978e-05, "loss": 0.5799, "step": 9852 }, { "epoch": 12.61184, "grad_norm": 0.9011184573173523, "learning_rate": 3.031012404961985e-05, "loss": 0.5154, "step": 9853 }, { "epoch": 12.61312, "grad_norm": 0.9672764539718628, "learning_rate": 3.0308123249299718e-05, "loss": 0.5381, "step": 9854 }, { "epoch": 12.6144, "grad_norm": 0.9262174963951111, "learning_rate": 3.0306122448979597e-05, "loss": 0.5639, "step": 9855 }, { "epoch": 12.61568, "grad_norm": 0.9052280783653259, "learning_rate": 3.030412164865947e-05, "loss": 0.5398, "step": 9856 }, { "epoch": 12.61696, "grad_norm": 0.9824522733688354, "learning_rate": 3.0302120848339337e-05, "loss": 0.5554, "step": 9857 }, { "epoch": 12.61824, "grad_norm": 0.9158837795257568, "learning_rate": 3.030012004801921e-05, "loss": 0.5455, "step": 9858 }, { "epoch": 12.61952, "grad_norm": 0.8802503943443298, "learning_rate": 3.029811924769908e-05, "loss": 0.5605, "step": 9859 }, { "epoch": 12.6208, "grad_norm": 0.9751753807067871, "learning_rate": 3.0296118447378952e-05, "loss": 
0.6144, "step": 9860 }, { "epoch": 12.62208, "grad_norm": 0.9232403039932251, "learning_rate": 3.0294117647058824e-05, "loss": 0.5928, "step": 9861 }, { "epoch": 12.62336, "grad_norm": 0.9599583148956299, "learning_rate": 3.02921168467387e-05, "loss": 0.5603, "step": 9862 }, { "epoch": 12.62464, "grad_norm": 0.8870642185211182, "learning_rate": 3.029011604641857e-05, "loss": 0.5307, "step": 9863 }, { "epoch": 12.62592, "grad_norm": 0.9634974598884583, "learning_rate": 3.0288115246098443e-05, "loss": 0.5891, "step": 9864 }, { "epoch": 12.6272, "grad_norm": 0.8991132974624634, "learning_rate": 3.028611444577831e-05, "loss": 0.5648, "step": 9865 }, { "epoch": 12.62848, "grad_norm": 0.8831954002380371, "learning_rate": 3.0284113645458184e-05, "loss": 0.5817, "step": 9866 }, { "epoch": 12.62976, "grad_norm": 0.9551595449447632, "learning_rate": 3.0282112845138055e-05, "loss": 0.5596, "step": 9867 }, { "epoch": 12.63104, "grad_norm": 0.9024450182914734, "learning_rate": 3.0280112044817927e-05, "loss": 0.5449, "step": 9868 }, { "epoch": 12.63232, "grad_norm": 0.8698076009750366, "learning_rate": 3.0278111244497803e-05, "loss": 0.5622, "step": 9869 }, { "epoch": 12.6336, "grad_norm": 0.9642755389213562, "learning_rate": 3.0276110444177674e-05, "loss": 0.5489, "step": 9870 }, { "epoch": 12.63488, "grad_norm": 0.8824167251586914, "learning_rate": 3.0274109643857546e-05, "loss": 0.5393, "step": 9871 }, { "epoch": 12.63616, "grad_norm": 0.9382432103157043, "learning_rate": 3.0272108843537418e-05, "loss": 0.5089, "step": 9872 }, { "epoch": 12.63744, "grad_norm": 0.9504085779190063, "learning_rate": 3.0270108043217287e-05, "loss": 0.5866, "step": 9873 }, { "epoch": 12.63872, "grad_norm": 0.9067440032958984, "learning_rate": 3.026810724289716e-05, "loss": 0.5128, "step": 9874 }, { "epoch": 12.64, "grad_norm": 0.8995640277862549, "learning_rate": 3.026610644257703e-05, "loss": 0.5669, "step": 9875 }, { "epoch": 12.64128, "grad_norm": 0.919801652431488, "learning_rate": 
3.0264105642256906e-05, "loss": 0.5359, "step": 9876 }, { "epoch": 12.64256, "grad_norm": 0.8864008188247681, "learning_rate": 3.0262104841936777e-05, "loss": 0.5566, "step": 9877 }, { "epoch": 12.64384, "grad_norm": 0.940040111541748, "learning_rate": 3.026010404161665e-05, "loss": 0.4943, "step": 9878 }, { "epoch": 12.64512, "grad_norm": 0.9585533738136292, "learning_rate": 3.025810324129652e-05, "loss": 0.6033, "step": 9879 }, { "epoch": 12.6464, "grad_norm": 0.9533222317695618, "learning_rate": 3.0256102440976393e-05, "loss": 0.5542, "step": 9880 }, { "epoch": 12.64768, "grad_norm": 0.9033933877944946, "learning_rate": 3.025410164065626e-05, "loss": 0.5347, "step": 9881 }, { "epoch": 12.64896, "grad_norm": 0.8955773115158081, "learning_rate": 3.0252100840336133e-05, "loss": 0.5131, "step": 9882 }, { "epoch": 12.65024, "grad_norm": 0.9527932405471802, "learning_rate": 3.0250100040016012e-05, "loss": 0.5438, "step": 9883 }, { "epoch": 12.65152, "grad_norm": 0.9614503979682922, "learning_rate": 3.024809923969588e-05, "loss": 0.5858, "step": 9884 }, { "epoch": 12.6528, "grad_norm": 0.9274621605873108, "learning_rate": 3.0246098439375752e-05, "loss": 0.4716, "step": 9885 }, { "epoch": 12.65408, "grad_norm": 0.8693538904190063, "learning_rate": 3.0244097639055624e-05, "loss": 0.5251, "step": 9886 }, { "epoch": 12.65536, "grad_norm": 0.9951110482215881, "learning_rate": 3.0242096838735496e-05, "loss": 0.5828, "step": 9887 }, { "epoch": 12.65664, "grad_norm": 0.9591448903083801, "learning_rate": 3.0240096038415368e-05, "loss": 0.5891, "step": 9888 }, { "epoch": 12.65792, "grad_norm": 0.9329800009727478, "learning_rate": 3.0238095238095236e-05, "loss": 0.5501, "step": 9889 }, { "epoch": 12.6592, "grad_norm": 0.911115288734436, "learning_rate": 3.0236094437775108e-05, "loss": 0.5655, "step": 9890 }, { "epoch": 12.66048, "grad_norm": 0.9046706557273865, "learning_rate": 3.0234093637454987e-05, "loss": 0.5582, "step": 9891 }, { "epoch": 12.66176, "grad_norm": 
0.9165447950363159, "learning_rate": 3.0232092837134855e-05, "loss": 0.5773, "step": 9892 }, { "epoch": 12.66304, "grad_norm": 0.9891864657402039, "learning_rate": 3.0230092036814727e-05, "loss": 0.6351, "step": 9893 }, { "epoch": 12.66432, "grad_norm": 0.8324431777000427, "learning_rate": 3.02280912364946e-05, "loss": 0.5198, "step": 9894 }, { "epoch": 12.6656, "grad_norm": 0.8809410929679871, "learning_rate": 3.022609043617447e-05, "loss": 0.5294, "step": 9895 }, { "epoch": 12.66688, "grad_norm": 0.8862389922142029, "learning_rate": 3.0224089635854343e-05, "loss": 0.5305, "step": 9896 }, { "epoch": 12.66816, "grad_norm": 0.8923200368881226, "learning_rate": 3.022208883553421e-05, "loss": 0.5304, "step": 9897 }, { "epoch": 12.66944, "grad_norm": 0.9289109110832214, "learning_rate": 3.022008803521409e-05, "loss": 0.5732, "step": 9898 }, { "epoch": 12.67072, "grad_norm": 0.9402267336845398, "learning_rate": 3.021808723489396e-05, "loss": 0.5936, "step": 9899 }, { "epoch": 12.672, "grad_norm": 0.9303426146507263, "learning_rate": 3.021608643457383e-05, "loss": 0.555, "step": 9900 }, { "epoch": 12.67328, "grad_norm": 0.8610079288482666, "learning_rate": 3.0214085634253702e-05, "loss": 0.5454, "step": 9901 }, { "epoch": 12.67456, "grad_norm": 0.9670707583427429, "learning_rate": 3.0212084833933574e-05, "loss": 0.5369, "step": 9902 }, { "epoch": 12.67584, "grad_norm": 0.9356277585029602, "learning_rate": 3.0210084033613446e-05, "loss": 0.556, "step": 9903 }, { "epoch": 12.67712, "grad_norm": 0.8627066016197205, "learning_rate": 3.0208083233293317e-05, "loss": 0.5081, "step": 9904 }, { "epoch": 12.6784, "grad_norm": 0.918406069278717, "learning_rate": 3.0206082432973193e-05, "loss": 0.5187, "step": 9905 }, { "epoch": 12.67968, "grad_norm": 0.9237809777259827, "learning_rate": 3.0204081632653065e-05, "loss": 0.5165, "step": 9906 }, { "epoch": 12.68096, "grad_norm": 0.8767214417457581, "learning_rate": 3.0202080832332936e-05, "loss": 0.4903, "step": 9907 }, { "epoch": 
12.68224, "grad_norm": 1.0054161548614502, "learning_rate": 3.0200080032012805e-05, "loss": 0.5947, "step": 9908 }, { "epoch": 12.68352, "grad_norm": 0.9165403246879578, "learning_rate": 3.0198079231692677e-05, "loss": 0.5614, "step": 9909 }, { "epoch": 12.6848, "grad_norm": 0.9228652715682983, "learning_rate": 3.019607843137255e-05, "loss": 0.5589, "step": 9910 }, { "epoch": 12.68608, "grad_norm": 0.8921694755554199, "learning_rate": 3.019407763105242e-05, "loss": 0.5131, "step": 9911 }, { "epoch": 12.68736, "grad_norm": 0.9007654786109924, "learning_rate": 3.0192076830732296e-05, "loss": 0.5555, "step": 9912 }, { "epoch": 12.68864, "grad_norm": 0.8586267828941345, "learning_rate": 3.0190076030412168e-05, "loss": 0.5562, "step": 9913 }, { "epoch": 12.68992, "grad_norm": 0.9020005464553833, "learning_rate": 3.018807523009204e-05, "loss": 0.5793, "step": 9914 }, { "epoch": 12.6912, "grad_norm": 0.887444019317627, "learning_rate": 3.018607442977191e-05, "loss": 0.572, "step": 9915 }, { "epoch": 12.69248, "grad_norm": 0.9526282548904419, "learning_rate": 3.018407362945178e-05, "loss": 0.574, "step": 9916 }, { "epoch": 12.69376, "grad_norm": 0.9392738938331604, "learning_rate": 3.018207282913165e-05, "loss": 0.5512, "step": 9917 }, { "epoch": 12.69504, "grad_norm": 0.938337504863739, "learning_rate": 3.0180072028811523e-05, "loss": 0.5903, "step": 9918 }, { "epoch": 12.69632, "grad_norm": 0.9258535504341125, "learning_rate": 3.01780712284914e-05, "loss": 0.5464, "step": 9919 }, { "epoch": 12.6976, "grad_norm": 0.9325132369995117, "learning_rate": 3.017607042817127e-05, "loss": 0.5417, "step": 9920 }, { "epoch": 12.698879999999999, "grad_norm": 0.8271166682243347, "learning_rate": 3.0174069627851142e-05, "loss": 0.4818, "step": 9921 }, { "epoch": 12.70016, "grad_norm": 0.9156856536865234, "learning_rate": 3.0172068827531014e-05, "loss": 0.508, "step": 9922 }, { "epoch": 12.70144, "grad_norm": 0.9150842428207397, "learning_rate": 3.0170068027210886e-05, "loss": 0.5311, 
"step": 9923 }, { "epoch": 12.70272, "grad_norm": 0.9068151116371155, "learning_rate": 3.0168067226890755e-05, "loss": 0.5557, "step": 9924 }, { "epoch": 12.704, "grad_norm": 0.9375526309013367, "learning_rate": 3.0166066426570626e-05, "loss": 0.5309, "step": 9925 }, { "epoch": 12.70528, "grad_norm": 0.9023036360740662, "learning_rate": 3.0164065626250505e-05, "loss": 0.5057, "step": 9926 }, { "epoch": 12.70656, "grad_norm": 0.9717710614204407, "learning_rate": 3.0162064825930374e-05, "loss": 0.5915, "step": 9927 }, { "epoch": 12.707840000000001, "grad_norm": 0.8739220499992371, "learning_rate": 3.0160064025610245e-05, "loss": 0.5367, "step": 9928 }, { "epoch": 12.70912, "grad_norm": 0.903511643409729, "learning_rate": 3.0158063225290117e-05, "loss": 0.515, "step": 9929 }, { "epoch": 12.7104, "grad_norm": 0.9045807719230652, "learning_rate": 3.015606242496999e-05, "loss": 0.5255, "step": 9930 }, { "epoch": 12.71168, "grad_norm": 0.8613312244415283, "learning_rate": 3.015406162464986e-05, "loss": 0.5145, "step": 9931 }, { "epoch": 12.71296, "grad_norm": 0.9359320998191833, "learning_rate": 3.015206082432973e-05, "loss": 0.5632, "step": 9932 }, { "epoch": 12.71424, "grad_norm": 0.8847656846046448, "learning_rate": 3.0150060024009608e-05, "loss": 0.5334, "step": 9933 }, { "epoch": 12.71552, "grad_norm": 0.9487404823303223, "learning_rate": 3.014805922368948e-05, "loss": 0.5566, "step": 9934 }, { "epoch": 12.7168, "grad_norm": 1.0114420652389526, "learning_rate": 3.014605842336935e-05, "loss": 0.5976, "step": 9935 }, { "epoch": 12.71808, "grad_norm": 0.9208101034164429, "learning_rate": 3.014405762304922e-05, "loss": 0.5365, "step": 9936 }, { "epoch": 12.71936, "grad_norm": 0.8874728083610535, "learning_rate": 3.0142056822729092e-05, "loss": 0.5601, "step": 9937 }, { "epoch": 12.72064, "grad_norm": 0.8904001116752625, "learning_rate": 3.0140056022408964e-05, "loss": 0.534, "step": 9938 }, { "epoch": 12.72192, "grad_norm": 0.9263580441474915, "learning_rate": 
3.0138055222088836e-05, "loss": 0.5514, "step": 9939 }, { "epoch": 12.7232, "grad_norm": 0.9768319725990295, "learning_rate": 3.013605442176871e-05, "loss": 0.556, "step": 9940 }, { "epoch": 12.72448, "grad_norm": 0.8745740652084351, "learning_rate": 3.0134053621448583e-05, "loss": 0.559, "step": 9941 }, { "epoch": 12.72576, "grad_norm": 0.963864266872406, "learning_rate": 3.0132052821128455e-05, "loss": 0.5594, "step": 9942 }, { "epoch": 12.72704, "grad_norm": 0.9123207330703735, "learning_rate": 3.0130052020808323e-05, "loss": 0.5439, "step": 9943 }, { "epoch": 12.72832, "grad_norm": 0.8876965045928955, "learning_rate": 3.0128051220488195e-05, "loss": 0.5076, "step": 9944 }, { "epoch": 12.7296, "grad_norm": 0.8987967371940613, "learning_rate": 3.0126050420168067e-05, "loss": 0.5557, "step": 9945 }, { "epoch": 12.730879999999999, "grad_norm": 0.9241291880607605, "learning_rate": 3.012404961984794e-05, "loss": 0.5262, "step": 9946 }, { "epoch": 12.73216, "grad_norm": 0.9232166409492493, "learning_rate": 3.0122048819527814e-05, "loss": 0.5576, "step": 9947 }, { "epoch": 12.73344, "grad_norm": 0.8862061500549316, "learning_rate": 3.0120048019207686e-05, "loss": 0.5504, "step": 9948 }, { "epoch": 12.73472, "grad_norm": 0.9697844982147217, "learning_rate": 3.0118047218887558e-05, "loss": 0.578, "step": 9949 }, { "epoch": 12.736, "grad_norm": 0.9346733093261719, "learning_rate": 3.011604641856743e-05, "loss": 0.5529, "step": 9950 }, { "epoch": 12.73728, "grad_norm": 0.9218923449516296, "learning_rate": 3.0114045618247298e-05, "loss": 0.5634, "step": 9951 }, { "epoch": 12.73856, "grad_norm": 1.0053513050079346, "learning_rate": 3.011204481792717e-05, "loss": 0.5313, "step": 9952 }, { "epoch": 12.739840000000001, "grad_norm": 0.9116829633712769, "learning_rate": 3.0110044017607042e-05, "loss": 0.565, "step": 9953 }, { "epoch": 12.74112, "grad_norm": 0.8650526404380798, "learning_rate": 3.0108043217286917e-05, "loss": 0.4948, "step": 9954 }, { "epoch": 12.7424, 
"grad_norm": 0.9802688360214233, "learning_rate": 3.010604241696679e-05, "loss": 0.5878, "step": 9955 }, { "epoch": 12.74368, "grad_norm": 0.9233496785163879, "learning_rate": 3.010404161664666e-05, "loss": 0.5235, "step": 9956 }, { "epoch": 12.74496, "grad_norm": 0.9107963442802429, "learning_rate": 3.0102040816326533e-05, "loss": 0.5203, "step": 9957 }, { "epoch": 12.74624, "grad_norm": 0.9463147521018982, "learning_rate": 3.0100040016006405e-05, "loss": 0.5231, "step": 9958 }, { "epoch": 12.74752, "grad_norm": 0.9093472957611084, "learning_rate": 3.0098039215686273e-05, "loss": 0.5534, "step": 9959 }, { "epoch": 12.7488, "grad_norm": 1.0168708562850952, "learning_rate": 3.0096038415366145e-05, "loss": 0.6179, "step": 9960 }, { "epoch": 12.75008, "grad_norm": 0.8969560265541077, "learning_rate": 3.0094037615046023e-05, "loss": 0.5302, "step": 9961 }, { "epoch": 12.75136, "grad_norm": 0.9327144026756287, "learning_rate": 3.0092036814725892e-05, "loss": 0.5877, "step": 9962 }, { "epoch": 12.75264, "grad_norm": 0.9616484642028809, "learning_rate": 3.0090036014405764e-05, "loss": 0.5779, "step": 9963 }, { "epoch": 12.75392, "grad_norm": 0.9223829507827759, "learning_rate": 3.0088035214085636e-05, "loss": 0.5649, "step": 9964 }, { "epoch": 12.7552, "grad_norm": 0.8914867639541626, "learning_rate": 3.0086034413765508e-05, "loss": 0.5435, "step": 9965 }, { "epoch": 12.75648, "grad_norm": 0.8898464441299438, "learning_rate": 3.008403361344538e-05, "loss": 0.5918, "step": 9966 }, { "epoch": 12.75776, "grad_norm": 0.8924893140792847, "learning_rate": 3.0082032813125248e-05, "loss": 0.5312, "step": 9967 }, { "epoch": 12.75904, "grad_norm": 0.9874442219734192, "learning_rate": 3.0080032012805126e-05, "loss": 0.5591, "step": 9968 }, { "epoch": 12.76032, "grad_norm": 0.8837193846702576, "learning_rate": 3.0078031212485e-05, "loss": 0.5174, "step": 9969 }, { "epoch": 12.7616, "grad_norm": 0.9593360424041748, "learning_rate": 3.0076030412164867e-05, "loss": 0.5871, "step": 9970 
}, { "epoch": 12.76288, "grad_norm": 0.9044248461723328, "learning_rate": 3.007402961184474e-05, "loss": 0.5099, "step": 9971 }, { "epoch": 12.76416, "grad_norm": 0.9392541646957397, "learning_rate": 3.007202881152461e-05, "loss": 0.5553, "step": 9972 }, { "epoch": 12.76544, "grad_norm": 0.9666686058044434, "learning_rate": 3.0070028011204482e-05, "loss": 0.54, "step": 9973 }, { "epoch": 12.76672, "grad_norm": 0.9441251754760742, "learning_rate": 3.0068027210884354e-05, "loss": 0.6014, "step": 9974 }, { "epoch": 12.768, "grad_norm": 0.8636193871498108, "learning_rate": 3.006602641056423e-05, "loss": 0.5608, "step": 9975 }, { "epoch": 12.76928, "grad_norm": 0.8688570261001587, "learning_rate": 3.00640256102441e-05, "loss": 0.5199, "step": 9976 }, { "epoch": 12.77056, "grad_norm": 0.9947240352630615, "learning_rate": 3.0062024809923973e-05, "loss": 0.5931, "step": 9977 }, { "epoch": 12.77184, "grad_norm": 0.9212228059768677, "learning_rate": 3.006002400960384e-05, "loss": 0.4972, "step": 9978 }, { "epoch": 12.77312, "grad_norm": 0.9512316584587097, "learning_rate": 3.0058023209283714e-05, "loss": 0.5568, "step": 9979 }, { "epoch": 12.7744, "grad_norm": 0.9175422191619873, "learning_rate": 3.0056022408963585e-05, "loss": 0.5375, "step": 9980 }, { "epoch": 12.77568, "grad_norm": 0.918135404586792, "learning_rate": 3.0054021608643457e-05, "loss": 0.5353, "step": 9981 }, { "epoch": 12.77696, "grad_norm": 0.937879204750061, "learning_rate": 3.0052020808323332e-05, "loss": 0.5707, "step": 9982 }, { "epoch": 12.77824, "grad_norm": 0.8635044693946838, "learning_rate": 3.0050020008003204e-05, "loss": 0.5118, "step": 9983 }, { "epoch": 12.77952, "grad_norm": 0.8819299936294556, "learning_rate": 3.0048019207683076e-05, "loss": 0.4941, "step": 9984 }, { "epoch": 12.7808, "grad_norm": 0.9287380576133728, "learning_rate": 3.0046018407362948e-05, "loss": 0.5739, "step": 9985 }, { "epoch": 12.78208, "grad_norm": 0.8525633811950684, "learning_rate": 3.0044017607042817e-05, "loss": 
0.5316, "step": 9986 }, { "epoch": 12.78336, "grad_norm": 0.8659927845001221, "learning_rate": 3.004201680672269e-05, "loss": 0.5096, "step": 9987 }, { "epoch": 12.78464, "grad_norm": 0.876082181930542, "learning_rate": 3.004001600640256e-05, "loss": 0.5481, "step": 9988 }, { "epoch": 12.78592, "grad_norm": 0.8915519118309021, "learning_rate": 3.0038015206082435e-05, "loss": 0.5542, "step": 9989 }, { "epoch": 12.7872, "grad_norm": 0.961881697177887, "learning_rate": 3.0036014405762307e-05, "loss": 0.541, "step": 9990 }, { "epoch": 12.78848, "grad_norm": 0.9022373557090759, "learning_rate": 3.003401360544218e-05, "loss": 0.5454, "step": 9991 }, { "epoch": 12.78976, "grad_norm": 0.9297406077384949, "learning_rate": 3.003201280512205e-05, "loss": 0.5537, "step": 9992 }, { "epoch": 12.79104, "grad_norm": 0.9228201508522034, "learning_rate": 3.0030012004801923e-05, "loss": 0.5281, "step": 9993 }, { "epoch": 12.79232, "grad_norm": 0.8959901928901672, "learning_rate": 3.002801120448179e-05, "loss": 0.5428, "step": 9994 }, { "epoch": 12.7936, "grad_norm": 0.903972864151001, "learning_rate": 3.0026010404161663e-05, "loss": 0.5513, "step": 9995 }, { "epoch": 12.79488, "grad_norm": 0.9172789454460144, "learning_rate": 3.0024009603841542e-05, "loss": 0.5723, "step": 9996 }, { "epoch": 12.79616, "grad_norm": 0.872391402721405, "learning_rate": 3.002200880352141e-05, "loss": 0.5302, "step": 9997 }, { "epoch": 12.79744, "grad_norm": 0.9103299379348755, "learning_rate": 3.0020008003201282e-05, "loss": 0.5548, "step": 9998 }, { "epoch": 12.79872, "grad_norm": 0.9463931322097778, "learning_rate": 3.0018007202881154e-05, "loss": 0.5299, "step": 9999 }, { "epoch": 12.8, "grad_norm": 0.9099661707878113, "learning_rate": 3.0016006402561026e-05, "loss": 0.5733, "step": 10000 }, { "epoch": 12.80128, "grad_norm": 0.8782345652580261, "learning_rate": 3.0014005602240898e-05, "loss": 0.5495, "step": 10001 }, { "epoch": 12.80256, "grad_norm": 0.9504924416542053, "learning_rate": 
3.0012004801920766e-05, "loss": 0.5548, "step": 10002 }, { "epoch": 12.80384, "grad_norm": 0.9535840749740601, "learning_rate": 3.0010004001600638e-05, "loss": 0.6183, "step": 10003 }, { "epoch": 12.80512, "grad_norm": 0.9448462724685669, "learning_rate": 3.0008003201280517e-05, "loss": 0.5871, "step": 10004 }, { "epoch": 12.8064, "grad_norm": 0.856593906879425, "learning_rate": 3.0006002400960385e-05, "loss": 0.5501, "step": 10005 }, { "epoch": 12.80768, "grad_norm": 0.9613591432571411, "learning_rate": 3.0004001600640257e-05, "loss": 0.5669, "step": 10006 }, { "epoch": 12.80896, "grad_norm": 0.9325043559074402, "learning_rate": 3.000200080032013e-05, "loss": 0.5625, "step": 10007 }, { "epoch": 12.81024, "grad_norm": 0.8977808356285095, "learning_rate": 3e-05, "loss": 0.5378, "step": 10008 }, { "epoch": 12.81152, "grad_norm": 0.8371548652648926, "learning_rate": 2.9997999199679873e-05, "loss": 0.5012, "step": 10009 }, { "epoch": 12.8128, "grad_norm": 0.9407792091369629, "learning_rate": 2.999599839935974e-05, "loss": 0.6057, "step": 10010 }, { "epoch": 12.81408, "grad_norm": 0.911292552947998, "learning_rate": 2.999399759903962e-05, "loss": 0.5342, "step": 10011 }, { "epoch": 12.81536, "grad_norm": 0.947562038898468, "learning_rate": 2.999199679871949e-05, "loss": 0.597, "step": 10012 }, { "epoch": 12.81664, "grad_norm": 0.8605309724807739, "learning_rate": 2.998999599839936e-05, "loss": 0.5011, "step": 10013 }, { "epoch": 12.81792, "grad_norm": 0.9538720846176147, "learning_rate": 2.9987995198079232e-05, "loss": 0.5383, "step": 10014 }, { "epoch": 12.8192, "grad_norm": 0.8674808144569397, "learning_rate": 2.9985994397759104e-05, "loss": 0.5635, "step": 10015 }, { "epoch": 12.82048, "grad_norm": 0.8953600525856018, "learning_rate": 2.9983993597438976e-05, "loss": 0.5338, "step": 10016 }, { "epoch": 12.82176, "grad_norm": 0.9249882102012634, "learning_rate": 2.9981992797118847e-05, "loss": 0.5529, "step": 10017 }, { "epoch": 12.82304, "grad_norm": 
0.9073203802108765, "learning_rate": 2.9979991996798723e-05, "loss": 0.5515, "step": 10018 }, { "epoch": 12.82432, "grad_norm": 0.8862814903259277, "learning_rate": 2.9977991196478595e-05, "loss": 0.5639, "step": 10019 }, { "epoch": 12.8256, "grad_norm": 0.9115248918533325, "learning_rate": 2.9975990396158466e-05, "loss": 0.5338, "step": 10020 }, { "epoch": 12.82688, "grad_norm": 0.9155441522598267, "learning_rate": 2.9973989595838335e-05, "loss": 0.5904, "step": 10021 }, { "epoch": 12.82816, "grad_norm": 0.936619222164154, "learning_rate": 2.9971988795518207e-05, "loss": 0.5606, "step": 10022 }, { "epoch": 12.82944, "grad_norm": 0.8957839012145996, "learning_rate": 2.996998799519808e-05, "loss": 0.5432, "step": 10023 }, { "epoch": 12.83072, "grad_norm": 0.8455110192298889, "learning_rate": 2.996798719487795e-05, "loss": 0.5095, "step": 10024 }, { "epoch": 12.832, "grad_norm": 0.8867446184158325, "learning_rate": 2.9965986394557826e-05, "loss": 0.5319, "step": 10025 }, { "epoch": 12.83328, "grad_norm": 0.8974578976631165, "learning_rate": 2.9963985594237698e-05, "loss": 0.5514, "step": 10026 }, { "epoch": 12.83456, "grad_norm": 0.8834570050239563, "learning_rate": 2.996198479391757e-05, "loss": 0.5682, "step": 10027 }, { "epoch": 12.83584, "grad_norm": 0.9071859121322632, "learning_rate": 2.995998399359744e-05, "loss": 0.5334, "step": 10028 }, { "epoch": 12.83712, "grad_norm": 0.9252317547798157, "learning_rate": 2.995798319327731e-05, "loss": 0.5062, "step": 10029 }, { "epoch": 12.8384, "grad_norm": 0.9470645189285278, "learning_rate": 2.995598239295718e-05, "loss": 0.5328, "step": 10030 }, { "epoch": 12.83968, "grad_norm": 0.9187636375427246, "learning_rate": 2.9953981592637053e-05, "loss": 0.6012, "step": 10031 }, { "epoch": 12.84096, "grad_norm": 0.9187378883361816, "learning_rate": 2.9951980792316932e-05, "loss": 0.582, "step": 10032 }, { "epoch": 12.84224, "grad_norm": 0.9218907952308655, "learning_rate": 2.99499799919968e-05, "loss": 0.5591, "step": 10033 }, 
{ "epoch": 12.84352, "grad_norm": 0.8669567704200745, "learning_rate": 2.9947979191676672e-05, "loss": 0.5021, "step": 10034 }, { "epoch": 12.8448, "grad_norm": 0.9718922972679138, "learning_rate": 2.9945978391356544e-05, "loss": 0.5636, "step": 10035 }, { "epoch": 12.84608, "grad_norm": 0.9400469064712524, "learning_rate": 2.9943977591036416e-05, "loss": 0.5565, "step": 10036 }, { "epoch": 12.84736, "grad_norm": 0.9250262975692749, "learning_rate": 2.9941976790716285e-05, "loss": 0.5336, "step": 10037 }, { "epoch": 12.84864, "grad_norm": 0.9361352324485779, "learning_rate": 2.9939975990396156e-05, "loss": 0.5438, "step": 10038 }, { "epoch": 12.849920000000001, "grad_norm": 0.9436172246932983, "learning_rate": 2.9937975190076035e-05, "loss": 0.5784, "step": 10039 }, { "epoch": 12.8512, "grad_norm": 0.9591764807701111, "learning_rate": 2.9935974389755907e-05, "loss": 0.5426, "step": 10040 }, { "epoch": 12.85248, "grad_norm": 0.9026290774345398, "learning_rate": 2.9933973589435775e-05, "loss": 0.5524, "step": 10041 }, { "epoch": 12.85376, "grad_norm": 0.8631820678710938, "learning_rate": 2.9931972789115647e-05, "loss": 0.5365, "step": 10042 }, { "epoch": 12.85504, "grad_norm": 0.8909380435943604, "learning_rate": 2.992997198879552e-05, "loss": 0.4973, "step": 10043 }, { "epoch": 12.85632, "grad_norm": 0.9117045998573303, "learning_rate": 2.992797118847539e-05, "loss": 0.5777, "step": 10044 }, { "epoch": 12.8576, "grad_norm": 0.9356653094291687, "learning_rate": 2.992597038815526e-05, "loss": 0.5703, "step": 10045 }, { "epoch": 12.85888, "grad_norm": 0.9161821007728577, "learning_rate": 2.9923969587835138e-05, "loss": 0.5378, "step": 10046 }, { "epoch": 12.86016, "grad_norm": 0.9785245060920715, "learning_rate": 2.992196878751501e-05, "loss": 0.6163, "step": 10047 }, { "epoch": 12.86144, "grad_norm": 0.896397590637207, "learning_rate": 2.9919967987194882e-05, "loss": 0.5743, "step": 10048 }, { "epoch": 12.86272, "grad_norm": 0.8948002457618713, "learning_rate": 
2.991796718687475e-05, "loss": 0.5688, "step": 10049 }, { "epoch": 12.864, "grad_norm": 0.9673354625701904, "learning_rate": 2.9915966386554622e-05, "loss": 0.572, "step": 10050 }, { "epoch": 12.86528, "grad_norm": 0.9555987119674683, "learning_rate": 2.9913965586234494e-05, "loss": 0.5516, "step": 10051 }, { "epoch": 12.86656, "grad_norm": 0.908279299736023, "learning_rate": 2.9911964785914366e-05, "loss": 0.5513, "step": 10052 }, { "epoch": 12.86784, "grad_norm": 0.8543174862861633, "learning_rate": 2.990996398559424e-05, "loss": 0.5194, "step": 10053 }, { "epoch": 12.86912, "grad_norm": 0.9750853180885315, "learning_rate": 2.9907963185274113e-05, "loss": 0.5985, "step": 10054 }, { "epoch": 12.8704, "grad_norm": 0.8315509557723999, "learning_rate": 2.9905962384953985e-05, "loss": 0.5259, "step": 10055 }, { "epoch": 12.87168, "grad_norm": 0.8759975433349609, "learning_rate": 2.9903961584633857e-05, "loss": 0.5003, "step": 10056 }, { "epoch": 12.872959999999999, "grad_norm": 0.8982505798339844, "learning_rate": 2.9901960784313725e-05, "loss": 0.5745, "step": 10057 }, { "epoch": 12.87424, "grad_norm": 0.9328362941741943, "learning_rate": 2.9899959983993597e-05, "loss": 0.5945, "step": 10058 }, { "epoch": 12.87552, "grad_norm": 0.9085741639137268, "learning_rate": 2.989795918367347e-05, "loss": 0.5479, "step": 10059 }, { "epoch": 12.8768, "grad_norm": 0.879937469959259, "learning_rate": 2.9895958383353344e-05, "loss": 0.5385, "step": 10060 }, { "epoch": 12.87808, "grad_norm": 0.932284951210022, "learning_rate": 2.9893957583033216e-05, "loss": 0.5911, "step": 10061 }, { "epoch": 12.87936, "grad_norm": 0.9298887252807617, "learning_rate": 2.9891956782713088e-05, "loss": 0.5586, "step": 10062 }, { "epoch": 12.88064, "grad_norm": 0.9269364476203918, "learning_rate": 2.988995598239296e-05, "loss": 0.5432, "step": 10063 }, { "epoch": 12.881920000000001, "grad_norm": 0.9070879817008972, "learning_rate": 2.988795518207283e-05, "loss": 0.5722, "step": 10064 }, { "epoch": 
12.8832, "grad_norm": 0.9287840723991394, "learning_rate": 2.98859543817527e-05, "loss": 0.5642, "step": 10065 }, { "epoch": 12.88448, "grad_norm": 0.9351973533630371, "learning_rate": 2.9883953581432572e-05, "loss": 0.5648, "step": 10066 }, { "epoch": 12.88576, "grad_norm": 0.9211728572845459, "learning_rate": 2.988195278111245e-05, "loss": 0.5144, "step": 10067 }, { "epoch": 12.88704, "grad_norm": 0.9010429382324219, "learning_rate": 2.987995198079232e-05, "loss": 0.536, "step": 10068 }, { "epoch": 12.88832, "grad_norm": 0.9658145904541016, "learning_rate": 2.987795118047219e-05, "loss": 0.5977, "step": 10069 }, { "epoch": 12.8896, "grad_norm": 1.0316338539123535, "learning_rate": 2.9875950380152063e-05, "loss": 0.5963, "step": 10070 }, { "epoch": 12.89088, "grad_norm": 0.8775344491004944, "learning_rate": 2.9873949579831935e-05, "loss": 0.525, "step": 10071 }, { "epoch": 12.89216, "grad_norm": 0.9124401807785034, "learning_rate": 2.9871948779511806e-05, "loss": 0.5544, "step": 10072 }, { "epoch": 12.89344, "grad_norm": 0.9111108183860779, "learning_rate": 2.9869947979191675e-05, "loss": 0.574, "step": 10073 }, { "epoch": 12.89472, "grad_norm": 0.9600886702537537, "learning_rate": 2.9867947178871553e-05, "loss": 0.5347, "step": 10074 }, { "epoch": 12.896, "grad_norm": 0.967065691947937, "learning_rate": 2.9865946378551425e-05, "loss": 0.5945, "step": 10075 }, { "epoch": 12.89728, "grad_norm": 0.8715311288833618, "learning_rate": 2.9863945578231294e-05, "loss": 0.5239, "step": 10076 }, { "epoch": 12.89856, "grad_norm": 0.8990671634674072, "learning_rate": 2.9861944777911166e-05, "loss": 0.5526, "step": 10077 }, { "epoch": 12.89984, "grad_norm": 0.8786954283714294, "learning_rate": 2.9859943977591038e-05, "loss": 0.5653, "step": 10078 }, { "epoch": 12.90112, "grad_norm": 0.9412379860877991, "learning_rate": 2.985794317727091e-05, "loss": 0.5592, "step": 10079 }, { "epoch": 12.9024, "grad_norm": 0.9109273552894592, "learning_rate": 2.985594237695078e-05, "loss": 
0.5307, "step": 10080 }, { "epoch": 12.90368, "grad_norm": 0.9346706867218018, "learning_rate": 2.9853941576630656e-05, "loss": 0.5661, "step": 10081 }, { "epoch": 12.904959999999999, "grad_norm": 0.9366570711135864, "learning_rate": 2.985194077631053e-05, "loss": 0.5806, "step": 10082 }, { "epoch": 12.90624, "grad_norm": 0.906199038028717, "learning_rate": 2.98499399759904e-05, "loss": 0.5577, "step": 10083 }, { "epoch": 12.90752, "grad_norm": 0.9052384495735168, "learning_rate": 2.984793917567027e-05, "loss": 0.5144, "step": 10084 }, { "epoch": 12.9088, "grad_norm": 0.8487529754638672, "learning_rate": 2.984593837535014e-05, "loss": 0.5046, "step": 10085 }, { "epoch": 12.91008, "grad_norm": 0.9538546204566956, "learning_rate": 2.9843937575030012e-05, "loss": 0.5881, "step": 10086 }, { "epoch": 12.91136, "grad_norm": 0.9074375033378601, "learning_rate": 2.9841936774709884e-05, "loss": 0.5077, "step": 10087 }, { "epoch": 12.91264, "grad_norm": 0.8918171525001526, "learning_rate": 2.983993597438976e-05, "loss": 0.547, "step": 10088 }, { "epoch": 12.91392, "grad_norm": 0.877153217792511, "learning_rate": 2.983793517406963e-05, "loss": 0.5116, "step": 10089 }, { "epoch": 12.9152, "grad_norm": 0.9327219724655151, "learning_rate": 2.9835934373749503e-05, "loss": 0.5949, "step": 10090 }, { "epoch": 12.91648, "grad_norm": 0.8527021408081055, "learning_rate": 2.9833933573429375e-05, "loss": 0.5414, "step": 10091 }, { "epoch": 12.91776, "grad_norm": 0.9309681057929993, "learning_rate": 2.9831932773109244e-05, "loss": 0.5714, "step": 10092 }, { "epoch": 12.91904, "grad_norm": 1.0311362743377686, "learning_rate": 2.9829931972789115e-05, "loss": 0.6181, "step": 10093 }, { "epoch": 12.92032, "grad_norm": 0.9770155549049377, "learning_rate": 2.9827931172468987e-05, "loss": 0.6033, "step": 10094 }, { "epoch": 12.9216, "grad_norm": 0.9168463945388794, "learning_rate": 2.9825930372148862e-05, "loss": 0.5045, "step": 10095 }, { "epoch": 12.92288, "grad_norm": 0.9091753959655762, 
"learning_rate": 2.9823929571828734e-05, "loss": 0.5097, "step": 10096 }, { "epoch": 12.92416, "grad_norm": 0.9451500177383423, "learning_rate": 2.9821928771508606e-05, "loss": 0.5358, "step": 10097 }, { "epoch": 12.92544, "grad_norm": 0.8995253443717957, "learning_rate": 2.9819927971188478e-05, "loss": 0.5671, "step": 10098 }, { "epoch": 12.92672, "grad_norm": 0.9600387215614319, "learning_rate": 2.981792717086835e-05, "loss": 0.6202, "step": 10099 }, { "epoch": 12.928, "grad_norm": 0.9363338351249695, "learning_rate": 2.981592637054822e-05, "loss": 0.5377, "step": 10100 }, { "epoch": 12.92928, "grad_norm": 0.9418403506278992, "learning_rate": 2.981392557022809e-05, "loss": 0.5899, "step": 10101 }, { "epoch": 12.93056, "grad_norm": 0.9346731305122375, "learning_rate": 2.981192476990797e-05, "loss": 0.5907, "step": 10102 }, { "epoch": 12.93184, "grad_norm": 0.8982060551643372, "learning_rate": 2.9809923969587837e-05, "loss": 0.5422, "step": 10103 }, { "epoch": 12.93312, "grad_norm": 0.980532705783844, "learning_rate": 2.980792316926771e-05, "loss": 0.6078, "step": 10104 }, { "epoch": 12.9344, "grad_norm": 0.9058598875999451, "learning_rate": 2.980592236894758e-05, "loss": 0.5936, "step": 10105 }, { "epoch": 12.93568, "grad_norm": 0.81413733959198, "learning_rate": 2.9803921568627453e-05, "loss": 0.4857, "step": 10106 }, { "epoch": 12.93696, "grad_norm": 0.9065173864364624, "learning_rate": 2.9801920768307325e-05, "loss": 0.5294, "step": 10107 }, { "epoch": 12.93824, "grad_norm": 0.9533013701438904, "learning_rate": 2.9799919967987193e-05, "loss": 0.5727, "step": 10108 }, { "epoch": 12.93952, "grad_norm": 0.9087998867034912, "learning_rate": 2.9797919167667065e-05, "loss": 0.5334, "step": 10109 }, { "epoch": 12.9408, "grad_norm": 0.905795156955719, "learning_rate": 2.9795918367346944e-05, "loss": 0.5665, "step": 10110 }, { "epoch": 12.94208, "grad_norm": 0.9425820708274841, "learning_rate": 2.9793917567026812e-05, "loss": 0.5665, "step": 10111 }, { "epoch": 
12.94336, "grad_norm": 0.8737591505050659, "learning_rate": 2.9791916766706684e-05, "loss": 0.5013, "step": 10112 }, { "epoch": 12.94464, "grad_norm": 0.9114744067192078, "learning_rate": 2.9789915966386556e-05, "loss": 0.5359, "step": 10113 }, { "epoch": 12.94592, "grad_norm": 0.9109938144683838, "learning_rate": 2.9787915166066428e-05, "loss": 0.525, "step": 10114 }, { "epoch": 12.9472, "grad_norm": 1.0331107378005981, "learning_rate": 2.97859143657463e-05, "loss": 0.6145, "step": 10115 }, { "epoch": 12.94848, "grad_norm": 0.9183046221733093, "learning_rate": 2.9783913565426168e-05, "loss": 0.5607, "step": 10116 }, { "epoch": 12.94976, "grad_norm": 0.8793911337852478, "learning_rate": 2.9781912765106047e-05, "loss": 0.5091, "step": 10117 }, { "epoch": 12.95104, "grad_norm": 0.8715007305145264, "learning_rate": 2.977991196478592e-05, "loss": 0.5342, "step": 10118 }, { "epoch": 12.95232, "grad_norm": 0.9130722284317017, "learning_rate": 2.9777911164465787e-05, "loss": 0.569, "step": 10119 }, { "epoch": 12.9536, "grad_norm": 0.8778414130210876, "learning_rate": 2.977591036414566e-05, "loss": 0.5715, "step": 10120 }, { "epoch": 12.95488, "grad_norm": 0.8723252415657043, "learning_rate": 2.977390956382553e-05, "loss": 0.5556, "step": 10121 }, { "epoch": 12.95616, "grad_norm": 0.9245275259017944, "learning_rate": 2.9771908763505403e-05, "loss": 0.5706, "step": 10122 }, { "epoch": 12.95744, "grad_norm": 0.9028117656707764, "learning_rate": 2.9769907963185274e-05, "loss": 0.5435, "step": 10123 }, { "epoch": 12.95872, "grad_norm": 0.8952034115791321, "learning_rate": 2.976790716286515e-05, "loss": 0.5545, "step": 10124 }, { "epoch": 12.96, "grad_norm": 0.889771580696106, "learning_rate": 2.976590636254502e-05, "loss": 0.5124, "step": 10125 }, { "epoch": 12.96128, "grad_norm": 0.9668723940849304, "learning_rate": 2.9763905562224893e-05, "loss": 0.537, "step": 10126 }, { "epoch": 12.96256, "grad_norm": 0.8818986415863037, "learning_rate": 2.9761904761904762e-05, "loss": 
0.5641, "step": 10127 }, { "epoch": 12.96384, "grad_norm": 0.8750602006912231, "learning_rate": 2.9759903961584634e-05, "loss": 0.5355, "step": 10128 }, { "epoch": 12.96512, "grad_norm": 0.9368957877159119, "learning_rate": 2.9757903161264506e-05, "loss": 0.5498, "step": 10129 }, { "epoch": 12.9664, "grad_norm": 0.9112243056297302, "learning_rate": 2.9755902360944377e-05, "loss": 0.5243, "step": 10130 }, { "epoch": 12.96768, "grad_norm": 0.9702836871147156, "learning_rate": 2.9753901560624253e-05, "loss": 0.5832, "step": 10131 }, { "epoch": 12.96896, "grad_norm": 0.9459221363067627, "learning_rate": 2.9751900760304125e-05, "loss": 0.5452, "step": 10132 }, { "epoch": 12.97024, "grad_norm": 0.8926064968109131, "learning_rate": 2.9749899959983996e-05, "loss": 0.5324, "step": 10133 }, { "epoch": 12.97152, "grad_norm": 0.9436557292938232, "learning_rate": 2.9747899159663868e-05, "loss": 0.5705, "step": 10134 }, { "epoch": 12.9728, "grad_norm": 0.8792611956596375, "learning_rate": 2.9745898359343737e-05, "loss": 0.5387, "step": 10135 }, { "epoch": 12.97408, "grad_norm": 0.8789375424385071, "learning_rate": 2.974389755902361e-05, "loss": 0.4897, "step": 10136 }, { "epoch": 12.97536, "grad_norm": 0.9235910773277283, "learning_rate": 2.974189675870348e-05, "loss": 0.5667, "step": 10137 }, { "epoch": 12.97664, "grad_norm": 0.9125930070877075, "learning_rate": 2.9739895958383356e-05, "loss": 0.5704, "step": 10138 }, { "epoch": 12.97792, "grad_norm": 0.9023631811141968, "learning_rate": 2.9737895158063228e-05, "loss": 0.5521, "step": 10139 }, { "epoch": 12.9792, "grad_norm": 0.9152993559837341, "learning_rate": 2.97358943577431e-05, "loss": 0.4928, "step": 10140 }, { "epoch": 12.98048, "grad_norm": 0.9630506038665771, "learning_rate": 2.973389355742297e-05, "loss": 0.5829, "step": 10141 }, { "epoch": 12.98176, "grad_norm": 0.9099013209342957, "learning_rate": 2.9731892757102843e-05, "loss": 0.5566, "step": 10142 }, { "epoch": 12.98304, "grad_norm": 0.8991549015045166, 
"learning_rate": 2.972989195678271e-05, "loss": 0.5459, "step": 10143 }, { "epoch": 12.98432, "grad_norm": 0.9547131657600403, "learning_rate": 2.9727891156462583e-05, "loss": 0.5874, "step": 10144 }, { "epoch": 12.9856, "grad_norm": 0.9026567339897156, "learning_rate": 2.9725890356142462e-05, "loss": 0.547, "step": 10145 }, { "epoch": 12.98688, "grad_norm": 0.9058331847190857, "learning_rate": 2.972388955582233e-05, "loss": 0.52, "step": 10146 }, { "epoch": 12.98816, "grad_norm": 0.9603937864303589, "learning_rate": 2.9721888755502202e-05, "loss": 0.615, "step": 10147 }, { "epoch": 12.98944, "grad_norm": 0.9125460982322693, "learning_rate": 2.9719887955182074e-05, "loss": 0.5463, "step": 10148 }, { "epoch": 12.99072, "grad_norm": 0.9278149008750916, "learning_rate": 2.9717887154861946e-05, "loss": 0.5386, "step": 10149 }, { "epoch": 12.992, "grad_norm": 0.88197922706604, "learning_rate": 2.9715886354541818e-05, "loss": 0.5443, "step": 10150 }, { "epoch": 12.99328, "grad_norm": 0.9040613174438477, "learning_rate": 2.9713885554221686e-05, "loss": 0.5515, "step": 10151 }, { "epoch": 12.99456, "grad_norm": 0.9090338945388794, "learning_rate": 2.9711884753901565e-05, "loss": 0.5483, "step": 10152 }, { "epoch": 12.99584, "grad_norm": 0.9126954674720764, "learning_rate": 2.9709883953581437e-05, "loss": 0.6022, "step": 10153 }, { "epoch": 12.99712, "grad_norm": 0.9582985043525696, "learning_rate": 2.9707883153261305e-05, "loss": 0.5414, "step": 10154 }, { "epoch": 12.9984, "grad_norm": 0.9858940839767456, "learning_rate": 2.9705882352941177e-05, "loss": 0.5773, "step": 10155 }, { "epoch": 12.99968, "grad_norm": 0.9113115072250366, "learning_rate": 2.970388155262105e-05, "loss": 0.5455, "step": 10156 }, { "epoch": 13.00096, "grad_norm": 1.8516324758529663, "learning_rate": 2.970188075230092e-05, "loss": 0.9295, "step": 10157 }, { "epoch": 13.00224, "grad_norm": 0.8609241843223572, "learning_rate": 2.9699879951980793e-05, "loss": 0.4725, "step": 10158 }, { "epoch": 
13.00352, "grad_norm": 0.8947498202323914, "learning_rate": 2.9697879151660668e-05, "loss": 0.5303, "step": 10159 }, { "epoch": 13.0048, "grad_norm": 0.9149647951126099, "learning_rate": 2.969587835134054e-05, "loss": 0.5236, "step": 10160 }, { "epoch": 13.00608, "grad_norm": 0.9216925501823425, "learning_rate": 2.9693877551020412e-05, "loss": 0.5461, "step": 10161 }, { "epoch": 13.00736, "grad_norm": 0.9156137108802795, "learning_rate": 2.969187675070028e-05, "loss": 0.5692, "step": 10162 }, { "epoch": 13.00864, "grad_norm": 0.9287101030349731, "learning_rate": 2.9689875950380152e-05, "loss": 0.5375, "step": 10163 }, { "epoch": 13.00992, "grad_norm": 0.9227731227874756, "learning_rate": 2.9687875150060024e-05, "loss": 0.5584, "step": 10164 }, { "epoch": 13.0112, "grad_norm": 0.9401906132698059, "learning_rate": 2.9685874349739896e-05, "loss": 0.5354, "step": 10165 }, { "epoch": 13.01248, "grad_norm": 0.9380677938461304, "learning_rate": 2.968387354941977e-05, "loss": 0.5367, "step": 10166 }, { "epoch": 13.01376, "grad_norm": 0.9107190370559692, "learning_rate": 2.9681872749099643e-05, "loss": 0.515, "step": 10167 }, { "epoch": 13.01504, "grad_norm": 0.9699810147285461, "learning_rate": 2.9679871948779515e-05, "loss": 0.5413, "step": 10168 }, { "epoch": 13.01632, "grad_norm": 0.9228163361549377, "learning_rate": 2.9677871148459387e-05, "loss": 0.5589, "step": 10169 }, { "epoch": 13.0176, "grad_norm": 0.8890709280967712, "learning_rate": 2.9675870348139255e-05, "loss": 0.5434, "step": 10170 }, { "epoch": 13.01888, "grad_norm": 0.8726478815078735, "learning_rate": 2.9673869547819127e-05, "loss": 0.5035, "step": 10171 }, { "epoch": 13.02016, "grad_norm": 0.9581865072250366, "learning_rate": 2.9671868747499e-05, "loss": 0.5897, "step": 10172 }, { "epoch": 13.02144, "grad_norm": 0.9103356599807739, "learning_rate": 2.9669867947178874e-05, "loss": 0.5305, "step": 10173 }, { "epoch": 13.02272, "grad_norm": 0.9056451916694641, "learning_rate": 2.9667867146858746e-05, 
"loss": 0.5249, "step": 10174 }, { "epoch": 13.024, "grad_norm": 0.9359303712844849, "learning_rate": 2.9665866346538618e-05, "loss": 0.552, "step": 10175 }, { "epoch": 13.02528, "grad_norm": 0.9274691343307495, "learning_rate": 2.966386554621849e-05, "loss": 0.5194, "step": 10176 }, { "epoch": 13.02656, "grad_norm": 0.8577048182487488, "learning_rate": 2.966186474589836e-05, "loss": 0.488, "step": 10177 }, { "epoch": 13.02784, "grad_norm": 0.8899839520454407, "learning_rate": 2.965986394557823e-05, "loss": 0.5371, "step": 10178 }, { "epoch": 13.02912, "grad_norm": 0.8708844184875488, "learning_rate": 2.9657863145258102e-05, "loss": 0.5118, "step": 10179 }, { "epoch": 13.0304, "grad_norm": 0.9025720357894897, "learning_rate": 2.965586234493798e-05, "loss": 0.5332, "step": 10180 }, { "epoch": 13.03168, "grad_norm": 0.9257388710975647, "learning_rate": 2.965386154461785e-05, "loss": 0.6321, "step": 10181 }, { "epoch": 13.03296, "grad_norm": 0.8940935730934143, "learning_rate": 2.965186074429772e-05, "loss": 0.5406, "step": 10182 }, { "epoch": 13.03424, "grad_norm": 0.9493997693061829, "learning_rate": 2.9649859943977593e-05, "loss": 0.5364, "step": 10183 }, { "epoch": 13.03552, "grad_norm": 0.8869070410728455, "learning_rate": 2.9647859143657464e-05, "loss": 0.5076, "step": 10184 }, { "epoch": 13.0368, "grad_norm": 0.9130921363830566, "learning_rate": 2.9645858343337336e-05, "loss": 0.553, "step": 10185 }, { "epoch": 13.03808, "grad_norm": 0.8858230113983154, "learning_rate": 2.9643857543017205e-05, "loss": 0.471, "step": 10186 }, { "epoch": 13.03936, "grad_norm": 0.8924762010574341, "learning_rate": 2.9641856742697083e-05, "loss": 0.5224, "step": 10187 }, { "epoch": 13.04064, "grad_norm": 0.9254968762397766, "learning_rate": 2.9639855942376955e-05, "loss": 0.5203, "step": 10188 }, { "epoch": 13.04192, "grad_norm": 0.9559713006019592, "learning_rate": 2.9637855142056824e-05, "loss": 0.5552, "step": 10189 }, { "epoch": 13.0432, "grad_norm": 0.9039125442504883, 
"learning_rate": 2.9635854341736696e-05, "loss": 0.5119, "step": 10190 }, { "epoch": 13.04448, "grad_norm": 0.9470018744468689, "learning_rate": 2.9633853541416567e-05, "loss": 0.5233, "step": 10191 }, { "epoch": 13.04576, "grad_norm": 0.991151750087738, "learning_rate": 2.963185274109644e-05, "loss": 0.5534, "step": 10192 }, { "epoch": 13.04704, "grad_norm": 0.9508792161941528, "learning_rate": 2.962985194077631e-05, "loss": 0.4863, "step": 10193 }, { "epoch": 13.04832, "grad_norm": 0.9612112045288086, "learning_rate": 2.9627851140456186e-05, "loss": 0.5613, "step": 10194 }, { "epoch": 13.0496, "grad_norm": 0.9475077986717224, "learning_rate": 2.9625850340136058e-05, "loss": 0.5695, "step": 10195 }, { "epoch": 13.05088, "grad_norm": 0.9162499308586121, "learning_rate": 2.962384953981593e-05, "loss": 0.5628, "step": 10196 }, { "epoch": 13.05216, "grad_norm": 0.9611510038375854, "learning_rate": 2.96218487394958e-05, "loss": 0.5155, "step": 10197 }, { "epoch": 13.05344, "grad_norm": 0.9086456298828125, "learning_rate": 2.961984793917567e-05, "loss": 0.5197, "step": 10198 }, { "epoch": 13.05472, "grad_norm": 0.9529975652694702, "learning_rate": 2.9617847138855542e-05, "loss": 0.5468, "step": 10199 }, { "epoch": 13.056, "grad_norm": 0.9286540746688843, "learning_rate": 2.9615846338535414e-05, "loss": 0.5342, "step": 10200 }, { "epoch": 13.05728, "grad_norm": 0.9417791366577148, "learning_rate": 2.961384553821529e-05, "loss": 0.5515, "step": 10201 }, { "epoch": 13.05856, "grad_norm": 0.9122342467308044, "learning_rate": 2.961184473789516e-05, "loss": 0.4782, "step": 10202 }, { "epoch": 13.05984, "grad_norm": 0.9562501311302185, "learning_rate": 2.9609843937575033e-05, "loss": 0.5577, "step": 10203 }, { "epoch": 13.06112, "grad_norm": 0.9384812116622925, "learning_rate": 2.9607843137254905e-05, "loss": 0.5087, "step": 10204 }, { "epoch": 13.0624, "grad_norm": 0.9272902607917786, "learning_rate": 2.9605842336934773e-05, "loss": 0.4929, "step": 10205 }, { "epoch": 
13.06368, "grad_norm": 0.9776669144630432, "learning_rate": 2.9603841536614645e-05, "loss": 0.5292, "step": 10206 }, { "epoch": 13.06496, "grad_norm": 0.9316274523735046, "learning_rate": 2.9601840736294517e-05, "loss": 0.5501, "step": 10207 }, { "epoch": 13.06624, "grad_norm": 0.9690887928009033, "learning_rate": 2.9599839935974392e-05, "loss": 0.5342, "step": 10208 }, { "epoch": 13.06752, "grad_norm": 0.8935588002204895, "learning_rate": 2.9597839135654264e-05, "loss": 0.5046, "step": 10209 }, { "epoch": 13.0688, "grad_norm": 0.9328128099441528, "learning_rate": 2.9595838335334136e-05, "loss": 0.5601, "step": 10210 }, { "epoch": 13.07008, "grad_norm": 0.9481015801429749, "learning_rate": 2.9593837535014008e-05, "loss": 0.5336, "step": 10211 }, { "epoch": 13.07136, "grad_norm": 0.9740045070648193, "learning_rate": 2.959183673469388e-05, "loss": 0.5383, "step": 10212 }, { "epoch": 13.07264, "grad_norm": 0.9591369032859802, "learning_rate": 2.958983593437375e-05, "loss": 0.5688, "step": 10213 }, { "epoch": 13.07392, "grad_norm": 0.8902487754821777, "learning_rate": 2.958783513405362e-05, "loss": 0.5243, "step": 10214 }, { "epoch": 13.0752, "grad_norm": 0.8981714844703674, "learning_rate": 2.95858343337335e-05, "loss": 0.504, "step": 10215 }, { "epoch": 13.07648, "grad_norm": 0.857538640499115, "learning_rate": 2.9583833533413367e-05, "loss": 0.4907, "step": 10216 }, { "epoch": 13.07776, "grad_norm": 0.9131780862808228, "learning_rate": 2.958183273309324e-05, "loss": 0.5018, "step": 10217 }, { "epoch": 13.079039999999999, "grad_norm": 0.9661425352096558, "learning_rate": 2.957983193277311e-05, "loss": 0.5639, "step": 10218 }, { "epoch": 13.08032, "grad_norm": 0.9604724645614624, "learning_rate": 2.9577831132452983e-05, "loss": 0.5433, "step": 10219 }, { "epoch": 13.0816, "grad_norm": 0.9069200158119202, "learning_rate": 2.9575830332132855e-05, "loss": 0.5409, "step": 10220 }, { "epoch": 13.08288, "grad_norm": 0.9007838368415833, "learning_rate": 
2.9573829531812723e-05, "loss": 0.5377, "step": 10221 }, { "epoch": 13.08416, "grad_norm": 0.9173440337181091, "learning_rate": 2.9571828731492595e-05, "loss": 0.5275, "step": 10222 }, { "epoch": 13.08544, "grad_norm": 0.936603844165802, "learning_rate": 2.9569827931172474e-05, "loss": 0.5257, "step": 10223 }, { "epoch": 13.08672, "grad_norm": 0.9958267211914062, "learning_rate": 2.9567827130852342e-05, "loss": 0.5632, "step": 10224 }, { "epoch": 13.088, "grad_norm": 0.9502796530723572, "learning_rate": 2.9565826330532214e-05, "loss": 0.5396, "step": 10225 }, { "epoch": 13.08928, "grad_norm": 0.8708899617195129, "learning_rate": 2.9563825530212086e-05, "loss": 0.4855, "step": 10226 }, { "epoch": 13.09056, "grad_norm": 0.9281712770462036, "learning_rate": 2.9561824729891958e-05, "loss": 0.4983, "step": 10227 }, { "epoch": 13.09184, "grad_norm": 0.943922221660614, "learning_rate": 2.955982392957183e-05, "loss": 0.5545, "step": 10228 }, { "epoch": 13.09312, "grad_norm": 0.9381184577941895, "learning_rate": 2.9557823129251698e-05, "loss": 0.5406, "step": 10229 }, { "epoch": 13.0944, "grad_norm": 0.9126680493354797, "learning_rate": 2.9555822328931577e-05, "loss": 0.5364, "step": 10230 }, { "epoch": 13.09568, "grad_norm": 0.9556214809417725, "learning_rate": 2.955382152861145e-05, "loss": 0.5298, "step": 10231 }, { "epoch": 13.09696, "grad_norm": 0.9941635727882385, "learning_rate": 2.9551820728291317e-05, "loss": 0.5511, "step": 10232 }, { "epoch": 13.09824, "grad_norm": 0.9520924687385559, "learning_rate": 2.954981992797119e-05, "loss": 0.5176, "step": 10233 }, { "epoch": 13.09952, "grad_norm": 1.0025166273117065, "learning_rate": 2.954781912765106e-05, "loss": 0.5434, "step": 10234 }, { "epoch": 13.1008, "grad_norm": 0.9303215146064758, "learning_rate": 2.9545818327330933e-05, "loss": 0.5081, "step": 10235 }, { "epoch": 13.10208, "grad_norm": 0.9125444293022156, "learning_rate": 2.9543817527010804e-05, "loss": 0.5134, "step": 10236 }, { "epoch": 13.10336, 
"grad_norm": 0.9002273678779602, "learning_rate": 2.954181672669068e-05, "loss": 0.4885, "step": 10237 }, { "epoch": 13.10464, "grad_norm": 0.9505068063735962, "learning_rate": 2.953981592637055e-05, "loss": 0.5645, "step": 10238 }, { "epoch": 13.10592, "grad_norm": 0.9050679206848145, "learning_rate": 2.9537815126050423e-05, "loss": 0.5322, "step": 10239 }, { "epoch": 13.1072, "grad_norm": 0.9206121563911438, "learning_rate": 2.9535814325730292e-05, "loss": 0.5077, "step": 10240 }, { "epoch": 13.10848, "grad_norm": 0.9037232398986816, "learning_rate": 2.9533813525410164e-05, "loss": 0.5202, "step": 10241 }, { "epoch": 13.10976, "grad_norm": 0.9261408448219299, "learning_rate": 2.9531812725090036e-05, "loss": 0.5514, "step": 10242 }, { "epoch": 13.11104, "grad_norm": 0.9304765462875366, "learning_rate": 2.9529811924769907e-05, "loss": 0.5671, "step": 10243 }, { "epoch": 13.11232, "grad_norm": 0.8699920773506165, "learning_rate": 2.9527811124449783e-05, "loss": 0.5076, "step": 10244 }, { "epoch": 13.1136, "grad_norm": 0.9810715913772583, "learning_rate": 2.9525810324129655e-05, "loss": 0.6187, "step": 10245 }, { "epoch": 13.11488, "grad_norm": 0.9661498665809631, "learning_rate": 2.9523809523809526e-05, "loss": 0.575, "step": 10246 }, { "epoch": 13.11616, "grad_norm": 0.9530900120735168, "learning_rate": 2.9521808723489398e-05, "loss": 0.5207, "step": 10247 }, { "epoch": 13.11744, "grad_norm": 0.8980867266654968, "learning_rate": 2.9519807923169267e-05, "loss": 0.48, "step": 10248 }, { "epoch": 13.11872, "grad_norm": 1.0098669528961182, "learning_rate": 2.951780712284914e-05, "loss": 0.5863, "step": 10249 }, { "epoch": 13.12, "grad_norm": 0.8966968655586243, "learning_rate": 2.951580632252901e-05, "loss": 0.5161, "step": 10250 }, { "epoch": 13.12128, "grad_norm": 0.8702352046966553, "learning_rate": 2.9513805522208886e-05, "loss": 0.4706, "step": 10251 }, { "epoch": 13.12256, "grad_norm": 0.923116147518158, "learning_rate": 2.9511804721888758e-05, "loss": 0.5097, 
"step": 10252 }, { "epoch": 13.12384, "grad_norm": 0.9864121079444885, "learning_rate": 2.950980392156863e-05, "loss": 0.5453, "step": 10253 }, { "epoch": 13.12512, "grad_norm": 0.9209245443344116, "learning_rate": 2.95078031212485e-05, "loss": 0.5178, "step": 10254 }, { "epoch": 13.1264, "grad_norm": 0.9390770792961121, "learning_rate": 2.9505802320928373e-05, "loss": 0.5423, "step": 10255 }, { "epoch": 13.12768, "grad_norm": 0.9811953902244568, "learning_rate": 2.950380152060824e-05, "loss": 0.5938, "step": 10256 }, { "epoch": 13.12896, "grad_norm": 0.9948422908782959, "learning_rate": 2.9501800720288113e-05, "loss": 0.6015, "step": 10257 }, { "epoch": 13.13024, "grad_norm": 0.9334530830383301, "learning_rate": 2.9499799919967992e-05, "loss": 0.5042, "step": 10258 }, { "epoch": 13.13152, "grad_norm": 0.887524425983429, "learning_rate": 2.949779911964786e-05, "loss": 0.5115, "step": 10259 }, { "epoch": 13.1328, "grad_norm": 0.9282869696617126, "learning_rate": 2.9495798319327732e-05, "loss": 0.5348, "step": 10260 }, { "epoch": 13.13408, "grad_norm": 0.8602845072746277, "learning_rate": 2.9493797519007604e-05, "loss": 0.477, "step": 10261 }, { "epoch": 13.13536, "grad_norm": 0.9811587333679199, "learning_rate": 2.9491796718687476e-05, "loss": 0.6148, "step": 10262 }, { "epoch": 13.13664, "grad_norm": 0.938101589679718, "learning_rate": 2.9489795918367348e-05, "loss": 0.5469, "step": 10263 }, { "epoch": 13.13792, "grad_norm": 0.938423216342926, "learning_rate": 2.9487795118047216e-05, "loss": 0.5415, "step": 10264 }, { "epoch": 13.1392, "grad_norm": 0.9609931111335754, "learning_rate": 2.9485794317727095e-05, "loss": 0.5779, "step": 10265 }, { "epoch": 13.14048, "grad_norm": 0.9402124881744385, "learning_rate": 2.9483793517406967e-05, "loss": 0.5688, "step": 10266 }, { "epoch": 13.14176, "grad_norm": 0.9588215351104736, "learning_rate": 2.9481792717086835e-05, "loss": 0.5559, "step": 10267 }, { "epoch": 13.14304, "grad_norm": 0.9343993663787842, "learning_rate": 
2.9479791916766707e-05, "loss": 0.5233, "step": 10268 }, { "epoch": 13.14432, "grad_norm": 0.8951725363731384, "learning_rate": 2.947779111644658e-05, "loss": 0.4691, "step": 10269 }, { "epoch": 13.1456, "grad_norm": 0.9678958654403687, "learning_rate": 2.947579031612645e-05, "loss": 0.5648, "step": 10270 }, { "epoch": 13.14688, "grad_norm": 0.973098874092102, "learning_rate": 2.9473789515806323e-05, "loss": 0.527, "step": 10271 }, { "epoch": 13.14816, "grad_norm": 0.9279984831809998, "learning_rate": 2.9471788715486198e-05, "loss": 0.4758, "step": 10272 }, { "epoch": 13.14944, "grad_norm": 0.9784411787986755, "learning_rate": 2.946978791516607e-05, "loss": 0.5491, "step": 10273 }, { "epoch": 13.15072, "grad_norm": 0.9150949120521545, "learning_rate": 2.9467787114845942e-05, "loss": 0.5418, "step": 10274 }, { "epoch": 13.152, "grad_norm": 0.8972795605659485, "learning_rate": 2.946578631452581e-05, "loss": 0.5065, "step": 10275 }, { "epoch": 13.15328, "grad_norm": 0.8859349489212036, "learning_rate": 2.9463785514205682e-05, "loss": 0.5211, "step": 10276 }, { "epoch": 13.15456, "grad_norm": 0.9786564707756042, "learning_rate": 2.9461784713885554e-05, "loss": 0.5468, "step": 10277 }, { "epoch": 13.15584, "grad_norm": 0.9281904101371765, "learning_rate": 2.9459783913565426e-05, "loss": 0.5303, "step": 10278 }, { "epoch": 13.15712, "grad_norm": 0.8598402142524719, "learning_rate": 2.94577831132453e-05, "loss": 0.4675, "step": 10279 }, { "epoch": 13.1584, "grad_norm": 0.881607174873352, "learning_rate": 2.9455782312925173e-05, "loss": 0.4969, "step": 10280 }, { "epoch": 13.15968, "grad_norm": 0.8797462582588196, "learning_rate": 2.9453781512605045e-05, "loss": 0.5216, "step": 10281 }, { "epoch": 13.16096, "grad_norm": 0.9314226508140564, "learning_rate": 2.9451780712284917e-05, "loss": 0.5117, "step": 10282 }, { "epoch": 13.16224, "grad_norm": 0.9078558683395386, "learning_rate": 2.9449779911964785e-05, "loss": 0.5344, "step": 10283 }, { "epoch": 13.16352, "grad_norm": 
0.972720742225647, "learning_rate": 2.9447779111644657e-05, "loss": 0.5651, "step": 10284 }, { "epoch": 13.1648, "grad_norm": 0.8826903104782104, "learning_rate": 2.944577831132453e-05, "loss": 0.4932, "step": 10285 }, { "epoch": 13.166080000000001, "grad_norm": 0.9253196120262146, "learning_rate": 2.9443777511004404e-05, "loss": 0.5725, "step": 10286 }, { "epoch": 13.16736, "grad_norm": 0.9346468448638916, "learning_rate": 2.9441776710684276e-05, "loss": 0.5665, "step": 10287 }, { "epoch": 13.16864, "grad_norm": 0.86590176820755, "learning_rate": 2.9439775910364148e-05, "loss": 0.4922, "step": 10288 }, { "epoch": 13.16992, "grad_norm": 0.8929559588432312, "learning_rate": 2.943777511004402e-05, "loss": 0.5051, "step": 10289 }, { "epoch": 13.1712, "grad_norm": 0.9785338044166565, "learning_rate": 2.943577430972389e-05, "loss": 0.565, "step": 10290 }, { "epoch": 13.17248, "grad_norm": 0.9786810278892517, "learning_rate": 2.943377350940376e-05, "loss": 0.5285, "step": 10291 }, { "epoch": 13.17376, "grad_norm": 1.0224378108978271, "learning_rate": 2.9431772709083632e-05, "loss": 0.6223, "step": 10292 }, { "epoch": 13.17504, "grad_norm": 0.8732693195343018, "learning_rate": 2.942977190876351e-05, "loss": 0.4957, "step": 10293 }, { "epoch": 13.17632, "grad_norm": 0.8983555436134338, "learning_rate": 2.942777110844338e-05, "loss": 0.5085, "step": 10294 }, { "epoch": 13.1776, "grad_norm": 0.9694886803627014, "learning_rate": 2.942577030812325e-05, "loss": 0.534, "step": 10295 }, { "epoch": 13.17888, "grad_norm": 0.9326276183128357, "learning_rate": 2.9423769507803123e-05, "loss": 0.5244, "step": 10296 }, { "epoch": 13.18016, "grad_norm": 0.9629067778587341, "learning_rate": 2.9421768707482994e-05, "loss": 0.5082, "step": 10297 }, { "epoch": 13.18144, "grad_norm": 0.8940865397453308, "learning_rate": 2.9419767907162866e-05, "loss": 0.4947, "step": 10298 }, { "epoch": 13.18272, "grad_norm": 0.9360054135322571, "learning_rate": 2.9417767106842735e-05, "loss": 0.5556, "step": 
10299 }, { "epoch": 13.184, "grad_norm": 0.8930971026420593, "learning_rate": 2.9415766306522613e-05, "loss": 0.4866, "step": 10300 }, { "epoch": 13.18528, "grad_norm": 0.9753515720367432, "learning_rate": 2.9413765506202485e-05, "loss": 0.4929, "step": 10301 }, { "epoch": 13.18656, "grad_norm": 0.8833822011947632, "learning_rate": 2.9411764705882354e-05, "loss": 0.5081, "step": 10302 }, { "epoch": 13.18784, "grad_norm": 0.9951672554016113, "learning_rate": 2.9409763905562226e-05, "loss": 0.5509, "step": 10303 }, { "epoch": 13.18912, "grad_norm": 0.9447710514068604, "learning_rate": 2.9407763105242097e-05, "loss": 0.5132, "step": 10304 }, { "epoch": 13.1904, "grad_norm": 0.960518479347229, "learning_rate": 2.940576230492197e-05, "loss": 0.5609, "step": 10305 }, { "epoch": 13.19168, "grad_norm": 0.9514713883399963, "learning_rate": 2.940376150460184e-05, "loss": 0.5378, "step": 10306 }, { "epoch": 13.19296, "grad_norm": 0.9678971767425537, "learning_rate": 2.9401760704281716e-05, "loss": 0.5464, "step": 10307 }, { "epoch": 13.19424, "grad_norm": 0.9750454425811768, "learning_rate": 2.9399759903961588e-05, "loss": 0.5489, "step": 10308 }, { "epoch": 13.19552, "grad_norm": 0.9416602849960327, "learning_rate": 2.939775910364146e-05, "loss": 0.5295, "step": 10309 }, { "epoch": 13.1968, "grad_norm": 0.9217522144317627, "learning_rate": 2.939575830332133e-05, "loss": 0.562, "step": 10310 }, { "epoch": 13.19808, "grad_norm": 0.8989900946617126, "learning_rate": 2.93937575030012e-05, "loss": 0.5067, "step": 10311 }, { "epoch": 13.19936, "grad_norm": 0.9681139588356018, "learning_rate": 2.9391756702681072e-05, "loss": 0.5817, "step": 10312 }, { "epoch": 13.20064, "grad_norm": 0.9325006604194641, "learning_rate": 2.9389755902360944e-05, "loss": 0.5379, "step": 10313 }, { "epoch": 13.20192, "grad_norm": 0.9002286195755005, "learning_rate": 2.938775510204082e-05, "loss": 0.5274, "step": 10314 }, { "epoch": 13.2032, "grad_norm": 0.9191522598266602, "learning_rate": 
2.938575430172069e-05, "loss": 0.5265, "step": 10315 }, { "epoch": 13.20448, "grad_norm": 0.9868195056915283, "learning_rate": 2.9383753501400563e-05, "loss": 0.5672, "step": 10316 }, { "epoch": 13.20576, "grad_norm": 0.935142457485199, "learning_rate": 2.9381752701080435e-05, "loss": 0.559, "step": 10317 }, { "epoch": 13.20704, "grad_norm": 0.9310742616653442, "learning_rate": 2.9379751900760303e-05, "loss": 0.5183, "step": 10318 }, { "epoch": 13.20832, "grad_norm": 0.893144428730011, "learning_rate": 2.9377751100440175e-05, "loss": 0.5092, "step": 10319 }, { "epoch": 13.2096, "grad_norm": 0.9234650135040283, "learning_rate": 2.9375750300120047e-05, "loss": 0.5185, "step": 10320 }, { "epoch": 13.21088, "grad_norm": 0.868157684803009, "learning_rate": 2.9373749499799922e-05, "loss": 0.4812, "step": 10321 }, { "epoch": 13.21216, "grad_norm": 0.900688886642456, "learning_rate": 2.9371748699479794e-05, "loss": 0.5181, "step": 10322 }, { "epoch": 13.21344, "grad_norm": 0.9493388533592224, "learning_rate": 2.9369747899159666e-05, "loss": 0.5613, "step": 10323 }, { "epoch": 13.21472, "grad_norm": 0.9593266844749451, "learning_rate": 2.9367747098839538e-05, "loss": 0.5462, "step": 10324 }, { "epoch": 13.216, "grad_norm": 1.0145825147628784, "learning_rate": 2.936574629851941e-05, "loss": 0.5891, "step": 10325 }, { "epoch": 13.21728, "grad_norm": 0.9658340811729431, "learning_rate": 2.936374549819928e-05, "loss": 0.533, "step": 10326 }, { "epoch": 13.21856, "grad_norm": 0.8957772850990295, "learning_rate": 2.936174469787915e-05, "loss": 0.5246, "step": 10327 }, { "epoch": 13.21984, "grad_norm": 0.9555619955062866, "learning_rate": 2.935974389755903e-05, "loss": 0.5181, "step": 10328 }, { "epoch": 13.22112, "grad_norm": 0.9531823396682739, "learning_rate": 2.9357743097238897e-05, "loss": 0.5459, "step": 10329 }, { "epoch": 13.2224, "grad_norm": 0.8920832276344299, "learning_rate": 2.935574229691877e-05, "loss": 0.5796, "step": 10330 }, { "epoch": 13.22368, "grad_norm": 
0.9434327483177185, "learning_rate": 2.935374149659864e-05, "loss": 0.5694, "step": 10331 }, { "epoch": 13.22496, "grad_norm": 0.9526799321174622, "learning_rate": 2.9351740696278513e-05, "loss": 0.5646, "step": 10332 }, { "epoch": 13.22624, "grad_norm": 1.0042227506637573, "learning_rate": 2.9349739895958385e-05, "loss": 0.5437, "step": 10333 }, { "epoch": 13.22752, "grad_norm": 0.974702775478363, "learning_rate": 2.9347739095638253e-05, "loss": 0.5569, "step": 10334 }, { "epoch": 13.2288, "grad_norm": 0.9160645604133606, "learning_rate": 2.9345738295318125e-05, "loss": 0.5174, "step": 10335 }, { "epoch": 13.23008, "grad_norm": 0.9802337884902954, "learning_rate": 2.9343737494998004e-05, "loss": 0.578, "step": 10336 }, { "epoch": 13.23136, "grad_norm": 0.9759039878845215, "learning_rate": 2.9341736694677872e-05, "loss": 0.5744, "step": 10337 }, { "epoch": 13.23264, "grad_norm": 0.9348410367965698, "learning_rate": 2.9339735894357744e-05, "loss": 0.5304, "step": 10338 }, { "epoch": 13.23392, "grad_norm": 0.9038177728652954, "learning_rate": 2.9337735094037616e-05, "loss": 0.5301, "step": 10339 }, { "epoch": 13.2352, "grad_norm": 0.880155086517334, "learning_rate": 2.9335734293717488e-05, "loss": 0.496, "step": 10340 }, { "epoch": 13.23648, "grad_norm": 0.9181184768676758, "learning_rate": 2.933373349339736e-05, "loss": 0.4825, "step": 10341 }, { "epoch": 13.23776, "grad_norm": 0.9265780448913574, "learning_rate": 2.9331732693077228e-05, "loss": 0.5313, "step": 10342 }, { "epoch": 13.23904, "grad_norm": 0.9894605875015259, "learning_rate": 2.9329731892757107e-05, "loss": 0.5617, "step": 10343 }, { "epoch": 13.24032, "grad_norm": 0.9149695634841919, "learning_rate": 2.932773109243698e-05, "loss": 0.5262, "step": 10344 }, { "epoch": 13.2416, "grad_norm": 0.9303181171417236, "learning_rate": 2.9325730292116847e-05, "loss": 0.542, "step": 10345 }, { "epoch": 13.24288, "grad_norm": 0.958263099193573, "learning_rate": 2.932372949179672e-05, "loss": 0.5283, "step": 10346 
}, { "epoch": 13.24416, "grad_norm": 0.9637152552604675, "learning_rate": 2.932172869147659e-05, "loss": 0.5236, "step": 10347 }, { "epoch": 13.24544, "grad_norm": 0.9949811100959778, "learning_rate": 2.9319727891156463e-05, "loss": 0.5897, "step": 10348 }, { "epoch": 13.24672, "grad_norm": 0.988006055355072, "learning_rate": 2.9317727090836334e-05, "loss": 0.4802, "step": 10349 }, { "epoch": 13.248, "grad_norm": 0.9795376062393188, "learning_rate": 2.931572629051621e-05, "loss": 0.5514, "step": 10350 }, { "epoch": 13.24928, "grad_norm": 0.9923257827758789, "learning_rate": 2.931372549019608e-05, "loss": 0.5244, "step": 10351 }, { "epoch": 13.25056, "grad_norm": 0.9084767699241638, "learning_rate": 2.9311724689875953e-05, "loss": 0.5395, "step": 10352 }, { "epoch": 13.25184, "grad_norm": 0.9532386660575867, "learning_rate": 2.9309723889555822e-05, "loss": 0.5635, "step": 10353 }, { "epoch": 13.25312, "grad_norm": 0.9513797163963318, "learning_rate": 2.9307723089235694e-05, "loss": 0.5299, "step": 10354 }, { "epoch": 13.2544, "grad_norm": 0.9385073184967041, "learning_rate": 2.9305722288915566e-05, "loss": 0.5027, "step": 10355 }, { "epoch": 13.25568, "grad_norm": 0.9745035767555237, "learning_rate": 2.9303721488595437e-05, "loss": 0.4923, "step": 10356 }, { "epoch": 13.25696, "grad_norm": 0.9193017482757568, "learning_rate": 2.9301720688275313e-05, "loss": 0.5418, "step": 10357 }, { "epoch": 13.25824, "grad_norm": 0.9343931674957275, "learning_rate": 2.9299719887955185e-05, "loss": 0.5197, "step": 10358 }, { "epoch": 13.25952, "grad_norm": 0.954754114151001, "learning_rate": 2.9297719087635056e-05, "loss": 0.5203, "step": 10359 }, { "epoch": 13.2608, "grad_norm": 0.9113470911979675, "learning_rate": 2.9295718287314928e-05, "loss": 0.525, "step": 10360 }, { "epoch": 13.26208, "grad_norm": 0.94349205493927, "learning_rate": 2.9293717486994797e-05, "loss": 0.5263, "step": 10361 }, { "epoch": 13.26336, "grad_norm": 0.980129063129425, "learning_rate": 
2.929171668667467e-05, "loss": 0.5881, "step": 10362 }, { "epoch": 13.26464, "grad_norm": 0.9003955125808716, "learning_rate": 2.928971588635454e-05, "loss": 0.5014, "step": 10363 }, { "epoch": 13.26592, "grad_norm": 0.9492081999778748, "learning_rate": 2.928771508603442e-05, "loss": 0.5945, "step": 10364 }, { "epoch": 13.2672, "grad_norm": 0.9139607548713684, "learning_rate": 2.9285714285714288e-05, "loss": 0.4997, "step": 10365 }, { "epoch": 13.26848, "grad_norm": 0.9153253436088562, "learning_rate": 2.928371348539416e-05, "loss": 0.5104, "step": 10366 }, { "epoch": 13.26976, "grad_norm": 0.944437563419342, "learning_rate": 2.928171268507403e-05, "loss": 0.5496, "step": 10367 }, { "epoch": 13.27104, "grad_norm": 0.9628861546516418, "learning_rate": 2.9279711884753903e-05, "loss": 0.5243, "step": 10368 }, { "epoch": 13.27232, "grad_norm": 0.9358019232749939, "learning_rate": 2.927771108443377e-05, "loss": 0.5218, "step": 10369 }, { "epoch": 13.2736, "grad_norm": 0.9231796860694885, "learning_rate": 2.9275710284113643e-05, "loss": 0.5187, "step": 10370 }, { "epoch": 13.27488, "grad_norm": 0.9316679239273071, "learning_rate": 2.9273709483793522e-05, "loss": 0.5469, "step": 10371 }, { "epoch": 13.27616, "grad_norm": 0.8943392634391785, "learning_rate": 2.9271708683473394e-05, "loss": 0.4963, "step": 10372 }, { "epoch": 13.27744, "grad_norm": 0.9293007850646973, "learning_rate": 2.9269707883153262e-05, "loss": 0.5534, "step": 10373 }, { "epoch": 13.27872, "grad_norm": 0.9566161632537842, "learning_rate": 2.9267707082833134e-05, "loss": 0.6032, "step": 10374 }, { "epoch": 13.28, "grad_norm": 0.945538341999054, "learning_rate": 2.9265706282513006e-05, "loss": 0.525, "step": 10375 }, { "epoch": 13.28128, "grad_norm": 0.9262788891792297, "learning_rate": 2.9263705482192878e-05, "loss": 0.5087, "step": 10376 }, { "epoch": 13.28256, "grad_norm": 0.9590131044387817, "learning_rate": 2.9261704681872746e-05, "loss": 0.4981, "step": 10377 }, { "epoch": 13.28384, "grad_norm": 
0.9247324466705322, "learning_rate": 2.9259703881552625e-05, "loss": 0.5045, "step": 10378 }, { "epoch": 13.28512, "grad_norm": 0.9148200154304504, "learning_rate": 2.9257703081232497e-05, "loss": 0.4692, "step": 10379 }, { "epoch": 13.2864, "grad_norm": 0.9700202941894531, "learning_rate": 2.925570228091237e-05, "loss": 0.5181, "step": 10380 }, { "epoch": 13.28768, "grad_norm": 0.9987017512321472, "learning_rate": 2.9253701480592237e-05, "loss": 0.5395, "step": 10381 }, { "epoch": 13.28896, "grad_norm": 1.037541151046753, "learning_rate": 2.925170068027211e-05, "loss": 0.5464, "step": 10382 }, { "epoch": 13.29024, "grad_norm": 0.979844868183136, "learning_rate": 2.924969987995198e-05, "loss": 0.5191, "step": 10383 }, { "epoch": 13.29152, "grad_norm": 0.9726676344871521, "learning_rate": 2.9247699079631853e-05, "loss": 0.5464, "step": 10384 }, { "epoch": 13.2928, "grad_norm": 0.9922785758972168, "learning_rate": 2.9245698279311728e-05, "loss": 0.5719, "step": 10385 }, { "epoch": 13.29408, "grad_norm": 0.9066249132156372, "learning_rate": 2.92436974789916e-05, "loss": 0.5037, "step": 10386 }, { "epoch": 13.29536, "grad_norm": 0.9383981823921204, "learning_rate": 2.9241696678671472e-05, "loss": 0.5398, "step": 10387 }, { "epoch": 13.29664, "grad_norm": 0.9474811553955078, "learning_rate": 2.9239695878351344e-05, "loss": 0.5275, "step": 10388 }, { "epoch": 13.29792, "grad_norm": 0.9555049538612366, "learning_rate": 2.9237695078031212e-05, "loss": 0.5737, "step": 10389 }, { "epoch": 13.2992, "grad_norm": 0.9900108575820923, "learning_rate": 2.9235694277711084e-05, "loss": 0.5522, "step": 10390 }, { "epoch": 13.30048, "grad_norm": 0.9271486401557922, "learning_rate": 2.9233693477390956e-05, "loss": 0.5284, "step": 10391 }, { "epoch": 13.30176, "grad_norm": 0.9608096480369568, "learning_rate": 2.923169267707083e-05, "loss": 0.5538, "step": 10392 }, { "epoch": 13.30304, "grad_norm": 0.9278596639633179, "learning_rate": 2.9229691876750703e-05, "loss": 0.5174, "step": 10393 
}, { "epoch": 13.30432, "grad_norm": 0.9708346128463745, "learning_rate": 2.9227691076430575e-05, "loss": 0.5499, "step": 10394 }, { "epoch": 13.3056, "grad_norm": 0.9422946572303772, "learning_rate": 2.9225690276110447e-05, "loss": 0.5444, "step": 10395 }, { "epoch": 13.30688, "grad_norm": 0.9658584594726562, "learning_rate": 2.922368947579032e-05, "loss": 0.4903, "step": 10396 }, { "epoch": 13.30816, "grad_norm": 0.978703498840332, "learning_rate": 2.9221688675470187e-05, "loss": 0.5569, "step": 10397 }, { "epoch": 13.30944, "grad_norm": 0.9500980973243713, "learning_rate": 2.921968787515006e-05, "loss": 0.5384, "step": 10398 }, { "epoch": 13.31072, "grad_norm": 0.9386541247367859, "learning_rate": 2.9217687074829937e-05, "loss": 0.5474, "step": 10399 }, { "epoch": 13.312, "grad_norm": 0.9124325513839722, "learning_rate": 2.9215686274509806e-05, "loss": 0.5232, "step": 10400 }, { "epoch": 13.31328, "grad_norm": 0.9420004487037659, "learning_rate": 2.9213685474189678e-05, "loss": 0.5154, "step": 10401 }, { "epoch": 13.31456, "grad_norm": 1.029594898223877, "learning_rate": 2.921168467386955e-05, "loss": 0.6302, "step": 10402 }, { "epoch": 13.31584, "grad_norm": 0.9675827622413635, "learning_rate": 2.920968387354942e-05, "loss": 0.5515, "step": 10403 }, { "epoch": 13.31712, "grad_norm": 0.9070100784301758, "learning_rate": 2.9207683073229293e-05, "loss": 0.5061, "step": 10404 }, { "epoch": 13.3184, "grad_norm": 0.9273656606674194, "learning_rate": 2.9205682272909162e-05, "loss": 0.5412, "step": 10405 }, { "epoch": 13.31968, "grad_norm": 0.9532672762870789, "learning_rate": 2.920368147258904e-05, "loss": 0.5259, "step": 10406 }, { "epoch": 13.32096, "grad_norm": 0.9627009630203247, "learning_rate": 2.9201680672268912e-05, "loss": 0.5173, "step": 10407 }, { "epoch": 13.32224, "grad_norm": 0.9319729208946228, "learning_rate": 2.919967987194878e-05, "loss": 0.5869, "step": 10408 }, { "epoch": 13.32352, "grad_norm": 0.873471200466156, "learning_rate": 
2.9197679071628653e-05, "loss": 0.4851, "step": 10409 }, { "epoch": 13.3248, "grad_norm": 0.9522954821586609, "learning_rate": 2.9195678271308524e-05, "loss": 0.5164, "step": 10410 }, { "epoch": 13.32608, "grad_norm": 0.961445689201355, "learning_rate": 2.9193677470988396e-05, "loss": 0.5083, "step": 10411 }, { "epoch": 13.32736, "grad_norm": 0.9519370794296265, "learning_rate": 2.9191676670668268e-05, "loss": 0.5352, "step": 10412 }, { "epoch": 13.32864, "grad_norm": 0.9867494702339172, "learning_rate": 2.9189675870348143e-05, "loss": 0.5495, "step": 10413 }, { "epoch": 13.32992, "grad_norm": 0.8867868185043335, "learning_rate": 2.9187675070028015e-05, "loss": 0.5229, "step": 10414 }, { "epoch": 13.3312, "grad_norm": 0.9686993360519409, "learning_rate": 2.9185674269707887e-05, "loss": 0.5375, "step": 10415 }, { "epoch": 13.33248, "grad_norm": 0.9888986945152283, "learning_rate": 2.9183673469387756e-05, "loss": 0.6055, "step": 10416 }, { "epoch": 13.33376, "grad_norm": 0.9201542735099792, "learning_rate": 2.9181672669067627e-05, "loss": 0.5122, "step": 10417 }, { "epoch": 13.33504, "grad_norm": 0.9173356294631958, "learning_rate": 2.91796718687475e-05, "loss": 0.5429, "step": 10418 }, { "epoch": 13.33632, "grad_norm": 0.9418560862541199, "learning_rate": 2.917767106842737e-05, "loss": 0.5358, "step": 10419 }, { "epoch": 13.3376, "grad_norm": 0.9006747603416443, "learning_rate": 2.9175670268107246e-05, "loss": 0.4745, "step": 10420 }, { "epoch": 13.33888, "grad_norm": 0.9309629201889038, "learning_rate": 2.9173669467787118e-05, "loss": 0.5002, "step": 10421 }, { "epoch": 13.340160000000001, "grad_norm": 0.9739012718200684, "learning_rate": 2.917166866746699e-05, "loss": 0.5861, "step": 10422 }, { "epoch": 13.34144, "grad_norm": 0.9974890351295471, "learning_rate": 2.9169667867146862e-05, "loss": 0.5475, "step": 10423 }, { "epoch": 13.34272, "grad_norm": 0.9011924862861633, "learning_rate": 2.916766706682673e-05, "loss": 0.5111, "step": 10424 }, { "epoch": 13.344, 
"grad_norm": 1.0001959800720215, "learning_rate": 2.9165666266506602e-05, "loss": 0.5563, "step": 10425 }, { "epoch": 13.34528, "grad_norm": 1.0525249242782593, "learning_rate": 2.9163665466186474e-05, "loss": 0.5812, "step": 10426 }, { "epoch": 13.34656, "grad_norm": 0.9634531140327454, "learning_rate": 2.916166466586635e-05, "loss": 0.5414, "step": 10427 }, { "epoch": 13.34784, "grad_norm": 0.9581708908081055, "learning_rate": 2.915966386554622e-05, "loss": 0.5498, "step": 10428 }, { "epoch": 13.34912, "grad_norm": 1.0304726362228394, "learning_rate": 2.9157663065226093e-05, "loss": 0.543, "step": 10429 }, { "epoch": 13.3504, "grad_norm": 0.9573806524276733, "learning_rate": 2.9155662264905965e-05, "loss": 0.5478, "step": 10430 }, { "epoch": 13.35168, "grad_norm": 0.8998541831970215, "learning_rate": 2.9153661464585837e-05, "loss": 0.5521, "step": 10431 }, { "epoch": 13.35296, "grad_norm": 0.9216572046279907, "learning_rate": 2.9151660664265705e-05, "loss": 0.5198, "step": 10432 }, { "epoch": 13.35424, "grad_norm": 0.9832442402839661, "learning_rate": 2.9149659863945577e-05, "loss": 0.5542, "step": 10433 }, { "epoch": 13.35552, "grad_norm": 0.9601088166236877, "learning_rate": 2.9147659063625456e-05, "loss": 0.5416, "step": 10434 }, { "epoch": 13.3568, "grad_norm": 0.8933698534965515, "learning_rate": 2.9145658263305324e-05, "loss": 0.5418, "step": 10435 }, { "epoch": 13.35808, "grad_norm": 0.9842942357063293, "learning_rate": 2.9143657462985196e-05, "loss": 0.5526, "step": 10436 }, { "epoch": 13.35936, "grad_norm": 0.9776619076728821, "learning_rate": 2.9141656662665068e-05, "loss": 0.5864, "step": 10437 }, { "epoch": 13.36064, "grad_norm": 0.9885453581809998, "learning_rate": 2.913965586234494e-05, "loss": 0.584, "step": 10438 }, { "epoch": 13.36192, "grad_norm": 0.9770693182945251, "learning_rate": 2.913765506202481e-05, "loss": 0.5273, "step": 10439 }, { "epoch": 13.3632, "grad_norm": 0.9777031540870667, "learning_rate": 2.913565426170468e-05, "loss": 0.5872, 
"step": 10440 }, { "epoch": 13.36448, "grad_norm": 0.9541405439376831, "learning_rate": 2.913365346138456e-05, "loss": 0.5395, "step": 10441 }, { "epoch": 13.36576, "grad_norm": 0.9622824192047119, "learning_rate": 2.913165266106443e-05, "loss": 0.5505, "step": 10442 }, { "epoch": 13.36704, "grad_norm": 0.9015635848045349, "learning_rate": 2.91296518607443e-05, "loss": 0.5042, "step": 10443 }, { "epoch": 13.36832, "grad_norm": 0.9411506056785583, "learning_rate": 2.912765106042417e-05, "loss": 0.5998, "step": 10444 }, { "epoch": 13.3696, "grad_norm": 0.9454710483551025, "learning_rate": 2.9125650260104043e-05, "loss": 0.5047, "step": 10445 }, { "epoch": 13.37088, "grad_norm": 0.9664983153343201, "learning_rate": 2.9123649459783915e-05, "loss": 0.5501, "step": 10446 }, { "epoch": 13.37216, "grad_norm": 0.9950606226921082, "learning_rate": 2.9121648659463787e-05, "loss": 0.548, "step": 10447 }, { "epoch": 13.37344, "grad_norm": 0.9428080916404724, "learning_rate": 2.9119647859143655e-05, "loss": 0.515, "step": 10448 }, { "epoch": 13.37472, "grad_norm": 1.0005260705947876, "learning_rate": 2.9117647058823534e-05, "loss": 0.6053, "step": 10449 }, { "epoch": 13.376, "grad_norm": 0.9256664514541626, "learning_rate": 2.9115646258503405e-05, "loss": 0.5313, "step": 10450 }, { "epoch": 13.37728, "grad_norm": 0.9487593173980713, "learning_rate": 2.9113645458183274e-05, "loss": 0.5372, "step": 10451 }, { "epoch": 13.37856, "grad_norm": 0.9541116952896118, "learning_rate": 2.9111644657863146e-05, "loss": 0.565, "step": 10452 }, { "epoch": 13.37984, "grad_norm": 0.9423818588256836, "learning_rate": 2.9109643857543018e-05, "loss": 0.5485, "step": 10453 }, { "epoch": 13.38112, "grad_norm": 0.9149635434150696, "learning_rate": 2.910764305722289e-05, "loss": 0.4844, "step": 10454 }, { "epoch": 13.3824, "grad_norm": 1.0056577920913696, "learning_rate": 2.910564225690276e-05, "loss": 0.5753, "step": 10455 }, { "epoch": 13.38368, "grad_norm": 1.0535948276519775, "learning_rate": 
2.9103641456582637e-05, "loss": 0.5831, "step": 10456 }, { "epoch": 13.38496, "grad_norm": 0.9600538015365601, "learning_rate": 2.910164065626251e-05, "loss": 0.5554, "step": 10457 }, { "epoch": 13.38624, "grad_norm": 0.9667772054672241, "learning_rate": 2.909963985594238e-05, "loss": 0.5197, "step": 10458 }, { "epoch": 13.38752, "grad_norm": 0.949582040309906, "learning_rate": 2.909763905562225e-05, "loss": 0.5152, "step": 10459 }, { "epoch": 13.3888, "grad_norm": 0.9107711315155029, "learning_rate": 2.909563825530212e-05, "loss": 0.4774, "step": 10460 }, { "epoch": 13.39008, "grad_norm": 1.0431016683578491, "learning_rate": 2.9093637454981993e-05, "loss": 0.5594, "step": 10461 }, { "epoch": 13.39136, "grad_norm": 0.9607868194580078, "learning_rate": 2.9091636654661864e-05, "loss": 0.5385, "step": 10462 }, { "epoch": 13.39264, "grad_norm": 0.9767603278160095, "learning_rate": 2.908963585434174e-05, "loss": 0.5555, "step": 10463 }, { "epoch": 13.39392, "grad_norm": 0.920050859451294, "learning_rate": 2.908763505402161e-05, "loss": 0.5062, "step": 10464 }, { "epoch": 13.395199999999999, "grad_norm": 0.9358279705047607, "learning_rate": 2.9085634253701483e-05, "loss": 0.5381, "step": 10465 }, { "epoch": 13.39648, "grad_norm": 0.9233344793319702, "learning_rate": 2.9083633453381355e-05, "loss": 0.5331, "step": 10466 }, { "epoch": 13.39776, "grad_norm": 0.9128267168998718, "learning_rate": 2.9081632653061224e-05, "loss": 0.5363, "step": 10467 }, { "epoch": 13.39904, "grad_norm": 1.0040749311447144, "learning_rate": 2.9079631852741096e-05, "loss": 0.5948, "step": 10468 }, { "epoch": 13.40032, "grad_norm": 0.9266423583030701, "learning_rate": 2.9077631052420967e-05, "loss": 0.5511, "step": 10469 }, { "epoch": 13.4016, "grad_norm": 0.8982942700386047, "learning_rate": 2.9075630252100843e-05, "loss": 0.5446, "step": 10470 }, { "epoch": 13.40288, "grad_norm": 0.9734070301055908, "learning_rate": 2.9073629451780714e-05, "loss": 0.5567, "step": 10471 }, { "epoch": 13.40416, 
"grad_norm": 0.9396576285362244, "learning_rate": 2.9071628651460586e-05, "loss": 0.5146, "step": 10472 }, { "epoch": 13.40544, "grad_norm": 0.9016363620758057, "learning_rate": 2.9069627851140458e-05, "loss": 0.5465, "step": 10473 }, { "epoch": 13.40672, "grad_norm": 0.950202465057373, "learning_rate": 2.906762705082033e-05, "loss": 0.5659, "step": 10474 }, { "epoch": 13.408, "grad_norm": 0.9889881610870361, "learning_rate": 2.90656262505002e-05, "loss": 0.5675, "step": 10475 }, { "epoch": 13.40928, "grad_norm": 0.9744821190834045, "learning_rate": 2.906362545018007e-05, "loss": 0.5625, "step": 10476 }, { "epoch": 13.41056, "grad_norm": 0.9143860340118408, "learning_rate": 2.906162464985995e-05, "loss": 0.522, "step": 10477 }, { "epoch": 13.41184, "grad_norm": 0.9224212169647217, "learning_rate": 2.9059623849539817e-05, "loss": 0.4886, "step": 10478 }, { "epoch": 13.41312, "grad_norm": 0.9844509959220886, "learning_rate": 2.905762304921969e-05, "loss": 0.559, "step": 10479 }, { "epoch": 13.4144, "grad_norm": 0.919024646282196, "learning_rate": 2.905562224889956e-05, "loss": 0.5372, "step": 10480 }, { "epoch": 13.41568, "grad_norm": 0.9639186859130859, "learning_rate": 2.9053621448579433e-05, "loss": 0.5797, "step": 10481 }, { "epoch": 13.41696, "grad_norm": 0.926937460899353, "learning_rate": 2.9051620648259305e-05, "loss": 0.5645, "step": 10482 }, { "epoch": 13.41824, "grad_norm": 0.9183882474899292, "learning_rate": 2.9049619847939173e-05, "loss": 0.5077, "step": 10483 }, { "epoch": 13.41952, "grad_norm": 0.9052950739860535, "learning_rate": 2.9047619047619052e-05, "loss": 0.5042, "step": 10484 }, { "epoch": 13.4208, "grad_norm": 0.9210243225097656, "learning_rate": 2.9045618247298924e-05, "loss": 0.5045, "step": 10485 }, { "epoch": 13.42208, "grad_norm": 0.9332923293113708, "learning_rate": 2.9043617446978792e-05, "loss": 0.556, "step": 10486 }, { "epoch": 13.42336, "grad_norm": 0.8918114304542542, "learning_rate": 2.9041616646658664e-05, "loss": 0.5455, 
"step": 10487 }, { "epoch": 13.42464, "grad_norm": 1.0036391019821167, "learning_rate": 2.9039615846338536e-05, "loss": 0.6048, "step": 10488 }, { "epoch": 13.42592, "grad_norm": 0.9452251195907593, "learning_rate": 2.9037615046018408e-05, "loss": 0.5311, "step": 10489 }, { "epoch": 13.4272, "grad_norm": 0.9231464266777039, "learning_rate": 2.903561424569828e-05, "loss": 0.5153, "step": 10490 }, { "epoch": 13.42848, "grad_norm": 0.9702645540237427, "learning_rate": 2.9033613445378155e-05, "loss": 0.549, "step": 10491 }, { "epoch": 13.42976, "grad_norm": 0.9461646676063538, "learning_rate": 2.9031612645058027e-05, "loss": 0.5309, "step": 10492 }, { "epoch": 13.43104, "grad_norm": 0.9646087288856506, "learning_rate": 2.90296118447379e-05, "loss": 0.5819, "step": 10493 }, { "epoch": 13.43232, "grad_norm": 0.9191693067550659, "learning_rate": 2.9027611044417767e-05, "loss": 0.5552, "step": 10494 }, { "epoch": 13.4336, "grad_norm": 0.9861147999763489, "learning_rate": 2.902561024409764e-05, "loss": 0.5568, "step": 10495 }, { "epoch": 13.43488, "grad_norm": 0.9543395042419434, "learning_rate": 2.902360944377751e-05, "loss": 0.5337, "step": 10496 }, { "epoch": 13.43616, "grad_norm": 0.9656491875648499, "learning_rate": 2.9021608643457383e-05, "loss": 0.5232, "step": 10497 }, { "epoch": 13.43744, "grad_norm": 0.9861603379249573, "learning_rate": 2.9019607843137258e-05, "loss": 0.5745, "step": 10498 }, { "epoch": 13.43872, "grad_norm": 0.9877046942710876, "learning_rate": 2.901760704281713e-05, "loss": 0.584, "step": 10499 }, { "epoch": 13.44, "grad_norm": 0.9454039931297302, "learning_rate": 2.9015606242497002e-05, "loss": 0.5021, "step": 10500 }, { "epoch": 13.44128, "grad_norm": 0.9773223996162415, "learning_rate": 2.9013605442176874e-05, "loss": 0.5659, "step": 10501 }, { "epoch": 13.44256, "grad_norm": 0.9586243033409119, "learning_rate": 2.9011604641856742e-05, "loss": 0.5496, "step": 10502 }, { "epoch": 13.44384, "grad_norm": 0.9668572545051575, "learning_rate": 
2.9009603841536614e-05, "loss": 0.5088, "step": 10503 }, { "epoch": 13.44512, "grad_norm": 0.9630528092384338, "learning_rate": 2.9007603041216486e-05, "loss": 0.5007, "step": 10504 }, { "epoch": 13.4464, "grad_norm": 0.984404444694519, "learning_rate": 2.900560224089636e-05, "loss": 0.5157, "step": 10505 }, { "epoch": 13.44768, "grad_norm": 0.9502925276756287, "learning_rate": 2.9003601440576233e-05, "loss": 0.4632, "step": 10506 }, { "epoch": 13.44896, "grad_norm": 0.9846747517585754, "learning_rate": 2.9001600640256105e-05, "loss": 0.5392, "step": 10507 }, { "epoch": 13.45024, "grad_norm": 0.9732792377471924, "learning_rate": 2.8999599839935977e-05, "loss": 0.5066, "step": 10508 }, { "epoch": 13.45152, "grad_norm": 0.9376108050346375, "learning_rate": 2.899759903961585e-05, "loss": 0.568, "step": 10509 }, { "epoch": 13.4528, "grad_norm": 0.9324885010719299, "learning_rate": 2.8995598239295717e-05, "loss": 0.5635, "step": 10510 }, { "epoch": 13.45408, "grad_norm": 0.9335364699363708, "learning_rate": 2.899359743897559e-05, "loss": 0.5284, "step": 10511 }, { "epoch": 13.45536, "grad_norm": 0.9017517566680908, "learning_rate": 2.8991596638655467e-05, "loss": 0.4938, "step": 10512 }, { "epoch": 13.45664, "grad_norm": 0.9306479096412659, "learning_rate": 2.8989595838335336e-05, "loss": 0.5521, "step": 10513 }, { "epoch": 13.45792, "grad_norm": 0.9630993008613586, "learning_rate": 2.8987595038015208e-05, "loss": 0.5371, "step": 10514 }, { "epoch": 13.4592, "grad_norm": 0.9604276418685913, "learning_rate": 2.898559423769508e-05, "loss": 0.5543, "step": 10515 }, { "epoch": 13.46048, "grad_norm": 0.9642881155014038, "learning_rate": 2.898359343737495e-05, "loss": 0.525, "step": 10516 }, { "epoch": 13.46176, "grad_norm": 0.9426066279411316, "learning_rate": 2.8981592637054823e-05, "loss": 0.5535, "step": 10517 }, { "epoch": 13.46304, "grad_norm": 0.8915838003158569, "learning_rate": 2.8979591836734692e-05, "loss": 0.5155, "step": 10518 }, { "epoch": 13.46432, "grad_norm": 
0.9411042332649231, "learning_rate": 2.897759103641457e-05, "loss": 0.5197, "step": 10519 }, { "epoch": 13.4656, "grad_norm": 0.9480369091033936, "learning_rate": 2.8975590236094442e-05, "loss": 0.5425, "step": 10520 }, { "epoch": 13.46688, "grad_norm": 0.9183311462402344, "learning_rate": 2.897358943577431e-05, "loss": 0.5394, "step": 10521 }, { "epoch": 13.46816, "grad_norm": 0.8983965516090393, "learning_rate": 2.8971588635454183e-05, "loss": 0.5216, "step": 10522 }, { "epoch": 13.46944, "grad_norm": 0.943598210811615, "learning_rate": 2.8969587835134054e-05, "loss": 0.5223, "step": 10523 }, { "epoch": 13.47072, "grad_norm": 0.9010968804359436, "learning_rate": 2.8967587034813926e-05, "loss": 0.5572, "step": 10524 }, { "epoch": 13.472, "grad_norm": 0.9356712698936462, "learning_rate": 2.8965586234493798e-05, "loss": 0.5327, "step": 10525 }, { "epoch": 13.47328, "grad_norm": 0.9779828786849976, "learning_rate": 2.8963585434173673e-05, "loss": 0.5572, "step": 10526 }, { "epoch": 13.47456, "grad_norm": 0.9834756851196289, "learning_rate": 2.8961584633853545e-05, "loss": 0.5512, "step": 10527 }, { "epoch": 13.47584, "grad_norm": 1.0116183757781982, "learning_rate": 2.8959583833533417e-05, "loss": 0.5726, "step": 10528 }, { "epoch": 13.47712, "grad_norm": 0.9891726970672607, "learning_rate": 2.8957583033213286e-05, "loss": 0.5587, "step": 10529 }, { "epoch": 13.4784, "grad_norm": 0.9156029224395752, "learning_rate": 2.8955582232893157e-05, "loss": 0.5175, "step": 10530 }, { "epoch": 13.47968, "grad_norm": 0.9811277389526367, "learning_rate": 2.895358143257303e-05, "loss": 0.5572, "step": 10531 }, { "epoch": 13.48096, "grad_norm": 0.9236703515052795, "learning_rate": 2.89515806322529e-05, "loss": 0.5119, "step": 10532 }, { "epoch": 13.482240000000001, "grad_norm": 0.9552645087242126, "learning_rate": 2.8949579831932776e-05, "loss": 0.544, "step": 10533 }, { "epoch": 13.48352, "grad_norm": 0.9037032127380371, "learning_rate": 2.8947579031612648e-05, "loss": 0.5298, 
"step": 10534 }, { "epoch": 13.4848, "grad_norm": 0.947192370891571, "learning_rate": 2.894557823129252e-05, "loss": 0.5685, "step": 10535 }, { "epoch": 13.48608, "grad_norm": 0.9519645571708679, "learning_rate": 2.8943577430972392e-05, "loss": 0.5339, "step": 10536 }, { "epoch": 13.48736, "grad_norm": 0.9448265433311462, "learning_rate": 2.894157663065226e-05, "loss": 0.5473, "step": 10537 }, { "epoch": 13.48864, "grad_norm": 0.9873289465904236, "learning_rate": 2.8939575830332132e-05, "loss": 0.5253, "step": 10538 }, { "epoch": 13.48992, "grad_norm": 0.9552075862884521, "learning_rate": 2.8937575030012004e-05, "loss": 0.5561, "step": 10539 }, { "epoch": 13.4912, "grad_norm": 0.9306079745292664, "learning_rate": 2.893557422969188e-05, "loss": 0.5058, "step": 10540 }, { "epoch": 13.49248, "grad_norm": 0.9096477627754211, "learning_rate": 2.893357342937175e-05, "loss": 0.4799, "step": 10541 }, { "epoch": 13.49376, "grad_norm": 1.00719153881073, "learning_rate": 2.8931572629051623e-05, "loss": 0.5475, "step": 10542 }, { "epoch": 13.49504, "grad_norm": 1.0264086723327637, "learning_rate": 2.8929571828731495e-05, "loss": 0.5664, "step": 10543 }, { "epoch": 13.49632, "grad_norm": 0.9574489593505859, "learning_rate": 2.8927571028411367e-05, "loss": 0.5239, "step": 10544 }, { "epoch": 13.4976, "grad_norm": 0.9818712472915649, "learning_rate": 2.8925570228091235e-05, "loss": 0.5373, "step": 10545 }, { "epoch": 13.49888, "grad_norm": 0.9273495674133301, "learning_rate": 2.8923569427771107e-05, "loss": 0.5535, "step": 10546 }, { "epoch": 13.50016, "grad_norm": 0.9357905983924866, "learning_rate": 2.8921568627450986e-05, "loss": 0.5536, "step": 10547 }, { "epoch": 13.50144, "grad_norm": 0.9313623309135437, "learning_rate": 2.8919567827130854e-05, "loss": 0.5436, "step": 10548 }, { "epoch": 13.50272, "grad_norm": 0.9302566051483154, "learning_rate": 2.8917567026810726e-05, "loss": 0.503, "step": 10549 }, { "epoch": 13.504, "grad_norm": 0.9522665143013, "learning_rate": 
2.8915566226490598e-05, "loss": 0.5565, "step": 10550 }, { "epoch": 13.505279999999999, "grad_norm": 0.9212354421615601, "learning_rate": 2.891356542617047e-05, "loss": 0.5448, "step": 10551 }, { "epoch": 13.50656, "grad_norm": 0.9201790690422058, "learning_rate": 2.891156462585034e-05, "loss": 0.509, "step": 10552 }, { "epoch": 13.50784, "grad_norm": 0.91869056224823, "learning_rate": 2.890956382553021e-05, "loss": 0.5319, "step": 10553 }, { "epoch": 13.50912, "grad_norm": 0.9275939464569092, "learning_rate": 2.890756302521009e-05, "loss": 0.5429, "step": 10554 }, { "epoch": 13.5104, "grad_norm": 0.9292113780975342, "learning_rate": 2.890556222488996e-05, "loss": 0.5673, "step": 10555 }, { "epoch": 13.51168, "grad_norm": 0.9870272278785706, "learning_rate": 2.890356142456983e-05, "loss": 0.5295, "step": 10556 }, { "epoch": 13.51296, "grad_norm": 0.9538768529891968, "learning_rate": 2.89015606242497e-05, "loss": 0.5421, "step": 10557 }, { "epoch": 13.514240000000001, "grad_norm": 0.9605304598808289, "learning_rate": 2.8899559823929573e-05, "loss": 0.5666, "step": 10558 }, { "epoch": 13.51552, "grad_norm": 0.9257391691207886, "learning_rate": 2.8897559023609445e-05, "loss": 0.5048, "step": 10559 }, { "epoch": 13.5168, "grad_norm": 0.924060046672821, "learning_rate": 2.8895558223289317e-05, "loss": 0.5625, "step": 10560 }, { "epoch": 13.51808, "grad_norm": 0.9286275506019592, "learning_rate": 2.8893557422969185e-05, "loss": 0.4978, "step": 10561 }, { "epoch": 13.51936, "grad_norm": 1.009763240814209, "learning_rate": 2.8891556622649064e-05, "loss": 0.5748, "step": 10562 }, { "epoch": 13.52064, "grad_norm": 0.9351606369018555, "learning_rate": 2.8889555822328935e-05, "loss": 0.4949, "step": 10563 }, { "epoch": 13.52192, "grad_norm": 0.9865330457687378, "learning_rate": 2.8887555022008804e-05, "loss": 0.5523, "step": 10564 }, { "epoch": 13.5232, "grad_norm": 0.9141054749488831, "learning_rate": 2.8885554221688676e-05, "loss": 0.535, "step": 10565 }, { "epoch": 
13.52448, "grad_norm": 0.8817773461341858, "learning_rate": 2.8883553421368548e-05, "loss": 0.5218, "step": 10566 }, { "epoch": 13.52576, "grad_norm": 0.918411910533905, "learning_rate": 2.888155262104842e-05, "loss": 0.5497, "step": 10567 }, { "epoch": 13.52704, "grad_norm": 0.9279268383979797, "learning_rate": 2.887955182072829e-05, "loss": 0.5519, "step": 10568 }, { "epoch": 13.52832, "grad_norm": 0.9723789095878601, "learning_rate": 2.8877551020408167e-05, "loss": 0.5554, "step": 10569 }, { "epoch": 13.5296, "grad_norm": 0.9536760449409485, "learning_rate": 2.887555022008804e-05, "loss": 0.5561, "step": 10570 }, { "epoch": 13.53088, "grad_norm": 0.9966936111450195, "learning_rate": 2.887354941976791e-05, "loss": 0.5453, "step": 10571 }, { "epoch": 13.53216, "grad_norm": 0.9413495659828186, "learning_rate": 2.887154861944778e-05, "loss": 0.5242, "step": 10572 }, { "epoch": 13.53344, "grad_norm": 0.9562821388244629, "learning_rate": 2.886954781912765e-05, "loss": 0.5672, "step": 10573 }, { "epoch": 13.53472, "grad_norm": 0.9556413292884827, "learning_rate": 2.8867547018807523e-05, "loss": 0.5895, "step": 10574 }, { "epoch": 13.536, "grad_norm": 0.9961545467376709, "learning_rate": 2.8865546218487394e-05, "loss": 0.5486, "step": 10575 }, { "epoch": 13.537279999999999, "grad_norm": 0.9472994208335876, "learning_rate": 2.886354541816727e-05, "loss": 0.5914, "step": 10576 }, { "epoch": 13.53856, "grad_norm": 0.937106728553772, "learning_rate": 2.886154461784714e-05, "loss": 0.528, "step": 10577 }, { "epoch": 13.53984, "grad_norm": 0.9636639952659607, "learning_rate": 2.8859543817527013e-05, "loss": 0.5109, "step": 10578 }, { "epoch": 13.54112, "grad_norm": 0.9437137246131897, "learning_rate": 2.8857543017206885e-05, "loss": 0.5464, "step": 10579 }, { "epoch": 13.5424, "grad_norm": 1.0153517723083496, "learning_rate": 2.8855542216886754e-05, "loss": 0.5756, "step": 10580 }, { "epoch": 13.54368, "grad_norm": 0.9016063213348389, "learning_rate": 2.8853541416566626e-05, 
"loss": 0.4751, "step": 10581 }, { "epoch": 13.54496, "grad_norm": 0.9339427947998047, "learning_rate": 2.8851540616246497e-05, "loss": 0.5462, "step": 10582 }, { "epoch": 13.54624, "grad_norm": 0.9382839202880859, "learning_rate": 2.8849539815926373e-05, "loss": 0.5564, "step": 10583 }, { "epoch": 13.54752, "grad_norm": 0.9853460192680359, "learning_rate": 2.8847539015606244e-05, "loss": 0.5785, "step": 10584 }, { "epoch": 13.5488, "grad_norm": 0.8800140023231506, "learning_rate": 2.8845538215286116e-05, "loss": 0.5303, "step": 10585 }, { "epoch": 13.55008, "grad_norm": 0.9256978631019592, "learning_rate": 2.8843537414965988e-05, "loss": 0.5192, "step": 10586 }, { "epoch": 13.55136, "grad_norm": 0.9170429706573486, "learning_rate": 2.884153661464586e-05, "loss": 0.5438, "step": 10587 }, { "epoch": 13.55264, "grad_norm": 0.9927389025688171, "learning_rate": 2.883953581432573e-05, "loss": 0.5558, "step": 10588 }, { "epoch": 13.55392, "grad_norm": 0.9742659330368042, "learning_rate": 2.88375350140056e-05, "loss": 0.5768, "step": 10589 }, { "epoch": 13.5552, "grad_norm": 1.032018780708313, "learning_rate": 2.883553421368548e-05, "loss": 0.5579, "step": 10590 }, { "epoch": 13.55648, "grad_norm": 0.972882091999054, "learning_rate": 2.8833533413365347e-05, "loss": 0.5939, "step": 10591 }, { "epoch": 13.55776, "grad_norm": 0.8545750975608826, "learning_rate": 2.883153261304522e-05, "loss": 0.5027, "step": 10592 }, { "epoch": 13.55904, "grad_norm": 0.896020233631134, "learning_rate": 2.882953181272509e-05, "loss": 0.472, "step": 10593 }, { "epoch": 13.56032, "grad_norm": 0.8964064717292786, "learning_rate": 2.8827531012404963e-05, "loss": 0.4956, "step": 10594 }, { "epoch": 13.5616, "grad_norm": 0.939621090888977, "learning_rate": 2.8825530212084835e-05, "loss": 0.5365, "step": 10595 }, { "epoch": 13.56288, "grad_norm": 0.9430614709854126, "learning_rate": 2.8823529411764703e-05, "loss": 0.5228, "step": 10596 }, { "epoch": 13.56416, "grad_norm": 0.91892409324646, 
"learning_rate": 2.8821528611444582e-05, "loss": 0.5294, "step": 10597 }, { "epoch": 13.56544, "grad_norm": 0.9037036895751953, "learning_rate": 2.8819527811124454e-05, "loss": 0.5209, "step": 10598 }, { "epoch": 13.56672, "grad_norm": 0.9548211097717285, "learning_rate": 2.8817527010804322e-05, "loss": 0.5449, "step": 10599 }, { "epoch": 13.568, "grad_norm": 0.9372276067733765, "learning_rate": 2.8815526210484194e-05, "loss": 0.5343, "step": 10600 }, { "epoch": 13.56928, "grad_norm": 0.9246383905410767, "learning_rate": 2.8813525410164066e-05, "loss": 0.5319, "step": 10601 }, { "epoch": 13.57056, "grad_norm": 0.947321355342865, "learning_rate": 2.8811524609843938e-05, "loss": 0.5573, "step": 10602 }, { "epoch": 13.57184, "grad_norm": 0.9556154012680054, "learning_rate": 2.880952380952381e-05, "loss": 0.5551, "step": 10603 }, { "epoch": 13.57312, "grad_norm": 0.9768837094306946, "learning_rate": 2.8807523009203685e-05, "loss": 0.5631, "step": 10604 }, { "epoch": 13.5744, "grad_norm": 0.9723296761512756, "learning_rate": 2.8805522208883557e-05, "loss": 0.5453, "step": 10605 }, { "epoch": 13.57568, "grad_norm": 0.9709224104881287, "learning_rate": 2.880352140856343e-05, "loss": 0.5324, "step": 10606 }, { "epoch": 13.57696, "grad_norm": 0.9383196830749512, "learning_rate": 2.8801520608243297e-05, "loss": 0.5561, "step": 10607 }, { "epoch": 13.57824, "grad_norm": 0.9480939507484436, "learning_rate": 2.879951980792317e-05, "loss": 0.51, "step": 10608 }, { "epoch": 13.57952, "grad_norm": 0.9381304383277893, "learning_rate": 2.879751900760304e-05, "loss": 0.5704, "step": 10609 }, { "epoch": 13.5808, "grad_norm": 0.9381782412528992, "learning_rate": 2.8795518207282913e-05, "loss": 0.5011, "step": 10610 }, { "epoch": 13.58208, "grad_norm": 1.024210810661316, "learning_rate": 2.8793517406962788e-05, "loss": 0.5855, "step": 10611 }, { "epoch": 13.58336, "grad_norm": 0.9958494305610657, "learning_rate": 2.879151660664266e-05, "loss": 0.5279, "step": 10612 }, { "epoch": 
13.58464, "grad_norm": 0.9755289554595947, "learning_rate": 2.8789515806322532e-05, "loss": 0.5981, "step": 10613 }, { "epoch": 13.58592, "grad_norm": 0.9563091397285461, "learning_rate": 2.8787515006002404e-05, "loss": 0.5707, "step": 10614 }, { "epoch": 13.5872, "grad_norm": 0.9441842436790466, "learning_rate": 2.8785514205682272e-05, "loss": 0.5695, "step": 10615 }, { "epoch": 13.58848, "grad_norm": 0.9176508784294128, "learning_rate": 2.8783513405362144e-05, "loss": 0.4954, "step": 10616 }, { "epoch": 13.58976, "grad_norm": 0.9186686277389526, "learning_rate": 2.8781512605042016e-05, "loss": 0.5372, "step": 10617 }, { "epoch": 13.59104, "grad_norm": 0.9438495635986328, "learning_rate": 2.877951180472189e-05, "loss": 0.5552, "step": 10618 }, { "epoch": 13.59232, "grad_norm": 0.9654157161712646, "learning_rate": 2.8777511004401763e-05, "loss": 0.5388, "step": 10619 }, { "epoch": 13.5936, "grad_norm": 0.9715597629547119, "learning_rate": 2.8775510204081635e-05, "loss": 0.5435, "step": 10620 }, { "epoch": 13.59488, "grad_norm": 0.9751953482627869, "learning_rate": 2.8773509403761507e-05, "loss": 0.5689, "step": 10621 }, { "epoch": 13.59616, "grad_norm": 0.9504429697990417, "learning_rate": 2.877150860344138e-05, "loss": 0.5404, "step": 10622 }, { "epoch": 13.59744, "grad_norm": 0.9462687373161316, "learning_rate": 2.8769507803121247e-05, "loss": 0.527, "step": 10623 }, { "epoch": 13.59872, "grad_norm": 0.9122000336647034, "learning_rate": 2.876750700280112e-05, "loss": 0.5466, "step": 10624 }, { "epoch": 13.6, "grad_norm": 0.9636383056640625, "learning_rate": 2.8765506202480997e-05, "loss": 0.5357, "step": 10625 }, { "epoch": 13.60128, "grad_norm": 0.9176141023635864, "learning_rate": 2.8763505402160866e-05, "loss": 0.5333, "step": 10626 }, { "epoch": 13.60256, "grad_norm": 0.935096025466919, "learning_rate": 2.8761504601840738e-05, "loss": 0.5373, "step": 10627 }, { "epoch": 13.60384, "grad_norm": 0.9804729223251343, "learning_rate": 2.875950380152061e-05, "loss": 
0.5202, "step": 10628 }, { "epoch": 13.60512, "grad_norm": 0.8921434879302979, "learning_rate": 2.875750300120048e-05, "loss": 0.5015, "step": 10629 }, { "epoch": 13.6064, "grad_norm": 0.9438501000404358, "learning_rate": 2.8755502200880353e-05, "loss": 0.5353, "step": 10630 }, { "epoch": 13.60768, "grad_norm": 1.0135152339935303, "learning_rate": 2.8753501400560222e-05, "loss": 0.5581, "step": 10631 }, { "epoch": 13.60896, "grad_norm": 0.9754303693771362, "learning_rate": 2.87515006002401e-05, "loss": 0.5702, "step": 10632 }, { "epoch": 13.61024, "grad_norm": 0.9811170101165771, "learning_rate": 2.8749499799919972e-05, "loss": 0.5316, "step": 10633 }, { "epoch": 13.61152, "grad_norm": 0.9455510377883911, "learning_rate": 2.874749899959984e-05, "loss": 0.5505, "step": 10634 }, { "epoch": 13.6128, "grad_norm": 0.9648592472076416, "learning_rate": 2.8745498199279713e-05, "loss": 0.5324, "step": 10635 }, { "epoch": 13.61408, "grad_norm": 0.9389111995697021, "learning_rate": 2.8743497398959584e-05, "loss": 0.5199, "step": 10636 }, { "epoch": 13.61536, "grad_norm": 0.8730208873748779, "learning_rate": 2.8741496598639456e-05, "loss": 0.4906, "step": 10637 }, { "epoch": 13.61664, "grad_norm": 0.903998613357544, "learning_rate": 2.8739495798319328e-05, "loss": 0.5274, "step": 10638 }, { "epoch": 13.61792, "grad_norm": 0.9008133411407471, "learning_rate": 2.8737494997999203e-05, "loss": 0.4929, "step": 10639 }, { "epoch": 13.6192, "grad_norm": 0.9376186728477478, "learning_rate": 2.8735494197679075e-05, "loss": 0.5746, "step": 10640 }, { "epoch": 13.62048, "grad_norm": 0.8832375407218933, "learning_rate": 2.8733493397358947e-05, "loss": 0.4694, "step": 10641 }, { "epoch": 13.62176, "grad_norm": 1.0239812135696411, "learning_rate": 2.8731492597038816e-05, "loss": 0.5459, "step": 10642 }, { "epoch": 13.62304, "grad_norm": 1.0099740028381348, "learning_rate": 2.8729491796718687e-05, "loss": 0.5721, "step": 10643 }, { "epoch": 13.62432, "grad_norm": 0.9773842096328735, 
"learning_rate": 2.872749099639856e-05, "loss": 0.5401, "step": 10644 }, { "epoch": 13.6256, "grad_norm": 0.9006150364875793, "learning_rate": 2.872549019607843e-05, "loss": 0.5157, "step": 10645 }, { "epoch": 13.62688, "grad_norm": 0.9231019616127014, "learning_rate": 2.8723489395758306e-05, "loss": 0.5334, "step": 10646 }, { "epoch": 13.62816, "grad_norm": 0.9919240474700928, "learning_rate": 2.8721488595438178e-05, "loss": 0.5759, "step": 10647 }, { "epoch": 13.62944, "grad_norm": 0.9904776215553284, "learning_rate": 2.871948779511805e-05, "loss": 0.5377, "step": 10648 }, { "epoch": 13.63072, "grad_norm": 0.9417052268981934, "learning_rate": 2.8717486994797922e-05, "loss": 0.5053, "step": 10649 }, { "epoch": 13.632, "grad_norm": 0.9234094023704529, "learning_rate": 2.871548619447779e-05, "loss": 0.5466, "step": 10650 }, { "epoch": 13.63328, "grad_norm": 0.9561535716056824, "learning_rate": 2.8713485394157662e-05, "loss": 0.5591, "step": 10651 }, { "epoch": 13.63456, "grad_norm": 0.9605762362480164, "learning_rate": 2.8711484593837534e-05, "loss": 0.5438, "step": 10652 }, { "epoch": 13.63584, "grad_norm": 1.0227789878845215, "learning_rate": 2.870948379351741e-05, "loss": 0.5768, "step": 10653 }, { "epoch": 13.63712, "grad_norm": 0.9260333776473999, "learning_rate": 2.870748299319728e-05, "loss": 0.4965, "step": 10654 }, { "epoch": 13.6384, "grad_norm": 1.0059727430343628, "learning_rate": 2.8705482192877153e-05, "loss": 0.5619, "step": 10655 }, { "epoch": 13.63968, "grad_norm": 0.9554175138473511, "learning_rate": 2.8703481392557025e-05, "loss": 0.5721, "step": 10656 }, { "epoch": 13.64096, "grad_norm": 0.9822500944137573, "learning_rate": 2.8701480592236897e-05, "loss": 0.5116, "step": 10657 }, { "epoch": 13.64224, "grad_norm": 0.9823669195175171, "learning_rate": 2.8699479791916765e-05, "loss": 0.5623, "step": 10658 }, { "epoch": 13.64352, "grad_norm": 0.962337076663971, "learning_rate": 2.8697478991596637e-05, "loss": 0.5153, "step": 10659 }, { "epoch": 
13.6448, "grad_norm": 0.9577946066856384, "learning_rate": 2.8695478191276516e-05, "loss": 0.546, "step": 10660 }, { "epoch": 13.64608, "grad_norm": 0.9783328771591187, "learning_rate": 2.8693477390956384e-05, "loss": 0.5556, "step": 10661 }, { "epoch": 13.64736, "grad_norm": 0.9406751990318298, "learning_rate": 2.8691476590636256e-05, "loss": 0.5234, "step": 10662 }, { "epoch": 13.64864, "grad_norm": 0.9680593013763428, "learning_rate": 2.8689475790316128e-05, "loss": 0.5553, "step": 10663 }, { "epoch": 13.64992, "grad_norm": 0.9520314335823059, "learning_rate": 2.8687474989996e-05, "loss": 0.5647, "step": 10664 }, { "epoch": 13.6512, "grad_norm": 0.8977915048599243, "learning_rate": 2.868547418967587e-05, "loss": 0.5178, "step": 10665 }, { "epoch": 13.65248, "grad_norm": 0.8997889757156372, "learning_rate": 2.868347338935574e-05, "loss": 0.5383, "step": 10666 }, { "epoch": 13.65376, "grad_norm": 0.9149733781814575, "learning_rate": 2.8681472589035612e-05, "loss": 0.5046, "step": 10667 }, { "epoch": 13.65504, "grad_norm": 1.018157720565796, "learning_rate": 2.867947178871549e-05, "loss": 0.6097, "step": 10668 }, { "epoch": 13.656320000000001, "grad_norm": 0.9306768774986267, "learning_rate": 2.867747098839536e-05, "loss": 0.5142, "step": 10669 }, { "epoch": 13.6576, "grad_norm": 0.9335023760795593, "learning_rate": 2.867547018807523e-05, "loss": 0.5604, "step": 10670 }, { "epoch": 13.65888, "grad_norm": 0.9208192825317383, "learning_rate": 2.8673469387755103e-05, "loss": 0.5307, "step": 10671 }, { "epoch": 13.66016, "grad_norm": 0.9682392477989197, "learning_rate": 2.8671468587434975e-05, "loss": 0.5418, "step": 10672 }, { "epoch": 13.66144, "grad_norm": 0.9635206460952759, "learning_rate": 2.8669467787114846e-05, "loss": 0.5639, "step": 10673 }, { "epoch": 13.66272, "grad_norm": 0.9996086359024048, "learning_rate": 2.8667466986794715e-05, "loss": 0.5511, "step": 10674 }, { "epoch": 13.664, "grad_norm": 0.9767386317253113, "learning_rate": 2.8665466186474594e-05, 
"loss": 0.5375, "step": 10675 }, { "epoch": 13.66528, "grad_norm": 0.9239612221717834, "learning_rate": 2.8663465386154465e-05, "loss": 0.528, "step": 10676 }, { "epoch": 13.66656, "grad_norm": 0.9513071179389954, "learning_rate": 2.8661464585834334e-05, "loss": 0.5234, "step": 10677 }, { "epoch": 13.66784, "grad_norm": 1.0107206106185913, "learning_rate": 2.8659463785514206e-05, "loss": 0.564, "step": 10678 }, { "epoch": 13.66912, "grad_norm": 0.9832781553268433, "learning_rate": 2.8657462985194078e-05, "loss": 0.5724, "step": 10679 }, { "epoch": 13.6704, "grad_norm": 0.9765233993530273, "learning_rate": 2.865546218487395e-05, "loss": 0.5414, "step": 10680 }, { "epoch": 13.67168, "grad_norm": 0.9487534761428833, "learning_rate": 2.865346138455382e-05, "loss": 0.4998, "step": 10681 }, { "epoch": 13.67296, "grad_norm": 0.9580942988395691, "learning_rate": 2.8651460584233697e-05, "loss": 0.5587, "step": 10682 }, { "epoch": 13.67424, "grad_norm": 0.9719009399414062, "learning_rate": 2.864945978391357e-05, "loss": 0.5611, "step": 10683 }, { "epoch": 13.67552, "grad_norm": 0.9432852864265442, "learning_rate": 2.864745898359344e-05, "loss": 0.5628, "step": 10684 }, { "epoch": 13.6768, "grad_norm": 0.9318643808364868, "learning_rate": 2.864545818327331e-05, "loss": 0.5335, "step": 10685 }, { "epoch": 13.67808, "grad_norm": 0.9718388915061951, "learning_rate": 2.864345738295318e-05, "loss": 0.5531, "step": 10686 }, { "epoch": 13.679359999999999, "grad_norm": 0.9416543245315552, "learning_rate": 2.8641456582633052e-05, "loss": 0.5295, "step": 10687 }, { "epoch": 13.68064, "grad_norm": 0.9783507585525513, "learning_rate": 2.8639455782312924e-05, "loss": 0.5539, "step": 10688 }, { "epoch": 13.68192, "grad_norm": 0.9382901787757874, "learning_rate": 2.86374549819928e-05, "loss": 0.5215, "step": 10689 }, { "epoch": 13.6832, "grad_norm": 0.9459569454193115, "learning_rate": 2.863545418167267e-05, "loss": 0.5271, "step": 10690 }, { "epoch": 13.68448, "grad_norm": 
0.9002374410629272, "learning_rate": 2.8633453381352543e-05, "loss": 0.5393, "step": 10691 }, { "epoch": 13.68576, "grad_norm": 0.9636310338973999, "learning_rate": 2.8631452581032415e-05, "loss": 0.5206, "step": 10692 }, { "epoch": 13.68704, "grad_norm": 0.9752256274223328, "learning_rate": 2.8629451780712284e-05, "loss": 0.578, "step": 10693 }, { "epoch": 13.688320000000001, "grad_norm": 0.9666173458099365, "learning_rate": 2.8627450980392155e-05, "loss": 0.5931, "step": 10694 }, { "epoch": 13.6896, "grad_norm": 0.9382612109184265, "learning_rate": 2.8625450180072027e-05, "loss": 0.5502, "step": 10695 }, { "epoch": 13.69088, "grad_norm": 0.9369370341300964, "learning_rate": 2.8623449379751906e-05, "loss": 0.5421, "step": 10696 }, { "epoch": 13.69216, "grad_norm": 0.8949071764945984, "learning_rate": 2.8621448579431774e-05, "loss": 0.5083, "step": 10697 }, { "epoch": 13.69344, "grad_norm": 0.9892618656158447, "learning_rate": 2.8619447779111646e-05, "loss": 0.5519, "step": 10698 }, { "epoch": 13.69472, "grad_norm": 0.8985315561294556, "learning_rate": 2.8617446978791518e-05, "loss": 0.4751, "step": 10699 }, { "epoch": 13.696, "grad_norm": 0.9138407707214355, "learning_rate": 2.861544617847139e-05, "loss": 0.5034, "step": 10700 }, { "epoch": 13.69728, "grad_norm": 0.8925737142562866, "learning_rate": 2.861344537815126e-05, "loss": 0.5013, "step": 10701 }, { "epoch": 13.69856, "grad_norm": 0.9540265798568726, "learning_rate": 2.861144457783113e-05, "loss": 0.5579, "step": 10702 }, { "epoch": 13.69984, "grad_norm": 0.9697530269622803, "learning_rate": 2.860944377751101e-05, "loss": 0.527, "step": 10703 }, { "epoch": 13.70112, "grad_norm": 0.999180257320404, "learning_rate": 2.860744297719088e-05, "loss": 0.5523, "step": 10704 }, { "epoch": 13.7024, "grad_norm": 0.9945990443229675, "learning_rate": 2.860544217687075e-05, "loss": 0.5717, "step": 10705 }, { "epoch": 13.70368, "grad_norm": 0.9230750203132629, "learning_rate": 2.860344137655062e-05, "loss": 0.5279, 
"step": 10706 }, { "epoch": 13.70496, "grad_norm": 0.9194453954696655, "learning_rate": 2.8601440576230493e-05, "loss": 0.5262, "step": 10707 }, { "epoch": 13.70624, "grad_norm": 0.9214555621147156, "learning_rate": 2.8599439775910365e-05, "loss": 0.5166, "step": 10708 }, { "epoch": 13.70752, "grad_norm": 1.0233759880065918, "learning_rate": 2.8597438975590233e-05, "loss": 0.6114, "step": 10709 }, { "epoch": 13.7088, "grad_norm": 0.8889565467834473, "learning_rate": 2.8595438175270112e-05, "loss": 0.4844, "step": 10710 }, { "epoch": 13.71008, "grad_norm": 0.9614165425300598, "learning_rate": 2.8593437374949984e-05, "loss": 0.5454, "step": 10711 }, { "epoch": 13.711359999999999, "grad_norm": 0.9475215673446655, "learning_rate": 2.8591436574629856e-05, "loss": 0.4936, "step": 10712 }, { "epoch": 13.71264, "grad_norm": 1.000278353691101, "learning_rate": 2.8589435774309724e-05, "loss": 0.5549, "step": 10713 }, { "epoch": 13.71392, "grad_norm": 0.918251097202301, "learning_rate": 2.8587434973989596e-05, "loss": 0.5023, "step": 10714 }, { "epoch": 13.7152, "grad_norm": 0.9441922903060913, "learning_rate": 2.8585434173669468e-05, "loss": 0.5331, "step": 10715 }, { "epoch": 13.71648, "grad_norm": 0.9622296690940857, "learning_rate": 2.858343337334934e-05, "loss": 0.5316, "step": 10716 }, { "epoch": 13.71776, "grad_norm": 0.9810208678245544, "learning_rate": 2.8581432573029215e-05, "loss": 0.5379, "step": 10717 }, { "epoch": 13.71904, "grad_norm": 0.9779439568519592, "learning_rate": 2.8579431772709087e-05, "loss": 0.5698, "step": 10718 }, { "epoch": 13.72032, "grad_norm": 0.9007058143615723, "learning_rate": 2.857743097238896e-05, "loss": 0.484, "step": 10719 }, { "epoch": 13.7216, "grad_norm": 0.964832067489624, "learning_rate": 2.857543017206883e-05, "loss": 0.5773, "step": 10720 }, { "epoch": 13.72288, "grad_norm": 0.9448238015174866, "learning_rate": 2.85734293717487e-05, "loss": 0.5122, "step": 10721 }, { "epoch": 13.72416, "grad_norm": 0.9428820610046387, 
"learning_rate": 2.857142857142857e-05, "loss": 0.5472, "step": 10722 }, { "epoch": 13.72544, "grad_norm": 0.9774311184883118, "learning_rate": 2.8569427771108443e-05, "loss": 0.5224, "step": 10723 }, { "epoch": 13.72672, "grad_norm": 0.9833608269691467, "learning_rate": 2.8567426970788318e-05, "loss": 0.5713, "step": 10724 }, { "epoch": 13.728, "grad_norm": 0.906108558177948, "learning_rate": 2.856542617046819e-05, "loss": 0.5289, "step": 10725 }, { "epoch": 13.72928, "grad_norm": 0.9727580547332764, "learning_rate": 2.856342537014806e-05, "loss": 0.5354, "step": 10726 }, { "epoch": 13.73056, "grad_norm": 0.9792339205741882, "learning_rate": 2.8561424569827934e-05, "loss": 0.5648, "step": 10727 }, { "epoch": 13.73184, "grad_norm": 0.962765634059906, "learning_rate": 2.8559423769507805e-05, "loss": 0.5599, "step": 10728 }, { "epoch": 13.73312, "grad_norm": 0.9317491054534912, "learning_rate": 2.8557422969187674e-05, "loss": 0.5203, "step": 10729 }, { "epoch": 13.7344, "grad_norm": 0.9817516207695007, "learning_rate": 2.8555422168867546e-05, "loss": 0.5766, "step": 10730 }, { "epoch": 13.73568, "grad_norm": 0.9059588313102722, "learning_rate": 2.8553421368547424e-05, "loss": 0.5134, "step": 10731 }, { "epoch": 13.73696, "grad_norm": 0.9380925893783569, "learning_rate": 2.8551420568227293e-05, "loss": 0.5188, "step": 10732 }, { "epoch": 13.73824, "grad_norm": 0.919386088848114, "learning_rate": 2.8549419767907165e-05, "loss": 0.5315, "step": 10733 }, { "epoch": 13.73952, "grad_norm": 0.991089403629303, "learning_rate": 2.8547418967587037e-05, "loss": 0.6093, "step": 10734 }, { "epoch": 13.7408, "grad_norm": 0.9908062219619751, "learning_rate": 2.854541816726691e-05, "loss": 0.5627, "step": 10735 }, { "epoch": 13.74208, "grad_norm": 0.9567424654960632, "learning_rate": 2.854341736694678e-05, "loss": 0.5442, "step": 10736 }, { "epoch": 13.74336, "grad_norm": 0.9408556818962097, "learning_rate": 2.854141656662665e-05, "loss": 0.5286, "step": 10737 }, { "epoch": 
13.74464, "grad_norm": 0.9387040734291077, "learning_rate": 2.8539415766306527e-05, "loss": 0.548, "step": 10738 }, { "epoch": 13.74592, "grad_norm": 0.921125054359436, "learning_rate": 2.85374149659864e-05, "loss": 0.5613, "step": 10739 }, { "epoch": 13.7472, "grad_norm": 0.9063670635223389, "learning_rate": 2.8535414165666268e-05, "loss": 0.5457, "step": 10740 }, { "epoch": 13.74848, "grad_norm": 1.002338171005249, "learning_rate": 2.853341336534614e-05, "loss": 0.5935, "step": 10741 }, { "epoch": 13.74976, "grad_norm": 0.8982512950897217, "learning_rate": 2.853141256502601e-05, "loss": 0.5346, "step": 10742 }, { "epoch": 13.75104, "grad_norm": 0.8786603808403015, "learning_rate": 2.8529411764705883e-05, "loss": 0.5381, "step": 10743 }, { "epoch": 13.75232, "grad_norm": 0.9155345559120178, "learning_rate": 2.8527410964385755e-05, "loss": 0.5217, "step": 10744 }, { "epoch": 13.7536, "grad_norm": 0.9769365787506104, "learning_rate": 2.852541016406563e-05, "loss": 0.5838, "step": 10745 }, { "epoch": 13.75488, "grad_norm": 0.9437276124954224, "learning_rate": 2.8523409363745502e-05, "loss": 0.5727, "step": 10746 }, { "epoch": 13.75616, "grad_norm": 0.9436963200569153, "learning_rate": 2.8521408563425374e-05, "loss": 0.5224, "step": 10747 }, { "epoch": 13.75744, "grad_norm": 0.9029295444488525, "learning_rate": 2.8519407763105243e-05, "loss": 0.5258, "step": 10748 }, { "epoch": 13.75872, "grad_norm": 0.9773640036582947, "learning_rate": 2.8517406962785114e-05, "loss": 0.5447, "step": 10749 }, { "epoch": 13.76, "grad_norm": 0.9137765169143677, "learning_rate": 2.8515406162464986e-05, "loss": 0.543, "step": 10750 }, { "epoch": 13.76128, "grad_norm": 0.947670042514801, "learning_rate": 2.8513405362144858e-05, "loss": 0.5234, "step": 10751 }, { "epoch": 13.76256, "grad_norm": 0.9233437776565552, "learning_rate": 2.8511404561824733e-05, "loss": 0.5104, "step": 10752 }, { "epoch": 13.76384, "grad_norm": 0.9210999011993408, "learning_rate": 2.8509403761504605e-05, "loss": 
0.5731, "step": 10753 }, { "epoch": 13.76512, "grad_norm": 0.9368586540222168, "learning_rate": 2.8507402961184477e-05, "loss": 0.5448, "step": 10754 }, { "epoch": 13.7664, "grad_norm": 0.933786153793335, "learning_rate": 2.850540216086435e-05, "loss": 0.5324, "step": 10755 }, { "epoch": 13.76768, "grad_norm": 0.8887897729873657, "learning_rate": 2.8503401360544217e-05, "loss": 0.5083, "step": 10756 }, { "epoch": 13.76896, "grad_norm": 0.9025307893753052, "learning_rate": 2.850140056022409e-05, "loss": 0.5212, "step": 10757 }, { "epoch": 13.77024, "grad_norm": 0.936424195766449, "learning_rate": 2.849939975990396e-05, "loss": 0.5719, "step": 10758 }, { "epoch": 13.77152, "grad_norm": 0.936571478843689, "learning_rate": 2.8497398959583836e-05, "loss": 0.5558, "step": 10759 }, { "epoch": 13.7728, "grad_norm": 0.9100072979927063, "learning_rate": 2.8495398159263708e-05, "loss": 0.5169, "step": 10760 }, { "epoch": 13.77408, "grad_norm": 0.8884329199790955, "learning_rate": 2.849339735894358e-05, "loss": 0.5174, "step": 10761 }, { "epoch": 13.77536, "grad_norm": 0.907352864742279, "learning_rate": 2.8491396558623452e-05, "loss": 0.5166, "step": 10762 }, { "epoch": 13.77664, "grad_norm": 0.929191529750824, "learning_rate": 2.8489395758303324e-05, "loss": 0.5554, "step": 10763 }, { "epoch": 13.77792, "grad_norm": 0.8757056593894958, "learning_rate": 2.8487394957983192e-05, "loss": 0.4899, "step": 10764 }, { "epoch": 13.7792, "grad_norm": 0.9026574492454529, "learning_rate": 2.8485394157663064e-05, "loss": 0.4836, "step": 10765 }, { "epoch": 13.78048, "grad_norm": 0.9860047101974487, "learning_rate": 2.8483393357342943e-05, "loss": 0.5633, "step": 10766 }, { "epoch": 13.78176, "grad_norm": 0.8981946110725403, "learning_rate": 2.848139255702281e-05, "loss": 0.5248, "step": 10767 }, { "epoch": 13.78304, "grad_norm": 0.8830896019935608, "learning_rate": 2.8479391756702683e-05, "loss": 0.5086, "step": 10768 }, { "epoch": 13.78432, "grad_norm": 0.9160583019256592, 
"learning_rate": 2.8477390956382555e-05, "loss": 0.5561, "step": 10769 }, { "epoch": 13.7856, "grad_norm": 0.9723164439201355, "learning_rate": 2.8475390156062427e-05, "loss": 0.6023, "step": 10770 }, { "epoch": 13.78688, "grad_norm": 0.9493298530578613, "learning_rate": 2.84733893557423e-05, "loss": 0.5237, "step": 10771 }, { "epoch": 13.78816, "grad_norm": 0.9569862484931946, "learning_rate": 2.8471388555422167e-05, "loss": 0.5436, "step": 10772 }, { "epoch": 13.78944, "grad_norm": 0.9514901638031006, "learning_rate": 2.8469387755102046e-05, "loss": 0.5559, "step": 10773 }, { "epoch": 13.79072, "grad_norm": 0.914356529712677, "learning_rate": 2.8467386954781918e-05, "loss": 0.5284, "step": 10774 }, { "epoch": 13.792, "grad_norm": 0.8957856297492981, "learning_rate": 2.8465386154461786e-05, "loss": 0.51, "step": 10775 }, { "epoch": 13.79328, "grad_norm": 0.9669928550720215, "learning_rate": 2.8463385354141658e-05, "loss": 0.5284, "step": 10776 }, { "epoch": 13.79456, "grad_norm": 0.9488115310668945, "learning_rate": 2.846138455382153e-05, "loss": 0.5306, "step": 10777 }, { "epoch": 13.79584, "grad_norm": 0.9263489246368408, "learning_rate": 2.84593837535014e-05, "loss": 0.4877, "step": 10778 }, { "epoch": 13.79712, "grad_norm": 0.9073372483253479, "learning_rate": 2.8457382953181273e-05, "loss": 0.5139, "step": 10779 }, { "epoch": 13.7984, "grad_norm": 0.9395866990089417, "learning_rate": 2.8455382152861142e-05, "loss": 0.5548, "step": 10780 }, { "epoch": 13.79968, "grad_norm": 0.9865780472755432, "learning_rate": 2.845338135254102e-05, "loss": 0.5339, "step": 10781 }, { "epoch": 13.80096, "grad_norm": 1.0401817560195923, "learning_rate": 2.8451380552220892e-05, "loss": 0.5783, "step": 10782 }, { "epoch": 13.80224, "grad_norm": 0.9715459942817688, "learning_rate": 2.844937975190076e-05, "loss": 0.5637, "step": 10783 }, { "epoch": 13.80352, "grad_norm": 0.9215720891952515, "learning_rate": 2.8447378951580633e-05, "loss": 0.5476, "step": 10784 }, { "epoch": 13.8048, 
"grad_norm": 0.9105191826820374, "learning_rate": 2.8445378151260505e-05, "loss": 0.5545, "step": 10785 }, { "epoch": 13.80608, "grad_norm": 0.9285398125648499, "learning_rate": 2.8443377350940376e-05, "loss": 0.5758, "step": 10786 }, { "epoch": 13.80736, "grad_norm": 0.9510117173194885, "learning_rate": 2.844137655062025e-05, "loss": 0.5287, "step": 10787 }, { "epoch": 13.80864, "grad_norm": 0.9962491989135742, "learning_rate": 2.8439375750300124e-05, "loss": 0.6091, "step": 10788 }, { "epoch": 13.80992, "grad_norm": 0.9605426788330078, "learning_rate": 2.8437374949979995e-05, "loss": 0.5504, "step": 10789 }, { "epoch": 13.8112, "grad_norm": 0.9639581441879272, "learning_rate": 2.8435374149659867e-05, "loss": 0.5177, "step": 10790 }, { "epoch": 13.81248, "grad_norm": 0.9726257920265198, "learning_rate": 2.8433373349339736e-05, "loss": 0.5312, "step": 10791 }, { "epoch": 13.81376, "grad_norm": 0.9021669030189514, "learning_rate": 2.8431372549019608e-05, "loss": 0.4934, "step": 10792 }, { "epoch": 13.81504, "grad_norm": 0.9048178195953369, "learning_rate": 2.842937174869948e-05, "loss": 0.543, "step": 10793 }, { "epoch": 13.81632, "grad_norm": 0.9584506750106812, "learning_rate": 2.842737094837935e-05, "loss": 0.5404, "step": 10794 }, { "epoch": 13.8176, "grad_norm": 0.9696805477142334, "learning_rate": 2.8425370148059227e-05, "loss": 0.5502, "step": 10795 }, { "epoch": 13.81888, "grad_norm": 0.9749921560287476, "learning_rate": 2.84233693477391e-05, "loss": 0.5678, "step": 10796 }, { "epoch": 13.82016, "grad_norm": 0.959991991519928, "learning_rate": 2.842136854741897e-05, "loss": 0.4897, "step": 10797 }, { "epoch": 13.821439999999999, "grad_norm": 0.9816640019416809, "learning_rate": 2.8419367747098842e-05, "loss": 0.5713, "step": 10798 }, { "epoch": 13.82272, "grad_norm": 0.92801833152771, "learning_rate": 2.841736694677871e-05, "loss": 0.4937, "step": 10799 }, { "epoch": 13.824, "grad_norm": 0.9041526317596436, "learning_rate": 2.8415366146458582e-05, "loss": 
0.4989, "step": 10800 }, { "epoch": 13.82528, "grad_norm": 0.9021725654602051, "learning_rate": 2.8413365346138454e-05, "loss": 0.5101, "step": 10801 }, { "epoch": 13.82656, "grad_norm": 0.9045724272727966, "learning_rate": 2.841136454581833e-05, "loss": 0.5168, "step": 10802 }, { "epoch": 13.82784, "grad_norm": 0.9894302487373352, "learning_rate": 2.84093637454982e-05, "loss": 0.5861, "step": 10803 }, { "epoch": 13.82912, "grad_norm": 0.9526990652084351, "learning_rate": 2.8407362945178073e-05, "loss": 0.5543, "step": 10804 }, { "epoch": 13.830400000000001, "grad_norm": 0.9203627109527588, "learning_rate": 2.8405362144857945e-05, "loss": 0.5118, "step": 10805 }, { "epoch": 13.83168, "grad_norm": 0.9406778812408447, "learning_rate": 2.8403361344537817e-05, "loss": 0.5656, "step": 10806 }, { "epoch": 13.83296, "grad_norm": 0.959984540939331, "learning_rate": 2.8401360544217685e-05, "loss": 0.5362, "step": 10807 }, { "epoch": 13.83424, "grad_norm": 0.9703862071037292, "learning_rate": 2.8399359743897557e-05, "loss": 0.5502, "step": 10808 }, { "epoch": 13.83552, "grad_norm": 0.9299468994140625, "learning_rate": 2.8397358943577436e-05, "loss": 0.5166, "step": 10809 }, { "epoch": 13.8368, "grad_norm": 0.9478683471679688, "learning_rate": 2.8395358143257304e-05, "loss": 0.5688, "step": 10810 }, { "epoch": 13.83808, "grad_norm": 0.9263501763343811, "learning_rate": 2.8393357342937176e-05, "loss": 0.5294, "step": 10811 }, { "epoch": 13.83936, "grad_norm": 0.9850642085075378, "learning_rate": 2.8391356542617048e-05, "loss": 0.5819, "step": 10812 }, { "epoch": 13.84064, "grad_norm": 0.9924013614654541, "learning_rate": 2.838935574229692e-05, "loss": 0.5516, "step": 10813 }, { "epoch": 13.84192, "grad_norm": 0.9259412288665771, "learning_rate": 2.8387354941976792e-05, "loss": 0.5034, "step": 10814 }, { "epoch": 13.8432, "grad_norm": 0.9189561605453491, "learning_rate": 2.838535414165666e-05, "loss": 0.5612, "step": 10815 }, { "epoch": 13.84448, "grad_norm": 
0.9896423816680908, "learning_rate": 2.838335334133654e-05, "loss": 0.5521, "step": 10816 }, { "epoch": 13.84576, "grad_norm": 0.9858946204185486, "learning_rate": 2.838135254101641e-05, "loss": 0.5713, "step": 10817 }, { "epoch": 13.84704, "grad_norm": 0.9236769080162048, "learning_rate": 2.837935174069628e-05, "loss": 0.5443, "step": 10818 }, { "epoch": 13.84832, "grad_norm": 0.9041772484779358, "learning_rate": 2.837735094037615e-05, "loss": 0.5474, "step": 10819 }, { "epoch": 13.8496, "grad_norm": 0.9847750663757324, "learning_rate": 2.8375350140056023e-05, "loss": 0.5703, "step": 10820 }, { "epoch": 13.85088, "grad_norm": 0.9936345815658569, "learning_rate": 2.8373349339735895e-05, "loss": 0.5808, "step": 10821 }, { "epoch": 13.85216, "grad_norm": 0.9457690715789795, "learning_rate": 2.8371348539415767e-05, "loss": 0.542, "step": 10822 }, { "epoch": 13.853439999999999, "grad_norm": 0.9375315308570862, "learning_rate": 2.8369347739095642e-05, "loss": 0.5443, "step": 10823 }, { "epoch": 13.85472, "grad_norm": 0.9101528525352478, "learning_rate": 2.8367346938775514e-05, "loss": 0.5034, "step": 10824 }, { "epoch": 13.856, "grad_norm": 0.938879668712616, "learning_rate": 2.8365346138455386e-05, "loss": 0.5565, "step": 10825 }, { "epoch": 13.85728, "grad_norm": 0.8987931609153748, "learning_rate": 2.8363345338135254e-05, "loss": 0.5148, "step": 10826 }, { "epoch": 13.85856, "grad_norm": 0.9683275818824768, "learning_rate": 2.8361344537815126e-05, "loss": 0.5337, "step": 10827 }, { "epoch": 13.85984, "grad_norm": 0.9831743240356445, "learning_rate": 2.8359343737494998e-05, "loss": 0.5295, "step": 10828 }, { "epoch": 13.86112, "grad_norm": 0.9588016867637634, "learning_rate": 2.835734293717487e-05, "loss": 0.5394, "step": 10829 }, { "epoch": 13.862400000000001, "grad_norm": 0.9667535424232483, "learning_rate": 2.8355342136854745e-05, "loss": 0.5661, "step": 10830 }, { "epoch": 13.86368, "grad_norm": 0.9933251738548279, "learning_rate": 2.8353341336534617e-05, "loss": 
0.5859, "step": 10831 }, { "epoch": 13.86496, "grad_norm": 1.0498590469360352, "learning_rate": 2.835134053621449e-05, "loss": 0.6109, "step": 10832 }, { "epoch": 13.86624, "grad_norm": 0.929767370223999, "learning_rate": 2.834933973589436e-05, "loss": 0.5512, "step": 10833 }, { "epoch": 13.86752, "grad_norm": 0.9516801238059998, "learning_rate": 2.834733893557423e-05, "loss": 0.5363, "step": 10834 }, { "epoch": 13.8688, "grad_norm": 0.9916854500770569, "learning_rate": 2.83453381352541e-05, "loss": 0.5854, "step": 10835 }, { "epoch": 13.87008, "grad_norm": 0.9755607843399048, "learning_rate": 2.8343337334933973e-05, "loss": 0.5362, "step": 10836 }, { "epoch": 13.87136, "grad_norm": 0.9905216097831726, "learning_rate": 2.8341336534613848e-05, "loss": 0.5287, "step": 10837 }, { "epoch": 13.87264, "grad_norm": 0.9148777723312378, "learning_rate": 2.833933573429372e-05, "loss": 0.5432, "step": 10838 }, { "epoch": 13.87392, "grad_norm": 0.978702962398529, "learning_rate": 2.833733493397359e-05, "loss": 0.5745, "step": 10839 }, { "epoch": 13.8752, "grad_norm": 0.9320308566093445, "learning_rate": 2.8335334133653464e-05, "loss": 0.5186, "step": 10840 }, { "epoch": 13.87648, "grad_norm": 0.9999722838401794, "learning_rate": 2.8333333333333335e-05, "loss": 0.5527, "step": 10841 }, { "epoch": 13.87776, "grad_norm": 0.9607200026512146, "learning_rate": 2.8331332533013204e-05, "loss": 0.5737, "step": 10842 }, { "epoch": 13.87904, "grad_norm": 0.9080913662910461, "learning_rate": 2.8329331732693076e-05, "loss": 0.4989, "step": 10843 }, { "epoch": 13.88032, "grad_norm": 0.9405791759490967, "learning_rate": 2.8327330932372954e-05, "loss": 0.5243, "step": 10844 }, { "epoch": 13.8816, "grad_norm": 0.9514108896255493, "learning_rate": 2.8325330132052823e-05, "loss": 0.5607, "step": 10845 }, { "epoch": 13.88288, "grad_norm": 0.9832378029823303, "learning_rate": 2.8323329331732695e-05, "loss": 0.5726, "step": 10846 }, { "epoch": 13.88416, "grad_norm": 0.9148914813995361, 
"learning_rate": 2.8321328531412567e-05, "loss": 0.4917, "step": 10847 }, { "epoch": 13.88544, "grad_norm": 0.9236845970153809, "learning_rate": 2.831932773109244e-05, "loss": 0.5422, "step": 10848 }, { "epoch": 13.88672, "grad_norm": 0.8827064037322998, "learning_rate": 2.831732693077231e-05, "loss": 0.563, "step": 10849 }, { "epoch": 13.888, "grad_norm": 0.9374257922172546, "learning_rate": 2.831532613045218e-05, "loss": 0.5358, "step": 10850 }, { "epoch": 13.88928, "grad_norm": 1.0206034183502197, "learning_rate": 2.8313325330132057e-05, "loss": 0.5629, "step": 10851 }, { "epoch": 13.89056, "grad_norm": 0.9112451672554016, "learning_rate": 2.831132452981193e-05, "loss": 0.4899, "step": 10852 }, { "epoch": 13.89184, "grad_norm": 1.0080876350402832, "learning_rate": 2.8309323729491798e-05, "loss": 0.5934, "step": 10853 }, { "epoch": 13.89312, "grad_norm": 0.9685933589935303, "learning_rate": 2.830732292917167e-05, "loss": 0.5597, "step": 10854 }, { "epoch": 13.8944, "grad_norm": 0.9162622094154358, "learning_rate": 2.830532212885154e-05, "loss": 0.5136, "step": 10855 }, { "epoch": 13.89568, "grad_norm": 0.9283410310745239, "learning_rate": 2.8303321328531413e-05, "loss": 0.5515, "step": 10856 }, { "epoch": 13.89696, "grad_norm": 0.9511690139770508, "learning_rate": 2.8301320528211285e-05, "loss": 0.5062, "step": 10857 }, { "epoch": 13.89824, "grad_norm": 0.9601842761039734, "learning_rate": 2.829931972789116e-05, "loss": 0.5394, "step": 10858 }, { "epoch": 13.89952, "grad_norm": 0.946259617805481, "learning_rate": 2.8297318927571032e-05, "loss": 0.5835, "step": 10859 }, { "epoch": 13.9008, "grad_norm": 0.9118945598602295, "learning_rate": 2.8295318127250904e-05, "loss": 0.5668, "step": 10860 }, { "epoch": 13.90208, "grad_norm": 0.982496976852417, "learning_rate": 2.8293317326930773e-05, "loss": 0.5393, "step": 10861 }, { "epoch": 13.90336, "grad_norm": 0.9529083371162415, "learning_rate": 2.8291316526610644e-05, "loss": 0.5617, "step": 10862 }, { "epoch": 
13.90464, "grad_norm": 0.9181329011917114, "learning_rate": 2.8289315726290516e-05, "loss": 0.5187, "step": 10863 }, { "epoch": 13.90592, "grad_norm": 0.9448521137237549, "learning_rate": 2.8287314925970388e-05, "loss": 0.5783, "step": 10864 }, { "epoch": 13.9072, "grad_norm": 0.9216022491455078, "learning_rate": 2.8285314125650263e-05, "loss": 0.5452, "step": 10865 }, { "epoch": 13.90848, "grad_norm": 0.9557605981826782, "learning_rate": 2.8283313325330135e-05, "loss": 0.5593, "step": 10866 }, { "epoch": 13.90976, "grad_norm": 0.9449323415756226, "learning_rate": 2.8281312525010007e-05, "loss": 0.5575, "step": 10867 }, { "epoch": 13.91104, "grad_norm": 0.945347249507904, "learning_rate": 2.827931172468988e-05, "loss": 0.5354, "step": 10868 }, { "epoch": 13.91232, "grad_norm": 0.9745570421218872, "learning_rate": 2.8277310924369747e-05, "loss": 0.5818, "step": 10869 }, { "epoch": 13.9136, "grad_norm": 0.9462365508079529, "learning_rate": 2.827531012404962e-05, "loss": 0.5278, "step": 10870 }, { "epoch": 13.91488, "grad_norm": 0.985939621925354, "learning_rate": 2.827330932372949e-05, "loss": 0.5727, "step": 10871 }, { "epoch": 13.91616, "grad_norm": 0.8899412751197815, "learning_rate": 2.8271308523409366e-05, "loss": 0.5279, "step": 10872 }, { "epoch": 13.91744, "grad_norm": 0.9762179851531982, "learning_rate": 2.8269307723089238e-05, "loss": 0.5406, "step": 10873 }, { "epoch": 13.91872, "grad_norm": 0.9601300358772278, "learning_rate": 2.826730692276911e-05, "loss": 0.5518, "step": 10874 }, { "epoch": 13.92, "grad_norm": 1.0480626821517944, "learning_rate": 2.8265306122448982e-05, "loss": 0.5649, "step": 10875 }, { "epoch": 13.92128, "grad_norm": 0.8842355012893677, "learning_rate": 2.8263305322128854e-05, "loss": 0.4951, "step": 10876 }, { "epoch": 13.92256, "grad_norm": 0.9062721729278564, "learning_rate": 2.8261304521808722e-05, "loss": 0.5746, "step": 10877 }, { "epoch": 13.92384, "grad_norm": 0.9198760986328125, "learning_rate": 2.8259303721488594e-05, 
"loss": 0.5183, "step": 10878 }, { "epoch": 13.92512, "grad_norm": 0.9441488981246948, "learning_rate": 2.8257302921168473e-05, "loss": 0.537, "step": 10879 }, { "epoch": 13.9264, "grad_norm": 0.9522190093994141, "learning_rate": 2.825530212084834e-05, "loss": 0.514, "step": 10880 }, { "epoch": 13.92768, "grad_norm": 0.9240538477897644, "learning_rate": 2.8253301320528213e-05, "loss": 0.5422, "step": 10881 }, { "epoch": 13.92896, "grad_norm": 0.9243524670600891, "learning_rate": 2.8251300520208085e-05, "loss": 0.5353, "step": 10882 }, { "epoch": 13.93024, "grad_norm": 0.9364755153656006, "learning_rate": 2.8249299719887957e-05, "loss": 0.5339, "step": 10883 }, { "epoch": 13.93152, "grad_norm": 0.9147524833679199, "learning_rate": 2.824729891956783e-05, "loss": 0.4999, "step": 10884 }, { "epoch": 13.9328, "grad_norm": 0.956943690776825, "learning_rate": 2.8245298119247697e-05, "loss": 0.531, "step": 10885 }, { "epoch": 13.93408, "grad_norm": 0.9214387536048889, "learning_rate": 2.8243297318927576e-05, "loss": 0.5403, "step": 10886 }, { "epoch": 13.93536, "grad_norm": 0.9463467001914978, "learning_rate": 2.8241296518607448e-05, "loss": 0.5471, "step": 10887 }, { "epoch": 13.93664, "grad_norm": 0.9077194929122925, "learning_rate": 2.8239295718287316e-05, "loss": 0.5406, "step": 10888 }, { "epoch": 13.93792, "grad_norm": 0.943578839302063, "learning_rate": 2.8237294917967188e-05, "loss": 0.5271, "step": 10889 }, { "epoch": 13.9392, "grad_norm": 0.9538133144378662, "learning_rate": 2.823529411764706e-05, "loss": 0.5552, "step": 10890 }, { "epoch": 13.94048, "grad_norm": 0.9875748157501221, "learning_rate": 2.823329331732693e-05, "loss": 0.562, "step": 10891 }, { "epoch": 13.94176, "grad_norm": 0.8694930076599121, "learning_rate": 2.8231292517006803e-05, "loss": 0.4823, "step": 10892 }, { "epoch": 13.94304, "grad_norm": 0.9158459305763245, "learning_rate": 2.8229291716686672e-05, "loss": 0.5413, "step": 10893 }, { "epoch": 13.94432, "grad_norm": 0.9385559558868408, 
"learning_rate": 2.822729091636655e-05, "loss": 0.5901, "step": 10894 }, { "epoch": 13.9456, "grad_norm": 0.9433490633964539, "learning_rate": 2.8225290116046422e-05, "loss": 0.5487, "step": 10895 }, { "epoch": 13.94688, "grad_norm": 0.9610795378684998, "learning_rate": 2.822328931572629e-05, "loss": 0.5375, "step": 10896 }, { "epoch": 13.94816, "grad_norm": 0.9676675796508789, "learning_rate": 2.8221288515406163e-05, "loss": 0.5581, "step": 10897 }, { "epoch": 13.94944, "grad_norm": 0.9557973742485046, "learning_rate": 2.8219287715086035e-05, "loss": 0.5599, "step": 10898 }, { "epoch": 13.95072, "grad_norm": 0.9611872434616089, "learning_rate": 2.8217286914765906e-05, "loss": 0.5688, "step": 10899 }, { "epoch": 13.952, "grad_norm": 0.9648937582969666, "learning_rate": 2.8215286114445778e-05, "loss": 0.5356, "step": 10900 }, { "epoch": 13.95328, "grad_norm": 0.9507505893707275, "learning_rate": 2.8213285314125654e-05, "loss": 0.524, "step": 10901 }, { "epoch": 13.95456, "grad_norm": 0.9989834427833557, "learning_rate": 2.8211284513805525e-05, "loss": 0.5785, "step": 10902 }, { "epoch": 13.95584, "grad_norm": 0.9809247851371765, "learning_rate": 2.8209283713485397e-05, "loss": 0.5722, "step": 10903 }, { "epoch": 13.95712, "grad_norm": 0.9577847123146057, "learning_rate": 2.8207282913165266e-05, "loss": 0.5891, "step": 10904 }, { "epoch": 13.9584, "grad_norm": 1.0070008039474487, "learning_rate": 2.8205282112845138e-05, "loss": 0.6114, "step": 10905 }, { "epoch": 13.95968, "grad_norm": 0.9523963928222656, "learning_rate": 2.820328131252501e-05, "loss": 0.545, "step": 10906 }, { "epoch": 13.96096, "grad_norm": 0.9290968179702759, "learning_rate": 2.820128051220488e-05, "loss": 0.5581, "step": 10907 }, { "epoch": 13.96224, "grad_norm": 0.8966550827026367, "learning_rate": 2.8199279711884757e-05, "loss": 0.5651, "step": 10908 }, { "epoch": 13.96352, "grad_norm": 0.9531732797622681, "learning_rate": 2.819727891156463e-05, "loss": 0.534, "step": 10909 }, { "epoch": 
13.9648, "grad_norm": 0.9635023474693298, "learning_rate": 2.81952781112445e-05, "loss": 0.5338, "step": 10910 }, { "epoch": 13.96608, "grad_norm": 0.950612485408783, "learning_rate": 2.8193277310924372e-05, "loss": 0.5763, "step": 10911 }, { "epoch": 13.96736, "grad_norm": 0.9613419771194458, "learning_rate": 2.819127651060424e-05, "loss": 0.6066, "step": 10912 }, { "epoch": 13.96864, "grad_norm": 0.9048581123352051, "learning_rate": 2.8189275710284112e-05, "loss": 0.5309, "step": 10913 }, { "epoch": 13.96992, "grad_norm": 0.931669294834137, "learning_rate": 2.8187274909963984e-05, "loss": 0.5653, "step": 10914 }, { "epoch": 13.9712, "grad_norm": 0.9555082321166992, "learning_rate": 2.818527410964386e-05, "loss": 0.5659, "step": 10915 }, { "epoch": 13.972480000000001, "grad_norm": 0.9597335457801819, "learning_rate": 2.818327330932373e-05, "loss": 0.5385, "step": 10916 }, { "epoch": 13.97376, "grad_norm": 1.0083085298538208, "learning_rate": 2.8181272509003603e-05, "loss": 0.5929, "step": 10917 }, { "epoch": 13.97504, "grad_norm": 1.0068410634994507, "learning_rate": 2.8179271708683475e-05, "loss": 0.5864, "step": 10918 }, { "epoch": 13.97632, "grad_norm": 0.9447670578956604, "learning_rate": 2.8177270908363347e-05, "loss": 0.5239, "step": 10919 }, { "epoch": 13.9776, "grad_norm": 0.9446542263031006, "learning_rate": 2.8175270108043215e-05, "loss": 0.5369, "step": 10920 }, { "epoch": 13.97888, "grad_norm": 0.9933118224143982, "learning_rate": 2.8173269307723087e-05, "loss": 0.5385, "step": 10921 }, { "epoch": 13.98016, "grad_norm": 0.9696182012557983, "learning_rate": 2.8171268507402966e-05, "loss": 0.5226, "step": 10922 }, { "epoch": 13.98144, "grad_norm": 0.9453620314598083, "learning_rate": 2.8169267707082834e-05, "loss": 0.5276, "step": 10923 }, { "epoch": 13.98272, "grad_norm": 0.9572956562042236, "learning_rate": 2.8167266906762706e-05, "loss": 0.5629, "step": 10924 }, { "epoch": 13.984, "grad_norm": 0.9128978252410889, "learning_rate": 
2.8165266106442578e-05, "loss": 0.5435, "step": 10925 }, { "epoch": 13.98528, "grad_norm": 0.926874577999115, "learning_rate": 2.816326530612245e-05, "loss": 0.5317, "step": 10926 }, { "epoch": 13.98656, "grad_norm": 1.0055192708969116, "learning_rate": 2.8161264505802322e-05, "loss": 0.6023, "step": 10927 }, { "epoch": 13.98784, "grad_norm": 0.9463496804237366, "learning_rate": 2.815926370548219e-05, "loss": 0.5565, "step": 10928 }, { "epoch": 13.98912, "grad_norm": 0.9550392031669617, "learning_rate": 2.815726290516207e-05, "loss": 0.5601, "step": 10929 }, { "epoch": 13.9904, "grad_norm": 0.9578831791877747, "learning_rate": 2.815526210484194e-05, "loss": 0.5703, "step": 10930 }, { "epoch": 13.99168, "grad_norm": 0.9312989711761475, "learning_rate": 2.815326130452181e-05, "loss": 0.5328, "step": 10931 }, { "epoch": 13.99296, "grad_norm": 1.000938057899475, "learning_rate": 2.815126050420168e-05, "loss": 0.5901, "step": 10932 }, { "epoch": 13.99424, "grad_norm": 0.9439278244972229, "learning_rate": 2.8149259703881553e-05, "loss": 0.5381, "step": 10933 }, { "epoch": 13.995519999999999, "grad_norm": 0.9305405616760254, "learning_rate": 2.8147258903561425e-05, "loss": 0.5736, "step": 10934 }, { "epoch": 13.9968, "grad_norm": 0.8652977347373962, "learning_rate": 2.8145258103241297e-05, "loss": 0.5142, "step": 10935 }, { "epoch": 13.99808, "grad_norm": 0.9089574217796326, "learning_rate": 2.8143257302921172e-05, "loss": 0.506, "step": 10936 }, { "epoch": 13.99936, "grad_norm": 0.9164632558822632, "learning_rate": 2.8141256502601044e-05, "loss": 0.5394, "step": 10937 }, { "epoch": 14.00064, "grad_norm": 1.9779025316238403, "learning_rate": 2.8139255702280916e-05, "loss": 0.969, "step": 10938 }, { "epoch": 14.00192, "grad_norm": 0.9149560928344727, "learning_rate": 2.8137254901960784e-05, "loss": 0.5095, "step": 10939 }, { "epoch": 14.0032, "grad_norm": 0.9277960658073425, "learning_rate": 2.8135254101640656e-05, "loss": 0.5741, "step": 10940 }, { "epoch": 14.00448, 
"grad_norm": 0.9176803827285767, "learning_rate": 2.8133253301320528e-05, "loss": 0.4634, "step": 10941 }, { "epoch": 14.00576, "grad_norm": 0.926750123500824, "learning_rate": 2.81312525010004e-05, "loss": 0.5669, "step": 10942 }, { "epoch": 14.00704, "grad_norm": 0.8823717832565308, "learning_rate": 2.8129251700680275e-05, "loss": 0.4973, "step": 10943 }, { "epoch": 14.00832, "grad_norm": 0.9568257927894592, "learning_rate": 2.8127250900360147e-05, "loss": 0.5307, "step": 10944 }, { "epoch": 14.0096, "grad_norm": 0.9589383006095886, "learning_rate": 2.812525010004002e-05, "loss": 0.5054, "step": 10945 }, { "epoch": 14.01088, "grad_norm": 0.9462265968322754, "learning_rate": 2.812324929971989e-05, "loss": 0.5156, "step": 10946 }, { "epoch": 14.01216, "grad_norm": 0.9722265601158142, "learning_rate": 2.812124849939976e-05, "loss": 0.504, "step": 10947 }, { "epoch": 14.01344, "grad_norm": 0.9647181034088135, "learning_rate": 2.811924769907963e-05, "loss": 0.5248, "step": 10948 }, { "epoch": 14.01472, "grad_norm": 0.9778644442558289, "learning_rate": 2.8117246898759503e-05, "loss": 0.505, "step": 10949 }, { "epoch": 14.016, "grad_norm": 0.9715518355369568, "learning_rate": 2.8115246098439378e-05, "loss": 0.5486, "step": 10950 }, { "epoch": 14.01728, "grad_norm": 0.9561265707015991, "learning_rate": 2.811324529811925e-05, "loss": 0.5333, "step": 10951 }, { "epoch": 14.01856, "grad_norm": 0.9769052863121033, "learning_rate": 2.811124449779912e-05, "loss": 0.5561, "step": 10952 }, { "epoch": 14.01984, "grad_norm": 0.9301590323448181, "learning_rate": 2.8109243697478993e-05, "loss": 0.505, "step": 10953 }, { "epoch": 14.02112, "grad_norm": 0.9344640374183655, "learning_rate": 2.8107242897158865e-05, "loss": 0.5225, "step": 10954 }, { "epoch": 14.0224, "grad_norm": 0.9300322532653809, "learning_rate": 2.8105242096838734e-05, "loss": 0.494, "step": 10955 }, { "epoch": 14.02368, "grad_norm": 0.9003793001174927, "learning_rate": 2.8103241296518606e-05, "loss": 0.4933, 
"step": 10956 }, { "epoch": 14.02496, "grad_norm": 0.9156431555747986, "learning_rate": 2.8101240496198484e-05, "loss": 0.5131, "step": 10957 }, { "epoch": 14.02624, "grad_norm": 0.929981529712677, "learning_rate": 2.8099239695878353e-05, "loss": 0.5237, "step": 10958 }, { "epoch": 14.02752, "grad_norm": 0.9370753169059753, "learning_rate": 2.8097238895558225e-05, "loss": 0.4931, "step": 10959 }, { "epoch": 14.0288, "grad_norm": 0.9672894477844238, "learning_rate": 2.8095238095238096e-05, "loss": 0.4916, "step": 10960 }, { "epoch": 14.03008, "grad_norm": 0.9758133888244629, "learning_rate": 2.809323729491797e-05, "loss": 0.5458, "step": 10961 }, { "epoch": 14.03136, "grad_norm": 1.002359390258789, "learning_rate": 2.809123649459784e-05, "loss": 0.5633, "step": 10962 }, { "epoch": 14.03264, "grad_norm": 0.9776928424835205, "learning_rate": 2.808923569427771e-05, "loss": 0.5331, "step": 10963 }, { "epoch": 14.03392, "grad_norm": 0.9960092306137085, "learning_rate": 2.8087234893957587e-05, "loss": 0.5458, "step": 10964 }, { "epoch": 14.0352, "grad_norm": 0.9920620918273926, "learning_rate": 2.808523409363746e-05, "loss": 0.5853, "step": 10965 }, { "epoch": 14.03648, "grad_norm": 0.9721470475196838, "learning_rate": 2.8083233293317328e-05, "loss": 0.532, "step": 10966 }, { "epoch": 14.03776, "grad_norm": 0.8846380710601807, "learning_rate": 2.80812324929972e-05, "loss": 0.4606, "step": 10967 }, { "epoch": 14.03904, "grad_norm": 0.934118926525116, "learning_rate": 2.807923169267707e-05, "loss": 0.4849, "step": 10968 }, { "epoch": 14.04032, "grad_norm": 0.944482147693634, "learning_rate": 2.8077230892356943e-05, "loss": 0.521, "step": 10969 }, { "epoch": 14.0416, "grad_norm": 0.9114336967468262, "learning_rate": 2.8075230092036815e-05, "loss": 0.5248, "step": 10970 }, { "epoch": 14.04288, "grad_norm": 0.9594119787216187, "learning_rate": 2.807322929171669e-05, "loss": 0.5069, "step": 10971 }, { "epoch": 14.04416, "grad_norm": 0.9807631373405457, "learning_rate": 
2.8071228491396562e-05, "loss": 0.5334, "step": 10972 }, { "epoch": 14.04544, "grad_norm": 0.9795427322387695, "learning_rate": 2.8069227691076434e-05, "loss": 0.5194, "step": 10973 }, { "epoch": 14.04672, "grad_norm": 0.9742780923843384, "learning_rate": 2.8067226890756302e-05, "loss": 0.5439, "step": 10974 }, { "epoch": 14.048, "grad_norm": 0.9774748682975769, "learning_rate": 2.8065226090436174e-05, "loss": 0.5262, "step": 10975 }, { "epoch": 14.04928, "grad_norm": 1.0334399938583374, "learning_rate": 2.8063225290116046e-05, "loss": 0.56, "step": 10976 }, { "epoch": 14.05056, "grad_norm": 1.013529896736145, "learning_rate": 2.8061224489795918e-05, "loss": 0.5665, "step": 10977 }, { "epoch": 14.05184, "grad_norm": 0.9661319255828857, "learning_rate": 2.8059223689475793e-05, "loss": 0.5385, "step": 10978 }, { "epoch": 14.05312, "grad_norm": 0.9551675915718079, "learning_rate": 2.8057222889155665e-05, "loss": 0.533, "step": 10979 }, { "epoch": 14.0544, "grad_norm": 0.9196348786354065, "learning_rate": 2.8055222088835537e-05, "loss": 0.5153, "step": 10980 }, { "epoch": 14.05568, "grad_norm": 0.9740005135536194, "learning_rate": 2.805322128851541e-05, "loss": 0.5095, "step": 10981 }, { "epoch": 14.05696, "grad_norm": 0.9844241142272949, "learning_rate": 2.8051220488195277e-05, "loss": 0.5497, "step": 10982 }, { "epoch": 14.05824, "grad_norm": 0.9591256380081177, "learning_rate": 2.804921968787515e-05, "loss": 0.5108, "step": 10983 }, { "epoch": 14.05952, "grad_norm": 0.9638150334358215, "learning_rate": 2.804721888755502e-05, "loss": 0.5081, "step": 10984 }, { "epoch": 14.0608, "grad_norm": 0.9753637909889221, "learning_rate": 2.8045218087234896e-05, "loss": 0.4962, "step": 10985 }, { "epoch": 14.06208, "grad_norm": 0.9755197167396545, "learning_rate": 2.8043217286914768e-05, "loss": 0.5239, "step": 10986 }, { "epoch": 14.06336, "grad_norm": 1.000517725944519, "learning_rate": 2.804121648659464e-05, "loss": 0.5282, "step": 10987 }, { "epoch": 14.06464, "grad_norm": 
0.9386008381843567, "learning_rate": 2.8039215686274512e-05, "loss": 0.5275, "step": 10988 }, { "epoch": 14.06592, "grad_norm": 0.9570558667182922, "learning_rate": 2.8037214885954384e-05, "loss": 0.5064, "step": 10989 }, { "epoch": 14.0672, "grad_norm": 0.955383837223053, "learning_rate": 2.8035214085634252e-05, "loss": 0.5152, "step": 10990 }, { "epoch": 14.06848, "grad_norm": 0.9390836954116821, "learning_rate": 2.8033213285314124e-05, "loss": 0.4959, "step": 10991 }, { "epoch": 14.06976, "grad_norm": 0.9478831887245178, "learning_rate": 2.8031212484994003e-05, "loss": 0.5058, "step": 10992 }, { "epoch": 14.07104, "grad_norm": 0.9145018458366394, "learning_rate": 2.802921168467387e-05, "loss": 0.4909, "step": 10993 }, { "epoch": 14.07232, "grad_norm": 0.9584047198295593, "learning_rate": 2.8027210884353743e-05, "loss": 0.5112, "step": 10994 }, { "epoch": 14.0736, "grad_norm": 0.973054826259613, "learning_rate": 2.8025210084033615e-05, "loss": 0.5226, "step": 10995 }, { "epoch": 14.07488, "grad_norm": 0.9144132733345032, "learning_rate": 2.8023209283713487e-05, "loss": 0.4899, "step": 10996 }, { "epoch": 14.07616, "grad_norm": 0.9813609719276428, "learning_rate": 2.802120848339336e-05, "loss": 0.5154, "step": 10997 }, { "epoch": 14.07744, "grad_norm": 0.9886991381645203, "learning_rate": 2.8019207683073227e-05, "loss": 0.5467, "step": 10998 }, { "epoch": 14.07872, "grad_norm": 0.9517562985420227, "learning_rate": 2.8017206882753106e-05, "loss": 0.5436, "step": 10999 }, { "epoch": 14.08, "grad_norm": 0.9583877325057983, "learning_rate": 2.8015206082432978e-05, "loss": 0.4949, "step": 11000 }, { "epoch": 14.08128, "grad_norm": 0.988237738609314, "learning_rate": 2.8013205282112846e-05, "loss": 0.5509, "step": 11001 }, { "epoch": 14.08256, "grad_norm": 0.956883430480957, "learning_rate": 2.8011204481792718e-05, "loss": 0.5071, "step": 11002 }, { "epoch": 14.08384, "grad_norm": 0.9071076512336731, "learning_rate": 2.800920368147259e-05, "loss": 0.4785, "step": 11003 
}, { "epoch": 14.08512, "grad_norm": 0.9783008098602295, "learning_rate": 2.800720288115246e-05, "loss": 0.5356, "step": 11004 }, { "epoch": 14.0864, "grad_norm": 0.9609231352806091, "learning_rate": 2.8005202080832333e-05, "loss": 0.5093, "step": 11005 }, { "epoch": 14.08768, "grad_norm": 0.9345542192459106, "learning_rate": 2.8003201280512202e-05, "loss": 0.5265, "step": 11006 }, { "epoch": 14.08896, "grad_norm": 0.94151371717453, "learning_rate": 2.800120048019208e-05, "loss": 0.5561, "step": 11007 }, { "epoch": 14.09024, "grad_norm": 0.9789606928825378, "learning_rate": 2.7999199679871952e-05, "loss": 0.56, "step": 11008 }, { "epoch": 14.09152, "grad_norm": 1.0095890760421753, "learning_rate": 2.799719887955182e-05, "loss": 0.5189, "step": 11009 }, { "epoch": 14.0928, "grad_norm": 0.9807612895965576, "learning_rate": 2.7995198079231693e-05, "loss": 0.5041, "step": 11010 }, { "epoch": 14.09408, "grad_norm": 0.9574481844902039, "learning_rate": 2.7993197278911565e-05, "loss": 0.5085, "step": 11011 }, { "epoch": 14.09536, "grad_norm": 0.9162589311599731, "learning_rate": 2.7991196478591436e-05, "loss": 0.5223, "step": 11012 }, { "epoch": 14.09664, "grad_norm": 0.9016433358192444, "learning_rate": 2.7989195678271308e-05, "loss": 0.5148, "step": 11013 }, { "epoch": 14.09792, "grad_norm": 0.9376025199890137, "learning_rate": 2.7987194877951184e-05, "loss": 0.5246, "step": 11014 }, { "epoch": 14.0992, "grad_norm": 0.9752246737480164, "learning_rate": 2.7985194077631055e-05, "loss": 0.5176, "step": 11015 }, { "epoch": 14.10048, "grad_norm": 0.9751932621002197, "learning_rate": 2.7983193277310927e-05, "loss": 0.5399, "step": 11016 }, { "epoch": 14.10176, "grad_norm": 0.8926282525062561, "learning_rate": 2.7981192476990796e-05, "loss": 0.4618, "step": 11017 }, { "epoch": 14.10304, "grad_norm": 0.9403905272483826, "learning_rate": 2.7979191676670668e-05, "loss": 0.5345, "step": 11018 }, { "epoch": 14.10432, "grad_norm": 0.9546553492546082, "learning_rate": 
2.797719087635054e-05, "loss": 0.5458, "step": 11019 }, { "epoch": 14.1056, "grad_norm": 0.9716793298721313, "learning_rate": 2.797519007603041e-05, "loss": 0.5325, "step": 11020 }, { "epoch": 14.10688, "grad_norm": 0.9732063412666321, "learning_rate": 2.7973189275710287e-05, "loss": 0.5074, "step": 11021 }, { "epoch": 14.10816, "grad_norm": 0.9816451072692871, "learning_rate": 2.797118847539016e-05, "loss": 0.5202, "step": 11022 }, { "epoch": 14.10944, "grad_norm": 0.9631067514419556, "learning_rate": 2.796918767507003e-05, "loss": 0.5571, "step": 11023 }, { "epoch": 14.11072, "grad_norm": 0.9688462018966675, "learning_rate": 2.7967186874749902e-05, "loss": 0.5302, "step": 11024 }, { "epoch": 14.112, "grad_norm": 1.0027779340744019, "learning_rate": 2.796518607442977e-05, "loss": 0.5303, "step": 11025 }, { "epoch": 14.11328, "grad_norm": 0.9702111482620239, "learning_rate": 2.7963185274109642e-05, "loss": 0.5138, "step": 11026 }, { "epoch": 14.11456, "grad_norm": 1.0045280456542969, "learning_rate": 2.7961184473789514e-05, "loss": 0.5441, "step": 11027 }, { "epoch": 14.11584, "grad_norm": 0.9333431720733643, "learning_rate": 2.7959183673469393e-05, "loss": 0.5327, "step": 11028 }, { "epoch": 14.11712, "grad_norm": 0.9155805110931396, "learning_rate": 2.795718287314926e-05, "loss": 0.4828, "step": 11029 }, { "epoch": 14.1184, "grad_norm": 0.9476427435874939, "learning_rate": 2.7955182072829133e-05, "loss": 0.4894, "step": 11030 }, { "epoch": 14.11968, "grad_norm": 0.9640700817108154, "learning_rate": 2.7953181272509005e-05, "loss": 0.5322, "step": 11031 }, { "epoch": 14.12096, "grad_norm": 0.9432242512702942, "learning_rate": 2.7951180472188877e-05, "loss": 0.5413, "step": 11032 }, { "epoch": 14.12224, "grad_norm": 0.9809675216674805, "learning_rate": 2.7949179671868745e-05, "loss": 0.5346, "step": 11033 }, { "epoch": 14.12352, "grad_norm": 0.9683305621147156, "learning_rate": 2.7947178871548617e-05, "loss": 0.5097, "step": 11034 }, { "epoch": 14.1248, "grad_norm": 
0.9600328803062439, "learning_rate": 2.7945178071228496e-05, "loss": 0.5013, "step": 11035 }, { "epoch": 14.12608, "grad_norm": 0.9506182670593262, "learning_rate": 2.7943177270908368e-05, "loss": 0.491, "step": 11036 }, { "epoch": 14.12736, "grad_norm": 0.9323163628578186, "learning_rate": 2.7941176470588236e-05, "loss": 0.5299, "step": 11037 }, { "epoch": 14.12864, "grad_norm": 0.9958814978599548, "learning_rate": 2.7939175670268108e-05, "loss": 0.5498, "step": 11038 }, { "epoch": 14.12992, "grad_norm": 0.9677280187606812, "learning_rate": 2.793717486994798e-05, "loss": 0.5417, "step": 11039 }, { "epoch": 14.1312, "grad_norm": 0.9545553922653198, "learning_rate": 2.7935174069627852e-05, "loss": 0.4917, "step": 11040 }, { "epoch": 14.13248, "grad_norm": 0.9735832214355469, "learning_rate": 2.793317326930772e-05, "loss": 0.5313, "step": 11041 }, { "epoch": 14.13376, "grad_norm": 0.9970044493675232, "learning_rate": 2.79311724689876e-05, "loss": 0.5481, "step": 11042 }, { "epoch": 14.13504, "grad_norm": 0.9880550503730774, "learning_rate": 2.792917166866747e-05, "loss": 0.5117, "step": 11043 }, { "epoch": 14.13632, "grad_norm": 0.9498565196990967, "learning_rate": 2.7927170868347343e-05, "loss": 0.5318, "step": 11044 }, { "epoch": 14.1376, "grad_norm": 0.9987189173698425, "learning_rate": 2.792517006802721e-05, "loss": 0.5665, "step": 11045 }, { "epoch": 14.13888, "grad_norm": 0.9754243493080139, "learning_rate": 2.7923169267707083e-05, "loss": 0.5245, "step": 11046 }, { "epoch": 14.14016, "grad_norm": 0.9672796130180359, "learning_rate": 2.7921168467386955e-05, "loss": 0.5555, "step": 11047 }, { "epoch": 14.14144, "grad_norm": 0.9213780760765076, "learning_rate": 2.7919167667066827e-05, "loss": 0.4929, "step": 11048 }, { "epoch": 14.14272, "grad_norm": 0.893765389919281, "learning_rate": 2.7917166866746702e-05, "loss": 0.4783, "step": 11049 }, { "epoch": 14.144, "grad_norm": 0.9702581763267517, "learning_rate": 2.7915166066426574e-05, "loss": 0.5511, "step": 11050 
}, { "epoch": 14.14528, "grad_norm": 0.9254672527313232, "learning_rate": 2.7913165266106446e-05, "loss": 0.5087, "step": 11051 }, { "epoch": 14.14656, "grad_norm": 0.9144052267074585, "learning_rate": 2.7911164465786317e-05, "loss": 0.5502, "step": 11052 }, { "epoch": 14.14784, "grad_norm": 0.937017560005188, "learning_rate": 2.7909163665466186e-05, "loss": 0.5214, "step": 11053 }, { "epoch": 14.14912, "grad_norm": 0.9533378481864929, "learning_rate": 2.7907162865146058e-05, "loss": 0.5256, "step": 11054 }, { "epoch": 14.1504, "grad_norm": 1.019874095916748, "learning_rate": 2.790516206482593e-05, "loss": 0.5798, "step": 11055 }, { "epoch": 14.15168, "grad_norm": 1.0000838041305542, "learning_rate": 2.7903161264505805e-05, "loss": 0.5462, "step": 11056 }, { "epoch": 14.15296, "grad_norm": 0.9633542895317078, "learning_rate": 2.7901160464185677e-05, "loss": 0.5429, "step": 11057 }, { "epoch": 14.15424, "grad_norm": 0.929341733455658, "learning_rate": 2.789915966386555e-05, "loss": 0.5231, "step": 11058 }, { "epoch": 14.15552, "grad_norm": 0.9698876142501831, "learning_rate": 2.789715886354542e-05, "loss": 0.5094, "step": 11059 }, { "epoch": 14.1568, "grad_norm": 0.9775946736335754, "learning_rate": 2.7895158063225292e-05, "loss": 0.5122, "step": 11060 }, { "epoch": 14.15808, "grad_norm": 0.991977870464325, "learning_rate": 2.789315726290516e-05, "loss": 0.5952, "step": 11061 }, { "epoch": 14.15936, "grad_norm": 0.9593073129653931, "learning_rate": 2.7891156462585033e-05, "loss": 0.4648, "step": 11062 }, { "epoch": 14.16064, "grad_norm": 0.9783948063850403, "learning_rate": 2.788915566226491e-05, "loss": 0.5765, "step": 11063 }, { "epoch": 14.16192, "grad_norm": 0.982947826385498, "learning_rate": 2.788715486194478e-05, "loss": 0.5264, "step": 11064 }, { "epoch": 14.1632, "grad_norm": 0.9915079474449158, "learning_rate": 2.788515406162465e-05, "loss": 0.5371, "step": 11065 }, { "epoch": 14.16448, "grad_norm": 0.9839328527450562, "learning_rate": 
2.7883153261304523e-05, "loss": 0.5434, "step": 11066 }, { "epoch": 14.16576, "grad_norm": 0.9635565280914307, "learning_rate": 2.7881152460984395e-05, "loss": 0.5017, "step": 11067 }, { "epoch": 14.16704, "grad_norm": 0.9151342511177063, "learning_rate": 2.7879151660664267e-05, "loss": 0.5008, "step": 11068 }, { "epoch": 14.16832, "grad_norm": 0.9379957914352417, "learning_rate": 2.7877150860344136e-05, "loss": 0.5265, "step": 11069 }, { "epoch": 14.1696, "grad_norm": 0.9489511251449585, "learning_rate": 2.7875150060024014e-05, "loss": 0.4828, "step": 11070 }, { "epoch": 14.17088, "grad_norm": 0.9574431777000427, "learning_rate": 2.7873149259703886e-05, "loss": 0.5325, "step": 11071 }, { "epoch": 14.17216, "grad_norm": 0.9298610091209412, "learning_rate": 2.7871148459383755e-05, "loss": 0.5055, "step": 11072 }, { "epoch": 14.17344, "grad_norm": 1.00288724899292, "learning_rate": 2.7869147659063626e-05, "loss": 0.5837, "step": 11073 }, { "epoch": 14.17472, "grad_norm": 0.9784044623374939, "learning_rate": 2.78671468587435e-05, "loss": 0.5425, "step": 11074 }, { "epoch": 14.176, "grad_norm": 0.9960471391677856, "learning_rate": 2.786514605842337e-05, "loss": 0.5137, "step": 11075 }, { "epoch": 14.17728, "grad_norm": 1.01795494556427, "learning_rate": 2.7863145258103242e-05, "loss": 0.5483, "step": 11076 }, { "epoch": 14.17856, "grad_norm": 1.0112005472183228, "learning_rate": 2.7861144457783117e-05, "loss": 0.5657, "step": 11077 }, { "epoch": 14.17984, "grad_norm": 0.9345349073410034, "learning_rate": 2.785914365746299e-05, "loss": 0.5126, "step": 11078 }, { "epoch": 14.18112, "grad_norm": 0.9684646725654602, "learning_rate": 2.785714285714286e-05, "loss": 0.4951, "step": 11079 }, { "epoch": 14.1824, "grad_norm": 0.9888444542884827, "learning_rate": 2.785514205682273e-05, "loss": 0.5313, "step": 11080 }, { "epoch": 14.18368, "grad_norm": 0.9643538594245911, "learning_rate": 2.78531412565026e-05, "loss": 0.5066, "step": 11081 }, { "epoch": 14.18496, "grad_norm": 
1.0346754789352417, "learning_rate": 2.7851140456182473e-05, "loss": 0.5683, "step": 11082 }, { "epoch": 14.18624, "grad_norm": 0.9812901020050049, "learning_rate": 2.7849139655862345e-05, "loss": 0.5395, "step": 11083 }, { "epoch": 14.18752, "grad_norm": 1.0163651704788208, "learning_rate": 2.784713885554222e-05, "loss": 0.5592, "step": 11084 }, { "epoch": 14.1888, "grad_norm": 0.9411138892173767, "learning_rate": 2.7845138055222092e-05, "loss": 0.5013, "step": 11085 }, { "epoch": 14.19008, "grad_norm": 0.9784141182899475, "learning_rate": 2.7843137254901964e-05, "loss": 0.5097, "step": 11086 }, { "epoch": 14.19136, "grad_norm": 0.9341786503791809, "learning_rate": 2.7841136454581836e-05, "loss": 0.5253, "step": 11087 }, { "epoch": 14.19264, "grad_norm": 0.9971571564674377, "learning_rate": 2.7839135654261704e-05, "loss": 0.5746, "step": 11088 }, { "epoch": 14.19392, "grad_norm": 0.9248690009117126, "learning_rate": 2.7837134853941576e-05, "loss": 0.5079, "step": 11089 }, { "epoch": 14.1952, "grad_norm": 0.9347603917121887, "learning_rate": 2.7835134053621448e-05, "loss": 0.5212, "step": 11090 }, { "epoch": 14.19648, "grad_norm": 0.9436104893684387, "learning_rate": 2.7833133253301323e-05, "loss": 0.5332, "step": 11091 }, { "epoch": 14.19776, "grad_norm": 0.9841783046722412, "learning_rate": 2.7831132452981195e-05, "loss": 0.524, "step": 11092 }, { "epoch": 14.19904, "grad_norm": 0.9630305171012878, "learning_rate": 2.7829131652661067e-05, "loss": 0.5511, "step": 11093 }, { "epoch": 14.20032, "grad_norm": 0.9855948090553284, "learning_rate": 2.782713085234094e-05, "loss": 0.573, "step": 11094 }, { "epoch": 14.2016, "grad_norm": 0.9675716161727905, "learning_rate": 2.782513005202081e-05, "loss": 0.5066, "step": 11095 }, { "epoch": 14.20288, "grad_norm": 1.0122812986373901, "learning_rate": 2.782312925170068e-05, "loss": 0.59, "step": 11096 }, { "epoch": 14.20416, "grad_norm": 0.9795871376991272, "learning_rate": 2.782112845138055e-05, "loss": 0.5633, "step": 11097 
}, { "epoch": 14.20544, "grad_norm": 0.9450654983520508, "learning_rate": 2.781912765106043e-05, "loss": 0.5224, "step": 11098 }, { "epoch": 14.20672, "grad_norm": 0.9646390080451965, "learning_rate": 2.7817126850740298e-05, "loss": 0.5386, "step": 11099 }, { "epoch": 14.208, "grad_norm": 0.9584577679634094, "learning_rate": 2.781512605042017e-05, "loss": 0.493, "step": 11100 }, { "epoch": 14.20928, "grad_norm": 1.0012218952178955, "learning_rate": 2.7813125250100042e-05, "loss": 0.5086, "step": 11101 }, { "epoch": 14.21056, "grad_norm": 0.9794235229492188, "learning_rate": 2.7811124449779914e-05, "loss": 0.5184, "step": 11102 }, { "epoch": 14.21184, "grad_norm": 0.9602649211883545, "learning_rate": 2.7809123649459786e-05, "loss": 0.558, "step": 11103 }, { "epoch": 14.21312, "grad_norm": 0.90842205286026, "learning_rate": 2.7807122849139654e-05, "loss": 0.4799, "step": 11104 }, { "epoch": 14.2144, "grad_norm": 0.9867070913314819, "learning_rate": 2.7805122048819533e-05, "loss": 0.5553, "step": 11105 }, { "epoch": 14.21568, "grad_norm": 0.9673285484313965, "learning_rate": 2.7803121248499405e-05, "loss": 0.5077, "step": 11106 }, { "epoch": 14.21696, "grad_norm": 1.000418782234192, "learning_rate": 2.7801120448179273e-05, "loss": 0.5389, "step": 11107 }, { "epoch": 14.21824, "grad_norm": 0.9656979441642761, "learning_rate": 2.7799119647859145e-05, "loss": 0.5318, "step": 11108 }, { "epoch": 14.21952, "grad_norm": 0.9900228381156921, "learning_rate": 2.7797118847539017e-05, "loss": 0.5487, "step": 11109 }, { "epoch": 14.2208, "grad_norm": 0.9100193977355957, "learning_rate": 2.779511804721889e-05, "loss": 0.4788, "step": 11110 }, { "epoch": 14.22208, "grad_norm": 0.9718891978263855, "learning_rate": 2.779311724689876e-05, "loss": 0.5599, "step": 11111 }, { "epoch": 14.22336, "grad_norm": 1.0352201461791992, "learning_rate": 2.7791116446578636e-05, "loss": 0.5693, "step": 11112 }, { "epoch": 14.22464, "grad_norm": 0.9154449105262756, "learning_rate": 
2.7789115646258508e-05, "loss": 0.5197, "step": 11113 }, { "epoch": 14.22592, "grad_norm": 0.9313166737556458, "learning_rate": 2.778711484593838e-05, "loss": 0.5425, "step": 11114 }, { "epoch": 14.2272, "grad_norm": 0.9067119359970093, "learning_rate": 2.7785114045618248e-05, "loss": 0.4738, "step": 11115 }, { "epoch": 14.22848, "grad_norm": 0.919452428817749, "learning_rate": 2.778311324529812e-05, "loss": 0.4808, "step": 11116 }, { "epoch": 14.22976, "grad_norm": 0.9764316082000732, "learning_rate": 2.778111244497799e-05, "loss": 0.5508, "step": 11117 }, { "epoch": 14.23104, "grad_norm": 1.002416968345642, "learning_rate": 2.7779111644657863e-05, "loss": 0.5727, "step": 11118 }, { "epoch": 14.23232, "grad_norm": 0.9473809599876404, "learning_rate": 2.7777110844337735e-05, "loss": 0.515, "step": 11119 }, { "epoch": 14.2336, "grad_norm": 0.9346858263015747, "learning_rate": 2.777511004401761e-05, "loss": 0.5267, "step": 11120 }, { "epoch": 14.23488, "grad_norm": 0.9307919144630432, "learning_rate": 2.7773109243697482e-05, "loss": 0.4998, "step": 11121 }, { "epoch": 14.23616, "grad_norm": 0.9973614811897278, "learning_rate": 2.7771108443377354e-05, "loss": 0.582, "step": 11122 }, { "epoch": 14.23744, "grad_norm": 0.994784414768219, "learning_rate": 2.7769107643057223e-05, "loss": 0.5207, "step": 11123 }, { "epoch": 14.23872, "grad_norm": 0.9563934206962585, "learning_rate": 2.7767106842737095e-05, "loss": 0.4937, "step": 11124 }, { "epoch": 14.24, "grad_norm": 0.9174085855484009, "learning_rate": 2.7765106042416966e-05, "loss": 0.5371, "step": 11125 }, { "epoch": 14.24128, "grad_norm": 0.9807153940200806, "learning_rate": 2.7763105242096838e-05, "loss": 0.5334, "step": 11126 }, { "epoch": 14.24256, "grad_norm": 0.9536086916923523, "learning_rate": 2.7761104441776714e-05, "loss": 0.5261, "step": 11127 }, { "epoch": 14.24384, "grad_norm": 0.9382270574569702, "learning_rate": 2.7759103641456585e-05, "loss": 0.5195, "step": 11128 }, { "epoch": 14.24512, "grad_norm": 
0.98002028465271, "learning_rate": 2.7757102841136457e-05, "loss": 0.5247, "step": 11129 }, { "epoch": 14.2464, "grad_norm": 0.940831184387207, "learning_rate": 2.775510204081633e-05, "loss": 0.4827, "step": 11130 }, { "epoch": 14.24768, "grad_norm": 0.8894546031951904, "learning_rate": 2.7753101240496198e-05, "loss": 0.5129, "step": 11131 }, { "epoch": 14.24896, "grad_norm": 0.8587113618850708, "learning_rate": 2.775110044017607e-05, "loss": 0.4721, "step": 11132 }, { "epoch": 14.25024, "grad_norm": 0.9889330863952637, "learning_rate": 2.774909963985594e-05, "loss": 0.573, "step": 11133 }, { "epoch": 14.25152, "grad_norm": 0.9522219300270081, "learning_rate": 2.7747098839535817e-05, "loss": 0.4859, "step": 11134 }, { "epoch": 14.2528, "grad_norm": 1.0236855745315552, "learning_rate": 2.774509803921569e-05, "loss": 0.5632, "step": 11135 }, { "epoch": 14.25408, "grad_norm": 0.9715608358383179, "learning_rate": 2.774309723889556e-05, "loss": 0.5655, "step": 11136 }, { "epoch": 14.25536, "grad_norm": 0.9080358147621155, "learning_rate": 2.7741096438575432e-05, "loss": 0.5086, "step": 11137 }, { "epoch": 14.25664, "grad_norm": 0.9496445059776306, "learning_rate": 2.7739095638255304e-05, "loss": 0.5476, "step": 11138 }, { "epoch": 14.25792, "grad_norm": 0.9452351331710815, "learning_rate": 2.7737094837935172e-05, "loss": 0.4651, "step": 11139 }, { "epoch": 14.2592, "grad_norm": 0.9179670810699463, "learning_rate": 2.7735094037615044e-05, "loss": 0.523, "step": 11140 }, { "epoch": 14.26048, "grad_norm": 1.0410470962524414, "learning_rate": 2.7733093237294923e-05, "loss": 0.5671, "step": 11141 }, { "epoch": 14.26176, "grad_norm": 1.0138649940490723, "learning_rate": 2.773109243697479e-05, "loss": 0.5479, "step": 11142 }, { "epoch": 14.26304, "grad_norm": 1.071816325187683, "learning_rate": 2.7729091636654663e-05, "loss": 0.5894, "step": 11143 }, { "epoch": 14.26432, "grad_norm": 0.9818564653396606, "learning_rate": 2.7727090836334535e-05, "loss": 0.5264, "step": 11144 }, 
{ "epoch": 14.2656, "grad_norm": 1.001348853111267, "learning_rate": 2.7725090036014407e-05, "loss": 0.4765, "step": 11145 }, { "epoch": 14.26688, "grad_norm": 1.0460922718048096, "learning_rate": 2.772308923569428e-05, "loss": 0.5337, "step": 11146 }, { "epoch": 14.26816, "grad_norm": 0.9920286536216736, "learning_rate": 2.7721088435374147e-05, "loss": 0.5336, "step": 11147 }, { "epoch": 14.26944, "grad_norm": 0.9713577628135681, "learning_rate": 2.7719087635054026e-05, "loss": 0.5586, "step": 11148 }, { "epoch": 14.27072, "grad_norm": 0.9439414143562317, "learning_rate": 2.7717086834733898e-05, "loss": 0.5438, "step": 11149 }, { "epoch": 14.272, "grad_norm": 0.956102192401886, "learning_rate": 2.7715086034413766e-05, "loss": 0.5383, "step": 11150 }, { "epoch": 14.27328, "grad_norm": 0.9565941095352173, "learning_rate": 2.7713085234093638e-05, "loss": 0.5267, "step": 11151 }, { "epoch": 14.27456, "grad_norm": 0.9341673851013184, "learning_rate": 2.771108443377351e-05, "loss": 0.5157, "step": 11152 }, { "epoch": 14.27584, "grad_norm": 0.948688268661499, "learning_rate": 2.7709083633453382e-05, "loss": 0.5132, "step": 11153 }, { "epoch": 14.27712, "grad_norm": 1.0164926052093506, "learning_rate": 2.7707082833133254e-05, "loss": 0.5641, "step": 11154 }, { "epoch": 14.2784, "grad_norm": 0.9440988302230835, "learning_rate": 2.770508203281313e-05, "loss": 0.5071, "step": 11155 }, { "epoch": 14.27968, "grad_norm": 0.9467429518699646, "learning_rate": 2.7703081232493e-05, "loss": 0.4859, "step": 11156 }, { "epoch": 14.28096, "grad_norm": 0.923644483089447, "learning_rate": 2.7701080432172873e-05, "loss": 0.4908, "step": 11157 }, { "epoch": 14.28224, "grad_norm": 0.924755334854126, "learning_rate": 2.769907963185274e-05, "loss": 0.5031, "step": 11158 }, { "epoch": 14.28352, "grad_norm": 0.971537709236145, "learning_rate": 2.7697078831532613e-05, "loss": 0.55, "step": 11159 }, { "epoch": 14.2848, "grad_norm": 0.9279425144195557, "learning_rate": 2.7695078031212485e-05, 
"loss": 0.4737, "step": 11160 }, { "epoch": 14.28608, "grad_norm": 1.0148504972457886, "learning_rate": 2.7693077230892357e-05, "loss": 0.562, "step": 11161 }, { "epoch": 14.28736, "grad_norm": 1.0114786624908447, "learning_rate": 2.7691076430572232e-05, "loss": 0.5845, "step": 11162 }, { "epoch": 14.288640000000001, "grad_norm": 1.0205031633377075, "learning_rate": 2.7689075630252104e-05, "loss": 0.5958, "step": 11163 }, { "epoch": 14.28992, "grad_norm": 1.0409082174301147, "learning_rate": 2.7687074829931976e-05, "loss": 0.5586, "step": 11164 }, { "epoch": 14.2912, "grad_norm": 0.9876707196235657, "learning_rate": 2.7685074029611847e-05, "loss": 0.5519, "step": 11165 }, { "epoch": 14.29248, "grad_norm": 1.0097538232803345, "learning_rate": 2.7683073229291716e-05, "loss": 0.5301, "step": 11166 }, { "epoch": 14.29376, "grad_norm": 0.9804957509040833, "learning_rate": 2.7681072428971588e-05, "loss": 0.5528, "step": 11167 }, { "epoch": 14.29504, "grad_norm": 0.9977876543998718, "learning_rate": 2.767907162865146e-05, "loss": 0.5245, "step": 11168 }, { "epoch": 14.29632, "grad_norm": 0.9734411239624023, "learning_rate": 2.7677070828331335e-05, "loss": 0.5306, "step": 11169 }, { "epoch": 14.2976, "grad_norm": 0.9642010927200317, "learning_rate": 2.7675070028011207e-05, "loss": 0.5472, "step": 11170 }, { "epoch": 14.29888, "grad_norm": 1.0015202760696411, "learning_rate": 2.767306922769108e-05, "loss": 0.5343, "step": 11171 }, { "epoch": 14.30016, "grad_norm": 0.9820809364318848, "learning_rate": 2.767106842737095e-05, "loss": 0.5606, "step": 11172 }, { "epoch": 14.30144, "grad_norm": 0.977370023727417, "learning_rate": 2.7669067627050822e-05, "loss": 0.5411, "step": 11173 }, { "epoch": 14.30272, "grad_norm": 0.9267496466636658, "learning_rate": 2.766706682673069e-05, "loss": 0.4905, "step": 11174 }, { "epoch": 14.304, "grad_norm": 0.9372310042381287, "learning_rate": 2.7665066026410563e-05, "loss": 0.521, "step": 11175 }, { "epoch": 14.30528, "grad_norm": 
0.9425508975982666, "learning_rate": 2.766306522609044e-05, "loss": 0.5014, "step": 11176 }, { "epoch": 14.30656, "grad_norm": 0.9979614019393921, "learning_rate": 2.766106442577031e-05, "loss": 0.5129, "step": 11177 }, { "epoch": 14.30784, "grad_norm": 1.0363802909851074, "learning_rate": 2.765906362545018e-05, "loss": 0.5654, "step": 11178 }, { "epoch": 14.30912, "grad_norm": 1.0523051023483276, "learning_rate": 2.7657062825130053e-05, "loss": 0.5443, "step": 11179 }, { "epoch": 14.3104, "grad_norm": 0.9782631993293762, "learning_rate": 2.7655062024809925e-05, "loss": 0.5205, "step": 11180 }, { "epoch": 14.31168, "grad_norm": 0.9576646685600281, "learning_rate": 2.7653061224489797e-05, "loss": 0.5151, "step": 11181 }, { "epoch": 14.31296, "grad_norm": 0.9194750189781189, "learning_rate": 2.7651060424169666e-05, "loss": 0.5067, "step": 11182 }, { "epoch": 14.31424, "grad_norm": 0.9091538786888123, "learning_rate": 2.7649059623849544e-05, "loss": 0.4496, "step": 11183 }, { "epoch": 14.31552, "grad_norm": 1.0529427528381348, "learning_rate": 2.7647058823529416e-05, "loss": 0.6212, "step": 11184 }, { "epoch": 14.3168, "grad_norm": 1.0431804656982422, "learning_rate": 2.7645058023209285e-05, "loss": 0.6295, "step": 11185 }, { "epoch": 14.31808, "grad_norm": 0.9597348570823669, "learning_rate": 2.7643057222889156e-05, "loss": 0.52, "step": 11186 }, { "epoch": 14.31936, "grad_norm": 1.0246033668518066, "learning_rate": 2.7641056422569028e-05, "loss": 0.5113, "step": 11187 }, { "epoch": 14.32064, "grad_norm": 0.9777224659919739, "learning_rate": 2.76390556222489e-05, "loss": 0.4957, "step": 11188 }, { "epoch": 14.32192, "grad_norm": 0.9850444793701172, "learning_rate": 2.7637054821928772e-05, "loss": 0.5251, "step": 11189 }, { "epoch": 14.3232, "grad_norm": 0.9483627080917358, "learning_rate": 2.7635054021608647e-05, "loss": 0.5139, "step": 11190 }, { "epoch": 14.32448, "grad_norm": 0.9668890833854675, "learning_rate": 2.763305322128852e-05, "loss": 0.5483, "step": 11191 
}, { "epoch": 14.32576, "grad_norm": 0.9546102285385132, "learning_rate": 2.763105242096839e-05, "loss": 0.5344, "step": 11192 }, { "epoch": 14.32704, "grad_norm": 0.9604453444480896, "learning_rate": 2.762905162064826e-05, "loss": 0.5439, "step": 11193 }, { "epoch": 14.32832, "grad_norm": 0.984946072101593, "learning_rate": 2.762705082032813e-05, "loss": 0.5376, "step": 11194 }, { "epoch": 14.3296, "grad_norm": 1.022682547569275, "learning_rate": 2.7625050020008003e-05, "loss": 0.5918, "step": 11195 }, { "epoch": 14.33088, "grad_norm": 0.9947453141212463, "learning_rate": 2.7623049219687875e-05, "loss": 0.556, "step": 11196 }, { "epoch": 14.33216, "grad_norm": 0.9552220106124878, "learning_rate": 2.762104841936775e-05, "loss": 0.5282, "step": 11197 }, { "epoch": 14.33344, "grad_norm": 0.9819296598434448, "learning_rate": 2.7619047619047622e-05, "loss": 0.474, "step": 11198 }, { "epoch": 14.33472, "grad_norm": 1.0161857604980469, "learning_rate": 2.7617046818727494e-05, "loss": 0.5428, "step": 11199 }, { "epoch": 14.336, "grad_norm": 0.950628399848938, "learning_rate": 2.7615046018407366e-05, "loss": 0.5011, "step": 11200 }, { "epoch": 14.33728, "grad_norm": 0.9224458336830139, "learning_rate": 2.7613045218087234e-05, "loss": 0.5053, "step": 11201 }, { "epoch": 14.33856, "grad_norm": 0.9512698650360107, "learning_rate": 2.7611044417767106e-05, "loss": 0.5124, "step": 11202 }, { "epoch": 14.33984, "grad_norm": 0.905447244644165, "learning_rate": 2.7609043617446978e-05, "loss": 0.4627, "step": 11203 }, { "epoch": 14.34112, "grad_norm": 0.9614179730415344, "learning_rate": 2.7607042817126853e-05, "loss": 0.5108, "step": 11204 }, { "epoch": 14.3424, "grad_norm": 1.0302612781524658, "learning_rate": 2.7605042016806725e-05, "loss": 0.588, "step": 11205 }, { "epoch": 14.343679999999999, "grad_norm": 0.9715131521224976, "learning_rate": 2.7603041216486597e-05, "loss": 0.5215, "step": 11206 }, { "epoch": 14.34496, "grad_norm": 0.9700056314468384, "learning_rate": 
2.760104041616647e-05, "loss": 0.511, "step": 11207 }, { "epoch": 14.34624, "grad_norm": 0.9388474225997925, "learning_rate": 2.759903961584634e-05, "loss": 0.5359, "step": 11208 }, { "epoch": 14.34752, "grad_norm": 0.9849310517311096, "learning_rate": 2.759703881552621e-05, "loss": 0.5323, "step": 11209 }, { "epoch": 14.3488, "grad_norm": 0.990974485874176, "learning_rate": 2.759503801520608e-05, "loss": 0.5483, "step": 11210 }, { "epoch": 14.35008, "grad_norm": 1.0002540349960327, "learning_rate": 2.759303721488596e-05, "loss": 0.5135, "step": 11211 }, { "epoch": 14.35136, "grad_norm": 0.9734601378440857, "learning_rate": 2.7591036414565828e-05, "loss": 0.5341, "step": 11212 }, { "epoch": 14.35264, "grad_norm": 0.9525935649871826, "learning_rate": 2.75890356142457e-05, "loss": 0.5107, "step": 11213 }, { "epoch": 14.35392, "grad_norm": 0.9455302357673645, "learning_rate": 2.7587034813925572e-05, "loss": 0.528, "step": 11214 }, { "epoch": 14.3552, "grad_norm": 0.9510558843612671, "learning_rate": 2.7585034013605444e-05, "loss": 0.5205, "step": 11215 }, { "epoch": 14.35648, "grad_norm": 0.9504905939102173, "learning_rate": 2.7583033213285316e-05, "loss": 0.5393, "step": 11216 }, { "epoch": 14.35776, "grad_norm": 0.952308714389801, "learning_rate": 2.7581032412965184e-05, "loss": 0.5079, "step": 11217 }, { "epoch": 14.35904, "grad_norm": 1.0244531631469727, "learning_rate": 2.7579031612645063e-05, "loss": 0.5758, "step": 11218 }, { "epoch": 14.36032, "grad_norm": 0.9500291347503662, "learning_rate": 2.7577030812324934e-05, "loss": 0.5063, "step": 11219 }, { "epoch": 14.3616, "grad_norm": 0.9819626808166504, "learning_rate": 2.7575030012004803e-05, "loss": 0.5453, "step": 11220 }, { "epoch": 14.36288, "grad_norm": 0.9494471549987793, "learning_rate": 2.7573029211684675e-05, "loss": 0.525, "step": 11221 }, { "epoch": 14.36416, "grad_norm": 0.9260940551757812, "learning_rate": 2.7571028411364547e-05, "loss": 0.5371, "step": 11222 }, { "epoch": 14.36544, "grad_norm": 
0.9827948808670044, "learning_rate": 2.756902761104442e-05, "loss": 0.5559, "step": 11223 }, { "epoch": 14.36672, "grad_norm": 0.9918002486228943, "learning_rate": 2.756702681072429e-05, "loss": 0.5184, "step": 11224 }, { "epoch": 14.368, "grad_norm": 0.9910105466842651, "learning_rate": 2.756502601040416e-05, "loss": 0.571, "step": 11225 }, { "epoch": 14.36928, "grad_norm": 1.0316925048828125, "learning_rate": 2.7563025210084037e-05, "loss": 0.5355, "step": 11226 }, { "epoch": 14.37056, "grad_norm": 0.9141017198562622, "learning_rate": 2.756102440976391e-05, "loss": 0.4943, "step": 11227 }, { "epoch": 14.37184, "grad_norm": 0.9571624994277954, "learning_rate": 2.7559023609443778e-05, "loss": 0.5288, "step": 11228 }, { "epoch": 14.37312, "grad_norm": 0.95545893907547, "learning_rate": 2.755702280912365e-05, "loss": 0.5231, "step": 11229 }, { "epoch": 14.3744, "grad_norm": 0.8849372863769531, "learning_rate": 2.755502200880352e-05, "loss": 0.4974, "step": 11230 }, { "epoch": 14.37568, "grad_norm": 0.9513952136039734, "learning_rate": 2.7553021208483393e-05, "loss": 0.5517, "step": 11231 }, { "epoch": 14.37696, "grad_norm": 0.9136958122253418, "learning_rate": 2.7551020408163265e-05, "loss": 0.5258, "step": 11232 }, { "epoch": 14.37824, "grad_norm": 0.9673413038253784, "learning_rate": 2.754901960784314e-05, "loss": 0.5092, "step": 11233 }, { "epoch": 14.37952, "grad_norm": 0.9891186952590942, "learning_rate": 2.7547018807523012e-05, "loss": 0.5038, "step": 11234 }, { "epoch": 14.3808, "grad_norm": 0.9878220558166504, "learning_rate": 2.7545018007202884e-05, "loss": 0.5578, "step": 11235 }, { "epoch": 14.38208, "grad_norm": 0.9703464508056641, "learning_rate": 2.7543017206882753e-05, "loss": 0.5463, "step": 11236 }, { "epoch": 14.38336, "grad_norm": 1.003145456314087, "learning_rate": 2.7541016406562625e-05, "loss": 0.5513, "step": 11237 }, { "epoch": 14.38464, "grad_norm": 0.9494891166687012, "learning_rate": 2.7539015606242496e-05, "loss": 0.4822, "step": 11238 }, 
{ "epoch": 14.38592, "grad_norm": 0.9667163491249084, "learning_rate": 2.7537014805922368e-05, "loss": 0.5371, "step": 11239 }, { "epoch": 14.3872, "grad_norm": 0.9678400754928589, "learning_rate": 2.7535014005602243e-05, "loss": 0.5323, "step": 11240 }, { "epoch": 14.38848, "grad_norm": 1.001518964767456, "learning_rate": 2.7533013205282115e-05, "loss": 0.5314, "step": 11241 }, { "epoch": 14.38976, "grad_norm": 1.0005130767822266, "learning_rate": 2.7531012404961987e-05, "loss": 0.5229, "step": 11242 }, { "epoch": 14.39104, "grad_norm": 0.9950253367424011, "learning_rate": 2.752901160464186e-05, "loss": 0.5628, "step": 11243 }, { "epoch": 14.39232, "grad_norm": 0.8834913969039917, "learning_rate": 2.7527010804321728e-05, "loss": 0.4599, "step": 11244 }, { "epoch": 14.3936, "grad_norm": 0.9402278661727905, "learning_rate": 2.75250100040016e-05, "loss": 0.5007, "step": 11245 }, { "epoch": 14.39488, "grad_norm": 1.0537763833999634, "learning_rate": 2.752300920368147e-05, "loss": 0.569, "step": 11246 }, { "epoch": 14.39616, "grad_norm": 1.0012062788009644, "learning_rate": 2.7521008403361346e-05, "loss": 0.5324, "step": 11247 }, { "epoch": 14.39744, "grad_norm": 0.9675048589706421, "learning_rate": 2.751900760304122e-05, "loss": 0.5517, "step": 11248 }, { "epoch": 14.39872, "grad_norm": 0.9507377743721008, "learning_rate": 2.751700680272109e-05, "loss": 0.5041, "step": 11249 }, { "epoch": 14.4, "grad_norm": 0.9572357535362244, "learning_rate": 2.7515006002400962e-05, "loss": 0.5373, "step": 11250 }, { "epoch": 14.40128, "grad_norm": 0.9814470410346985, "learning_rate": 2.7513005202080834e-05, "loss": 0.5297, "step": 11251 }, { "epoch": 14.40256, "grad_norm": 0.9352920651435852, "learning_rate": 2.7511004401760702e-05, "loss": 0.4801, "step": 11252 }, { "epoch": 14.40384, "grad_norm": 0.9089069962501526, "learning_rate": 2.7509003601440574e-05, "loss": 0.4697, "step": 11253 }, { "epoch": 14.40512, "grad_norm": 0.9568341374397278, "learning_rate": 
2.7507002801120453e-05, "loss": 0.53, "step": 11254 }, { "epoch": 14.4064, "grad_norm": 0.9579848647117615, "learning_rate": 2.750500200080032e-05, "loss": 0.4911, "step": 11255 }, { "epoch": 14.40768, "grad_norm": 0.9732376337051392, "learning_rate": 2.7503001200480193e-05, "loss": 0.5377, "step": 11256 }, { "epoch": 14.40896, "grad_norm": 0.9632934331893921, "learning_rate": 2.7501000400160065e-05, "loss": 0.5283, "step": 11257 }, { "epoch": 14.41024, "grad_norm": 0.9508453011512756, "learning_rate": 2.7498999599839937e-05, "loss": 0.5232, "step": 11258 }, { "epoch": 14.41152, "grad_norm": 0.9188300371170044, "learning_rate": 2.749699879951981e-05, "loss": 0.522, "step": 11259 }, { "epoch": 14.4128, "grad_norm": 0.9479809999465942, "learning_rate": 2.7494997999199677e-05, "loss": 0.5126, "step": 11260 }, { "epoch": 14.41408, "grad_norm": 0.9927715063095093, "learning_rate": 2.7492997198879556e-05, "loss": 0.5527, "step": 11261 }, { "epoch": 14.41536, "grad_norm": 1.0165045261383057, "learning_rate": 2.7490996398559428e-05, "loss": 0.5617, "step": 11262 }, { "epoch": 14.41664, "grad_norm": 0.9869691133499146, "learning_rate": 2.7488995598239296e-05, "loss": 0.5413, "step": 11263 }, { "epoch": 14.41792, "grad_norm": 0.9490245580673218, "learning_rate": 2.7486994797919168e-05, "loss": 0.4778, "step": 11264 }, { "epoch": 14.4192, "grad_norm": 0.967729389667511, "learning_rate": 2.748499399759904e-05, "loss": 0.5177, "step": 11265 }, { "epoch": 14.42048, "grad_norm": 0.9395437240600586, "learning_rate": 2.7482993197278912e-05, "loss": 0.5426, "step": 11266 }, { "epoch": 14.42176, "grad_norm": 0.9222921133041382, "learning_rate": 2.7480992396958784e-05, "loss": 0.4713, "step": 11267 }, { "epoch": 14.42304, "grad_norm": 0.9864060878753662, "learning_rate": 2.747899159663866e-05, "loss": 0.5087, "step": 11268 }, { "epoch": 14.42432, "grad_norm": 0.9627559781074524, "learning_rate": 2.747699079631853e-05, "loss": 0.4965, "step": 11269 }, { "epoch": 14.4256, "grad_norm": 
0.9115942120552063, "learning_rate": 2.7474989995998403e-05, "loss": 0.4682, "step": 11270 }, { "epoch": 14.42688, "grad_norm": 0.96644127368927, "learning_rate": 2.747298919567827e-05, "loss": 0.532, "step": 11271 }, { "epoch": 14.42816, "grad_norm": 1.0033857822418213, "learning_rate": 2.7470988395358143e-05, "loss": 0.5577, "step": 11272 }, { "epoch": 14.42944, "grad_norm": 1.0521118640899658, "learning_rate": 2.7468987595038015e-05, "loss": 0.5645, "step": 11273 }, { "epoch": 14.43072, "grad_norm": 0.985821008682251, "learning_rate": 2.7466986794717887e-05, "loss": 0.5584, "step": 11274 }, { "epoch": 14.432, "grad_norm": 1.0375871658325195, "learning_rate": 2.7464985994397762e-05, "loss": 0.5631, "step": 11275 }, { "epoch": 14.43328, "grad_norm": 0.935610294342041, "learning_rate": 2.7462985194077634e-05, "loss": 0.4976, "step": 11276 }, { "epoch": 14.43456, "grad_norm": 1.0138261318206787, "learning_rate": 2.7460984393757506e-05, "loss": 0.5645, "step": 11277 }, { "epoch": 14.43584, "grad_norm": 1.0506336688995361, "learning_rate": 2.7458983593437377e-05, "loss": 0.5702, "step": 11278 }, { "epoch": 14.43712, "grad_norm": 0.951737105846405, "learning_rate": 2.7456982793117246e-05, "loss": 0.4803, "step": 11279 }, { "epoch": 14.4384, "grad_norm": 0.9894746541976929, "learning_rate": 2.7454981992797118e-05, "loss": 0.5372, "step": 11280 }, { "epoch": 14.43968, "grad_norm": 0.9843027591705322, "learning_rate": 2.745298119247699e-05, "loss": 0.5739, "step": 11281 }, { "epoch": 14.44096, "grad_norm": 0.980569064617157, "learning_rate": 2.7450980392156865e-05, "loss": 0.4889, "step": 11282 }, { "epoch": 14.44224, "grad_norm": 0.9253454208374023, "learning_rate": 2.7448979591836737e-05, "loss": 0.5233, "step": 11283 }, { "epoch": 14.44352, "grad_norm": 0.9626498818397522, "learning_rate": 2.744697879151661e-05, "loss": 0.5467, "step": 11284 }, { "epoch": 14.4448, "grad_norm": 0.9599905014038086, "learning_rate": 2.744497799119648e-05, "loss": 0.5341, "step": 11285 }, 
{ "epoch": 14.44608, "grad_norm": 0.9758439064025879, "learning_rate": 2.7442977190876352e-05, "loss": 0.5201, "step": 11286 }, { "epoch": 14.44736, "grad_norm": 1.003113865852356, "learning_rate": 2.744097639055622e-05, "loss": 0.523, "step": 11287 }, { "epoch": 14.44864, "grad_norm": 0.9655167460441589, "learning_rate": 2.7438975590236093e-05, "loss": 0.5333, "step": 11288 }, { "epoch": 14.44992, "grad_norm": 0.9504974484443665, "learning_rate": 2.743697478991597e-05, "loss": 0.5356, "step": 11289 }, { "epoch": 14.4512, "grad_norm": 0.978461503982544, "learning_rate": 2.743497398959584e-05, "loss": 0.5443, "step": 11290 }, { "epoch": 14.45248, "grad_norm": 0.9474901556968689, "learning_rate": 2.743297318927571e-05, "loss": 0.489, "step": 11291 }, { "epoch": 14.45376, "grad_norm": 0.9023638367652893, "learning_rate": 2.7430972388955583e-05, "loss": 0.4626, "step": 11292 }, { "epoch": 14.45504, "grad_norm": 0.939704418182373, "learning_rate": 2.7428971588635455e-05, "loss": 0.4852, "step": 11293 }, { "epoch": 14.45632, "grad_norm": 0.9497430920600891, "learning_rate": 2.7426970788315327e-05, "loss": 0.496, "step": 11294 }, { "epoch": 14.4576, "grad_norm": 0.9100688695907593, "learning_rate": 2.7424969987995196e-05, "loss": 0.5156, "step": 11295 }, { "epoch": 14.45888, "grad_norm": 0.9579876661300659, "learning_rate": 2.7422969187675074e-05, "loss": 0.493, "step": 11296 }, { "epoch": 14.46016, "grad_norm": 0.9984821677207947, "learning_rate": 2.7420968387354946e-05, "loss": 0.5359, "step": 11297 }, { "epoch": 14.46144, "grad_norm": 0.9672602415084839, "learning_rate": 2.7418967587034815e-05, "loss": 0.5613, "step": 11298 }, { "epoch": 14.462720000000001, "grad_norm": 1.0040628910064697, "learning_rate": 2.7416966786714686e-05, "loss": 0.5751, "step": 11299 }, { "epoch": 14.464, "grad_norm": 1.0041166543960571, "learning_rate": 2.7414965986394558e-05, "loss": 0.5865, "step": 11300 }, { "epoch": 14.46528, "grad_norm": 0.950853168964386, "learning_rate": 
2.741296518607443e-05, "loss": 0.5002, "step": 11301 }, { "epoch": 14.46656, "grad_norm": 0.9579245448112488, "learning_rate": 2.7410964385754302e-05, "loss": 0.5227, "step": 11302 }, { "epoch": 14.46784, "grad_norm": 0.9853748679161072, "learning_rate": 2.7408963585434177e-05, "loss": 0.5181, "step": 11303 }, { "epoch": 14.46912, "grad_norm": 0.9100056290626526, "learning_rate": 2.740696278511405e-05, "loss": 0.4892, "step": 11304 }, { "epoch": 14.4704, "grad_norm": 0.94825679063797, "learning_rate": 2.740496198479392e-05, "loss": 0.4969, "step": 11305 }, { "epoch": 14.47168, "grad_norm": 0.9380550384521484, "learning_rate": 2.740296118447379e-05, "loss": 0.5133, "step": 11306 }, { "epoch": 14.47296, "grad_norm": 0.9455284476280212, "learning_rate": 2.740096038415366e-05, "loss": 0.5307, "step": 11307 }, { "epoch": 14.47424, "grad_norm": 0.95405513048172, "learning_rate": 2.7398959583833533e-05, "loss": 0.4977, "step": 11308 }, { "epoch": 14.47552, "grad_norm": 0.9854465126991272, "learning_rate": 2.7396958783513405e-05, "loss": 0.5289, "step": 11309 }, { "epoch": 14.4768, "grad_norm": 1.018682599067688, "learning_rate": 2.739495798319328e-05, "loss": 0.5819, "step": 11310 }, { "epoch": 14.47808, "grad_norm": 0.9085429310798645, "learning_rate": 2.7392957182873152e-05, "loss": 0.4768, "step": 11311 }, { "epoch": 14.47936, "grad_norm": 1.025566577911377, "learning_rate": 2.7390956382553024e-05, "loss": 0.5631, "step": 11312 }, { "epoch": 14.48064, "grad_norm": 0.9369559288024902, "learning_rate": 2.7388955582232896e-05, "loss": 0.5121, "step": 11313 }, { "epoch": 14.48192, "grad_norm": 0.9811607003211975, "learning_rate": 2.7386954781912764e-05, "loss": 0.5434, "step": 11314 }, { "epoch": 14.4832, "grad_norm": 0.9261855483055115, "learning_rate": 2.7384953981592636e-05, "loss": 0.547, "step": 11315 }, { "epoch": 14.48448, "grad_norm": 0.9290302395820618, "learning_rate": 2.7382953181272508e-05, "loss": 0.5299, "step": 11316 }, { "epoch": 14.48576, "grad_norm": 
0.9527462124824524, "learning_rate": 2.7380952380952383e-05, "loss": 0.5147, "step": 11317 }, { "epoch": 14.48704, "grad_norm": 0.9608799815177917, "learning_rate": 2.7378951580632255e-05, "loss": 0.5475, "step": 11318 }, { "epoch": 14.48832, "grad_norm": 0.9241880774497986, "learning_rate": 2.7376950780312127e-05, "loss": 0.5598, "step": 11319 }, { "epoch": 14.4896, "grad_norm": 0.948318362236023, "learning_rate": 2.7374949979992e-05, "loss": 0.5819, "step": 11320 }, { "epoch": 14.49088, "grad_norm": 0.9118000268936157, "learning_rate": 2.737294917967187e-05, "loss": 0.488, "step": 11321 }, { "epoch": 14.49216, "grad_norm": 0.9568605422973633, "learning_rate": 2.737094837935174e-05, "loss": 0.5289, "step": 11322 }, { "epoch": 14.49344, "grad_norm": 0.9479710459709167, "learning_rate": 2.736894757903161e-05, "loss": 0.5361, "step": 11323 }, { "epoch": 14.49472, "grad_norm": 0.9818686246871948, "learning_rate": 2.736694677871149e-05, "loss": 0.5237, "step": 11324 }, { "epoch": 14.496, "grad_norm": 0.9445642828941345, "learning_rate": 2.7364945978391358e-05, "loss": 0.4977, "step": 11325 }, { "epoch": 14.49728, "grad_norm": 0.9737011790275574, "learning_rate": 2.736294517807123e-05, "loss": 0.5182, "step": 11326 }, { "epoch": 14.49856, "grad_norm": 0.9840577244758606, "learning_rate": 2.7360944377751102e-05, "loss": 0.5137, "step": 11327 }, { "epoch": 14.49984, "grad_norm": 0.9450149536132812, "learning_rate": 2.7358943577430974e-05, "loss": 0.4973, "step": 11328 }, { "epoch": 14.50112, "grad_norm": 0.9480723738670349, "learning_rate": 2.7356942777110846e-05, "loss": 0.5709, "step": 11329 }, { "epoch": 14.5024, "grad_norm": 0.9991519451141357, "learning_rate": 2.7354941976790714e-05, "loss": 0.5573, "step": 11330 }, { "epoch": 14.50368, "grad_norm": 0.9923670291900635, "learning_rate": 2.7352941176470593e-05, "loss": 0.6224, "step": 11331 }, { "epoch": 14.50496, "grad_norm": 0.9765622615814209, "learning_rate": 2.7350940376150464e-05, "loss": 0.5312, "step": 11332 }, 
{ "epoch": 14.50624, "grad_norm": 0.978398859500885, "learning_rate": 2.7348939575830333e-05, "loss": 0.5265, "step": 11333 }, { "epoch": 14.50752, "grad_norm": 1.0118024349212646, "learning_rate": 2.7346938775510205e-05, "loss": 0.5901, "step": 11334 }, { "epoch": 14.5088, "grad_norm": 0.9619473814964294, "learning_rate": 2.7344937975190077e-05, "loss": 0.5405, "step": 11335 }, { "epoch": 14.51008, "grad_norm": 0.970476508140564, "learning_rate": 2.734293717486995e-05, "loss": 0.5543, "step": 11336 }, { "epoch": 14.51136, "grad_norm": 0.9238805174827576, "learning_rate": 2.734093637454982e-05, "loss": 0.4825, "step": 11337 }, { "epoch": 14.51264, "grad_norm": 1.0360538959503174, "learning_rate": 2.733893557422969e-05, "loss": 0.5385, "step": 11338 }, { "epoch": 14.51392, "grad_norm": 0.9870235323905945, "learning_rate": 2.7336934773909567e-05, "loss": 0.5012, "step": 11339 }, { "epoch": 14.5152, "grad_norm": 0.9728274345397949, "learning_rate": 2.733493397358944e-05, "loss": 0.5382, "step": 11340 }, { "epoch": 14.51648, "grad_norm": 0.989501416683197, "learning_rate": 2.7332933173269308e-05, "loss": 0.5216, "step": 11341 }, { "epoch": 14.517759999999999, "grad_norm": 0.987844705581665, "learning_rate": 2.733093237294918e-05, "loss": 0.5553, "step": 11342 }, { "epoch": 14.51904, "grad_norm": 0.9757768511772156, "learning_rate": 2.732893157262905e-05, "loss": 0.5604, "step": 11343 }, { "epoch": 14.52032, "grad_norm": 0.9981015920639038, "learning_rate": 2.7326930772308923e-05, "loss": 0.5521, "step": 11344 }, { "epoch": 14.5216, "grad_norm": 0.9795306921005249, "learning_rate": 2.7324929971988795e-05, "loss": 0.5164, "step": 11345 }, { "epoch": 14.52288, "grad_norm": 0.9881823062896729, "learning_rate": 2.732292917166867e-05, "loss": 0.519, "step": 11346 }, { "epoch": 14.52416, "grad_norm": 1.0085053443908691, "learning_rate": 2.7320928371348542e-05, "loss": 0.5578, "step": 11347 }, { "epoch": 14.52544, "grad_norm": 1.0021884441375732, "learning_rate": 
2.7318927571028414e-05, "loss": 0.5362, "step": 11348 }, { "epoch": 14.52672, "grad_norm": 0.9783837795257568, "learning_rate": 2.7316926770708283e-05, "loss": 0.5256, "step": 11349 }, { "epoch": 14.528, "grad_norm": 1.005036473274231, "learning_rate": 2.7314925970388155e-05, "loss": 0.5495, "step": 11350 }, { "epoch": 14.52928, "grad_norm": 0.9625844359397888, "learning_rate": 2.7312925170068026e-05, "loss": 0.5355, "step": 11351 }, { "epoch": 14.53056, "grad_norm": 0.961077094078064, "learning_rate": 2.7310924369747898e-05, "loss": 0.518, "step": 11352 }, { "epoch": 14.53184, "grad_norm": 0.9395869970321655, "learning_rate": 2.7308923569427773e-05, "loss": 0.5194, "step": 11353 }, { "epoch": 14.53312, "grad_norm": 0.9758129715919495, "learning_rate": 2.7306922769107645e-05, "loss": 0.5533, "step": 11354 }, { "epoch": 14.5344, "grad_norm": 0.9722764492034912, "learning_rate": 2.7304921968787517e-05, "loss": 0.536, "step": 11355 }, { "epoch": 14.53568, "grad_norm": 0.9164559245109558, "learning_rate": 2.730292116846739e-05, "loss": 0.468, "step": 11356 }, { "epoch": 14.53696, "grad_norm": 1.0066019296646118, "learning_rate": 2.7300920368147258e-05, "loss": 0.5945, "step": 11357 }, { "epoch": 14.53824, "grad_norm": 0.9370825886726379, "learning_rate": 2.729891956782713e-05, "loss": 0.5005, "step": 11358 }, { "epoch": 14.53952, "grad_norm": 0.9669891595840454, "learning_rate": 2.7296918767507e-05, "loss": 0.5536, "step": 11359 }, { "epoch": 14.5408, "grad_norm": 0.9711414575576782, "learning_rate": 2.729491796718688e-05, "loss": 0.5114, "step": 11360 }, { "epoch": 14.54208, "grad_norm": 0.9631317257881165, "learning_rate": 2.729291716686675e-05, "loss": 0.5221, "step": 11361 }, { "epoch": 14.54336, "grad_norm": 0.9969499707221985, "learning_rate": 2.729091636654662e-05, "loss": 0.5097, "step": 11362 }, { "epoch": 14.54464, "grad_norm": 0.9977988004684448, "learning_rate": 2.7288915566226492e-05, "loss": 0.5569, "step": 11363 }, { "epoch": 14.54592, "grad_norm": 
0.90044105052948, "learning_rate": 2.7286914765906364e-05, "loss": 0.4707, "step": 11364 }, { "epoch": 14.5472, "grad_norm": 0.9298081398010254, "learning_rate": 2.7284913965586232e-05, "loss": 0.5013, "step": 11365 }, { "epoch": 14.54848, "grad_norm": 0.987301766872406, "learning_rate": 2.7282913165266104e-05, "loss": 0.5666, "step": 11366 }, { "epoch": 14.54976, "grad_norm": 0.9727098345756531, "learning_rate": 2.7280912364945983e-05, "loss": 0.5379, "step": 11367 }, { "epoch": 14.55104, "grad_norm": 0.9952585101127625, "learning_rate": 2.7278911564625855e-05, "loss": 0.5741, "step": 11368 }, { "epoch": 14.55232, "grad_norm": 0.9384405612945557, "learning_rate": 2.7276910764305723e-05, "loss": 0.502, "step": 11369 }, { "epoch": 14.5536, "grad_norm": 0.9742892980575562, "learning_rate": 2.7274909963985595e-05, "loss": 0.5407, "step": 11370 }, { "epoch": 14.55488, "grad_norm": 0.9823377132415771, "learning_rate": 2.7272909163665467e-05, "loss": 0.5218, "step": 11371 }, { "epoch": 14.55616, "grad_norm": 0.9821147322654724, "learning_rate": 2.727090836334534e-05, "loss": 0.5134, "step": 11372 }, { "epoch": 14.55744, "grad_norm": 1.0253291130065918, "learning_rate": 2.7268907563025207e-05, "loss": 0.5562, "step": 11373 }, { "epoch": 14.55872, "grad_norm": 0.9838807582855225, "learning_rate": 2.7266906762705086e-05, "loss": 0.5282, "step": 11374 }, { "epoch": 14.56, "grad_norm": 0.9515048861503601, "learning_rate": 2.7264905962384958e-05, "loss": 0.5034, "step": 11375 }, { "epoch": 14.56128, "grad_norm": 0.9546962976455688, "learning_rate": 2.726290516206483e-05, "loss": 0.5325, "step": 11376 }, { "epoch": 14.56256, "grad_norm": 0.9903567433357239, "learning_rate": 2.7260904361744698e-05, "loss": 0.5126, "step": 11377 }, { "epoch": 14.56384, "grad_norm": 0.9805188775062561, "learning_rate": 2.725890356142457e-05, "loss": 0.513, "step": 11378 }, { "epoch": 14.56512, "grad_norm": 1.0438790321350098, "learning_rate": 2.7256902761104442e-05, "loss": 0.5515, "step": 11379 
}, { "epoch": 14.5664, "grad_norm": 0.9887431263923645, "learning_rate": 2.7254901960784314e-05, "loss": 0.526, "step": 11380 }, { "epoch": 14.56768, "grad_norm": 0.9955745339393616, "learning_rate": 2.725290116046419e-05, "loss": 0.5261, "step": 11381 }, { "epoch": 14.56896, "grad_norm": 0.9546988606452942, "learning_rate": 2.725090036014406e-05, "loss": 0.5462, "step": 11382 }, { "epoch": 14.57024, "grad_norm": 0.966931939125061, "learning_rate": 2.7248899559823933e-05, "loss": 0.5326, "step": 11383 }, { "epoch": 14.57152, "grad_norm": 0.9506143927574158, "learning_rate": 2.7246898759503804e-05, "loss": 0.4998, "step": 11384 }, { "epoch": 14.5728, "grad_norm": 0.9455040097236633, "learning_rate": 2.7244897959183673e-05, "loss": 0.5466, "step": 11385 }, { "epoch": 14.57408, "grad_norm": 0.9447420239448547, "learning_rate": 2.7242897158863545e-05, "loss": 0.4939, "step": 11386 }, { "epoch": 14.57536, "grad_norm": 0.9344480037689209, "learning_rate": 2.7240896358543417e-05, "loss": 0.5223, "step": 11387 }, { "epoch": 14.57664, "grad_norm": 0.9362328052520752, "learning_rate": 2.7238895558223292e-05, "loss": 0.5015, "step": 11388 }, { "epoch": 14.57792, "grad_norm": 0.9401606917381287, "learning_rate": 2.7236894757903164e-05, "loss": 0.4988, "step": 11389 }, { "epoch": 14.5792, "grad_norm": 0.9665933847427368, "learning_rate": 2.7234893957583036e-05, "loss": 0.5126, "step": 11390 }, { "epoch": 14.58048, "grad_norm": 0.8998576998710632, "learning_rate": 2.7232893157262907e-05, "loss": 0.505, "step": 11391 }, { "epoch": 14.58176, "grad_norm": 0.9965265393257141, "learning_rate": 2.723089235694278e-05, "loss": 0.569, "step": 11392 }, { "epoch": 14.58304, "grad_norm": 0.9181508421897888, "learning_rate": 2.7228891556622648e-05, "loss": 0.4929, "step": 11393 }, { "epoch": 14.58432, "grad_norm": 0.9306789040565491, "learning_rate": 2.722689075630252e-05, "loss": 0.5179, "step": 11394 }, { "epoch": 14.5856, "grad_norm": 0.998447597026825, "learning_rate": 
2.7224889955982398e-05, "loss": 0.5673, "step": 11395 }, { "epoch": 14.58688, "grad_norm": 0.9387089014053345, "learning_rate": 2.7222889155662267e-05, "loss": 0.5328, "step": 11396 }, { "epoch": 14.58816, "grad_norm": 0.9925042986869812, "learning_rate": 2.722088835534214e-05, "loss": 0.5403, "step": 11397 }, { "epoch": 14.58944, "grad_norm": 0.9419589638710022, "learning_rate": 2.721888755502201e-05, "loss": 0.5322, "step": 11398 }, { "epoch": 14.59072, "grad_norm": 0.9522401094436646, "learning_rate": 2.7216886754701882e-05, "loss": 0.5214, "step": 11399 }, { "epoch": 14.592, "grad_norm": 0.9598711729049683, "learning_rate": 2.7214885954381754e-05, "loss": 0.5013, "step": 11400 }, { "epoch": 14.59328, "grad_norm": 0.9690408706665039, "learning_rate": 2.7212885154061623e-05, "loss": 0.5262, "step": 11401 }, { "epoch": 14.59456, "grad_norm": 0.9637435078620911, "learning_rate": 2.72108843537415e-05, "loss": 0.5114, "step": 11402 }, { "epoch": 14.59584, "grad_norm": 0.9893304109573364, "learning_rate": 2.7208883553421373e-05, "loss": 0.5414, "step": 11403 }, { "epoch": 14.59712, "grad_norm": 1.0310964584350586, "learning_rate": 2.720688275310124e-05, "loss": 0.5245, "step": 11404 }, { "epoch": 14.5984, "grad_norm": 0.9509328007698059, "learning_rate": 2.7204881952781113e-05, "loss": 0.5463, "step": 11405 }, { "epoch": 14.59968, "grad_norm": 0.9694564938545227, "learning_rate": 2.7202881152460985e-05, "loss": 0.5419, "step": 11406 }, { "epoch": 14.60096, "grad_norm": 0.970043420791626, "learning_rate": 2.7200880352140857e-05, "loss": 0.5445, "step": 11407 }, { "epoch": 14.60224, "grad_norm": 0.9511335492134094, "learning_rate": 2.719887955182073e-05, "loss": 0.5206, "step": 11408 }, { "epoch": 14.60352, "grad_norm": 1.030502200126648, "learning_rate": 2.7196878751500604e-05, "loss": 0.6213, "step": 11409 }, { "epoch": 14.604800000000001, "grad_norm": 1.0002635717391968, "learning_rate": 2.7194877951180476e-05, "loss": 0.5467, "step": 11410 }, { "epoch": 14.60608, 
"grad_norm": 0.9449097514152527, "learning_rate": 2.7192877150860348e-05, "loss": 0.5446, "step": 11411 }, { "epoch": 14.60736, "grad_norm": 0.9701420664787292, "learning_rate": 2.7190876350540216e-05, "loss": 0.5615, "step": 11412 }, { "epoch": 14.60864, "grad_norm": 0.9248257279396057, "learning_rate": 2.7188875550220088e-05, "loss": 0.5041, "step": 11413 }, { "epoch": 14.60992, "grad_norm": 0.9535519480705261, "learning_rate": 2.718687474989996e-05, "loss": 0.5423, "step": 11414 }, { "epoch": 14.6112, "grad_norm": 0.9663887619972229, "learning_rate": 2.7184873949579832e-05, "loss": 0.5678, "step": 11415 }, { "epoch": 14.61248, "grad_norm": 1.0246509313583374, "learning_rate": 2.7182873149259707e-05, "loss": 0.6351, "step": 11416 }, { "epoch": 14.61376, "grad_norm": 0.9978649616241455, "learning_rate": 2.718087234893958e-05, "loss": 0.5342, "step": 11417 }, { "epoch": 14.61504, "grad_norm": 0.9514932036399841, "learning_rate": 2.717887154861945e-05, "loss": 0.4586, "step": 11418 }, { "epoch": 14.61632, "grad_norm": 1.0197707414627075, "learning_rate": 2.7176870748299323e-05, "loss": 0.5932, "step": 11419 }, { "epoch": 14.6176, "grad_norm": 1.017253041267395, "learning_rate": 2.717486994797919e-05, "loss": 0.5063, "step": 11420 }, { "epoch": 14.61888, "grad_norm": 0.9654613137245178, "learning_rate": 2.7172869147659063e-05, "loss": 0.5249, "step": 11421 }, { "epoch": 14.62016, "grad_norm": 0.9739159345626831, "learning_rate": 2.7170868347338935e-05, "loss": 0.5241, "step": 11422 }, { "epoch": 14.62144, "grad_norm": 0.9443714022636414, "learning_rate": 2.716886754701881e-05, "loss": 0.5465, "step": 11423 }, { "epoch": 14.62272, "grad_norm": 1.0117498636245728, "learning_rate": 2.7166866746698682e-05, "loss": 0.5488, "step": 11424 }, { "epoch": 14.624, "grad_norm": 0.9801613092422485, "learning_rate": 2.7164865946378554e-05, "loss": 0.5194, "step": 11425 }, { "epoch": 14.62528, "grad_norm": 0.9613388180732727, "learning_rate": 2.7162865146058426e-05, "loss": 0.5651, 
"step": 11426 }, { "epoch": 14.62656, "grad_norm": 0.9645081758499146, "learning_rate": 2.7160864345738298e-05, "loss": 0.5064, "step": 11427 }, { "epoch": 14.627839999999999, "grad_norm": 0.9990333318710327, "learning_rate": 2.7158863545418166e-05, "loss": 0.5582, "step": 11428 }, { "epoch": 14.62912, "grad_norm": 0.9323675036430359, "learning_rate": 2.7156862745098038e-05, "loss": 0.5146, "step": 11429 }, { "epoch": 14.6304, "grad_norm": 0.9680500030517578, "learning_rate": 2.7154861944777917e-05, "loss": 0.4641, "step": 11430 }, { "epoch": 14.63168, "grad_norm": 0.9934640526771545, "learning_rate": 2.7152861144457785e-05, "loss": 0.5237, "step": 11431 }, { "epoch": 14.63296, "grad_norm": 0.9925525188446045, "learning_rate": 2.7150860344137657e-05, "loss": 0.526, "step": 11432 }, { "epoch": 14.63424, "grad_norm": 0.9799636602401733, "learning_rate": 2.714885954381753e-05, "loss": 0.5483, "step": 11433 }, { "epoch": 14.63552, "grad_norm": 0.945448637008667, "learning_rate": 2.71468587434974e-05, "loss": 0.5187, "step": 11434 }, { "epoch": 14.636800000000001, "grad_norm": 0.9392439126968384, "learning_rate": 2.7144857943177272e-05, "loss": 0.5117, "step": 11435 }, { "epoch": 14.63808, "grad_norm": 0.9761615991592407, "learning_rate": 2.714285714285714e-05, "loss": 0.5538, "step": 11436 }, { "epoch": 14.63936, "grad_norm": 0.9610094428062439, "learning_rate": 2.714085634253702e-05, "loss": 0.5368, "step": 11437 }, { "epoch": 14.64064, "grad_norm": 0.9939576387405396, "learning_rate": 2.713885554221689e-05, "loss": 0.6027, "step": 11438 }, { "epoch": 14.64192, "grad_norm": 0.9574117064476013, "learning_rate": 2.713685474189676e-05, "loss": 0.5258, "step": 11439 }, { "epoch": 14.6432, "grad_norm": 0.9553491473197937, "learning_rate": 2.7134853941576632e-05, "loss": 0.5076, "step": 11440 }, { "epoch": 14.64448, "grad_norm": 0.9836270809173584, "learning_rate": 2.7132853141256504e-05, "loss": 0.5249, "step": 11441 }, { "epoch": 14.64576, "grad_norm": 0.9523620009422302, 
"learning_rate": 2.7130852340936375e-05, "loss": 0.5237, "step": 11442 }, { "epoch": 14.64704, "grad_norm": 0.9464503526687622, "learning_rate": 2.7128851540616247e-05, "loss": 0.5415, "step": 11443 }, { "epoch": 14.64832, "grad_norm": 0.9364238381385803, "learning_rate": 2.7126850740296123e-05, "loss": 0.5156, "step": 11444 }, { "epoch": 14.6496, "grad_norm": 0.943153440952301, "learning_rate": 2.7124849939975994e-05, "loss": 0.516, "step": 11445 }, { "epoch": 14.65088, "grad_norm": 0.9400473833084106, "learning_rate": 2.7122849139655866e-05, "loss": 0.5315, "step": 11446 }, { "epoch": 14.65216, "grad_norm": 0.9996716380119324, "learning_rate": 2.7120848339335735e-05, "loss": 0.5381, "step": 11447 }, { "epoch": 14.65344, "grad_norm": 0.9744866490364075, "learning_rate": 2.7118847539015607e-05, "loss": 0.5502, "step": 11448 }, { "epoch": 14.65472, "grad_norm": 0.9782332181930542, "learning_rate": 2.711684673869548e-05, "loss": 0.5651, "step": 11449 }, { "epoch": 14.656, "grad_norm": 1.014737844467163, "learning_rate": 2.711484593837535e-05, "loss": 0.5673, "step": 11450 }, { "epoch": 14.65728, "grad_norm": 0.9900893568992615, "learning_rate": 2.7112845138055222e-05, "loss": 0.5413, "step": 11451 }, { "epoch": 14.65856, "grad_norm": 0.9428865909576416, "learning_rate": 2.7110844337735097e-05, "loss": 0.5564, "step": 11452 }, { "epoch": 14.659839999999999, "grad_norm": 0.973512589931488, "learning_rate": 2.710884353741497e-05, "loss": 0.5656, "step": 11453 }, { "epoch": 14.66112, "grad_norm": 0.9764522910118103, "learning_rate": 2.710684273709484e-05, "loss": 0.5316, "step": 11454 }, { "epoch": 14.6624, "grad_norm": 0.962592363357544, "learning_rate": 2.710484193677471e-05, "loss": 0.5184, "step": 11455 }, { "epoch": 14.66368, "grad_norm": 0.9462336301803589, "learning_rate": 2.710284113645458e-05, "loss": 0.5146, "step": 11456 }, { "epoch": 14.66496, "grad_norm": 0.9827730655670166, "learning_rate": 2.7100840336134453e-05, "loss": 0.5192, "step": 11457 }, { "epoch": 
14.66624, "grad_norm": 1.0178593397140503, "learning_rate": 2.7098839535814325e-05, "loss": 0.5204, "step": 11458 }, { "epoch": 14.66752, "grad_norm": 0.9616353511810303, "learning_rate": 2.70968387354942e-05, "loss": 0.5118, "step": 11459 }, { "epoch": 14.6688, "grad_norm": 0.9945552349090576, "learning_rate": 2.7094837935174072e-05, "loss": 0.5167, "step": 11460 }, { "epoch": 14.67008, "grad_norm": 0.967586100101471, "learning_rate": 2.7092837134853944e-05, "loss": 0.5346, "step": 11461 }, { "epoch": 14.67136, "grad_norm": 0.9664031863212585, "learning_rate": 2.7090836334533816e-05, "loss": 0.5105, "step": 11462 }, { "epoch": 14.67264, "grad_norm": 0.9893897771835327, "learning_rate": 2.7088835534213684e-05, "loss": 0.5307, "step": 11463 }, { "epoch": 14.67392, "grad_norm": 1.0116097927093506, "learning_rate": 2.7086834733893556e-05, "loss": 0.4891, "step": 11464 }, { "epoch": 14.6752, "grad_norm": 0.9823921322822571, "learning_rate": 2.7084833933573428e-05, "loss": 0.542, "step": 11465 }, { "epoch": 14.67648, "grad_norm": 1.0313193798065186, "learning_rate": 2.7082833133253303e-05, "loss": 0.5909, "step": 11466 }, { "epoch": 14.67776, "grad_norm": 0.9933533072471619, "learning_rate": 2.7080832332933175e-05, "loss": 0.5606, "step": 11467 }, { "epoch": 14.67904, "grad_norm": 0.9638269543647766, "learning_rate": 2.7078831532613047e-05, "loss": 0.5142, "step": 11468 }, { "epoch": 14.68032, "grad_norm": 1.006666660308838, "learning_rate": 2.707683073229292e-05, "loss": 0.5522, "step": 11469 }, { "epoch": 14.6816, "grad_norm": 0.9415097832679749, "learning_rate": 2.707482993197279e-05, "loss": 0.5229, "step": 11470 }, { "epoch": 14.68288, "grad_norm": 0.9513869881629944, "learning_rate": 2.707282913165266e-05, "loss": 0.5032, "step": 11471 }, { "epoch": 14.68416, "grad_norm": 1.0023972988128662, "learning_rate": 2.707082833133253e-05, "loss": 0.5571, "step": 11472 }, { "epoch": 14.68544, "grad_norm": 0.9924998879432678, "learning_rate": 2.706882753101241e-05, "loss": 
0.5413, "step": 11473 }, { "epoch": 14.68672, "grad_norm": 0.9861060380935669, "learning_rate": 2.7066826730692278e-05, "loss": 0.5158, "step": 11474 }, { "epoch": 14.688, "grad_norm": 0.964842677116394, "learning_rate": 2.706482593037215e-05, "loss": 0.5428, "step": 11475 }, { "epoch": 14.68928, "grad_norm": 0.9527415633201599, "learning_rate": 2.7062825130052022e-05, "loss": 0.5302, "step": 11476 }, { "epoch": 14.69056, "grad_norm": 0.9425960779190063, "learning_rate": 2.7060824329731894e-05, "loss": 0.5503, "step": 11477 }, { "epoch": 14.69184, "grad_norm": 0.9891713261604309, "learning_rate": 2.7058823529411766e-05, "loss": 0.5782, "step": 11478 }, { "epoch": 14.69312, "grad_norm": 0.9919348359107971, "learning_rate": 2.7056822729091634e-05, "loss": 0.5411, "step": 11479 }, { "epoch": 14.6944, "grad_norm": 0.9964098334312439, "learning_rate": 2.7054821928771513e-05, "loss": 0.5517, "step": 11480 }, { "epoch": 14.69568, "grad_norm": 0.9663441181182861, "learning_rate": 2.7052821128451385e-05, "loss": 0.5354, "step": 11481 }, { "epoch": 14.69696, "grad_norm": 1.0156707763671875, "learning_rate": 2.7050820328131253e-05, "loss": 0.5209, "step": 11482 }, { "epoch": 14.69824, "grad_norm": 0.9387539625167847, "learning_rate": 2.7048819527811125e-05, "loss": 0.4976, "step": 11483 }, { "epoch": 14.69952, "grad_norm": 0.9886208772659302, "learning_rate": 2.7046818727490997e-05, "loss": 0.5635, "step": 11484 }, { "epoch": 14.7008, "grad_norm": 0.9804397225379944, "learning_rate": 2.704481792717087e-05, "loss": 0.5384, "step": 11485 }, { "epoch": 14.70208, "grad_norm": 0.944187581539154, "learning_rate": 2.704281712685074e-05, "loss": 0.5429, "step": 11486 }, { "epoch": 14.70336, "grad_norm": 0.9738987684249878, "learning_rate": 2.7040816326530616e-05, "loss": 0.5619, "step": 11487 }, { "epoch": 14.70464, "grad_norm": 0.9703164100646973, "learning_rate": 2.7038815526210488e-05, "loss": 0.546, "step": 11488 }, { "epoch": 14.70592, "grad_norm": 0.9917868375778198, 
"learning_rate": 2.703681472589036e-05, "loss": 0.558, "step": 11489 }, { "epoch": 14.7072, "grad_norm": 0.9812465906143188, "learning_rate": 2.7034813925570228e-05, "loss": 0.5587, "step": 11490 }, { "epoch": 14.70848, "grad_norm": 0.9691387414932251, "learning_rate": 2.70328131252501e-05, "loss": 0.5575, "step": 11491 }, { "epoch": 14.70976, "grad_norm": 0.912678062915802, "learning_rate": 2.7030812324929972e-05, "loss": 0.4773, "step": 11492 }, { "epoch": 14.71104, "grad_norm": 0.9488722085952759, "learning_rate": 2.7028811524609844e-05, "loss": 0.5633, "step": 11493 }, { "epoch": 14.71232, "grad_norm": 0.958460807800293, "learning_rate": 2.702681072428972e-05, "loss": 0.509, "step": 11494 }, { "epoch": 14.7136, "grad_norm": 0.9496520757675171, "learning_rate": 2.702480992396959e-05, "loss": 0.5236, "step": 11495 }, { "epoch": 14.71488, "grad_norm": 0.9681755900382996, "learning_rate": 2.7022809123649463e-05, "loss": 0.5415, "step": 11496 }, { "epoch": 14.71616, "grad_norm": 1.0062313079833984, "learning_rate": 2.7020808323329334e-05, "loss": 0.5432, "step": 11497 }, { "epoch": 14.71744, "grad_norm": 0.962822675704956, "learning_rate": 2.7018807523009203e-05, "loss": 0.5481, "step": 11498 }, { "epoch": 14.71872, "grad_norm": 1.0065944194793701, "learning_rate": 2.7016806722689075e-05, "loss": 0.5707, "step": 11499 }, { "epoch": 14.72, "grad_norm": 0.9401991367340088, "learning_rate": 2.7014805922368947e-05, "loss": 0.5246, "step": 11500 }, { "epoch": 14.72128, "grad_norm": 0.8997707366943359, "learning_rate": 2.7012805122048822e-05, "loss": 0.4671, "step": 11501 }, { "epoch": 14.72256, "grad_norm": 0.9574297070503235, "learning_rate": 2.7010804321728694e-05, "loss": 0.5616, "step": 11502 }, { "epoch": 14.72384, "grad_norm": 0.9319608807563782, "learning_rate": 2.7008803521408566e-05, "loss": 0.5094, "step": 11503 }, { "epoch": 14.72512, "grad_norm": 0.9995977282524109, "learning_rate": 2.7006802721088437e-05, "loss": 0.5743, "step": 11504 }, { "epoch": 14.7264, 
"grad_norm": 0.9879163503646851, "learning_rate": 2.700480192076831e-05, "loss": 0.5305, "step": 11505 }, { "epoch": 14.72768, "grad_norm": 0.9732846021652222, "learning_rate": 2.7002801120448178e-05, "loss": 0.5276, "step": 11506 }, { "epoch": 14.72896, "grad_norm": 0.9981286525726318, "learning_rate": 2.700080032012805e-05, "loss": 0.5467, "step": 11507 }, { "epoch": 14.73024, "grad_norm": 1.0308117866516113, "learning_rate": 2.6998799519807928e-05, "loss": 0.5996, "step": 11508 }, { "epoch": 14.73152, "grad_norm": 0.9829885363578796, "learning_rate": 2.6996798719487797e-05, "loss": 0.529, "step": 11509 }, { "epoch": 14.7328, "grad_norm": 0.9994106292724609, "learning_rate": 2.699479791916767e-05, "loss": 0.5486, "step": 11510 }, { "epoch": 14.73408, "grad_norm": 0.9333558082580566, "learning_rate": 2.699279711884754e-05, "loss": 0.4884, "step": 11511 }, { "epoch": 14.73536, "grad_norm": 0.9170252680778503, "learning_rate": 2.6990796318527412e-05, "loss": 0.5259, "step": 11512 }, { "epoch": 14.73664, "grad_norm": 0.9396823644638062, "learning_rate": 2.6988795518207284e-05, "loss": 0.5302, "step": 11513 }, { "epoch": 14.73792, "grad_norm": 0.9388831853866577, "learning_rate": 2.6986794717887153e-05, "loss": 0.4803, "step": 11514 }, { "epoch": 14.7392, "grad_norm": 1.0120586156845093, "learning_rate": 2.698479391756703e-05, "loss": 0.581, "step": 11515 }, { "epoch": 14.74048, "grad_norm": 1.0425275564193726, "learning_rate": 2.6982793117246903e-05, "loss": 0.5611, "step": 11516 }, { "epoch": 14.74176, "grad_norm": 1.0189977884292603, "learning_rate": 2.698079231692677e-05, "loss": 0.566, "step": 11517 }, { "epoch": 14.74304, "grad_norm": 0.9831307530403137, "learning_rate": 2.6978791516606643e-05, "loss": 0.5258, "step": 11518 }, { "epoch": 14.74432, "grad_norm": 0.9544398188591003, "learning_rate": 2.6976790716286515e-05, "loss": 0.5207, "step": 11519 }, { "epoch": 14.7456, "grad_norm": 0.9739946722984314, "learning_rate": 2.6974789915966387e-05, "loss": 0.5362, 
"step": 11520 }, { "epoch": 14.74688, "grad_norm": 1.0266259908676147, "learning_rate": 2.697278911564626e-05, "loss": 0.5669, "step": 11521 }, { "epoch": 14.74816, "grad_norm": 0.9911479949951172, "learning_rate": 2.6970788315326134e-05, "loss": 0.529, "step": 11522 }, { "epoch": 14.74944, "grad_norm": 0.978696882724762, "learning_rate": 2.6968787515006006e-05, "loss": 0.5497, "step": 11523 }, { "epoch": 14.75072, "grad_norm": 0.9742518663406372, "learning_rate": 2.6966786714685878e-05, "loss": 0.4836, "step": 11524 }, { "epoch": 14.752, "grad_norm": 0.9718377590179443, "learning_rate": 2.6964785914365746e-05, "loss": 0.513, "step": 11525 }, { "epoch": 14.75328, "grad_norm": 1.0205025672912598, "learning_rate": 2.6962785114045618e-05, "loss": 0.582, "step": 11526 }, { "epoch": 14.75456, "grad_norm": 0.9338187575340271, "learning_rate": 2.696078431372549e-05, "loss": 0.4745, "step": 11527 }, { "epoch": 14.75584, "grad_norm": 0.9623693823814392, "learning_rate": 2.6958783513405362e-05, "loss": 0.5462, "step": 11528 }, { "epoch": 14.75712, "grad_norm": 0.9271833896636963, "learning_rate": 2.6956782713085237e-05, "loss": 0.4699, "step": 11529 }, { "epoch": 14.7584, "grad_norm": 1.0047674179077148, "learning_rate": 2.695478191276511e-05, "loss": 0.5474, "step": 11530 }, { "epoch": 14.75968, "grad_norm": 0.9866589903831482, "learning_rate": 2.695278111244498e-05, "loss": 0.5667, "step": 11531 }, { "epoch": 14.76096, "grad_norm": 0.9769319891929626, "learning_rate": 2.6950780312124853e-05, "loss": 0.5532, "step": 11532 }, { "epoch": 14.76224, "grad_norm": 0.9511915445327759, "learning_rate": 2.694877951180472e-05, "loss": 0.522, "step": 11533 }, { "epoch": 14.76352, "grad_norm": 0.9802474975585938, "learning_rate": 2.6946778711484593e-05, "loss": 0.5575, "step": 11534 }, { "epoch": 14.7648, "grad_norm": 0.8974894285202026, "learning_rate": 2.6944777911164465e-05, "loss": 0.4573, "step": 11535 }, { "epoch": 14.76608, "grad_norm": 0.9369537830352783, "learning_rate": 
2.694277711084434e-05, "loss": 0.5447, "step": 11536 }, { "epoch": 14.76736, "grad_norm": 0.9481881260871887, "learning_rate": 2.6940776310524212e-05, "loss": 0.5191, "step": 11537 }, { "epoch": 14.76864, "grad_norm": 0.9813864231109619, "learning_rate": 2.6938775510204084e-05, "loss": 0.5228, "step": 11538 }, { "epoch": 14.76992, "grad_norm": 0.9654099345207214, "learning_rate": 2.6936774709883956e-05, "loss": 0.4354, "step": 11539 }, { "epoch": 14.7712, "grad_norm": 1.015555739402771, "learning_rate": 2.6934773909563828e-05, "loss": 0.5356, "step": 11540 }, { "epoch": 14.77248, "grad_norm": 0.9228314161300659, "learning_rate": 2.6932773109243696e-05, "loss": 0.497, "step": 11541 }, { "epoch": 14.77376, "grad_norm": 0.9956296682357788, "learning_rate": 2.6930772308923568e-05, "loss": 0.5217, "step": 11542 }, { "epoch": 14.77504, "grad_norm": 1.0242211818695068, "learning_rate": 2.6928771508603447e-05, "loss": 0.4918, "step": 11543 }, { "epoch": 14.77632, "grad_norm": 1.0041379928588867, "learning_rate": 2.6926770708283315e-05, "loss": 0.5375, "step": 11544 }, { "epoch": 14.7776, "grad_norm": 0.9402958750724792, "learning_rate": 2.6924769907963187e-05, "loss": 0.4983, "step": 11545 }, { "epoch": 14.778880000000001, "grad_norm": 0.9316099882125854, "learning_rate": 2.692276910764306e-05, "loss": 0.4897, "step": 11546 }, { "epoch": 14.78016, "grad_norm": 1.0062158107757568, "learning_rate": 2.692076830732293e-05, "loss": 0.6163, "step": 11547 }, { "epoch": 14.78144, "grad_norm": 0.93266361951828, "learning_rate": 2.6918767507002802e-05, "loss": 0.5614, "step": 11548 }, { "epoch": 14.78272, "grad_norm": 0.9960542321205139, "learning_rate": 2.691676670668267e-05, "loss": 0.556, "step": 11549 }, { "epoch": 14.784, "grad_norm": 0.9723289608955383, "learning_rate": 2.691476590636255e-05, "loss": 0.5528, "step": 11550 }, { "epoch": 14.78528, "grad_norm": 1.003118872642517, "learning_rate": 2.691276510604242e-05, "loss": 0.5594, "step": 11551 }, { "epoch": 14.78656, 
"grad_norm": 0.8769537806510925, "learning_rate": 2.691076430572229e-05, "loss": 0.4636, "step": 11552 }, { "epoch": 14.78784, "grad_norm": 0.929743766784668, "learning_rate": 2.6908763505402162e-05, "loss": 0.497, "step": 11553 }, { "epoch": 14.78912, "grad_norm": 0.9782609343528748, "learning_rate": 2.6906762705082034e-05, "loss": 0.5343, "step": 11554 }, { "epoch": 14.7904, "grad_norm": 0.9649235010147095, "learning_rate": 2.6904761904761905e-05, "loss": 0.5329, "step": 11555 }, { "epoch": 14.79168, "grad_norm": 1.0030887126922607, "learning_rate": 2.6902761104441777e-05, "loss": 0.5787, "step": 11556 }, { "epoch": 14.79296, "grad_norm": 1.0339874029159546, "learning_rate": 2.6900760304121653e-05, "loss": 0.5822, "step": 11557 }, { "epoch": 14.79424, "grad_norm": 1.0336726903915405, "learning_rate": 2.6898759503801524e-05, "loss": 0.573, "step": 11558 }, { "epoch": 14.79552, "grad_norm": 0.9409865140914917, "learning_rate": 2.6896758703481396e-05, "loss": 0.5171, "step": 11559 }, { "epoch": 14.7968, "grad_norm": 0.9395885467529297, "learning_rate": 2.6894757903161265e-05, "loss": 0.52, "step": 11560 }, { "epoch": 14.79808, "grad_norm": 1.0146170854568481, "learning_rate": 2.6892757102841137e-05, "loss": 0.538, "step": 11561 }, { "epoch": 14.79936, "grad_norm": 1.051053524017334, "learning_rate": 2.689075630252101e-05, "loss": 0.6081, "step": 11562 }, { "epoch": 14.80064, "grad_norm": 0.9499222636222839, "learning_rate": 2.688875550220088e-05, "loss": 0.5111, "step": 11563 }, { "epoch": 14.801919999999999, "grad_norm": 1.0103505849838257, "learning_rate": 2.6886754701880752e-05, "loss": 0.5686, "step": 11564 }, { "epoch": 14.8032, "grad_norm": 1.0131018161773682, "learning_rate": 2.6884753901560627e-05, "loss": 0.5578, "step": 11565 }, { "epoch": 14.80448, "grad_norm": 1.0048017501831055, "learning_rate": 2.68827531012405e-05, "loss": 0.5235, "step": 11566 }, { "epoch": 14.80576, "grad_norm": 1.0479151010513306, "learning_rate": 2.688075230092037e-05, "loss": 
0.5589, "step": 11567 }, { "epoch": 14.80704, "grad_norm": 1.0059313774108887, "learning_rate": 2.687875150060024e-05, "loss": 0.5576, "step": 11568 }, { "epoch": 14.80832, "grad_norm": 0.9548071622848511, "learning_rate": 2.687675070028011e-05, "loss": 0.5122, "step": 11569 }, { "epoch": 14.8096, "grad_norm": 0.9280616641044617, "learning_rate": 2.6874749899959983e-05, "loss": 0.5034, "step": 11570 }, { "epoch": 14.810880000000001, "grad_norm": 0.9565810561180115, "learning_rate": 2.6872749099639855e-05, "loss": 0.516, "step": 11571 }, { "epoch": 14.81216, "grad_norm": 0.9888594150543213, "learning_rate": 2.687074829931973e-05, "loss": 0.5403, "step": 11572 }, { "epoch": 14.81344, "grad_norm": 1.0492602586746216, "learning_rate": 2.6868747498999602e-05, "loss": 0.5664, "step": 11573 }, { "epoch": 14.81472, "grad_norm": 0.9663681983947754, "learning_rate": 2.6866746698679474e-05, "loss": 0.5895, "step": 11574 }, { "epoch": 14.816, "grad_norm": 0.9314732551574707, "learning_rate": 2.6864745898359346e-05, "loss": 0.4985, "step": 11575 }, { "epoch": 14.81728, "grad_norm": 0.9417006373405457, "learning_rate": 2.6862745098039214e-05, "loss": 0.5434, "step": 11576 }, { "epoch": 14.81856, "grad_norm": 0.968472421169281, "learning_rate": 2.6860744297719086e-05, "loss": 0.5591, "step": 11577 }, { "epoch": 14.81984, "grad_norm": 0.9163793325424194, "learning_rate": 2.6858743497398958e-05, "loss": 0.5035, "step": 11578 }, { "epoch": 14.82112, "grad_norm": 0.9645053744316101, "learning_rate": 2.6856742697078833e-05, "loss": 0.5461, "step": 11579 }, { "epoch": 14.8224, "grad_norm": 0.9942086338996887, "learning_rate": 2.6854741896758705e-05, "loss": 0.5775, "step": 11580 }, { "epoch": 14.82368, "grad_norm": 0.9701970219612122, "learning_rate": 2.6852741096438577e-05, "loss": 0.5237, "step": 11581 }, { "epoch": 14.82496, "grad_norm": 0.9851134419441223, "learning_rate": 2.685074029611845e-05, "loss": 0.5184, "step": 11582 }, { "epoch": 14.82624, "grad_norm": 0.8879162073135376, 
"learning_rate": 2.684873949579832e-05, "loss": 0.4711, "step": 11583 }, { "epoch": 14.82752, "grad_norm": 0.9536734819412231, "learning_rate": 2.684673869547819e-05, "loss": 0.5822, "step": 11584 }, { "epoch": 14.8288, "grad_norm": 0.9417433142662048, "learning_rate": 2.684473789515806e-05, "loss": 0.5429, "step": 11585 }, { "epoch": 14.83008, "grad_norm": 0.9906011819839478, "learning_rate": 2.684273709483794e-05, "loss": 0.5599, "step": 11586 }, { "epoch": 14.83136, "grad_norm": 1.0022450685501099, "learning_rate": 2.6840736294517808e-05, "loss": 0.565, "step": 11587 }, { "epoch": 14.83264, "grad_norm": 0.9821887016296387, "learning_rate": 2.683873549419768e-05, "loss": 0.5372, "step": 11588 }, { "epoch": 14.833919999999999, "grad_norm": 0.9912195801734924, "learning_rate": 2.6836734693877552e-05, "loss": 0.5902, "step": 11589 }, { "epoch": 14.8352, "grad_norm": 0.940270721912384, "learning_rate": 2.6834733893557424e-05, "loss": 0.5177, "step": 11590 }, { "epoch": 14.83648, "grad_norm": 0.9974461793899536, "learning_rate": 2.6832733093237296e-05, "loss": 0.5458, "step": 11591 }, { "epoch": 14.83776, "grad_norm": 1.0043706893920898, "learning_rate": 2.6830732292917164e-05, "loss": 0.6071, "step": 11592 }, { "epoch": 14.83904, "grad_norm": 0.982296347618103, "learning_rate": 2.6828731492597043e-05, "loss": 0.5237, "step": 11593 }, { "epoch": 14.84032, "grad_norm": 1.0043891668319702, "learning_rate": 2.6826730692276915e-05, "loss": 0.5551, "step": 11594 }, { "epoch": 14.8416, "grad_norm": 1.0041606426239014, "learning_rate": 2.6824729891956783e-05, "loss": 0.5376, "step": 11595 }, { "epoch": 14.84288, "grad_norm": 0.952582061290741, "learning_rate": 2.6822729091636655e-05, "loss": 0.5425, "step": 11596 }, { "epoch": 14.84416, "grad_norm": 0.905483603477478, "learning_rate": 2.6820728291316527e-05, "loss": 0.5078, "step": 11597 }, { "epoch": 14.84544, "grad_norm": 0.9707022309303284, "learning_rate": 2.68187274909964e-05, "loss": 0.5593, "step": 11598 }, { "epoch": 
14.84672, "grad_norm": 0.9420384168624878, "learning_rate": 2.681672669067627e-05, "loss": 0.5346, "step": 11599 }, { "epoch": 14.848, "grad_norm": 1.0081489086151123, "learning_rate": 2.6814725890356146e-05, "loss": 0.5217, "step": 11600 }, { "epoch": 14.84928, "grad_norm": 1.0194686651229858, "learning_rate": 2.6812725090036018e-05, "loss": 0.6055, "step": 11601 }, { "epoch": 14.85056, "grad_norm": 0.9814406037330627, "learning_rate": 2.681072428971589e-05, "loss": 0.5336, "step": 11602 }, { "epoch": 14.85184, "grad_norm": 0.9600327610969543, "learning_rate": 2.6808723489395758e-05, "loss": 0.5383, "step": 11603 }, { "epoch": 14.85312, "grad_norm": 0.9688712954521179, "learning_rate": 2.680672268907563e-05, "loss": 0.5513, "step": 11604 }, { "epoch": 14.8544, "grad_norm": 0.9412886500358582, "learning_rate": 2.6804721888755502e-05, "loss": 0.5537, "step": 11605 }, { "epoch": 14.85568, "grad_norm": 0.9556344151496887, "learning_rate": 2.6802721088435374e-05, "loss": 0.5508, "step": 11606 }, { "epoch": 14.85696, "grad_norm": 0.9847762584686279, "learning_rate": 2.680072028811525e-05, "loss": 0.5563, "step": 11607 }, { "epoch": 14.85824, "grad_norm": 0.9027768969535828, "learning_rate": 2.679871948779512e-05, "loss": 0.5691, "step": 11608 }, { "epoch": 14.85952, "grad_norm": 0.998230516910553, "learning_rate": 2.6796718687474993e-05, "loss": 0.5422, "step": 11609 }, { "epoch": 14.8608, "grad_norm": 1.0053776502609253, "learning_rate": 2.6794717887154864e-05, "loss": 0.5396, "step": 11610 }, { "epoch": 14.86208, "grad_norm": 1.0294034481048584, "learning_rate": 2.6792717086834733e-05, "loss": 0.5769, "step": 11611 }, { "epoch": 14.86336, "grad_norm": 0.9711460471153259, "learning_rate": 2.6790716286514605e-05, "loss": 0.5277, "step": 11612 }, { "epoch": 14.86464, "grad_norm": 0.9948602318763733, "learning_rate": 2.6788715486194477e-05, "loss": 0.5497, "step": 11613 }, { "epoch": 14.86592, "grad_norm": 0.9797696471214294, "learning_rate": 2.6786714685874352e-05, 
"loss": 0.5707, "step": 11614 }, { "epoch": 14.8672, "grad_norm": 1.006882905960083, "learning_rate": 2.6784713885554224e-05, "loss": 0.5857, "step": 11615 }, { "epoch": 14.86848, "grad_norm": 0.9534305930137634, "learning_rate": 2.6782713085234096e-05, "loss": 0.5066, "step": 11616 }, { "epoch": 14.86976, "grad_norm": 1.002995491027832, "learning_rate": 2.6780712284913967e-05, "loss": 0.5673, "step": 11617 }, { "epoch": 14.87104, "grad_norm": 0.9653766751289368, "learning_rate": 2.677871148459384e-05, "loss": 0.5393, "step": 11618 }, { "epoch": 14.87232, "grad_norm": 0.9714248776435852, "learning_rate": 2.6776710684273708e-05, "loss": 0.5344, "step": 11619 }, { "epoch": 14.8736, "grad_norm": 0.9720675349235535, "learning_rate": 2.677470988395358e-05, "loss": 0.5118, "step": 11620 }, { "epoch": 14.87488, "grad_norm": 0.9434537887573242, "learning_rate": 2.6772709083633458e-05, "loss": 0.5116, "step": 11621 }, { "epoch": 14.87616, "grad_norm": 0.9508132338523865, "learning_rate": 2.6770708283313327e-05, "loss": 0.5432, "step": 11622 }, { "epoch": 14.87744, "grad_norm": 0.9770334362983704, "learning_rate": 2.67687074829932e-05, "loss": 0.5533, "step": 11623 }, { "epoch": 14.87872, "grad_norm": 0.9566938877105713, "learning_rate": 2.676670668267307e-05, "loss": 0.5081, "step": 11624 }, { "epoch": 14.88, "grad_norm": 1.0178238153457642, "learning_rate": 2.6764705882352942e-05, "loss": 0.5921, "step": 11625 }, { "epoch": 14.88128, "grad_norm": 0.9234164357185364, "learning_rate": 2.6762705082032814e-05, "loss": 0.523, "step": 11626 }, { "epoch": 14.88256, "grad_norm": 0.9184479713439941, "learning_rate": 2.6760704281712683e-05, "loss": 0.5278, "step": 11627 }, { "epoch": 14.88384, "grad_norm": 0.9186223745346069, "learning_rate": 2.675870348139256e-05, "loss": 0.4659, "step": 11628 }, { "epoch": 14.88512, "grad_norm": 0.9434010982513428, "learning_rate": 2.6756702681072433e-05, "loss": 0.517, "step": 11629 }, { "epoch": 14.8864, "grad_norm": 0.9369627833366394, 
"learning_rate": 2.67547018807523e-05, "loss": 0.5197, "step": 11630 }, { "epoch": 14.88768, "grad_norm": 0.9298577308654785, "learning_rate": 2.6752701080432173e-05, "loss": 0.5219, "step": 11631 }, { "epoch": 14.88896, "grad_norm": 0.9377476572990417, "learning_rate": 2.6750700280112045e-05, "loss": 0.5519, "step": 11632 }, { "epoch": 14.89024, "grad_norm": 0.919325590133667, "learning_rate": 2.6748699479791917e-05, "loss": 0.5225, "step": 11633 }, { "epoch": 14.89152, "grad_norm": 0.9516395330429077, "learning_rate": 2.674669867947179e-05, "loss": 0.5508, "step": 11634 }, { "epoch": 14.8928, "grad_norm": 0.9320101737976074, "learning_rate": 2.6744697879151664e-05, "loss": 0.5241, "step": 11635 }, { "epoch": 14.89408, "grad_norm": 1.018315315246582, "learning_rate": 2.6742697078831536e-05, "loss": 0.5826, "step": 11636 }, { "epoch": 14.89536, "grad_norm": 0.973010241985321, "learning_rate": 2.6740696278511408e-05, "loss": 0.5401, "step": 11637 }, { "epoch": 14.89664, "grad_norm": 0.9724129438400269, "learning_rate": 2.6738695478191276e-05, "loss": 0.5432, "step": 11638 }, { "epoch": 14.89792, "grad_norm": 0.9453209042549133, "learning_rate": 2.6736694677871148e-05, "loss": 0.5506, "step": 11639 }, { "epoch": 14.8992, "grad_norm": 1.0045145750045776, "learning_rate": 2.673469387755102e-05, "loss": 0.5437, "step": 11640 }, { "epoch": 14.90048, "grad_norm": 1.0044034719467163, "learning_rate": 2.6732693077230892e-05, "loss": 0.5566, "step": 11641 }, { "epoch": 14.90176, "grad_norm": 0.9657561182975769, "learning_rate": 2.6730692276910767e-05, "loss": 0.5102, "step": 11642 }, { "epoch": 14.90304, "grad_norm": 0.9859712719917297, "learning_rate": 2.672869147659064e-05, "loss": 0.5598, "step": 11643 }, { "epoch": 14.90432, "grad_norm": 0.9568543434143066, "learning_rate": 2.672669067627051e-05, "loss": 0.5439, "step": 11644 }, { "epoch": 14.9056, "grad_norm": 1.0132269859313965, "learning_rate": 2.6724689875950383e-05, "loss": 0.5572, "step": 11645 }, { "epoch": 
14.90688, "grad_norm": 0.9919137358665466, "learning_rate": 2.672268907563025e-05, "loss": 0.5823, "step": 11646 }, { "epoch": 14.90816, "grad_norm": 0.9762164354324341, "learning_rate": 2.6720688275310123e-05, "loss": 0.5324, "step": 11647 }, { "epoch": 14.90944, "grad_norm": 0.9875562191009521, "learning_rate": 2.6718687474989995e-05, "loss": 0.5269, "step": 11648 }, { "epoch": 14.91072, "grad_norm": 0.9606843590736389, "learning_rate": 2.671668667466987e-05, "loss": 0.5141, "step": 11649 }, { "epoch": 14.912, "grad_norm": 0.979857861995697, "learning_rate": 2.6714685874349742e-05, "loss": 0.5595, "step": 11650 }, { "epoch": 14.91328, "grad_norm": 0.9310894012451172, "learning_rate": 2.6712685074029614e-05, "loss": 0.5369, "step": 11651 }, { "epoch": 14.91456, "grad_norm": 0.9621683955192566, "learning_rate": 2.6710684273709486e-05, "loss": 0.5379, "step": 11652 }, { "epoch": 14.91584, "grad_norm": 0.9423104524612427, "learning_rate": 2.6708683473389358e-05, "loss": 0.5239, "step": 11653 }, { "epoch": 14.91712, "grad_norm": 0.9782088398933411, "learning_rate": 2.6706682673069226e-05, "loss": 0.5157, "step": 11654 }, { "epoch": 14.9184, "grad_norm": 1.0111697912216187, "learning_rate": 2.6704681872749098e-05, "loss": 0.5385, "step": 11655 }, { "epoch": 14.91968, "grad_norm": 0.9448067545890808, "learning_rate": 2.6702681072428977e-05, "loss": 0.5221, "step": 11656 }, { "epoch": 14.920960000000001, "grad_norm": 0.9532007575035095, "learning_rate": 2.6700680272108845e-05, "loss": 0.5573, "step": 11657 }, { "epoch": 14.92224, "grad_norm": 0.9769750237464905, "learning_rate": 2.6698679471788717e-05, "loss": 0.5437, "step": 11658 }, { "epoch": 14.92352, "grad_norm": 0.9078409671783447, "learning_rate": 2.669667867146859e-05, "loss": 0.4773, "step": 11659 }, { "epoch": 14.9248, "grad_norm": 0.9596759676933289, "learning_rate": 2.669467787114846e-05, "loss": 0.5117, "step": 11660 }, { "epoch": 14.92608, "grad_norm": 0.9927992820739746, "learning_rate": 
2.6692677070828332e-05, "loss": 0.5405, "step": 11661 }, { "epoch": 14.92736, "grad_norm": 0.9593184590339661, "learning_rate": 2.66906762705082e-05, "loss": 0.4832, "step": 11662 }, { "epoch": 14.92864, "grad_norm": 0.9789624214172363, "learning_rate": 2.668867547018808e-05, "loss": 0.5316, "step": 11663 }, { "epoch": 14.92992, "grad_norm": 0.9484257698059082, "learning_rate": 2.668667466986795e-05, "loss": 0.5088, "step": 11664 }, { "epoch": 14.9312, "grad_norm": 0.987421989440918, "learning_rate": 2.668467386954782e-05, "loss": 0.5787, "step": 11665 }, { "epoch": 14.93248, "grad_norm": 0.973088264465332, "learning_rate": 2.6682673069227692e-05, "loss": 0.5346, "step": 11666 }, { "epoch": 14.93376, "grad_norm": 0.985235333442688, "learning_rate": 2.6680672268907564e-05, "loss": 0.5391, "step": 11667 }, { "epoch": 14.93504, "grad_norm": 1.0299049615859985, "learning_rate": 2.6678671468587435e-05, "loss": 0.5586, "step": 11668 }, { "epoch": 14.93632, "grad_norm": 0.9780097603797913, "learning_rate": 2.6676670668267307e-05, "loss": 0.5194, "step": 11669 }, { "epoch": 14.9376, "grad_norm": 1.037703037261963, "learning_rate": 2.6674669867947183e-05, "loss": 0.5629, "step": 11670 }, { "epoch": 14.93888, "grad_norm": 0.9950383305549622, "learning_rate": 2.6672669067627054e-05, "loss": 0.5533, "step": 11671 }, { "epoch": 14.94016, "grad_norm": 0.9457634091377258, "learning_rate": 2.6670668267306926e-05, "loss": 0.5002, "step": 11672 }, { "epoch": 14.94144, "grad_norm": 1.0216537714004517, "learning_rate": 2.6668667466986795e-05, "loss": 0.5612, "step": 11673 }, { "epoch": 14.94272, "grad_norm": 0.9794032573699951, "learning_rate": 2.6666666666666667e-05, "loss": 0.5796, "step": 11674 }, { "epoch": 14.943999999999999, "grad_norm": 0.965833306312561, "learning_rate": 2.666466586634654e-05, "loss": 0.5874, "step": 11675 }, { "epoch": 14.94528, "grad_norm": 0.9718341827392578, "learning_rate": 2.666266506602641e-05, "loss": 0.5002, "step": 11676 }, { "epoch": 14.94656, 
"grad_norm": 0.9411438703536987, "learning_rate": 2.6660664265706282e-05, "loss": 0.5241, "step": 11677 }, { "epoch": 14.94784, "grad_norm": 0.9548771381378174, "learning_rate": 2.6658663465386157e-05, "loss": 0.5152, "step": 11678 }, { "epoch": 14.94912, "grad_norm": 0.9769624471664429, "learning_rate": 2.665666266506603e-05, "loss": 0.5417, "step": 11679 }, { "epoch": 14.9504, "grad_norm": 0.9090695977210999, "learning_rate": 2.66546618647459e-05, "loss": 0.5338, "step": 11680 }, { "epoch": 14.95168, "grad_norm": 0.9659811854362488, "learning_rate": 2.665266106442577e-05, "loss": 0.5788, "step": 11681 }, { "epoch": 14.952960000000001, "grad_norm": 0.94170081615448, "learning_rate": 2.665066026410564e-05, "loss": 0.5491, "step": 11682 }, { "epoch": 14.95424, "grad_norm": 0.9707318544387817, "learning_rate": 2.6648659463785513e-05, "loss": 0.5736, "step": 11683 }, { "epoch": 14.95552, "grad_norm": 0.9650655388832092, "learning_rate": 2.6646658663465385e-05, "loss": 0.5422, "step": 11684 }, { "epoch": 14.9568, "grad_norm": 0.9502249360084534, "learning_rate": 2.664465786314526e-05, "loss": 0.5211, "step": 11685 }, { "epoch": 14.95808, "grad_norm": 0.9722442030906677, "learning_rate": 2.6642657062825132e-05, "loss": 0.5209, "step": 11686 }, { "epoch": 14.95936, "grad_norm": 0.9735600352287292, "learning_rate": 2.6640656262505004e-05, "loss": 0.5587, "step": 11687 }, { "epoch": 14.96064, "grad_norm": 0.9050617218017578, "learning_rate": 2.6638655462184876e-05, "loss": 0.495, "step": 11688 }, { "epoch": 14.96192, "grad_norm": 1.0128991603851318, "learning_rate": 2.6636654661864744e-05, "loss": 0.5692, "step": 11689 }, { "epoch": 14.9632, "grad_norm": 0.970399796962738, "learning_rate": 2.6634653861544616e-05, "loss": 0.5175, "step": 11690 }, { "epoch": 14.96448, "grad_norm": 0.9725888967514038, "learning_rate": 2.6632653061224488e-05, "loss": 0.5323, "step": 11691 }, { "epoch": 14.96576, "grad_norm": 0.9731080532073975, "learning_rate": 2.6630652260904367e-05, "loss": 
0.5548, "step": 11692 }, { "epoch": 14.96704, "grad_norm": 0.9690375924110413, "learning_rate": 2.6628651460584235e-05, "loss": 0.5147, "step": 11693 }, { "epoch": 14.96832, "grad_norm": 1.0425267219543457, "learning_rate": 2.6626650660264107e-05, "loss": 0.5284, "step": 11694 }, { "epoch": 14.9696, "grad_norm": 0.949582576751709, "learning_rate": 2.662464985994398e-05, "loss": 0.4982, "step": 11695 }, { "epoch": 14.97088, "grad_norm": 0.9666782021522522, "learning_rate": 2.662264905962385e-05, "loss": 0.5504, "step": 11696 }, { "epoch": 14.97216, "grad_norm": 1.0077440738677979, "learning_rate": 2.662064825930372e-05, "loss": 0.5647, "step": 11697 }, { "epoch": 14.97344, "grad_norm": 0.930424690246582, "learning_rate": 2.661864745898359e-05, "loss": 0.484, "step": 11698 }, { "epoch": 14.97472, "grad_norm": 0.9621975421905518, "learning_rate": 2.661664665866347e-05, "loss": 0.5133, "step": 11699 }, { "epoch": 14.975999999999999, "grad_norm": 0.9930300116539001, "learning_rate": 2.661464585834334e-05, "loss": 0.5704, "step": 11700 }, { "epoch": 14.97728, "grad_norm": 0.965140163898468, "learning_rate": 2.661264505802321e-05, "loss": 0.5111, "step": 11701 }, { "epoch": 14.97856, "grad_norm": 0.9955794215202332, "learning_rate": 2.6610644257703082e-05, "loss": 0.5608, "step": 11702 }, { "epoch": 14.97984, "grad_norm": 0.9986573457717896, "learning_rate": 2.6608643457382954e-05, "loss": 0.5501, "step": 11703 }, { "epoch": 14.98112, "grad_norm": 0.9306737780570984, "learning_rate": 2.6606642657062826e-05, "loss": 0.5065, "step": 11704 }, { "epoch": 14.9824, "grad_norm": 0.9776188135147095, "learning_rate": 2.6604641856742694e-05, "loss": 0.5241, "step": 11705 }, { "epoch": 14.98368, "grad_norm": 0.9727427363395691, "learning_rate": 2.6602641056422573e-05, "loss": 0.515, "step": 11706 }, { "epoch": 14.98496, "grad_norm": 1.011610507965088, "learning_rate": 2.6600640256102445e-05, "loss": 0.5472, "step": 11707 }, { "epoch": 14.98624, "grad_norm": 1.0158885717391968, 
"learning_rate": 2.6598639455782316e-05, "loss": 0.5642, "step": 11708 }, { "epoch": 14.98752, "grad_norm": 1.021340250968933, "learning_rate": 2.6596638655462185e-05, "loss": 0.5585, "step": 11709 }, { "epoch": 14.9888, "grad_norm": 0.91712486743927, "learning_rate": 2.6594637855142057e-05, "loss": 0.5085, "step": 11710 }, { "epoch": 14.99008, "grad_norm": 0.9759134650230408, "learning_rate": 2.659263705482193e-05, "loss": 0.5493, "step": 11711 }, { "epoch": 14.99136, "grad_norm": 1.0136007070541382, "learning_rate": 2.65906362545018e-05, "loss": 0.5891, "step": 11712 }, { "epoch": 14.99264, "grad_norm": 0.9995754361152649, "learning_rate": 2.6588635454181676e-05, "loss": 0.4942, "step": 11713 }, { "epoch": 14.99392, "grad_norm": 0.9608016610145569, "learning_rate": 2.6586634653861548e-05, "loss": 0.5207, "step": 11714 }, { "epoch": 14.9952, "grad_norm": 0.9471200108528137, "learning_rate": 2.658463385354142e-05, "loss": 0.5334, "step": 11715 }, { "epoch": 14.99648, "grad_norm": 0.971295952796936, "learning_rate": 2.658263305322129e-05, "loss": 0.5604, "step": 11716 }, { "epoch": 14.99776, "grad_norm": 0.9903369545936584, "learning_rate": 2.658063225290116e-05, "loss": 0.6, "step": 11717 }, { "epoch": 14.99904, "grad_norm": 0.9256072044372559, "learning_rate": 2.657863145258103e-05, "loss": 0.5046, "step": 11718 }, { "epoch": 15.00032, "grad_norm": 1.9394627809524536, "learning_rate": 2.6576630652260904e-05, "loss": 0.813, "step": 11719 }, { "epoch": 15.0016, "grad_norm": 0.9253193140029907, "learning_rate": 2.657462985194078e-05, "loss": 0.5114, "step": 11720 }, { "epoch": 15.00288, "grad_norm": 0.9425203800201416, "learning_rate": 2.657262905162065e-05, "loss": 0.4764, "step": 11721 }, { "epoch": 15.00416, "grad_norm": 0.9434118866920471, "learning_rate": 2.6570628251300522e-05, "loss": 0.4907, "step": 11722 }, { "epoch": 15.00544, "grad_norm": 0.9589188694953918, "learning_rate": 2.6568627450980394e-05, "loss": 0.5433, "step": 11723 }, { "epoch": 15.00672, 
"grad_norm": 0.9243043661117554, "learning_rate": 2.6566626650660266e-05, "loss": 0.5402, "step": 11724 }, { "epoch": 15.008, "grad_norm": 0.9611799120903015, "learning_rate": 2.6564625850340135e-05, "loss": 0.5056, "step": 11725 }, { "epoch": 15.00928, "grad_norm": 0.8841933608055115, "learning_rate": 2.6562625050020007e-05, "loss": 0.4763, "step": 11726 }, { "epoch": 15.01056, "grad_norm": 0.9348115921020508, "learning_rate": 2.6560624249699885e-05, "loss": 0.4711, "step": 11727 }, { "epoch": 15.01184, "grad_norm": 0.9851812720298767, "learning_rate": 2.6558623449379754e-05, "loss": 0.5166, "step": 11728 }, { "epoch": 15.01312, "grad_norm": 0.967349648475647, "learning_rate": 2.6556622649059625e-05, "loss": 0.4667, "step": 11729 }, { "epoch": 15.0144, "grad_norm": 1.0413135290145874, "learning_rate": 2.6554621848739497e-05, "loss": 0.5798, "step": 11730 }, { "epoch": 15.01568, "grad_norm": 0.9787873029708862, "learning_rate": 2.655262104841937e-05, "loss": 0.5299, "step": 11731 }, { "epoch": 15.01696, "grad_norm": 0.9922900199890137, "learning_rate": 2.655062024809924e-05, "loss": 0.5363, "step": 11732 }, { "epoch": 15.01824, "grad_norm": 0.9843411445617676, "learning_rate": 2.654861944777911e-05, "loss": 0.5272, "step": 11733 }, { "epoch": 15.01952, "grad_norm": 0.9954893589019775, "learning_rate": 2.6546618647458988e-05, "loss": 0.5566, "step": 11734 }, { "epoch": 15.0208, "grad_norm": 0.9984802603721619, "learning_rate": 2.654461784713886e-05, "loss": 0.561, "step": 11735 }, { "epoch": 15.02208, "grad_norm": 0.992762565612793, "learning_rate": 2.654261704681873e-05, "loss": 0.4935, "step": 11736 }, { "epoch": 15.02336, "grad_norm": 0.9480631351470947, "learning_rate": 2.65406162464986e-05, "loss": 0.493, "step": 11737 }, { "epoch": 15.02464, "grad_norm": 1.0148262977600098, "learning_rate": 2.6538615446178472e-05, "loss": 0.5401, "step": 11738 }, { "epoch": 15.02592, "grad_norm": 0.9743524193763733, "learning_rate": 2.6536614645858344e-05, "loss": 0.5219, 
"step": 11739 }, { "epoch": 15.0272, "grad_norm": 1.019026279449463, "learning_rate": 2.6534613845538216e-05, "loss": 0.5246, "step": 11740 }, { "epoch": 15.02848, "grad_norm": 0.9823682308197021, "learning_rate": 2.653261304521809e-05, "loss": 0.5357, "step": 11741 }, { "epoch": 15.02976, "grad_norm": 0.9612826108932495, "learning_rate": 2.6530612244897963e-05, "loss": 0.522, "step": 11742 }, { "epoch": 15.03104, "grad_norm": 0.9693880677223206, "learning_rate": 2.6528611444577835e-05, "loss": 0.5181, "step": 11743 }, { "epoch": 15.03232, "grad_norm": 0.9547830820083618, "learning_rate": 2.6526610644257703e-05, "loss": 0.5113, "step": 11744 }, { "epoch": 15.0336, "grad_norm": 0.9662365317344666, "learning_rate": 2.6524609843937575e-05, "loss": 0.5069, "step": 11745 }, { "epoch": 15.03488, "grad_norm": 0.9782978296279907, "learning_rate": 2.6522609043617447e-05, "loss": 0.5354, "step": 11746 }, { "epoch": 15.03616, "grad_norm": 0.9016302824020386, "learning_rate": 2.652060824329732e-05, "loss": 0.4629, "step": 11747 }, { "epoch": 15.03744, "grad_norm": 0.8719879388809204, "learning_rate": 2.6518607442977194e-05, "loss": 0.4583, "step": 11748 }, { "epoch": 15.03872, "grad_norm": 1.0020391941070557, "learning_rate": 2.6516606642657066e-05, "loss": 0.5467, "step": 11749 }, { "epoch": 15.04, "grad_norm": 0.999260663986206, "learning_rate": 2.6514605842336938e-05, "loss": 0.5304, "step": 11750 }, { "epoch": 15.04128, "grad_norm": 1.00063157081604, "learning_rate": 2.651260504201681e-05, "loss": 0.5185, "step": 11751 }, { "epoch": 15.04256, "grad_norm": 1.0226551294326782, "learning_rate": 2.6510604241696678e-05, "loss": 0.5016, "step": 11752 }, { "epoch": 15.04384, "grad_norm": 0.99168461561203, "learning_rate": 2.650860344137655e-05, "loss": 0.5583, "step": 11753 }, { "epoch": 15.04512, "grad_norm": 0.9974046349525452, "learning_rate": 2.6506602641056422e-05, "loss": 0.4811, "step": 11754 }, { "epoch": 15.0464, "grad_norm": 1.019737958908081, "learning_rate": 
2.6504601840736297e-05, "loss": 0.5457, "step": 11755 }, { "epoch": 15.04768, "grad_norm": 0.9845877289772034, "learning_rate": 2.650260104041617e-05, "loss": 0.4997, "step": 11756 }, { "epoch": 15.04896, "grad_norm": 0.9450182318687439, "learning_rate": 2.650060024009604e-05, "loss": 0.51, "step": 11757 }, { "epoch": 15.05024, "grad_norm": 1.0113811492919922, "learning_rate": 2.6498599439775913e-05, "loss": 0.5266, "step": 11758 }, { "epoch": 15.05152, "grad_norm": 0.9556260108947754, "learning_rate": 2.6496598639455785e-05, "loss": 0.517, "step": 11759 }, { "epoch": 15.0528, "grad_norm": 0.9880708456039429, "learning_rate": 2.6494597839135653e-05, "loss": 0.5527, "step": 11760 }, { "epoch": 15.05408, "grad_norm": 0.8803279399871826, "learning_rate": 2.6492597038815525e-05, "loss": 0.4476, "step": 11761 }, { "epoch": 15.05536, "grad_norm": 0.974906861782074, "learning_rate": 2.6490596238495404e-05, "loss": 0.5251, "step": 11762 }, { "epoch": 15.05664, "grad_norm": 0.9600889086723328, "learning_rate": 2.6488595438175272e-05, "loss": 0.4955, "step": 11763 }, { "epoch": 15.05792, "grad_norm": 0.9697887301445007, "learning_rate": 2.6486594637855144e-05, "loss": 0.5455, "step": 11764 }, { "epoch": 15.0592, "grad_norm": 0.9973766207695007, "learning_rate": 2.6484593837535016e-05, "loss": 0.4653, "step": 11765 }, { "epoch": 15.06048, "grad_norm": 1.0244485139846802, "learning_rate": 2.6482593037214888e-05, "loss": 0.5189, "step": 11766 }, { "epoch": 15.06176, "grad_norm": 0.9942975640296936, "learning_rate": 2.648059223689476e-05, "loss": 0.5736, "step": 11767 }, { "epoch": 15.06304, "grad_norm": 0.9399450421333313, "learning_rate": 2.6478591436574628e-05, "loss": 0.5184, "step": 11768 }, { "epoch": 15.06432, "grad_norm": 0.9376527070999146, "learning_rate": 2.6476590636254507e-05, "loss": 0.494, "step": 11769 }, { "epoch": 15.0656, "grad_norm": 0.9570677280426025, "learning_rate": 2.647458983593438e-05, "loss": 0.5268, "step": 11770 }, { "epoch": 15.06688, "grad_norm": 
0.9823542833328247, "learning_rate": 2.6472589035614247e-05, "loss": 0.5312, "step": 11771 }, { "epoch": 15.06816, "grad_norm": 1.0721344947814941, "learning_rate": 2.647058823529412e-05, "loss": 0.52, "step": 11772 }, { "epoch": 15.06944, "grad_norm": 1.008894681930542, "learning_rate": 2.646858743497399e-05, "loss": 0.5164, "step": 11773 }, { "epoch": 15.07072, "grad_norm": 0.9895505905151367, "learning_rate": 2.6466586634653862e-05, "loss": 0.5588, "step": 11774 }, { "epoch": 15.072, "grad_norm": 1.0512263774871826, "learning_rate": 2.6464585834333734e-05, "loss": 0.5231, "step": 11775 }, { "epoch": 15.07328, "grad_norm": 0.891127347946167, "learning_rate": 2.646258503401361e-05, "loss": 0.4783, "step": 11776 }, { "epoch": 15.07456, "grad_norm": 0.9510053992271423, "learning_rate": 2.646058423369348e-05, "loss": 0.5321, "step": 11777 }, { "epoch": 15.07584, "grad_norm": 0.954017698764801, "learning_rate": 2.6458583433373353e-05, "loss": 0.5153, "step": 11778 }, { "epoch": 15.07712, "grad_norm": 1.0456651449203491, "learning_rate": 2.6456582633053222e-05, "loss": 0.5779, "step": 11779 }, { "epoch": 15.0784, "grad_norm": 0.9539825320243835, "learning_rate": 2.6454581832733094e-05, "loss": 0.5285, "step": 11780 }, { "epoch": 15.07968, "grad_norm": 0.952126681804657, "learning_rate": 2.6452581032412965e-05, "loss": 0.512, "step": 11781 }, { "epoch": 15.08096, "grad_norm": 0.9592217803001404, "learning_rate": 2.6450580232092837e-05, "loss": 0.4852, "step": 11782 }, { "epoch": 15.08224, "grad_norm": 0.9491934180259705, "learning_rate": 2.644857943177271e-05, "loss": 0.4663, "step": 11783 }, { "epoch": 15.08352, "grad_norm": 0.9953993558883667, "learning_rate": 2.6446578631452584e-05, "loss": 0.4667, "step": 11784 }, { "epoch": 15.0848, "grad_norm": 1.0062161684036255, "learning_rate": 2.6444577831132456e-05, "loss": 0.4812, "step": 11785 }, { "epoch": 15.08608, "grad_norm": 0.9338573217391968, "learning_rate": 2.6442577030812328e-05, "loss": 0.5201, "step": 11786 }, { 
"epoch": 15.08736, "grad_norm": 0.9554229378700256, "learning_rate": 2.6440576230492197e-05, "loss": 0.5064, "step": 11787 }, { "epoch": 15.08864, "grad_norm": 0.9917376041412354, "learning_rate": 2.643857543017207e-05, "loss": 0.528, "step": 11788 }, { "epoch": 15.08992, "grad_norm": 0.9698293209075928, "learning_rate": 2.643657462985194e-05, "loss": 0.4922, "step": 11789 }, { "epoch": 15.0912, "grad_norm": 1.039360761642456, "learning_rate": 2.6434573829531812e-05, "loss": 0.5244, "step": 11790 }, { "epoch": 15.09248, "grad_norm": 0.9987143874168396, "learning_rate": 2.6432573029211687e-05, "loss": 0.5006, "step": 11791 }, { "epoch": 15.09376, "grad_norm": 0.9327332377433777, "learning_rate": 2.643057222889156e-05, "loss": 0.4795, "step": 11792 }, { "epoch": 15.09504, "grad_norm": 0.9864438772201538, "learning_rate": 2.642857142857143e-05, "loss": 0.4949, "step": 11793 }, { "epoch": 15.09632, "grad_norm": 0.99493008852005, "learning_rate": 2.6426570628251303e-05, "loss": 0.5297, "step": 11794 }, { "epoch": 15.0976, "grad_norm": 0.9412096738815308, "learning_rate": 2.642456982793117e-05, "loss": 0.4775, "step": 11795 }, { "epoch": 15.09888, "grad_norm": 0.9617460370063782, "learning_rate": 2.6422569027611043e-05, "loss": 0.4855, "step": 11796 }, { "epoch": 15.10016, "grad_norm": 0.982379674911499, "learning_rate": 2.6420568227290915e-05, "loss": 0.5227, "step": 11797 }, { "epoch": 15.10144, "grad_norm": 1.020799994468689, "learning_rate": 2.641856742697079e-05, "loss": 0.579, "step": 11798 }, { "epoch": 15.10272, "grad_norm": 1.0289467573165894, "learning_rate": 2.6416566626650662e-05, "loss": 0.5353, "step": 11799 }, { "epoch": 15.104, "grad_norm": 1.0211548805236816, "learning_rate": 2.6414565826330534e-05, "loss": 0.5503, "step": 11800 }, { "epoch": 15.10528, "grad_norm": 1.011851191520691, "learning_rate": 2.6412565026010406e-05, "loss": 0.5338, "step": 11801 }, { "epoch": 15.10656, "grad_norm": 1.0263190269470215, "learning_rate": 2.6410564225690278e-05, 
"loss": 0.5643, "step": 11802 }, { "epoch": 15.10784, "grad_norm": 0.9878085851669312, "learning_rate": 2.6408563425370146e-05, "loss": 0.5279, "step": 11803 }, { "epoch": 15.10912, "grad_norm": 0.9296584129333496, "learning_rate": 2.6406562625050018e-05, "loss": 0.4767, "step": 11804 }, { "epoch": 15.1104, "grad_norm": 0.9973602294921875, "learning_rate": 2.6404561824729897e-05, "loss": 0.5266, "step": 11805 }, { "epoch": 15.11168, "grad_norm": 0.9674133658409119, "learning_rate": 2.6402561024409765e-05, "loss": 0.511, "step": 11806 }, { "epoch": 15.11296, "grad_norm": 1.0054680109024048, "learning_rate": 2.6400560224089637e-05, "loss": 0.5167, "step": 11807 }, { "epoch": 15.11424, "grad_norm": 0.9890505075454712, "learning_rate": 2.639855942376951e-05, "loss": 0.5279, "step": 11808 }, { "epoch": 15.11552, "grad_norm": 0.9986791610717773, "learning_rate": 2.639655862344938e-05, "loss": 0.5025, "step": 11809 }, { "epoch": 15.1168, "grad_norm": 0.9749217629432678, "learning_rate": 2.6394557823129253e-05, "loss": 0.4886, "step": 11810 }, { "epoch": 15.11808, "grad_norm": 0.9737854599952698, "learning_rate": 2.639255702280912e-05, "loss": 0.4664, "step": 11811 }, { "epoch": 15.11936, "grad_norm": 0.9293329119682312, "learning_rate": 2.6390556222489e-05, "loss": 0.4985, "step": 11812 }, { "epoch": 15.12064, "grad_norm": 0.9365719556808472, "learning_rate": 2.638855542216887e-05, "loss": 0.5201, "step": 11813 }, { "epoch": 15.12192, "grad_norm": 0.9597119092941284, "learning_rate": 2.638655462184874e-05, "loss": 0.5045, "step": 11814 }, { "epoch": 15.1232, "grad_norm": 1.0304876565933228, "learning_rate": 2.6384553821528612e-05, "loss": 0.5272, "step": 11815 }, { "epoch": 15.12448, "grad_norm": 0.9947621822357178, "learning_rate": 2.6382553021208484e-05, "loss": 0.5527, "step": 11816 }, { "epoch": 15.12576, "grad_norm": 0.9298506379127502, "learning_rate": 2.6380552220888356e-05, "loss": 0.4879, "step": 11817 }, { "epoch": 15.12704, "grad_norm": 1.0342029333114624, 
"learning_rate": 2.6378551420568228e-05, "loss": 0.5352, "step": 11818 }, { "epoch": 15.12832, "grad_norm": 0.9854941368103027, "learning_rate": 2.6376550620248103e-05, "loss": 0.5068, "step": 11819 }, { "epoch": 15.1296, "grad_norm": 1.0126068592071533, "learning_rate": 2.6374549819927975e-05, "loss": 0.5505, "step": 11820 }, { "epoch": 15.13088, "grad_norm": 1.0106240510940552, "learning_rate": 2.6372549019607846e-05, "loss": 0.5167, "step": 11821 }, { "epoch": 15.13216, "grad_norm": 1.0682857036590576, "learning_rate": 2.6370548219287715e-05, "loss": 0.5697, "step": 11822 }, { "epoch": 15.13344, "grad_norm": 1.0406882762908936, "learning_rate": 2.6368547418967587e-05, "loss": 0.5549, "step": 11823 }, { "epoch": 15.13472, "grad_norm": 0.9883754253387451, "learning_rate": 2.636654661864746e-05, "loss": 0.4826, "step": 11824 }, { "epoch": 15.136, "grad_norm": 1.0199334621429443, "learning_rate": 2.636454581832733e-05, "loss": 0.5189, "step": 11825 }, { "epoch": 15.13728, "grad_norm": 1.0423178672790527, "learning_rate": 2.6362545018007206e-05, "loss": 0.5777, "step": 11826 }, { "epoch": 15.13856, "grad_norm": 1.055101752281189, "learning_rate": 2.6360544217687078e-05, "loss": 0.555, "step": 11827 }, { "epoch": 15.13984, "grad_norm": 0.9862048625946045, "learning_rate": 2.635854341736695e-05, "loss": 0.5281, "step": 11828 }, { "epoch": 15.14112, "grad_norm": 0.9423441886901855, "learning_rate": 2.635654261704682e-05, "loss": 0.4727, "step": 11829 }, { "epoch": 15.1424, "grad_norm": 0.9520114660263062, "learning_rate": 2.635454181672669e-05, "loss": 0.541, "step": 11830 }, { "epoch": 15.14368, "grad_norm": 0.9528147578239441, "learning_rate": 2.635254101640656e-05, "loss": 0.5085, "step": 11831 }, { "epoch": 15.14496, "grad_norm": 0.9359756112098694, "learning_rate": 2.6350540216086434e-05, "loss": 0.4601, "step": 11832 }, { "epoch": 15.14624, "grad_norm": 0.9768989086151123, "learning_rate": 2.634853941576631e-05, "loss": 0.494, "step": 11833 }, { "epoch": 15.14752, 
"grad_norm": 0.9752456545829773, "learning_rate": 2.634653861544618e-05, "loss": 0.5484, "step": 11834 }, { "epoch": 15.1488, "grad_norm": 0.9548261165618896, "learning_rate": 2.6344537815126052e-05, "loss": 0.5219, "step": 11835 }, { "epoch": 15.150079999999999, "grad_norm": 1.0379620790481567, "learning_rate": 2.6342537014805924e-05, "loss": 0.5714, "step": 11836 }, { "epoch": 15.15136, "grad_norm": 0.9867843389511108, "learning_rate": 2.6340536214485796e-05, "loss": 0.5445, "step": 11837 }, { "epoch": 15.15264, "grad_norm": 0.9981434941291809, "learning_rate": 2.6338535414165665e-05, "loss": 0.4993, "step": 11838 }, { "epoch": 15.15392, "grad_norm": 1.0145291090011597, "learning_rate": 2.6336534613845537e-05, "loss": 0.52, "step": 11839 }, { "epoch": 15.1552, "grad_norm": 0.9970989227294922, "learning_rate": 2.6334533813525415e-05, "loss": 0.5516, "step": 11840 }, { "epoch": 15.15648, "grad_norm": 0.9794284701347351, "learning_rate": 2.6332533013205284e-05, "loss": 0.5283, "step": 11841 }, { "epoch": 15.15776, "grad_norm": 1.0003759860992432, "learning_rate": 2.6330532212885155e-05, "loss": 0.4844, "step": 11842 }, { "epoch": 15.15904, "grad_norm": 0.949975311756134, "learning_rate": 2.6328531412565027e-05, "loss": 0.4862, "step": 11843 }, { "epoch": 15.16032, "grad_norm": 0.9530737400054932, "learning_rate": 2.63265306122449e-05, "loss": 0.4736, "step": 11844 }, { "epoch": 15.1616, "grad_norm": 0.9621790051460266, "learning_rate": 2.632452981192477e-05, "loss": 0.4617, "step": 11845 }, { "epoch": 15.16288, "grad_norm": 1.0266780853271484, "learning_rate": 2.632252901160464e-05, "loss": 0.5228, "step": 11846 }, { "epoch": 15.16416, "grad_norm": 0.9723334908485413, "learning_rate": 2.6320528211284518e-05, "loss": 0.4968, "step": 11847 }, { "epoch": 15.16544, "grad_norm": 0.9815566539764404, "learning_rate": 2.631852741096439e-05, "loss": 0.5549, "step": 11848 }, { "epoch": 15.16672, "grad_norm": 0.9635928869247437, "learning_rate": 2.631652661064426e-05, "loss": 
0.5149, "step": 11849 }, { "epoch": 15.168, "grad_norm": 0.9444295167922974, "learning_rate": 2.631452581032413e-05, "loss": 0.5338, "step": 11850 }, { "epoch": 15.16928, "grad_norm": 0.9650683403015137, "learning_rate": 2.6312525010004002e-05, "loss": 0.5011, "step": 11851 }, { "epoch": 15.17056, "grad_norm": 0.97637939453125, "learning_rate": 2.6310524209683874e-05, "loss": 0.4643, "step": 11852 }, { "epoch": 15.17184, "grad_norm": 0.9784893989562988, "learning_rate": 2.6308523409363746e-05, "loss": 0.4875, "step": 11853 }, { "epoch": 15.17312, "grad_norm": 0.9220801591873169, "learning_rate": 2.630652260904362e-05, "loss": 0.4402, "step": 11854 }, { "epoch": 15.1744, "grad_norm": 1.0712640285491943, "learning_rate": 2.6304521808723493e-05, "loss": 0.5405, "step": 11855 }, { "epoch": 15.17568, "grad_norm": 0.9892664551734924, "learning_rate": 2.6302521008403365e-05, "loss": 0.5264, "step": 11856 }, { "epoch": 15.17696, "grad_norm": 0.9567806720733643, "learning_rate": 2.6300520208083233e-05, "loss": 0.4772, "step": 11857 }, { "epoch": 15.17824, "grad_norm": 0.9925405979156494, "learning_rate": 2.6298519407763105e-05, "loss": 0.5128, "step": 11858 }, { "epoch": 15.17952, "grad_norm": 0.9644914865493774, "learning_rate": 2.6296518607442977e-05, "loss": 0.4847, "step": 11859 }, { "epoch": 15.1808, "grad_norm": 0.9735480546951294, "learning_rate": 2.629451780712285e-05, "loss": 0.5335, "step": 11860 }, { "epoch": 15.18208, "grad_norm": 0.99360591173172, "learning_rate": 2.6292517006802724e-05, "loss": 0.5185, "step": 11861 }, { "epoch": 15.18336, "grad_norm": 0.9777383208274841, "learning_rate": 2.6290516206482596e-05, "loss": 0.5173, "step": 11862 }, { "epoch": 15.18464, "grad_norm": 0.997850775718689, "learning_rate": 2.6288515406162468e-05, "loss": 0.5199, "step": 11863 }, { "epoch": 15.18592, "grad_norm": 0.9816862344741821, "learning_rate": 2.628651460584234e-05, "loss": 0.5277, "step": 11864 }, { "epoch": 15.1872, "grad_norm": 1.0436856746673584, 
"learning_rate": 2.6284513805522208e-05, "loss": 0.5121, "step": 11865 }, { "epoch": 15.18848, "grad_norm": 0.9631795287132263, "learning_rate": 2.628251300520208e-05, "loss": 0.4858, "step": 11866 }, { "epoch": 15.18976, "grad_norm": 0.8962351083755493, "learning_rate": 2.6280512204881952e-05, "loss": 0.5049, "step": 11867 }, { "epoch": 15.19104, "grad_norm": 0.99549400806427, "learning_rate": 2.6278511404561827e-05, "loss": 0.5051, "step": 11868 }, { "epoch": 15.19232, "grad_norm": 0.926495373249054, "learning_rate": 2.62765106042417e-05, "loss": 0.4831, "step": 11869 }, { "epoch": 15.1936, "grad_norm": 1.0090394020080566, "learning_rate": 2.627450980392157e-05, "loss": 0.5091, "step": 11870 }, { "epoch": 15.19488, "grad_norm": 1.0354359149932861, "learning_rate": 2.6272509003601443e-05, "loss": 0.558, "step": 11871 }, { "epoch": 15.19616, "grad_norm": 1.04960036277771, "learning_rate": 2.6270508203281315e-05, "loss": 0.5582, "step": 11872 }, { "epoch": 15.19744, "grad_norm": 1.0015323162078857, "learning_rate": 2.6268507402961183e-05, "loss": 0.4953, "step": 11873 }, { "epoch": 15.19872, "grad_norm": 0.971566915512085, "learning_rate": 2.6266506602641055e-05, "loss": 0.5299, "step": 11874 }, { "epoch": 15.2, "grad_norm": 0.9685811996459961, "learning_rate": 2.6264505802320934e-05, "loss": 0.5111, "step": 11875 }, { "epoch": 15.20128, "grad_norm": 0.9589853882789612, "learning_rate": 2.6262505002000802e-05, "loss": 0.4904, "step": 11876 }, { "epoch": 15.20256, "grad_norm": 1.025733232498169, "learning_rate": 2.6260504201680674e-05, "loss": 0.4765, "step": 11877 }, { "epoch": 15.20384, "grad_norm": 1.0392835140228271, "learning_rate": 2.6258503401360546e-05, "loss": 0.5459, "step": 11878 }, { "epoch": 15.20512, "grad_norm": 0.9788011312484741, "learning_rate": 2.6256502601040418e-05, "loss": 0.4549, "step": 11879 }, { "epoch": 15.2064, "grad_norm": 0.9786105751991272, "learning_rate": 2.625450180072029e-05, "loss": 0.5015, "step": 11880 }, { "epoch": 15.20768, 
"grad_norm": 0.9867866635322571, "learning_rate": 2.6252501000400158e-05, "loss": 0.5019, "step": 11881 }, { "epoch": 15.20896, "grad_norm": 1.0612010955810547, "learning_rate": 2.6250500200080037e-05, "loss": 0.5723, "step": 11882 }, { "epoch": 15.21024, "grad_norm": 0.9832754731178284, "learning_rate": 2.624849939975991e-05, "loss": 0.5225, "step": 11883 }, { "epoch": 15.21152, "grad_norm": 1.044143795967102, "learning_rate": 2.6246498599439777e-05, "loss": 0.5303, "step": 11884 }, { "epoch": 15.2128, "grad_norm": 0.9782372713088989, "learning_rate": 2.624449779911965e-05, "loss": 0.5279, "step": 11885 }, { "epoch": 15.21408, "grad_norm": 1.020448923110962, "learning_rate": 2.624249699879952e-05, "loss": 0.5357, "step": 11886 }, { "epoch": 15.21536, "grad_norm": 1.0035127401351929, "learning_rate": 2.6240496198479392e-05, "loss": 0.5383, "step": 11887 }, { "epoch": 15.21664, "grad_norm": 0.9856539964675903, "learning_rate": 2.6238495398159264e-05, "loss": 0.5267, "step": 11888 }, { "epoch": 15.21792, "grad_norm": 0.979160726070404, "learning_rate": 2.623649459783914e-05, "loss": 0.4924, "step": 11889 }, { "epoch": 15.2192, "grad_norm": 0.9597509503364563, "learning_rate": 2.623449379751901e-05, "loss": 0.5109, "step": 11890 }, { "epoch": 15.22048, "grad_norm": 0.9971436262130737, "learning_rate": 2.6232492997198883e-05, "loss": 0.5233, "step": 11891 }, { "epoch": 15.22176, "grad_norm": 0.9758315086364746, "learning_rate": 2.623049219687875e-05, "loss": 0.5042, "step": 11892 }, { "epoch": 15.22304, "grad_norm": 1.0089176893234253, "learning_rate": 2.6228491396558624e-05, "loss": 0.5225, "step": 11893 }, { "epoch": 15.22432, "grad_norm": 0.9823707342147827, "learning_rate": 2.6226490596238495e-05, "loss": 0.5532, "step": 11894 }, { "epoch": 15.2256, "grad_norm": 1.033499836921692, "learning_rate": 2.6224489795918367e-05, "loss": 0.5525, "step": 11895 }, { "epoch": 15.22688, "grad_norm": 0.9601119160652161, "learning_rate": 2.622248899559824e-05, "loss": 0.5249, 
"step": 11896 }, { "epoch": 15.22816, "grad_norm": 0.9555276036262512, "learning_rate": 2.6220488195278114e-05, "loss": 0.4677, "step": 11897 }, { "epoch": 15.22944, "grad_norm": 1.0126163959503174, "learning_rate": 2.6218487394957986e-05, "loss": 0.5543, "step": 11898 }, { "epoch": 15.23072, "grad_norm": 1.0423436164855957, "learning_rate": 2.6216486594637858e-05, "loss": 0.5366, "step": 11899 }, { "epoch": 15.232, "grad_norm": 1.0581696033477783, "learning_rate": 2.6214485794317727e-05, "loss": 0.5205, "step": 11900 }, { "epoch": 15.23328, "grad_norm": 1.026396632194519, "learning_rate": 2.62124849939976e-05, "loss": 0.4984, "step": 11901 }, { "epoch": 15.23456, "grad_norm": 1.0131559371948242, "learning_rate": 2.621048419367747e-05, "loss": 0.498, "step": 11902 }, { "epoch": 15.23584, "grad_norm": 1.0236692428588867, "learning_rate": 2.6208483393357342e-05, "loss": 0.5357, "step": 11903 }, { "epoch": 15.23712, "grad_norm": 0.9787890911102295, "learning_rate": 2.6206482593037217e-05, "loss": 0.4933, "step": 11904 }, { "epoch": 15.2384, "grad_norm": 0.9795733094215393, "learning_rate": 2.620448179271709e-05, "loss": 0.523, "step": 11905 }, { "epoch": 15.23968, "grad_norm": 1.0700660943984985, "learning_rate": 2.620248099239696e-05, "loss": 0.5248, "step": 11906 }, { "epoch": 15.24096, "grad_norm": 0.9715010523796082, "learning_rate": 2.6200480192076833e-05, "loss": 0.4913, "step": 11907 }, { "epoch": 15.24224, "grad_norm": 0.9805188179016113, "learning_rate": 2.61984793917567e-05, "loss": 0.5119, "step": 11908 }, { "epoch": 15.24352, "grad_norm": 1.0102686882019043, "learning_rate": 2.6196478591436573e-05, "loss": 0.5117, "step": 11909 }, { "epoch": 15.2448, "grad_norm": 1.045552372932434, "learning_rate": 2.6194477791116445e-05, "loss": 0.563, "step": 11910 }, { "epoch": 15.24608, "grad_norm": 0.9204160571098328, "learning_rate": 2.619247699079632e-05, "loss": 0.4714, "step": 11911 }, { "epoch": 15.24736, "grad_norm": 1.0518666505813599, "learning_rate": 
2.6190476190476192e-05, "loss": 0.5461, "step": 11912 }, { "epoch": 15.24864, "grad_norm": 1.0590685606002808, "learning_rate": 2.6188475390156064e-05, "loss": 0.5677, "step": 11913 }, { "epoch": 15.24992, "grad_norm": 0.9414172172546387, "learning_rate": 2.6186474589835936e-05, "loss": 0.4822, "step": 11914 }, { "epoch": 15.2512, "grad_norm": 0.9399723410606384, "learning_rate": 2.6184473789515808e-05, "loss": 0.509, "step": 11915 }, { "epoch": 15.25248, "grad_norm": 0.9907810688018799, "learning_rate": 2.6182472989195676e-05, "loss": 0.5117, "step": 11916 }, { "epoch": 15.25376, "grad_norm": 1.0035853385925293, "learning_rate": 2.6180472188875548e-05, "loss": 0.5313, "step": 11917 }, { "epoch": 15.25504, "grad_norm": 0.9608493447303772, "learning_rate": 2.6178471388555427e-05, "loss": 0.4925, "step": 11918 }, { "epoch": 15.25632, "grad_norm": 0.933178722858429, "learning_rate": 2.6176470588235295e-05, "loss": 0.5003, "step": 11919 }, { "epoch": 15.2576, "grad_norm": 1.016798734664917, "learning_rate": 2.6174469787915167e-05, "loss": 0.5243, "step": 11920 }, { "epoch": 15.25888, "grad_norm": 1.0794856548309326, "learning_rate": 2.617246898759504e-05, "loss": 0.5542, "step": 11921 }, { "epoch": 15.26016, "grad_norm": 0.988269031047821, "learning_rate": 2.617046818727491e-05, "loss": 0.5095, "step": 11922 }, { "epoch": 15.26144, "grad_norm": 1.0120782852172852, "learning_rate": 2.6168467386954783e-05, "loss": 0.5286, "step": 11923 }, { "epoch": 15.26272, "grad_norm": 0.9555339813232422, "learning_rate": 2.616646658663465e-05, "loss": 0.4668, "step": 11924 }, { "epoch": 15.264, "grad_norm": 0.9648301005363464, "learning_rate": 2.616446578631453e-05, "loss": 0.5279, "step": 11925 }, { "epoch": 15.26528, "grad_norm": 1.041354775428772, "learning_rate": 2.61624649859944e-05, "loss": 0.5173, "step": 11926 }, { "epoch": 15.26656, "grad_norm": 0.9829549193382263, "learning_rate": 2.616046418567427e-05, "loss": 0.5126, "step": 11927 }, { "epoch": 15.26784, "grad_norm": 
1.0028904676437378, "learning_rate": 2.6158463385354142e-05, "loss": 0.5008, "step": 11928 }, { "epoch": 15.269120000000001, "grad_norm": 0.9798859357833862, "learning_rate": 2.6156462585034014e-05, "loss": 0.5285, "step": 11929 }, { "epoch": 15.2704, "grad_norm": 1.0254261493682861, "learning_rate": 2.6154461784713886e-05, "loss": 0.5012, "step": 11930 }, { "epoch": 15.27168, "grad_norm": 0.9710574746131897, "learning_rate": 2.6152460984393757e-05, "loss": 0.5052, "step": 11931 }, { "epoch": 15.27296, "grad_norm": 1.0092233419418335, "learning_rate": 2.6150460184073633e-05, "loss": 0.5666, "step": 11932 }, { "epoch": 15.27424, "grad_norm": 1.0055060386657715, "learning_rate": 2.6148459383753505e-05, "loss": 0.5386, "step": 11933 }, { "epoch": 15.27552, "grad_norm": 1.033969759941101, "learning_rate": 2.6146458583433376e-05, "loss": 0.5583, "step": 11934 }, { "epoch": 15.2768, "grad_norm": 0.95125812292099, "learning_rate": 2.6144457783113245e-05, "loss": 0.4865, "step": 11935 }, { "epoch": 15.27808, "grad_norm": 1.0022237300872803, "learning_rate": 2.6142456982793117e-05, "loss": 0.5342, "step": 11936 }, { "epoch": 15.27936, "grad_norm": 0.9819777011871338, "learning_rate": 2.614045618247299e-05, "loss": 0.5323, "step": 11937 }, { "epoch": 15.28064, "grad_norm": 0.982841968536377, "learning_rate": 2.613845538215286e-05, "loss": 0.4979, "step": 11938 }, { "epoch": 15.28192, "grad_norm": 1.0762674808502197, "learning_rate": 2.6136454581832736e-05, "loss": 0.559, "step": 11939 }, { "epoch": 15.2832, "grad_norm": 1.0399508476257324, "learning_rate": 2.6134453781512608e-05, "loss": 0.5466, "step": 11940 }, { "epoch": 15.28448, "grad_norm": 0.9770174026489258, "learning_rate": 2.613245298119248e-05, "loss": 0.5081, "step": 11941 }, { "epoch": 15.28576, "grad_norm": 1.06191885471344, "learning_rate": 2.613045218087235e-05, "loss": 0.5334, "step": 11942 }, { "epoch": 15.28704, "grad_norm": 0.9418057203292847, "learning_rate": 2.612845138055222e-05, "loss": 0.4954, "step": 
11943 }, { "epoch": 15.28832, "grad_norm": 0.9617568254470825, "learning_rate": 2.612645058023209e-05, "loss": 0.5103, "step": 11944 }, { "epoch": 15.2896, "grad_norm": 1.0067156553268433, "learning_rate": 2.6124449779911963e-05, "loss": 0.5035, "step": 11945 }, { "epoch": 15.29088, "grad_norm": 1.002389907836914, "learning_rate": 2.612244897959184e-05, "loss": 0.5344, "step": 11946 }, { "epoch": 15.292159999999999, "grad_norm": 1.0987991094589233, "learning_rate": 2.612044817927171e-05, "loss": 0.5497, "step": 11947 }, { "epoch": 15.29344, "grad_norm": 1.0003621578216553, "learning_rate": 2.6118447378951582e-05, "loss": 0.4983, "step": 11948 }, { "epoch": 15.29472, "grad_norm": 0.9967955946922302, "learning_rate": 2.6116446578631454e-05, "loss": 0.5338, "step": 11949 }, { "epoch": 15.296, "grad_norm": 1.0217162370681763, "learning_rate": 2.6114445778311326e-05, "loss": 0.5592, "step": 11950 }, { "epoch": 15.29728, "grad_norm": 1.0223591327667236, "learning_rate": 2.6112444977991195e-05, "loss": 0.5181, "step": 11951 }, { "epoch": 15.29856, "grad_norm": 0.9928198456764221, "learning_rate": 2.6110444177671066e-05, "loss": 0.5228, "step": 11952 }, { "epoch": 15.29984, "grad_norm": 1.0652741193771362, "learning_rate": 2.6108443377350945e-05, "loss": 0.5615, "step": 11953 }, { "epoch": 15.30112, "grad_norm": 1.0429683923721313, "learning_rate": 2.6106442577030814e-05, "loss": 0.5492, "step": 11954 }, { "epoch": 15.3024, "grad_norm": 0.9588310718536377, "learning_rate": 2.6104441776710685e-05, "loss": 0.4757, "step": 11955 }, { "epoch": 15.30368, "grad_norm": 0.9648290872573853, "learning_rate": 2.6102440976390557e-05, "loss": 0.4989, "step": 11956 }, { "epoch": 15.30496, "grad_norm": 1.0313849449157715, "learning_rate": 2.610044017607043e-05, "loss": 0.5461, "step": 11957 }, { "epoch": 15.30624, "grad_norm": 1.0368759632110596, "learning_rate": 2.60984393757503e-05, "loss": 0.5372, "step": 11958 }, { "epoch": 15.30752, "grad_norm": 0.9525302052497864, "learning_rate": 
2.609643857543017e-05, "loss": 0.4805, "step": 11959 }, { "epoch": 15.3088, "grad_norm": 0.9847881197929382, "learning_rate": 2.6094437775110048e-05, "loss": 0.5264, "step": 11960 }, { "epoch": 15.31008, "grad_norm": 0.9874829053878784, "learning_rate": 2.609243697478992e-05, "loss": 0.5335, "step": 11961 }, { "epoch": 15.31136, "grad_norm": 1.0466731786727905, "learning_rate": 2.609043617446979e-05, "loss": 0.5521, "step": 11962 }, { "epoch": 15.31264, "grad_norm": 1.0074479579925537, "learning_rate": 2.608843537414966e-05, "loss": 0.5141, "step": 11963 }, { "epoch": 15.31392, "grad_norm": 1.0698914527893066, "learning_rate": 2.6086434573829532e-05, "loss": 0.5662, "step": 11964 }, { "epoch": 15.3152, "grad_norm": 0.9645558595657349, "learning_rate": 2.6084433773509404e-05, "loss": 0.5126, "step": 11965 }, { "epoch": 15.31648, "grad_norm": 0.9718447923660278, "learning_rate": 2.6082432973189276e-05, "loss": 0.5003, "step": 11966 }, { "epoch": 15.31776, "grad_norm": 0.9619754552841187, "learning_rate": 2.608043217286915e-05, "loss": 0.52, "step": 11967 }, { "epoch": 15.31904, "grad_norm": 0.940642237663269, "learning_rate": 2.6078431372549023e-05, "loss": 0.5113, "step": 11968 }, { "epoch": 15.32032, "grad_norm": 1.0012346506118774, "learning_rate": 2.6076430572228895e-05, "loss": 0.4932, "step": 11969 }, { "epoch": 15.3216, "grad_norm": 0.9923637509346008, "learning_rate": 2.6074429771908763e-05, "loss": 0.5567, "step": 11970 }, { "epoch": 15.32288, "grad_norm": 0.9815508723258972, "learning_rate": 2.6072428971588635e-05, "loss": 0.5412, "step": 11971 }, { "epoch": 15.32416, "grad_norm": 1.0121711492538452, "learning_rate": 2.6070428171268507e-05, "loss": 0.54, "step": 11972 }, { "epoch": 15.32544, "grad_norm": 1.0295419692993164, "learning_rate": 2.606842737094838e-05, "loss": 0.5261, "step": 11973 }, { "epoch": 15.32672, "grad_norm": 0.9861733913421631, "learning_rate": 2.6066426570628254e-05, "loss": 0.5237, "step": 11974 }, { "epoch": 15.328, "grad_norm": 
0.9513747692108154, "learning_rate": 2.6064425770308126e-05, "loss": 0.5107, "step": 11975 }, { "epoch": 15.32928, "grad_norm": 0.9383821487426758, "learning_rate": 2.6062424969987998e-05, "loss": 0.5109, "step": 11976 }, { "epoch": 15.33056, "grad_norm": 0.9647513628005981, "learning_rate": 2.606042416966787e-05, "loss": 0.5326, "step": 11977 }, { "epoch": 15.33184, "grad_norm": 1.0194227695465088, "learning_rate": 2.6058423369347738e-05, "loss": 0.5523, "step": 11978 }, { "epoch": 15.33312, "grad_norm": 0.9552738070487976, "learning_rate": 2.605642256902761e-05, "loss": 0.494, "step": 11979 }, { "epoch": 15.3344, "grad_norm": 0.9982839822769165, "learning_rate": 2.6054421768707482e-05, "loss": 0.5138, "step": 11980 }, { "epoch": 15.33568, "grad_norm": 0.926050066947937, "learning_rate": 2.6052420968387357e-05, "loss": 0.4827, "step": 11981 }, { "epoch": 15.33696, "grad_norm": 0.9754160046577454, "learning_rate": 2.605042016806723e-05, "loss": 0.4864, "step": 11982 }, { "epoch": 15.33824, "grad_norm": 1.0375244617462158, "learning_rate": 2.60484193677471e-05, "loss": 0.5324, "step": 11983 }, { "epoch": 15.33952, "grad_norm": 0.998441755771637, "learning_rate": 2.6046418567426973e-05, "loss": 0.5374, "step": 11984 }, { "epoch": 15.3408, "grad_norm": 0.9597004055976868, "learning_rate": 2.6044417767106845e-05, "loss": 0.5497, "step": 11985 }, { "epoch": 15.34208, "grad_norm": 0.984380304813385, "learning_rate": 2.6042416966786713e-05, "loss": 0.4974, "step": 11986 }, { "epoch": 15.34336, "grad_norm": 1.0443543195724487, "learning_rate": 2.6040416166466585e-05, "loss": 0.5094, "step": 11987 }, { "epoch": 15.34464, "grad_norm": 1.033092737197876, "learning_rate": 2.6038415366146463e-05, "loss": 0.5502, "step": 11988 }, { "epoch": 15.34592, "grad_norm": 1.0299837589263916, "learning_rate": 2.6036414565826332e-05, "loss": 0.5757, "step": 11989 }, { "epoch": 15.3472, "grad_norm": 0.9337131381034851, "learning_rate": 2.6034413765506204e-05, "loss": 0.4797, "step": 11990 
}, { "epoch": 15.34848, "grad_norm": 0.9382844567298889, "learning_rate": 2.6032412965186076e-05, "loss": 0.4631, "step": 11991 }, { "epoch": 15.34976, "grad_norm": 0.9595440626144409, "learning_rate": 2.6030412164865948e-05, "loss": 0.5302, "step": 11992 }, { "epoch": 15.35104, "grad_norm": 0.9686310887336731, "learning_rate": 2.602841136454582e-05, "loss": 0.5061, "step": 11993 }, { "epoch": 15.35232, "grad_norm": 0.9132741093635559, "learning_rate": 2.6026410564225688e-05, "loss": 0.4801, "step": 11994 }, { "epoch": 15.3536, "grad_norm": 0.9716625213623047, "learning_rate": 2.6024409763905566e-05, "loss": 0.5035, "step": 11995 }, { "epoch": 15.35488, "grad_norm": 1.003369927406311, "learning_rate": 2.602240896358544e-05, "loss": 0.547, "step": 11996 }, { "epoch": 15.35616, "grad_norm": 1.0026856660842896, "learning_rate": 2.6020408163265307e-05, "loss": 0.547, "step": 11997 }, { "epoch": 15.35744, "grad_norm": 0.9878264665603638, "learning_rate": 2.601840736294518e-05, "loss": 0.5284, "step": 11998 }, { "epoch": 15.35872, "grad_norm": 1.0457453727722168, "learning_rate": 2.601640656262505e-05, "loss": 0.5354, "step": 11999 }, { "epoch": 15.36, "grad_norm": 1.0366843938827515, "learning_rate": 2.6014405762304922e-05, "loss": 0.5955, "step": 12000 }, { "epoch": 15.36128, "grad_norm": 0.9786233901977539, "learning_rate": 2.6012404961984794e-05, "loss": 0.521, "step": 12001 }, { "epoch": 15.36256, "grad_norm": 1.0292565822601318, "learning_rate": 2.601040416166467e-05, "loss": 0.5014, "step": 12002 }, { "epoch": 15.36384, "grad_norm": 1.0155521631240845, "learning_rate": 2.600840336134454e-05, "loss": 0.5361, "step": 12003 }, { "epoch": 15.36512, "grad_norm": 0.9816110134124756, "learning_rate": 2.6006402561024413e-05, "loss": 0.5233, "step": 12004 }, { "epoch": 15.3664, "grad_norm": 0.9563271999359131, "learning_rate": 2.600440176070428e-05, "loss": 0.5141, "step": 12005 }, { "epoch": 15.36768, "grad_norm": 1.029533863067627, "learning_rate": 
2.6002400960384154e-05, "loss": 0.5331, "step": 12006 }, { "epoch": 15.36896, "grad_norm": 1.0046855211257935, "learning_rate": 2.6000400160064025e-05, "loss": 0.535, "step": 12007 }, { "epoch": 15.37024, "grad_norm": 0.9804155826568604, "learning_rate": 2.5998399359743897e-05, "loss": 0.513, "step": 12008 }, { "epoch": 15.37152, "grad_norm": 0.9942843914031982, "learning_rate": 2.599639855942377e-05, "loss": 0.5471, "step": 12009 }, { "epoch": 15.3728, "grad_norm": 1.0089268684387207, "learning_rate": 2.5994397759103644e-05, "loss": 0.5792, "step": 12010 }, { "epoch": 15.37408, "grad_norm": 0.9854798913002014, "learning_rate": 2.5992396958783516e-05, "loss": 0.5647, "step": 12011 }, { "epoch": 15.37536, "grad_norm": 1.0184553861618042, "learning_rate": 2.5990396158463388e-05, "loss": 0.4929, "step": 12012 }, { "epoch": 15.37664, "grad_norm": 0.9660348296165466, "learning_rate": 2.5988395358143257e-05, "loss": 0.5246, "step": 12013 }, { "epoch": 15.37792, "grad_norm": 0.9935752153396606, "learning_rate": 2.598639455782313e-05, "loss": 0.5349, "step": 12014 }, { "epoch": 15.3792, "grad_norm": 0.953775942325592, "learning_rate": 2.5984393757503e-05, "loss": 0.5124, "step": 12015 }, { "epoch": 15.38048, "grad_norm": 0.9608764052391052, "learning_rate": 2.5982392957182872e-05, "loss": 0.5077, "step": 12016 }, { "epoch": 15.38176, "grad_norm": 0.9942414164543152, "learning_rate": 2.5980392156862747e-05, "loss": 0.5343, "step": 12017 }, { "epoch": 15.38304, "grad_norm": 0.9368318915367126, "learning_rate": 2.597839135654262e-05, "loss": 0.5105, "step": 12018 }, { "epoch": 15.38432, "grad_norm": 1.0121771097183228, "learning_rate": 2.597639055622249e-05, "loss": 0.5283, "step": 12019 }, { "epoch": 15.3856, "grad_norm": 1.025707721710205, "learning_rate": 2.5974389755902363e-05, "loss": 0.5341, "step": 12020 }, { "epoch": 15.38688, "grad_norm": 1.002144694328308, "learning_rate": 2.597238895558223e-05, "loss": 0.5056, "step": 12021 }, { "epoch": 15.38816, "grad_norm": 
0.9893521070480347, "learning_rate": 2.5970388155262103e-05, "loss": 0.5587, "step": 12022 }, { "epoch": 15.38944, "grad_norm": 0.9755342602729797, "learning_rate": 2.5968387354941975e-05, "loss": 0.5413, "step": 12023 }, { "epoch": 15.39072, "grad_norm": 0.972171425819397, "learning_rate": 2.5966386554621854e-05, "loss": 0.5081, "step": 12024 }, { "epoch": 15.392, "grad_norm": 1.0347623825073242, "learning_rate": 2.5964385754301722e-05, "loss": 0.5589, "step": 12025 }, { "epoch": 15.39328, "grad_norm": 0.9687240123748779, "learning_rate": 2.5962384953981594e-05, "loss": 0.5451, "step": 12026 }, { "epoch": 15.39456, "grad_norm": 0.9607928991317749, "learning_rate": 2.5960384153661466e-05, "loss": 0.5309, "step": 12027 }, { "epoch": 15.39584, "grad_norm": 0.9592061042785645, "learning_rate": 2.5958383353341338e-05, "loss": 0.5466, "step": 12028 }, { "epoch": 15.39712, "grad_norm": 1.015899419784546, "learning_rate": 2.5956382553021206e-05, "loss": 0.5245, "step": 12029 }, { "epoch": 15.3984, "grad_norm": 0.9896045327186584, "learning_rate": 2.5954381752701078e-05, "loss": 0.5316, "step": 12030 }, { "epoch": 15.39968, "grad_norm": 0.9522262811660767, "learning_rate": 2.5952380952380957e-05, "loss": 0.5192, "step": 12031 }, { "epoch": 15.40096, "grad_norm": 0.987667441368103, "learning_rate": 2.595038015206083e-05, "loss": 0.5136, "step": 12032 }, { "epoch": 15.40224, "grad_norm": 1.0220928192138672, "learning_rate": 2.5948379351740697e-05, "loss": 0.57, "step": 12033 }, { "epoch": 15.40352, "grad_norm": 1.0101734399795532, "learning_rate": 2.594637855142057e-05, "loss": 0.5165, "step": 12034 }, { "epoch": 15.4048, "grad_norm": 0.9381194114685059, "learning_rate": 2.594437775110044e-05, "loss": 0.4597, "step": 12035 }, { "epoch": 15.40608, "grad_norm": 1.0812232494354248, "learning_rate": 2.5942376950780313e-05, "loss": 0.5632, "step": 12036 }, { "epoch": 15.40736, "grad_norm": 1.0491623878479004, "learning_rate": 2.594037615046018e-05, "loss": 0.538, "step": 12037 }, 
{ "epoch": 15.40864, "grad_norm": 0.9841446876525879, "learning_rate": 2.593837535014006e-05, "loss": 0.5059, "step": 12038 }, { "epoch": 15.40992, "grad_norm": 0.9881943464279175, "learning_rate": 2.593637454981993e-05, "loss": 0.5154, "step": 12039 }, { "epoch": 15.411200000000001, "grad_norm": 1.0402194261550903, "learning_rate": 2.5934373749499803e-05, "loss": 0.5756, "step": 12040 }, { "epoch": 15.41248, "grad_norm": 1.095178246498108, "learning_rate": 2.5932372949179672e-05, "loss": 0.5601, "step": 12041 }, { "epoch": 15.41376, "grad_norm": 1.0817269086837769, "learning_rate": 2.5930372148859544e-05, "loss": 0.5595, "step": 12042 }, { "epoch": 15.41504, "grad_norm": 0.9906639456748962, "learning_rate": 2.5928371348539416e-05, "loss": 0.5067, "step": 12043 }, { "epoch": 15.41632, "grad_norm": 0.9647719860076904, "learning_rate": 2.5926370548219287e-05, "loss": 0.4848, "step": 12044 }, { "epoch": 15.4176, "grad_norm": 0.9558652639389038, "learning_rate": 2.5924369747899163e-05, "loss": 0.5381, "step": 12045 }, { "epoch": 15.41888, "grad_norm": 0.9958375692367554, "learning_rate": 2.5922368947579035e-05, "loss": 0.5672, "step": 12046 }, { "epoch": 15.42016, "grad_norm": 0.980514645576477, "learning_rate": 2.5920368147258906e-05, "loss": 0.5479, "step": 12047 }, { "epoch": 15.42144, "grad_norm": 1.041264533996582, "learning_rate": 2.5918367346938778e-05, "loss": 0.5433, "step": 12048 }, { "epoch": 15.42272, "grad_norm": 0.9948632121086121, "learning_rate": 2.5916366546618647e-05, "loss": 0.5445, "step": 12049 }, { "epoch": 15.424, "grad_norm": 0.9686235189437866, "learning_rate": 2.591436574629852e-05, "loss": 0.5375, "step": 12050 }, { "epoch": 15.42528, "grad_norm": 0.9626991748809814, "learning_rate": 2.591236494597839e-05, "loss": 0.5071, "step": 12051 }, { "epoch": 15.42656, "grad_norm": 0.9826670289039612, "learning_rate": 2.5910364145658266e-05, "loss": 0.52, "step": 12052 }, { "epoch": 15.42784, "grad_norm": 0.9592919945716858, "learning_rate": 
2.5908363345338138e-05, "loss": 0.5125, "step": 12053 }, { "epoch": 15.42912, "grad_norm": 0.9787027835845947, "learning_rate": 2.590636254501801e-05, "loss": 0.5241, "step": 12054 }, { "epoch": 15.4304, "grad_norm": 0.9847190380096436, "learning_rate": 2.590436174469788e-05, "loss": 0.4971, "step": 12055 }, { "epoch": 15.43168, "grad_norm": 0.9840123653411865, "learning_rate": 2.5902360944377753e-05, "loss": 0.55, "step": 12056 }, { "epoch": 15.43296, "grad_norm": 0.9877153635025024, "learning_rate": 2.590036014405762e-05, "loss": 0.4923, "step": 12057 }, { "epoch": 15.43424, "grad_norm": 0.9981076717376709, "learning_rate": 2.5898359343737493e-05, "loss": 0.5311, "step": 12058 }, { "epoch": 15.43552, "grad_norm": 1.1250180006027222, "learning_rate": 2.5896358543417372e-05, "loss": 0.5702, "step": 12059 }, { "epoch": 15.4368, "grad_norm": 1.031806230545044, "learning_rate": 2.589435774309724e-05, "loss": 0.568, "step": 12060 }, { "epoch": 15.43808, "grad_norm": 0.9630105495452881, "learning_rate": 2.5892356942777112e-05, "loss": 0.4828, "step": 12061 }, { "epoch": 15.43936, "grad_norm": 1.0042251348495483, "learning_rate": 2.5890356142456984e-05, "loss": 0.4779, "step": 12062 }, { "epoch": 15.44064, "grad_norm": 0.9796602725982666, "learning_rate": 2.5888355342136856e-05, "loss": 0.4996, "step": 12063 }, { "epoch": 15.44192, "grad_norm": 1.0524052381515503, "learning_rate": 2.5886354541816728e-05, "loss": 0.5759, "step": 12064 }, { "epoch": 15.4432, "grad_norm": 1.0064433813095093, "learning_rate": 2.5884353741496596e-05, "loss": 0.4933, "step": 12065 }, { "epoch": 15.44448, "grad_norm": 0.9477246403694153, "learning_rate": 2.5882352941176475e-05, "loss": 0.5054, "step": 12066 }, { "epoch": 15.44576, "grad_norm": 0.9989529848098755, "learning_rate": 2.5880352140856347e-05, "loss": 0.5503, "step": 12067 }, { "epoch": 15.44704, "grad_norm": 1.0528007745742798, "learning_rate": 2.5878351340536215e-05, "loss": 0.5832, "step": 12068 }, { "epoch": 15.44832, "grad_norm": 
0.9893178939819336, "learning_rate": 2.5876350540216087e-05, "loss": 0.4902, "step": 12069 }, { "epoch": 15.4496, "grad_norm": 0.9771265387535095, "learning_rate": 2.587434973989596e-05, "loss": 0.531, "step": 12070 }, { "epoch": 15.45088, "grad_norm": 1.0311421155929565, "learning_rate": 2.587234893957583e-05, "loss": 0.6119, "step": 12071 }, { "epoch": 15.45216, "grad_norm": 0.9262276291847229, "learning_rate": 2.5870348139255703e-05, "loss": 0.5396, "step": 12072 }, { "epoch": 15.45344, "grad_norm": 0.9662413001060486, "learning_rate": 2.5868347338935578e-05, "loss": 0.5006, "step": 12073 }, { "epoch": 15.45472, "grad_norm": 1.0002639293670654, "learning_rate": 2.586634653861545e-05, "loss": 0.5263, "step": 12074 }, { "epoch": 15.456, "grad_norm": 1.010391354560852, "learning_rate": 2.5864345738295322e-05, "loss": 0.5296, "step": 12075 }, { "epoch": 15.45728, "grad_norm": 0.9372914433479309, "learning_rate": 2.586234493797519e-05, "loss": 0.4601, "step": 12076 }, { "epoch": 15.45856, "grad_norm": 0.9640786647796631, "learning_rate": 2.5860344137655062e-05, "loss": 0.5049, "step": 12077 }, { "epoch": 15.45984, "grad_norm": 0.9395653009414673, "learning_rate": 2.5858343337334934e-05, "loss": 0.4935, "step": 12078 }, { "epoch": 15.46112, "grad_norm": 0.9529640078544617, "learning_rate": 2.5856342537014806e-05, "loss": 0.5194, "step": 12079 }, { "epoch": 15.4624, "grad_norm": 0.9985076189041138, "learning_rate": 2.585434173669468e-05, "loss": 0.5531, "step": 12080 }, { "epoch": 15.46368, "grad_norm": 0.924523115158081, "learning_rate": 2.5852340936374553e-05, "loss": 0.4675, "step": 12081 }, { "epoch": 15.46496, "grad_norm": 1.0334564447402954, "learning_rate": 2.5850340136054425e-05, "loss": 0.5463, "step": 12082 }, { "epoch": 15.466239999999999, "grad_norm": 0.9668273329734802, "learning_rate": 2.5848339335734297e-05, "loss": 0.5158, "step": 12083 }, { "epoch": 15.46752, "grad_norm": 1.0096427202224731, "learning_rate": 2.5846338535414165e-05, "loss": 0.5181, 
"step": 12084 }, { "epoch": 15.4688, "grad_norm": 0.9349405169487, "learning_rate": 2.5844337735094037e-05, "loss": 0.4826, "step": 12085 }, { "epoch": 15.47008, "grad_norm": 0.9573792219161987, "learning_rate": 2.584233693477391e-05, "loss": 0.4755, "step": 12086 }, { "epoch": 15.47136, "grad_norm": 1.014906644821167, "learning_rate": 2.5840336134453784e-05, "loss": 0.515, "step": 12087 }, { "epoch": 15.47264, "grad_norm": 0.9619317054748535, "learning_rate": 2.5838335334133656e-05, "loss": 0.5234, "step": 12088 }, { "epoch": 15.47392, "grad_norm": 0.9710426330566406, "learning_rate": 2.5836334533813528e-05, "loss": 0.4808, "step": 12089 }, { "epoch": 15.4752, "grad_norm": 1.0196417570114136, "learning_rate": 2.58343337334934e-05, "loss": 0.5433, "step": 12090 }, { "epoch": 15.47648, "grad_norm": 0.9249922037124634, "learning_rate": 2.583233293317327e-05, "loss": 0.4858, "step": 12091 }, { "epoch": 15.47776, "grad_norm": 1.008486032485962, "learning_rate": 2.583033213285314e-05, "loss": 0.5453, "step": 12092 }, { "epoch": 15.47904, "grad_norm": 0.9991050362586975, "learning_rate": 2.5828331332533012e-05, "loss": 0.5167, "step": 12093 }, { "epoch": 15.48032, "grad_norm": 0.9885373115539551, "learning_rate": 2.582633053221289e-05, "loss": 0.5235, "step": 12094 }, { "epoch": 15.4816, "grad_norm": 0.9774200320243835, "learning_rate": 2.582432973189276e-05, "loss": 0.4835, "step": 12095 }, { "epoch": 15.48288, "grad_norm": 0.9708588123321533, "learning_rate": 2.582232893157263e-05, "loss": 0.5181, "step": 12096 }, { "epoch": 15.48416, "grad_norm": 1.0187863111495972, "learning_rate": 2.5820328131252503e-05, "loss": 0.5702, "step": 12097 }, { "epoch": 15.48544, "grad_norm": 0.9514103531837463, "learning_rate": 2.5818327330932375e-05, "loss": 0.4906, "step": 12098 }, { "epoch": 15.48672, "grad_norm": 0.965501606464386, "learning_rate": 2.5816326530612246e-05, "loss": 0.5514, "step": 12099 }, { "epoch": 15.488, "grad_norm": 1.0036544799804688, "learning_rate": 
2.5814325730292115e-05, "loss": 0.4937, "step": 12100 }, { "epoch": 15.48928, "grad_norm": 1.047786831855774, "learning_rate": 2.5812324929971993e-05, "loss": 0.535, "step": 12101 }, { "epoch": 15.49056, "grad_norm": 1.0028244256973267, "learning_rate": 2.5810324129651865e-05, "loss": 0.5845, "step": 12102 }, { "epoch": 15.49184, "grad_norm": 0.9661297798156738, "learning_rate": 2.5808323329331734e-05, "loss": 0.4615, "step": 12103 }, { "epoch": 15.49312, "grad_norm": 0.9932764172554016, "learning_rate": 2.5806322529011606e-05, "loss": 0.4964, "step": 12104 }, { "epoch": 15.4944, "grad_norm": 0.9625186920166016, "learning_rate": 2.5804321728691478e-05, "loss": 0.5339, "step": 12105 }, { "epoch": 15.49568, "grad_norm": 0.9661415219306946, "learning_rate": 2.580232092837135e-05, "loss": 0.5432, "step": 12106 }, { "epoch": 15.49696, "grad_norm": 1.0386898517608643, "learning_rate": 2.580032012805122e-05, "loss": 0.5427, "step": 12107 }, { "epoch": 15.49824, "grad_norm": 0.9474533200263977, "learning_rate": 2.5798319327731096e-05, "loss": 0.5278, "step": 12108 }, { "epoch": 15.49952, "grad_norm": 1.0017415285110474, "learning_rate": 2.579631852741097e-05, "loss": 0.5476, "step": 12109 }, { "epoch": 15.5008, "grad_norm": 1.017276406288147, "learning_rate": 2.579431772709084e-05, "loss": 0.5015, "step": 12110 }, { "epoch": 15.50208, "grad_norm": 1.0097792148590088, "learning_rate": 2.579231692677071e-05, "loss": 0.5492, "step": 12111 }, { "epoch": 15.50336, "grad_norm": 0.9869034290313721, "learning_rate": 2.579031612645058e-05, "loss": 0.535, "step": 12112 }, { "epoch": 15.50464, "grad_norm": 1.0194756984710693, "learning_rate": 2.5788315326130452e-05, "loss": 0.5019, "step": 12113 }, { "epoch": 15.50592, "grad_norm": 1.0005152225494385, "learning_rate": 2.5786314525810324e-05, "loss": 0.525, "step": 12114 }, { "epoch": 15.5072, "grad_norm": 0.9427708387374878, "learning_rate": 2.57843137254902e-05, "loss": 0.5217, "step": 12115 }, { "epoch": 15.50848, "grad_norm": 
0.9799354672431946, "learning_rate": 2.578231292517007e-05, "loss": 0.4981, "step": 12116 }, { "epoch": 15.50976, "grad_norm": 0.9807402491569519, "learning_rate": 2.5780312124849943e-05, "loss": 0.5026, "step": 12117 }, { "epoch": 15.51104, "grad_norm": 0.9583036303520203, "learning_rate": 2.5778311324529815e-05, "loss": 0.5072, "step": 12118 }, { "epoch": 15.51232, "grad_norm": 1.0171658992767334, "learning_rate": 2.5776310524209684e-05, "loss": 0.5592, "step": 12119 }, { "epoch": 15.5136, "grad_norm": 0.9614086747169495, "learning_rate": 2.5774309723889555e-05, "loss": 0.4961, "step": 12120 }, { "epoch": 15.51488, "grad_norm": 0.9986427426338196, "learning_rate": 2.5772308923569427e-05, "loss": 0.4993, "step": 12121 }, { "epoch": 15.51616, "grad_norm": 0.9052167534828186, "learning_rate": 2.57703081232493e-05, "loss": 0.4783, "step": 12122 }, { "epoch": 15.51744, "grad_norm": 0.9978095889091492, "learning_rate": 2.5768307322929174e-05, "loss": 0.5733, "step": 12123 }, { "epoch": 15.51872, "grad_norm": 0.9685370326042175, "learning_rate": 2.5766306522609046e-05, "loss": 0.51, "step": 12124 }, { "epoch": 15.52, "grad_norm": 1.008240818977356, "learning_rate": 2.5764305722288918e-05, "loss": 0.5129, "step": 12125 }, { "epoch": 15.52128, "grad_norm": 0.9818560481071472, "learning_rate": 2.576230492196879e-05, "loss": 0.5501, "step": 12126 }, { "epoch": 15.52256, "grad_norm": 0.9996564984321594, "learning_rate": 2.576030412164866e-05, "loss": 0.4986, "step": 12127 }, { "epoch": 15.52384, "grad_norm": 1.0474154949188232, "learning_rate": 2.575830332132853e-05, "loss": 0.5526, "step": 12128 }, { "epoch": 15.52512, "grad_norm": 1.0156049728393555, "learning_rate": 2.5756302521008402e-05, "loss": 0.5273, "step": 12129 }, { "epoch": 15.5264, "grad_norm": 0.9961782693862915, "learning_rate": 2.5754301720688277e-05, "loss": 0.491, "step": 12130 }, { "epoch": 15.52768, "grad_norm": 1.0487329959869385, "learning_rate": 2.575230092036815e-05, "loss": 0.5798, "step": 12131 }, { 
"epoch": 15.52896, "grad_norm": 0.9667060971260071, "learning_rate": 2.575030012004802e-05, "loss": 0.5385, "step": 12132 }, { "epoch": 15.53024, "grad_norm": 0.9694793820381165, "learning_rate": 2.5748299319727893e-05, "loss": 0.5248, "step": 12133 }, { "epoch": 15.53152, "grad_norm": 0.9100515246391296, "learning_rate": 2.5746298519407765e-05, "loss": 0.4769, "step": 12134 }, { "epoch": 15.5328, "grad_norm": 0.9730302095413208, "learning_rate": 2.5744297719087633e-05, "loss": 0.5182, "step": 12135 }, { "epoch": 15.53408, "grad_norm": 0.9823408126831055, "learning_rate": 2.5742296918767505e-05, "loss": 0.5195, "step": 12136 }, { "epoch": 15.53536, "grad_norm": 1.023830771446228, "learning_rate": 2.5740296118447384e-05, "loss": 0.5501, "step": 12137 }, { "epoch": 15.53664, "grad_norm": 0.9773147106170654, "learning_rate": 2.5738295318127252e-05, "loss": 0.5068, "step": 12138 }, { "epoch": 15.53792, "grad_norm": 0.9705550074577332, "learning_rate": 2.5736294517807124e-05, "loss": 0.5073, "step": 12139 }, { "epoch": 15.5392, "grad_norm": 0.9751460552215576, "learning_rate": 2.5734293717486996e-05, "loss": 0.5036, "step": 12140 }, { "epoch": 15.54048, "grad_norm": 0.9804943799972534, "learning_rate": 2.5732292917166868e-05, "loss": 0.4845, "step": 12141 }, { "epoch": 15.54176, "grad_norm": 1.020609974861145, "learning_rate": 2.573029211684674e-05, "loss": 0.5516, "step": 12142 }, { "epoch": 15.54304, "grad_norm": 0.9483124017715454, "learning_rate": 2.5728291316526608e-05, "loss": 0.4902, "step": 12143 }, { "epoch": 15.54432, "grad_norm": 1.0113613605499268, "learning_rate": 2.5726290516206487e-05, "loss": 0.5527, "step": 12144 }, { "epoch": 15.5456, "grad_norm": 1.037793755531311, "learning_rate": 2.572428971588636e-05, "loss": 0.543, "step": 12145 }, { "epoch": 15.54688, "grad_norm": 1.0010935068130493, "learning_rate": 2.5722288915566227e-05, "loss": 0.5157, "step": 12146 }, { "epoch": 15.54816, "grad_norm": 0.9994384050369263, "learning_rate": 
2.57202881152461e-05, "loss": 0.5223, "step": 12147 }, { "epoch": 15.54944, "grad_norm": 0.9662322998046875, "learning_rate": 2.571828731492597e-05, "loss": 0.5161, "step": 12148 }, { "epoch": 15.55072, "grad_norm": 1.0219558477401733, "learning_rate": 2.5716286514605843e-05, "loss": 0.534, "step": 12149 }, { "epoch": 15.552, "grad_norm": 0.9742558002471924, "learning_rate": 2.5714285714285714e-05, "loss": 0.5151, "step": 12150 }, { "epoch": 15.55328, "grad_norm": 1.0059118270874023, "learning_rate": 2.571228491396559e-05, "loss": 0.5637, "step": 12151 }, { "epoch": 15.55456, "grad_norm": 0.9485430121421814, "learning_rate": 2.571028411364546e-05, "loss": 0.4919, "step": 12152 }, { "epoch": 15.55584, "grad_norm": 0.9600156545639038, "learning_rate": 2.5708283313325333e-05, "loss": 0.529, "step": 12153 }, { "epoch": 15.55712, "grad_norm": 0.9811621308326721, "learning_rate": 2.5706282513005202e-05, "loss": 0.5033, "step": 12154 }, { "epoch": 15.5584, "grad_norm": 1.0301507711410522, "learning_rate": 2.5704281712685074e-05, "loss": 0.501, "step": 12155 }, { "epoch": 15.55968, "grad_norm": 0.9439396858215332, "learning_rate": 2.5702280912364946e-05, "loss": 0.5265, "step": 12156 }, { "epoch": 15.56096, "grad_norm": 1.0617479085922241, "learning_rate": 2.5700280112044817e-05, "loss": 0.5651, "step": 12157 }, { "epoch": 15.56224, "grad_norm": 0.9912658333778381, "learning_rate": 2.5698279311724693e-05, "loss": 0.5087, "step": 12158 }, { "epoch": 15.56352, "grad_norm": 0.9808444380760193, "learning_rate": 2.5696278511404565e-05, "loss": 0.5334, "step": 12159 }, { "epoch": 15.5648, "grad_norm": 0.9171379804611206, "learning_rate": 2.5694277711084436e-05, "loss": 0.5009, "step": 12160 }, { "epoch": 15.56608, "grad_norm": 0.9609277248382568, "learning_rate": 2.5692276910764308e-05, "loss": 0.5078, "step": 12161 }, { "epoch": 15.56736, "grad_norm": 0.9825374484062195, "learning_rate": 2.5690276110444177e-05, "loss": 0.5374, "step": 12162 }, { "epoch": 15.56864, "grad_norm": 
0.9431901574134827, "learning_rate": 2.568827531012405e-05, "loss": 0.4915, "step": 12163 }, { "epoch": 15.56992, "grad_norm": 1.0128616094589233, "learning_rate": 2.568627450980392e-05, "loss": 0.5483, "step": 12164 }, { "epoch": 15.5712, "grad_norm": 1.0459868907928467, "learning_rate": 2.5684273709483796e-05, "loss": 0.5579, "step": 12165 }, { "epoch": 15.57248, "grad_norm": 1.015820026397705, "learning_rate": 2.5682272909163668e-05, "loss": 0.5315, "step": 12166 }, { "epoch": 15.57376, "grad_norm": 0.9853371977806091, "learning_rate": 2.568027210884354e-05, "loss": 0.513, "step": 12167 }, { "epoch": 15.57504, "grad_norm": 0.9857305288314819, "learning_rate": 2.567827130852341e-05, "loss": 0.5006, "step": 12168 }, { "epoch": 15.57632, "grad_norm": 1.0142581462860107, "learning_rate": 2.5676270508203283e-05, "loss": 0.5309, "step": 12169 }, { "epoch": 15.5776, "grad_norm": 0.9901478886604309, "learning_rate": 2.567426970788315e-05, "loss": 0.5302, "step": 12170 }, { "epoch": 15.57888, "grad_norm": 0.9273139238357544, "learning_rate": 2.5672268907563023e-05, "loss": 0.4937, "step": 12171 }, { "epoch": 15.58016, "grad_norm": 0.984489917755127, "learning_rate": 2.5670268107242902e-05, "loss": 0.547, "step": 12172 }, { "epoch": 15.58144, "grad_norm": 1.0063872337341309, "learning_rate": 2.566826730692277e-05, "loss": 0.5502, "step": 12173 }, { "epoch": 15.58272, "grad_norm": 1.0095993280410767, "learning_rate": 2.5666266506602642e-05, "loss": 0.5082, "step": 12174 }, { "epoch": 15.584, "grad_norm": 0.9566926956176758, "learning_rate": 2.5664265706282514e-05, "loss": 0.5288, "step": 12175 }, { "epoch": 15.585280000000001, "grad_norm": 0.9695842862129211, "learning_rate": 2.5662264905962386e-05, "loss": 0.5461, "step": 12176 }, { "epoch": 15.58656, "grad_norm": 0.9567855000495911, "learning_rate": 2.5660264105642258e-05, "loss": 0.5013, "step": 12177 }, { "epoch": 15.58784, "grad_norm": 0.983958899974823, "learning_rate": 2.5658263305322126e-05, "loss": 0.5322, "step": 
12178 }, { "epoch": 15.58912, "grad_norm": 1.0091723203659058, "learning_rate": 2.5656262505002005e-05, "loss": 0.5172, "step": 12179 }, { "epoch": 15.5904, "grad_norm": 1.0233546495437622, "learning_rate": 2.5654261704681877e-05, "loss": 0.5247, "step": 12180 }, { "epoch": 15.59168, "grad_norm": 0.9641100764274597, "learning_rate": 2.5652260904361745e-05, "loss": 0.5123, "step": 12181 }, { "epoch": 15.59296, "grad_norm": 0.9538204669952393, "learning_rate": 2.5650260104041617e-05, "loss": 0.5048, "step": 12182 }, { "epoch": 15.59424, "grad_norm": 1.0550053119659424, "learning_rate": 2.564825930372149e-05, "loss": 0.5824, "step": 12183 }, { "epoch": 15.59552, "grad_norm": 0.9931796193122864, "learning_rate": 2.564625850340136e-05, "loss": 0.526, "step": 12184 }, { "epoch": 15.5968, "grad_norm": 1.0209304094314575, "learning_rate": 2.5644257703081233e-05, "loss": 0.5275, "step": 12185 }, { "epoch": 15.59808, "grad_norm": 0.9978035688400269, "learning_rate": 2.5642256902761108e-05, "loss": 0.5207, "step": 12186 }, { "epoch": 15.59936, "grad_norm": 1.0060955286026, "learning_rate": 2.564025610244098e-05, "loss": 0.5259, "step": 12187 }, { "epoch": 15.60064, "grad_norm": 1.0725250244140625, "learning_rate": 2.5638255302120852e-05, "loss": 0.5654, "step": 12188 }, { "epoch": 15.60192, "grad_norm": 1.0049766302108765, "learning_rate": 2.563625450180072e-05, "loss": 0.5537, "step": 12189 }, { "epoch": 15.6032, "grad_norm": 0.9810140132904053, "learning_rate": 2.5634253701480592e-05, "loss": 0.4873, "step": 12190 }, { "epoch": 15.60448, "grad_norm": 0.9922715425491333, "learning_rate": 2.5632252901160464e-05, "loss": 0.5372, "step": 12191 }, { "epoch": 15.60576, "grad_norm": 0.9921250939369202, "learning_rate": 2.5630252100840336e-05, "loss": 0.523, "step": 12192 }, { "epoch": 15.60704, "grad_norm": 1.0218485593795776, "learning_rate": 2.562825130052021e-05, "loss": 0.5281, "step": 12193 }, { "epoch": 15.608319999999999, "grad_norm": 1.0210494995117188, "learning_rate": 
2.5626250500200083e-05, "loss": 0.5154, "step": 12194 }, { "epoch": 15.6096, "grad_norm": 1.0493415594100952, "learning_rate": 2.5624249699879955e-05, "loss": 0.5801, "step": 12195 }, { "epoch": 15.61088, "grad_norm": 0.95387864112854, "learning_rate": 2.5622248899559827e-05, "loss": 0.5033, "step": 12196 }, { "epoch": 15.61216, "grad_norm": 0.9641955494880676, "learning_rate": 2.5620248099239695e-05, "loss": 0.521, "step": 12197 }, { "epoch": 15.61344, "grad_norm": 0.979554295539856, "learning_rate": 2.5618247298919567e-05, "loss": 0.5124, "step": 12198 }, { "epoch": 15.61472, "grad_norm": 1.063732385635376, "learning_rate": 2.561624649859944e-05, "loss": 0.5712, "step": 12199 }, { "epoch": 15.616, "grad_norm": 1.0626933574676514, "learning_rate": 2.5614245698279314e-05, "loss": 0.5761, "step": 12200 }, { "epoch": 15.617280000000001, "grad_norm": 1.0086123943328857, "learning_rate": 2.5612244897959186e-05, "loss": 0.5195, "step": 12201 }, { "epoch": 15.61856, "grad_norm": 0.990342915058136, "learning_rate": 2.5610244097639058e-05, "loss": 0.5288, "step": 12202 }, { "epoch": 15.61984, "grad_norm": 0.9492666125297546, "learning_rate": 2.560824329731893e-05, "loss": 0.4753, "step": 12203 }, { "epoch": 15.62112, "grad_norm": 1.011853575706482, "learning_rate": 2.56062424969988e-05, "loss": 0.5095, "step": 12204 }, { "epoch": 15.6224, "grad_norm": 1.003688931465149, "learning_rate": 2.560424169667867e-05, "loss": 0.4808, "step": 12205 }, { "epoch": 15.62368, "grad_norm": 0.9985147714614868, "learning_rate": 2.5602240896358542e-05, "loss": 0.5343, "step": 12206 }, { "epoch": 15.62496, "grad_norm": 0.9690064787864685, "learning_rate": 2.560024009603842e-05, "loss": 0.4995, "step": 12207 }, { "epoch": 15.62624, "grad_norm": 0.9825969338417053, "learning_rate": 2.559823929571829e-05, "loss": 0.5231, "step": 12208 }, { "epoch": 15.62752, "grad_norm": 0.995742917060852, "learning_rate": 2.559623849539816e-05, "loss": 0.5243, "step": 12209 }, { "epoch": 15.6288, "grad_norm": 
1.037103295326233, "learning_rate": 2.5594237695078033e-05, "loss": 0.5265, "step": 12210 }, { "epoch": 15.63008, "grad_norm": 1.0141079425811768, "learning_rate": 2.5592236894757904e-05, "loss": 0.5545, "step": 12211 }, { "epoch": 15.63136, "grad_norm": 0.9893406629562378, "learning_rate": 2.5590236094437776e-05, "loss": 0.5033, "step": 12212 }, { "epoch": 15.63264, "grad_norm": 1.0220900774002075, "learning_rate": 2.5588235294117645e-05, "loss": 0.5212, "step": 12213 }, { "epoch": 15.63392, "grad_norm": 0.9686633348464966, "learning_rate": 2.5586234493797523e-05, "loss": 0.5521, "step": 12214 }, { "epoch": 15.6352, "grad_norm": 1.001504898071289, "learning_rate": 2.5584233693477395e-05, "loss": 0.5756, "step": 12215 }, { "epoch": 15.63648, "grad_norm": 1.0070838928222656, "learning_rate": 2.5582232893157264e-05, "loss": 0.5075, "step": 12216 }, { "epoch": 15.63776, "grad_norm": 0.9980425834655762, "learning_rate": 2.5580232092837136e-05, "loss": 0.5527, "step": 12217 }, { "epoch": 15.63904, "grad_norm": 0.9670035243034363, "learning_rate": 2.5578231292517007e-05, "loss": 0.49, "step": 12218 }, { "epoch": 15.64032, "grad_norm": 0.9716927409172058, "learning_rate": 2.557623049219688e-05, "loss": 0.4885, "step": 12219 }, { "epoch": 15.6416, "grad_norm": 0.9749883413314819, "learning_rate": 2.557422969187675e-05, "loss": 0.5409, "step": 12220 }, { "epoch": 15.64288, "grad_norm": 0.9985603094100952, "learning_rate": 2.5572228891556626e-05, "loss": 0.5294, "step": 12221 }, { "epoch": 15.64416, "grad_norm": 1.0051274299621582, "learning_rate": 2.5570228091236498e-05, "loss": 0.5685, "step": 12222 }, { "epoch": 15.64544, "grad_norm": 1.0328760147094727, "learning_rate": 2.556822729091637e-05, "loss": 0.6053, "step": 12223 }, { "epoch": 15.64672, "grad_norm": 1.0340243577957153, "learning_rate": 2.556622649059624e-05, "loss": 0.5212, "step": 12224 }, { "epoch": 15.648, "grad_norm": 1.0244271755218506, "learning_rate": 2.556422569027611e-05, "loss": 0.5555, "step": 12225 
}, { "epoch": 15.64928, "grad_norm": 0.9867590069770813, "learning_rate": 2.5562224889955982e-05, "loss": 0.5331, "step": 12226 }, { "epoch": 15.65056, "grad_norm": 1.015299916267395, "learning_rate": 2.5560224089635854e-05, "loss": 0.5381, "step": 12227 }, { "epoch": 15.65184, "grad_norm": 0.9973687529563904, "learning_rate": 2.555822328931573e-05, "loss": 0.5776, "step": 12228 }, { "epoch": 15.65312, "grad_norm": 0.9658114314079285, "learning_rate": 2.55562224889956e-05, "loss": 0.4938, "step": 12229 }, { "epoch": 15.6544, "grad_norm": 0.9462615847587585, "learning_rate": 2.5554221688675473e-05, "loss": 0.4878, "step": 12230 }, { "epoch": 15.65568, "grad_norm": 0.9629518389701843, "learning_rate": 2.5552220888355345e-05, "loss": 0.544, "step": 12231 }, { "epoch": 15.65696, "grad_norm": 1.0248364210128784, "learning_rate": 2.5550220088035213e-05, "loss": 0.5085, "step": 12232 }, { "epoch": 15.65824, "grad_norm": 0.9903928637504578, "learning_rate": 2.5548219287715085e-05, "loss": 0.5624, "step": 12233 }, { "epoch": 15.65952, "grad_norm": 0.9919432401657104, "learning_rate": 2.5546218487394957e-05, "loss": 0.5008, "step": 12234 }, { "epoch": 15.6608, "grad_norm": 1.0050952434539795, "learning_rate": 2.554421768707483e-05, "loss": 0.5812, "step": 12235 }, { "epoch": 15.66208, "grad_norm": 0.9851815104484558, "learning_rate": 2.5542216886754704e-05, "loss": 0.5016, "step": 12236 }, { "epoch": 15.66336, "grad_norm": 0.9741938710212708, "learning_rate": 2.5540216086434576e-05, "loss": 0.5038, "step": 12237 }, { "epoch": 15.66464, "grad_norm": 1.0300729274749756, "learning_rate": 2.5538215286114448e-05, "loss": 0.5195, "step": 12238 }, { "epoch": 15.66592, "grad_norm": 0.9596061110496521, "learning_rate": 2.553621448579432e-05, "loss": 0.5052, "step": 12239 }, { "epoch": 15.6672, "grad_norm": 0.9867904186248779, "learning_rate": 2.553421368547419e-05, "loss": 0.5353, "step": 12240 }, { "epoch": 15.66848, "grad_norm": 0.9471587538719177, "learning_rate": 
2.553221288515406e-05, "loss": 0.4976, "step": 12241 }, { "epoch": 15.66976, "grad_norm": 0.9685238599777222, "learning_rate": 2.5530212084833932e-05, "loss": 0.5168, "step": 12242 }, { "epoch": 15.67104, "grad_norm": 0.9731021523475647, "learning_rate": 2.5528211284513807e-05, "loss": 0.5298, "step": 12243 }, { "epoch": 15.67232, "grad_norm": 0.9390130043029785, "learning_rate": 2.552621048419368e-05, "loss": 0.517, "step": 12244 }, { "epoch": 15.6736, "grad_norm": 1.0047056674957275, "learning_rate": 2.552420968387355e-05, "loss": 0.5198, "step": 12245 }, { "epoch": 15.67488, "grad_norm": 1.0100324153900146, "learning_rate": 2.5522208883553423e-05, "loss": 0.5573, "step": 12246 }, { "epoch": 15.67616, "grad_norm": 1.0420469045639038, "learning_rate": 2.5520208083233295e-05, "loss": 0.5394, "step": 12247 }, { "epoch": 15.67744, "grad_norm": 1.0220342874526978, "learning_rate": 2.5518207282913163e-05, "loss": 0.5864, "step": 12248 }, { "epoch": 15.67872, "grad_norm": 1.0437568426132202, "learning_rate": 2.5516206482593035e-05, "loss": 0.5309, "step": 12249 }, { "epoch": 15.68, "grad_norm": 1.0043889284133911, "learning_rate": 2.5514205682272914e-05, "loss": 0.5631, "step": 12250 }, { "epoch": 15.68128, "grad_norm": 1.024249792098999, "learning_rate": 2.5512204881952782e-05, "loss": 0.5329, "step": 12251 }, { "epoch": 15.68256, "grad_norm": 0.9508040547370911, "learning_rate": 2.5510204081632654e-05, "loss": 0.5257, "step": 12252 }, { "epoch": 15.68384, "grad_norm": 1.0146198272705078, "learning_rate": 2.5508203281312526e-05, "loss": 0.5494, "step": 12253 }, { "epoch": 15.68512, "grad_norm": 0.9662773609161377, "learning_rate": 2.5506202480992398e-05, "loss": 0.4899, "step": 12254 }, { "epoch": 15.6864, "grad_norm": 0.9972757697105408, "learning_rate": 2.550420168067227e-05, "loss": 0.5409, "step": 12255 }, { "epoch": 15.68768, "grad_norm": 0.9856722950935364, "learning_rate": 2.5502200880352138e-05, "loss": 0.5048, "step": 12256 }, { "epoch": 15.68896, "grad_norm": 
1.0038642883300781, "learning_rate": 2.5500200080032017e-05, "loss": 0.5522, "step": 12257 }, { "epoch": 15.69024, "grad_norm": 1.0079247951507568, "learning_rate": 2.549819927971189e-05, "loss": 0.5588, "step": 12258 }, { "epoch": 15.69152, "grad_norm": 1.0044342279434204, "learning_rate": 2.5496198479391757e-05, "loss": 0.5339, "step": 12259 }, { "epoch": 15.6928, "grad_norm": 0.9543859362602234, "learning_rate": 2.549419767907163e-05, "loss": 0.5141, "step": 12260 }, { "epoch": 15.69408, "grad_norm": 1.0886956453323364, "learning_rate": 2.54921968787515e-05, "loss": 0.5861, "step": 12261 }, { "epoch": 15.69536, "grad_norm": 0.9997818470001221, "learning_rate": 2.5490196078431373e-05, "loss": 0.4868, "step": 12262 }, { "epoch": 15.69664, "grad_norm": 0.9534517526626587, "learning_rate": 2.5488195278111244e-05, "loss": 0.5402, "step": 12263 }, { "epoch": 15.69792, "grad_norm": 0.9827191233634949, "learning_rate": 2.548619447779112e-05, "loss": 0.5272, "step": 12264 }, { "epoch": 15.6992, "grad_norm": 0.9801238775253296, "learning_rate": 2.548419367747099e-05, "loss": 0.5243, "step": 12265 }, { "epoch": 15.70048, "grad_norm": 0.9996204376220703, "learning_rate": 2.5482192877150863e-05, "loss": 0.535, "step": 12266 }, { "epoch": 15.70176, "grad_norm": 1.0119619369506836, "learning_rate": 2.5480192076830732e-05, "loss": 0.5261, "step": 12267 }, { "epoch": 15.70304, "grad_norm": 1.0047118663787842, "learning_rate": 2.5478191276510604e-05, "loss": 0.4841, "step": 12268 }, { "epoch": 15.70432, "grad_norm": 1.0151153802871704, "learning_rate": 2.5476190476190476e-05, "loss": 0.5364, "step": 12269 }, { "epoch": 15.7056, "grad_norm": 1.0285605192184448, "learning_rate": 2.5474189675870347e-05, "loss": 0.5459, "step": 12270 }, { "epoch": 15.70688, "grad_norm": 0.9856500625610352, "learning_rate": 2.5472188875550223e-05, "loss": 0.5358, "step": 12271 }, { "epoch": 15.70816, "grad_norm": 1.0488793849945068, "learning_rate": 2.5470188075230095e-05, "loss": 0.5405, "step": 
12272 }, { "epoch": 15.70944, "grad_norm": 1.0134892463684082, "learning_rate": 2.5468187274909966e-05, "loss": 0.5732, "step": 12273 }, { "epoch": 15.71072, "grad_norm": 0.9521108269691467, "learning_rate": 2.5466186474589838e-05, "loss": 0.4745, "step": 12274 }, { "epoch": 15.712, "grad_norm": 0.9995734095573425, "learning_rate": 2.5464185674269707e-05, "loss": 0.5472, "step": 12275 }, { "epoch": 15.71328, "grad_norm": 1.0318055152893066, "learning_rate": 2.546218487394958e-05, "loss": 0.5725, "step": 12276 }, { "epoch": 15.71456, "grad_norm": 1.0022940635681152, "learning_rate": 2.546018407362945e-05, "loss": 0.5544, "step": 12277 }, { "epoch": 15.71584, "grad_norm": 0.9959270358085632, "learning_rate": 2.5458183273309326e-05, "loss": 0.52, "step": 12278 }, { "epoch": 15.71712, "grad_norm": 1.0101356506347656, "learning_rate": 2.5456182472989198e-05, "loss": 0.5537, "step": 12279 }, { "epoch": 15.7184, "grad_norm": 0.9922164678573608, "learning_rate": 2.545418167266907e-05, "loss": 0.5343, "step": 12280 }, { "epoch": 15.71968, "grad_norm": 1.0045051574707031, "learning_rate": 2.545218087234894e-05, "loss": 0.5776, "step": 12281 }, { "epoch": 15.72096, "grad_norm": 0.9495313763618469, "learning_rate": 2.5450180072028813e-05, "loss": 0.509, "step": 12282 }, { "epoch": 15.72224, "grad_norm": 0.9955906867980957, "learning_rate": 2.544817927170868e-05, "loss": 0.5389, "step": 12283 }, { "epoch": 15.72352, "grad_norm": 1.0342867374420166, "learning_rate": 2.5446178471388553e-05, "loss": 0.5492, "step": 12284 }, { "epoch": 15.7248, "grad_norm": 0.966540515422821, "learning_rate": 2.5444177671068432e-05, "loss": 0.5449, "step": 12285 }, { "epoch": 15.72608, "grad_norm": 0.9552671313285828, "learning_rate": 2.54421768707483e-05, "loss": 0.5026, "step": 12286 }, { "epoch": 15.727360000000001, "grad_norm": 0.9747936129570007, "learning_rate": 2.5440176070428172e-05, "loss": 0.524, "step": 12287 }, { "epoch": 15.72864, "grad_norm": 1.0628736019134521, "learning_rate": 
2.5438175270108044e-05, "loss": 0.5114, "step": 12288 }, { "epoch": 15.72992, "grad_norm": 1.016642689704895, "learning_rate": 2.5436174469787916e-05, "loss": 0.5479, "step": 12289 }, { "epoch": 15.7312, "grad_norm": 0.95323246717453, "learning_rate": 2.5434173669467788e-05, "loss": 0.5381, "step": 12290 }, { "epoch": 15.73248, "grad_norm": 1.0170929431915283, "learning_rate": 2.5432172869147656e-05, "loss": 0.5459, "step": 12291 }, { "epoch": 15.73376, "grad_norm": 0.990404486656189, "learning_rate": 2.5430172068827535e-05, "loss": 0.5469, "step": 12292 }, { "epoch": 15.73504, "grad_norm": 1.013763189315796, "learning_rate": 2.5428171268507407e-05, "loss": 0.498, "step": 12293 }, { "epoch": 15.73632, "grad_norm": 0.9921509623527527, "learning_rate": 2.5426170468187275e-05, "loss": 0.5439, "step": 12294 }, { "epoch": 15.7376, "grad_norm": 0.9957842230796814, "learning_rate": 2.5424169667867147e-05, "loss": 0.5197, "step": 12295 }, { "epoch": 15.73888, "grad_norm": 0.9728386998176575, "learning_rate": 2.542216886754702e-05, "loss": 0.4966, "step": 12296 }, { "epoch": 15.74016, "grad_norm": 1.0001862049102783, "learning_rate": 2.542016806722689e-05, "loss": 0.5375, "step": 12297 }, { "epoch": 15.74144, "grad_norm": 0.9885843992233276, "learning_rate": 2.5418167266906763e-05, "loss": 0.545, "step": 12298 }, { "epoch": 15.74272, "grad_norm": 1.0801782608032227, "learning_rate": 2.5416166466586638e-05, "loss": 0.548, "step": 12299 }, { "epoch": 15.744, "grad_norm": 1.014716625213623, "learning_rate": 2.541416566626651e-05, "loss": 0.5159, "step": 12300 }, { "epoch": 15.74528, "grad_norm": 1.070065975189209, "learning_rate": 2.5412164865946382e-05, "loss": 0.5531, "step": 12301 }, { "epoch": 15.74656, "grad_norm": 0.9282522201538086, "learning_rate": 2.541016406562625e-05, "loss": 0.5189, "step": 12302 }, { "epoch": 15.74784, "grad_norm": 0.9973820447921753, "learning_rate": 2.5408163265306122e-05, "loss": 0.5093, "step": 12303 }, { "epoch": 15.74912, "grad_norm": 
0.9329702258110046, "learning_rate": 2.5406162464985994e-05, "loss": 0.4989, "step": 12304 }, { "epoch": 15.750399999999999, "grad_norm": 0.9909161925315857, "learning_rate": 2.5404161664665866e-05, "loss": 0.5578, "step": 12305 }, { "epoch": 15.75168, "grad_norm": 1.0105434656143188, "learning_rate": 2.540216086434574e-05, "loss": 0.5251, "step": 12306 }, { "epoch": 15.75296, "grad_norm": 1.0197798013687134, "learning_rate": 2.5400160064025613e-05, "loss": 0.5527, "step": 12307 }, { "epoch": 15.75424, "grad_norm": 1.027341604232788, "learning_rate": 2.5398159263705485e-05, "loss": 0.5716, "step": 12308 }, { "epoch": 15.75552, "grad_norm": 0.9711216688156128, "learning_rate": 2.5396158463385357e-05, "loss": 0.5455, "step": 12309 }, { "epoch": 15.7568, "grad_norm": 0.9567652940750122, "learning_rate": 2.5394157663065225e-05, "loss": 0.475, "step": 12310 }, { "epoch": 15.75808, "grad_norm": 0.9756172895431519, "learning_rate": 2.5392156862745097e-05, "loss": 0.534, "step": 12311 }, { "epoch": 15.759360000000001, "grad_norm": 1.0259792804718018, "learning_rate": 2.539015606242497e-05, "loss": 0.5237, "step": 12312 }, { "epoch": 15.76064, "grad_norm": 0.9864662885665894, "learning_rate": 2.5388155262104844e-05, "loss": 0.5443, "step": 12313 }, { "epoch": 15.76192, "grad_norm": 0.9610289335250854, "learning_rate": 2.5386154461784716e-05, "loss": 0.5058, "step": 12314 }, { "epoch": 15.7632, "grad_norm": 0.9790290594100952, "learning_rate": 2.5384153661464588e-05, "loss": 0.5117, "step": 12315 }, { "epoch": 15.76448, "grad_norm": 0.9570515751838684, "learning_rate": 2.538215286114446e-05, "loss": 0.5391, "step": 12316 }, { "epoch": 15.76576, "grad_norm": 0.979626476764679, "learning_rate": 2.538015206082433e-05, "loss": 0.5167, "step": 12317 }, { "epoch": 15.76704, "grad_norm": 0.9989627599716187, "learning_rate": 2.53781512605042e-05, "loss": 0.5157, "step": 12318 }, { "epoch": 15.76832, "grad_norm": 1.0324374437332153, "learning_rate": 2.5376150460184072e-05, "loss": 
0.5204, "step": 12319 }, { "epoch": 15.7696, "grad_norm": 0.9673824906349182, "learning_rate": 2.537414965986395e-05, "loss": 0.5313, "step": 12320 }, { "epoch": 15.77088, "grad_norm": 1.0042349100112915, "learning_rate": 2.537214885954382e-05, "loss": 0.5402, "step": 12321 }, { "epoch": 15.77216, "grad_norm": 1.0308949947357178, "learning_rate": 2.537014805922369e-05, "loss": 0.5681, "step": 12322 }, { "epoch": 15.77344, "grad_norm": 0.9506542086601257, "learning_rate": 2.5368147258903563e-05, "loss": 0.5449, "step": 12323 }, { "epoch": 15.77472, "grad_norm": 0.9522877335548401, "learning_rate": 2.5366146458583434e-05, "loss": 0.5617, "step": 12324 }, { "epoch": 15.776, "grad_norm": 0.9548900127410889, "learning_rate": 2.5364145658263306e-05, "loss": 0.5247, "step": 12325 }, { "epoch": 15.77728, "grad_norm": 0.9789671301841736, "learning_rate": 2.5362144857943175e-05, "loss": 0.5114, "step": 12326 }, { "epoch": 15.77856, "grad_norm": 0.942644476890564, "learning_rate": 2.5360144057623053e-05, "loss": 0.4917, "step": 12327 }, { "epoch": 15.77984, "grad_norm": 0.9783756732940674, "learning_rate": 2.5358143257302925e-05, "loss": 0.4895, "step": 12328 }, { "epoch": 15.78112, "grad_norm": 0.9837256669998169, "learning_rate": 2.5356142456982794e-05, "loss": 0.519, "step": 12329 }, { "epoch": 15.782399999999999, "grad_norm": 0.9281373023986816, "learning_rate": 2.5354141656662666e-05, "loss": 0.4883, "step": 12330 }, { "epoch": 15.78368, "grad_norm": 1.0375429391860962, "learning_rate": 2.5352140856342537e-05, "loss": 0.5655, "step": 12331 }, { "epoch": 15.78496, "grad_norm": 1.0052881240844727, "learning_rate": 2.535014005602241e-05, "loss": 0.5547, "step": 12332 }, { "epoch": 15.78624, "grad_norm": 1.0248472690582275, "learning_rate": 2.534813925570228e-05, "loss": 0.5778, "step": 12333 }, { "epoch": 15.78752, "grad_norm": 1.0107492208480835, "learning_rate": 2.5346138455382156e-05, "loss": 0.5068, "step": 12334 }, { "epoch": 15.7888, "grad_norm": 0.9795581698417664, 
"learning_rate": 2.5344137655062028e-05, "loss": 0.5389, "step": 12335 }, { "epoch": 15.79008, "grad_norm": 0.9817120432853699, "learning_rate": 2.53421368547419e-05, "loss": 0.5071, "step": 12336 }, { "epoch": 15.79136, "grad_norm": 0.9362112879753113, "learning_rate": 2.534013605442177e-05, "loss": 0.5537, "step": 12337 }, { "epoch": 15.79264, "grad_norm": 0.9294254183769226, "learning_rate": 2.533813525410164e-05, "loss": 0.4974, "step": 12338 }, { "epoch": 15.79392, "grad_norm": 0.9781113862991333, "learning_rate": 2.5336134453781512e-05, "loss": 0.4655, "step": 12339 }, { "epoch": 15.7952, "grad_norm": 0.9783672094345093, "learning_rate": 2.5334133653461384e-05, "loss": 0.5533, "step": 12340 }, { "epoch": 15.79648, "grad_norm": 0.9659748673439026, "learning_rate": 2.5332132853141256e-05, "loss": 0.5486, "step": 12341 }, { "epoch": 15.79776, "grad_norm": 0.9942366480827332, "learning_rate": 2.533013205282113e-05, "loss": 0.5433, "step": 12342 }, { "epoch": 15.79904, "grad_norm": 0.9866067171096802, "learning_rate": 2.5328131252501003e-05, "loss": 0.5127, "step": 12343 }, { "epoch": 15.80032, "grad_norm": 1.0013525485992432, "learning_rate": 2.5326130452180875e-05, "loss": 0.5194, "step": 12344 }, { "epoch": 15.8016, "grad_norm": 0.9939999580383301, "learning_rate": 2.5324129651860743e-05, "loss": 0.5284, "step": 12345 }, { "epoch": 15.80288, "grad_norm": 0.9597903490066528, "learning_rate": 2.5322128851540615e-05, "loss": 0.5152, "step": 12346 }, { "epoch": 15.80416, "grad_norm": 0.9546245336532593, "learning_rate": 2.5320128051220487e-05, "loss": 0.487, "step": 12347 }, { "epoch": 15.80544, "grad_norm": 1.0091270208358765, "learning_rate": 2.531812725090036e-05, "loss": 0.5678, "step": 12348 }, { "epoch": 15.80672, "grad_norm": 0.9878095984458923, "learning_rate": 2.5316126450580234e-05, "loss": 0.5059, "step": 12349 }, { "epoch": 15.808, "grad_norm": 1.0209447145462036, "learning_rate": 2.5314125650260106e-05, "loss": 0.5348, "step": 12350 }, { "epoch": 
15.80928, "grad_norm": 1.0176520347595215, "learning_rate": 2.5312124849939978e-05, "loss": 0.4969, "step": 12351 }, { "epoch": 15.81056, "grad_norm": 1.0019172430038452, "learning_rate": 2.531012404961985e-05, "loss": 0.5235, "step": 12352 }, { "epoch": 15.81184, "grad_norm": 1.047505259513855, "learning_rate": 2.530812324929972e-05, "loss": 0.6304, "step": 12353 }, { "epoch": 15.81312, "grad_norm": 0.9699926376342773, "learning_rate": 2.530612244897959e-05, "loss": 0.5278, "step": 12354 }, { "epoch": 15.8144, "grad_norm": 0.9239999055862427, "learning_rate": 2.5304121648659462e-05, "loss": 0.5137, "step": 12355 }, { "epoch": 15.81568, "grad_norm": 0.9999382495880127, "learning_rate": 2.530212084833934e-05, "loss": 0.5398, "step": 12356 }, { "epoch": 15.81696, "grad_norm": 0.9642220139503479, "learning_rate": 2.530012004801921e-05, "loss": 0.5051, "step": 12357 }, { "epoch": 15.81824, "grad_norm": 0.9850364327430725, "learning_rate": 2.529811924769908e-05, "loss": 0.5086, "step": 12358 }, { "epoch": 15.81952, "grad_norm": 0.9347919225692749, "learning_rate": 2.5296118447378953e-05, "loss": 0.5029, "step": 12359 }, { "epoch": 15.8208, "grad_norm": 1.001004695892334, "learning_rate": 2.5294117647058825e-05, "loss": 0.5255, "step": 12360 }, { "epoch": 15.82208, "grad_norm": 1.0347155332565308, "learning_rate": 2.5292116846738693e-05, "loss": 0.5276, "step": 12361 }, { "epoch": 15.82336, "grad_norm": 1.0250365734100342, "learning_rate": 2.5290116046418565e-05, "loss": 0.5609, "step": 12362 }, { "epoch": 15.82464, "grad_norm": 1.0703151226043701, "learning_rate": 2.5288115246098444e-05, "loss": 0.5144, "step": 12363 }, { "epoch": 15.82592, "grad_norm": 1.0669794082641602, "learning_rate": 2.5286114445778316e-05, "loss": 0.5801, "step": 12364 }, { "epoch": 15.8272, "grad_norm": 1.0120995044708252, "learning_rate": 2.5284113645458184e-05, "loss": 0.4951, "step": 12365 }, { "epoch": 15.82848, "grad_norm": 0.9758439660072327, "learning_rate": 2.5282112845138056e-05, 
"loss": 0.5294, "step": 12366 }, { "epoch": 15.82976, "grad_norm": 0.9557347893714905, "learning_rate": 2.5280112044817928e-05, "loss": 0.4897, "step": 12367 }, { "epoch": 15.83104, "grad_norm": 0.9893117547035217, "learning_rate": 2.52781112444978e-05, "loss": 0.5063, "step": 12368 }, { "epoch": 15.83232, "grad_norm": 1.008176326751709, "learning_rate": 2.5276110444177668e-05, "loss": 0.5255, "step": 12369 }, { "epoch": 15.8336, "grad_norm": 0.9538099765777588, "learning_rate": 2.5274109643857547e-05, "loss": 0.5154, "step": 12370 }, { "epoch": 15.83488, "grad_norm": 0.9922953844070435, "learning_rate": 2.527210884353742e-05, "loss": 0.5088, "step": 12371 }, { "epoch": 15.83616, "grad_norm": 0.9348974823951721, "learning_rate": 2.527010804321729e-05, "loss": 0.4637, "step": 12372 }, { "epoch": 15.83744, "grad_norm": 1.0178333520889282, "learning_rate": 2.526810724289716e-05, "loss": 0.532, "step": 12373 }, { "epoch": 15.83872, "grad_norm": 1.0431467294692993, "learning_rate": 2.526610644257703e-05, "loss": 0.5307, "step": 12374 }, { "epoch": 15.84, "grad_norm": 1.0939209461212158, "learning_rate": 2.5264105642256903e-05, "loss": 0.5397, "step": 12375 }, { "epoch": 15.84128, "grad_norm": 1.0265597105026245, "learning_rate": 2.5262104841936774e-05, "loss": 0.5354, "step": 12376 }, { "epoch": 15.84256, "grad_norm": 0.9757692813873291, "learning_rate": 2.526010404161665e-05, "loss": 0.4957, "step": 12377 }, { "epoch": 15.84384, "grad_norm": 0.9945160150527954, "learning_rate": 2.525810324129652e-05, "loss": 0.5204, "step": 12378 }, { "epoch": 15.84512, "grad_norm": 0.9997515082359314, "learning_rate": 2.5256102440976393e-05, "loss": 0.5403, "step": 12379 }, { "epoch": 15.8464, "grad_norm": 0.9899279475212097, "learning_rate": 2.5254101640656265e-05, "loss": 0.5597, "step": 12380 }, { "epoch": 15.84768, "grad_norm": 0.941484272480011, "learning_rate": 2.5252100840336134e-05, "loss": 0.5027, "step": 12381 }, { "epoch": 15.84896, "grad_norm": 0.984250545501709, 
"learning_rate": 2.5250100040016006e-05, "loss": 0.5449, "step": 12382 }, { "epoch": 15.85024, "grad_norm": 0.9902025461196899, "learning_rate": 2.5248099239695877e-05, "loss": 0.5443, "step": 12383 }, { "epoch": 15.85152, "grad_norm": 0.9709980487823486, "learning_rate": 2.5246098439375753e-05, "loss": 0.5031, "step": 12384 }, { "epoch": 15.8528, "grad_norm": 0.9857783317565918, "learning_rate": 2.5244097639055625e-05, "loss": 0.5027, "step": 12385 }, { "epoch": 15.85408, "grad_norm": 1.0132930278778076, "learning_rate": 2.5242096838735496e-05, "loss": 0.5154, "step": 12386 }, { "epoch": 15.85536, "grad_norm": 0.9710603952407837, "learning_rate": 2.5240096038415368e-05, "loss": 0.5182, "step": 12387 }, { "epoch": 15.85664, "grad_norm": 0.9620773196220398, "learning_rate": 2.523809523809524e-05, "loss": 0.4874, "step": 12388 }, { "epoch": 15.85792, "grad_norm": 0.9953191876411438, "learning_rate": 2.523609443777511e-05, "loss": 0.5538, "step": 12389 }, { "epoch": 15.8592, "grad_norm": 0.956991970539093, "learning_rate": 2.523409363745498e-05, "loss": 0.512, "step": 12390 }, { "epoch": 15.86048, "grad_norm": 1.0285273790359497, "learning_rate": 2.523209283713486e-05, "loss": 0.5422, "step": 12391 }, { "epoch": 15.86176, "grad_norm": 1.0568751096725464, "learning_rate": 2.5230092036814728e-05, "loss": 0.5794, "step": 12392 }, { "epoch": 15.86304, "grad_norm": 1.0147608518600464, "learning_rate": 2.52280912364946e-05, "loss": 0.5809, "step": 12393 }, { "epoch": 15.86432, "grad_norm": 0.9740559458732605, "learning_rate": 2.522609043617447e-05, "loss": 0.5204, "step": 12394 }, { "epoch": 15.8656, "grad_norm": 1.0392513275146484, "learning_rate": 2.5224089635854343e-05, "loss": 0.5702, "step": 12395 }, { "epoch": 15.86688, "grad_norm": 0.9570985436439514, "learning_rate": 2.5222088835534215e-05, "loss": 0.5251, "step": 12396 }, { "epoch": 15.86816, "grad_norm": 1.0077378749847412, "learning_rate": 2.5220088035214083e-05, "loss": 0.5459, "step": 12397 }, { "epoch": 
15.86944, "grad_norm": 0.9814931154251099, "learning_rate": 2.5218087234893962e-05, "loss": 0.5089, "step": 12398 }, { "epoch": 15.87072, "grad_norm": 1.0061900615692139, "learning_rate": 2.5216086434573834e-05, "loss": 0.534, "step": 12399 }, { "epoch": 15.872, "grad_norm": 0.9620992541313171, "learning_rate": 2.5214085634253702e-05, "loss": 0.5163, "step": 12400 }, { "epoch": 15.87328, "grad_norm": 0.9781507253646851, "learning_rate": 2.5212084833933574e-05, "loss": 0.5391, "step": 12401 }, { "epoch": 15.87456, "grad_norm": 0.9385673403739929, "learning_rate": 2.5210084033613446e-05, "loss": 0.5046, "step": 12402 }, { "epoch": 15.87584, "grad_norm": 0.9664863348007202, "learning_rate": 2.5208083233293318e-05, "loss": 0.5104, "step": 12403 }, { "epoch": 15.87712, "grad_norm": 0.9696258306503296, "learning_rate": 2.520608243297319e-05, "loss": 0.4796, "step": 12404 }, { "epoch": 15.8784, "grad_norm": 0.9488195776939392, "learning_rate": 2.5204081632653065e-05, "loss": 0.4884, "step": 12405 }, { "epoch": 15.87968, "grad_norm": 1.0127061605453491, "learning_rate": 2.5202080832332937e-05, "loss": 0.5115, "step": 12406 }, { "epoch": 15.88096, "grad_norm": 0.9693582057952881, "learning_rate": 2.520008003201281e-05, "loss": 0.5386, "step": 12407 }, { "epoch": 15.88224, "grad_norm": 0.9904215335845947, "learning_rate": 2.5198079231692677e-05, "loss": 0.556, "step": 12408 }, { "epoch": 15.88352, "grad_norm": 0.9979194402694702, "learning_rate": 2.519607843137255e-05, "loss": 0.5033, "step": 12409 }, { "epoch": 15.8848, "grad_norm": 1.007330060005188, "learning_rate": 2.519407763105242e-05, "loss": 0.5607, "step": 12410 }, { "epoch": 15.88608, "grad_norm": 1.0108498334884644, "learning_rate": 2.5192076830732293e-05, "loss": 0.5434, "step": 12411 }, { "epoch": 15.88736, "grad_norm": 0.9750909805297852, "learning_rate": 2.5190076030412168e-05, "loss": 0.5233, "step": 12412 }, { "epoch": 15.88864, "grad_norm": 0.982916533946991, "learning_rate": 2.518807523009204e-05, "loss": 
0.5241, "step": 12413 }, { "epoch": 15.88992, "grad_norm": 0.9335137009620667, "learning_rate": 2.5186074429771912e-05, "loss": 0.4939, "step": 12414 }, { "epoch": 15.8912, "grad_norm": 0.922966718673706, "learning_rate": 2.5184073629451784e-05, "loss": 0.5243, "step": 12415 }, { "epoch": 15.89248, "grad_norm": 1.0012139081954956, "learning_rate": 2.5182072829131652e-05, "loss": 0.5089, "step": 12416 }, { "epoch": 15.89376, "grad_norm": 0.9846066832542419, "learning_rate": 2.5180072028811524e-05, "loss": 0.5127, "step": 12417 }, { "epoch": 15.89504, "grad_norm": 0.9632936120033264, "learning_rate": 2.5178071228491396e-05, "loss": 0.5822, "step": 12418 }, { "epoch": 15.89632, "grad_norm": 0.9547779560089111, "learning_rate": 2.517607042817127e-05, "loss": 0.5153, "step": 12419 }, { "epoch": 15.8976, "grad_norm": 0.9924589991569519, "learning_rate": 2.5174069627851143e-05, "loss": 0.558, "step": 12420 }, { "epoch": 15.89888, "grad_norm": 0.9812869429588318, "learning_rate": 2.5172068827531015e-05, "loss": 0.5061, "step": 12421 }, { "epoch": 15.90016, "grad_norm": 1.0087809562683105, "learning_rate": 2.5170068027210887e-05, "loss": 0.5524, "step": 12422 }, { "epoch": 15.901440000000001, "grad_norm": 0.9655302166938782, "learning_rate": 2.516806722689076e-05, "loss": 0.5159, "step": 12423 }, { "epoch": 15.90272, "grad_norm": 1.032762050628662, "learning_rate": 2.5166066426570627e-05, "loss": 0.5757, "step": 12424 }, { "epoch": 15.904, "grad_norm": 0.9734963178634644, "learning_rate": 2.51640656262505e-05, "loss": 0.4966, "step": 12425 }, { "epoch": 15.90528, "grad_norm": 0.9694871306419373, "learning_rate": 2.5162064825930377e-05, "loss": 0.5307, "step": 12426 }, { "epoch": 15.90656, "grad_norm": 1.0066063404083252, "learning_rate": 2.5160064025610246e-05, "loss": 0.5717, "step": 12427 }, { "epoch": 15.90784, "grad_norm": 0.9474746584892273, "learning_rate": 2.5158063225290118e-05, "loss": 0.5199, "step": 12428 }, { "epoch": 15.90912, "grad_norm": 0.9809539914131165, 
"learning_rate": 2.515606242496999e-05, "loss": 0.5082, "step": 12429 }, { "epoch": 15.9104, "grad_norm": 1.0064231157302856, "learning_rate": 2.515406162464986e-05, "loss": 0.4924, "step": 12430 }, { "epoch": 15.91168, "grad_norm": 0.9995524883270264, "learning_rate": 2.5152060824329733e-05, "loss": 0.5142, "step": 12431 }, { "epoch": 15.91296, "grad_norm": 0.9583382606506348, "learning_rate": 2.5150060024009602e-05, "loss": 0.4687, "step": 12432 }, { "epoch": 15.91424, "grad_norm": 0.9433925747871399, "learning_rate": 2.514805922368948e-05, "loss": 0.5122, "step": 12433 }, { "epoch": 15.91552, "grad_norm": 0.9556870460510254, "learning_rate": 2.5146058423369352e-05, "loss": 0.5444, "step": 12434 }, { "epoch": 15.9168, "grad_norm": 1.0067849159240723, "learning_rate": 2.514405762304922e-05, "loss": 0.5555, "step": 12435 }, { "epoch": 15.91808, "grad_norm": 0.9572538733482361, "learning_rate": 2.5142056822729093e-05, "loss": 0.5077, "step": 12436 }, { "epoch": 15.91936, "grad_norm": 1.0347596406936646, "learning_rate": 2.5140056022408964e-05, "loss": 0.5287, "step": 12437 }, { "epoch": 15.92064, "grad_norm": 0.9975804090499878, "learning_rate": 2.5138055222088836e-05, "loss": 0.5213, "step": 12438 }, { "epoch": 15.92192, "grad_norm": 0.9736266136169434, "learning_rate": 2.5136054421768708e-05, "loss": 0.5386, "step": 12439 }, { "epoch": 15.9232, "grad_norm": 1.077915072441101, "learning_rate": 2.5134053621448583e-05, "loss": 0.5772, "step": 12440 }, { "epoch": 15.924479999999999, "grad_norm": 1.0086665153503418, "learning_rate": 2.5132052821128455e-05, "loss": 0.5173, "step": 12441 }, { "epoch": 15.92576, "grad_norm": 1.012130856513977, "learning_rate": 2.5130052020808327e-05, "loss": 0.5162, "step": 12442 }, { "epoch": 15.92704, "grad_norm": 0.9995039701461792, "learning_rate": 2.5128051220488196e-05, "loss": 0.5068, "step": 12443 }, { "epoch": 15.92832, "grad_norm": 0.9592408537864685, "learning_rate": 2.5126050420168067e-05, "loss": 0.507, "step": 12444 }, { 
"epoch": 15.9296, "grad_norm": 0.9532192945480347, "learning_rate": 2.512404961984794e-05, "loss": 0.5134, "step": 12445 }, { "epoch": 15.93088, "grad_norm": 1.0366421937942505, "learning_rate": 2.512204881952781e-05, "loss": 0.5393, "step": 12446 }, { "epoch": 15.93216, "grad_norm": 0.9751626253128052, "learning_rate": 2.5120048019207686e-05, "loss": 0.5326, "step": 12447 }, { "epoch": 15.933440000000001, "grad_norm": 0.9007095694541931, "learning_rate": 2.5118047218887558e-05, "loss": 0.4805, "step": 12448 }, { "epoch": 15.93472, "grad_norm": 1.0274094343185425, "learning_rate": 2.511604641856743e-05, "loss": 0.5352, "step": 12449 }, { "epoch": 15.936, "grad_norm": 0.9921882152557373, "learning_rate": 2.5114045618247302e-05, "loss": 0.5379, "step": 12450 }, { "epoch": 15.93728, "grad_norm": 1.0404365062713623, "learning_rate": 2.511204481792717e-05, "loss": 0.5624, "step": 12451 }, { "epoch": 15.93856, "grad_norm": 0.9763307571411133, "learning_rate": 2.5110044017607042e-05, "loss": 0.5373, "step": 12452 }, { "epoch": 15.93984, "grad_norm": 0.939691424369812, "learning_rate": 2.5108043217286914e-05, "loss": 0.5621, "step": 12453 }, { "epoch": 15.94112, "grad_norm": 0.9770581126213074, "learning_rate": 2.5106042416966786e-05, "loss": 0.5202, "step": 12454 }, { "epoch": 15.9424, "grad_norm": 1.0360689163208008, "learning_rate": 2.510404161664666e-05, "loss": 0.5714, "step": 12455 }, { "epoch": 15.94368, "grad_norm": 0.9993544816970825, "learning_rate": 2.5102040816326533e-05, "loss": 0.5125, "step": 12456 }, { "epoch": 15.94496, "grad_norm": 1.0267115831375122, "learning_rate": 2.5100040016006405e-05, "loss": 0.5312, "step": 12457 }, { "epoch": 15.94624, "grad_norm": 0.9762638807296753, "learning_rate": 2.5098039215686277e-05, "loss": 0.529, "step": 12458 }, { "epoch": 15.94752, "grad_norm": 1.0173263549804688, "learning_rate": 2.5096038415366145e-05, "loss": 0.5196, "step": 12459 }, { "epoch": 15.9488, "grad_norm": 1.0800774097442627, "learning_rate": 
2.5094037615046017e-05, "loss": 0.6021, "step": 12460 }, { "epoch": 15.95008, "grad_norm": 1.0533719062805176, "learning_rate": 2.509203681472589e-05, "loss": 0.5971, "step": 12461 }, { "epoch": 15.95136, "grad_norm": 1.0251033306121826, "learning_rate": 2.5090036014405764e-05, "loss": 0.5547, "step": 12462 }, { "epoch": 15.95264, "grad_norm": 0.9884242415428162, "learning_rate": 2.5088035214085636e-05, "loss": 0.5788, "step": 12463 }, { "epoch": 15.95392, "grad_norm": 0.9535346627235413, "learning_rate": 2.5086034413765508e-05, "loss": 0.488, "step": 12464 }, { "epoch": 15.9552, "grad_norm": 0.9252434372901917, "learning_rate": 2.508403361344538e-05, "loss": 0.4886, "step": 12465 }, { "epoch": 15.956479999999999, "grad_norm": 1.00466787815094, "learning_rate": 2.508203281312525e-05, "loss": 0.5489, "step": 12466 }, { "epoch": 15.95776, "grad_norm": 0.9779563546180725, "learning_rate": 2.508003201280512e-05, "loss": 0.5357, "step": 12467 }, { "epoch": 15.95904, "grad_norm": 0.9743767380714417, "learning_rate": 2.5078031212484992e-05, "loss": 0.4939, "step": 12468 }, { "epoch": 15.96032, "grad_norm": 0.9605897665023804, "learning_rate": 2.507603041216487e-05, "loss": 0.4879, "step": 12469 }, { "epoch": 15.9616, "grad_norm": 0.9653512835502625, "learning_rate": 2.507402961184474e-05, "loss": 0.51, "step": 12470 }, { "epoch": 15.96288, "grad_norm": 0.9319489002227783, "learning_rate": 2.507202881152461e-05, "loss": 0.4609, "step": 12471 }, { "epoch": 15.96416, "grad_norm": 0.994040310382843, "learning_rate": 2.5070028011204483e-05, "loss": 0.5493, "step": 12472 }, { "epoch": 15.96544, "grad_norm": 1.0005146265029907, "learning_rate": 2.5068027210884355e-05, "loss": 0.5584, "step": 12473 }, { "epoch": 15.96672, "grad_norm": 0.9767276048660278, "learning_rate": 2.5066026410564227e-05, "loss": 0.4839, "step": 12474 }, { "epoch": 15.968, "grad_norm": 0.9705262780189514, "learning_rate": 2.5064025610244095e-05, "loss": 0.5139, "step": 12475 }, { "epoch": 15.96928, 
"grad_norm": 1.0184811353683472, "learning_rate": 2.5062024809923974e-05, "loss": 0.5444, "step": 12476 }, { "epoch": 15.97056, "grad_norm": 1.0089943408966064, "learning_rate": 2.5060024009603845e-05, "loss": 0.5257, "step": 12477 }, { "epoch": 15.97184, "grad_norm": 1.0058419704437256, "learning_rate": 2.5058023209283714e-05, "loss": 0.4931, "step": 12478 }, { "epoch": 15.97312, "grad_norm": 1.0035637617111206, "learning_rate": 2.5056022408963586e-05, "loss": 0.4789, "step": 12479 }, { "epoch": 15.9744, "grad_norm": 1.051369309425354, "learning_rate": 2.5054021608643458e-05, "loss": 0.5567, "step": 12480 }, { "epoch": 15.97568, "grad_norm": 1.0520974397659302, "learning_rate": 2.505202080832333e-05, "loss": 0.5678, "step": 12481 }, { "epoch": 15.97696, "grad_norm": 0.9842495918273926, "learning_rate": 2.50500200080032e-05, "loss": 0.5092, "step": 12482 }, { "epoch": 15.97824, "grad_norm": 0.9604758620262146, "learning_rate": 2.5048019207683077e-05, "loss": 0.526, "step": 12483 }, { "epoch": 15.97952, "grad_norm": 0.9821440577507019, "learning_rate": 2.504601840736295e-05, "loss": 0.4723, "step": 12484 }, { "epoch": 15.9808, "grad_norm": 0.9930018782615662, "learning_rate": 2.504401760704282e-05, "loss": 0.5205, "step": 12485 }, { "epoch": 15.98208, "grad_norm": 1.0078686475753784, "learning_rate": 2.504201680672269e-05, "loss": 0.5439, "step": 12486 }, { "epoch": 15.98336, "grad_norm": 1.0228537321090698, "learning_rate": 2.504001600640256e-05, "loss": 0.5895, "step": 12487 }, { "epoch": 15.98464, "grad_norm": 1.0596520900726318, "learning_rate": 2.5038015206082433e-05, "loss": 0.5966, "step": 12488 }, { "epoch": 15.98592, "grad_norm": 0.9747412800788879, "learning_rate": 2.5036014405762304e-05, "loss": 0.5209, "step": 12489 }, { "epoch": 15.9872, "grad_norm": 1.090663194656372, "learning_rate": 2.503401360544218e-05, "loss": 0.545, "step": 12490 }, { "epoch": 15.98848, "grad_norm": 0.9801415205001831, "learning_rate": 2.503201280512205e-05, "loss": 0.5044, 
"step": 12491 }, { "epoch": 15.98976, "grad_norm": 0.9882249236106873, "learning_rate": 2.5030012004801923e-05, "loss": 0.5395, "step": 12492 }, { "epoch": 15.99104, "grad_norm": 0.9727967381477356, "learning_rate": 2.5028011204481795e-05, "loss": 0.5298, "step": 12493 }, { "epoch": 15.99232, "grad_norm": 1.004541277885437, "learning_rate": 2.5026010404161664e-05, "loss": 0.5377, "step": 12494 }, { "epoch": 15.9936, "grad_norm": 0.9552838206291199, "learning_rate": 2.5024009603841536e-05, "loss": 0.5197, "step": 12495 }, { "epoch": 15.99488, "grad_norm": 0.9584947228431702, "learning_rate": 2.5022008803521407e-05, "loss": 0.5345, "step": 12496 }, { "epoch": 15.99616, "grad_norm": 0.9853367209434509, "learning_rate": 2.5020008003201283e-05, "loss": 0.5047, "step": 12497 }, { "epoch": 15.99744, "grad_norm": 0.9928620457649231, "learning_rate": 2.5018007202881154e-05, "loss": 0.5282, "step": 12498 }, { "epoch": 15.99872, "grad_norm": 1.0169389247894287, "learning_rate": 2.5016006402561026e-05, "loss": 0.5764, "step": 12499 }, { "epoch": 16.0, "grad_norm": null, "learning_rate": 2.5016006402561026e-05, "loss": 0.9264, "step": 12500 }, { "epoch": 16.00128, "grad_norm": 0.9896093010902405, "learning_rate": 2.5014005602240898e-05, "loss": 0.4865, "step": 12501 }, { "epoch": 16.00256, "grad_norm": 0.9680584669113159, "learning_rate": 2.501200480192077e-05, "loss": 0.4852, "step": 12502 }, { "epoch": 16.00384, "grad_norm": 0.9140750765800476, "learning_rate": 2.501000400160064e-05, "loss": 0.48, "step": 12503 }, { "epoch": 16.00512, "grad_norm": 0.9292126893997192, "learning_rate": 2.500800320128051e-05, "loss": 0.4647, "step": 12504 }, { "epoch": 16.0064, "grad_norm": 0.9622963070869446, "learning_rate": 2.500600240096039e-05, "loss": 0.4908, "step": 12505 }, { "epoch": 16.00768, "grad_norm": 0.9972251653671265, "learning_rate": 2.5004001600640257e-05, "loss": 0.5065, "step": 12506 }, { "epoch": 16.00896, "grad_norm": 1.0023291110992432, "learning_rate": 
2.500200080032013e-05, "loss": 0.5344, "step": 12507 }, { "epoch": 16.01024, "grad_norm": 0.9497984647750854, "learning_rate": 2.5e-05, "loss": 0.4935, "step": 12508 }, { "epoch": 16.01152, "grad_norm": 0.9904937148094177, "learning_rate": 2.4997999199679873e-05, "loss": 0.4871, "step": 12509 }, { "epoch": 16.0128, "grad_norm": 0.9689871668815613, "learning_rate": 2.4995998399359745e-05, "loss": 0.471, "step": 12510 }, { "epoch": 16.01408, "grad_norm": 0.9814728498458862, "learning_rate": 2.4993997599039617e-05, "loss": 0.5067, "step": 12511 }, { "epoch": 16.01536, "grad_norm": 0.97007817029953, "learning_rate": 2.499199679871949e-05, "loss": 0.507, "step": 12512 }, { "epoch": 16.01664, "grad_norm": 1.0243626832962036, "learning_rate": 2.498999599839936e-05, "loss": 0.5726, "step": 12513 }, { "epoch": 16.01792, "grad_norm": 1.0011252164840698, "learning_rate": 2.4987995198079232e-05, "loss": 0.4805, "step": 12514 }, { "epoch": 16.0192, "grad_norm": 0.9926708936691284, "learning_rate": 2.4985994397759104e-05, "loss": 0.5031, "step": 12515 }, { "epoch": 16.02048, "grad_norm": 0.9110226035118103, "learning_rate": 2.4983993597438976e-05, "loss": 0.4117, "step": 12516 }, { "epoch": 16.02176, "grad_norm": 1.0108290910720825, "learning_rate": 2.498199279711885e-05, "loss": 0.5429, "step": 12517 }, { "epoch": 16.02304, "grad_norm": 0.990385115146637, "learning_rate": 2.497999199679872e-05, "loss": 0.4741, "step": 12518 }, { "epoch": 16.02432, "grad_norm": 0.982943058013916, "learning_rate": 2.497799119647859e-05, "loss": 0.5495, "step": 12519 }, { "epoch": 16.0256, "grad_norm": 0.9531762599945068, "learning_rate": 2.4975990396158463e-05, "loss": 0.4758, "step": 12520 }, { "epoch": 16.02688, "grad_norm": 0.9687629342079163, "learning_rate": 2.497398959583834e-05, "loss": 0.503, "step": 12521 }, { "epoch": 16.02816, "grad_norm": 1.029827356338501, "learning_rate": 2.4971988795518207e-05, "loss": 0.5105, "step": 12522 }, { "epoch": 16.02944, "grad_norm": 1.062757968902588, 
"learning_rate": 2.496998799519808e-05, "loss": 0.5112, "step": 12523 }, { "epoch": 16.03072, "grad_norm": 1.0137965679168701, "learning_rate": 2.4967987194877954e-05, "loss": 0.5003, "step": 12524 }, { "epoch": 16.032, "grad_norm": 1.0039608478546143, "learning_rate": 2.4965986394557826e-05, "loss": 0.5081, "step": 12525 }, { "epoch": 16.03328, "grad_norm": 0.9910953044891357, "learning_rate": 2.4963985594237695e-05, "loss": 0.5416, "step": 12526 }, { "epoch": 16.03456, "grad_norm": 1.0009675025939941, "learning_rate": 2.4961984793917566e-05, "loss": 0.5186, "step": 12527 }, { "epoch": 16.03584, "grad_norm": 0.9520965814590454, "learning_rate": 2.4959983993597442e-05, "loss": 0.499, "step": 12528 }, { "epoch": 16.03712, "grad_norm": 0.9937947392463684, "learning_rate": 2.4957983193277314e-05, "loss": 0.5301, "step": 12529 }, { "epoch": 16.0384, "grad_norm": 1.0274006128311157, "learning_rate": 2.4955982392957182e-05, "loss": 0.5275, "step": 12530 }, { "epoch": 16.03968, "grad_norm": 0.9785746932029724, "learning_rate": 2.4953981592637057e-05, "loss": 0.5235, "step": 12531 }, { "epoch": 16.04096, "grad_norm": 0.9842962026596069, "learning_rate": 2.495198079231693e-05, "loss": 0.4919, "step": 12532 }, { "epoch": 16.04224, "grad_norm": 0.9303730726242065, "learning_rate": 2.49499799919968e-05, "loss": 0.4506, "step": 12533 }, { "epoch": 16.04352, "grad_norm": 1.0411185026168823, "learning_rate": 2.494797919167667e-05, "loss": 0.5188, "step": 12534 }, { "epoch": 16.0448, "grad_norm": 0.9499898552894592, "learning_rate": 2.4945978391356545e-05, "loss": 0.4906, "step": 12535 }, { "epoch": 16.04608, "grad_norm": 0.9907384514808655, "learning_rate": 2.4943977591036417e-05, "loss": 0.5185, "step": 12536 }, { "epoch": 16.04736, "grad_norm": 0.9777514338493347, "learning_rate": 2.494197679071629e-05, "loss": 0.5008, "step": 12537 }, { "epoch": 16.04864, "grad_norm": 1.0125657320022583, "learning_rate": 2.4939975990396157e-05, "loss": 0.4957, "step": 12538 }, { "epoch": 
16.04992, "grad_norm": 1.0564517974853516, "learning_rate": 2.4937975190076032e-05, "loss": 0.5403, "step": 12539 }, { "epoch": 16.0512, "grad_norm": 1.0502736568450928, "learning_rate": 2.4935974389755904e-05, "loss": 0.5516, "step": 12540 }, { "epoch": 16.05248, "grad_norm": 0.9993465542793274, "learning_rate": 2.4933973589435776e-05, "loss": 0.5124, "step": 12541 }, { "epoch": 16.05376, "grad_norm": 1.0142031908035278, "learning_rate": 2.4931972789115648e-05, "loss": 0.4857, "step": 12542 }, { "epoch": 16.05504, "grad_norm": 1.0206118822097778, "learning_rate": 2.492997198879552e-05, "loss": 0.5747, "step": 12543 }, { "epoch": 16.05632, "grad_norm": 1.0780686140060425, "learning_rate": 2.492797118847539e-05, "loss": 0.5194, "step": 12544 }, { "epoch": 16.0576, "grad_norm": 0.9203510284423828, "learning_rate": 2.4925970388155263e-05, "loss": 0.5112, "step": 12545 }, { "epoch": 16.05888, "grad_norm": 0.9816039800643921, "learning_rate": 2.4923969587835135e-05, "loss": 0.5326, "step": 12546 }, { "epoch": 16.06016, "grad_norm": 1.0037397146224976, "learning_rate": 2.4921968787515007e-05, "loss": 0.5013, "step": 12547 }, { "epoch": 16.06144, "grad_norm": 1.0208714008331299, "learning_rate": 2.491996798719488e-05, "loss": 0.4871, "step": 12548 }, { "epoch": 16.06272, "grad_norm": 1.050378441810608, "learning_rate": 2.491796718687475e-05, "loss": 0.551, "step": 12549 }, { "epoch": 16.064, "grad_norm": 1.0114853382110596, "learning_rate": 2.4915966386554623e-05, "loss": 0.5621, "step": 12550 }, { "epoch": 16.06528, "grad_norm": 0.9831593036651611, "learning_rate": 2.4913965586234494e-05, "loss": 0.5084, "step": 12551 }, { "epoch": 16.06656, "grad_norm": 0.9667739868164062, "learning_rate": 2.4911964785914366e-05, "loss": 0.4955, "step": 12552 }, { "epoch": 16.06784, "grad_norm": 1.0448590517044067, "learning_rate": 2.4909963985594238e-05, "loss": 0.5371, "step": 12553 }, { "epoch": 16.06912, "grad_norm": 1.012335181236267, "learning_rate": 2.490796318527411e-05, "loss": 
0.505, "step": 12554 }, { "epoch": 16.0704, "grad_norm": 1.0684611797332764, "learning_rate": 2.4905962384953982e-05, "loss": 0.5381, "step": 12555 }, { "epoch": 16.07168, "grad_norm": 1.0244140625, "learning_rate": 2.4903961584633857e-05, "loss": 0.4584, "step": 12556 }, { "epoch": 16.07296, "grad_norm": 1.0357544422149658, "learning_rate": 2.4901960784313726e-05, "loss": 0.5328, "step": 12557 }, { "epoch": 16.07424, "grad_norm": 1.0519928932189941, "learning_rate": 2.4899959983993597e-05, "loss": 0.4919, "step": 12558 }, { "epoch": 16.07552, "grad_norm": 1.0284450054168701, "learning_rate": 2.489795918367347e-05, "loss": 0.5221, "step": 12559 }, { "epoch": 16.0768, "grad_norm": 0.9368478059768677, "learning_rate": 2.4895958383353345e-05, "loss": 0.4828, "step": 12560 }, { "epoch": 16.07808, "grad_norm": 0.9959625601768494, "learning_rate": 2.4893957583033213e-05, "loss": 0.5332, "step": 12561 }, { "epoch": 16.07936, "grad_norm": 1.113194465637207, "learning_rate": 2.4891956782713085e-05, "loss": 0.509, "step": 12562 }, { "epoch": 16.08064, "grad_norm": 0.9688543081283569, "learning_rate": 2.488995598239296e-05, "loss": 0.5131, "step": 12563 }, { "epoch": 16.08192, "grad_norm": 1.0220595598220825, "learning_rate": 2.4887955182072832e-05, "loss": 0.4878, "step": 12564 }, { "epoch": 16.0832, "grad_norm": 1.0453155040740967, "learning_rate": 2.48859543817527e-05, "loss": 0.5074, "step": 12565 }, { "epoch": 16.08448, "grad_norm": 1.0528833866119385, "learning_rate": 2.4883953581432572e-05, "loss": 0.5193, "step": 12566 }, { "epoch": 16.08576, "grad_norm": 1.0217548608779907, "learning_rate": 2.4881952781112448e-05, "loss": 0.5449, "step": 12567 }, { "epoch": 16.087040000000002, "grad_norm": 1.0063393115997314, "learning_rate": 2.487995198079232e-05, "loss": 0.4872, "step": 12568 }, { "epoch": 16.08832, "grad_norm": 1.0022279024124146, "learning_rate": 2.4877951180472188e-05, "loss": 0.5315, "step": 12569 }, { "epoch": 16.0896, "grad_norm": 0.9380884170532227, 
"learning_rate": 2.4875950380152063e-05, "loss": 0.4819, "step": 12570 }, { "epoch": 16.09088, "grad_norm": 1.018511176109314, "learning_rate": 2.4873949579831935e-05, "loss": 0.5077, "step": 12571 }, { "epoch": 16.09216, "grad_norm": 0.9977579116821289, "learning_rate": 2.4871948779511807e-05, "loss": 0.491, "step": 12572 }, { "epoch": 16.09344, "grad_norm": 1.025163173675537, "learning_rate": 2.4869947979191675e-05, "loss": 0.4826, "step": 12573 }, { "epoch": 16.09472, "grad_norm": 1.0350524187088013, "learning_rate": 2.486794717887155e-05, "loss": 0.5445, "step": 12574 }, { "epoch": 16.096, "grad_norm": 1.023727297782898, "learning_rate": 2.4865946378551422e-05, "loss": 0.5394, "step": 12575 }, { "epoch": 16.09728, "grad_norm": 0.9656332731246948, "learning_rate": 2.4863945578231294e-05, "loss": 0.4724, "step": 12576 }, { "epoch": 16.09856, "grad_norm": 1.0451605319976807, "learning_rate": 2.4861944777911166e-05, "loss": 0.5221, "step": 12577 }, { "epoch": 16.09984, "grad_norm": 1.0490944385528564, "learning_rate": 2.4859943977591038e-05, "loss": 0.5555, "step": 12578 }, { "epoch": 16.10112, "grad_norm": 1.0549135208129883, "learning_rate": 2.485794317727091e-05, "loss": 0.4902, "step": 12579 }, { "epoch": 16.1024, "grad_norm": 0.9879729151725769, "learning_rate": 2.485594237695078e-05, "loss": 0.4875, "step": 12580 }, { "epoch": 16.10368, "grad_norm": 1.011025071144104, "learning_rate": 2.4853941576630654e-05, "loss": 0.4903, "step": 12581 }, { "epoch": 16.10496, "grad_norm": 0.9577101469039917, "learning_rate": 2.4851940776310525e-05, "loss": 0.5068, "step": 12582 }, { "epoch": 16.10624, "grad_norm": 0.9981372356414795, "learning_rate": 2.4849939975990397e-05, "loss": 0.5308, "step": 12583 }, { "epoch": 16.10752, "grad_norm": 0.9399566054344177, "learning_rate": 2.484793917567027e-05, "loss": 0.4912, "step": 12584 }, { "epoch": 16.1088, "grad_norm": 0.9757966995239258, "learning_rate": 2.484593837535014e-05, "loss": 0.5236, "step": 12585 }, { "epoch": 
16.11008, "grad_norm": 1.0582964420318604, "learning_rate": 2.4843937575030013e-05, "loss": 0.5207, "step": 12586 }, { "epoch": 16.11136, "grad_norm": 0.9841398596763611, "learning_rate": 2.4841936774709885e-05, "loss": 0.5375, "step": 12587 }, { "epoch": 16.11264, "grad_norm": 0.9746212959289551, "learning_rate": 2.4839935974389757e-05, "loss": 0.4938, "step": 12588 }, { "epoch": 16.11392, "grad_norm": 0.9631856679916382, "learning_rate": 2.483793517406963e-05, "loss": 0.4965, "step": 12589 }, { "epoch": 16.1152, "grad_norm": 0.99147629737854, "learning_rate": 2.48359343737495e-05, "loss": 0.5078, "step": 12590 }, { "epoch": 16.11648, "grad_norm": 1.013726830482483, "learning_rate": 2.4833933573429375e-05, "loss": 0.5205, "step": 12591 }, { "epoch": 16.11776, "grad_norm": 1.0126923322677612, "learning_rate": 2.4831932773109244e-05, "loss": 0.47, "step": 12592 }, { "epoch": 16.11904, "grad_norm": 0.9509507417678833, "learning_rate": 2.4829931972789116e-05, "loss": 0.5231, "step": 12593 }, { "epoch": 16.12032, "grad_norm": 1.1204897165298462, "learning_rate": 2.4827931172468988e-05, "loss": 0.537, "step": 12594 }, { "epoch": 16.1216, "grad_norm": 1.0702080726623535, "learning_rate": 2.4825930372148863e-05, "loss": 0.5298, "step": 12595 }, { "epoch": 16.12288, "grad_norm": 0.982305645942688, "learning_rate": 2.482392957182873e-05, "loss": 0.5061, "step": 12596 }, { "epoch": 16.12416, "grad_norm": 0.9869879484176636, "learning_rate": 2.4821928771508603e-05, "loss": 0.5259, "step": 12597 }, { "epoch": 16.12544, "grad_norm": 1.026171088218689, "learning_rate": 2.4819927971188475e-05, "loss": 0.4958, "step": 12598 }, { "epoch": 16.12672, "grad_norm": 0.9694840312004089, "learning_rate": 2.481792717086835e-05, "loss": 0.5093, "step": 12599 }, { "epoch": 16.128, "grad_norm": 1.0091004371643066, "learning_rate": 2.481592637054822e-05, "loss": 0.4943, "step": 12600 }, { "epoch": 16.12928, "grad_norm": 1.0162972211837769, "learning_rate": 2.481392557022809e-05, "loss": 
0.4813, "step": 12601 }, { "epoch": 16.13056, "grad_norm": 1.0108006000518799, "learning_rate": 2.4811924769907966e-05, "loss": 0.5231, "step": 12602 }, { "epoch": 16.13184, "grad_norm": 1.009252667427063, "learning_rate": 2.4809923969587838e-05, "loss": 0.5657, "step": 12603 }, { "epoch": 16.13312, "grad_norm": 0.9438086748123169, "learning_rate": 2.4807923169267706e-05, "loss": 0.4868, "step": 12604 }, { "epoch": 16.1344, "grad_norm": 0.9571229815483093, "learning_rate": 2.4805922368947578e-05, "loss": 0.4877, "step": 12605 }, { "epoch": 16.13568, "grad_norm": 1.0769860744476318, "learning_rate": 2.4803921568627453e-05, "loss": 0.5616, "step": 12606 }, { "epoch": 16.13696, "grad_norm": 0.9902837872505188, "learning_rate": 2.4801920768307325e-05, "loss": 0.5381, "step": 12607 }, { "epoch": 16.13824, "grad_norm": 0.9671089053153992, "learning_rate": 2.4799919967987194e-05, "loss": 0.5305, "step": 12608 }, { "epoch": 16.13952, "grad_norm": 1.058725118637085, "learning_rate": 2.479791916766707e-05, "loss": 0.4747, "step": 12609 }, { "epoch": 16.1408, "grad_norm": 1.0563637018203735, "learning_rate": 2.479591836734694e-05, "loss": 0.5424, "step": 12610 }, { "epoch": 16.14208, "grad_norm": 0.9643771052360535, "learning_rate": 2.4793917567026813e-05, "loss": 0.4893, "step": 12611 }, { "epoch": 16.14336, "grad_norm": 1.021338939666748, "learning_rate": 2.479191676670668e-05, "loss": 0.5032, "step": 12612 }, { "epoch": 16.14464, "grad_norm": 0.9763515591621399, "learning_rate": 2.4789915966386556e-05, "loss": 0.5038, "step": 12613 }, { "epoch": 16.14592, "grad_norm": 1.0414294004440308, "learning_rate": 2.4787915166066428e-05, "loss": 0.4937, "step": 12614 }, { "epoch": 16.1472, "grad_norm": 1.0470645427703857, "learning_rate": 2.47859143657463e-05, "loss": 0.5073, "step": 12615 }, { "epoch": 16.14848, "grad_norm": 0.9787927269935608, "learning_rate": 2.4783913565426172e-05, "loss": 0.517, "step": 12616 }, { "epoch": 16.14976, "grad_norm": 1.0010355710983276, 
"learning_rate": 2.4781912765106044e-05, "loss": 0.4683, "step": 12617 }, { "epoch": 16.15104, "grad_norm": 0.978118896484375, "learning_rate": 2.4779911964785916e-05, "loss": 0.4818, "step": 12618 }, { "epoch": 16.15232, "grad_norm": 0.9748190641403198, "learning_rate": 2.4777911164465787e-05, "loss": 0.5617, "step": 12619 }, { "epoch": 16.1536, "grad_norm": 0.9519315361976624, "learning_rate": 2.477591036414566e-05, "loss": 0.4726, "step": 12620 }, { "epoch": 16.15488, "grad_norm": 1.0163748264312744, "learning_rate": 2.477390956382553e-05, "loss": 0.5284, "step": 12621 }, { "epoch": 16.15616, "grad_norm": 1.035246729850769, "learning_rate": 2.4771908763505403e-05, "loss": 0.5376, "step": 12622 }, { "epoch": 16.15744, "grad_norm": 1.0474730730056763, "learning_rate": 2.4769907963185275e-05, "loss": 0.5061, "step": 12623 }, { "epoch": 16.15872, "grad_norm": 0.9941502809524536, "learning_rate": 2.4767907162865147e-05, "loss": 0.5582, "step": 12624 }, { "epoch": 16.16, "grad_norm": 1.0640026330947876, "learning_rate": 2.476590636254502e-05, "loss": 0.513, "step": 12625 }, { "epoch": 16.16128, "grad_norm": 1.030907154083252, "learning_rate": 2.476390556222489e-05, "loss": 0.4757, "step": 12626 }, { "epoch": 16.16256, "grad_norm": 0.996875524520874, "learning_rate": 2.4761904761904762e-05, "loss": 0.4995, "step": 12627 }, { "epoch": 16.16384, "grad_norm": 0.9607611894607544, "learning_rate": 2.4759903961584634e-05, "loss": 0.4947, "step": 12628 }, { "epoch": 16.16512, "grad_norm": 0.9883370995521545, "learning_rate": 2.4757903161264506e-05, "loss": 0.5161, "step": 12629 }, { "epoch": 16.1664, "grad_norm": 1.0071051120758057, "learning_rate": 2.475590236094438e-05, "loss": 0.4924, "step": 12630 }, { "epoch": 16.16768, "grad_norm": 1.0023144483566284, "learning_rate": 2.475390156062425e-05, "loss": 0.4846, "step": 12631 }, { "epoch": 16.16896, "grad_norm": 1.0280662775039673, "learning_rate": 2.475190076030412e-05, "loss": 0.5245, "step": 12632 }, { "epoch": 16.17024, 
"grad_norm": 1.0610262155532837, "learning_rate": 2.4749899959983993e-05, "loss": 0.5165, "step": 12633 }, { "epoch": 16.17152, "grad_norm": 0.9850628972053528, "learning_rate": 2.474789915966387e-05, "loss": 0.4994, "step": 12634 }, { "epoch": 16.1728, "grad_norm": 1.0291600227355957, "learning_rate": 2.4745898359343737e-05, "loss": 0.5098, "step": 12635 }, { "epoch": 16.17408, "grad_norm": 0.9818286895751953, "learning_rate": 2.474389755902361e-05, "loss": 0.5086, "step": 12636 }, { "epoch": 16.17536, "grad_norm": 0.9875121712684631, "learning_rate": 2.4741896758703484e-05, "loss": 0.475, "step": 12637 }, { "epoch": 16.17664, "grad_norm": 0.9203592538833618, "learning_rate": 2.4739895958383356e-05, "loss": 0.4558, "step": 12638 }, { "epoch": 16.17792, "grad_norm": 0.9402249455451965, "learning_rate": 2.4737895158063225e-05, "loss": 0.4928, "step": 12639 }, { "epoch": 16.1792, "grad_norm": 0.9855098724365234, "learning_rate": 2.4735894357743096e-05, "loss": 0.4993, "step": 12640 }, { "epoch": 16.18048, "grad_norm": 0.9504002332687378, "learning_rate": 2.4733893557422972e-05, "loss": 0.5067, "step": 12641 }, { "epoch": 16.18176, "grad_norm": 1.0600100755691528, "learning_rate": 2.4731892757102844e-05, "loss": 0.5632, "step": 12642 }, { "epoch": 16.18304, "grad_norm": 1.0117945671081543, "learning_rate": 2.4729891956782712e-05, "loss": 0.5322, "step": 12643 }, { "epoch": 16.18432, "grad_norm": 0.9786906242370605, "learning_rate": 2.4727891156462587e-05, "loss": 0.4928, "step": 12644 }, { "epoch": 16.1856, "grad_norm": 0.9911783337593079, "learning_rate": 2.472589035614246e-05, "loss": 0.4789, "step": 12645 }, { "epoch": 16.18688, "grad_norm": 1.0481369495391846, "learning_rate": 2.472388955582233e-05, "loss": 0.5374, "step": 12646 }, { "epoch": 16.18816, "grad_norm": 0.9751612544059753, "learning_rate": 2.47218887555022e-05, "loss": 0.4826, "step": 12647 }, { "epoch": 16.18944, "grad_norm": 1.0269287824630737, "learning_rate": 2.4719887955182075e-05, "loss": 0.5436, 
"step": 12648 }, { "epoch": 16.19072, "grad_norm": 0.966488242149353, "learning_rate": 2.4717887154861947e-05, "loss": 0.4886, "step": 12649 }, { "epoch": 16.192, "grad_norm": 1.0197010040283203, "learning_rate": 2.471588635454182e-05, "loss": 0.5519, "step": 12650 }, { "epoch": 16.19328, "grad_norm": 0.9842081665992737, "learning_rate": 2.4713885554221687e-05, "loss": 0.5198, "step": 12651 }, { "epoch": 16.19456, "grad_norm": 1.0210785865783691, "learning_rate": 2.4711884753901562e-05, "loss": 0.4755, "step": 12652 }, { "epoch": 16.19584, "grad_norm": 1.0439190864562988, "learning_rate": 2.4709883953581434e-05, "loss": 0.4878, "step": 12653 }, { "epoch": 16.19712, "grad_norm": 0.9892864227294922, "learning_rate": 2.4707883153261306e-05, "loss": 0.4941, "step": 12654 }, { "epoch": 16.1984, "grad_norm": 1.1083271503448486, "learning_rate": 2.4705882352941178e-05, "loss": 0.5406, "step": 12655 }, { "epoch": 16.19968, "grad_norm": 1.0279945135116577, "learning_rate": 2.470388155262105e-05, "loss": 0.5296, "step": 12656 }, { "epoch": 16.20096, "grad_norm": 0.9678087830543518, "learning_rate": 2.470188075230092e-05, "loss": 0.4879, "step": 12657 }, { "epoch": 16.20224, "grad_norm": 1.0200779438018799, "learning_rate": 2.4699879951980793e-05, "loss": 0.5342, "step": 12658 }, { "epoch": 16.20352, "grad_norm": 1.0152758359909058, "learning_rate": 2.4697879151660665e-05, "loss": 0.5535, "step": 12659 }, { "epoch": 16.2048, "grad_norm": 1.0245957374572754, "learning_rate": 2.4695878351340537e-05, "loss": 0.5309, "step": 12660 }, { "epoch": 16.20608, "grad_norm": 1.054991364479065, "learning_rate": 2.469387755102041e-05, "loss": 0.5356, "step": 12661 }, { "epoch": 16.20736, "grad_norm": 1.0215634107589722, "learning_rate": 2.4691876750700284e-05, "loss": 0.5165, "step": 12662 }, { "epoch": 16.20864, "grad_norm": 0.9687957763671875, "learning_rate": 2.4689875950380153e-05, "loss": 0.4773, "step": 12663 }, { "epoch": 16.20992, "grad_norm": 1.0658154487609863, "learning_rate": 
2.4687875150060024e-05, "loss": 0.582, "step": 12664 }, { "epoch": 16.2112, "grad_norm": 1.0069797039031982, "learning_rate": 2.4685874349739896e-05, "loss": 0.5316, "step": 12665 }, { "epoch": 16.21248, "grad_norm": 1.0188541412353516, "learning_rate": 2.468387354941977e-05, "loss": 0.5182, "step": 12666 }, { "epoch": 16.21376, "grad_norm": 1.0730429887771606, "learning_rate": 2.468187274909964e-05, "loss": 0.5226, "step": 12667 }, { "epoch": 16.21504, "grad_norm": 1.0113489627838135, "learning_rate": 2.4679871948779512e-05, "loss": 0.4887, "step": 12668 }, { "epoch": 16.21632, "grad_norm": 0.9426849484443665, "learning_rate": 2.4677871148459387e-05, "loss": 0.4777, "step": 12669 }, { "epoch": 16.2176, "grad_norm": 0.9736514687538147, "learning_rate": 2.467587034813926e-05, "loss": 0.4941, "step": 12670 }, { "epoch": 16.21888, "grad_norm": 1.0153673887252808, "learning_rate": 2.4673869547819127e-05, "loss": 0.5524, "step": 12671 }, { "epoch": 16.22016, "grad_norm": 1.018025279045105, "learning_rate": 2.4671868747499e-05, "loss": 0.4976, "step": 12672 }, { "epoch": 16.22144, "grad_norm": 0.9701156616210938, "learning_rate": 2.4669867947178875e-05, "loss": 0.4712, "step": 12673 }, { "epoch": 16.22272, "grad_norm": 1.0330697298049927, "learning_rate": 2.4667867146858746e-05, "loss": 0.4938, "step": 12674 }, { "epoch": 16.224, "grad_norm": 0.9708216190338135, "learning_rate": 2.4665866346538615e-05, "loss": 0.4665, "step": 12675 }, { "epoch": 16.22528, "grad_norm": 0.9829319715499878, "learning_rate": 2.466386554621849e-05, "loss": 0.5134, "step": 12676 }, { "epoch": 16.22656, "grad_norm": 1.0143637657165527, "learning_rate": 2.4661864745898362e-05, "loss": 0.5448, "step": 12677 }, { "epoch": 16.22784, "grad_norm": 1.0487371683120728, "learning_rate": 2.4659863945578234e-05, "loss": 0.5299, "step": 12678 }, { "epoch": 16.22912, "grad_norm": 0.9548150300979614, "learning_rate": 2.4657863145258102e-05, "loss": 0.4779, "step": 12679 }, { "epoch": 16.2304, "grad_norm": 
1.0021804571151733, "learning_rate": 2.4655862344937978e-05, "loss": 0.497, "step": 12680 }, { "epoch": 16.23168, "grad_norm": 0.9995138049125671, "learning_rate": 2.465386154461785e-05, "loss": 0.5175, "step": 12681 }, { "epoch": 16.23296, "grad_norm": 1.010904312133789, "learning_rate": 2.465186074429772e-05, "loss": 0.5227, "step": 12682 }, { "epoch": 16.23424, "grad_norm": 0.9441868662834167, "learning_rate": 2.4649859943977593e-05, "loss": 0.5053, "step": 12683 }, { "epoch": 16.23552, "grad_norm": 1.0118534564971924, "learning_rate": 2.4647859143657465e-05, "loss": 0.5208, "step": 12684 }, { "epoch": 16.2368, "grad_norm": 1.0776724815368652, "learning_rate": 2.4645858343337337e-05, "loss": 0.5097, "step": 12685 }, { "epoch": 16.23808, "grad_norm": 1.064697504043579, "learning_rate": 2.464385754301721e-05, "loss": 0.5166, "step": 12686 }, { "epoch": 16.23936, "grad_norm": 1.0309760570526123, "learning_rate": 2.464185674269708e-05, "loss": 0.5428, "step": 12687 }, { "epoch": 16.24064, "grad_norm": 0.9784319400787354, "learning_rate": 2.4639855942376952e-05, "loss": 0.4882, "step": 12688 }, { "epoch": 16.24192, "grad_norm": 1.0319559574127197, "learning_rate": 2.4637855142056824e-05, "loss": 0.527, "step": 12689 }, { "epoch": 16.2432, "grad_norm": 0.991915762424469, "learning_rate": 2.4635854341736696e-05, "loss": 0.523, "step": 12690 }, { "epoch": 16.24448, "grad_norm": 1.043216586112976, "learning_rate": 2.4633853541416568e-05, "loss": 0.4977, "step": 12691 }, { "epoch": 16.24576, "grad_norm": 1.063425064086914, "learning_rate": 2.463185274109644e-05, "loss": 0.5245, "step": 12692 }, { "epoch": 16.24704, "grad_norm": 1.0236256122589111, "learning_rate": 2.462985194077631e-05, "loss": 0.5112, "step": 12693 }, { "epoch": 16.24832, "grad_norm": 0.992674708366394, "learning_rate": 2.4627851140456183e-05, "loss": 0.5062, "step": 12694 }, { "epoch": 16.2496, "grad_norm": 1.0538854598999023, "learning_rate": 2.4625850340136055e-05, "loss": 0.5244, "step": 12695 }, { 
"epoch": 16.25088, "grad_norm": 1.021213412284851, "learning_rate": 2.4623849539815927e-05, "loss": 0.5097, "step": 12696 }, { "epoch": 16.25216, "grad_norm": 1.0427947044372559, "learning_rate": 2.4621848739495802e-05, "loss": 0.5288, "step": 12697 }, { "epoch": 16.25344, "grad_norm": 1.079925537109375, "learning_rate": 2.461984793917567e-05, "loss": 0.511, "step": 12698 }, { "epoch": 16.25472, "grad_norm": 1.0293481349945068, "learning_rate": 2.4617847138855543e-05, "loss": 0.534, "step": 12699 }, { "epoch": 16.256, "grad_norm": 1.0903656482696533, "learning_rate": 2.4615846338535415e-05, "loss": 0.5344, "step": 12700 }, { "epoch": 16.25728, "grad_norm": 0.9660569429397583, "learning_rate": 2.461384553821529e-05, "loss": 0.507, "step": 12701 }, { "epoch": 16.25856, "grad_norm": 0.9629020094871521, "learning_rate": 2.461184473789516e-05, "loss": 0.4976, "step": 12702 }, { "epoch": 16.25984, "grad_norm": 0.9647486209869385, "learning_rate": 2.460984393757503e-05, "loss": 0.504, "step": 12703 }, { "epoch": 16.26112, "grad_norm": 0.996180534362793, "learning_rate": 2.4607843137254902e-05, "loss": 0.5255, "step": 12704 }, { "epoch": 16.2624, "grad_norm": 0.9972898960113525, "learning_rate": 2.4605842336934777e-05, "loss": 0.5179, "step": 12705 }, { "epoch": 16.26368, "grad_norm": 0.97672438621521, "learning_rate": 2.4603841536614646e-05, "loss": 0.4798, "step": 12706 }, { "epoch": 16.26496, "grad_norm": 1.013742446899414, "learning_rate": 2.4601840736294518e-05, "loss": 0.542, "step": 12707 }, { "epoch": 16.26624, "grad_norm": 1.0334280729293823, "learning_rate": 2.4599839935974393e-05, "loss": 0.517, "step": 12708 }, { "epoch": 16.26752, "grad_norm": 0.966270923614502, "learning_rate": 2.4597839135654265e-05, "loss": 0.4617, "step": 12709 }, { "epoch": 16.2688, "grad_norm": 1.0279269218444824, "learning_rate": 2.4595838335334133e-05, "loss": 0.5156, "step": 12710 }, { "epoch": 16.27008, "grad_norm": 0.993587851524353, "learning_rate": 2.4593837535014005e-05, "loss": 
0.5515, "step": 12711 }, { "epoch": 16.27136, "grad_norm": 1.055559515953064, "learning_rate": 2.459183673469388e-05, "loss": 0.5437, "step": 12712 }, { "epoch": 16.27264, "grad_norm": 1.0224788188934326, "learning_rate": 2.4589835934373752e-05, "loss": 0.5237, "step": 12713 }, { "epoch": 16.27392, "grad_norm": 1.0352107286453247, "learning_rate": 2.458783513405362e-05, "loss": 0.518, "step": 12714 }, { "epoch": 16.2752, "grad_norm": 1.0856186151504517, "learning_rate": 2.4585834333733496e-05, "loss": 0.5168, "step": 12715 }, { "epoch": 16.27648, "grad_norm": 1.0590870380401611, "learning_rate": 2.4583833533413368e-05, "loss": 0.517, "step": 12716 }, { "epoch": 16.27776, "grad_norm": 1.0563161373138428, "learning_rate": 2.458183273309324e-05, "loss": 0.5362, "step": 12717 }, { "epoch": 16.27904, "grad_norm": 1.0422947406768799, "learning_rate": 2.4579831932773108e-05, "loss": 0.5014, "step": 12718 }, { "epoch": 16.28032, "grad_norm": 1.0459036827087402, "learning_rate": 2.4577831132452983e-05, "loss": 0.5276, "step": 12719 }, { "epoch": 16.2816, "grad_norm": 1.057701587677002, "learning_rate": 2.4575830332132855e-05, "loss": 0.4813, "step": 12720 }, { "epoch": 16.28288, "grad_norm": 1.0466841459274292, "learning_rate": 2.4573829531812727e-05, "loss": 0.5175, "step": 12721 }, { "epoch": 16.28416, "grad_norm": 0.9757436513900757, "learning_rate": 2.45718287314926e-05, "loss": 0.4863, "step": 12722 }, { "epoch": 16.28544, "grad_norm": 1.0128544569015503, "learning_rate": 2.456982793117247e-05, "loss": 0.507, "step": 12723 }, { "epoch": 16.28672, "grad_norm": 0.9682973027229309, "learning_rate": 2.4567827130852343e-05, "loss": 0.5172, "step": 12724 }, { "epoch": 16.288, "grad_norm": 0.9596603512763977, "learning_rate": 2.4565826330532214e-05, "loss": 0.522, "step": 12725 }, { "epoch": 16.28928, "grad_norm": 0.9290403723716736, "learning_rate": 2.4563825530212086e-05, "loss": 0.472, "step": 12726 }, { "epoch": 16.29056, "grad_norm": 1.052056908607483, "learning_rate": 
2.4561824729891958e-05, "loss": 0.5415, "step": 12727 }, { "epoch": 16.29184, "grad_norm": 1.0477123260498047, "learning_rate": 2.455982392957183e-05, "loss": 0.5393, "step": 12728 }, { "epoch": 16.29312, "grad_norm": 0.9668442010879517, "learning_rate": 2.4557823129251702e-05, "loss": 0.4741, "step": 12729 }, { "epoch": 16.2944, "grad_norm": 1.0437530279159546, "learning_rate": 2.4555822328931574e-05, "loss": 0.5, "step": 12730 }, { "epoch": 16.29568, "grad_norm": 0.9850262999534607, "learning_rate": 2.4553821528611446e-05, "loss": 0.5108, "step": 12731 }, { "epoch": 16.29696, "grad_norm": 0.9622449278831482, "learning_rate": 2.4551820728291317e-05, "loss": 0.506, "step": 12732 }, { "epoch": 16.29824, "grad_norm": 1.0005285739898682, "learning_rate": 2.454981992797119e-05, "loss": 0.5083, "step": 12733 }, { "epoch": 16.29952, "grad_norm": 0.9644542336463928, "learning_rate": 2.454781912765106e-05, "loss": 0.4986, "step": 12734 }, { "epoch": 16.3008, "grad_norm": 1.0753077268600464, "learning_rate": 2.4545818327330933e-05, "loss": 0.5464, "step": 12735 }, { "epoch": 16.30208, "grad_norm": 1.04744291305542, "learning_rate": 2.4543817527010808e-05, "loss": 0.5277, "step": 12736 }, { "epoch": 16.30336, "grad_norm": 0.9650123119354248, "learning_rate": 2.4541816726690677e-05, "loss": 0.4648, "step": 12737 }, { "epoch": 16.30464, "grad_norm": 0.9497844576835632, "learning_rate": 2.453981592637055e-05, "loss": 0.5344, "step": 12738 }, { "epoch": 16.30592, "grad_norm": 1.0927493572235107, "learning_rate": 2.453781512605042e-05, "loss": 0.5106, "step": 12739 }, { "epoch": 16.3072, "grad_norm": 1.0164883136749268, "learning_rate": 2.4535814325730296e-05, "loss": 0.5394, "step": 12740 }, { "epoch": 16.30848, "grad_norm": 0.9408139586448669, "learning_rate": 2.4533813525410164e-05, "loss": 0.4882, "step": 12741 }, { "epoch": 16.30976, "grad_norm": 0.9942525625228882, "learning_rate": 2.4531812725090036e-05, "loss": 0.5058, "step": 12742 }, { "epoch": 16.31104, "grad_norm": 
1.0041530132293701, "learning_rate": 2.452981192476991e-05, "loss": 0.4444, "step": 12743 }, { "epoch": 16.31232, "grad_norm": 0.9434515833854675, "learning_rate": 2.4527811124449783e-05, "loss": 0.4969, "step": 12744 }, { "epoch": 16.3136, "grad_norm": 1.0025871992111206, "learning_rate": 2.452581032412965e-05, "loss": 0.4901, "step": 12745 }, { "epoch": 16.31488, "grad_norm": 0.9953147768974304, "learning_rate": 2.4523809523809523e-05, "loss": 0.512, "step": 12746 }, { "epoch": 16.31616, "grad_norm": 1.1160242557525635, "learning_rate": 2.45218087234894e-05, "loss": 0.5995, "step": 12747 }, { "epoch": 16.31744, "grad_norm": 0.9796026349067688, "learning_rate": 2.451980792316927e-05, "loss": 0.4865, "step": 12748 }, { "epoch": 16.31872, "grad_norm": 1.0219937562942505, "learning_rate": 2.451780712284914e-05, "loss": 0.4902, "step": 12749 }, { "epoch": 16.32, "grad_norm": 1.036454200744629, "learning_rate": 2.4515806322529014e-05, "loss": 0.5686, "step": 12750 }, { "epoch": 16.32128, "grad_norm": 0.9680700302124023, "learning_rate": 2.4513805522208886e-05, "loss": 0.5085, "step": 12751 }, { "epoch": 16.32256, "grad_norm": 1.001165509223938, "learning_rate": 2.4511804721888758e-05, "loss": 0.5052, "step": 12752 }, { "epoch": 16.32384, "grad_norm": 0.9691818356513977, "learning_rate": 2.4509803921568626e-05, "loss": 0.5176, "step": 12753 }, { "epoch": 16.32512, "grad_norm": 0.9570626616477966, "learning_rate": 2.45078031212485e-05, "loss": 0.4953, "step": 12754 }, { "epoch": 16.3264, "grad_norm": 1.052276849746704, "learning_rate": 2.4505802320928374e-05, "loss": 0.5756, "step": 12755 }, { "epoch": 16.32768, "grad_norm": 0.9954919219017029, "learning_rate": 2.4503801520608245e-05, "loss": 0.4592, "step": 12756 }, { "epoch": 16.32896, "grad_norm": 1.041548490524292, "learning_rate": 2.4501800720288117e-05, "loss": 0.4829, "step": 12757 }, { "epoch": 16.33024, "grad_norm": 1.0538829565048218, "learning_rate": 2.449979991996799e-05, "loss": 0.5347, "step": 12758 }, { 
"epoch": 16.33152, "grad_norm": 1.0308974981307983, "learning_rate": 2.449779911964786e-05, "loss": 0.5045, "step": 12759 }, { "epoch": 16.3328, "grad_norm": 1.0308443307876587, "learning_rate": 2.4495798319327733e-05, "loss": 0.5121, "step": 12760 }, { "epoch": 16.33408, "grad_norm": 1.0774739980697632, "learning_rate": 2.4493797519007605e-05, "loss": 0.5345, "step": 12761 }, { "epoch": 16.33536, "grad_norm": 1.0473657846450806, "learning_rate": 2.4491796718687477e-05, "loss": 0.5251, "step": 12762 }, { "epoch": 16.33664, "grad_norm": 1.0154544115066528, "learning_rate": 2.448979591836735e-05, "loss": 0.498, "step": 12763 }, { "epoch": 16.33792, "grad_norm": 0.9398629069328308, "learning_rate": 2.448779511804722e-05, "loss": 0.493, "step": 12764 }, { "epoch": 16.3392, "grad_norm": 1.0081340074539185, "learning_rate": 2.4485794317727092e-05, "loss": 0.5177, "step": 12765 }, { "epoch": 16.34048, "grad_norm": 1.0168988704681396, "learning_rate": 2.4483793517406964e-05, "loss": 0.5382, "step": 12766 }, { "epoch": 16.34176, "grad_norm": 1.0033296346664429, "learning_rate": 2.4481792717086836e-05, "loss": 0.5449, "step": 12767 }, { "epoch": 16.34304, "grad_norm": 0.9844812154769897, "learning_rate": 2.4479791916766708e-05, "loss": 0.4916, "step": 12768 }, { "epoch": 16.34432, "grad_norm": 1.042959213256836, "learning_rate": 2.447779111644658e-05, "loss": 0.4871, "step": 12769 }, { "epoch": 16.3456, "grad_norm": 1.0168309211730957, "learning_rate": 2.447579031612645e-05, "loss": 0.4914, "step": 12770 }, { "epoch": 16.34688, "grad_norm": 1.0198049545288086, "learning_rate": 2.4473789515806323e-05, "loss": 0.4982, "step": 12771 }, { "epoch": 16.34816, "grad_norm": 1.0532821416854858, "learning_rate": 2.4471788715486195e-05, "loss": 0.5367, "step": 12772 }, { "epoch": 16.34944, "grad_norm": 1.0325431823730469, "learning_rate": 2.4469787915166067e-05, "loss": 0.5169, "step": 12773 }, { "epoch": 16.35072, "grad_norm": 0.9796453714370728, "learning_rate": 
2.446778711484594e-05, "loss": 0.4951, "step": 12774 }, { "epoch": 16.352, "grad_norm": 1.0543535947799683, "learning_rate": 2.4465786314525814e-05, "loss": 0.5832, "step": 12775 }, { "epoch": 16.35328, "grad_norm": 1.0445586442947388, "learning_rate": 2.4463785514205683e-05, "loss": 0.5339, "step": 12776 }, { "epoch": 16.35456, "grad_norm": 1.0602067708969116, "learning_rate": 2.4461784713885554e-05, "loss": 0.5414, "step": 12777 }, { "epoch": 16.35584, "grad_norm": 1.0592375993728638, "learning_rate": 2.4459783913565426e-05, "loss": 0.5309, "step": 12778 }, { "epoch": 16.35712, "grad_norm": 1.0167428255081177, "learning_rate": 2.44577831132453e-05, "loss": 0.5576, "step": 12779 }, { "epoch": 16.3584, "grad_norm": 1.0088391304016113, "learning_rate": 2.445578231292517e-05, "loss": 0.4863, "step": 12780 }, { "epoch": 16.35968, "grad_norm": 1.0060917139053345, "learning_rate": 2.4453781512605042e-05, "loss": 0.4916, "step": 12781 }, { "epoch": 16.36096, "grad_norm": 0.9650666117668152, "learning_rate": 2.4451780712284917e-05, "loss": 0.483, "step": 12782 }, { "epoch": 16.36224, "grad_norm": 1.0193016529083252, "learning_rate": 2.444977991196479e-05, "loss": 0.5077, "step": 12783 }, { "epoch": 16.36352, "grad_norm": 1.00182044506073, "learning_rate": 2.4447779111644657e-05, "loss": 0.4533, "step": 12784 }, { "epoch": 16.3648, "grad_norm": 1.016948938369751, "learning_rate": 2.444577831132453e-05, "loss": 0.4998, "step": 12785 }, { "epoch": 16.36608, "grad_norm": 1.0034083127975464, "learning_rate": 2.4443777511004404e-05, "loss": 0.5126, "step": 12786 }, { "epoch": 16.36736, "grad_norm": 1.0436108112335205, "learning_rate": 2.4441776710684276e-05, "loss": 0.503, "step": 12787 }, { "epoch": 16.36864, "grad_norm": 1.076514482498169, "learning_rate": 2.4439775910364145e-05, "loss": 0.5608, "step": 12788 }, { "epoch": 16.36992, "grad_norm": 1.050675868988037, "learning_rate": 2.443777511004402e-05, "loss": 0.5245, "step": 12789 }, { "epoch": 16.3712, "grad_norm": 
0.9627003073692322, "learning_rate": 2.4435774309723892e-05, "loss": 0.4655, "step": 12790 }, { "epoch": 16.37248, "grad_norm": 1.0105419158935547, "learning_rate": 2.4433773509403764e-05, "loss": 0.5192, "step": 12791 }, { "epoch": 16.37376, "grad_norm": 1.0531201362609863, "learning_rate": 2.4431772709083632e-05, "loss": 0.5213, "step": 12792 }, { "epoch": 16.37504, "grad_norm": 1.0447622537612915, "learning_rate": 2.4429771908763507e-05, "loss": 0.4838, "step": 12793 }, { "epoch": 16.37632, "grad_norm": 1.0392229557037354, "learning_rate": 2.442777110844338e-05, "loss": 0.5205, "step": 12794 }, { "epoch": 16.3776, "grad_norm": 0.9850015640258789, "learning_rate": 2.442577030812325e-05, "loss": 0.5161, "step": 12795 }, { "epoch": 16.37888, "grad_norm": 1.0995105504989624, "learning_rate": 2.4423769507803123e-05, "loss": 0.5643, "step": 12796 }, { "epoch": 16.38016, "grad_norm": 1.0674548149108887, "learning_rate": 2.4421768707482995e-05, "loss": 0.5182, "step": 12797 }, { "epoch": 16.38144, "grad_norm": 1.1218688488006592, "learning_rate": 2.4419767907162867e-05, "loss": 0.5739, "step": 12798 }, { "epoch": 16.38272, "grad_norm": 0.980241060256958, "learning_rate": 2.441776710684274e-05, "loss": 0.5029, "step": 12799 }, { "epoch": 16.384, "grad_norm": 1.0264852046966553, "learning_rate": 2.441576630652261e-05, "loss": 0.5188, "step": 12800 }, { "epoch": 16.38528, "grad_norm": 1.005523920059204, "learning_rate": 2.4413765506202482e-05, "loss": 0.5254, "step": 12801 }, { "epoch": 16.38656, "grad_norm": 1.0361162424087524, "learning_rate": 2.4411764705882354e-05, "loss": 0.4976, "step": 12802 }, { "epoch": 16.38784, "grad_norm": 1.035775899887085, "learning_rate": 2.4409763905562226e-05, "loss": 0.5238, "step": 12803 }, { "epoch": 16.38912, "grad_norm": 1.037307858467102, "learning_rate": 2.4407763105242098e-05, "loss": 0.5143, "step": 12804 }, { "epoch": 16.3904, "grad_norm": 0.9811749458312988, "learning_rate": 2.440576230492197e-05, "loss": 0.5074, "step": 12805 
}, { "epoch": 16.39168, "grad_norm": 1.11452317237854, "learning_rate": 2.440376150460184e-05, "loss": 0.4935, "step": 12806 }, { "epoch": 16.39296, "grad_norm": 0.9943965077400208, "learning_rate": 2.4401760704281713e-05, "loss": 0.5208, "step": 12807 }, { "epoch": 16.39424, "grad_norm": 1.129791259765625, "learning_rate": 2.4399759903961585e-05, "loss": 0.5443, "step": 12808 }, { "epoch": 16.39552, "grad_norm": 1.0286704301834106, "learning_rate": 2.4397759103641457e-05, "loss": 0.5206, "step": 12809 }, { "epoch": 16.3968, "grad_norm": 1.0669382810592651, "learning_rate": 2.4395758303321332e-05, "loss": 0.5445, "step": 12810 }, { "epoch": 16.39808, "grad_norm": 1.0054514408111572, "learning_rate": 2.43937575030012e-05, "loss": 0.5141, "step": 12811 }, { "epoch": 16.39936, "grad_norm": 1.0013278722763062, "learning_rate": 2.4391756702681073e-05, "loss": 0.5338, "step": 12812 }, { "epoch": 16.40064, "grad_norm": 1.0158824920654297, "learning_rate": 2.4389755902360945e-05, "loss": 0.5281, "step": 12813 }, { "epoch": 16.40192, "grad_norm": 0.9622918367385864, "learning_rate": 2.438775510204082e-05, "loss": 0.4957, "step": 12814 }, { "epoch": 16.4032, "grad_norm": 1.0246028900146484, "learning_rate": 2.438575430172069e-05, "loss": 0.5386, "step": 12815 }, { "epoch": 16.40448, "grad_norm": 1.0182970762252808, "learning_rate": 2.438375350140056e-05, "loss": 0.4567, "step": 12816 }, { "epoch": 16.40576, "grad_norm": 1.0306717157363892, "learning_rate": 2.4381752701080432e-05, "loss": 0.574, "step": 12817 }, { "epoch": 16.40704, "grad_norm": 1.003909707069397, "learning_rate": 2.4379751900760307e-05, "loss": 0.4996, "step": 12818 }, { "epoch": 16.40832, "grad_norm": 1.0095973014831543, "learning_rate": 2.4377751100440176e-05, "loss": 0.5096, "step": 12819 }, { "epoch": 16.4096, "grad_norm": 1.0749574899673462, "learning_rate": 2.4375750300120048e-05, "loss": 0.5673, "step": 12820 }, { "epoch": 16.41088, "grad_norm": 1.0384900569915771, "learning_rate": 
2.4373749499799923e-05, "loss": 0.4899, "step": 12821 }, { "epoch": 16.41216, "grad_norm": 1.0096982717514038, "learning_rate": 2.4371748699479795e-05, "loss": 0.5459, "step": 12822 }, { "epoch": 16.41344, "grad_norm": 0.9829618334770203, "learning_rate": 2.4369747899159663e-05, "loss": 0.5178, "step": 12823 }, { "epoch": 16.41472, "grad_norm": 1.046731948852539, "learning_rate": 2.4367747098839535e-05, "loss": 0.5299, "step": 12824 }, { "epoch": 16.416, "grad_norm": 1.0445644855499268, "learning_rate": 2.436574629851941e-05, "loss": 0.5256, "step": 12825 }, { "epoch": 16.41728, "grad_norm": 1.0414682626724243, "learning_rate": 2.4363745498199282e-05, "loss": 0.5141, "step": 12826 }, { "epoch": 16.41856, "grad_norm": 1.0373891592025757, "learning_rate": 2.436174469787915e-05, "loss": 0.4973, "step": 12827 }, { "epoch": 16.41984, "grad_norm": 0.9886151552200317, "learning_rate": 2.4359743897559026e-05, "loss": 0.5466, "step": 12828 }, { "epoch": 16.42112, "grad_norm": 1.0570114850997925, "learning_rate": 2.4357743097238898e-05, "loss": 0.5385, "step": 12829 }, { "epoch": 16.4224, "grad_norm": 1.0138678550720215, "learning_rate": 2.435574229691877e-05, "loss": 0.5263, "step": 12830 }, { "epoch": 16.42368, "grad_norm": 1.007300615310669, "learning_rate": 2.4353741496598638e-05, "loss": 0.5041, "step": 12831 }, { "epoch": 16.42496, "grad_norm": 1.026974081993103, "learning_rate": 2.4351740696278513e-05, "loss": 0.521, "step": 12832 }, { "epoch": 16.42624, "grad_norm": 0.9623899459838867, "learning_rate": 2.4349739895958385e-05, "loss": 0.4611, "step": 12833 }, { "epoch": 16.42752, "grad_norm": 1.0570974349975586, "learning_rate": 2.4347739095638257e-05, "loss": 0.5223, "step": 12834 }, { "epoch": 16.4288, "grad_norm": 0.989608883857727, "learning_rate": 2.434573829531813e-05, "loss": 0.4902, "step": 12835 }, { "epoch": 16.43008, "grad_norm": 1.0434807538986206, "learning_rate": 2.4343737494998e-05, "loss": 0.5691, "step": 12836 }, { "epoch": 16.43136, "grad_norm": 
1.035138487815857, "learning_rate": 2.4341736694677873e-05, "loss": 0.5493, "step": 12837 }, { "epoch": 16.43264, "grad_norm": 1.1263927221298218, "learning_rate": 2.4339735894357744e-05, "loss": 0.5908, "step": 12838 }, { "epoch": 16.43392, "grad_norm": 1.0934780836105347, "learning_rate": 2.4337735094037616e-05, "loss": 0.5754, "step": 12839 }, { "epoch": 16.4352, "grad_norm": 1.0011106729507446, "learning_rate": 2.4335734293717488e-05, "loss": 0.4959, "step": 12840 }, { "epoch": 16.43648, "grad_norm": 1.0350295305252075, "learning_rate": 2.433373349339736e-05, "loss": 0.5492, "step": 12841 }, { "epoch": 16.43776, "grad_norm": 1.044190526008606, "learning_rate": 2.4331732693077232e-05, "loss": 0.5304, "step": 12842 }, { "epoch": 16.43904, "grad_norm": 0.9419692158699036, "learning_rate": 2.4329731892757104e-05, "loss": 0.4753, "step": 12843 }, { "epoch": 16.44032, "grad_norm": 0.9518587589263916, "learning_rate": 2.4327731092436976e-05, "loss": 0.4901, "step": 12844 }, { "epoch": 16.4416, "grad_norm": 0.9863883852958679, "learning_rate": 2.4325730292116847e-05, "loss": 0.5236, "step": 12845 }, { "epoch": 16.44288, "grad_norm": 1.0617551803588867, "learning_rate": 2.432372949179672e-05, "loss": 0.5631, "step": 12846 }, { "epoch": 16.44416, "grad_norm": 1.0354115962982178, "learning_rate": 2.432172869147659e-05, "loss": 0.499, "step": 12847 }, { "epoch": 16.44544, "grad_norm": 1.103115439414978, "learning_rate": 2.4319727891156463e-05, "loss": 0.5406, "step": 12848 }, { "epoch": 16.44672, "grad_norm": 0.9908240437507629, "learning_rate": 2.4317727090836338e-05, "loss": 0.478, "step": 12849 }, { "epoch": 16.448, "grad_norm": 1.0611495971679688, "learning_rate": 2.4315726290516207e-05, "loss": 0.5106, "step": 12850 }, { "epoch": 16.44928, "grad_norm": 1.039263367652893, "learning_rate": 2.431372549019608e-05, "loss": 0.5012, "step": 12851 }, { "epoch": 16.45056, "grad_norm": 1.0724480152130127, "learning_rate": 2.431172468987595e-05, "loss": 0.5161, "step": 12852 }, 
{ "epoch": 16.45184, "grad_norm": 1.0709526538848877, "learning_rate": 2.4309723889555826e-05, "loss": 0.4908, "step": 12853 }, { "epoch": 16.45312, "grad_norm": 1.0412678718566895, "learning_rate": 2.4307723089235694e-05, "loss": 0.5438, "step": 12854 }, { "epoch": 16.4544, "grad_norm": 0.9475101828575134, "learning_rate": 2.4305722288915566e-05, "loss": 0.4536, "step": 12855 }, { "epoch": 16.45568, "grad_norm": 0.9541323781013489, "learning_rate": 2.430372148859544e-05, "loss": 0.5116, "step": 12856 }, { "epoch": 16.45696, "grad_norm": 1.0412318706512451, "learning_rate": 2.4301720688275313e-05, "loss": 0.538, "step": 12857 }, { "epoch": 16.45824, "grad_norm": 1.0488194227218628, "learning_rate": 2.429971988795518e-05, "loss": 0.5507, "step": 12858 }, { "epoch": 16.45952, "grad_norm": 0.9851821660995483, "learning_rate": 2.4297719087635053e-05, "loss": 0.5014, "step": 12859 }, { "epoch": 16.4608, "grad_norm": 0.9385454654693604, "learning_rate": 2.429571828731493e-05, "loss": 0.4872, "step": 12860 }, { "epoch": 16.46208, "grad_norm": 1.0448015928268433, "learning_rate": 2.42937174869948e-05, "loss": 0.5212, "step": 12861 }, { "epoch": 16.46336, "grad_norm": 0.9894904494285583, "learning_rate": 2.429171668667467e-05, "loss": 0.5181, "step": 12862 }, { "epoch": 16.46464, "grad_norm": 1.0554355382919312, "learning_rate": 2.4289715886354544e-05, "loss": 0.557, "step": 12863 }, { "epoch": 16.46592, "grad_norm": 1.055113434791565, "learning_rate": 2.4287715086034416e-05, "loss": 0.5408, "step": 12864 }, { "epoch": 16.4672, "grad_norm": 0.9813659191131592, "learning_rate": 2.4285714285714288e-05, "loss": 0.518, "step": 12865 }, { "epoch": 16.46848, "grad_norm": 1.0583056211471558, "learning_rate": 2.4283713485394156e-05, "loss": 0.5171, "step": 12866 }, { "epoch": 16.46976, "grad_norm": 0.9968990087509155, "learning_rate": 2.428171268507403e-05, "loss": 0.5158, "step": 12867 }, { "epoch": 16.47104, "grad_norm": 0.9917585849761963, "learning_rate": 
2.4279711884753904e-05, "loss": 0.5027, "step": 12868 }, { "epoch": 16.47232, "grad_norm": 0.9894549250602722, "learning_rate": 2.4277711084433775e-05, "loss": 0.4815, "step": 12869 }, { "epoch": 16.4736, "grad_norm": 1.0116682052612305, "learning_rate": 2.4275710284113647e-05, "loss": 0.5373, "step": 12870 }, { "epoch": 16.47488, "grad_norm": 0.9584359526634216, "learning_rate": 2.427370948379352e-05, "loss": 0.4933, "step": 12871 }, { "epoch": 16.47616, "grad_norm": 0.9404587745666504, "learning_rate": 2.427170868347339e-05, "loss": 0.4687, "step": 12872 }, { "epoch": 16.47744, "grad_norm": 0.9797256588935852, "learning_rate": 2.4269707883153263e-05, "loss": 0.5015, "step": 12873 }, { "epoch": 16.47872, "grad_norm": 1.0498969554901123, "learning_rate": 2.4267707082833135e-05, "loss": 0.5247, "step": 12874 }, { "epoch": 16.48, "grad_norm": 0.9874524474143982, "learning_rate": 2.4265706282513007e-05, "loss": 0.4865, "step": 12875 }, { "epoch": 16.48128, "grad_norm": 0.9777351021766663, "learning_rate": 2.426370548219288e-05, "loss": 0.4579, "step": 12876 }, { "epoch": 16.48256, "grad_norm": 1.0781558752059937, "learning_rate": 2.426170468187275e-05, "loss": 0.5586, "step": 12877 }, { "epoch": 16.48384, "grad_norm": 1.029046893119812, "learning_rate": 2.4259703881552622e-05, "loss": 0.5054, "step": 12878 }, { "epoch": 16.48512, "grad_norm": 0.9987738728523254, "learning_rate": 2.4257703081232494e-05, "loss": 0.4803, "step": 12879 }, { "epoch": 16.4864, "grad_norm": 1.0528926849365234, "learning_rate": 2.4255702280912366e-05, "loss": 0.5177, "step": 12880 }, { "epoch": 16.48768, "grad_norm": 1.0812759399414062, "learning_rate": 2.4253701480592238e-05, "loss": 0.5125, "step": 12881 }, { "epoch": 16.48896, "grad_norm": 0.9843981266021729, "learning_rate": 2.425170068027211e-05, "loss": 0.4758, "step": 12882 }, { "epoch": 16.49024, "grad_norm": 0.9296069145202637, "learning_rate": 2.424969987995198e-05, "loss": 0.5027, "step": 12883 }, { "epoch": 16.49152, "grad_norm": 
0.9559636116027832, "learning_rate": 2.4247699079631853e-05, "loss": 0.511, "step": 12884 }, { "epoch": 16.4928, "grad_norm": 1.0256993770599365, "learning_rate": 2.4245698279311725e-05, "loss": 0.5539, "step": 12885 }, { "epoch": 16.49408, "grad_norm": 1.0049717426300049, "learning_rate": 2.4243697478991597e-05, "loss": 0.5549, "step": 12886 }, { "epoch": 16.49536, "grad_norm": 1.005260705947876, "learning_rate": 2.424169667867147e-05, "loss": 0.5164, "step": 12887 }, { "epoch": 16.49664, "grad_norm": 0.9868449568748474, "learning_rate": 2.4239695878351344e-05, "loss": 0.5297, "step": 12888 }, { "epoch": 16.49792, "grad_norm": 1.0604848861694336, "learning_rate": 2.4237695078031213e-05, "loss": 0.5506, "step": 12889 }, { "epoch": 16.4992, "grad_norm": 1.0443841218948364, "learning_rate": 2.4235694277711084e-05, "loss": 0.5574, "step": 12890 }, { "epoch": 16.50048, "grad_norm": 1.0032176971435547, "learning_rate": 2.4233693477390956e-05, "loss": 0.5244, "step": 12891 }, { "epoch": 16.50176, "grad_norm": 1.0291366577148438, "learning_rate": 2.423169267707083e-05, "loss": 0.5231, "step": 12892 }, { "epoch": 16.50304, "grad_norm": 1.0801621675491333, "learning_rate": 2.42296918767507e-05, "loss": 0.5645, "step": 12893 }, { "epoch": 16.50432, "grad_norm": 1.0463597774505615, "learning_rate": 2.4227691076430572e-05, "loss": 0.4742, "step": 12894 }, { "epoch": 16.5056, "grad_norm": 1.0148074626922607, "learning_rate": 2.4225690276110447e-05, "loss": 0.5203, "step": 12895 }, { "epoch": 16.50688, "grad_norm": 0.9906325340270996, "learning_rate": 2.422368947579032e-05, "loss": 0.4931, "step": 12896 }, { "epoch": 16.50816, "grad_norm": 0.9908265471458435, "learning_rate": 2.4221688675470187e-05, "loss": 0.5016, "step": 12897 }, { "epoch": 16.50944, "grad_norm": 0.9555728435516357, "learning_rate": 2.421968787515006e-05, "loss": 0.4763, "step": 12898 }, { "epoch": 16.51072, "grad_norm": 0.9900997281074524, "learning_rate": 2.4217687074829934e-05, "loss": 0.4907, "step": 12899 
}, { "epoch": 16.512, "grad_norm": 0.9736559391021729, "learning_rate": 2.4215686274509806e-05, "loss": 0.4814, "step": 12900 }, { "epoch": 16.51328, "grad_norm": 1.0533136129379272, "learning_rate": 2.4213685474189675e-05, "loss": 0.5556, "step": 12901 }, { "epoch": 16.51456, "grad_norm": 0.982058584690094, "learning_rate": 2.421168467386955e-05, "loss": 0.4757, "step": 12902 }, { "epoch": 16.51584, "grad_norm": 1.0134705305099487, "learning_rate": 2.4209683873549422e-05, "loss": 0.5173, "step": 12903 }, { "epoch": 16.51712, "grad_norm": 1.0609053373336792, "learning_rate": 2.4207683073229294e-05, "loss": 0.5624, "step": 12904 }, { "epoch": 16.5184, "grad_norm": 1.0328656435012817, "learning_rate": 2.4205682272909162e-05, "loss": 0.5456, "step": 12905 }, { "epoch": 16.51968, "grad_norm": 0.997097373008728, "learning_rate": 2.4203681472589037e-05, "loss": 0.4958, "step": 12906 }, { "epoch": 16.52096, "grad_norm": 1.0784469842910767, "learning_rate": 2.420168067226891e-05, "loss": 0.4847, "step": 12907 }, { "epoch": 16.52224, "grad_norm": 1.0922346115112305, "learning_rate": 2.419967987194878e-05, "loss": 0.5269, "step": 12908 }, { "epoch": 16.52352, "grad_norm": 1.0385076999664307, "learning_rate": 2.4197679071628653e-05, "loss": 0.472, "step": 12909 }, { "epoch": 16.5248, "grad_norm": 0.9775056838989258, "learning_rate": 2.4195678271308525e-05, "loss": 0.5025, "step": 12910 }, { "epoch": 16.52608, "grad_norm": 1.0673224925994873, "learning_rate": 2.4193677470988397e-05, "loss": 0.5495, "step": 12911 }, { "epoch": 16.52736, "grad_norm": 1.0127609968185425, "learning_rate": 2.419167667066827e-05, "loss": 0.5031, "step": 12912 }, { "epoch": 16.52864, "grad_norm": 0.9412071108818054, "learning_rate": 2.418967587034814e-05, "loss": 0.4704, "step": 12913 }, { "epoch": 16.52992, "grad_norm": 0.9896215200424194, "learning_rate": 2.4187675070028012e-05, "loss": 0.4851, "step": 12914 }, { "epoch": 16.5312, "grad_norm": 1.0404068231582642, "learning_rate": 
2.4185674269707884e-05, "loss": 0.5212, "step": 12915 }, { "epoch": 16.53248, "grad_norm": 1.0728304386138916, "learning_rate": 2.4183673469387756e-05, "loss": 0.5685, "step": 12916 }, { "epoch": 16.53376, "grad_norm": 1.0526484251022339, "learning_rate": 2.4181672669067628e-05, "loss": 0.5223, "step": 12917 }, { "epoch": 16.53504, "grad_norm": 1.0103858709335327, "learning_rate": 2.41796718687475e-05, "loss": 0.5132, "step": 12918 }, { "epoch": 16.53632, "grad_norm": 1.032500982284546, "learning_rate": 2.417767106842737e-05, "loss": 0.4861, "step": 12919 }, { "epoch": 16.5376, "grad_norm": 1.0344327688217163, "learning_rate": 2.4175670268107243e-05, "loss": 0.5153, "step": 12920 }, { "epoch": 16.53888, "grad_norm": 1.008581280708313, "learning_rate": 2.4173669467787115e-05, "loss": 0.5462, "step": 12921 }, { "epoch": 16.54016, "grad_norm": 1.00281822681427, "learning_rate": 2.4171668667466987e-05, "loss": 0.5099, "step": 12922 }, { "epoch": 16.54144, "grad_norm": 1.0224007368087769, "learning_rate": 2.4169667867146862e-05, "loss": 0.5216, "step": 12923 }, { "epoch": 16.54272, "grad_norm": 0.9643240571022034, "learning_rate": 2.416766706682673e-05, "loss": 0.5092, "step": 12924 }, { "epoch": 16.544, "grad_norm": 1.013013482093811, "learning_rate": 2.4165666266506603e-05, "loss": 0.5128, "step": 12925 }, { "epoch": 16.545279999999998, "grad_norm": 1.0628739595413208, "learning_rate": 2.4163665466186475e-05, "loss": 0.5326, "step": 12926 }, { "epoch": 16.54656, "grad_norm": 0.9615119099617004, "learning_rate": 2.416166466586635e-05, "loss": 0.5209, "step": 12927 }, { "epoch": 16.54784, "grad_norm": 1.0781883001327515, "learning_rate": 2.415966386554622e-05, "loss": 0.5558, "step": 12928 }, { "epoch": 16.54912, "grad_norm": 0.9772311449050903, "learning_rate": 2.415766306522609e-05, "loss": 0.5129, "step": 12929 }, { "epoch": 16.5504, "grad_norm": 1.0191627740859985, "learning_rate": 2.4155662264905962e-05, "loss": 0.5298, "step": 12930 }, { "epoch": 16.55168, 
"grad_norm": 1.017059087753296, "learning_rate": 2.4153661464585837e-05, "loss": 0.5203, "step": 12931 }, { "epoch": 16.55296, "grad_norm": 0.9743010997772217, "learning_rate": 2.4151660664265706e-05, "loss": 0.5098, "step": 12932 }, { "epoch": 16.55424, "grad_norm": 1.0007681846618652, "learning_rate": 2.4149659863945578e-05, "loss": 0.4947, "step": 12933 }, { "epoch": 16.55552, "grad_norm": 1.0352661609649658, "learning_rate": 2.4147659063625453e-05, "loss": 0.5854, "step": 12934 }, { "epoch": 16.5568, "grad_norm": 1.0481826066970825, "learning_rate": 2.4145658263305325e-05, "loss": 0.5548, "step": 12935 }, { "epoch": 16.55808, "grad_norm": 0.9723829030990601, "learning_rate": 2.4143657462985193e-05, "loss": 0.5207, "step": 12936 }, { "epoch": 16.55936, "grad_norm": 1.0767929553985596, "learning_rate": 2.4141656662665065e-05, "loss": 0.5427, "step": 12937 }, { "epoch": 16.56064, "grad_norm": 1.0616075992584229, "learning_rate": 2.413965586234494e-05, "loss": 0.5186, "step": 12938 }, { "epoch": 16.56192, "grad_norm": 1.0053772926330566, "learning_rate": 2.4137655062024812e-05, "loss": 0.5074, "step": 12939 }, { "epoch": 16.5632, "grad_norm": 0.963870108127594, "learning_rate": 2.413565426170468e-05, "loss": 0.522, "step": 12940 }, { "epoch": 16.56448, "grad_norm": 1.0813841819763184, "learning_rate": 2.4133653461384556e-05, "loss": 0.523, "step": 12941 }, { "epoch": 16.56576, "grad_norm": 1.0653431415557861, "learning_rate": 2.4131652661064428e-05, "loss": 0.5574, "step": 12942 }, { "epoch": 16.56704, "grad_norm": 0.9429889917373657, "learning_rate": 2.41296518607443e-05, "loss": 0.4838, "step": 12943 }, { "epoch": 16.56832, "grad_norm": 1.001644492149353, "learning_rate": 2.4127651060424168e-05, "loss": 0.5048, "step": 12944 }, { "epoch": 16.5696, "grad_norm": 1.0215190649032593, "learning_rate": 2.4125650260104043e-05, "loss": 0.5575, "step": 12945 }, { "epoch": 16.57088, "grad_norm": 1.084291696548462, "learning_rate": 2.4123649459783915e-05, "loss": 0.583, 
"step": 12946 }, { "epoch": 16.57216, "grad_norm": 0.9871845245361328, "learning_rate": 2.4121648659463787e-05, "loss": 0.4813, "step": 12947 }, { "epoch": 16.57344, "grad_norm": 1.0017796754837036, "learning_rate": 2.411964785914366e-05, "loss": 0.5364, "step": 12948 }, { "epoch": 16.57472, "grad_norm": 0.9939451813697815, "learning_rate": 2.411764705882353e-05, "loss": 0.529, "step": 12949 }, { "epoch": 16.576, "grad_norm": 1.0428829193115234, "learning_rate": 2.4115646258503403e-05, "loss": 0.5333, "step": 12950 }, { "epoch": 16.577280000000002, "grad_norm": 1.0062199831008911, "learning_rate": 2.4113645458183274e-05, "loss": 0.4992, "step": 12951 }, { "epoch": 16.57856, "grad_norm": 1.0425946712493896, "learning_rate": 2.4111644657863146e-05, "loss": 0.5304, "step": 12952 }, { "epoch": 16.57984, "grad_norm": 1.0212525129318237, "learning_rate": 2.4109643857543018e-05, "loss": 0.52, "step": 12953 }, { "epoch": 16.58112, "grad_norm": 1.069036841392517, "learning_rate": 2.410764305722289e-05, "loss": 0.5671, "step": 12954 }, { "epoch": 16.5824, "grad_norm": 1.0660767555236816, "learning_rate": 2.4105642256902762e-05, "loss": 0.5542, "step": 12955 }, { "epoch": 16.58368, "grad_norm": 1.009103536605835, "learning_rate": 2.4103641456582634e-05, "loss": 0.5098, "step": 12956 }, { "epoch": 16.58496, "grad_norm": 1.0511337518692017, "learning_rate": 2.4101640656262506e-05, "loss": 0.5654, "step": 12957 }, { "epoch": 16.58624, "grad_norm": 0.9621989727020264, "learning_rate": 2.4099639855942377e-05, "loss": 0.4795, "step": 12958 }, { "epoch": 16.58752, "grad_norm": 1.049293041229248, "learning_rate": 2.409763905562225e-05, "loss": 0.5408, "step": 12959 }, { "epoch": 16.5888, "grad_norm": 1.0931252241134644, "learning_rate": 2.409563825530212e-05, "loss": 0.5048, "step": 12960 }, { "epoch": 16.59008, "grad_norm": 1.0250253677368164, "learning_rate": 2.4093637454981993e-05, "loss": 0.5235, "step": 12961 }, { "epoch": 16.59136, "grad_norm": 1.0003156661987305, 
"learning_rate": 2.4091636654661868e-05, "loss": 0.4948, "step": 12962 }, { "epoch": 16.59264, "grad_norm": 1.0581244230270386, "learning_rate": 2.4089635854341737e-05, "loss": 0.5393, "step": 12963 }, { "epoch": 16.59392, "grad_norm": 1.029645323753357, "learning_rate": 2.408763505402161e-05, "loss": 0.5131, "step": 12964 }, { "epoch": 16.5952, "grad_norm": 0.9942212700843811, "learning_rate": 2.408563425370148e-05, "loss": 0.5036, "step": 12965 }, { "epoch": 16.59648, "grad_norm": 1.1347296237945557, "learning_rate": 2.4083633453381356e-05, "loss": 0.5557, "step": 12966 }, { "epoch": 16.59776, "grad_norm": 1.0200735330581665, "learning_rate": 2.4081632653061224e-05, "loss": 0.5125, "step": 12967 }, { "epoch": 16.59904, "grad_norm": 1.0105204582214355, "learning_rate": 2.4079631852741096e-05, "loss": 0.4875, "step": 12968 }, { "epoch": 16.60032, "grad_norm": 0.9845042824745178, "learning_rate": 2.407763105242097e-05, "loss": 0.4829, "step": 12969 }, { "epoch": 16.6016, "grad_norm": 1.0377755165100098, "learning_rate": 2.4075630252100843e-05, "loss": 0.5135, "step": 12970 }, { "epoch": 16.60288, "grad_norm": 1.0604853630065918, "learning_rate": 2.407362945178071e-05, "loss": 0.5411, "step": 12971 }, { "epoch": 16.60416, "grad_norm": 0.9907283782958984, "learning_rate": 2.4071628651460583e-05, "loss": 0.4962, "step": 12972 }, { "epoch": 16.60544, "grad_norm": 1.0216723680496216, "learning_rate": 2.406962785114046e-05, "loss": 0.5194, "step": 12973 }, { "epoch": 16.60672, "grad_norm": 0.9833100438117981, "learning_rate": 2.406762705082033e-05, "loss": 0.5267, "step": 12974 }, { "epoch": 16.608, "grad_norm": 0.9777483940124512, "learning_rate": 2.40656262505002e-05, "loss": 0.5334, "step": 12975 }, { "epoch": 16.60928, "grad_norm": 1.0363882780075073, "learning_rate": 2.4063625450180074e-05, "loss": 0.5079, "step": 12976 }, { "epoch": 16.61056, "grad_norm": 1.0367538928985596, "learning_rate": 2.4061624649859946e-05, "loss": 0.5033, "step": 12977 }, { "epoch": 
16.61184, "grad_norm": 1.020905613899231, "learning_rate": 2.4059623849539818e-05, "loss": 0.5121, "step": 12978 }, { "epoch": 16.61312, "grad_norm": 0.9727598428726196, "learning_rate": 2.4057623049219686e-05, "loss": 0.4858, "step": 12979 }, { "epoch": 16.6144, "grad_norm": 1.0102119445800781, "learning_rate": 2.405562224889956e-05, "loss": 0.5275, "step": 12980 }, { "epoch": 16.61568, "grad_norm": 1.0345350503921509, "learning_rate": 2.4053621448579433e-05, "loss": 0.5482, "step": 12981 }, { "epoch": 16.61696, "grad_norm": 1.0067529678344727, "learning_rate": 2.4051620648259305e-05, "loss": 0.5192, "step": 12982 }, { "epoch": 16.61824, "grad_norm": 0.9604501724243164, "learning_rate": 2.4049619847939174e-05, "loss": 0.4999, "step": 12983 }, { "epoch": 16.61952, "grad_norm": 0.9932731986045837, "learning_rate": 2.404761904761905e-05, "loss": 0.5266, "step": 12984 }, { "epoch": 16.6208, "grad_norm": 0.9318062663078308, "learning_rate": 2.404561824729892e-05, "loss": 0.4916, "step": 12985 }, { "epoch": 16.62208, "grad_norm": 1.0295798778533936, "learning_rate": 2.4043617446978793e-05, "loss": 0.5223, "step": 12986 }, { "epoch": 16.62336, "grad_norm": 1.082562804222107, "learning_rate": 2.4041616646658665e-05, "loss": 0.5536, "step": 12987 }, { "epoch": 16.62464, "grad_norm": 0.9973915219306946, "learning_rate": 2.4039615846338536e-05, "loss": 0.4904, "step": 12988 }, { "epoch": 16.62592, "grad_norm": 0.9800871014595032, "learning_rate": 2.403761504601841e-05, "loss": 0.516, "step": 12989 }, { "epoch": 16.6272, "grad_norm": 1.0225999355316162, "learning_rate": 2.403561424569828e-05, "loss": 0.5226, "step": 12990 }, { "epoch": 16.62848, "grad_norm": 0.9972506761550903, "learning_rate": 2.4033613445378152e-05, "loss": 0.5076, "step": 12991 }, { "epoch": 16.62976, "grad_norm": 1.0018681287765503, "learning_rate": 2.4031612645058024e-05, "loss": 0.5113, "step": 12992 }, { "epoch": 16.63104, "grad_norm": 1.0438956022262573, "learning_rate": 2.4029611844737896e-05, 
"loss": 0.5164, "step": 12993 }, { "epoch": 16.63232, "grad_norm": 1.0386935472488403, "learning_rate": 2.402761104441777e-05, "loss": 0.5395, "step": 12994 }, { "epoch": 16.6336, "grad_norm": 1.0958852767944336, "learning_rate": 2.402561024409764e-05, "loss": 0.5501, "step": 12995 }, { "epoch": 16.63488, "grad_norm": 0.9334178566932678, "learning_rate": 2.402360944377751e-05, "loss": 0.4921, "step": 12996 }, { "epoch": 16.63616, "grad_norm": 1.008756399154663, "learning_rate": 2.4021608643457383e-05, "loss": 0.5099, "step": 12997 }, { "epoch": 16.63744, "grad_norm": 0.9561886787414551, "learning_rate": 2.401960784313726e-05, "loss": 0.4785, "step": 12998 }, { "epoch": 16.63872, "grad_norm": 1.0232385396957397, "learning_rate": 2.4017607042817127e-05, "loss": 0.5235, "step": 12999 }, { "epoch": 16.64, "grad_norm": 0.9976513981819153, "learning_rate": 2.4015606242497e-05, "loss": 0.5444, "step": 13000 }, { "epoch": 16.64128, "grad_norm": 1.0245577096939087, "learning_rate": 2.4013605442176874e-05, "loss": 0.5077, "step": 13001 }, { "epoch": 16.64256, "grad_norm": 1.0046392679214478, "learning_rate": 2.4011604641856746e-05, "loss": 0.5291, "step": 13002 }, { "epoch": 16.64384, "grad_norm": 1.0134575366973877, "learning_rate": 2.4009603841536614e-05, "loss": 0.5246, "step": 13003 }, { "epoch": 16.64512, "grad_norm": 1.0273959636688232, "learning_rate": 2.4007603041216486e-05, "loss": 0.5474, "step": 13004 }, { "epoch": 16.6464, "grad_norm": 1.09654700756073, "learning_rate": 2.400560224089636e-05, "loss": 0.5338, "step": 13005 }, { "epoch": 16.64768, "grad_norm": 0.9553311467170715, "learning_rate": 2.4003601440576233e-05, "loss": 0.5073, "step": 13006 }, { "epoch": 16.64896, "grad_norm": 1.0136662721633911, "learning_rate": 2.4001600640256102e-05, "loss": 0.4933, "step": 13007 }, { "epoch": 16.65024, "grad_norm": 0.9974261522293091, "learning_rate": 2.3999599839935977e-05, "loss": 0.4961, "step": 13008 }, { "epoch": 16.65152, "grad_norm": 1.0348639488220215, 
"learning_rate": 2.399759903961585e-05, "loss": 0.5167, "step": 13009 }, { "epoch": 16.6528, "grad_norm": 1.0358352661132812, "learning_rate": 2.399559823929572e-05, "loss": 0.5012, "step": 13010 }, { "epoch": 16.65408, "grad_norm": 0.9727697372436523, "learning_rate": 2.399359743897559e-05, "loss": 0.4838, "step": 13011 }, { "epoch": 16.65536, "grad_norm": 1.0957095623016357, "learning_rate": 2.3991596638655464e-05, "loss": 0.5401, "step": 13012 }, { "epoch": 16.65664, "grad_norm": 1.0502195358276367, "learning_rate": 2.3989595838335336e-05, "loss": 0.5342, "step": 13013 }, { "epoch": 16.65792, "grad_norm": 0.9960721135139465, "learning_rate": 2.3987595038015208e-05, "loss": 0.4892, "step": 13014 }, { "epoch": 16.6592, "grad_norm": 0.9923244714736938, "learning_rate": 2.398559423769508e-05, "loss": 0.5019, "step": 13015 }, { "epoch": 16.66048, "grad_norm": 1.0378888845443726, "learning_rate": 2.3983593437374952e-05, "loss": 0.4938, "step": 13016 }, { "epoch": 16.66176, "grad_norm": 0.9792540669441223, "learning_rate": 2.3981592637054824e-05, "loss": 0.5203, "step": 13017 }, { "epoch": 16.66304, "grad_norm": 1.0740691423416138, "learning_rate": 2.3979591836734696e-05, "loss": 0.4963, "step": 13018 }, { "epoch": 16.66432, "grad_norm": 1.0150448083877563, "learning_rate": 2.3977591036414567e-05, "loss": 0.5391, "step": 13019 }, { "epoch": 16.6656, "grad_norm": 0.9979774355888367, "learning_rate": 2.397559023609444e-05, "loss": 0.5039, "step": 13020 }, { "epoch": 16.66688, "grad_norm": 0.9845618009567261, "learning_rate": 2.397358943577431e-05, "loss": 0.5409, "step": 13021 }, { "epoch": 16.66816, "grad_norm": 0.9184175133705139, "learning_rate": 2.3971588635454183e-05, "loss": 0.5081, "step": 13022 }, { "epoch": 16.66944, "grad_norm": 1.0316041707992554, "learning_rate": 2.3969587835134055e-05, "loss": 0.539, "step": 13023 }, { "epoch": 16.67072, "grad_norm": 1.0392125844955444, "learning_rate": 2.3967587034813927e-05, "loss": 0.5423, "step": 13024 }, { "epoch": 
16.672, "grad_norm": 0.9866761565208435, "learning_rate": 2.39655862344938e-05, "loss": 0.5351, "step": 13025 }, { "epoch": 16.67328, "grad_norm": 1.0141420364379883, "learning_rate": 2.396358543417367e-05, "loss": 0.5219, "step": 13026 }, { "epoch": 16.67456, "grad_norm": 1.0628118515014648, "learning_rate": 2.3961584633853542e-05, "loss": 0.5491, "step": 13027 }, { "epoch": 16.67584, "grad_norm": 1.0219802856445312, "learning_rate": 2.3959583833533414e-05, "loss": 0.5423, "step": 13028 }, { "epoch": 16.67712, "grad_norm": 1.0285046100616455, "learning_rate": 2.395758303321329e-05, "loss": 0.5219, "step": 13029 }, { "epoch": 16.6784, "grad_norm": 0.9643558859825134, "learning_rate": 2.3955582232893158e-05, "loss": 0.4952, "step": 13030 }, { "epoch": 16.67968, "grad_norm": 1.1057844161987305, "learning_rate": 2.395358143257303e-05, "loss": 0.5293, "step": 13031 }, { "epoch": 16.68096, "grad_norm": 1.0154691934585571, "learning_rate": 2.39515806322529e-05, "loss": 0.5028, "step": 13032 }, { "epoch": 16.68224, "grad_norm": 0.9593093991279602, "learning_rate": 2.3949579831932777e-05, "loss": 0.4931, "step": 13033 }, { "epoch": 16.68352, "grad_norm": 1.0532431602478027, "learning_rate": 2.3947579031612645e-05, "loss": 0.5077, "step": 13034 }, { "epoch": 16.6848, "grad_norm": 0.959784984588623, "learning_rate": 2.3945578231292517e-05, "loss": 0.5014, "step": 13035 }, { "epoch": 16.68608, "grad_norm": 1.0308352708816528, "learning_rate": 2.3943577430972392e-05, "loss": 0.5016, "step": 13036 }, { "epoch": 16.687359999999998, "grad_norm": 0.9219370484352112, "learning_rate": 2.3941576630652264e-05, "loss": 0.5001, "step": 13037 }, { "epoch": 16.68864, "grad_norm": 0.9760138988494873, "learning_rate": 2.3939575830332133e-05, "loss": 0.5212, "step": 13038 }, { "epoch": 16.68992, "grad_norm": 1.0665395259857178, "learning_rate": 2.3937575030012005e-05, "loss": 0.5621, "step": 13039 }, { "epoch": 16.6912, "grad_norm": 0.9828092455863953, "learning_rate": 2.393557422969188e-05, 
"loss": 0.5331, "step": 13040 }, { "epoch": 16.69248, "grad_norm": 1.0022916793823242, "learning_rate": 2.393357342937175e-05, "loss": 0.5121, "step": 13041 }, { "epoch": 16.69376, "grad_norm": 0.931938886642456, "learning_rate": 2.393157262905162e-05, "loss": 0.4934, "step": 13042 }, { "epoch": 16.69504, "grad_norm": 1.0225458145141602, "learning_rate": 2.3929571828731492e-05, "loss": 0.5217, "step": 13043 }, { "epoch": 16.69632, "grad_norm": 1.0572493076324463, "learning_rate": 2.3927571028411367e-05, "loss": 0.5562, "step": 13044 }, { "epoch": 16.6976, "grad_norm": 0.9570837616920471, "learning_rate": 2.392557022809124e-05, "loss": 0.4917, "step": 13045 }, { "epoch": 16.69888, "grad_norm": 0.9432888627052307, "learning_rate": 2.3923569427771108e-05, "loss": 0.4917, "step": 13046 }, { "epoch": 16.70016, "grad_norm": 0.9940226078033447, "learning_rate": 2.3921568627450983e-05, "loss": 0.5083, "step": 13047 }, { "epoch": 16.70144, "grad_norm": 0.9966458082199097, "learning_rate": 2.3919567827130855e-05, "loss": 0.5097, "step": 13048 }, { "epoch": 16.70272, "grad_norm": 1.0165756940841675, "learning_rate": 2.3917567026810727e-05, "loss": 0.5624, "step": 13049 }, { "epoch": 16.704, "grad_norm": 1.0315126180648804, "learning_rate": 2.3915566226490595e-05, "loss": 0.5547, "step": 13050 }, { "epoch": 16.70528, "grad_norm": 1.0564510822296143, "learning_rate": 2.391356542617047e-05, "loss": 0.5763, "step": 13051 }, { "epoch": 16.70656, "grad_norm": 1.0285402536392212, "learning_rate": 2.3911564625850342e-05, "loss": 0.5526, "step": 13052 }, { "epoch": 16.70784, "grad_norm": 0.9533161520957947, "learning_rate": 2.3909563825530214e-05, "loss": 0.5125, "step": 13053 }, { "epoch": 16.70912, "grad_norm": 1.0255242586135864, "learning_rate": 2.3907563025210086e-05, "loss": 0.5404, "step": 13054 }, { "epoch": 16.7104, "grad_norm": 0.9758644104003906, "learning_rate": 2.3905562224889958e-05, "loss": 0.4887, "step": 13055 }, { "epoch": 16.71168, "grad_norm": 0.9290502667427063, 
"learning_rate": 2.390356142456983e-05, "loss": 0.4621, "step": 13056 }, { "epoch": 16.71296, "grad_norm": 1.0182784795761108, "learning_rate": 2.39015606242497e-05, "loss": 0.5575, "step": 13057 }, { "epoch": 16.71424, "grad_norm": 1.022233009338379, "learning_rate": 2.3899559823929573e-05, "loss": 0.5254, "step": 13058 }, { "epoch": 16.71552, "grad_norm": 1.0084254741668701, "learning_rate": 2.3897559023609445e-05, "loss": 0.5153, "step": 13059 }, { "epoch": 16.7168, "grad_norm": 0.9809401631355286, "learning_rate": 2.3895558223289317e-05, "loss": 0.5125, "step": 13060 }, { "epoch": 16.71808, "grad_norm": 0.9509784579277039, "learning_rate": 2.389355742296919e-05, "loss": 0.4663, "step": 13061 }, { "epoch": 16.71936, "grad_norm": 1.1078130006790161, "learning_rate": 2.389155662264906e-05, "loss": 0.5595, "step": 13062 }, { "epoch": 16.72064, "grad_norm": 1.0031615495681763, "learning_rate": 2.3889555822328933e-05, "loss": 0.5288, "step": 13063 }, { "epoch": 16.72192, "grad_norm": 0.9790761470794678, "learning_rate": 2.3887555022008804e-05, "loss": 0.4976, "step": 13064 }, { "epoch": 16.7232, "grad_norm": 0.9689204096794128, "learning_rate": 2.3885554221688676e-05, "loss": 0.5193, "step": 13065 }, { "epoch": 16.72448, "grad_norm": 1.0444097518920898, "learning_rate": 2.3883553421368548e-05, "loss": 0.5472, "step": 13066 }, { "epoch": 16.72576, "grad_norm": 1.0322445631027222, "learning_rate": 2.388155262104842e-05, "loss": 0.5432, "step": 13067 }, { "epoch": 16.72704, "grad_norm": 0.9607560038566589, "learning_rate": 2.3879551820728295e-05, "loss": 0.4699, "step": 13068 }, { "epoch": 16.72832, "grad_norm": 1.0295300483703613, "learning_rate": 2.3877551020408164e-05, "loss": 0.5077, "step": 13069 }, { "epoch": 16.7296, "grad_norm": 1.0513696670532227, "learning_rate": 2.3875550220088036e-05, "loss": 0.5587, "step": 13070 }, { "epoch": 16.73088, "grad_norm": 1.0276129245758057, "learning_rate": 2.3873549419767907e-05, "loss": 0.5124, "step": 13071 }, { "epoch": 
16.73216, "grad_norm": 0.9594248533248901, "learning_rate": 2.3871548619447783e-05, "loss": 0.4769, "step": 13072 }, { "epoch": 16.73344, "grad_norm": 1.0701425075531006, "learning_rate": 2.386954781912765e-05, "loss": 0.5459, "step": 13073 }, { "epoch": 16.73472, "grad_norm": 1.044277548789978, "learning_rate": 2.3867547018807523e-05, "loss": 0.5692, "step": 13074 }, { "epoch": 16.736, "grad_norm": 1.0274498462677002, "learning_rate": 2.3865546218487398e-05, "loss": 0.5312, "step": 13075 }, { "epoch": 16.73728, "grad_norm": 1.0216330289840698, "learning_rate": 2.386354541816727e-05, "loss": 0.5514, "step": 13076 }, { "epoch": 16.73856, "grad_norm": 1.0108169317245483, "learning_rate": 2.386154461784714e-05, "loss": 0.537, "step": 13077 }, { "epoch": 16.73984, "grad_norm": 1.015303134918213, "learning_rate": 2.385954381752701e-05, "loss": 0.522, "step": 13078 }, { "epoch": 16.74112, "grad_norm": 1.0391994714736938, "learning_rate": 2.3857543017206886e-05, "loss": 0.5815, "step": 13079 }, { "epoch": 16.7424, "grad_norm": 1.0588417053222656, "learning_rate": 2.3855542216886757e-05, "loss": 0.5296, "step": 13080 }, { "epoch": 16.74368, "grad_norm": 0.9854347109794617, "learning_rate": 2.3853541416566626e-05, "loss": 0.5049, "step": 13081 }, { "epoch": 16.74496, "grad_norm": 1.0034525394439697, "learning_rate": 2.38515406162465e-05, "loss": 0.5144, "step": 13082 }, { "epoch": 16.74624, "grad_norm": 1.0394407510757446, "learning_rate": 2.3849539815926373e-05, "loss": 0.4741, "step": 13083 }, { "epoch": 16.74752, "grad_norm": 0.9602055549621582, "learning_rate": 2.3847539015606245e-05, "loss": 0.5129, "step": 13084 }, { "epoch": 16.7488, "grad_norm": 1.01785147190094, "learning_rate": 2.3845538215286113e-05, "loss": 0.5215, "step": 13085 }, { "epoch": 16.75008, "grad_norm": 1.021774411201477, "learning_rate": 2.384353741496599e-05, "loss": 0.5161, "step": 13086 }, { "epoch": 16.75136, "grad_norm": 1.0218771696090698, "learning_rate": 2.384153661464586e-05, "loss": 
0.5066, "step": 13087 }, { "epoch": 16.75264, "grad_norm": 0.9877253174781799, "learning_rate": 2.3839535814325732e-05, "loss": 0.4995, "step": 13088 }, { "epoch": 16.75392, "grad_norm": 1.0353214740753174, "learning_rate": 2.3837535014005604e-05, "loss": 0.4538, "step": 13089 }, { "epoch": 16.7552, "grad_norm": 1.052165150642395, "learning_rate": 2.3835534213685476e-05, "loss": 0.5353, "step": 13090 }, { "epoch": 16.75648, "grad_norm": 0.9751322269439697, "learning_rate": 2.3833533413365348e-05, "loss": 0.4861, "step": 13091 }, { "epoch": 16.75776, "grad_norm": 1.0614244937896729, "learning_rate": 2.383153261304522e-05, "loss": 0.562, "step": 13092 }, { "epoch": 16.75904, "grad_norm": 1.001287817955017, "learning_rate": 2.382953181272509e-05, "loss": 0.5248, "step": 13093 }, { "epoch": 16.76032, "grad_norm": 1.015852451324463, "learning_rate": 2.3827531012404963e-05, "loss": 0.5339, "step": 13094 }, { "epoch": 16.7616, "grad_norm": 1.0684832334518433, "learning_rate": 2.3825530212084835e-05, "loss": 0.5295, "step": 13095 }, { "epoch": 16.76288, "grad_norm": 0.9663676619529724, "learning_rate": 2.3823529411764707e-05, "loss": 0.4559, "step": 13096 }, { "epoch": 16.76416, "grad_norm": 0.949224591255188, "learning_rate": 2.382152861144458e-05, "loss": 0.5483, "step": 13097 }, { "epoch": 16.76544, "grad_norm": 1.01194167137146, "learning_rate": 2.381952781112445e-05, "loss": 0.5269, "step": 13098 }, { "epoch": 16.76672, "grad_norm": 1.0378564596176147, "learning_rate": 2.3817527010804323e-05, "loss": 0.5227, "step": 13099 }, { "epoch": 16.768, "grad_norm": 1.0125080347061157, "learning_rate": 2.3815526210484195e-05, "loss": 0.5238, "step": 13100 }, { "epoch": 16.76928, "grad_norm": 1.0728344917297363, "learning_rate": 2.3813525410164066e-05, "loss": 0.5434, "step": 13101 }, { "epoch": 16.77056, "grad_norm": 0.9800313711166382, "learning_rate": 2.381152460984394e-05, "loss": 0.4794, "step": 13102 }, { "epoch": 16.77184, "grad_norm": 1.0031391382217407, "learning_rate": 
2.380952380952381e-05, "loss": 0.505, "step": 13103 }, { "epoch": 16.77312, "grad_norm": 0.9527682662010193, "learning_rate": 2.3807523009203682e-05, "loss": 0.5031, "step": 13104 }, { "epoch": 16.7744, "grad_norm": 1.0169705152511597, "learning_rate": 2.3805522208883554e-05, "loss": 0.4872, "step": 13105 }, { "epoch": 16.77568, "grad_norm": 0.9913395643234253, "learning_rate": 2.3803521408563426e-05, "loss": 0.5042, "step": 13106 }, { "epoch": 16.77696, "grad_norm": 1.0133116245269775, "learning_rate": 2.38015206082433e-05, "loss": 0.5202, "step": 13107 }, { "epoch": 16.77824, "grad_norm": 1.0359431505203247, "learning_rate": 2.379951980792317e-05, "loss": 0.5379, "step": 13108 }, { "epoch": 16.77952, "grad_norm": 0.9904114603996277, "learning_rate": 2.379751900760304e-05, "loss": 0.486, "step": 13109 }, { "epoch": 16.7808, "grad_norm": 0.9596001505851746, "learning_rate": 2.3795518207282913e-05, "loss": 0.4627, "step": 13110 }, { "epoch": 16.78208, "grad_norm": 1.0215861797332764, "learning_rate": 2.379351740696279e-05, "loss": 0.5149, "step": 13111 }, { "epoch": 16.78336, "grad_norm": 1.0180429220199585, "learning_rate": 2.3791516606642657e-05, "loss": 0.5304, "step": 13112 }, { "epoch": 16.78464, "grad_norm": 0.942301332950592, "learning_rate": 2.378951580632253e-05, "loss": 0.5205, "step": 13113 }, { "epoch": 16.78592, "grad_norm": 0.9987159371376038, "learning_rate": 2.3787515006002404e-05, "loss": 0.5267, "step": 13114 }, { "epoch": 16.7872, "grad_norm": 1.0503463745117188, "learning_rate": 2.3785514205682276e-05, "loss": 0.5245, "step": 13115 }, { "epoch": 16.78848, "grad_norm": 1.0316410064697266, "learning_rate": 2.3783513405362144e-05, "loss": 0.5262, "step": 13116 }, { "epoch": 16.78976, "grad_norm": 0.9606108665466309, "learning_rate": 2.3781512605042016e-05, "loss": 0.5006, "step": 13117 }, { "epoch": 16.79104, "grad_norm": 1.021532654762268, "learning_rate": 2.377951180472189e-05, "loss": 0.5441, "step": 13118 }, { "epoch": 16.79232, "grad_norm": 
0.9799433350563049, "learning_rate": 2.3777511004401763e-05, "loss": 0.5082, "step": 13119 }, { "epoch": 16.7936, "grad_norm": 0.9904650449752808, "learning_rate": 2.3775510204081632e-05, "loss": 0.55, "step": 13120 }, { "epoch": 16.79488, "grad_norm": 1.006087303161621, "learning_rate": 2.3773509403761507e-05, "loss": 0.5431, "step": 13121 }, { "epoch": 16.79616, "grad_norm": 1.0185682773590088, "learning_rate": 2.377150860344138e-05, "loss": 0.5338, "step": 13122 }, { "epoch": 16.79744, "grad_norm": 1.039462924003601, "learning_rate": 2.376950780312125e-05, "loss": 0.5525, "step": 13123 }, { "epoch": 16.79872, "grad_norm": 1.048966884613037, "learning_rate": 2.376750700280112e-05, "loss": 0.5458, "step": 13124 }, { "epoch": 16.8, "grad_norm": 1.0096321105957031, "learning_rate": 2.3765506202480994e-05, "loss": 0.5175, "step": 13125 }, { "epoch": 16.80128, "grad_norm": 1.0281754732131958, "learning_rate": 2.3763505402160866e-05, "loss": 0.54, "step": 13126 }, { "epoch": 16.80256, "grad_norm": 1.0194934606552124, "learning_rate": 2.3761504601840738e-05, "loss": 0.5144, "step": 13127 }, { "epoch": 16.80384, "grad_norm": 1.0557507276535034, "learning_rate": 2.375950380152061e-05, "loss": 0.537, "step": 13128 }, { "epoch": 16.80512, "grad_norm": 0.926566481590271, "learning_rate": 2.3757503001200482e-05, "loss": 0.4973, "step": 13129 }, { "epoch": 16.8064, "grad_norm": 1.016688585281372, "learning_rate": 2.3755502200880354e-05, "loss": 0.524, "step": 13130 }, { "epoch": 16.80768, "grad_norm": 1.0396153926849365, "learning_rate": 2.3753501400560226e-05, "loss": 0.5364, "step": 13131 }, { "epoch": 16.80896, "grad_norm": 0.9891914129257202, "learning_rate": 2.3751500600240097e-05, "loss": 0.5359, "step": 13132 }, { "epoch": 16.81024, "grad_norm": 1.0839710235595703, "learning_rate": 2.374949979991997e-05, "loss": 0.5577, "step": 13133 }, { "epoch": 16.81152, "grad_norm": 0.9661899209022522, "learning_rate": 2.374749899959984e-05, "loss": 0.4845, "step": 13134 }, { 
"epoch": 16.8128, "grad_norm": 0.9867687821388245, "learning_rate": 2.3745498199279713e-05, "loss": 0.5227, "step": 13135 }, { "epoch": 16.81408, "grad_norm": 1.0412554740905762, "learning_rate": 2.3743497398959585e-05, "loss": 0.5154, "step": 13136 }, { "epoch": 16.81536, "grad_norm": 0.9606292843818665, "learning_rate": 2.3741496598639457e-05, "loss": 0.4987, "step": 13137 }, { "epoch": 16.81664, "grad_norm": 0.9731855392456055, "learning_rate": 2.373949579831933e-05, "loss": 0.4813, "step": 13138 }, { "epoch": 16.81792, "grad_norm": 1.0362776517868042, "learning_rate": 2.37374949979992e-05, "loss": 0.5173, "step": 13139 }, { "epoch": 16.8192, "grad_norm": 1.0115454196929932, "learning_rate": 2.3735494197679072e-05, "loss": 0.4787, "step": 13140 }, { "epoch": 16.82048, "grad_norm": 1.0400524139404297, "learning_rate": 2.3733493397358944e-05, "loss": 0.5218, "step": 13141 }, { "epoch": 16.82176, "grad_norm": 1.0309675931930542, "learning_rate": 2.373149259703882e-05, "loss": 0.5008, "step": 13142 }, { "epoch": 16.82304, "grad_norm": 0.9864956140518188, "learning_rate": 2.3729491796718688e-05, "loss": 0.4848, "step": 13143 }, { "epoch": 16.82432, "grad_norm": 0.9826066493988037, "learning_rate": 2.372749099639856e-05, "loss": 0.4918, "step": 13144 }, { "epoch": 16.8256, "grad_norm": 1.0326327085494995, "learning_rate": 2.372549019607843e-05, "loss": 0.5138, "step": 13145 }, { "epoch": 16.82688, "grad_norm": 0.9792665839195251, "learning_rate": 2.3723489395758307e-05, "loss": 0.5088, "step": 13146 }, { "epoch": 16.82816, "grad_norm": 0.9995619654655457, "learning_rate": 2.3721488595438175e-05, "loss": 0.5337, "step": 13147 }, { "epoch": 16.829439999999998, "grad_norm": 1.030884027481079, "learning_rate": 2.3719487795118047e-05, "loss": 0.541, "step": 13148 }, { "epoch": 16.83072, "grad_norm": 1.0245966911315918, "learning_rate": 2.3717486994797922e-05, "loss": 0.4978, "step": 13149 }, { "epoch": 16.832, "grad_norm": 1.035640835762024, "learning_rate": 
2.3715486194477794e-05, "loss": 0.5688, "step": 13150 }, { "epoch": 16.83328, "grad_norm": 1.0522053241729736, "learning_rate": 2.3713485394157663e-05, "loss": 0.5527, "step": 13151 }, { "epoch": 16.83456, "grad_norm": 1.0623236894607544, "learning_rate": 2.3711484593837535e-05, "loss": 0.5676, "step": 13152 }, { "epoch": 16.83584, "grad_norm": 0.9654913544654846, "learning_rate": 2.370948379351741e-05, "loss": 0.5085, "step": 13153 }, { "epoch": 16.83712, "grad_norm": 0.9618136286735535, "learning_rate": 2.370748299319728e-05, "loss": 0.5171, "step": 13154 }, { "epoch": 16.8384, "grad_norm": 1.009698748588562, "learning_rate": 2.370548219287715e-05, "loss": 0.4982, "step": 13155 }, { "epoch": 16.83968, "grad_norm": 1.0301358699798584, "learning_rate": 2.3703481392557022e-05, "loss": 0.5463, "step": 13156 }, { "epoch": 16.84096, "grad_norm": 1.008836269378662, "learning_rate": 2.3701480592236897e-05, "loss": 0.4709, "step": 13157 }, { "epoch": 16.84224, "grad_norm": 1.0866061449050903, "learning_rate": 2.369947979191677e-05, "loss": 0.5407, "step": 13158 }, { "epoch": 16.84352, "grad_norm": 1.0506260395050049, "learning_rate": 2.3697478991596638e-05, "loss": 0.5297, "step": 13159 }, { "epoch": 16.8448, "grad_norm": 1.0603467226028442, "learning_rate": 2.3695478191276513e-05, "loss": 0.5368, "step": 13160 }, { "epoch": 16.84608, "grad_norm": 0.9975579977035522, "learning_rate": 2.3693477390956385e-05, "loss": 0.5138, "step": 13161 }, { "epoch": 16.84736, "grad_norm": 1.0625996589660645, "learning_rate": 2.3691476590636257e-05, "loss": 0.5583, "step": 13162 }, { "epoch": 16.84864, "grad_norm": 1.0248199701309204, "learning_rate": 2.3689475790316125e-05, "loss": 0.5415, "step": 13163 }, { "epoch": 16.84992, "grad_norm": 1.0083729028701782, "learning_rate": 2.3687474989996e-05, "loss": 0.5248, "step": 13164 }, { "epoch": 16.8512, "grad_norm": 1.0073386430740356, "learning_rate": 2.3685474189675872e-05, "loss": 0.523, "step": 13165 }, { "epoch": 16.85248, "grad_norm": 
1.023363471031189, "learning_rate": 2.3683473389355744e-05, "loss": 0.5001, "step": 13166 }, { "epoch": 16.85376, "grad_norm": 0.9759731888771057, "learning_rate": 2.3681472589035616e-05, "loss": 0.5141, "step": 13167 }, { "epoch": 16.85504, "grad_norm": 1.0707515478134155, "learning_rate": 2.3679471788715488e-05, "loss": 0.5621, "step": 13168 }, { "epoch": 16.85632, "grad_norm": 1.0116777420043945, "learning_rate": 2.367747098839536e-05, "loss": 0.5197, "step": 13169 }, { "epoch": 16.8576, "grad_norm": 1.0196561813354492, "learning_rate": 2.367547018807523e-05, "loss": 0.5688, "step": 13170 }, { "epoch": 16.85888, "grad_norm": 1.0545904636383057, "learning_rate": 2.3673469387755103e-05, "loss": 0.5704, "step": 13171 }, { "epoch": 16.86016, "grad_norm": 1.0500190258026123, "learning_rate": 2.3671468587434975e-05, "loss": 0.5012, "step": 13172 }, { "epoch": 16.86144, "grad_norm": 0.9819349646568298, "learning_rate": 2.3669467787114847e-05, "loss": 0.5261, "step": 13173 }, { "epoch": 16.86272, "grad_norm": 0.993928074836731, "learning_rate": 2.366746698679472e-05, "loss": 0.5433, "step": 13174 }, { "epoch": 16.864, "grad_norm": 0.9953247308731079, "learning_rate": 2.366546618647459e-05, "loss": 0.5125, "step": 13175 }, { "epoch": 16.86528, "grad_norm": 1.0047765970230103, "learning_rate": 2.3663465386154463e-05, "loss": 0.5015, "step": 13176 }, { "epoch": 16.86656, "grad_norm": 1.0829325914382935, "learning_rate": 2.3661464585834334e-05, "loss": 0.5706, "step": 13177 }, { "epoch": 16.86784, "grad_norm": 1.0480440855026245, "learning_rate": 2.3659463785514206e-05, "loss": 0.5776, "step": 13178 }, { "epoch": 16.86912, "grad_norm": 1.0479168891906738, "learning_rate": 2.3657462985194078e-05, "loss": 0.5047, "step": 13179 }, { "epoch": 16.8704, "grad_norm": 0.9871065616607666, "learning_rate": 2.365546218487395e-05, "loss": 0.4929, "step": 13180 }, { "epoch": 16.87168, "grad_norm": 1.0528372526168823, "learning_rate": 2.3653461384553825e-05, "loss": 0.5289, "step": 13181 
}, { "epoch": 16.87296, "grad_norm": 1.0125209093093872, "learning_rate": 2.3651460584233694e-05, "loss": 0.5539, "step": 13182 }, { "epoch": 16.87424, "grad_norm": 1.0450409650802612, "learning_rate": 2.3649459783913565e-05, "loss": 0.5374, "step": 13183 }, { "epoch": 16.87552, "grad_norm": 1.061608910560608, "learning_rate": 2.3647458983593437e-05, "loss": 0.52, "step": 13184 }, { "epoch": 16.8768, "grad_norm": 1.019355058670044, "learning_rate": 2.3645458183273313e-05, "loss": 0.499, "step": 13185 }, { "epoch": 16.87808, "grad_norm": 0.9892388582229614, "learning_rate": 2.364345738295318e-05, "loss": 0.5473, "step": 13186 }, { "epoch": 16.87936, "grad_norm": 1.001673936843872, "learning_rate": 2.3641456582633053e-05, "loss": 0.4966, "step": 13187 }, { "epoch": 16.88064, "grad_norm": 1.078865647315979, "learning_rate": 2.3639455782312928e-05, "loss": 0.5499, "step": 13188 }, { "epoch": 16.88192, "grad_norm": 0.9952927827835083, "learning_rate": 2.36374549819928e-05, "loss": 0.5131, "step": 13189 }, { "epoch": 16.8832, "grad_norm": 0.9789214730262756, "learning_rate": 2.363545418167267e-05, "loss": 0.513, "step": 13190 }, { "epoch": 16.88448, "grad_norm": 1.0156183242797852, "learning_rate": 2.363345338135254e-05, "loss": 0.5402, "step": 13191 }, { "epoch": 16.88576, "grad_norm": 1.0390909910202026, "learning_rate": 2.3631452581032416e-05, "loss": 0.571, "step": 13192 }, { "epoch": 16.88704, "grad_norm": 1.0474461317062378, "learning_rate": 2.3629451780712287e-05, "loss": 0.5456, "step": 13193 }, { "epoch": 16.88832, "grad_norm": 1.0926357507705688, "learning_rate": 2.3627450980392156e-05, "loss": 0.51, "step": 13194 }, { "epoch": 16.8896, "grad_norm": 1.0534778833389282, "learning_rate": 2.362545018007203e-05, "loss": 0.5292, "step": 13195 }, { "epoch": 16.89088, "grad_norm": 1.0174009799957275, "learning_rate": 2.3623449379751903e-05, "loss": 0.5295, "step": 13196 }, { "epoch": 16.89216, "grad_norm": 1.018369436264038, "learning_rate": 2.3621448579431775e-05, 
"loss": 0.5362, "step": 13197 }, { "epoch": 16.89344, "grad_norm": 1.008921504020691, "learning_rate": 2.3619447779111643e-05, "loss": 0.5035, "step": 13198 }, { "epoch": 16.89472, "grad_norm": 1.040116548538208, "learning_rate": 2.361744697879152e-05, "loss": 0.5335, "step": 13199 }, { "epoch": 16.896, "grad_norm": 1.0576521158218384, "learning_rate": 2.361544617847139e-05, "loss": 0.5574, "step": 13200 }, { "epoch": 16.89728, "grad_norm": 1.048539161682129, "learning_rate": 2.3613445378151262e-05, "loss": 0.5106, "step": 13201 }, { "epoch": 16.89856, "grad_norm": 1.0253418684005737, "learning_rate": 2.3611444577831134e-05, "loss": 0.5217, "step": 13202 }, { "epoch": 16.89984, "grad_norm": 1.0338140726089478, "learning_rate": 2.3609443777511006e-05, "loss": 0.5126, "step": 13203 }, { "epoch": 16.90112, "grad_norm": 1.0068703889846802, "learning_rate": 2.3607442977190878e-05, "loss": 0.5127, "step": 13204 }, { "epoch": 16.9024, "grad_norm": 0.9766025543212891, "learning_rate": 2.360544217687075e-05, "loss": 0.4986, "step": 13205 }, { "epoch": 16.90368, "grad_norm": 1.0029348134994507, "learning_rate": 2.360344137655062e-05, "loss": 0.509, "step": 13206 }, { "epoch": 16.90496, "grad_norm": 0.9763104915618896, "learning_rate": 2.3601440576230493e-05, "loss": 0.4673, "step": 13207 }, { "epoch": 16.90624, "grad_norm": 0.9850437045097351, "learning_rate": 2.3599439775910365e-05, "loss": 0.544, "step": 13208 }, { "epoch": 16.90752, "grad_norm": 1.063561201095581, "learning_rate": 2.3597438975590237e-05, "loss": 0.5269, "step": 13209 }, { "epoch": 16.9088, "grad_norm": 1.0321555137634277, "learning_rate": 2.359543817527011e-05, "loss": 0.5038, "step": 13210 }, { "epoch": 16.91008, "grad_norm": 1.0148087739944458, "learning_rate": 2.359343737494998e-05, "loss": 0.5437, "step": 13211 }, { "epoch": 16.91136, "grad_norm": 0.9977015852928162, "learning_rate": 2.3591436574629853e-05, "loss": 0.5425, "step": 13212 }, { "epoch": 16.91264, "grad_norm": 1.0292987823486328, 
"learning_rate": 2.3589435774309725e-05, "loss": 0.5345, "step": 13213 }, { "epoch": 16.91392, "grad_norm": 0.9989826679229736, "learning_rate": 2.3587434973989596e-05, "loss": 0.5079, "step": 13214 }, { "epoch": 16.9152, "grad_norm": 0.987629771232605, "learning_rate": 2.3585434173669468e-05, "loss": 0.5053, "step": 13215 }, { "epoch": 16.91648, "grad_norm": 1.0139482021331787, "learning_rate": 2.358343337334934e-05, "loss": 0.5206, "step": 13216 }, { "epoch": 16.91776, "grad_norm": 0.9933033585548401, "learning_rate": 2.3581432573029212e-05, "loss": 0.5358, "step": 13217 }, { "epoch": 16.91904, "grad_norm": 1.0258675813674927, "learning_rate": 2.3579431772709084e-05, "loss": 0.5204, "step": 13218 }, { "epoch": 16.92032, "grad_norm": 1.0290369987487793, "learning_rate": 2.3577430972388956e-05, "loss": 0.5262, "step": 13219 }, { "epoch": 16.9216, "grad_norm": 1.0307328701019287, "learning_rate": 2.357543017206883e-05, "loss": 0.5321, "step": 13220 }, { "epoch": 16.92288, "grad_norm": 1.0345529317855835, "learning_rate": 2.35734293717487e-05, "loss": 0.5526, "step": 13221 }, { "epoch": 16.92416, "grad_norm": 1.028792381286621, "learning_rate": 2.357142857142857e-05, "loss": 0.5265, "step": 13222 }, { "epoch": 16.925440000000002, "grad_norm": 1.0457243919372559, "learning_rate": 2.3569427771108443e-05, "loss": 0.5508, "step": 13223 }, { "epoch": 16.92672, "grad_norm": 1.011638879776001, "learning_rate": 2.356742697078832e-05, "loss": 0.5209, "step": 13224 }, { "epoch": 16.928, "grad_norm": 1.0729031562805176, "learning_rate": 2.3565426170468187e-05, "loss": 0.5676, "step": 13225 }, { "epoch": 16.92928, "grad_norm": 0.988837718963623, "learning_rate": 2.356342537014806e-05, "loss": 0.5399, "step": 13226 }, { "epoch": 16.93056, "grad_norm": 0.9590950608253479, "learning_rate": 2.3561424569827934e-05, "loss": 0.4938, "step": 13227 }, { "epoch": 16.93184, "grad_norm": 0.9690542817115784, "learning_rate": 2.3559423769507806e-05, "loss": 0.4957, "step": 13228 }, { "epoch": 
16.93312, "grad_norm": 1.0184234380722046, "learning_rate": 2.3557422969187674e-05, "loss": 0.5152, "step": 13229 }, { "epoch": 16.9344, "grad_norm": 1.0626813173294067, "learning_rate": 2.3555422168867546e-05, "loss": 0.558, "step": 13230 }, { "epoch": 16.93568, "grad_norm": 0.9686869978904724, "learning_rate": 2.355342136854742e-05, "loss": 0.5276, "step": 13231 }, { "epoch": 16.93696, "grad_norm": 0.9972003102302551, "learning_rate": 2.3551420568227293e-05, "loss": 0.5422, "step": 13232 }, { "epoch": 16.93824, "grad_norm": 1.024174690246582, "learning_rate": 2.3549419767907162e-05, "loss": 0.5273, "step": 13233 }, { "epoch": 16.93952, "grad_norm": 1.0208815336227417, "learning_rate": 2.3547418967587037e-05, "loss": 0.5221, "step": 13234 }, { "epoch": 16.9408, "grad_norm": 0.9970613718032837, "learning_rate": 2.354541816726691e-05, "loss": 0.5001, "step": 13235 }, { "epoch": 16.94208, "grad_norm": 1.0984002351760864, "learning_rate": 2.354341736694678e-05, "loss": 0.5119, "step": 13236 }, { "epoch": 16.94336, "grad_norm": 1.0116389989852905, "learning_rate": 2.354141656662665e-05, "loss": 0.541, "step": 13237 }, { "epoch": 16.94464, "grad_norm": 1.0177326202392578, "learning_rate": 2.3539415766306524e-05, "loss": 0.5292, "step": 13238 }, { "epoch": 16.94592, "grad_norm": 0.9551138877868652, "learning_rate": 2.3537414965986396e-05, "loss": 0.4805, "step": 13239 }, { "epoch": 16.9472, "grad_norm": 1.0099093914031982, "learning_rate": 2.3535414165666268e-05, "loss": 0.5158, "step": 13240 }, { "epoch": 16.94848, "grad_norm": 1.0355970859527588, "learning_rate": 2.353341336534614e-05, "loss": 0.517, "step": 13241 }, { "epoch": 16.94976, "grad_norm": 1.0377235412597656, "learning_rate": 2.3531412565026012e-05, "loss": 0.5256, "step": 13242 }, { "epoch": 16.95104, "grad_norm": 0.973661482334137, "learning_rate": 2.3529411764705884e-05, "loss": 0.4841, "step": 13243 }, { "epoch": 16.95232, "grad_norm": 0.950306236743927, "learning_rate": 2.3527410964385756e-05, "loss": 
0.5096, "step": 13244 }, { "epoch": 16.9536, "grad_norm": 1.070381999015808, "learning_rate": 2.3525410164065627e-05, "loss": 0.5756, "step": 13245 }, { "epoch": 16.95488, "grad_norm": 1.0381155014038086, "learning_rate": 2.35234093637455e-05, "loss": 0.5439, "step": 13246 }, { "epoch": 16.95616, "grad_norm": 1.0218827724456787, "learning_rate": 2.352140856342537e-05, "loss": 0.5185, "step": 13247 }, { "epoch": 16.95744, "grad_norm": 1.0366841554641724, "learning_rate": 2.3519407763105243e-05, "loss": 0.5542, "step": 13248 }, { "epoch": 16.95872, "grad_norm": 0.9674044251441956, "learning_rate": 2.3517406962785115e-05, "loss": 0.4658, "step": 13249 }, { "epoch": 16.96, "grad_norm": 0.9855802655220032, "learning_rate": 2.3515406162464987e-05, "loss": 0.5267, "step": 13250 }, { "epoch": 16.96128, "grad_norm": 1.0081998109817505, "learning_rate": 2.351340536214486e-05, "loss": 0.5058, "step": 13251 }, { "epoch": 16.96256, "grad_norm": 0.9830219149589539, "learning_rate": 2.351140456182473e-05, "loss": 0.4796, "step": 13252 }, { "epoch": 16.96384, "grad_norm": 0.97882479429245, "learning_rate": 2.3509403761504602e-05, "loss": 0.4897, "step": 13253 }, { "epoch": 16.96512, "grad_norm": 1.0170930624008179, "learning_rate": 2.3507402961184474e-05, "loss": 0.513, "step": 13254 }, { "epoch": 16.9664, "grad_norm": 1.030259132385254, "learning_rate": 2.350540216086435e-05, "loss": 0.4973, "step": 13255 }, { "epoch": 16.96768, "grad_norm": 1.0129339694976807, "learning_rate": 2.3503401360544218e-05, "loss": 0.4936, "step": 13256 }, { "epoch": 16.96896, "grad_norm": 1.0687109231948853, "learning_rate": 2.350140056022409e-05, "loss": 0.5226, "step": 13257 }, { "epoch": 16.97024, "grad_norm": 1.0357649326324463, "learning_rate": 2.349939975990396e-05, "loss": 0.5376, "step": 13258 }, { "epoch": 16.97152, "grad_norm": 1.0332026481628418, "learning_rate": 2.3497398959583837e-05, "loss": 0.4773, "step": 13259 }, { "epoch": 16.9728, "grad_norm": 0.9495790600776672, "learning_rate": 
2.3495398159263705e-05, "loss": 0.4737, "step": 13260 }, { "epoch": 16.97408, "grad_norm": 1.0137715339660645, "learning_rate": 2.3493397358943577e-05, "loss": 0.5459, "step": 13261 }, { "epoch": 16.97536, "grad_norm": 0.9835166931152344, "learning_rate": 2.349139655862345e-05, "loss": 0.5019, "step": 13262 }, { "epoch": 16.97664, "grad_norm": 1.000338077545166, "learning_rate": 2.3489395758303324e-05, "loss": 0.5223, "step": 13263 }, { "epoch": 16.97792, "grad_norm": 0.9843599796295166, "learning_rate": 2.3487394957983193e-05, "loss": 0.4935, "step": 13264 }, { "epoch": 16.9792, "grad_norm": 1.0332714319229126, "learning_rate": 2.3485394157663065e-05, "loss": 0.5263, "step": 13265 }, { "epoch": 16.98048, "grad_norm": 1.008151650428772, "learning_rate": 2.348339335734294e-05, "loss": 0.5458, "step": 13266 }, { "epoch": 16.98176, "grad_norm": 1.0616765022277832, "learning_rate": 2.348139255702281e-05, "loss": 0.5493, "step": 13267 }, { "epoch": 16.98304, "grad_norm": 1.007568359375, "learning_rate": 2.347939175670268e-05, "loss": 0.5289, "step": 13268 }, { "epoch": 16.98432, "grad_norm": 1.0306227207183838, "learning_rate": 2.3477390956382552e-05, "loss": 0.5618, "step": 13269 }, { "epoch": 16.9856, "grad_norm": 1.0478519201278687, "learning_rate": 2.3475390156062427e-05, "loss": 0.5575, "step": 13270 }, { "epoch": 16.98688, "grad_norm": 0.9916083216667175, "learning_rate": 2.34733893557423e-05, "loss": 0.5117, "step": 13271 }, { "epoch": 16.98816, "grad_norm": 1.0166401863098145, "learning_rate": 2.3471388555422168e-05, "loss": 0.5053, "step": 13272 }, { "epoch": 16.98944, "grad_norm": 0.9944577217102051, "learning_rate": 2.3469387755102043e-05, "loss": 0.5063, "step": 13273 }, { "epoch": 16.99072, "grad_norm": 1.0171867609024048, "learning_rate": 2.3467386954781915e-05, "loss": 0.5275, "step": 13274 }, { "epoch": 16.992, "grad_norm": 1.002347469329834, "learning_rate": 2.3465386154461786e-05, "loss": 0.504, "step": 13275 }, { "epoch": 16.99328, "grad_norm": 
0.9947987794876099, "learning_rate": 2.3463385354141655e-05, "loss": 0.4975, "step": 13276 }, { "epoch": 16.99456, "grad_norm": 1.0772300958633423, "learning_rate": 2.346138455382153e-05, "loss": 0.5841, "step": 13277 }, { "epoch": 16.99584, "grad_norm": 1.0796716213226318, "learning_rate": 2.3459383753501402e-05, "loss": 0.5478, "step": 13278 }, { "epoch": 16.99712, "grad_norm": 1.0109952688217163, "learning_rate": 2.3457382953181274e-05, "loss": 0.497, "step": 13279 }, { "epoch": 16.9984, "grad_norm": 1.00393545627594, "learning_rate": 2.3455382152861146e-05, "loss": 0.4905, "step": 13280 }, { "epoch": 16.99968, "grad_norm": 1.0317285060882568, "learning_rate": 2.3453381352541018e-05, "loss": 0.5052, "step": 13281 }, { "epoch": 17.00096, "grad_norm": 2.4376354217529297, "learning_rate": 2.345138055222089e-05, "loss": 0.999, "step": 13282 }, { "epoch": 17.00224, "grad_norm": 0.9784718155860901, "learning_rate": 2.344937975190076e-05, "loss": 0.4915, "step": 13283 }, { "epoch": 17.00352, "grad_norm": 1.0082448720932007, "learning_rate": 2.3447378951580633e-05, "loss": 0.4813, "step": 13284 }, { "epoch": 17.0048, "grad_norm": 1.0144582986831665, "learning_rate": 2.3445378151260505e-05, "loss": 0.5056, "step": 13285 }, { "epoch": 17.00608, "grad_norm": 0.9649600386619568, "learning_rate": 2.3443377350940377e-05, "loss": 0.4901, "step": 13286 }, { "epoch": 17.00736, "grad_norm": 0.9816195368766785, "learning_rate": 2.344137655062025e-05, "loss": 0.491, "step": 13287 }, { "epoch": 17.00864, "grad_norm": 0.9808714985847473, "learning_rate": 2.343937575030012e-05, "loss": 0.4976, "step": 13288 }, { "epoch": 17.00992, "grad_norm": 0.9497635364532471, "learning_rate": 2.3437374949979992e-05, "loss": 0.4637, "step": 13289 }, { "epoch": 17.0112, "grad_norm": 1.0557515621185303, "learning_rate": 2.3435374149659864e-05, "loss": 0.5267, "step": 13290 }, { "epoch": 17.01248, "grad_norm": 1.0407460927963257, "learning_rate": 2.3433373349339736e-05, "loss": 0.5172, "step": 13291 
}, { "epoch": 17.01376, "grad_norm": 1.0044418573379517, "learning_rate": 2.3431372549019608e-05, "loss": 0.5482, "step": 13292 }, { "epoch": 17.01504, "grad_norm": 1.036288857460022, "learning_rate": 2.342937174869948e-05, "loss": 0.54, "step": 13293 }, { "epoch": 17.01632, "grad_norm": 1.0211939811706543, "learning_rate": 2.3427370948379355e-05, "loss": 0.4912, "step": 13294 }, { "epoch": 17.0176, "grad_norm": 0.9974328279495239, "learning_rate": 2.3425370148059224e-05, "loss": 0.4945, "step": 13295 }, { "epoch": 17.01888, "grad_norm": 1.0395445823669434, "learning_rate": 2.3423369347739095e-05, "loss": 0.5235, "step": 13296 }, { "epoch": 17.02016, "grad_norm": 1.0685923099517822, "learning_rate": 2.3421368547418967e-05, "loss": 0.5011, "step": 13297 }, { "epoch": 17.02144, "grad_norm": 0.9720517992973328, "learning_rate": 2.3419367747098843e-05, "loss": 0.4791, "step": 13298 }, { "epoch": 17.02272, "grad_norm": 1.0298750400543213, "learning_rate": 2.341736694677871e-05, "loss": 0.5083, "step": 13299 }, { "epoch": 17.024, "grad_norm": 1.0151759386062622, "learning_rate": 2.3415366146458583e-05, "loss": 0.4656, "step": 13300 }, { "epoch": 17.02528, "grad_norm": 1.1034287214279175, "learning_rate": 2.3413365346138458e-05, "loss": 0.5559, "step": 13301 }, { "epoch": 17.02656, "grad_norm": 0.9982373714447021, "learning_rate": 2.341136454581833e-05, "loss": 0.4586, "step": 13302 }, { "epoch": 17.02784, "grad_norm": 1.0372883081436157, "learning_rate": 2.34093637454982e-05, "loss": 0.4949, "step": 13303 }, { "epoch": 17.02912, "grad_norm": 1.0765658617019653, "learning_rate": 2.340736294517807e-05, "loss": 0.5206, "step": 13304 }, { "epoch": 17.0304, "grad_norm": 1.0272283554077148, "learning_rate": 2.3405362144857946e-05, "loss": 0.4676, "step": 13305 }, { "epoch": 17.03168, "grad_norm": 1.091143012046814, "learning_rate": 2.3403361344537817e-05, "loss": 0.5431, "step": 13306 }, { "epoch": 17.03296, "grad_norm": 0.9813713431358337, "learning_rate": 
2.3401360544217686e-05, "loss": 0.4549, "step": 13307 }, { "epoch": 17.03424, "grad_norm": 0.9769755601882935, "learning_rate": 2.339935974389756e-05, "loss": 0.468, "step": 13308 }, { "epoch": 17.03552, "grad_norm": 1.0127575397491455, "learning_rate": 2.3397358943577433e-05, "loss": 0.5391, "step": 13309 }, { "epoch": 17.0368, "grad_norm": 1.0613768100738525, "learning_rate": 2.3395358143257305e-05, "loss": 0.5372, "step": 13310 }, { "epoch": 17.03808, "grad_norm": 0.9925457239151001, "learning_rate": 2.3393357342937173e-05, "loss": 0.4997, "step": 13311 }, { "epoch": 17.03936, "grad_norm": 0.9370602965354919, "learning_rate": 2.339135654261705e-05, "loss": 0.474, "step": 13312 }, { "epoch": 17.04064, "grad_norm": 1.0538114309310913, "learning_rate": 2.338935574229692e-05, "loss": 0.4834, "step": 13313 }, { "epoch": 17.04192, "grad_norm": 1.0021767616271973, "learning_rate": 2.3387354941976792e-05, "loss": 0.503, "step": 13314 }, { "epoch": 17.0432, "grad_norm": 1.0035548210144043, "learning_rate": 2.3385354141656664e-05, "loss": 0.4991, "step": 13315 }, { "epoch": 17.04448, "grad_norm": 1.0278247594833374, "learning_rate": 2.3383353341336536e-05, "loss": 0.4885, "step": 13316 }, { "epoch": 17.04576, "grad_norm": 1.0547192096710205, "learning_rate": 2.3381352541016408e-05, "loss": 0.5255, "step": 13317 }, { "epoch": 17.04704, "grad_norm": 1.0186964273452759, "learning_rate": 2.337935174069628e-05, "loss": 0.5052, "step": 13318 }, { "epoch": 17.04832, "grad_norm": 1.0712857246398926, "learning_rate": 2.337735094037615e-05, "loss": 0.4951, "step": 13319 }, { "epoch": 17.0496, "grad_norm": 1.0552232265472412, "learning_rate": 2.3375350140056023e-05, "loss": 0.5391, "step": 13320 }, { "epoch": 17.05088, "grad_norm": 0.9899628758430481, "learning_rate": 2.3373349339735895e-05, "loss": 0.4996, "step": 13321 }, { "epoch": 17.05216, "grad_norm": 1.085460901260376, "learning_rate": 2.3371348539415767e-05, "loss": 0.5247, "step": 13322 }, { "epoch": 17.05344, "grad_norm": 
1.0540425777435303, "learning_rate": 2.336934773909564e-05, "loss": 0.5785, "step": 13323 }, { "epoch": 17.05472, "grad_norm": 1.0214723348617554, "learning_rate": 2.336734693877551e-05, "loss": 0.5305, "step": 13324 }, { "epoch": 17.056, "grad_norm": 1.0059022903442383, "learning_rate": 2.3365346138455383e-05, "loss": 0.487, "step": 13325 }, { "epoch": 17.05728, "grad_norm": 1.063796877861023, "learning_rate": 2.3363345338135258e-05, "loss": 0.5328, "step": 13326 }, { "epoch": 17.05856, "grad_norm": 1.0171808004379272, "learning_rate": 2.3361344537815126e-05, "loss": 0.4785, "step": 13327 }, { "epoch": 17.05984, "grad_norm": 1.0254714488983154, "learning_rate": 2.3359343737494998e-05, "loss": 0.501, "step": 13328 }, { "epoch": 17.06112, "grad_norm": 1.0111802816390991, "learning_rate": 2.335734293717487e-05, "loss": 0.5116, "step": 13329 }, { "epoch": 17.0624, "grad_norm": 1.0492706298828125, "learning_rate": 2.3355342136854745e-05, "loss": 0.5129, "step": 13330 }, { "epoch": 17.06368, "grad_norm": 1.0869669914245605, "learning_rate": 2.3353341336534614e-05, "loss": 0.4696, "step": 13331 }, { "epoch": 17.06496, "grad_norm": 1.0306788682937622, "learning_rate": 2.3351340536214486e-05, "loss": 0.4977, "step": 13332 }, { "epoch": 17.06624, "grad_norm": 0.9910968542098999, "learning_rate": 2.334933973589436e-05, "loss": 0.4801, "step": 13333 }, { "epoch": 17.06752, "grad_norm": 0.9870019555091858, "learning_rate": 2.3347338935574233e-05, "loss": 0.4426, "step": 13334 }, { "epoch": 17.0688, "grad_norm": 1.0062360763549805, "learning_rate": 2.33453381352541e-05, "loss": 0.5187, "step": 13335 }, { "epoch": 17.07008, "grad_norm": 1.0343031883239746, "learning_rate": 2.3343337334933973e-05, "loss": 0.5119, "step": 13336 }, { "epoch": 17.07136, "grad_norm": 1.0217961072921753, "learning_rate": 2.334133653461385e-05, "loss": 0.5541, "step": 13337 }, { "epoch": 17.07264, "grad_norm": 1.0784296989440918, "learning_rate": 2.333933573429372e-05, "loss": 0.5345, "step": 13338 }, 
{ "epoch": 17.07392, "grad_norm": 1.029133677482605, "learning_rate": 2.333733493397359e-05, "loss": 0.5253, "step": 13339 }, { "epoch": 17.0752, "grad_norm": 1.0548806190490723, "learning_rate": 2.3335334133653464e-05, "loss": 0.5053, "step": 13340 }, { "epoch": 17.07648, "grad_norm": 1.0157487392425537, "learning_rate": 2.3333333333333336e-05, "loss": 0.4751, "step": 13341 }, { "epoch": 17.07776, "grad_norm": 1.0173470973968506, "learning_rate": 2.3331332533013208e-05, "loss": 0.4752, "step": 13342 }, { "epoch": 17.07904, "grad_norm": 0.9745169281959534, "learning_rate": 2.3329331732693076e-05, "loss": 0.486, "step": 13343 }, { "epoch": 17.08032, "grad_norm": 0.9814679622650146, "learning_rate": 2.332733093237295e-05, "loss": 0.5124, "step": 13344 }, { "epoch": 17.0816, "grad_norm": 0.9977136850357056, "learning_rate": 2.3325330132052823e-05, "loss": 0.4984, "step": 13345 }, { "epoch": 17.08288, "grad_norm": 0.9431197643280029, "learning_rate": 2.3323329331732695e-05, "loss": 0.4411, "step": 13346 }, { "epoch": 17.08416, "grad_norm": 1.0017660856246948, "learning_rate": 2.3321328531412567e-05, "loss": 0.5211, "step": 13347 }, { "epoch": 17.08544, "grad_norm": 0.9581298232078552, "learning_rate": 2.331932773109244e-05, "loss": 0.5069, "step": 13348 }, { "epoch": 17.08672, "grad_norm": 1.0260916948318481, "learning_rate": 2.331732693077231e-05, "loss": 0.5305, "step": 13349 }, { "epoch": 17.088, "grad_norm": 1.032827615737915, "learning_rate": 2.3315326130452183e-05, "loss": 0.5041, "step": 13350 }, { "epoch": 17.08928, "grad_norm": 1.0370451211929321, "learning_rate": 2.3313325330132054e-05, "loss": 0.4872, "step": 13351 }, { "epoch": 17.09056, "grad_norm": 1.0834031105041504, "learning_rate": 2.3311324529811926e-05, "loss": 0.5399, "step": 13352 }, { "epoch": 17.09184, "grad_norm": 1.0523909330368042, "learning_rate": 2.3309323729491798e-05, "loss": 0.4924, "step": 13353 }, { "epoch": 17.09312, "grad_norm": 1.0100966691970825, "learning_rate": 
2.330732292917167e-05, "loss": 0.5309, "step": 13354 }, { "epoch": 17.0944, "grad_norm": 1.0047094821929932, "learning_rate": 2.3305322128851542e-05, "loss": 0.4504, "step": 13355 }, { "epoch": 17.09568, "grad_norm": 1.0484347343444824, "learning_rate": 2.3303321328531414e-05, "loss": 0.5239, "step": 13356 }, { "epoch": 17.09696, "grad_norm": 1.0204596519470215, "learning_rate": 2.3301320528211286e-05, "loss": 0.5148, "step": 13357 }, { "epoch": 17.09824, "grad_norm": 0.9914649128913879, "learning_rate": 2.3299319727891157e-05, "loss": 0.46, "step": 13358 }, { "epoch": 17.09952, "grad_norm": 1.0509700775146484, "learning_rate": 2.329731892757103e-05, "loss": 0.4891, "step": 13359 }, { "epoch": 17.1008, "grad_norm": 1.0409367084503174, "learning_rate": 2.32953181272509e-05, "loss": 0.5121, "step": 13360 }, { "epoch": 17.10208, "grad_norm": 1.0810407400131226, "learning_rate": 2.3293317326930776e-05, "loss": 0.4976, "step": 13361 }, { "epoch": 17.10336, "grad_norm": 1.0771454572677612, "learning_rate": 2.3291316526610645e-05, "loss": 0.4679, "step": 13362 }, { "epoch": 17.10464, "grad_norm": 1.1189974546432495, "learning_rate": 2.3289315726290517e-05, "loss": 0.572, "step": 13363 }, { "epoch": 17.10592, "grad_norm": 1.017249584197998, "learning_rate": 2.328731492597039e-05, "loss": 0.5068, "step": 13364 }, { "epoch": 17.1072, "grad_norm": 0.990572988986969, "learning_rate": 2.3285314125650264e-05, "loss": 0.4766, "step": 13365 }, { "epoch": 17.10848, "grad_norm": 1.0602976083755493, "learning_rate": 2.3283313325330132e-05, "loss": 0.4712, "step": 13366 }, { "epoch": 17.10976, "grad_norm": 1.0583609342575073, "learning_rate": 2.3281312525010004e-05, "loss": 0.5533, "step": 13367 }, { "epoch": 17.11104, "grad_norm": 1.0422335863113403, "learning_rate": 2.327931172468988e-05, "loss": 0.5399, "step": 13368 }, { "epoch": 17.11232, "grad_norm": 0.9954720139503479, "learning_rate": 2.327731092436975e-05, "loss": 0.4705, "step": 13369 }, { "epoch": 17.1136, "grad_norm": 
1.0625905990600586, "learning_rate": 2.327531012404962e-05, "loss": 0.5102, "step": 13370 }, { "epoch": 17.11488, "grad_norm": 1.03073251247406, "learning_rate": 2.327330932372949e-05, "loss": 0.5117, "step": 13371 }, { "epoch": 17.11616, "grad_norm": 0.9936673045158386, "learning_rate": 2.3271308523409367e-05, "loss": 0.5368, "step": 13372 }, { "epoch": 17.11744, "grad_norm": 1.014151692390442, "learning_rate": 2.326930772308924e-05, "loss": 0.4523, "step": 13373 }, { "epoch": 17.11872, "grad_norm": 1.0433018207550049, "learning_rate": 2.3267306922769107e-05, "loss": 0.5037, "step": 13374 }, { "epoch": 17.12, "grad_norm": 1.0825138092041016, "learning_rate": 2.326530612244898e-05, "loss": 0.5088, "step": 13375 }, { "epoch": 17.12128, "grad_norm": 1.0675740242004395, "learning_rate": 2.3263305322128854e-05, "loss": 0.5024, "step": 13376 }, { "epoch": 17.12256, "grad_norm": 1.002501130104065, "learning_rate": 2.3261304521808726e-05, "loss": 0.4748, "step": 13377 }, { "epoch": 17.12384, "grad_norm": 1.042352557182312, "learning_rate": 2.3259303721488595e-05, "loss": 0.4869, "step": 13378 }, { "epoch": 17.12512, "grad_norm": 1.0172719955444336, "learning_rate": 2.325730292116847e-05, "loss": 0.4917, "step": 13379 }, { "epoch": 17.1264, "grad_norm": 0.9971911907196045, "learning_rate": 2.325530212084834e-05, "loss": 0.4934, "step": 13380 }, { "epoch": 17.12768, "grad_norm": 0.9829162359237671, "learning_rate": 2.3253301320528213e-05, "loss": 0.5106, "step": 13381 }, { "epoch": 17.12896, "grad_norm": 1.0853031873703003, "learning_rate": 2.3251300520208082e-05, "loss": 0.5248, "step": 13382 }, { "epoch": 17.13024, "grad_norm": 1.0231006145477295, "learning_rate": 2.3249299719887957e-05, "loss": 0.5012, "step": 13383 }, { "epoch": 17.13152, "grad_norm": 1.0397844314575195, "learning_rate": 2.324729891956783e-05, "loss": 0.5402, "step": 13384 }, { "epoch": 17.1328, "grad_norm": 1.0465348958969116, "learning_rate": 2.32452981192477e-05, "loss": 0.5115, "step": 13385 }, { 
"epoch": 17.13408, "grad_norm": 1.0421459674835205, "learning_rate": 2.3243297318927573e-05, "loss": 0.4913, "step": 13386 }, { "epoch": 17.13536, "grad_norm": 0.991023063659668, "learning_rate": 2.3241296518607445e-05, "loss": 0.5016, "step": 13387 }, { "epoch": 17.13664, "grad_norm": 1.0662450790405273, "learning_rate": 2.3239295718287316e-05, "loss": 0.5424, "step": 13388 }, { "epoch": 17.13792, "grad_norm": 1.0365347862243652, "learning_rate": 2.323729491796719e-05, "loss": 0.534, "step": 13389 }, { "epoch": 17.1392, "grad_norm": 1.0068080425262451, "learning_rate": 2.323529411764706e-05, "loss": 0.4582, "step": 13390 }, { "epoch": 17.14048, "grad_norm": 0.9789687991142273, "learning_rate": 2.3233293317326932e-05, "loss": 0.4683, "step": 13391 }, { "epoch": 17.14176, "grad_norm": 0.9680244326591492, "learning_rate": 2.3231292517006804e-05, "loss": 0.47, "step": 13392 }, { "epoch": 17.14304, "grad_norm": 1.0483875274658203, "learning_rate": 2.3229291716686676e-05, "loss": 0.5163, "step": 13393 }, { "epoch": 17.14432, "grad_norm": 0.998321533203125, "learning_rate": 2.3227290916366548e-05, "loss": 0.5058, "step": 13394 }, { "epoch": 17.1456, "grad_norm": 1.0452215671539307, "learning_rate": 2.322529011604642e-05, "loss": 0.5377, "step": 13395 }, { "epoch": 17.14688, "grad_norm": 1.0813066959381104, "learning_rate": 2.322328931572629e-05, "loss": 0.4744, "step": 13396 }, { "epoch": 17.14816, "grad_norm": 1.0850361585617065, "learning_rate": 2.3221288515406163e-05, "loss": 0.5534, "step": 13397 }, { "epoch": 17.14944, "grad_norm": 1.034781575202942, "learning_rate": 2.3219287715086035e-05, "loss": 0.5279, "step": 13398 }, { "epoch": 17.15072, "grad_norm": 1.0639398097991943, "learning_rate": 2.3217286914765907e-05, "loss": 0.5009, "step": 13399 }, { "epoch": 17.152, "grad_norm": 1.0536409616470337, "learning_rate": 2.3215286114445782e-05, "loss": 0.5132, "step": 13400 }, { "epoch": 17.15328, "grad_norm": 1.0476963520050049, "learning_rate": 2.321328531412565e-05, 
"loss": 0.5241, "step": 13401 }, { "epoch": 17.15456, "grad_norm": 1.0148664712905884, "learning_rate": 2.3211284513805522e-05, "loss": 0.4843, "step": 13402 }, { "epoch": 17.15584, "grad_norm": 0.9978867769241333, "learning_rate": 2.3209283713485394e-05, "loss": 0.5003, "step": 13403 }, { "epoch": 17.15712, "grad_norm": 1.0880756378173828, "learning_rate": 2.320728291316527e-05, "loss": 0.5188, "step": 13404 }, { "epoch": 17.1584, "grad_norm": 1.064671516418457, "learning_rate": 2.3205282112845138e-05, "loss": 0.5898, "step": 13405 }, { "epoch": 17.15968, "grad_norm": 1.0149790048599243, "learning_rate": 2.320328131252501e-05, "loss": 0.4774, "step": 13406 }, { "epoch": 17.16096, "grad_norm": 0.992540180683136, "learning_rate": 2.3201280512204885e-05, "loss": 0.4796, "step": 13407 }, { "epoch": 17.16224, "grad_norm": 1.0193244218826294, "learning_rate": 2.3199279711884757e-05, "loss": 0.503, "step": 13408 }, { "epoch": 17.16352, "grad_norm": 1.0211812257766724, "learning_rate": 2.3197278911564625e-05, "loss": 0.5344, "step": 13409 }, { "epoch": 17.1648, "grad_norm": 0.9962586164474487, "learning_rate": 2.3195278111244497e-05, "loss": 0.477, "step": 13410 }, { "epoch": 17.16608, "grad_norm": 1.0423258543014526, "learning_rate": 2.3193277310924373e-05, "loss": 0.5273, "step": 13411 }, { "epoch": 17.16736, "grad_norm": 1.0666061639785767, "learning_rate": 2.3191276510604244e-05, "loss": 0.4738, "step": 13412 }, { "epoch": 17.16864, "grad_norm": 1.0447877645492554, "learning_rate": 2.3189275710284113e-05, "loss": 0.5042, "step": 13413 }, { "epoch": 17.16992, "grad_norm": 1.041363000869751, "learning_rate": 2.3187274909963988e-05, "loss": 0.5165, "step": 13414 }, { "epoch": 17.1712, "grad_norm": 0.9531150460243225, "learning_rate": 2.318527410964386e-05, "loss": 0.4656, "step": 13415 }, { "epoch": 17.17248, "grad_norm": 0.975502610206604, "learning_rate": 2.3183273309323732e-05, "loss": 0.4421, "step": 13416 }, { "epoch": 17.17376, "grad_norm": 1.0709878206253052, 
"learning_rate": 2.31812725090036e-05, "loss": 0.5181, "step": 13417 }, { "epoch": 17.17504, "grad_norm": 1.0228393077850342, "learning_rate": 2.3179271708683476e-05, "loss": 0.488, "step": 13418 }, { "epoch": 17.17632, "grad_norm": 0.9908788800239563, "learning_rate": 2.3177270908363347e-05, "loss": 0.5135, "step": 13419 }, { "epoch": 17.1776, "grad_norm": 1.0089093446731567, "learning_rate": 2.317527010804322e-05, "loss": 0.4801, "step": 13420 }, { "epoch": 17.17888, "grad_norm": 1.0426452159881592, "learning_rate": 2.317326930772309e-05, "loss": 0.5075, "step": 13421 }, { "epoch": 17.18016, "grad_norm": 1.021355390548706, "learning_rate": 2.3171268507402963e-05, "loss": 0.4907, "step": 13422 }, { "epoch": 17.18144, "grad_norm": 1.0270839929580688, "learning_rate": 2.3169267707082835e-05, "loss": 0.4559, "step": 13423 }, { "epoch": 17.18272, "grad_norm": 0.9954252243041992, "learning_rate": 2.3167266906762707e-05, "loss": 0.5026, "step": 13424 }, { "epoch": 17.184, "grad_norm": 0.9519053101539612, "learning_rate": 2.316526610644258e-05, "loss": 0.465, "step": 13425 }, { "epoch": 17.18528, "grad_norm": 0.966871976852417, "learning_rate": 2.316326530612245e-05, "loss": 0.4922, "step": 13426 }, { "epoch": 17.18656, "grad_norm": 1.1276544332504272, "learning_rate": 2.3161264505802322e-05, "loss": 0.5007, "step": 13427 }, { "epoch": 17.18784, "grad_norm": 1.0328774452209473, "learning_rate": 2.3159263705482194e-05, "loss": 0.4846, "step": 13428 }, { "epoch": 17.18912, "grad_norm": 1.0253841876983643, "learning_rate": 2.3157262905162066e-05, "loss": 0.48, "step": 13429 }, { "epoch": 17.1904, "grad_norm": 1.1044909954071045, "learning_rate": 2.3155262104841938e-05, "loss": 0.4854, "step": 13430 }, { "epoch": 17.19168, "grad_norm": 1.1031279563903809, "learning_rate": 2.315326130452181e-05, "loss": 0.5431, "step": 13431 }, { "epoch": 17.19296, "grad_norm": 1.0845425128936768, "learning_rate": 2.315126050420168e-05, "loss": 0.5446, "step": 13432 }, { "epoch": 17.19424, 
"grad_norm": 1.0719091892242432, "learning_rate": 2.3149259703881553e-05, "loss": 0.525, "step": 13433 }, { "epoch": 17.19552, "grad_norm": 1.115065097808838, "learning_rate": 2.3147258903561425e-05, "loss": 0.5301, "step": 13434 }, { "epoch": 17.1968, "grad_norm": 1.0398420095443726, "learning_rate": 2.3145258103241297e-05, "loss": 0.5075, "step": 13435 }, { "epoch": 17.19808, "grad_norm": 0.9958305358886719, "learning_rate": 2.314325730292117e-05, "loss": 0.4951, "step": 13436 }, { "epoch": 17.19936, "grad_norm": 1.0259790420532227, "learning_rate": 2.314125650260104e-05, "loss": 0.4844, "step": 13437 }, { "epoch": 17.20064, "grad_norm": 1.1417039632797241, "learning_rate": 2.3139255702280913e-05, "loss": 0.5455, "step": 13438 }, { "epoch": 17.20192, "grad_norm": 1.0283827781677246, "learning_rate": 2.3137254901960788e-05, "loss": 0.4931, "step": 13439 }, { "epoch": 17.2032, "grad_norm": 0.9671483635902405, "learning_rate": 2.3135254101640656e-05, "loss": 0.4683, "step": 13440 }, { "epoch": 17.20448, "grad_norm": 0.9877151250839233, "learning_rate": 2.3133253301320528e-05, "loss": 0.4812, "step": 13441 }, { "epoch": 17.20576, "grad_norm": 1.0239356756210327, "learning_rate": 2.31312525010004e-05, "loss": 0.4782, "step": 13442 }, { "epoch": 17.20704, "grad_norm": 1.1128602027893066, "learning_rate": 2.3129251700680275e-05, "loss": 0.5034, "step": 13443 }, { "epoch": 17.20832, "grad_norm": 1.0178550481796265, "learning_rate": 2.3127250900360144e-05, "loss": 0.5063, "step": 13444 }, { "epoch": 17.209600000000002, "grad_norm": 1.0738757848739624, "learning_rate": 2.3125250100040016e-05, "loss": 0.5127, "step": 13445 }, { "epoch": 17.21088, "grad_norm": 1.0246436595916748, "learning_rate": 2.312324929971989e-05, "loss": 0.5128, "step": 13446 }, { "epoch": 17.21216, "grad_norm": 1.0808448791503906, "learning_rate": 2.3121248499399763e-05, "loss": 0.5116, "step": 13447 }, { "epoch": 17.21344, "grad_norm": 1.0280914306640625, "learning_rate": 2.311924769907963e-05, 
"loss": 0.5175, "step": 13448 }, { "epoch": 17.21472, "grad_norm": 1.0112377405166626, "learning_rate": 2.3117246898759503e-05, "loss": 0.4666, "step": 13449 }, { "epoch": 17.216, "grad_norm": 1.0588704347610474, "learning_rate": 2.311524609843938e-05, "loss": 0.4821, "step": 13450 }, { "epoch": 17.21728, "grad_norm": 1.005056381225586, "learning_rate": 2.311324529811925e-05, "loss": 0.5253, "step": 13451 }, { "epoch": 17.21856, "grad_norm": 0.979955792427063, "learning_rate": 2.311124449779912e-05, "loss": 0.4832, "step": 13452 }, { "epoch": 17.21984, "grad_norm": 0.989325225353241, "learning_rate": 2.3109243697478994e-05, "loss": 0.514, "step": 13453 }, { "epoch": 17.22112, "grad_norm": 1.1037169694900513, "learning_rate": 2.3107242897158866e-05, "loss": 0.5589, "step": 13454 }, { "epoch": 17.2224, "grad_norm": 1.101965069770813, "learning_rate": 2.3105242096838738e-05, "loss": 0.5082, "step": 13455 }, { "epoch": 17.22368, "grad_norm": 1.0133678913116455, "learning_rate": 2.3103241296518606e-05, "loss": 0.5253, "step": 13456 }, { "epoch": 17.22496, "grad_norm": 0.9965882301330566, "learning_rate": 2.310124049619848e-05, "loss": 0.4828, "step": 13457 }, { "epoch": 17.22624, "grad_norm": 1.0856853723526, "learning_rate": 2.3099239695878353e-05, "loss": 0.533, "step": 13458 }, { "epoch": 17.22752, "grad_norm": 0.9921742081642151, "learning_rate": 2.3097238895558225e-05, "loss": 0.5221, "step": 13459 }, { "epoch": 17.2288, "grad_norm": 1.0070276260375977, "learning_rate": 2.3095238095238097e-05, "loss": 0.4822, "step": 13460 }, { "epoch": 17.23008, "grad_norm": 1.0681109428405762, "learning_rate": 2.309323729491797e-05, "loss": 0.5146, "step": 13461 }, { "epoch": 17.23136, "grad_norm": 1.0588202476501465, "learning_rate": 2.309123649459784e-05, "loss": 0.484, "step": 13462 }, { "epoch": 17.23264, "grad_norm": 1.072527527809143, "learning_rate": 2.3089235694277712e-05, "loss": 0.5442, "step": 13463 }, { "epoch": 17.23392, "grad_norm": 1.0427266359329224, 
"learning_rate": 2.3087234893957584e-05, "loss": 0.4935, "step": 13464 }, { "epoch": 17.2352, "grad_norm": 1.0477319955825806, "learning_rate": 2.3085234093637456e-05, "loss": 0.4786, "step": 13465 }, { "epoch": 17.23648, "grad_norm": 1.1097187995910645, "learning_rate": 2.3083233293317328e-05, "loss": 0.5198, "step": 13466 }, { "epoch": 17.23776, "grad_norm": 1.0430151224136353, "learning_rate": 2.30812324929972e-05, "loss": 0.5408, "step": 13467 }, { "epoch": 17.23904, "grad_norm": 1.084143042564392, "learning_rate": 2.3079231692677072e-05, "loss": 0.5259, "step": 13468 }, { "epoch": 17.24032, "grad_norm": 1.034934163093567, "learning_rate": 2.3077230892356944e-05, "loss": 0.5068, "step": 13469 }, { "epoch": 17.2416, "grad_norm": 0.9962584972381592, "learning_rate": 2.3075230092036815e-05, "loss": 0.4753, "step": 13470 }, { "epoch": 17.24288, "grad_norm": 1.0584473609924316, "learning_rate": 2.3073229291716687e-05, "loss": 0.4369, "step": 13471 }, { "epoch": 17.24416, "grad_norm": 1.0417580604553223, "learning_rate": 2.307122849139656e-05, "loss": 0.547, "step": 13472 }, { "epoch": 17.24544, "grad_norm": 0.9787613749504089, "learning_rate": 2.306922769107643e-05, "loss": 0.4825, "step": 13473 }, { "epoch": 17.24672, "grad_norm": 0.97577303647995, "learning_rate": 2.3067226890756306e-05, "loss": 0.4792, "step": 13474 }, { "epoch": 17.248, "grad_norm": 1.0570039749145508, "learning_rate": 2.3065226090436175e-05, "loss": 0.4757, "step": 13475 }, { "epoch": 17.24928, "grad_norm": 0.9857975840568542, "learning_rate": 2.3063225290116047e-05, "loss": 0.541, "step": 13476 }, { "epoch": 17.25056, "grad_norm": 1.0609130859375, "learning_rate": 2.306122448979592e-05, "loss": 0.547, "step": 13477 }, { "epoch": 17.25184, "grad_norm": 1.0602465867996216, "learning_rate": 2.3059223689475794e-05, "loss": 0.5039, "step": 13478 }, { "epoch": 17.25312, "grad_norm": 1.033346176147461, "learning_rate": 2.3057222889155662e-05, "loss": 0.4925, "step": 13479 }, { "epoch": 17.2544, 
"grad_norm": 1.0470037460327148, "learning_rate": 2.3055222088835534e-05, "loss": 0.5261, "step": 13480 }, { "epoch": 17.25568, "grad_norm": 1.0750315189361572, "learning_rate": 2.305322128851541e-05, "loss": 0.5026, "step": 13481 }, { "epoch": 17.25696, "grad_norm": 1.0455011129379272, "learning_rate": 2.305122048819528e-05, "loss": 0.4955, "step": 13482 }, { "epoch": 17.25824, "grad_norm": 0.9804509282112122, "learning_rate": 2.304921968787515e-05, "loss": 0.4493, "step": 13483 }, { "epoch": 17.25952, "grad_norm": 1.0425822734832764, "learning_rate": 2.304721888755502e-05, "loss": 0.5265, "step": 13484 }, { "epoch": 17.2608, "grad_norm": 1.0288803577423096, "learning_rate": 2.3045218087234897e-05, "loss": 0.5335, "step": 13485 }, { "epoch": 17.26208, "grad_norm": 1.0013490915298462, "learning_rate": 2.304321728691477e-05, "loss": 0.5016, "step": 13486 }, { "epoch": 17.26336, "grad_norm": 0.9813631176948547, "learning_rate": 2.3041216486594637e-05, "loss": 0.4945, "step": 13487 }, { "epoch": 17.26464, "grad_norm": 1.004841923713684, "learning_rate": 2.303921568627451e-05, "loss": 0.5357, "step": 13488 }, { "epoch": 17.26592, "grad_norm": 1.007587194442749, "learning_rate": 2.3037214885954384e-05, "loss": 0.4896, "step": 13489 }, { "epoch": 17.2672, "grad_norm": 1.0714519023895264, "learning_rate": 2.3035214085634256e-05, "loss": 0.5111, "step": 13490 }, { "epoch": 17.26848, "grad_norm": 1.0348763465881348, "learning_rate": 2.3033213285314124e-05, "loss": 0.4855, "step": 13491 }, { "epoch": 17.26976, "grad_norm": 1.0198546648025513, "learning_rate": 2.3031212484994e-05, "loss": 0.5217, "step": 13492 }, { "epoch": 17.27104, "grad_norm": 1.0204856395721436, "learning_rate": 2.302921168467387e-05, "loss": 0.5029, "step": 13493 }, { "epoch": 17.27232, "grad_norm": 1.0740776062011719, "learning_rate": 2.3027210884353743e-05, "loss": 0.5458, "step": 13494 }, { "epoch": 17.2736, "grad_norm": 1.0617952346801758, "learning_rate": 2.3025210084033612e-05, "loss": 0.5018, 
"step": 13495 }, { "epoch": 17.27488, "grad_norm": 0.9945394992828369, "learning_rate": 2.3023209283713487e-05, "loss": 0.5174, "step": 13496 }, { "epoch": 17.27616, "grad_norm": 1.033125400543213, "learning_rate": 2.302120848339336e-05, "loss": 0.5133, "step": 13497 }, { "epoch": 17.27744, "grad_norm": 1.0384323596954346, "learning_rate": 2.301920768307323e-05, "loss": 0.5283, "step": 13498 }, { "epoch": 17.27872, "grad_norm": 1.0113089084625244, "learning_rate": 2.3017206882753103e-05, "loss": 0.4772, "step": 13499 }, { "epoch": 17.28, "grad_norm": 1.0131186246871948, "learning_rate": 2.3015206082432975e-05, "loss": 0.4923, "step": 13500 }, { "epoch": 17.28128, "grad_norm": 1.0085313320159912, "learning_rate": 2.3013205282112846e-05, "loss": 0.525, "step": 13501 }, { "epoch": 17.28256, "grad_norm": 0.9978111386299133, "learning_rate": 2.3011204481792718e-05, "loss": 0.5065, "step": 13502 }, { "epoch": 17.28384, "grad_norm": 1.0002354383468628, "learning_rate": 2.300920368147259e-05, "loss": 0.4846, "step": 13503 }, { "epoch": 17.28512, "grad_norm": 1.0522061586380005, "learning_rate": 2.3007202881152462e-05, "loss": 0.5395, "step": 13504 }, { "epoch": 17.2864, "grad_norm": 1.0922776460647583, "learning_rate": 2.3005202080832334e-05, "loss": 0.5106, "step": 13505 }, { "epoch": 17.28768, "grad_norm": 1.027981162071228, "learning_rate": 2.3003201280512206e-05, "loss": 0.4703, "step": 13506 }, { "epoch": 17.28896, "grad_norm": 1.0434186458587646, "learning_rate": 2.3001200480192078e-05, "loss": 0.5184, "step": 13507 }, { "epoch": 17.29024, "grad_norm": 1.038582682609558, "learning_rate": 2.299919967987195e-05, "loss": 0.5318, "step": 13508 }, { "epoch": 17.29152, "grad_norm": 1.0460221767425537, "learning_rate": 2.299719887955182e-05, "loss": 0.5173, "step": 13509 }, { "epoch": 17.2928, "grad_norm": 1.0172295570373535, "learning_rate": 2.2995198079231693e-05, "loss": 0.4987, "step": 13510 }, { "epoch": 17.29408, "grad_norm": 1.1028789281845093, "learning_rate": 
2.2993197278911565e-05, "loss": 0.545, "step": 13511 }, { "epoch": 17.29536, "grad_norm": 1.0653462409973145, "learning_rate": 2.2991196478591437e-05, "loss": 0.5482, "step": 13512 }, { "epoch": 17.29664, "grad_norm": 1.0706826448440552, "learning_rate": 2.2989195678271312e-05, "loss": 0.5027, "step": 13513 }, { "epoch": 17.29792, "grad_norm": 1.0168458223342896, "learning_rate": 2.298719487795118e-05, "loss": 0.5167, "step": 13514 }, { "epoch": 17.2992, "grad_norm": 0.9857696890830994, "learning_rate": 2.2985194077631052e-05, "loss": 0.5102, "step": 13515 }, { "epoch": 17.30048, "grad_norm": 1.0181854963302612, "learning_rate": 2.2983193277310924e-05, "loss": 0.4949, "step": 13516 }, { "epoch": 17.30176, "grad_norm": 1.10274338722229, "learning_rate": 2.29811924769908e-05, "loss": 0.5449, "step": 13517 }, { "epoch": 17.30304, "grad_norm": 1.0308619737625122, "learning_rate": 2.2979191676670668e-05, "loss": 0.5152, "step": 13518 }, { "epoch": 17.30432, "grad_norm": 1.0274208784103394, "learning_rate": 2.297719087635054e-05, "loss": 0.5126, "step": 13519 }, { "epoch": 17.3056, "grad_norm": 1.0590589046478271, "learning_rate": 2.2975190076030415e-05, "loss": 0.516, "step": 13520 }, { "epoch": 17.30688, "grad_norm": 1.0562139749526978, "learning_rate": 2.2973189275710287e-05, "loss": 0.5627, "step": 13521 }, { "epoch": 17.30816, "grad_norm": 1.004634141921997, "learning_rate": 2.2971188475390155e-05, "loss": 0.49, "step": 13522 }, { "epoch": 17.30944, "grad_norm": 1.0177441835403442, "learning_rate": 2.2969187675070027e-05, "loss": 0.4957, "step": 13523 }, { "epoch": 17.31072, "grad_norm": 1.0136632919311523, "learning_rate": 2.2967186874749903e-05, "loss": 0.507, "step": 13524 }, { "epoch": 17.312, "grad_norm": 0.9950380325317383, "learning_rate": 2.2965186074429774e-05, "loss": 0.4692, "step": 13525 }, { "epoch": 17.31328, "grad_norm": 1.0641790628433228, "learning_rate": 2.2963185274109643e-05, "loss": 0.4797, "step": 13526 }, { "epoch": 17.31456, "grad_norm": 
1.120981216430664, "learning_rate": 2.2961184473789518e-05, "loss": 0.5234, "step": 13527 }, { "epoch": 17.31584, "grad_norm": 1.044729471206665, "learning_rate": 2.295918367346939e-05, "loss": 0.4913, "step": 13528 }, { "epoch": 17.31712, "grad_norm": 1.029380202293396, "learning_rate": 2.2957182873149262e-05, "loss": 0.5046, "step": 13529 }, { "epoch": 17.3184, "grad_norm": 1.0592548847198486, "learning_rate": 2.295518207282913e-05, "loss": 0.4982, "step": 13530 }, { "epoch": 17.31968, "grad_norm": 1.0422892570495605, "learning_rate": 2.2953181272509006e-05, "loss": 0.5258, "step": 13531 }, { "epoch": 17.32096, "grad_norm": 1.0550442934036255, "learning_rate": 2.2951180472188877e-05, "loss": 0.5656, "step": 13532 }, { "epoch": 17.32224, "grad_norm": 1.009332299232483, "learning_rate": 2.294917967186875e-05, "loss": 0.473, "step": 13533 }, { "epoch": 17.32352, "grad_norm": 1.0183335542678833, "learning_rate": 2.294717887154862e-05, "loss": 0.5133, "step": 13534 }, { "epoch": 17.3248, "grad_norm": 1.013777732849121, "learning_rate": 2.2945178071228493e-05, "loss": 0.5414, "step": 13535 }, { "epoch": 17.32608, "grad_norm": 1.0590869188308716, "learning_rate": 2.2943177270908365e-05, "loss": 0.5051, "step": 13536 }, { "epoch": 17.32736, "grad_norm": 1.0427137613296509, "learning_rate": 2.2941176470588237e-05, "loss": 0.5346, "step": 13537 }, { "epoch": 17.32864, "grad_norm": 0.9942336678504944, "learning_rate": 2.293917567026811e-05, "loss": 0.4948, "step": 13538 }, { "epoch": 17.32992, "grad_norm": 1.0325044393539429, "learning_rate": 2.293717486994798e-05, "loss": 0.5118, "step": 13539 }, { "epoch": 17.3312, "grad_norm": 1.0152339935302734, "learning_rate": 2.2935174069627852e-05, "loss": 0.5144, "step": 13540 }, { "epoch": 17.33248, "grad_norm": 1.0435190200805664, "learning_rate": 2.2933173269307724e-05, "loss": 0.5059, "step": 13541 }, { "epoch": 17.33376, "grad_norm": 1.0658721923828125, "learning_rate": 2.2931172468987596e-05, "loss": 0.5129, "step": 13542 }, 
{ "epoch": 17.33504, "grad_norm": 1.0623787641525269, "learning_rate": 2.2929171668667468e-05, "loss": 0.4906, "step": 13543 }, { "epoch": 17.33632, "grad_norm": 1.0778841972351074, "learning_rate": 2.292717086834734e-05, "loss": 0.5309, "step": 13544 }, { "epoch": 17.3376, "grad_norm": 1.048268437385559, "learning_rate": 2.292517006802721e-05, "loss": 0.5067, "step": 13545 }, { "epoch": 17.33888, "grad_norm": 1.0772168636322021, "learning_rate": 2.2923169267707083e-05, "loss": 0.5369, "step": 13546 }, { "epoch": 17.34016, "grad_norm": 1.0442776679992676, "learning_rate": 2.2921168467386955e-05, "loss": 0.5185, "step": 13547 }, { "epoch": 17.34144, "grad_norm": 1.0044589042663574, "learning_rate": 2.2919167667066827e-05, "loss": 0.4811, "step": 13548 }, { "epoch": 17.34272, "grad_norm": 1.067563533782959, "learning_rate": 2.29171668667467e-05, "loss": 0.5228, "step": 13549 }, { "epoch": 17.344, "grad_norm": 1.026764988899231, "learning_rate": 2.291516606642657e-05, "loss": 0.5043, "step": 13550 }, { "epoch": 17.34528, "grad_norm": 1.0351366996765137, "learning_rate": 2.2913165266106443e-05, "loss": 0.5151, "step": 13551 }, { "epoch": 17.34656, "grad_norm": 1.0662847757339478, "learning_rate": 2.2911164465786318e-05, "loss": 0.5239, "step": 13552 }, { "epoch": 17.34784, "grad_norm": 1.1130237579345703, "learning_rate": 2.2909163665466186e-05, "loss": 0.5437, "step": 13553 }, { "epoch": 17.34912, "grad_norm": 1.0121694803237915, "learning_rate": 2.2907162865146058e-05, "loss": 0.4844, "step": 13554 }, { "epoch": 17.3504, "grad_norm": 1.0110938549041748, "learning_rate": 2.290516206482593e-05, "loss": 0.4779, "step": 13555 }, { "epoch": 17.35168, "grad_norm": 1.0952173471450806, "learning_rate": 2.2903161264505805e-05, "loss": 0.5603, "step": 13556 }, { "epoch": 17.35296, "grad_norm": 1.018991231918335, "learning_rate": 2.2901160464185674e-05, "loss": 0.5099, "step": 13557 }, { "epoch": 17.35424, "grad_norm": 1.0593852996826172, "learning_rate": 
2.2899159663865546e-05, "loss": 0.5467, "step": 13558 }, { "epoch": 17.35552, "grad_norm": 1.0670006275177002, "learning_rate": 2.289715886354542e-05, "loss": 0.495, "step": 13559 }, { "epoch": 17.3568, "grad_norm": 1.0311278104782104, "learning_rate": 2.2895158063225293e-05, "loss": 0.481, "step": 13560 }, { "epoch": 17.35808, "grad_norm": 1.0428582429885864, "learning_rate": 2.289315726290516e-05, "loss": 0.4989, "step": 13561 }, { "epoch": 17.35936, "grad_norm": 1.0917315483093262, "learning_rate": 2.2891156462585033e-05, "loss": 0.5338, "step": 13562 }, { "epoch": 17.36064, "grad_norm": 0.999539852142334, "learning_rate": 2.288915566226491e-05, "loss": 0.5455, "step": 13563 }, { "epoch": 17.36192, "grad_norm": 1.0183244943618774, "learning_rate": 2.288715486194478e-05, "loss": 0.515, "step": 13564 }, { "epoch": 17.3632, "grad_norm": 1.0455024242401123, "learning_rate": 2.288515406162465e-05, "loss": 0.5141, "step": 13565 }, { "epoch": 17.36448, "grad_norm": 1.0188703536987305, "learning_rate": 2.2883153261304524e-05, "loss": 0.4722, "step": 13566 }, { "epoch": 17.36576, "grad_norm": 1.0280158519744873, "learning_rate": 2.2881152460984396e-05, "loss": 0.5013, "step": 13567 }, { "epoch": 17.36704, "grad_norm": 1.048551082611084, "learning_rate": 2.2879151660664268e-05, "loss": 0.5232, "step": 13568 }, { "epoch": 17.36832, "grad_norm": 0.9960765838623047, "learning_rate": 2.2877150860344136e-05, "loss": 0.5111, "step": 13569 }, { "epoch": 17.3696, "grad_norm": 0.9852240085601807, "learning_rate": 2.287515006002401e-05, "loss": 0.4906, "step": 13570 }, { "epoch": 17.37088, "grad_norm": 1.0797041654586792, "learning_rate": 2.2873149259703883e-05, "loss": 0.5014, "step": 13571 }, { "epoch": 17.37216, "grad_norm": 1.0308854579925537, "learning_rate": 2.2871148459383755e-05, "loss": 0.4957, "step": 13572 }, { "epoch": 17.37344, "grad_norm": 1.0648754835128784, "learning_rate": 2.2869147659063627e-05, "loss": 0.4862, "step": 13573 }, { "epoch": 17.37472, "grad_norm": 
1.0644230842590332, "learning_rate": 2.28671468587435e-05, "loss": 0.5365, "step": 13574 }, { "epoch": 17.376, "grad_norm": 1.071189522743225, "learning_rate": 2.286514605842337e-05, "loss": 0.5117, "step": 13575 }, { "epoch": 17.37728, "grad_norm": 1.0346791744232178, "learning_rate": 2.2863145258103242e-05, "loss": 0.532, "step": 13576 }, { "epoch": 17.37856, "grad_norm": 1.0855563879013062, "learning_rate": 2.2861144457783114e-05, "loss": 0.5115, "step": 13577 }, { "epoch": 17.37984, "grad_norm": 0.9857675433158875, "learning_rate": 2.2859143657462986e-05, "loss": 0.473, "step": 13578 }, { "epoch": 17.38112, "grad_norm": 0.993156373500824, "learning_rate": 2.2857142857142858e-05, "loss": 0.4923, "step": 13579 }, { "epoch": 17.3824, "grad_norm": 1.099673867225647, "learning_rate": 2.285514205682273e-05, "loss": 0.5255, "step": 13580 }, { "epoch": 17.38368, "grad_norm": 1.0772475004196167, "learning_rate": 2.2853141256502602e-05, "loss": 0.5019, "step": 13581 }, { "epoch": 17.38496, "grad_norm": 1.078121304512024, "learning_rate": 2.2851140456182474e-05, "loss": 0.5288, "step": 13582 }, { "epoch": 17.38624, "grad_norm": 0.9957267642021179, "learning_rate": 2.2849139655862345e-05, "loss": 0.4716, "step": 13583 }, { "epoch": 17.38752, "grad_norm": 1.0318092107772827, "learning_rate": 2.2847138855542217e-05, "loss": 0.4994, "step": 13584 }, { "epoch": 17.3888, "grad_norm": 1.078931212425232, "learning_rate": 2.284513805522209e-05, "loss": 0.538, "step": 13585 }, { "epoch": 17.39008, "grad_norm": 1.092041254043579, "learning_rate": 2.284313725490196e-05, "loss": 0.5409, "step": 13586 }, { "epoch": 17.39136, "grad_norm": 1.0563862323760986, "learning_rate": 2.2841136454581836e-05, "loss": 0.4859, "step": 13587 }, { "epoch": 17.39264, "grad_norm": 1.045304298400879, "learning_rate": 2.2839135654261705e-05, "loss": 0.4956, "step": 13588 }, { "epoch": 17.39392, "grad_norm": 1.0128166675567627, "learning_rate": 2.2837134853941577e-05, "loss": 0.476, "step": 13589 }, { 
"epoch": 17.3952, "grad_norm": 0.9987632036209106, "learning_rate": 2.283513405362145e-05, "loss": 0.4838, "step": 13590 }, { "epoch": 17.39648, "grad_norm": 1.012587547302246, "learning_rate": 2.2833133253301324e-05, "loss": 0.4904, "step": 13591 }, { "epoch": 17.39776, "grad_norm": 1.0492074489593506, "learning_rate": 2.2831132452981192e-05, "loss": 0.5372, "step": 13592 }, { "epoch": 17.39904, "grad_norm": 1.0137112140655518, "learning_rate": 2.2829131652661064e-05, "loss": 0.4681, "step": 13593 }, { "epoch": 17.40032, "grad_norm": 1.0790600776672363, "learning_rate": 2.282713085234094e-05, "loss": 0.5596, "step": 13594 }, { "epoch": 17.4016, "grad_norm": 0.9867367744445801, "learning_rate": 2.282513005202081e-05, "loss": 0.4537, "step": 13595 }, { "epoch": 17.40288, "grad_norm": 1.0197902917861938, "learning_rate": 2.282312925170068e-05, "loss": 0.5013, "step": 13596 }, { "epoch": 17.40416, "grad_norm": 1.0174394845962524, "learning_rate": 2.282112845138055e-05, "loss": 0.512, "step": 13597 }, { "epoch": 17.40544, "grad_norm": 1.000441312789917, "learning_rate": 2.2819127651060427e-05, "loss": 0.4996, "step": 13598 }, { "epoch": 17.40672, "grad_norm": 1.0385953187942505, "learning_rate": 2.28171268507403e-05, "loss": 0.5161, "step": 13599 }, { "epoch": 17.408, "grad_norm": 1.1014004945755005, "learning_rate": 2.2815126050420167e-05, "loss": 0.4972, "step": 13600 }, { "epoch": 17.40928, "grad_norm": 1.0908480882644653, "learning_rate": 2.281312525010004e-05, "loss": 0.5294, "step": 13601 }, { "epoch": 17.41056, "grad_norm": 1.0491387844085693, "learning_rate": 2.2811124449779914e-05, "loss": 0.5295, "step": 13602 }, { "epoch": 17.41184, "grad_norm": 1.1002565622329712, "learning_rate": 2.2809123649459786e-05, "loss": 0.5095, "step": 13603 }, { "epoch": 17.41312, "grad_norm": 1.077600359916687, "learning_rate": 2.2807122849139654e-05, "loss": 0.485, "step": 13604 }, { "epoch": 17.4144, "grad_norm": 1.1393417119979858, "learning_rate": 2.280512204881953e-05, 
"loss": 0.5553, "step": 13605 }, { "epoch": 17.41568, "grad_norm": 1.087091088294983, "learning_rate": 2.28031212484994e-05, "loss": 0.5519, "step": 13606 }, { "epoch": 17.41696, "grad_norm": 1.0322144031524658, "learning_rate": 2.2801120448179273e-05, "loss": 0.5036, "step": 13607 }, { "epoch": 17.41824, "grad_norm": 1.0630682706832886, "learning_rate": 2.2799119647859142e-05, "loss": 0.5113, "step": 13608 }, { "epoch": 17.41952, "grad_norm": 1.0402500629425049, "learning_rate": 2.2797118847539017e-05, "loss": 0.4692, "step": 13609 }, { "epoch": 17.4208, "grad_norm": 1.02988600730896, "learning_rate": 2.279511804721889e-05, "loss": 0.4878, "step": 13610 }, { "epoch": 17.42208, "grad_norm": 1.0270915031433105, "learning_rate": 2.279311724689876e-05, "loss": 0.4991, "step": 13611 }, { "epoch": 17.42336, "grad_norm": 0.9723539352416992, "learning_rate": 2.2791116446578633e-05, "loss": 0.4838, "step": 13612 }, { "epoch": 17.42464, "grad_norm": 1.06536066532135, "learning_rate": 2.2789115646258505e-05, "loss": 0.5087, "step": 13613 }, { "epoch": 17.42592, "grad_norm": 1.0233323574066162, "learning_rate": 2.2787114845938376e-05, "loss": 0.5379, "step": 13614 }, { "epoch": 17.4272, "grad_norm": 1.0208466053009033, "learning_rate": 2.2785114045618248e-05, "loss": 0.4942, "step": 13615 }, { "epoch": 17.42848, "grad_norm": 1.020458698272705, "learning_rate": 2.278311324529812e-05, "loss": 0.5093, "step": 13616 }, { "epoch": 17.42976, "grad_norm": 1.0593698024749756, "learning_rate": 2.2781112444977992e-05, "loss": 0.5102, "step": 13617 }, { "epoch": 17.43104, "grad_norm": 1.0400246381759644, "learning_rate": 2.2779111644657864e-05, "loss": 0.5102, "step": 13618 }, { "epoch": 17.43232, "grad_norm": 1.0877729654312134, "learning_rate": 2.2777110844337736e-05, "loss": 0.5376, "step": 13619 }, { "epoch": 17.4336, "grad_norm": 1.0149822235107422, "learning_rate": 2.2775110044017608e-05, "loss": 0.5096, "step": 13620 }, { "epoch": 17.43488, "grad_norm": 1.0287227630615234, 
"learning_rate": 2.277310924369748e-05, "loss": 0.5049, "step": 13621 }, { "epoch": 17.43616, "grad_norm": 0.98603755235672, "learning_rate": 2.277110844337735e-05, "loss": 0.4833, "step": 13622 }, { "epoch": 17.43744, "grad_norm": 1.0274131298065186, "learning_rate": 2.2769107643057223e-05, "loss": 0.4929, "step": 13623 }, { "epoch": 17.43872, "grad_norm": 1.006752610206604, "learning_rate": 2.2767106842737095e-05, "loss": 0.4812, "step": 13624 }, { "epoch": 17.44, "grad_norm": 1.0845232009887695, "learning_rate": 2.2765106042416967e-05, "loss": 0.5392, "step": 13625 }, { "epoch": 17.44128, "grad_norm": 1.0171184539794922, "learning_rate": 2.2763105242096842e-05, "loss": 0.4821, "step": 13626 }, { "epoch": 17.44256, "grad_norm": 1.019047498703003, "learning_rate": 2.276110444177671e-05, "loss": 0.5066, "step": 13627 }, { "epoch": 17.44384, "grad_norm": 0.9561603665351868, "learning_rate": 2.2759103641456582e-05, "loss": 0.4448, "step": 13628 }, { "epoch": 17.44512, "grad_norm": 1.011527180671692, "learning_rate": 2.2757102841136454e-05, "loss": 0.4852, "step": 13629 }, { "epoch": 17.4464, "grad_norm": 1.019681453704834, "learning_rate": 2.275510204081633e-05, "loss": 0.4853, "step": 13630 }, { "epoch": 17.44768, "grad_norm": 1.005802035331726, "learning_rate": 2.2753101240496198e-05, "loss": 0.4922, "step": 13631 }, { "epoch": 17.44896, "grad_norm": 1.0661100149154663, "learning_rate": 2.275110044017607e-05, "loss": 0.5282, "step": 13632 }, { "epoch": 17.45024, "grad_norm": 1.1106263399124146, "learning_rate": 2.2749099639855945e-05, "loss": 0.5464, "step": 13633 }, { "epoch": 17.45152, "grad_norm": 1.0604326725006104, "learning_rate": 2.2747098839535817e-05, "loss": 0.4823, "step": 13634 }, { "epoch": 17.4528, "grad_norm": 1.089737057685852, "learning_rate": 2.2745098039215685e-05, "loss": 0.5092, "step": 13635 }, { "epoch": 17.45408, "grad_norm": 1.071022391319275, "learning_rate": 2.2743097238895557e-05, "loss": 0.5489, "step": 13636 }, { "epoch": 17.45536, 
"grad_norm": 1.0083519220352173, "learning_rate": 2.2741096438575433e-05, "loss": 0.479, "step": 13637 }, { "epoch": 17.45664, "grad_norm": 1.0257757902145386, "learning_rate": 2.2739095638255304e-05, "loss": 0.5014, "step": 13638 }, { "epoch": 17.45792, "grad_norm": 1.0760114192962646, "learning_rate": 2.2737094837935173e-05, "loss": 0.57, "step": 13639 }, { "epoch": 17.4592, "grad_norm": 1.0728118419647217, "learning_rate": 2.2735094037615048e-05, "loss": 0.5392, "step": 13640 }, { "epoch": 17.46048, "grad_norm": 1.043785572052002, "learning_rate": 2.273309323729492e-05, "loss": 0.5259, "step": 13641 }, { "epoch": 17.46176, "grad_norm": 1.0340181589126587, "learning_rate": 2.2731092436974792e-05, "loss": 0.5326, "step": 13642 }, { "epoch": 17.46304, "grad_norm": 1.094905138015747, "learning_rate": 2.272909163665466e-05, "loss": 0.5099, "step": 13643 }, { "epoch": 17.46432, "grad_norm": 1.0578844547271729, "learning_rate": 2.2727090836334536e-05, "loss": 0.5155, "step": 13644 }, { "epoch": 17.4656, "grad_norm": 1.028712272644043, "learning_rate": 2.2725090036014407e-05, "loss": 0.487, "step": 13645 }, { "epoch": 17.46688, "grad_norm": 1.080743670463562, "learning_rate": 2.272308923569428e-05, "loss": 0.5096, "step": 13646 }, { "epoch": 17.46816, "grad_norm": 1.0488895177841187, "learning_rate": 2.272108843537415e-05, "loss": 0.477, "step": 13647 }, { "epoch": 17.46944, "grad_norm": 1.0804858207702637, "learning_rate": 2.2719087635054023e-05, "loss": 0.5193, "step": 13648 }, { "epoch": 17.47072, "grad_norm": 1.074198603630066, "learning_rate": 2.2717086834733895e-05, "loss": 0.518, "step": 13649 }, { "epoch": 17.472, "grad_norm": 1.0862224102020264, "learning_rate": 2.2715086034413767e-05, "loss": 0.5612, "step": 13650 }, { "epoch": 17.47328, "grad_norm": 1.0850809812545776, "learning_rate": 2.271308523409364e-05, "loss": 0.5247, "step": 13651 }, { "epoch": 17.47456, "grad_norm": 1.0903500318527222, "learning_rate": 2.271108443377351e-05, "loss": 0.5088, "step": 
13652 }, { "epoch": 17.47584, "grad_norm": 1.0023061037063599, "learning_rate": 2.2709083633453382e-05, "loss": 0.5279, "step": 13653 }, { "epoch": 17.47712, "grad_norm": 0.980223536491394, "learning_rate": 2.2707082833133254e-05, "loss": 0.5039, "step": 13654 }, { "epoch": 17.4784, "grad_norm": 1.0036488771438599, "learning_rate": 2.2705082032813126e-05, "loss": 0.5017, "step": 13655 }, { "epoch": 17.47968, "grad_norm": 1.0191391706466675, "learning_rate": 2.2703081232492998e-05, "loss": 0.4988, "step": 13656 }, { "epoch": 17.48096, "grad_norm": 0.9996136426925659, "learning_rate": 2.270108043217287e-05, "loss": 0.5057, "step": 13657 }, { "epoch": 17.48224, "grad_norm": 1.033124327659607, "learning_rate": 2.2699079631852745e-05, "loss": 0.5378, "step": 13658 }, { "epoch": 17.48352, "grad_norm": 1.1039026975631714, "learning_rate": 2.2697078831532613e-05, "loss": 0.5528, "step": 13659 }, { "epoch": 17.4848, "grad_norm": 1.0512938499450684, "learning_rate": 2.2695078031212485e-05, "loss": 0.4989, "step": 13660 }, { "epoch": 17.48608, "grad_norm": 1.054443120956421, "learning_rate": 2.2693077230892357e-05, "loss": 0.5054, "step": 13661 }, { "epoch": 17.48736, "grad_norm": 1.0660535097122192, "learning_rate": 2.2691076430572232e-05, "loss": 0.504, "step": 13662 }, { "epoch": 17.48864, "grad_norm": 0.9800325036048889, "learning_rate": 2.26890756302521e-05, "loss": 0.4377, "step": 13663 }, { "epoch": 17.48992, "grad_norm": 1.013704538345337, "learning_rate": 2.2687074829931973e-05, "loss": 0.5319, "step": 13664 }, { "epoch": 17.4912, "grad_norm": 1.0099461078643799, "learning_rate": 2.2685074029611848e-05, "loss": 0.4664, "step": 13665 }, { "epoch": 17.49248, "grad_norm": 1.0758755207061768, "learning_rate": 2.268307322929172e-05, "loss": 0.5314, "step": 13666 }, { "epoch": 17.49376, "grad_norm": 1.0263993740081787, "learning_rate": 2.2681072428971588e-05, "loss": 0.4989, "step": 13667 }, { "epoch": 17.49504, "grad_norm": 1.1413928270339966, "learning_rate": 
2.267907162865146e-05, "loss": 0.5251, "step": 13668 }, { "epoch": 17.49632, "grad_norm": 0.9962292909622192, "learning_rate": 2.2677070828331335e-05, "loss": 0.485, "step": 13669 }, { "epoch": 17.4976, "grad_norm": 0.9963700771331787, "learning_rate": 2.2675070028011207e-05, "loss": 0.5073, "step": 13670 }, { "epoch": 17.49888, "grad_norm": 1.0219050645828247, "learning_rate": 2.2673069227691076e-05, "loss": 0.5144, "step": 13671 }, { "epoch": 17.50016, "grad_norm": 0.9802215695381165, "learning_rate": 2.267106842737095e-05, "loss": 0.5066, "step": 13672 }, { "epoch": 17.50144, "grad_norm": 0.9986078143119812, "learning_rate": 2.2669067627050823e-05, "loss": 0.4959, "step": 13673 }, { "epoch": 17.50272, "grad_norm": 1.017099142074585, "learning_rate": 2.2667066826730695e-05, "loss": 0.49, "step": 13674 }, { "epoch": 17.504, "grad_norm": 1.0355932712554932, "learning_rate": 2.2665066026410563e-05, "loss": 0.5282, "step": 13675 }, { "epoch": 17.50528, "grad_norm": 0.9949687123298645, "learning_rate": 2.266306522609044e-05, "loss": 0.4931, "step": 13676 }, { "epoch": 17.50656, "grad_norm": 0.975506603717804, "learning_rate": 2.266106442577031e-05, "loss": 0.525, "step": 13677 }, { "epoch": 17.50784, "grad_norm": 1.0513207912445068, "learning_rate": 2.2659063625450182e-05, "loss": 0.5377, "step": 13678 }, { "epoch": 17.50912, "grad_norm": 1.0164129734039307, "learning_rate": 2.2657062825130054e-05, "loss": 0.4787, "step": 13679 }, { "epoch": 17.5104, "grad_norm": 1.022405982017517, "learning_rate": 2.2655062024809926e-05, "loss": 0.4741, "step": 13680 }, { "epoch": 17.51168, "grad_norm": 1.0068978071212769, "learning_rate": 2.2653061224489798e-05, "loss": 0.4911, "step": 13681 }, { "epoch": 17.51296, "grad_norm": 0.9547240138053894, "learning_rate": 2.265106042416967e-05, "loss": 0.4298, "step": 13682 }, { "epoch": 17.51424, "grad_norm": 0.969224750995636, "learning_rate": 2.264905962384954e-05, "loss": 0.4678, "step": 13683 }, { "epoch": 17.51552, "grad_norm": 
0.9585568904876709, "learning_rate": 2.2647058823529413e-05, "loss": 0.4658, "step": 13684 }, { "epoch": 17.5168, "grad_norm": 1.0375971794128418, "learning_rate": 2.2645058023209285e-05, "loss": 0.5459, "step": 13685 }, { "epoch": 17.51808, "grad_norm": 1.1199681758880615, "learning_rate": 2.2643057222889157e-05, "loss": 0.5432, "step": 13686 }, { "epoch": 17.51936, "grad_norm": 1.0504252910614014, "learning_rate": 2.264105642256903e-05, "loss": 0.531, "step": 13687 }, { "epoch": 17.52064, "grad_norm": 1.0494788885116577, "learning_rate": 2.26390556222489e-05, "loss": 0.5018, "step": 13688 }, { "epoch": 17.52192, "grad_norm": 1.0414574146270752, "learning_rate": 2.2637054821928772e-05, "loss": 0.5317, "step": 13689 }, { "epoch": 17.5232, "grad_norm": 1.058502197265625, "learning_rate": 2.2635054021608644e-05, "loss": 0.5139, "step": 13690 }, { "epoch": 17.52448, "grad_norm": 1.1048195362091064, "learning_rate": 2.2633053221288516e-05, "loss": 0.5667, "step": 13691 }, { "epoch": 17.52576, "grad_norm": 1.0545276403427124, "learning_rate": 2.2631052420968388e-05, "loss": 0.4954, "step": 13692 }, { "epoch": 17.52704, "grad_norm": 1.0869113206863403, "learning_rate": 2.2629051620648263e-05, "loss": 0.564, "step": 13693 }, { "epoch": 17.52832, "grad_norm": 1.0492844581604004, "learning_rate": 2.2627050820328132e-05, "loss": 0.54, "step": 13694 }, { "epoch": 17.5296, "grad_norm": 1.0702095031738281, "learning_rate": 2.2625050020008004e-05, "loss": 0.5171, "step": 13695 }, { "epoch": 17.53088, "grad_norm": 1.0479813814163208, "learning_rate": 2.2623049219687875e-05, "loss": 0.5329, "step": 13696 }, { "epoch": 17.53216, "grad_norm": 1.0335677862167358, "learning_rate": 2.262104841936775e-05, "loss": 0.5585, "step": 13697 }, { "epoch": 17.53344, "grad_norm": 1.0604044198989868, "learning_rate": 2.261904761904762e-05, "loss": 0.5967, "step": 13698 }, { "epoch": 17.53472, "grad_norm": 0.9840775728225708, "learning_rate": 2.261704681872749e-05, "loss": 0.496, "step": 13699 }, 
{ "epoch": 17.536, "grad_norm": 0.9827370643615723, "learning_rate": 2.2615046018407366e-05, "loss": 0.4696, "step": 13700 }, { "epoch": 17.53728, "grad_norm": 1.0692596435546875, "learning_rate": 2.2613045218087238e-05, "loss": 0.5455, "step": 13701 }, { "epoch": 17.53856, "grad_norm": 1.096990704536438, "learning_rate": 2.2611044417767107e-05, "loss": 0.5268, "step": 13702 }, { "epoch": 17.53984, "grad_norm": 1.0311455726623535, "learning_rate": 2.260904361744698e-05, "loss": 0.5189, "step": 13703 }, { "epoch": 17.54112, "grad_norm": 0.9897575378417969, "learning_rate": 2.2607042817126854e-05, "loss": 0.4777, "step": 13704 }, { "epoch": 17.5424, "grad_norm": 1.0446470975875854, "learning_rate": 2.2605042016806726e-05, "loss": 0.5085, "step": 13705 }, { "epoch": 17.54368, "grad_norm": 1.0381147861480713, "learning_rate": 2.2603041216486594e-05, "loss": 0.5308, "step": 13706 }, { "epoch": 17.54496, "grad_norm": 1.0593794584274292, "learning_rate": 2.260104041616647e-05, "loss": 0.4676, "step": 13707 }, { "epoch": 17.54624, "grad_norm": 1.0510727167129517, "learning_rate": 2.259903961584634e-05, "loss": 0.5292, "step": 13708 }, { "epoch": 17.54752, "grad_norm": 1.047689437866211, "learning_rate": 2.2597038815526213e-05, "loss": 0.5205, "step": 13709 }, { "epoch": 17.5488, "grad_norm": 1.0579311847686768, "learning_rate": 2.259503801520608e-05, "loss": 0.5061, "step": 13710 }, { "epoch": 17.55008, "grad_norm": 1.0636060237884521, "learning_rate": 2.2593037214885957e-05, "loss": 0.5463, "step": 13711 }, { "epoch": 17.55136, "grad_norm": 0.990552544593811, "learning_rate": 2.259103641456583e-05, "loss": 0.4527, "step": 13712 }, { "epoch": 17.55264, "grad_norm": 1.0721439123153687, "learning_rate": 2.25890356142457e-05, "loss": 0.5183, "step": 13713 }, { "epoch": 17.55392, "grad_norm": 1.0679312944412231, "learning_rate": 2.258703481392557e-05, "loss": 0.557, "step": 13714 }, { "epoch": 17.5552, "grad_norm": 0.9733208417892456, "learning_rate": 2.2585034013605444e-05, 
"loss": 0.507, "step": 13715 }, { "epoch": 17.55648, "grad_norm": 1.1034499406814575, "learning_rate": 2.2583033213285316e-05, "loss": 0.5612, "step": 13716 }, { "epoch": 17.557760000000002, "grad_norm": 1.0827926397323608, "learning_rate": 2.2581032412965188e-05, "loss": 0.5139, "step": 13717 }, { "epoch": 17.55904, "grad_norm": 0.9990882873535156, "learning_rate": 2.257903161264506e-05, "loss": 0.4797, "step": 13718 }, { "epoch": 17.56032, "grad_norm": 0.9691088199615479, "learning_rate": 2.257703081232493e-05, "loss": 0.4385, "step": 13719 }, { "epoch": 17.5616, "grad_norm": 1.0070648193359375, "learning_rate": 2.2575030012004803e-05, "loss": 0.4986, "step": 13720 }, { "epoch": 17.56288, "grad_norm": 1.0735926628112793, "learning_rate": 2.2573029211684675e-05, "loss": 0.5415, "step": 13721 }, { "epoch": 17.56416, "grad_norm": 1.0106046199798584, "learning_rate": 2.2571028411364547e-05, "loss": 0.4826, "step": 13722 }, { "epoch": 17.56544, "grad_norm": 1.0311856269836426, "learning_rate": 2.256902761104442e-05, "loss": 0.5534, "step": 13723 }, { "epoch": 17.56672, "grad_norm": 1.014507532119751, "learning_rate": 2.256702681072429e-05, "loss": 0.4764, "step": 13724 }, { "epoch": 17.568, "grad_norm": 1.0640530586242676, "learning_rate": 2.2565026010404163e-05, "loss": 0.5272, "step": 13725 }, { "epoch": 17.56928, "grad_norm": 1.1166555881500244, "learning_rate": 2.2563025210084035e-05, "loss": 0.572, "step": 13726 }, { "epoch": 17.57056, "grad_norm": 1.061682105064392, "learning_rate": 2.2561024409763906e-05, "loss": 0.5373, "step": 13727 }, { "epoch": 17.57184, "grad_norm": 1.0302103757858276, "learning_rate": 2.2559023609443778e-05, "loss": 0.4871, "step": 13728 }, { "epoch": 17.57312, "grad_norm": 1.040071964263916, "learning_rate": 2.255702280912365e-05, "loss": 0.5096, "step": 13729 }, { "epoch": 17.5744, "grad_norm": 1.0365045070648193, "learning_rate": 2.2555022008803522e-05, "loss": 0.4992, "step": 13730 }, { "epoch": 17.57568, "grad_norm": 
1.0997463464736938, "learning_rate": 2.2553021208483394e-05, "loss": 0.532, "step": 13731 }, { "epoch": 17.57696, "grad_norm": 1.0069595575332642, "learning_rate": 2.255102040816327e-05, "loss": 0.4886, "step": 13732 }, { "epoch": 17.57824, "grad_norm": 1.1098484992980957, "learning_rate": 2.2549019607843138e-05, "loss": 0.526, "step": 13733 }, { "epoch": 17.57952, "grad_norm": 1.034224033355713, "learning_rate": 2.254701880752301e-05, "loss": 0.5107, "step": 13734 }, { "epoch": 17.5808, "grad_norm": 0.9972882866859436, "learning_rate": 2.254501800720288e-05, "loss": 0.4979, "step": 13735 }, { "epoch": 17.58208, "grad_norm": 1.033673644065857, "learning_rate": 2.2543017206882756e-05, "loss": 0.491, "step": 13736 }, { "epoch": 17.58336, "grad_norm": 1.0336534976959229, "learning_rate": 2.2541016406562625e-05, "loss": 0.5155, "step": 13737 }, { "epoch": 17.58464, "grad_norm": 1.0323609113693237, "learning_rate": 2.2539015606242497e-05, "loss": 0.5079, "step": 13738 }, { "epoch": 17.58592, "grad_norm": 1.0115076303482056, "learning_rate": 2.2537014805922372e-05, "loss": 0.5356, "step": 13739 }, { "epoch": 17.5872, "grad_norm": 0.9737477898597717, "learning_rate": 2.2535014005602244e-05, "loss": 0.4799, "step": 13740 }, { "epoch": 17.58848, "grad_norm": 1.0207149982452393, "learning_rate": 2.2533013205282112e-05, "loss": 0.4796, "step": 13741 }, { "epoch": 17.58976, "grad_norm": 1.0256177186965942, "learning_rate": 2.2531012404961984e-05, "loss": 0.5228, "step": 13742 }, { "epoch": 17.59104, "grad_norm": 1.0296961069107056, "learning_rate": 2.252901160464186e-05, "loss": 0.5046, "step": 13743 }, { "epoch": 17.59232, "grad_norm": 1.0195798873901367, "learning_rate": 2.252701080432173e-05, "loss": 0.4933, "step": 13744 }, { "epoch": 17.5936, "grad_norm": 1.1169263124465942, "learning_rate": 2.25250100040016e-05, "loss": 0.5553, "step": 13745 }, { "epoch": 17.59488, "grad_norm": 1.0939453840255737, "learning_rate": 2.2523009203681475e-05, "loss": 0.5122, "step": 13746 }, 
{ "epoch": 17.59616, "grad_norm": 1.0578433275222778, "learning_rate": 2.2521008403361347e-05, "loss": 0.5133, "step": 13747 }, { "epoch": 17.59744, "grad_norm": 1.0464441776275635, "learning_rate": 2.251900760304122e-05, "loss": 0.5562, "step": 13748 }, { "epoch": 17.59872, "grad_norm": 1.0032329559326172, "learning_rate": 2.2517006802721087e-05, "loss": 0.5132, "step": 13749 }, { "epoch": 17.6, "grad_norm": 1.0605465173721313, "learning_rate": 2.2515006002400962e-05, "loss": 0.5201, "step": 13750 }, { "epoch": 17.60128, "grad_norm": 1.0465607643127441, "learning_rate": 2.2513005202080834e-05, "loss": 0.5203, "step": 13751 }, { "epoch": 17.60256, "grad_norm": 1.0221221446990967, "learning_rate": 2.2511004401760706e-05, "loss": 0.4927, "step": 13752 }, { "epoch": 17.60384, "grad_norm": 0.970077633857727, "learning_rate": 2.2509003601440578e-05, "loss": 0.4861, "step": 13753 }, { "epoch": 17.60512, "grad_norm": 1.0048155784606934, "learning_rate": 2.250700280112045e-05, "loss": 0.479, "step": 13754 }, { "epoch": 17.6064, "grad_norm": 1.0469225645065308, "learning_rate": 2.2505002000800322e-05, "loss": 0.5249, "step": 13755 }, { "epoch": 17.60768, "grad_norm": 1.0476946830749512, "learning_rate": 2.2503001200480194e-05, "loss": 0.4966, "step": 13756 }, { "epoch": 17.60896, "grad_norm": 1.0547471046447754, "learning_rate": 2.2501000400160065e-05, "loss": 0.496, "step": 13757 }, { "epoch": 17.61024, "grad_norm": 1.0567193031311035, "learning_rate": 2.2498999599839937e-05, "loss": 0.5211, "step": 13758 }, { "epoch": 17.61152, "grad_norm": 1.0392005443572998, "learning_rate": 2.249699879951981e-05, "loss": 0.5055, "step": 13759 }, { "epoch": 17.6128, "grad_norm": 1.090825080871582, "learning_rate": 2.249499799919968e-05, "loss": 0.5679, "step": 13760 }, { "epoch": 17.61408, "grad_norm": 1.0225039720535278, "learning_rate": 2.2492997198879553e-05, "loss": 0.4868, "step": 13761 }, { "epoch": 17.61536, "grad_norm": 1.011189341545105, "learning_rate": 2.2490996398559425e-05, 
"loss": 0.5217, "step": 13762 }, { "epoch": 17.61664, "grad_norm": 1.0232630968093872, "learning_rate": 2.2488995598239297e-05, "loss": 0.4865, "step": 13763 }, { "epoch": 17.61792, "grad_norm": 1.0764962434768677, "learning_rate": 2.248699479791917e-05, "loss": 0.5524, "step": 13764 }, { "epoch": 17.6192, "grad_norm": 1.0205249786376953, "learning_rate": 2.248499399759904e-05, "loss": 0.4819, "step": 13765 }, { "epoch": 17.62048, "grad_norm": 1.089855670928955, "learning_rate": 2.2482993197278912e-05, "loss": 0.5628, "step": 13766 }, { "epoch": 17.62176, "grad_norm": 1.1043068170547485, "learning_rate": 2.2480992396958784e-05, "loss": 0.5314, "step": 13767 }, { "epoch": 17.62304, "grad_norm": 1.0724689960479736, "learning_rate": 2.2478991596638656e-05, "loss": 0.5377, "step": 13768 }, { "epoch": 17.62432, "grad_norm": 1.0592753887176514, "learning_rate": 2.2476990796318528e-05, "loss": 0.5109, "step": 13769 }, { "epoch": 17.6256, "grad_norm": 1.0443251132965088, "learning_rate": 2.24749899959984e-05, "loss": 0.54, "step": 13770 }, { "epoch": 17.62688, "grad_norm": 1.0627379417419434, "learning_rate": 2.2472989195678275e-05, "loss": 0.5206, "step": 13771 }, { "epoch": 17.62816, "grad_norm": 1.033038854598999, "learning_rate": 2.2470988395358143e-05, "loss": 0.4665, "step": 13772 }, { "epoch": 17.62944, "grad_norm": 1.1007275581359863, "learning_rate": 2.2468987595038015e-05, "loss": 0.5408, "step": 13773 }, { "epoch": 17.63072, "grad_norm": 1.051284670829773, "learning_rate": 2.2466986794717887e-05, "loss": 0.509, "step": 13774 }, { "epoch": 17.632, "grad_norm": 1.059944748878479, "learning_rate": 2.2464985994397762e-05, "loss": 0.5193, "step": 13775 }, { "epoch": 17.63328, "grad_norm": 1.0539425611495972, "learning_rate": 2.246298519407763e-05, "loss": 0.5161, "step": 13776 }, { "epoch": 17.63456, "grad_norm": 0.9573315382003784, "learning_rate": 2.2460984393757503e-05, "loss": 0.4629, "step": 13777 }, { "epoch": 17.63584, "grad_norm": 1.0628639459609985, 
"learning_rate": 2.2458983593437378e-05, "loss": 0.5524, "step": 13778 }, { "epoch": 17.63712, "grad_norm": 0.9728816747665405, "learning_rate": 2.245698279311725e-05, "loss": 0.4881, "step": 13779 }, { "epoch": 17.6384, "grad_norm": 0.996763288974762, "learning_rate": 2.2454981992797118e-05, "loss": 0.4589, "step": 13780 }, { "epoch": 17.63968, "grad_norm": 1.0928605794906616, "learning_rate": 2.245298119247699e-05, "loss": 0.5651, "step": 13781 }, { "epoch": 17.64096, "grad_norm": 1.067138433456421, "learning_rate": 2.2450980392156865e-05, "loss": 0.5257, "step": 13782 }, { "epoch": 17.64224, "grad_norm": 1.0764461755752563, "learning_rate": 2.2448979591836737e-05, "loss": 0.5539, "step": 13783 }, { "epoch": 17.64352, "grad_norm": 1.0611051321029663, "learning_rate": 2.2446978791516606e-05, "loss": 0.5756, "step": 13784 }, { "epoch": 17.6448, "grad_norm": 1.0139261484146118, "learning_rate": 2.244497799119648e-05, "loss": 0.4869, "step": 13785 }, { "epoch": 17.64608, "grad_norm": 1.0263339281082153, "learning_rate": 2.2442977190876353e-05, "loss": 0.5112, "step": 13786 }, { "epoch": 17.64736, "grad_norm": 1.041606068611145, "learning_rate": 2.2440976390556225e-05, "loss": 0.5362, "step": 13787 }, { "epoch": 17.64864, "grad_norm": 1.01603102684021, "learning_rate": 2.2438975590236093e-05, "loss": 0.4863, "step": 13788 }, { "epoch": 17.64992, "grad_norm": 1.0228776931762695, "learning_rate": 2.2436974789915968e-05, "loss": 0.5174, "step": 13789 }, { "epoch": 17.6512, "grad_norm": 1.0253472328186035, "learning_rate": 2.243497398959584e-05, "loss": 0.502, "step": 13790 }, { "epoch": 17.65248, "grad_norm": 1.0483967065811157, "learning_rate": 2.2432973189275712e-05, "loss": 0.5241, "step": 13791 }, { "epoch": 17.65376, "grad_norm": 0.9999913573265076, "learning_rate": 2.2430972388955584e-05, "loss": 0.5112, "step": 13792 }, { "epoch": 17.65504, "grad_norm": 0.9996472001075745, "learning_rate": 2.2428971588635456e-05, "loss": 0.4946, "step": 13793 }, { "epoch": 
17.65632, "grad_norm": 1.0371149778366089, "learning_rate": 2.2426970788315328e-05, "loss": 0.5217, "step": 13794 }, { "epoch": 17.6576, "grad_norm": 1.040626883506775, "learning_rate": 2.24249699879952e-05, "loss": 0.5226, "step": 13795 }, { "epoch": 17.65888, "grad_norm": 1.0527793169021606, "learning_rate": 2.242296918767507e-05, "loss": 0.5302, "step": 13796 }, { "epoch": 17.66016, "grad_norm": 1.0551269054412842, "learning_rate": 2.2420968387354943e-05, "loss": 0.524, "step": 13797 }, { "epoch": 17.66144, "grad_norm": 1.0026994943618774, "learning_rate": 2.2418967587034815e-05, "loss": 0.4582, "step": 13798 }, { "epoch": 17.66272, "grad_norm": 1.0396666526794434, "learning_rate": 2.2416966786714687e-05, "loss": 0.49, "step": 13799 }, { "epoch": 17.664, "grad_norm": 1.0443949699401855, "learning_rate": 2.241496598639456e-05, "loss": 0.4863, "step": 13800 }, { "epoch": 17.66528, "grad_norm": 1.02041757106781, "learning_rate": 2.241296518607443e-05, "loss": 0.5075, "step": 13801 }, { "epoch": 17.66656, "grad_norm": 1.0015771389007568, "learning_rate": 2.2410964385754302e-05, "loss": 0.4828, "step": 13802 }, { "epoch": 17.667839999999998, "grad_norm": 1.0166736841201782, "learning_rate": 2.2408963585434174e-05, "loss": 0.4961, "step": 13803 }, { "epoch": 17.66912, "grad_norm": 1.0742013454437256, "learning_rate": 2.2406962785114046e-05, "loss": 0.5438, "step": 13804 }, { "epoch": 17.6704, "grad_norm": 1.0339466333389282, "learning_rate": 2.2404961984793918e-05, "loss": 0.5245, "step": 13805 }, { "epoch": 17.67168, "grad_norm": 1.0903747081756592, "learning_rate": 2.2402961184473793e-05, "loss": 0.5367, "step": 13806 }, { "epoch": 17.67296, "grad_norm": 1.049980878829956, "learning_rate": 2.2400960384153662e-05, "loss": 0.5121, "step": 13807 }, { "epoch": 17.67424, "grad_norm": 1.027825117111206, "learning_rate": 2.2398959583833534e-05, "loss": 0.4781, "step": 13808 }, { "epoch": 17.67552, "grad_norm": 1.0278798341751099, "learning_rate": 2.2396958783513405e-05, 
"loss": 0.4927, "step": 13809 }, { "epoch": 17.6768, "grad_norm": 1.1051456928253174, "learning_rate": 2.239495798319328e-05, "loss": 0.5276, "step": 13810 }, { "epoch": 17.67808, "grad_norm": 1.029605507850647, "learning_rate": 2.239295718287315e-05, "loss": 0.4806, "step": 13811 }, { "epoch": 17.67936, "grad_norm": 1.0395790338516235, "learning_rate": 2.239095638255302e-05, "loss": 0.4941, "step": 13812 }, { "epoch": 17.68064, "grad_norm": 1.017499566078186, "learning_rate": 2.2388955582232896e-05, "loss": 0.5287, "step": 13813 }, { "epoch": 17.68192, "grad_norm": 1.0549243688583374, "learning_rate": 2.2386954781912768e-05, "loss": 0.5473, "step": 13814 }, { "epoch": 17.6832, "grad_norm": 0.9981441497802734, "learning_rate": 2.2384953981592637e-05, "loss": 0.5042, "step": 13815 }, { "epoch": 17.68448, "grad_norm": 1.0279474258422852, "learning_rate": 2.238295318127251e-05, "loss": 0.5113, "step": 13816 }, { "epoch": 17.68576, "grad_norm": 1.023318886756897, "learning_rate": 2.2380952380952384e-05, "loss": 0.5169, "step": 13817 }, { "epoch": 17.68704, "grad_norm": 1.0410925149917603, "learning_rate": 2.2378951580632256e-05, "loss": 0.5097, "step": 13818 }, { "epoch": 17.68832, "grad_norm": 1.0423842668533325, "learning_rate": 2.2376950780312124e-05, "loss": 0.5164, "step": 13819 }, { "epoch": 17.6896, "grad_norm": 1.0338177680969238, "learning_rate": 2.2374949979991996e-05, "loss": 0.5043, "step": 13820 }, { "epoch": 17.69088, "grad_norm": 1.0916544198989868, "learning_rate": 2.237294917967187e-05, "loss": 0.5311, "step": 13821 }, { "epoch": 17.69216, "grad_norm": 1.0973963737487793, "learning_rate": 2.2370948379351743e-05, "loss": 0.5184, "step": 13822 }, { "epoch": 17.69344, "grad_norm": 1.0347501039505005, "learning_rate": 2.236894757903161e-05, "loss": 0.4996, "step": 13823 }, { "epoch": 17.69472, "grad_norm": 0.9996992349624634, "learning_rate": 2.2366946778711487e-05, "loss": 0.4913, "step": 13824 }, { "epoch": 17.696, "grad_norm": 1.081697702407837, 
"learning_rate": 2.236494597839136e-05, "loss": 0.5354, "step": 13825 }, { "epoch": 17.69728, "grad_norm": 1.0013635158538818, "learning_rate": 2.236294517807123e-05, "loss": 0.5452, "step": 13826 }, { "epoch": 17.69856, "grad_norm": 1.0059187412261963, "learning_rate": 2.23609443777511e-05, "loss": 0.5179, "step": 13827 }, { "epoch": 17.699840000000002, "grad_norm": 1.0418435335159302, "learning_rate": 2.2358943577430974e-05, "loss": 0.4985, "step": 13828 }, { "epoch": 17.70112, "grad_norm": 1.1040401458740234, "learning_rate": 2.2356942777110846e-05, "loss": 0.5672, "step": 13829 }, { "epoch": 17.7024, "grad_norm": 1.0777002573013306, "learning_rate": 2.2354941976790718e-05, "loss": 0.5346, "step": 13830 }, { "epoch": 17.70368, "grad_norm": 1.057003378868103, "learning_rate": 2.235294117647059e-05, "loss": 0.529, "step": 13831 }, { "epoch": 17.70496, "grad_norm": 1.0604835748672485, "learning_rate": 2.235094037615046e-05, "loss": 0.504, "step": 13832 }, { "epoch": 17.70624, "grad_norm": 1.0369086265563965, "learning_rate": 2.2348939575830333e-05, "loss": 0.512, "step": 13833 }, { "epoch": 17.70752, "grad_norm": 1.0409032106399536, "learning_rate": 2.2346938775510205e-05, "loss": 0.4881, "step": 13834 }, { "epoch": 17.7088, "grad_norm": 1.0097156763076782, "learning_rate": 2.2344937975190077e-05, "loss": 0.5367, "step": 13835 }, { "epoch": 17.71008, "grad_norm": 0.9942651391029358, "learning_rate": 2.234293717486995e-05, "loss": 0.4977, "step": 13836 }, { "epoch": 17.71136, "grad_norm": 1.0652927160263062, "learning_rate": 2.234093637454982e-05, "loss": 0.5629, "step": 13837 }, { "epoch": 17.71264, "grad_norm": 1.0204808712005615, "learning_rate": 2.2338935574229693e-05, "loss": 0.4821, "step": 13838 }, { "epoch": 17.71392, "grad_norm": 1.051592230796814, "learning_rate": 2.2336934773909565e-05, "loss": 0.4978, "step": 13839 }, { "epoch": 17.7152, "grad_norm": 1.0325385332107544, "learning_rate": 2.2334933973589436e-05, "loss": 0.5587, "step": 13840 }, { "epoch": 
17.71648, "grad_norm": 0.9920241236686707, "learning_rate": 2.2332933173269308e-05, "loss": 0.5126, "step": 13841 }, { "epoch": 17.71776, "grad_norm": 0.9792672395706177, "learning_rate": 2.233093237294918e-05, "loss": 0.4736, "step": 13842 }, { "epoch": 17.71904, "grad_norm": 1.0789382457733154, "learning_rate": 2.2328931572629052e-05, "loss": 0.5335, "step": 13843 }, { "epoch": 17.72032, "grad_norm": 1.0387771129608154, "learning_rate": 2.2326930772308924e-05, "loss": 0.5135, "step": 13844 }, { "epoch": 17.7216, "grad_norm": 1.0767048597335815, "learning_rate": 2.23249299719888e-05, "loss": 0.5385, "step": 13845 }, { "epoch": 17.72288, "grad_norm": 1.025465965270996, "learning_rate": 2.2322929171668668e-05, "loss": 0.4867, "step": 13846 }, { "epoch": 17.72416, "grad_norm": 1.0004117488861084, "learning_rate": 2.232092837134854e-05, "loss": 0.5034, "step": 13847 }, { "epoch": 17.72544, "grad_norm": 1.0588462352752686, "learning_rate": 2.231892757102841e-05, "loss": 0.5126, "step": 13848 }, { "epoch": 17.72672, "grad_norm": 1.0342752933502197, "learning_rate": 2.2316926770708286e-05, "loss": 0.5027, "step": 13849 }, { "epoch": 17.728, "grad_norm": 1.0341858863830566, "learning_rate": 2.2314925970388155e-05, "loss": 0.512, "step": 13850 }, { "epoch": 17.72928, "grad_norm": 0.9943906664848328, "learning_rate": 2.2312925170068027e-05, "loss": 0.4869, "step": 13851 }, { "epoch": 17.73056, "grad_norm": 1.0998703241348267, "learning_rate": 2.2310924369747902e-05, "loss": 0.5629, "step": 13852 }, { "epoch": 17.73184, "grad_norm": 1.0420924425125122, "learning_rate": 2.2308923569427774e-05, "loss": 0.505, "step": 13853 }, { "epoch": 17.73312, "grad_norm": 1.0193160772323608, "learning_rate": 2.2306922769107642e-05, "loss": 0.5082, "step": 13854 }, { "epoch": 17.7344, "grad_norm": 1.0729001760482788, "learning_rate": 2.2304921968787514e-05, "loss": 0.4944, "step": 13855 }, { "epoch": 17.73568, "grad_norm": 0.9842273592948914, "learning_rate": 2.230292116846739e-05, "loss": 
0.4526, "step": 13856 }, { "epoch": 17.73696, "grad_norm": 1.1139209270477295, "learning_rate": 2.230092036814726e-05, "loss": 0.5272, "step": 13857 }, { "epoch": 17.73824, "grad_norm": 1.0628962516784668, "learning_rate": 2.229891956782713e-05, "loss": 0.5155, "step": 13858 }, { "epoch": 17.73952, "grad_norm": 1.0056374073028564, "learning_rate": 2.2296918767507005e-05, "loss": 0.4806, "step": 13859 }, { "epoch": 17.7408, "grad_norm": 1.0062882900238037, "learning_rate": 2.2294917967186877e-05, "loss": 0.5315, "step": 13860 }, { "epoch": 17.74208, "grad_norm": 1.046491265296936, "learning_rate": 2.229291716686675e-05, "loss": 0.5553, "step": 13861 }, { "epoch": 17.74336, "grad_norm": 1.0050787925720215, "learning_rate": 2.2290916366546617e-05, "loss": 0.5059, "step": 13862 }, { "epoch": 17.74464, "grad_norm": 1.0405722856521606, "learning_rate": 2.2288915566226492e-05, "loss": 0.5053, "step": 13863 }, { "epoch": 17.74592, "grad_norm": 1.0229580402374268, "learning_rate": 2.2286914765906364e-05, "loss": 0.5597, "step": 13864 }, { "epoch": 17.7472, "grad_norm": 1.025348424911499, "learning_rate": 2.2284913965586236e-05, "loss": 0.5118, "step": 13865 }, { "epoch": 17.74848, "grad_norm": 1.0150192975997925, "learning_rate": 2.2282913165266108e-05, "loss": 0.4987, "step": 13866 }, { "epoch": 17.74976, "grad_norm": 1.0555064678192139, "learning_rate": 2.228091236494598e-05, "loss": 0.5402, "step": 13867 }, { "epoch": 17.75104, "grad_norm": 1.0361968278884888, "learning_rate": 2.2278911564625852e-05, "loss": 0.5342, "step": 13868 }, { "epoch": 17.75232, "grad_norm": 1.0574556589126587, "learning_rate": 2.2276910764305724e-05, "loss": 0.5591, "step": 13869 }, { "epoch": 17.7536, "grad_norm": 1.021784782409668, "learning_rate": 2.2274909963985595e-05, "loss": 0.538, "step": 13870 }, { "epoch": 17.75488, "grad_norm": 0.9862391352653503, "learning_rate": 2.2272909163665467e-05, "loss": 0.4779, "step": 13871 }, { "epoch": 17.75616, "grad_norm": 1.0507396459579468, 
"learning_rate": 2.227090836334534e-05, "loss": 0.4927, "step": 13872 }, { "epoch": 17.75744, "grad_norm": 0.9640161991119385, "learning_rate": 2.226890756302521e-05, "loss": 0.4648, "step": 13873 }, { "epoch": 17.75872, "grad_norm": 1.0764763355255127, "learning_rate": 2.2266906762705083e-05, "loss": 0.5556, "step": 13874 }, { "epoch": 17.76, "grad_norm": 1.0238678455352783, "learning_rate": 2.2264905962384955e-05, "loss": 0.5237, "step": 13875 }, { "epoch": 17.76128, "grad_norm": 1.0710828304290771, "learning_rate": 2.2262905162064827e-05, "loss": 0.579, "step": 13876 }, { "epoch": 17.76256, "grad_norm": 1.02402663230896, "learning_rate": 2.22609043617447e-05, "loss": 0.5118, "step": 13877 }, { "epoch": 17.76384, "grad_norm": 1.037920355796814, "learning_rate": 2.225890356142457e-05, "loss": 0.5287, "step": 13878 }, { "epoch": 17.76512, "grad_norm": 0.9906246066093445, "learning_rate": 2.2256902761104442e-05, "loss": 0.5074, "step": 13879 }, { "epoch": 17.7664, "grad_norm": 1.0306636095046997, "learning_rate": 2.2254901960784314e-05, "loss": 0.4908, "step": 13880 }, { "epoch": 17.76768, "grad_norm": 1.0256168842315674, "learning_rate": 2.2252901160464186e-05, "loss": 0.4749, "step": 13881 }, { "epoch": 17.76896, "grad_norm": 1.0150201320648193, "learning_rate": 2.2250900360144058e-05, "loss": 0.5489, "step": 13882 }, { "epoch": 17.77024, "grad_norm": 1.1066970825195312, "learning_rate": 2.224889955982393e-05, "loss": 0.583, "step": 13883 }, { "epoch": 17.77152, "grad_norm": 0.9935269355773926, "learning_rate": 2.2246898759503805e-05, "loss": 0.471, "step": 13884 }, { "epoch": 17.7728, "grad_norm": 1.0902072191238403, "learning_rate": 2.2244897959183673e-05, "loss": 0.5516, "step": 13885 }, { "epoch": 17.77408, "grad_norm": 1.0285429954528809, "learning_rate": 2.2242897158863545e-05, "loss": 0.5101, "step": 13886 }, { "epoch": 17.77536, "grad_norm": 0.9977599382400513, "learning_rate": 2.2240896358543417e-05, "loss": 0.5045, "step": 13887 }, { "epoch": 17.77664, 
"grad_norm": 1.0018473863601685, "learning_rate": 2.2238895558223292e-05, "loss": 0.5293, "step": 13888 }, { "epoch": 17.77792, "grad_norm": 1.027341604232788, "learning_rate": 2.223689475790316e-05, "loss": 0.5137, "step": 13889 }, { "epoch": 17.7792, "grad_norm": 1.0242568254470825, "learning_rate": 2.2234893957583033e-05, "loss": 0.5404, "step": 13890 }, { "epoch": 17.78048, "grad_norm": 0.9905862212181091, "learning_rate": 2.2232893157262908e-05, "loss": 0.5376, "step": 13891 }, { "epoch": 17.78176, "grad_norm": 0.919065535068512, "learning_rate": 2.223089235694278e-05, "loss": 0.4514, "step": 13892 }, { "epoch": 17.78304, "grad_norm": 0.9959515333175659, "learning_rate": 2.2228891556622648e-05, "loss": 0.4923, "step": 13893 }, { "epoch": 17.78432, "grad_norm": 1.0490683317184448, "learning_rate": 2.222689075630252e-05, "loss": 0.4993, "step": 13894 }, { "epoch": 17.7856, "grad_norm": 1.04204261302948, "learning_rate": 2.2224889955982395e-05, "loss": 0.523, "step": 13895 }, { "epoch": 17.78688, "grad_norm": 1.001023530960083, "learning_rate": 2.2222889155662267e-05, "loss": 0.524, "step": 13896 }, { "epoch": 17.78816, "grad_norm": 1.0491348505020142, "learning_rate": 2.2220888355342136e-05, "loss": 0.4963, "step": 13897 }, { "epoch": 17.78944, "grad_norm": 1.046494960784912, "learning_rate": 2.221888755502201e-05, "loss": 0.5521, "step": 13898 }, { "epoch": 17.79072, "grad_norm": 1.0331013202667236, "learning_rate": 2.2216886754701883e-05, "loss": 0.5306, "step": 13899 }, { "epoch": 17.792, "grad_norm": 0.9884205460548401, "learning_rate": 2.2214885954381755e-05, "loss": 0.469, "step": 13900 }, { "epoch": 17.79328, "grad_norm": 1.1054868698120117, "learning_rate": 2.2212885154061623e-05, "loss": 0.4882, "step": 13901 }, { "epoch": 17.79456, "grad_norm": 1.0739738941192627, "learning_rate": 2.2210884353741498e-05, "loss": 0.5627, "step": 13902 }, { "epoch": 17.79584, "grad_norm": 0.9757269024848938, "learning_rate": 2.220888355342137e-05, "loss": 0.474, "step": 
13903 }, { "epoch": 17.79712, "grad_norm": 0.9898340702056885, "learning_rate": 2.2206882753101242e-05, "loss": 0.4801, "step": 13904 }, { "epoch": 17.7984, "grad_norm": 0.9995481967926025, "learning_rate": 2.2204881952781114e-05, "loss": 0.468, "step": 13905 }, { "epoch": 17.79968, "grad_norm": 1.0464537143707275, "learning_rate": 2.2202881152460986e-05, "loss": 0.4884, "step": 13906 }, { "epoch": 17.80096, "grad_norm": 1.0073833465576172, "learning_rate": 2.2200880352140858e-05, "loss": 0.5333, "step": 13907 }, { "epoch": 17.80224, "grad_norm": 1.025449514389038, "learning_rate": 2.219887955182073e-05, "loss": 0.5314, "step": 13908 }, { "epoch": 17.80352, "grad_norm": 0.9930814504623413, "learning_rate": 2.21968787515006e-05, "loss": 0.5318, "step": 13909 }, { "epoch": 17.8048, "grad_norm": 0.9848999977111816, "learning_rate": 2.2194877951180473e-05, "loss": 0.4798, "step": 13910 }, { "epoch": 17.80608, "grad_norm": 1.0277302265167236, "learning_rate": 2.2192877150860345e-05, "loss": 0.4967, "step": 13911 }, { "epoch": 17.80736, "grad_norm": 1.0796613693237305, "learning_rate": 2.2190876350540217e-05, "loss": 0.5304, "step": 13912 }, { "epoch": 17.80864, "grad_norm": 1.055225133895874, "learning_rate": 2.218887555022009e-05, "loss": 0.5225, "step": 13913 }, { "epoch": 17.809919999999998, "grad_norm": 1.039272665977478, "learning_rate": 2.218687474989996e-05, "loss": 0.4892, "step": 13914 }, { "epoch": 17.8112, "grad_norm": 1.0163494348526, "learning_rate": 2.2184873949579832e-05, "loss": 0.4807, "step": 13915 }, { "epoch": 17.81248, "grad_norm": 1.0309205055236816, "learning_rate": 2.2182873149259704e-05, "loss": 0.534, "step": 13916 }, { "epoch": 17.81376, "grad_norm": 0.9438468813896179, "learning_rate": 2.2180872348939576e-05, "loss": 0.4871, "step": 13917 }, { "epoch": 17.81504, "grad_norm": 1.0279080867767334, "learning_rate": 2.2178871548619448e-05, "loss": 0.474, "step": 13918 }, { "epoch": 17.81632, "grad_norm": 1.0761826038360596, "learning_rate": 
2.2176870748299323e-05, "loss": 0.53, "step": 13919 }, { "epoch": 17.8176, "grad_norm": 1.025966763496399, "learning_rate": 2.217486994797919e-05, "loss": 0.505, "step": 13920 }, { "epoch": 17.81888, "grad_norm": 1.0639162063598633, "learning_rate": 2.2172869147659064e-05, "loss": 0.5237, "step": 13921 }, { "epoch": 17.82016, "grad_norm": 1.0175074338912964, "learning_rate": 2.2170868347338935e-05, "loss": 0.5159, "step": 13922 }, { "epoch": 17.82144, "grad_norm": 1.073966145515442, "learning_rate": 2.216886754701881e-05, "loss": 0.5687, "step": 13923 }, { "epoch": 17.82272, "grad_norm": 1.0590076446533203, "learning_rate": 2.216686674669868e-05, "loss": 0.4935, "step": 13924 }, { "epoch": 17.824, "grad_norm": 0.9631067514419556, "learning_rate": 2.216486594637855e-05, "loss": 0.4657, "step": 13925 }, { "epoch": 17.82528, "grad_norm": 1.0229510068893433, "learning_rate": 2.2162865146058426e-05, "loss": 0.4885, "step": 13926 }, { "epoch": 17.82656, "grad_norm": 1.0246883630752563, "learning_rate": 2.2160864345738298e-05, "loss": 0.5242, "step": 13927 }, { "epoch": 17.82784, "grad_norm": 1.0278314352035522, "learning_rate": 2.2158863545418167e-05, "loss": 0.5112, "step": 13928 }, { "epoch": 17.82912, "grad_norm": 1.0408381223678589, "learning_rate": 2.215686274509804e-05, "loss": 0.5122, "step": 13929 }, { "epoch": 17.8304, "grad_norm": 0.9866704940795898, "learning_rate": 2.2154861944777914e-05, "loss": 0.5143, "step": 13930 }, { "epoch": 17.83168, "grad_norm": 1.005631446838379, "learning_rate": 2.2152861144457786e-05, "loss": 0.4981, "step": 13931 }, { "epoch": 17.83296, "grad_norm": 0.9910889267921448, "learning_rate": 2.2150860344137654e-05, "loss": 0.4984, "step": 13932 }, { "epoch": 17.83424, "grad_norm": 1.0972979068756104, "learning_rate": 2.2148859543817526e-05, "loss": 0.5075, "step": 13933 }, { "epoch": 17.83552, "grad_norm": 1.0541285276412964, "learning_rate": 2.21468587434974e-05, "loss": 0.484, "step": 13934 }, { "epoch": 17.8368, "grad_norm": 
1.0120394229888916, "learning_rate": 2.2144857943177273e-05, "loss": 0.4929, "step": 13935 }, { "epoch": 17.83808, "grad_norm": 1.0291119813919067, "learning_rate": 2.214285714285714e-05, "loss": 0.5132, "step": 13936 }, { "epoch": 17.83936, "grad_norm": 1.034484624862671, "learning_rate": 2.2140856342537017e-05, "loss": 0.4899, "step": 13937 }, { "epoch": 17.84064, "grad_norm": 1.0646271705627441, "learning_rate": 2.213885554221689e-05, "loss": 0.4747, "step": 13938 }, { "epoch": 17.841920000000002, "grad_norm": 1.0118857622146606, "learning_rate": 2.213685474189676e-05, "loss": 0.4824, "step": 13939 }, { "epoch": 17.8432, "grad_norm": 1.0535541772842407, "learning_rate": 2.213485394157663e-05, "loss": 0.4842, "step": 13940 }, { "epoch": 17.84448, "grad_norm": 1.028926968574524, "learning_rate": 2.2132853141256504e-05, "loss": 0.5243, "step": 13941 }, { "epoch": 17.84576, "grad_norm": 1.0521934032440186, "learning_rate": 2.2130852340936376e-05, "loss": 0.5343, "step": 13942 }, { "epoch": 17.84704, "grad_norm": 1.0421346426010132, "learning_rate": 2.2128851540616248e-05, "loss": 0.4903, "step": 13943 }, { "epoch": 17.84832, "grad_norm": 1.0207431316375732, "learning_rate": 2.212685074029612e-05, "loss": 0.509, "step": 13944 }, { "epoch": 17.8496, "grad_norm": 1.0445460081100464, "learning_rate": 2.212484993997599e-05, "loss": 0.5096, "step": 13945 }, { "epoch": 17.85088, "grad_norm": 1.0498018264770508, "learning_rate": 2.2122849139655863e-05, "loss": 0.4767, "step": 13946 }, { "epoch": 17.85216, "grad_norm": 1.0488752126693726, "learning_rate": 2.2120848339335735e-05, "loss": 0.5048, "step": 13947 }, { "epoch": 17.85344, "grad_norm": 0.981802225112915, "learning_rate": 2.2118847539015607e-05, "loss": 0.5113, "step": 13948 }, { "epoch": 17.85472, "grad_norm": 0.988862156867981, "learning_rate": 2.211684673869548e-05, "loss": 0.4887, "step": 13949 }, { "epoch": 17.856, "grad_norm": 1.0477064847946167, "learning_rate": 2.211484593837535e-05, "loss": 0.5259, "step": 
13950 }, { "epoch": 17.85728, "grad_norm": 1.0353902578353882, "learning_rate": 2.2112845138055223e-05, "loss": 0.5022, "step": 13951 }, { "epoch": 17.85856, "grad_norm": 1.0380783081054688, "learning_rate": 2.2110844337735094e-05, "loss": 0.5197, "step": 13952 }, { "epoch": 17.85984, "grad_norm": 0.9977851510047913, "learning_rate": 2.2108843537414966e-05, "loss": 0.4844, "step": 13953 }, { "epoch": 17.86112, "grad_norm": 1.031292200088501, "learning_rate": 2.2106842737094838e-05, "loss": 0.5341, "step": 13954 }, { "epoch": 17.8624, "grad_norm": 1.0284069776535034, "learning_rate": 2.210484193677471e-05, "loss": 0.4888, "step": 13955 }, { "epoch": 17.86368, "grad_norm": 1.0368765592575073, "learning_rate": 2.2102841136454582e-05, "loss": 0.5599, "step": 13956 }, { "epoch": 17.86496, "grad_norm": 0.9683583378791809, "learning_rate": 2.2100840336134454e-05, "loss": 0.5076, "step": 13957 }, { "epoch": 17.86624, "grad_norm": 1.0280176401138306, "learning_rate": 2.209883953581433e-05, "loss": 0.5281, "step": 13958 }, { "epoch": 17.86752, "grad_norm": 1.078447937965393, "learning_rate": 2.2096838735494197e-05, "loss": 0.5285, "step": 13959 }, { "epoch": 17.8688, "grad_norm": 1.0445505380630493, "learning_rate": 2.209483793517407e-05, "loss": 0.5509, "step": 13960 }, { "epoch": 17.87008, "grad_norm": 1.0536412000656128, "learning_rate": 2.209283713485394e-05, "loss": 0.5003, "step": 13961 }, { "epoch": 17.87136, "grad_norm": 0.9787836074829102, "learning_rate": 2.2090836334533816e-05, "loss": 0.4461, "step": 13962 }, { "epoch": 17.87264, "grad_norm": 1.0095018148422241, "learning_rate": 2.2088835534213685e-05, "loss": 0.4955, "step": 13963 }, { "epoch": 17.87392, "grad_norm": 1.0202372074127197, "learning_rate": 2.2086834733893557e-05, "loss": 0.5101, "step": 13964 }, { "epoch": 17.8752, "grad_norm": 1.010644555091858, "learning_rate": 2.2084833933573432e-05, "loss": 0.508, "step": 13965 }, { "epoch": 17.87648, "grad_norm": 1.0773608684539795, "learning_rate": 
2.2082833133253304e-05, "loss": 0.4861, "step": 13966 }, { "epoch": 17.87776, "grad_norm": 1.0331664085388184, "learning_rate": 2.2080832332933172e-05, "loss": 0.5204, "step": 13967 }, { "epoch": 17.87904, "grad_norm": 1.081826090812683, "learning_rate": 2.2078831532613044e-05, "loss": 0.5497, "step": 13968 }, { "epoch": 17.88032, "grad_norm": 1.0755976438522339, "learning_rate": 2.207683073229292e-05, "loss": 0.5408, "step": 13969 }, { "epoch": 17.8816, "grad_norm": 1.0068060159683228, "learning_rate": 2.207482993197279e-05, "loss": 0.5003, "step": 13970 }, { "epoch": 17.88288, "grad_norm": 0.9964351058006287, "learning_rate": 2.207282913165266e-05, "loss": 0.4895, "step": 13971 }, { "epoch": 17.88416, "grad_norm": 1.0196988582611084, "learning_rate": 2.2070828331332535e-05, "loss": 0.5039, "step": 13972 }, { "epoch": 17.88544, "grad_norm": 1.0305639505386353, "learning_rate": 2.2068827531012407e-05, "loss": 0.5213, "step": 13973 }, { "epoch": 17.88672, "grad_norm": 1.0133947134017944, "learning_rate": 2.206682673069228e-05, "loss": 0.5076, "step": 13974 }, { "epoch": 17.888, "grad_norm": 1.0059137344360352, "learning_rate": 2.2064825930372147e-05, "loss": 0.5173, "step": 13975 }, { "epoch": 17.88928, "grad_norm": 1.0029923915863037, "learning_rate": 2.2062825130052022e-05, "loss": 0.5354, "step": 13976 }, { "epoch": 17.89056, "grad_norm": 0.9647006392478943, "learning_rate": 2.2060824329731894e-05, "loss": 0.5142, "step": 13977 }, { "epoch": 17.89184, "grad_norm": 1.0146187543869019, "learning_rate": 2.2058823529411766e-05, "loss": 0.5561, "step": 13978 }, { "epoch": 17.89312, "grad_norm": 1.0384979248046875, "learning_rate": 2.2056822729091638e-05, "loss": 0.5139, "step": 13979 }, { "epoch": 17.8944, "grad_norm": 0.9908726215362549, "learning_rate": 2.205482192877151e-05, "loss": 0.5435, "step": 13980 }, { "epoch": 17.89568, "grad_norm": 1.0110816955566406, "learning_rate": 2.2052821128451382e-05, "loss": 0.5114, "step": 13981 }, { "epoch": 17.89696, 
"grad_norm": 1.0027389526367188, "learning_rate": 2.2050820328131254e-05, "loss": 0.5125, "step": 13982 }, { "epoch": 17.89824, "grad_norm": 0.9981145262718201, "learning_rate": 2.2048819527811125e-05, "loss": 0.5052, "step": 13983 }, { "epoch": 17.89952, "grad_norm": 1.0781457424163818, "learning_rate": 2.2046818727490997e-05, "loss": 0.5885, "step": 13984 }, { "epoch": 17.9008, "grad_norm": 1.0383636951446533, "learning_rate": 2.204481792717087e-05, "loss": 0.5163, "step": 13985 }, { "epoch": 17.90208, "grad_norm": 1.020270824432373, "learning_rate": 2.2042817126850744e-05, "loss": 0.5207, "step": 13986 }, { "epoch": 17.90336, "grad_norm": 1.0009729862213135, "learning_rate": 2.2040816326530613e-05, "loss": 0.6061, "step": 13987 }, { "epoch": 17.90464, "grad_norm": 1.0783268213272095, "learning_rate": 2.2038815526210485e-05, "loss": 0.521, "step": 13988 }, { "epoch": 17.90592, "grad_norm": 1.0087323188781738, "learning_rate": 2.2036814725890357e-05, "loss": 0.4846, "step": 13989 }, { "epoch": 17.9072, "grad_norm": 1.005253553390503, "learning_rate": 2.2034813925570232e-05, "loss": 0.5024, "step": 13990 }, { "epoch": 17.90848, "grad_norm": 1.0598344802856445, "learning_rate": 2.20328131252501e-05, "loss": 0.5176, "step": 13991 }, { "epoch": 17.90976, "grad_norm": 1.010216474533081, "learning_rate": 2.2030812324929972e-05, "loss": 0.4949, "step": 13992 }, { "epoch": 17.91104, "grad_norm": 1.0626128911972046, "learning_rate": 2.2028811524609844e-05, "loss": 0.5698, "step": 13993 }, { "epoch": 17.91232, "grad_norm": 1.0089972019195557, "learning_rate": 2.202681072428972e-05, "loss": 0.4999, "step": 13994 }, { "epoch": 17.9136, "grad_norm": 1.0127496719360352, "learning_rate": 2.2024809923969588e-05, "loss": 0.5169, "step": 13995 }, { "epoch": 17.91488, "grad_norm": 1.058738350868225, "learning_rate": 2.202280912364946e-05, "loss": 0.4963, "step": 13996 }, { "epoch": 17.91616, "grad_norm": 1.0318009853363037, "learning_rate": 2.2020808323329335e-05, "loss": 0.517, 
"step": 13997 }, { "epoch": 17.91744, "grad_norm": 1.0412254333496094, "learning_rate": 2.2018807523009207e-05, "loss": 0.5259, "step": 13998 }, { "epoch": 17.91872, "grad_norm": 1.061491847038269, "learning_rate": 2.2016806722689075e-05, "loss": 0.5451, "step": 13999 }, { "epoch": 17.92, "grad_norm": 1.0782477855682373, "learning_rate": 2.2014805922368947e-05, "loss": 0.5462, "step": 14000 }, { "epoch": 17.92128, "grad_norm": 1.0478885173797607, "learning_rate": 2.2012805122048822e-05, "loss": 0.5489, "step": 14001 }, { "epoch": 17.92256, "grad_norm": 1.0254915952682495, "learning_rate": 2.2010804321728694e-05, "loss": 0.5149, "step": 14002 }, { "epoch": 17.92384, "grad_norm": 1.0770113468170166, "learning_rate": 2.2008803521408563e-05, "loss": 0.5494, "step": 14003 }, { "epoch": 17.92512, "grad_norm": 1.0335642099380493, "learning_rate": 2.2006802721088438e-05, "loss": 0.4951, "step": 14004 }, { "epoch": 17.9264, "grad_norm": 1.0393280982971191, "learning_rate": 2.200480192076831e-05, "loss": 0.5505, "step": 14005 }, { "epoch": 17.92768, "grad_norm": 0.9929527044296265, "learning_rate": 2.200280112044818e-05, "loss": 0.4796, "step": 14006 }, { "epoch": 17.92896, "grad_norm": 0.9717233777046204, "learning_rate": 2.200080032012805e-05, "loss": 0.4864, "step": 14007 }, { "epoch": 17.93024, "grad_norm": 1.0142488479614258, "learning_rate": 2.1998799519807925e-05, "loss": 0.5637, "step": 14008 }, { "epoch": 17.93152, "grad_norm": 1.0879509449005127, "learning_rate": 2.1996798719487797e-05, "loss": 0.5425, "step": 14009 }, { "epoch": 17.9328, "grad_norm": 1.0561232566833496, "learning_rate": 2.199479791916767e-05, "loss": 0.4915, "step": 14010 }, { "epoch": 17.93408, "grad_norm": 1.0183889865875244, "learning_rate": 2.199279711884754e-05, "loss": 0.4996, "step": 14011 }, { "epoch": 17.93536, "grad_norm": 0.9836266040802002, "learning_rate": 2.1990796318527413e-05, "loss": 0.4174, "step": 14012 }, { "epoch": 17.93664, "grad_norm": 1.0205684900283813, "learning_rate": 
2.1988795518207285e-05, "loss": 0.5009, "step": 14013 }, { "epoch": 17.93792, "grad_norm": 1.0839546918869019, "learning_rate": 2.1986794717887156e-05, "loss": 0.5532, "step": 14014 }, { "epoch": 17.9392, "grad_norm": 0.9612104296684265, "learning_rate": 2.1984793917567028e-05, "loss": 0.4711, "step": 14015 }, { "epoch": 17.94048, "grad_norm": 1.0516858100891113, "learning_rate": 2.19827931172469e-05, "loss": 0.5215, "step": 14016 }, { "epoch": 17.94176, "grad_norm": 1.1283824443817139, "learning_rate": 2.1980792316926772e-05, "loss": 0.6009, "step": 14017 }, { "epoch": 17.94304, "grad_norm": 0.9951545000076294, "learning_rate": 2.1978791516606644e-05, "loss": 0.4589, "step": 14018 }, { "epoch": 17.94432, "grad_norm": 0.9906835556030273, "learning_rate": 2.1976790716286516e-05, "loss": 0.5127, "step": 14019 }, { "epoch": 17.9456, "grad_norm": 1.0642714500427246, "learning_rate": 2.1974789915966388e-05, "loss": 0.5101, "step": 14020 }, { "epoch": 17.94688, "grad_norm": 1.098826289176941, "learning_rate": 2.197278911564626e-05, "loss": 0.5939, "step": 14021 }, { "epoch": 17.94816, "grad_norm": 1.0480844974517822, "learning_rate": 2.197078831532613e-05, "loss": 0.5587, "step": 14022 }, { "epoch": 17.94944, "grad_norm": 1.0224511623382568, "learning_rate": 2.1968787515006003e-05, "loss": 0.4883, "step": 14023 }, { "epoch": 17.95072, "grad_norm": 1.0356502532958984, "learning_rate": 2.1966786714685875e-05, "loss": 0.5223, "step": 14024 }, { "epoch": 17.951999999999998, "grad_norm": 1.0777286291122437, "learning_rate": 2.196478591436575e-05, "loss": 0.5237, "step": 14025 }, { "epoch": 17.95328, "grad_norm": 1.1121007204055786, "learning_rate": 2.196278511404562e-05, "loss": 0.5715, "step": 14026 }, { "epoch": 17.95456, "grad_norm": 1.068606972694397, "learning_rate": 2.196078431372549e-05, "loss": 0.5258, "step": 14027 }, { "epoch": 17.95584, "grad_norm": 0.943596363067627, "learning_rate": 2.1958783513405362e-05, "loss": 0.4419, "step": 14028 }, { "epoch": 17.95712, 
"grad_norm": 0.9647386074066162, "learning_rate": 2.1956782713085238e-05, "loss": 0.5067, "step": 14029 }, { "epoch": 17.9584, "grad_norm": 0.9991288781166077, "learning_rate": 2.1954781912765106e-05, "loss": 0.4659, "step": 14030 }, { "epoch": 17.95968, "grad_norm": 1.0615739822387695, "learning_rate": 2.1952781112444978e-05, "loss": 0.5157, "step": 14031 }, { "epoch": 17.96096, "grad_norm": 1.0784728527069092, "learning_rate": 2.1950780312124853e-05, "loss": 0.5038, "step": 14032 }, { "epoch": 17.96224, "grad_norm": 1.0348066091537476, "learning_rate": 2.1948779511804725e-05, "loss": 0.5264, "step": 14033 }, { "epoch": 17.96352, "grad_norm": 0.9429087042808533, "learning_rate": 2.1946778711484594e-05, "loss": 0.4768, "step": 14034 }, { "epoch": 17.9648, "grad_norm": 1.0070911645889282, "learning_rate": 2.1944777911164465e-05, "loss": 0.491, "step": 14035 }, { "epoch": 17.96608, "grad_norm": 1.0489453077316284, "learning_rate": 2.194277711084434e-05, "loss": 0.5086, "step": 14036 }, { "epoch": 17.96736, "grad_norm": 0.9969155788421631, "learning_rate": 2.1940776310524212e-05, "loss": 0.5171, "step": 14037 }, { "epoch": 17.96864, "grad_norm": 1.0200451612472534, "learning_rate": 2.193877551020408e-05, "loss": 0.4763, "step": 14038 }, { "epoch": 17.96992, "grad_norm": 1.1074117422103882, "learning_rate": 2.1936774709883956e-05, "loss": 0.5653, "step": 14039 }, { "epoch": 17.9712, "grad_norm": 0.9458688497543335, "learning_rate": 2.1934773909563828e-05, "loss": 0.4885, "step": 14040 }, { "epoch": 17.97248, "grad_norm": 0.9838624000549316, "learning_rate": 2.19327731092437e-05, "loss": 0.4665, "step": 14041 }, { "epoch": 17.97376, "grad_norm": 0.9943659901618958, "learning_rate": 2.193077230892357e-05, "loss": 0.5485, "step": 14042 }, { "epoch": 17.97504, "grad_norm": 1.0677356719970703, "learning_rate": 2.1928771508603444e-05, "loss": 0.5672, "step": 14043 }, { "epoch": 17.97632, "grad_norm": 0.9993770122528076, "learning_rate": 2.1926770708283315e-05, "loss": 
0.5041, "step": 14044 }, { "epoch": 17.9776, "grad_norm": 1.0094391107559204, "learning_rate": 2.1924769907963187e-05, "loss": 0.5, "step": 14045 }, { "epoch": 17.97888, "grad_norm": 0.9990932941436768, "learning_rate": 2.1922769107643056e-05, "loss": 0.5015, "step": 14046 }, { "epoch": 17.98016, "grad_norm": 0.9945876002311707, "learning_rate": 2.192076830732293e-05, "loss": 0.4928, "step": 14047 }, { "epoch": 17.98144, "grad_norm": 1.1232936382293701, "learning_rate": 2.1918767507002803e-05, "loss": 0.5627, "step": 14048 }, { "epoch": 17.98272, "grad_norm": 1.0336296558380127, "learning_rate": 2.1916766706682675e-05, "loss": 0.514, "step": 14049 }, { "epoch": 17.984, "grad_norm": 1.111663818359375, "learning_rate": 2.1914765906362547e-05, "loss": 0.5868, "step": 14050 }, { "epoch": 17.98528, "grad_norm": 1.0357799530029297, "learning_rate": 2.191276510604242e-05, "loss": 0.5266, "step": 14051 }, { "epoch": 17.98656, "grad_norm": 1.0117226839065552, "learning_rate": 2.191076430572229e-05, "loss": 0.5048, "step": 14052 }, { "epoch": 17.98784, "grad_norm": 0.9796508550643921, "learning_rate": 2.1908763505402162e-05, "loss": 0.5113, "step": 14053 }, { "epoch": 17.98912, "grad_norm": 1.028061032295227, "learning_rate": 2.1906762705082034e-05, "loss": 0.4864, "step": 14054 }, { "epoch": 17.9904, "grad_norm": 1.0755497217178345, "learning_rate": 2.1904761904761906e-05, "loss": 0.5686, "step": 14055 }, { "epoch": 17.99168, "grad_norm": 1.0548936128616333, "learning_rate": 2.1902761104441778e-05, "loss": 0.5476, "step": 14056 }, { "epoch": 17.99296, "grad_norm": 1.0421359539031982, "learning_rate": 2.190076030412165e-05, "loss": 0.5111, "step": 14057 }, { "epoch": 17.99424, "grad_norm": 1.0015569925308228, "learning_rate": 2.189875950380152e-05, "loss": 0.5427, "step": 14058 }, { "epoch": 17.99552, "grad_norm": 1.0161628723144531, "learning_rate": 2.1896758703481393e-05, "loss": 0.5307, "step": 14059 }, { "epoch": 17.9968, "grad_norm": 1.0471055507659912, "learning_rate": 
2.1894757903161265e-05, "loss": 0.5164, "step": 14060 }, { "epoch": 17.99808, "grad_norm": 1.1203445196151733, "learning_rate": 2.1892757102841137e-05, "loss": 0.557, "step": 14061 }, { "epoch": 17.99936, "grad_norm": 1.0305187702178955, "learning_rate": 2.189075630252101e-05, "loss": 0.499, "step": 14062 }, { "epoch": 18.00064, "grad_norm": 2.1932191848754883, "learning_rate": 2.188875550220088e-05, "loss": 0.9041, "step": 14063 }, { "epoch": 18.00192, "grad_norm": 1.0409256219863892, "learning_rate": 2.1886754701880756e-05, "loss": 0.5222, "step": 14064 }, { "epoch": 18.0032, "grad_norm": 1.000382900238037, "learning_rate": 2.1884753901560624e-05, "loss": 0.5282, "step": 14065 }, { "epoch": 18.00448, "grad_norm": 0.9593517780303955, "learning_rate": 2.1882753101240496e-05, "loss": 0.4422, "step": 14066 }, { "epoch": 18.00576, "grad_norm": 1.0473594665527344, "learning_rate": 2.1880752300920368e-05, "loss": 0.5121, "step": 14067 }, { "epoch": 18.00704, "grad_norm": 1.0217726230621338, "learning_rate": 2.1878751500600243e-05, "loss": 0.4877, "step": 14068 }, { "epoch": 18.00832, "grad_norm": 0.9588239789009094, "learning_rate": 2.1876750700280112e-05, "loss": 0.4513, "step": 14069 }, { "epoch": 18.0096, "grad_norm": 0.9952200651168823, "learning_rate": 2.1874749899959984e-05, "loss": 0.4666, "step": 14070 }, { "epoch": 18.01088, "grad_norm": 1.0490752458572388, "learning_rate": 2.187274909963986e-05, "loss": 0.5474, "step": 14071 }, { "epoch": 18.01216, "grad_norm": 1.0104339122772217, "learning_rate": 2.187074829931973e-05, "loss": 0.5175, "step": 14072 }, { "epoch": 18.01344, "grad_norm": 1.013339877128601, "learning_rate": 2.18687474989996e-05, "loss": 0.5109, "step": 14073 }, { "epoch": 18.01472, "grad_norm": 1.0241261720657349, "learning_rate": 2.186674669867947e-05, "loss": 0.5022, "step": 14074 }, { "epoch": 18.016, "grad_norm": 1.0270612239837646, "learning_rate": 2.1864745898359346e-05, "loss": 0.4703, "step": 14075 }, { "epoch": 18.01728, "grad_norm": 
1.0369747877120972, "learning_rate": 2.1862745098039218e-05, "loss": 0.5075, "step": 14076 }, { "epoch": 18.01856, "grad_norm": 1.053289771080017, "learning_rate": 2.1860744297719087e-05, "loss": 0.5392, "step": 14077 }, { "epoch": 18.01984, "grad_norm": 1.0217084884643555, "learning_rate": 2.1858743497398962e-05, "loss": 0.4777, "step": 14078 }, { "epoch": 18.02112, "grad_norm": 0.9881057143211365, "learning_rate": 2.1856742697078834e-05, "loss": 0.4531, "step": 14079 }, { "epoch": 18.0224, "grad_norm": 1.0281909704208374, "learning_rate": 2.1854741896758706e-05, "loss": 0.528, "step": 14080 }, { "epoch": 18.02368, "grad_norm": 0.9666361808776855, "learning_rate": 2.1852741096438574e-05, "loss": 0.4326, "step": 14081 }, { "epoch": 18.02496, "grad_norm": 1.0549180507659912, "learning_rate": 2.185074029611845e-05, "loss": 0.5504, "step": 14082 }, { "epoch": 18.02624, "grad_norm": 0.9973421692848206, "learning_rate": 2.184873949579832e-05, "loss": 0.4681, "step": 14083 }, { "epoch": 18.02752, "grad_norm": 1.0544997453689575, "learning_rate": 2.1846738695478193e-05, "loss": 0.4742, "step": 14084 }, { "epoch": 18.0288, "grad_norm": 1.0692542791366577, "learning_rate": 2.1844737895158065e-05, "loss": 0.5059, "step": 14085 }, { "epoch": 18.03008, "grad_norm": 1.024929165840149, "learning_rate": 2.1842737094837937e-05, "loss": 0.4643, "step": 14086 }, { "epoch": 18.03136, "grad_norm": 1.0285801887512207, "learning_rate": 2.184073629451781e-05, "loss": 0.5188, "step": 14087 }, { "epoch": 18.03264, "grad_norm": 1.0161205530166626, "learning_rate": 2.183873549419768e-05, "loss": 0.485, "step": 14088 }, { "epoch": 18.03392, "grad_norm": 1.0766185522079468, "learning_rate": 2.1836734693877552e-05, "loss": 0.5295, "step": 14089 }, { "epoch": 18.0352, "grad_norm": 1.038028359413147, "learning_rate": 2.1834733893557424e-05, "loss": 0.4863, "step": 14090 }, { "epoch": 18.03648, "grad_norm": 1.0519113540649414, "learning_rate": 2.1832733093237296e-05, "loss": 0.5356, "step": 14091 
}, { "epoch": 18.03776, "grad_norm": 0.9999911189079285, "learning_rate": 2.1830732292917168e-05, "loss": 0.455, "step": 14092 }, { "epoch": 18.03904, "grad_norm": 1.067815899848938, "learning_rate": 2.182873149259704e-05, "loss": 0.4824, "step": 14093 }, { "epoch": 18.04032, "grad_norm": 1.0944994688034058, "learning_rate": 2.1826730692276912e-05, "loss": 0.468, "step": 14094 }, { "epoch": 18.0416, "grad_norm": 1.105892300605774, "learning_rate": 2.1824729891956784e-05, "loss": 0.5392, "step": 14095 }, { "epoch": 18.04288, "grad_norm": 1.038599967956543, "learning_rate": 2.1822729091636655e-05, "loss": 0.5009, "step": 14096 }, { "epoch": 18.04416, "grad_norm": 1.0022287368774414, "learning_rate": 2.1820728291316527e-05, "loss": 0.473, "step": 14097 }, { "epoch": 18.04544, "grad_norm": 1.0601282119750977, "learning_rate": 2.18187274909964e-05, "loss": 0.5105, "step": 14098 }, { "epoch": 18.04672, "grad_norm": 1.0425713062286377, "learning_rate": 2.181672669067627e-05, "loss": 0.448, "step": 14099 }, { "epoch": 18.048, "grad_norm": 1.0768316984176636, "learning_rate": 2.1814725890356143e-05, "loss": 0.5035, "step": 14100 }, { "epoch": 18.04928, "grad_norm": 1.0370937585830688, "learning_rate": 2.1812725090036015e-05, "loss": 0.4883, "step": 14101 }, { "epoch": 18.05056, "grad_norm": 1.0263327360153198, "learning_rate": 2.1810724289715887e-05, "loss": 0.5022, "step": 14102 }, { "epoch": 18.05184, "grad_norm": 0.9880037307739258, "learning_rate": 2.1808723489395762e-05, "loss": 0.4505, "step": 14103 }, { "epoch": 18.05312, "grad_norm": 1.0344130992889404, "learning_rate": 2.180672268907563e-05, "loss": 0.5191, "step": 14104 }, { "epoch": 18.0544, "grad_norm": 1.0637491941452026, "learning_rate": 2.1804721888755502e-05, "loss": 0.4676, "step": 14105 }, { "epoch": 18.05568, "grad_norm": 1.0038073062896729, "learning_rate": 2.1802721088435374e-05, "loss": 0.4869, "step": 14106 }, { "epoch": 18.05696, "grad_norm": 1.0341646671295166, "learning_rate": 
2.180072028811525e-05, "loss": 0.4734, "step": 14107 }, { "epoch": 18.05824, "grad_norm": 1.0689383745193481, "learning_rate": 2.1798719487795118e-05, "loss": 0.4993, "step": 14108 }, { "epoch": 18.05952, "grad_norm": 1.0641204118728638, "learning_rate": 2.179671868747499e-05, "loss": 0.5123, "step": 14109 }, { "epoch": 18.0608, "grad_norm": 1.047566533088684, "learning_rate": 2.1794717887154865e-05, "loss": 0.5061, "step": 14110 }, { "epoch": 18.06208, "grad_norm": 1.0506558418273926, "learning_rate": 2.1792717086834737e-05, "loss": 0.4856, "step": 14111 }, { "epoch": 18.06336, "grad_norm": 0.985125720500946, "learning_rate": 2.1790716286514605e-05, "loss": 0.4718, "step": 14112 }, { "epoch": 18.06464, "grad_norm": 1.012773036956787, "learning_rate": 2.1788715486194477e-05, "loss": 0.4976, "step": 14113 }, { "epoch": 18.06592, "grad_norm": 0.9665741920471191, "learning_rate": 2.1786714685874352e-05, "loss": 0.4588, "step": 14114 }, { "epoch": 18.0672, "grad_norm": 1.0558841228485107, "learning_rate": 2.1784713885554224e-05, "loss": 0.5345, "step": 14115 }, { "epoch": 18.06848, "grad_norm": 1.1132677793502808, "learning_rate": 2.1782713085234093e-05, "loss": 0.5223, "step": 14116 }, { "epoch": 18.06976, "grad_norm": 0.9984009265899658, "learning_rate": 2.1780712284913968e-05, "loss": 0.4445, "step": 14117 }, { "epoch": 18.07104, "grad_norm": 1.0193313360214233, "learning_rate": 2.177871148459384e-05, "loss": 0.4674, "step": 14118 }, { "epoch": 18.07232, "grad_norm": 1.009688138961792, "learning_rate": 2.177671068427371e-05, "loss": 0.449, "step": 14119 }, { "epoch": 18.0736, "grad_norm": 0.9733455777168274, "learning_rate": 2.177470988395358e-05, "loss": 0.4378, "step": 14120 }, { "epoch": 18.07488, "grad_norm": 1.0891609191894531, "learning_rate": 2.1772709083633455e-05, "loss": 0.4709, "step": 14121 }, { "epoch": 18.07616, "grad_norm": 1.1113479137420654, "learning_rate": 2.1770708283313327e-05, "loss": 0.5089, "step": 14122 }, { "epoch": 18.07744, "grad_norm": 
1.1308077573776245, "learning_rate": 2.17687074829932e-05, "loss": 0.5187, "step": 14123 }, { "epoch": 18.07872, "grad_norm": 1.1170744895935059, "learning_rate": 2.176670668267307e-05, "loss": 0.528, "step": 14124 }, { "epoch": 18.08, "grad_norm": 1.0902800559997559, "learning_rate": 2.1764705882352943e-05, "loss": 0.5186, "step": 14125 }, { "epoch": 18.08128, "grad_norm": 1.0982624292373657, "learning_rate": 2.1762705082032815e-05, "loss": 0.5318, "step": 14126 }, { "epoch": 18.08256, "grad_norm": 1.066246747970581, "learning_rate": 2.1760704281712686e-05, "loss": 0.4535, "step": 14127 }, { "epoch": 18.08384, "grad_norm": 1.0559955835342407, "learning_rate": 2.1758703481392558e-05, "loss": 0.5188, "step": 14128 }, { "epoch": 18.08512, "grad_norm": 1.0126625299453735, "learning_rate": 2.175670268107243e-05, "loss": 0.4805, "step": 14129 }, { "epoch": 18.0864, "grad_norm": 1.0629305839538574, "learning_rate": 2.1754701880752302e-05, "loss": 0.492, "step": 14130 }, { "epoch": 18.08768, "grad_norm": 1.0466065406799316, "learning_rate": 2.1752701080432174e-05, "loss": 0.5372, "step": 14131 }, { "epoch": 18.08896, "grad_norm": 1.072338581085205, "learning_rate": 2.1750700280112046e-05, "loss": 0.5614, "step": 14132 }, { "epoch": 18.09024, "grad_norm": 1.0793755054473877, "learning_rate": 2.1748699479791918e-05, "loss": 0.5204, "step": 14133 }, { "epoch": 18.09152, "grad_norm": 1.063680648803711, "learning_rate": 2.174669867947179e-05, "loss": 0.4683, "step": 14134 }, { "epoch": 18.0928, "grad_norm": 1.070969820022583, "learning_rate": 2.174469787915166e-05, "loss": 0.515, "step": 14135 }, { "epoch": 18.09408, "grad_norm": 1.0573210716247559, "learning_rate": 2.1742697078831533e-05, "loss": 0.5124, "step": 14136 }, { "epoch": 18.09536, "grad_norm": 0.9679538607597351, "learning_rate": 2.1740696278511405e-05, "loss": 0.4349, "step": 14137 }, { "epoch": 18.09664, "grad_norm": 1.0116899013519287, "learning_rate": 2.173869547819128e-05, "loss": 0.5204, "step": 14138 }, { 
"epoch": 18.09792, "grad_norm": 1.0702815055847168, "learning_rate": 2.173669467787115e-05, "loss": 0.5164, "step": 14139 }, { "epoch": 18.0992, "grad_norm": 1.0475645065307617, "learning_rate": 2.173469387755102e-05, "loss": 0.5105, "step": 14140 }, { "epoch": 18.10048, "grad_norm": 1.0068633556365967, "learning_rate": 2.1732693077230892e-05, "loss": 0.4967, "step": 14141 }, { "epoch": 18.10176, "grad_norm": 1.0416619777679443, "learning_rate": 2.1730692276910768e-05, "loss": 0.4917, "step": 14142 }, { "epoch": 18.10304, "grad_norm": 0.996741533279419, "learning_rate": 2.1728691476590636e-05, "loss": 0.4628, "step": 14143 }, { "epoch": 18.10432, "grad_norm": 1.0638675689697266, "learning_rate": 2.1726690676270508e-05, "loss": 0.5081, "step": 14144 }, { "epoch": 18.1056, "grad_norm": 1.0904065370559692, "learning_rate": 2.1724689875950383e-05, "loss": 0.4946, "step": 14145 }, { "epoch": 18.10688, "grad_norm": 1.032350778579712, "learning_rate": 2.1722689075630255e-05, "loss": 0.4844, "step": 14146 }, { "epoch": 18.10816, "grad_norm": 1.0421230792999268, "learning_rate": 2.1720688275310124e-05, "loss": 0.4894, "step": 14147 }, { "epoch": 18.10944, "grad_norm": 1.130836009979248, "learning_rate": 2.1718687474989995e-05, "loss": 0.5482, "step": 14148 }, { "epoch": 18.11072, "grad_norm": 1.1370742321014404, "learning_rate": 2.171668667466987e-05, "loss": 0.5494, "step": 14149 }, { "epoch": 18.112, "grad_norm": 1.0708554983139038, "learning_rate": 2.1714685874349742e-05, "loss": 0.5181, "step": 14150 }, { "epoch": 18.11328, "grad_norm": 1.0703370571136475, "learning_rate": 2.171268507402961e-05, "loss": 0.5118, "step": 14151 }, { "epoch": 18.11456, "grad_norm": 1.0429341793060303, "learning_rate": 2.1710684273709486e-05, "loss": 0.4991, "step": 14152 }, { "epoch": 18.11584, "grad_norm": 1.0283002853393555, "learning_rate": 2.1708683473389358e-05, "loss": 0.481, "step": 14153 }, { "epoch": 18.11712, "grad_norm": 1.0293543338775635, "learning_rate": 2.170668267306923e-05, 
"loss": 0.5489, "step": 14154 }, { "epoch": 18.1184, "grad_norm": 1.0159963369369507, "learning_rate": 2.17046818727491e-05, "loss": 0.4763, "step": 14155 }, { "epoch": 18.11968, "grad_norm": 1.0897349119186401, "learning_rate": 2.1702681072428974e-05, "loss": 0.5192, "step": 14156 }, { "epoch": 18.12096, "grad_norm": 1.015059471130371, "learning_rate": 2.1700680272108845e-05, "loss": 0.4625, "step": 14157 }, { "epoch": 18.12224, "grad_norm": 1.0573114156723022, "learning_rate": 2.1698679471788717e-05, "loss": 0.5278, "step": 14158 }, { "epoch": 18.12352, "grad_norm": 1.0435305833816528, "learning_rate": 2.1696678671468586e-05, "loss": 0.4909, "step": 14159 }, { "epoch": 18.1248, "grad_norm": 1.0256880521774292, "learning_rate": 2.169467787114846e-05, "loss": 0.4836, "step": 14160 }, { "epoch": 18.12608, "grad_norm": 1.0937752723693848, "learning_rate": 2.1692677070828333e-05, "loss": 0.5833, "step": 14161 }, { "epoch": 18.12736, "grad_norm": 1.005741834640503, "learning_rate": 2.1690676270508205e-05, "loss": 0.4886, "step": 14162 }, { "epoch": 18.12864, "grad_norm": 1.0671494007110596, "learning_rate": 2.1688675470188077e-05, "loss": 0.4829, "step": 14163 }, { "epoch": 18.12992, "grad_norm": 1.0546166896820068, "learning_rate": 2.168667466986795e-05, "loss": 0.5001, "step": 14164 }, { "epoch": 18.1312, "grad_norm": 1.0181262493133545, "learning_rate": 2.168467386954782e-05, "loss": 0.4707, "step": 14165 }, { "epoch": 18.13248, "grad_norm": 1.008811116218567, "learning_rate": 2.1682673069227692e-05, "loss": 0.4869, "step": 14166 }, { "epoch": 18.13376, "grad_norm": 1.00252366065979, "learning_rate": 2.1680672268907564e-05, "loss": 0.4692, "step": 14167 }, { "epoch": 18.13504, "grad_norm": 1.0217723846435547, "learning_rate": 2.1678671468587436e-05, "loss": 0.5281, "step": 14168 }, { "epoch": 18.13632, "grad_norm": 1.0290299654006958, "learning_rate": 2.1676670668267308e-05, "loss": 0.4796, "step": 14169 }, { "epoch": 18.1376, "grad_norm": 1.0708261728286743, 
"learning_rate": 2.167466986794718e-05, "loss": 0.4746, "step": 14170 }, { "epoch": 18.13888, "grad_norm": 1.0332971811294556, "learning_rate": 2.167266906762705e-05, "loss": 0.485, "step": 14171 }, { "epoch": 18.14016, "grad_norm": 1.0188629627227783, "learning_rate": 2.1670668267306923e-05, "loss": 0.4689, "step": 14172 }, { "epoch": 18.14144, "grad_norm": 1.0185388326644897, "learning_rate": 2.1668667466986795e-05, "loss": 0.5077, "step": 14173 }, { "epoch": 18.14272, "grad_norm": 1.0127745866775513, "learning_rate": 2.1666666666666667e-05, "loss": 0.4879, "step": 14174 }, { "epoch": 18.144, "grad_norm": 1.0742979049682617, "learning_rate": 2.166466586634654e-05, "loss": 0.5216, "step": 14175 }, { "epoch": 18.14528, "grad_norm": 1.0509237051010132, "learning_rate": 2.166266506602641e-05, "loss": 0.4848, "step": 14176 }, { "epoch": 18.14656, "grad_norm": 1.0471779108047485, "learning_rate": 2.1660664265706286e-05, "loss": 0.5013, "step": 14177 }, { "epoch": 18.14784, "grad_norm": 1.0413541793823242, "learning_rate": 2.1658663465386154e-05, "loss": 0.4817, "step": 14178 }, { "epoch": 18.14912, "grad_norm": 1.007503867149353, "learning_rate": 2.1656662665066026e-05, "loss": 0.4622, "step": 14179 }, { "epoch": 18.1504, "grad_norm": 1.0402361154556274, "learning_rate": 2.1654661864745898e-05, "loss": 0.5028, "step": 14180 }, { "epoch": 18.15168, "grad_norm": 1.0889134407043457, "learning_rate": 2.1652661064425773e-05, "loss": 0.5201, "step": 14181 }, { "epoch": 18.15296, "grad_norm": 1.0945775508880615, "learning_rate": 2.1650660264105642e-05, "loss": 0.4916, "step": 14182 }, { "epoch": 18.15424, "grad_norm": 1.025062918663025, "learning_rate": 2.1648659463785514e-05, "loss": 0.5115, "step": 14183 }, { "epoch": 18.15552, "grad_norm": 1.0531753301620483, "learning_rate": 2.164665866346539e-05, "loss": 0.4737, "step": 14184 }, { "epoch": 18.1568, "grad_norm": 1.0863885879516602, "learning_rate": 2.164465786314526e-05, "loss": 0.5113, "step": 14185 }, { "epoch": 
18.158079999999998, "grad_norm": 1.0436323881149292, "learning_rate": 2.164265706282513e-05, "loss": 0.5154, "step": 14186 }, { "epoch": 18.15936, "grad_norm": 1.0325742959976196, "learning_rate": 2.1640656262505e-05, "loss": 0.4873, "step": 14187 }, { "epoch": 18.16064, "grad_norm": 1.0843673944473267, "learning_rate": 2.1638655462184876e-05, "loss": 0.4948, "step": 14188 }, { "epoch": 18.16192, "grad_norm": 1.0292965173721313, "learning_rate": 2.1636654661864748e-05, "loss": 0.5001, "step": 14189 }, { "epoch": 18.1632, "grad_norm": 1.010935664176941, "learning_rate": 2.1634653861544617e-05, "loss": 0.4492, "step": 14190 }, { "epoch": 18.16448, "grad_norm": 1.0702733993530273, "learning_rate": 2.1632653061224492e-05, "loss": 0.486, "step": 14191 }, { "epoch": 18.16576, "grad_norm": 1.0180492401123047, "learning_rate": 2.1630652260904364e-05, "loss": 0.4948, "step": 14192 }, { "epoch": 18.16704, "grad_norm": 1.0777477025985718, "learning_rate": 2.1628651460584236e-05, "loss": 0.5253, "step": 14193 }, { "epoch": 18.16832, "grad_norm": 1.03486168384552, "learning_rate": 2.1626650660264104e-05, "loss": 0.4609, "step": 14194 }, { "epoch": 18.1696, "grad_norm": 1.0693778991699219, "learning_rate": 2.162464985994398e-05, "loss": 0.5002, "step": 14195 }, { "epoch": 18.17088, "grad_norm": 1.0229696035385132, "learning_rate": 2.162264905962385e-05, "loss": 0.4825, "step": 14196 }, { "epoch": 18.17216, "grad_norm": 1.0002144575119019, "learning_rate": 2.1620648259303723e-05, "loss": 0.4239, "step": 14197 }, { "epoch": 18.17344, "grad_norm": 1.0359293222427368, "learning_rate": 2.1618647458983595e-05, "loss": 0.4582, "step": 14198 }, { "epoch": 18.17472, "grad_norm": 1.0600343942642212, "learning_rate": 2.1616646658663467e-05, "loss": 0.5052, "step": 14199 }, { "epoch": 18.176, "grad_norm": 1.0161858797073364, "learning_rate": 2.161464585834334e-05, "loss": 0.4748, "step": 14200 }, { "epoch": 18.17728, "grad_norm": 1.0281199216842651, "learning_rate": 2.161264505802321e-05, 
"loss": 0.4505, "step": 14201 }, { "epoch": 18.17856, "grad_norm": 1.0303138494491577, "learning_rate": 2.1610644257703082e-05, "loss": 0.5022, "step": 14202 }, { "epoch": 18.17984, "grad_norm": 1.0298250913619995, "learning_rate": 2.1608643457382954e-05, "loss": 0.4955, "step": 14203 }, { "epoch": 18.18112, "grad_norm": 1.0383046865463257, "learning_rate": 2.1606642657062826e-05, "loss": 0.503, "step": 14204 }, { "epoch": 18.1824, "grad_norm": 1.0609245300292969, "learning_rate": 2.1604641856742698e-05, "loss": 0.4975, "step": 14205 }, { "epoch": 18.18368, "grad_norm": 1.0384618043899536, "learning_rate": 2.160264105642257e-05, "loss": 0.5371, "step": 14206 }, { "epoch": 18.18496, "grad_norm": 1.044183373451233, "learning_rate": 2.160064025610244e-05, "loss": 0.452, "step": 14207 }, { "epoch": 18.18624, "grad_norm": 1.0944687128067017, "learning_rate": 2.1598639455782314e-05, "loss": 0.5241, "step": 14208 }, { "epoch": 18.18752, "grad_norm": 1.0087506771087646, "learning_rate": 2.1596638655462185e-05, "loss": 0.4625, "step": 14209 }, { "epoch": 18.1888, "grad_norm": 1.0070358514785767, "learning_rate": 2.1594637855142057e-05, "loss": 0.4719, "step": 14210 }, { "epoch": 18.19008, "grad_norm": 1.0301635265350342, "learning_rate": 2.159263705482193e-05, "loss": 0.448, "step": 14211 }, { "epoch": 18.19136, "grad_norm": 1.077388048171997, "learning_rate": 2.15906362545018e-05, "loss": 0.5245, "step": 14212 }, { "epoch": 18.19264, "grad_norm": 1.0530251264572144, "learning_rate": 2.1588635454181673e-05, "loss": 0.5032, "step": 14213 }, { "epoch": 18.19392, "grad_norm": 1.058570384979248, "learning_rate": 2.1586634653861545e-05, "loss": 0.5114, "step": 14214 }, { "epoch": 18.1952, "grad_norm": 1.0303319692611694, "learning_rate": 2.1584633853541417e-05, "loss": 0.4486, "step": 14215 }, { "epoch": 18.19648, "grad_norm": 1.0652695894241333, "learning_rate": 2.1582633053221292e-05, "loss": 0.5292, "step": 14216 }, { "epoch": 18.19776, "grad_norm": 1.0437886714935303, 
"learning_rate": 2.158063225290116e-05, "loss": 0.5014, "step": 14217 }, { "epoch": 18.19904, "grad_norm": 1.0610305070877075, "learning_rate": 2.1578631452581032e-05, "loss": 0.5458, "step": 14218 }, { "epoch": 18.20032, "grad_norm": 0.995061457157135, "learning_rate": 2.1576630652260904e-05, "loss": 0.4919, "step": 14219 }, { "epoch": 18.2016, "grad_norm": 1.0778111219406128, "learning_rate": 2.157462985194078e-05, "loss": 0.5306, "step": 14220 }, { "epoch": 18.20288, "grad_norm": 1.0350569486618042, "learning_rate": 2.1572629051620648e-05, "loss": 0.5296, "step": 14221 }, { "epoch": 18.20416, "grad_norm": 1.0401970148086548, "learning_rate": 2.157062825130052e-05, "loss": 0.5086, "step": 14222 }, { "epoch": 18.20544, "grad_norm": 1.0326118469238281, "learning_rate": 2.1568627450980395e-05, "loss": 0.5022, "step": 14223 }, { "epoch": 18.20672, "grad_norm": 1.0625585317611694, "learning_rate": 2.1566626650660267e-05, "loss": 0.5218, "step": 14224 }, { "epoch": 18.208, "grad_norm": 1.0629734992980957, "learning_rate": 2.1564625850340135e-05, "loss": 0.5225, "step": 14225 }, { "epoch": 18.20928, "grad_norm": 1.0436673164367676, "learning_rate": 2.1562625050020007e-05, "loss": 0.4921, "step": 14226 }, { "epoch": 18.21056, "grad_norm": 1.0012400150299072, "learning_rate": 2.1560624249699882e-05, "loss": 0.4427, "step": 14227 }, { "epoch": 18.21184, "grad_norm": 1.1108235120773315, "learning_rate": 2.1558623449379754e-05, "loss": 0.5484, "step": 14228 }, { "epoch": 18.21312, "grad_norm": 1.080520510673523, "learning_rate": 2.1556622649059623e-05, "loss": 0.5021, "step": 14229 }, { "epoch": 18.2144, "grad_norm": 1.0559675693511963, "learning_rate": 2.1554621848739498e-05, "loss": 0.4703, "step": 14230 }, { "epoch": 18.21568, "grad_norm": 1.0659006834030151, "learning_rate": 2.155262104841937e-05, "loss": 0.5139, "step": 14231 }, { "epoch": 18.21696, "grad_norm": 1.0316319465637207, "learning_rate": 2.155062024809924e-05, "loss": 0.4966, "step": 14232 }, { "epoch": 
18.21824, "grad_norm": 1.0869457721710205, "learning_rate": 2.154861944777911e-05, "loss": 0.5206, "step": 14233 }, { "epoch": 18.21952, "grad_norm": 0.9864334464073181, "learning_rate": 2.1546618647458985e-05, "loss": 0.4524, "step": 14234 }, { "epoch": 18.2208, "grad_norm": 1.0641202926635742, "learning_rate": 2.1544617847138857e-05, "loss": 0.5093, "step": 14235 }, { "epoch": 18.22208, "grad_norm": 0.9992152452468872, "learning_rate": 2.154261704681873e-05, "loss": 0.448, "step": 14236 }, { "epoch": 18.22336, "grad_norm": 1.1175810098648071, "learning_rate": 2.15406162464986e-05, "loss": 0.4918, "step": 14237 }, { "epoch": 18.22464, "grad_norm": 1.0763076543807983, "learning_rate": 2.1538615446178473e-05, "loss": 0.4894, "step": 14238 }, { "epoch": 18.22592, "grad_norm": 1.0875471830368042, "learning_rate": 2.1536614645858344e-05, "loss": 0.5458, "step": 14239 }, { "epoch": 18.2272, "grad_norm": 1.0565011501312256, "learning_rate": 2.1534613845538216e-05, "loss": 0.488, "step": 14240 }, { "epoch": 18.22848, "grad_norm": 1.0078827142715454, "learning_rate": 2.1532613045218088e-05, "loss": 0.4727, "step": 14241 }, { "epoch": 18.22976, "grad_norm": 1.0515302419662476, "learning_rate": 2.153061224489796e-05, "loss": 0.4798, "step": 14242 }, { "epoch": 18.23104, "grad_norm": 1.0603551864624023, "learning_rate": 2.1528611444577832e-05, "loss": 0.5047, "step": 14243 }, { "epoch": 18.23232, "grad_norm": 1.032323956489563, "learning_rate": 2.1526610644257704e-05, "loss": 0.4825, "step": 14244 }, { "epoch": 18.2336, "grad_norm": 1.0973374843597412, "learning_rate": 2.1524609843937576e-05, "loss": 0.4856, "step": 14245 }, { "epoch": 18.23488, "grad_norm": 1.1034042835235596, "learning_rate": 2.1522609043617447e-05, "loss": 0.5269, "step": 14246 }, { "epoch": 18.23616, "grad_norm": 1.0152696371078491, "learning_rate": 2.152060824329732e-05, "loss": 0.47, "step": 14247 }, { "epoch": 18.23744, "grad_norm": 1.0043752193450928, "learning_rate": 2.151860744297719e-05, "loss": 
0.5078, "step": 14248 }, { "epoch": 18.23872, "grad_norm": 1.1099114418029785, "learning_rate": 2.1516606642657063e-05, "loss": 0.528, "step": 14249 }, { "epoch": 18.24, "grad_norm": 1.0465238094329834, "learning_rate": 2.1514605842336935e-05, "loss": 0.5168, "step": 14250 }, { "epoch": 18.24128, "grad_norm": 1.0789815187454224, "learning_rate": 2.151260504201681e-05, "loss": 0.5096, "step": 14251 }, { "epoch": 18.24256, "grad_norm": 1.1060971021652222, "learning_rate": 2.151060424169668e-05, "loss": 0.5233, "step": 14252 }, { "epoch": 18.24384, "grad_norm": 1.0800647735595703, "learning_rate": 2.150860344137655e-05, "loss": 0.5079, "step": 14253 }, { "epoch": 18.24512, "grad_norm": 0.9907492995262146, "learning_rate": 2.1506602641056422e-05, "loss": 0.4908, "step": 14254 }, { "epoch": 18.2464, "grad_norm": 1.0035676956176758, "learning_rate": 2.1504601840736298e-05, "loss": 0.4876, "step": 14255 }, { "epoch": 18.24768, "grad_norm": 1.0252283811569214, "learning_rate": 2.1502601040416166e-05, "loss": 0.4663, "step": 14256 }, { "epoch": 18.24896, "grad_norm": 1.023335337638855, "learning_rate": 2.1500600240096038e-05, "loss": 0.494, "step": 14257 }, { "epoch": 18.25024, "grad_norm": 1.0962717533111572, "learning_rate": 2.1498599439775913e-05, "loss": 0.5021, "step": 14258 }, { "epoch": 18.25152, "grad_norm": 1.0230556726455688, "learning_rate": 2.1496598639455785e-05, "loss": 0.5067, "step": 14259 }, { "epoch": 18.2528, "grad_norm": 0.9827935099601746, "learning_rate": 2.1494597839135653e-05, "loss": 0.4315, "step": 14260 }, { "epoch": 18.25408, "grad_norm": 1.0110362768173218, "learning_rate": 2.1492597038815525e-05, "loss": 0.4864, "step": 14261 }, { "epoch": 18.25536, "grad_norm": 1.0713372230529785, "learning_rate": 2.14905962384954e-05, "loss": 0.5581, "step": 14262 }, { "epoch": 18.25664, "grad_norm": 1.0925161838531494, "learning_rate": 2.1488595438175272e-05, "loss": 0.4843, "step": 14263 }, { "epoch": 18.25792, "grad_norm": 1.0575942993164062, 
"learning_rate": 2.148659463785514e-05, "loss": 0.5034, "step": 14264 }, { "epoch": 18.2592, "grad_norm": 1.080422282218933, "learning_rate": 2.1484593837535016e-05, "loss": 0.5236, "step": 14265 }, { "epoch": 18.26048, "grad_norm": 1.0370043516159058, "learning_rate": 2.1482593037214888e-05, "loss": 0.4743, "step": 14266 }, { "epoch": 18.26176, "grad_norm": 1.0181622505187988, "learning_rate": 2.148059223689476e-05, "loss": 0.449, "step": 14267 }, { "epoch": 18.26304, "grad_norm": 1.0741147994995117, "learning_rate": 2.147859143657463e-05, "loss": 0.544, "step": 14268 }, { "epoch": 18.26432, "grad_norm": 1.0276820659637451, "learning_rate": 2.1476590636254504e-05, "loss": 0.4783, "step": 14269 }, { "epoch": 18.2656, "grad_norm": 1.094807744026184, "learning_rate": 2.1474589835934375e-05, "loss": 0.488, "step": 14270 }, { "epoch": 18.26688, "grad_norm": 1.0752545595169067, "learning_rate": 2.1472589035614247e-05, "loss": 0.4985, "step": 14271 }, { "epoch": 18.26816, "grad_norm": 1.1067184209823608, "learning_rate": 2.1470588235294116e-05, "loss": 0.4627, "step": 14272 }, { "epoch": 18.26944, "grad_norm": 1.0492066144943237, "learning_rate": 2.146858743497399e-05, "loss": 0.5197, "step": 14273 }, { "epoch": 18.27072, "grad_norm": 1.0525888204574585, "learning_rate": 2.1466586634653863e-05, "loss": 0.5238, "step": 14274 }, { "epoch": 18.272, "grad_norm": 1.0903679132461548, "learning_rate": 2.1464585834333735e-05, "loss": 0.5231, "step": 14275 }, { "epoch": 18.27328, "grad_norm": 1.1250532865524292, "learning_rate": 2.1462585034013607e-05, "loss": 0.5518, "step": 14276 }, { "epoch": 18.27456, "grad_norm": 1.0479706525802612, "learning_rate": 2.146058423369348e-05, "loss": 0.4735, "step": 14277 }, { "epoch": 18.27584, "grad_norm": 1.100510597229004, "learning_rate": 2.145858343337335e-05, "loss": 0.5421, "step": 14278 }, { "epoch": 18.27712, "grad_norm": 1.0894125699996948, "learning_rate": 2.1456582633053222e-05, "loss": 0.4891, "step": 14279 }, { "epoch": 18.2784, 
"grad_norm": 1.0481358766555786, "learning_rate": 2.1454581832733094e-05, "loss": 0.4968, "step": 14280 }, { "epoch": 18.27968, "grad_norm": 1.0981334447860718, "learning_rate": 2.1452581032412966e-05, "loss": 0.506, "step": 14281 }, { "epoch": 18.28096, "grad_norm": 1.0324424505233765, "learning_rate": 2.1450580232092838e-05, "loss": 0.5348, "step": 14282 }, { "epoch": 18.28224, "grad_norm": 0.9970609545707703, "learning_rate": 2.144857943177271e-05, "loss": 0.5035, "step": 14283 }, { "epoch": 18.28352, "grad_norm": 1.0144636631011963, "learning_rate": 2.144657863145258e-05, "loss": 0.499, "step": 14284 }, { "epoch": 18.2848, "grad_norm": 1.009353756904602, "learning_rate": 2.1444577831132453e-05, "loss": 0.5351, "step": 14285 }, { "epoch": 18.28608, "grad_norm": 0.9811288118362427, "learning_rate": 2.1442577030812325e-05, "loss": 0.4582, "step": 14286 }, { "epoch": 18.28736, "grad_norm": 1.0266668796539307, "learning_rate": 2.1440576230492197e-05, "loss": 0.4908, "step": 14287 }, { "epoch": 18.28864, "grad_norm": 1.0499911308288574, "learning_rate": 2.143857543017207e-05, "loss": 0.5122, "step": 14288 }, { "epoch": 18.28992, "grad_norm": 1.0684877634048462, "learning_rate": 2.143657462985194e-05, "loss": 0.5143, "step": 14289 }, { "epoch": 18.2912, "grad_norm": 1.0018022060394287, "learning_rate": 2.1434573829531816e-05, "loss": 0.4595, "step": 14290 }, { "epoch": 18.29248, "grad_norm": 1.0234333276748657, "learning_rate": 2.1432573029211684e-05, "loss": 0.4952, "step": 14291 }, { "epoch": 18.29376, "grad_norm": 1.015306830406189, "learning_rate": 2.1430572228891556e-05, "loss": 0.4971, "step": 14292 }, { "epoch": 18.29504, "grad_norm": 1.069213628768921, "learning_rate": 2.1428571428571428e-05, "loss": 0.5289, "step": 14293 }, { "epoch": 18.29632, "grad_norm": 1.0142141580581665, "learning_rate": 2.1426570628251303e-05, "loss": 0.502, "step": 14294 }, { "epoch": 18.2976, "grad_norm": 1.0849502086639404, "learning_rate": 2.1424569827931172e-05, "loss": 0.4753, 
"step": 14295 }, { "epoch": 18.29888, "grad_norm": 1.010981798171997, "learning_rate": 2.1422569027611044e-05, "loss": 0.4907, "step": 14296 }, { "epoch": 18.300159999999998, "grad_norm": 1.0261670351028442, "learning_rate": 2.142056822729092e-05, "loss": 0.4917, "step": 14297 }, { "epoch": 18.30144, "grad_norm": 1.057773232460022, "learning_rate": 2.141856742697079e-05, "loss": 0.5051, "step": 14298 }, { "epoch": 18.30272, "grad_norm": 1.0207065343856812, "learning_rate": 2.141656662665066e-05, "loss": 0.4841, "step": 14299 }, { "epoch": 18.304, "grad_norm": 1.052815318107605, "learning_rate": 2.141456582633053e-05, "loss": 0.4918, "step": 14300 }, { "epoch": 18.30528, "grad_norm": 1.0421321392059326, "learning_rate": 2.1412565026010406e-05, "loss": 0.5357, "step": 14301 }, { "epoch": 18.30656, "grad_norm": 1.0197725296020508, "learning_rate": 2.1410564225690278e-05, "loss": 0.4925, "step": 14302 }, { "epoch": 18.30784, "grad_norm": 1.0184986591339111, "learning_rate": 2.1408563425370147e-05, "loss": 0.4907, "step": 14303 }, { "epoch": 18.30912, "grad_norm": 1.0371520519256592, "learning_rate": 2.1406562625050022e-05, "loss": 0.469, "step": 14304 }, { "epoch": 18.3104, "grad_norm": 1.0472283363342285, "learning_rate": 2.1404561824729894e-05, "loss": 0.4778, "step": 14305 }, { "epoch": 18.31168, "grad_norm": 1.0578492879867554, "learning_rate": 2.1402561024409766e-05, "loss": 0.5058, "step": 14306 }, { "epoch": 18.31296, "grad_norm": 1.044542670249939, "learning_rate": 2.1400560224089634e-05, "loss": 0.5086, "step": 14307 }, { "epoch": 18.31424, "grad_norm": 1.0305312871932983, "learning_rate": 2.139855942376951e-05, "loss": 0.4927, "step": 14308 }, { "epoch": 18.31552, "grad_norm": 1.1355465650558472, "learning_rate": 2.139655862344938e-05, "loss": 0.5151, "step": 14309 }, { "epoch": 18.3168, "grad_norm": 1.0777387619018555, "learning_rate": 2.1394557823129253e-05, "loss": 0.5034, "step": 14310 }, { "epoch": 18.31808, "grad_norm": 1.1117095947265625, 
"learning_rate": 2.1392557022809125e-05, "loss": 0.5293, "step": 14311 }, { "epoch": 18.31936, "grad_norm": 1.0630356073379517, "learning_rate": 2.1390556222488997e-05, "loss": 0.5152, "step": 14312 }, { "epoch": 18.32064, "grad_norm": 1.0686274766921997, "learning_rate": 2.138855542216887e-05, "loss": 0.4792, "step": 14313 }, { "epoch": 18.32192, "grad_norm": 1.0616755485534668, "learning_rate": 2.138655462184874e-05, "loss": 0.4932, "step": 14314 }, { "epoch": 18.3232, "grad_norm": 1.079272985458374, "learning_rate": 2.1384553821528612e-05, "loss": 0.4785, "step": 14315 }, { "epoch": 18.32448, "grad_norm": 1.0683273077011108, "learning_rate": 2.1382553021208484e-05, "loss": 0.517, "step": 14316 }, { "epoch": 18.32576, "grad_norm": 1.0856717824935913, "learning_rate": 2.1380552220888356e-05, "loss": 0.489, "step": 14317 }, { "epoch": 18.32704, "grad_norm": 1.0788426399230957, "learning_rate": 2.137855142056823e-05, "loss": 0.4987, "step": 14318 }, { "epoch": 18.32832, "grad_norm": 1.0643631219863892, "learning_rate": 2.13765506202481e-05, "loss": 0.4891, "step": 14319 }, { "epoch": 18.3296, "grad_norm": 1.1336820125579834, "learning_rate": 2.137454981992797e-05, "loss": 0.5683, "step": 14320 }, { "epoch": 18.33088, "grad_norm": 1.0837833881378174, "learning_rate": 2.1372549019607844e-05, "loss": 0.4934, "step": 14321 }, { "epoch": 18.332160000000002, "grad_norm": 1.0664664506912231, "learning_rate": 2.137054821928772e-05, "loss": 0.5126, "step": 14322 }, { "epoch": 18.33344, "grad_norm": 1.0915263891220093, "learning_rate": 2.1368547418967587e-05, "loss": 0.5302, "step": 14323 }, { "epoch": 18.33472, "grad_norm": 1.0284439325332642, "learning_rate": 2.136654661864746e-05, "loss": 0.4844, "step": 14324 }, { "epoch": 18.336, "grad_norm": 1.0181630849838257, "learning_rate": 2.136454581832733e-05, "loss": 0.5208, "step": 14325 }, { "epoch": 18.33728, "grad_norm": 1.0594981908798218, "learning_rate": 2.1362545018007206e-05, "loss": 0.5249, "step": 14326 }, { "epoch": 
18.33856, "grad_norm": 1.0511082410812378, "learning_rate": 2.1360544217687075e-05, "loss": 0.5147, "step": 14327 }, { "epoch": 18.33984, "grad_norm": 1.042351484298706, "learning_rate": 2.1358543417366947e-05, "loss": 0.498, "step": 14328 }, { "epoch": 18.34112, "grad_norm": 1.043094515800476, "learning_rate": 2.1356542617046822e-05, "loss": 0.4971, "step": 14329 }, { "epoch": 18.3424, "grad_norm": 1.0453187227249146, "learning_rate": 2.1354541816726694e-05, "loss": 0.5092, "step": 14330 }, { "epoch": 18.34368, "grad_norm": 1.0761491060256958, "learning_rate": 2.1352541016406562e-05, "loss": 0.5334, "step": 14331 }, { "epoch": 18.34496, "grad_norm": 0.980190098285675, "learning_rate": 2.1350540216086434e-05, "loss": 0.4569, "step": 14332 }, { "epoch": 18.34624, "grad_norm": 1.0307832956314087, "learning_rate": 2.134853941576631e-05, "loss": 0.4986, "step": 14333 }, { "epoch": 18.34752, "grad_norm": 1.0275218486785889, "learning_rate": 2.134653861544618e-05, "loss": 0.4659, "step": 14334 }, { "epoch": 18.3488, "grad_norm": 1.1118059158325195, "learning_rate": 2.134453781512605e-05, "loss": 0.5302, "step": 14335 }, { "epoch": 18.35008, "grad_norm": 1.1193236112594604, "learning_rate": 2.1342537014805925e-05, "loss": 0.5402, "step": 14336 }, { "epoch": 18.35136, "grad_norm": 1.0722352266311646, "learning_rate": 2.1340536214485797e-05, "loss": 0.458, "step": 14337 }, { "epoch": 18.35264, "grad_norm": 1.0813183784484863, "learning_rate": 2.133853541416567e-05, "loss": 0.5093, "step": 14338 }, { "epoch": 18.35392, "grad_norm": 1.0807271003723145, "learning_rate": 2.1336534613845537e-05, "loss": 0.5005, "step": 14339 }, { "epoch": 18.3552, "grad_norm": 1.1022156476974487, "learning_rate": 2.1334533813525412e-05, "loss": 0.5259, "step": 14340 }, { "epoch": 18.35648, "grad_norm": 1.0361143350601196, "learning_rate": 2.1332533013205284e-05, "loss": 0.5219, "step": 14341 }, { "epoch": 18.35776, "grad_norm": 1.035565972328186, "learning_rate": 2.1330532212885156e-05, "loss": 
0.4797, "step": 14342 }, { "epoch": 18.35904, "grad_norm": 1.088816523551941, "learning_rate": 2.1328531412565028e-05, "loss": 0.4918, "step": 14343 }, { "epoch": 18.36032, "grad_norm": 1.0977002382278442, "learning_rate": 2.13265306122449e-05, "loss": 0.5464, "step": 14344 }, { "epoch": 18.3616, "grad_norm": 1.0365880727767944, "learning_rate": 2.132452981192477e-05, "loss": 0.5105, "step": 14345 }, { "epoch": 18.36288, "grad_norm": 1.0541160106658936, "learning_rate": 2.1322529011604643e-05, "loss": 0.5156, "step": 14346 }, { "epoch": 18.36416, "grad_norm": 1.0975985527038574, "learning_rate": 2.1320528211284515e-05, "loss": 0.5529, "step": 14347 }, { "epoch": 18.36544, "grad_norm": 1.104960322380066, "learning_rate": 2.1318527410964387e-05, "loss": 0.5088, "step": 14348 }, { "epoch": 18.36672, "grad_norm": 1.0740171670913696, "learning_rate": 2.131652661064426e-05, "loss": 0.508, "step": 14349 }, { "epoch": 18.368, "grad_norm": 1.1651798486709595, "learning_rate": 2.131452581032413e-05, "loss": 0.5813, "step": 14350 }, { "epoch": 18.36928, "grad_norm": 1.0417823791503906, "learning_rate": 2.1312525010004003e-05, "loss": 0.5102, "step": 14351 }, { "epoch": 18.37056, "grad_norm": 1.0664454698562622, "learning_rate": 2.1310524209683874e-05, "loss": 0.544, "step": 14352 }, { "epoch": 18.37184, "grad_norm": 1.0032341480255127, "learning_rate": 2.1308523409363746e-05, "loss": 0.4847, "step": 14353 }, { "epoch": 18.37312, "grad_norm": 1.0495922565460205, "learning_rate": 2.1306522609043618e-05, "loss": 0.4937, "step": 14354 }, { "epoch": 18.3744, "grad_norm": 1.0448967218399048, "learning_rate": 2.130452180872349e-05, "loss": 0.5093, "step": 14355 }, { "epoch": 18.37568, "grad_norm": 1.141594648361206, "learning_rate": 2.1302521008403362e-05, "loss": 0.5038, "step": 14356 }, { "epoch": 18.37696, "grad_norm": 1.0529148578643799, "learning_rate": 2.1300520208083237e-05, "loss": 0.5381, "step": 14357 }, { "epoch": 18.37824, "grad_norm": 1.064096450805664, "learning_rate": 
2.1298519407763106e-05, "loss": 0.4849, "step": 14358 }, { "epoch": 18.37952, "grad_norm": 1.0783796310424805, "learning_rate": 2.1296518607442977e-05, "loss": 0.483, "step": 14359 }, { "epoch": 18.3808, "grad_norm": 1.0386112928390503, "learning_rate": 2.129451780712285e-05, "loss": 0.5142, "step": 14360 }, { "epoch": 18.38208, "grad_norm": 1.104622721672058, "learning_rate": 2.1292517006802725e-05, "loss": 0.5947, "step": 14361 }, { "epoch": 18.38336, "grad_norm": 1.0468897819519043, "learning_rate": 2.1290516206482593e-05, "loss": 0.4647, "step": 14362 }, { "epoch": 18.38464, "grad_norm": 1.0939141511917114, "learning_rate": 2.1288515406162465e-05, "loss": 0.5201, "step": 14363 }, { "epoch": 18.38592, "grad_norm": 1.0154157876968384, "learning_rate": 2.128651460584234e-05, "loss": 0.4947, "step": 14364 }, { "epoch": 18.3872, "grad_norm": 1.0433145761489868, "learning_rate": 2.1284513805522212e-05, "loss": 0.4675, "step": 14365 }, { "epoch": 18.38848, "grad_norm": 1.0545381307601929, "learning_rate": 2.128251300520208e-05, "loss": 0.5372, "step": 14366 }, { "epoch": 18.38976, "grad_norm": 1.0403685569763184, "learning_rate": 2.1280512204881952e-05, "loss": 0.5044, "step": 14367 }, { "epoch": 18.39104, "grad_norm": 1.0547093152999878, "learning_rate": 2.1278511404561828e-05, "loss": 0.5217, "step": 14368 }, { "epoch": 18.39232, "grad_norm": 1.0452656745910645, "learning_rate": 2.12765106042417e-05, "loss": 0.5011, "step": 14369 }, { "epoch": 18.3936, "grad_norm": 1.124005675315857, "learning_rate": 2.1274509803921568e-05, "loss": 0.5562, "step": 14370 }, { "epoch": 18.39488, "grad_norm": 1.0460633039474487, "learning_rate": 2.1272509003601443e-05, "loss": 0.4712, "step": 14371 }, { "epoch": 18.39616, "grad_norm": 1.0718797445297241, "learning_rate": 2.1270508203281315e-05, "loss": 0.5197, "step": 14372 }, { "epoch": 18.39744, "grad_norm": 1.036122441291809, "learning_rate": 2.1268507402961187e-05, "loss": 0.4993, "step": 14373 }, { "epoch": 18.39872, "grad_norm": 
1.087517499923706, "learning_rate": 2.1266506602641055e-05, "loss": 0.5288, "step": 14374 }, { "epoch": 18.4, "grad_norm": 1.0326744318008423, "learning_rate": 2.126450580232093e-05, "loss": 0.4968, "step": 14375 }, { "epoch": 18.40128, "grad_norm": 1.0811477899551392, "learning_rate": 2.1262505002000802e-05, "loss": 0.5151, "step": 14376 }, { "epoch": 18.40256, "grad_norm": 1.0682767629623413, "learning_rate": 2.1260504201680674e-05, "loss": 0.5454, "step": 14377 }, { "epoch": 18.40384, "grad_norm": 1.0771913528442383, "learning_rate": 2.1258503401360543e-05, "loss": 0.5331, "step": 14378 }, { "epoch": 18.40512, "grad_norm": 1.0536373853683472, "learning_rate": 2.1256502601040418e-05, "loss": 0.5086, "step": 14379 }, { "epoch": 18.4064, "grad_norm": 1.0189549922943115, "learning_rate": 2.125450180072029e-05, "loss": 0.497, "step": 14380 }, { "epoch": 18.40768, "grad_norm": 1.0349353551864624, "learning_rate": 2.1252501000400162e-05, "loss": 0.4787, "step": 14381 }, { "epoch": 18.40896, "grad_norm": 1.0494370460510254, "learning_rate": 2.1250500200080034e-05, "loss": 0.5013, "step": 14382 }, { "epoch": 18.41024, "grad_norm": 1.0900458097457886, "learning_rate": 2.1248499399759905e-05, "loss": 0.5222, "step": 14383 }, { "epoch": 18.41152, "grad_norm": 1.0162975788116455, "learning_rate": 2.1246498599439777e-05, "loss": 0.4884, "step": 14384 }, { "epoch": 18.4128, "grad_norm": 1.079209804534912, "learning_rate": 2.124449779911965e-05, "loss": 0.5416, "step": 14385 }, { "epoch": 18.41408, "grad_norm": 1.0865408182144165, "learning_rate": 2.124249699879952e-05, "loss": 0.5044, "step": 14386 }, { "epoch": 18.41536, "grad_norm": 1.0583715438842773, "learning_rate": 2.1240496198479393e-05, "loss": 0.5338, "step": 14387 }, { "epoch": 18.41664, "grad_norm": 1.0370880365371704, "learning_rate": 2.1238495398159265e-05, "loss": 0.4833, "step": 14388 }, { "epoch": 18.41792, "grad_norm": 1.0636683702468872, "learning_rate": 2.1236494597839137e-05, "loss": 0.4915, "step": 14389 
}, { "epoch": 18.4192, "grad_norm": 1.079131841659546, "learning_rate": 2.123449379751901e-05, "loss": 0.4931, "step": 14390 }, { "epoch": 18.42048, "grad_norm": 1.0714484453201294, "learning_rate": 2.123249299719888e-05, "loss": 0.4701, "step": 14391 }, { "epoch": 18.42176, "grad_norm": 1.1398820877075195, "learning_rate": 2.1230492196878752e-05, "loss": 0.5147, "step": 14392 }, { "epoch": 18.42304, "grad_norm": 1.0825884342193604, "learning_rate": 2.1228491396558624e-05, "loss": 0.4938, "step": 14393 }, { "epoch": 18.42432, "grad_norm": 1.0806691646575928, "learning_rate": 2.1226490596238496e-05, "loss": 0.5118, "step": 14394 }, { "epoch": 18.4256, "grad_norm": 1.0657216310501099, "learning_rate": 2.1224489795918368e-05, "loss": 0.5168, "step": 14395 }, { "epoch": 18.42688, "grad_norm": 1.0217489004135132, "learning_rate": 2.1222488995598243e-05, "loss": 0.4725, "step": 14396 }, { "epoch": 18.42816, "grad_norm": 1.0732911825180054, "learning_rate": 2.122048819527811e-05, "loss": 0.5417, "step": 14397 }, { "epoch": 18.42944, "grad_norm": 1.091342568397522, "learning_rate": 2.1218487394957983e-05, "loss": 0.5097, "step": 14398 }, { "epoch": 18.43072, "grad_norm": 1.0096945762634277, "learning_rate": 2.1216486594637855e-05, "loss": 0.4584, "step": 14399 }, { "epoch": 18.432, "grad_norm": 0.9952658414840698, "learning_rate": 2.121448579431773e-05, "loss": 0.4642, "step": 14400 }, { "epoch": 18.43328, "grad_norm": 1.108870506286621, "learning_rate": 2.12124849939976e-05, "loss": 0.5275, "step": 14401 }, { "epoch": 18.43456, "grad_norm": 1.1087889671325684, "learning_rate": 2.121048419367747e-05, "loss": 0.5089, "step": 14402 }, { "epoch": 18.43584, "grad_norm": 1.057420253753662, "learning_rate": 2.1208483393357346e-05, "loss": 0.513, "step": 14403 }, { "epoch": 18.43712, "grad_norm": 1.1254922151565552, "learning_rate": 2.1206482593037218e-05, "loss": 0.5404, "step": 14404 }, { "epoch": 18.4384, "grad_norm": 1.0429891347885132, "learning_rate": 
2.1204481792717086e-05, "loss": 0.4935, "step": 14405 }, { "epoch": 18.43968, "grad_norm": 1.089011549949646, "learning_rate": 2.1202480992396958e-05, "loss": 0.5318, "step": 14406 }, { "epoch": 18.44096, "grad_norm": 1.080314040184021, "learning_rate": 2.1200480192076833e-05, "loss": 0.5239, "step": 14407 }, { "epoch": 18.44224, "grad_norm": 1.047875165939331, "learning_rate": 2.1198479391756705e-05, "loss": 0.4918, "step": 14408 }, { "epoch": 18.44352, "grad_norm": 1.0228180885314941, "learning_rate": 2.1196478591436574e-05, "loss": 0.4958, "step": 14409 }, { "epoch": 18.4448, "grad_norm": 1.0188748836517334, "learning_rate": 2.119447779111645e-05, "loss": 0.4798, "step": 14410 }, { "epoch": 18.44608, "grad_norm": 1.0608867406845093, "learning_rate": 2.119247699079632e-05, "loss": 0.5117, "step": 14411 }, { "epoch": 18.44736, "grad_norm": 1.0850874185562134, "learning_rate": 2.1190476190476193e-05, "loss": 0.5156, "step": 14412 }, { "epoch": 18.44864, "grad_norm": 1.093931794166565, "learning_rate": 2.118847539015606e-05, "loss": 0.5186, "step": 14413 }, { "epoch": 18.44992, "grad_norm": 1.0857752561569214, "learning_rate": 2.1186474589835936e-05, "loss": 0.5215, "step": 14414 }, { "epoch": 18.4512, "grad_norm": 1.0228488445281982, "learning_rate": 2.1184473789515808e-05, "loss": 0.4972, "step": 14415 }, { "epoch": 18.45248, "grad_norm": 0.979611337184906, "learning_rate": 2.118247298919568e-05, "loss": 0.4884, "step": 14416 }, { "epoch": 18.45376, "grad_norm": 1.0769535303115845, "learning_rate": 2.1180472188875552e-05, "loss": 0.53, "step": 14417 }, { "epoch": 18.45504, "grad_norm": 1.103134274482727, "learning_rate": 2.1178471388555424e-05, "loss": 0.5321, "step": 14418 }, { "epoch": 18.45632, "grad_norm": 1.0389997959136963, "learning_rate": 2.1176470588235296e-05, "loss": 0.4775, "step": 14419 }, { "epoch": 18.4576, "grad_norm": 1.014994740486145, "learning_rate": 2.1174469787915168e-05, "loss": 0.4521, "step": 14420 }, { "epoch": 18.45888, "grad_norm": 
1.0282139778137207, "learning_rate": 2.117246898759504e-05, "loss": 0.5012, "step": 14421 }, { "epoch": 18.46016, "grad_norm": 1.0462567806243896, "learning_rate": 2.117046818727491e-05, "loss": 0.5489, "step": 14422 }, { "epoch": 18.46144, "grad_norm": 1.0593222379684448, "learning_rate": 2.1168467386954783e-05, "loss": 0.528, "step": 14423 }, { "epoch": 18.46272, "grad_norm": 1.0701686143875122, "learning_rate": 2.1166466586634655e-05, "loss": 0.4987, "step": 14424 }, { "epoch": 18.464, "grad_norm": 1.0602664947509766, "learning_rate": 2.1164465786314527e-05, "loss": 0.503, "step": 14425 }, { "epoch": 18.46528, "grad_norm": 1.018227458000183, "learning_rate": 2.11624649859944e-05, "loss": 0.4681, "step": 14426 }, { "epoch": 18.46656, "grad_norm": 1.078520655632019, "learning_rate": 2.116046418567427e-05, "loss": 0.5599, "step": 14427 }, { "epoch": 18.46784, "grad_norm": 1.0530571937561035, "learning_rate": 2.1158463385354142e-05, "loss": 0.4804, "step": 14428 }, { "epoch": 18.46912, "grad_norm": 1.029566764831543, "learning_rate": 2.1156462585034014e-05, "loss": 0.4644, "step": 14429 }, { "epoch": 18.4704, "grad_norm": 1.0319150686264038, "learning_rate": 2.1154461784713886e-05, "loss": 0.4924, "step": 14430 }, { "epoch": 18.47168, "grad_norm": 1.114824891090393, "learning_rate": 2.115246098439376e-05, "loss": 0.57, "step": 14431 }, { "epoch": 18.47296, "grad_norm": 1.093182921409607, "learning_rate": 2.115046018407363e-05, "loss": 0.552, "step": 14432 }, { "epoch": 18.47424, "grad_norm": 1.050522804260254, "learning_rate": 2.11484593837535e-05, "loss": 0.4954, "step": 14433 }, { "epoch": 18.47552, "grad_norm": 1.0565167665481567, "learning_rate": 2.1146458583433374e-05, "loss": 0.5101, "step": 14434 }, { "epoch": 18.4768, "grad_norm": 1.065156102180481, "learning_rate": 2.114445778311325e-05, "loss": 0.5184, "step": 14435 }, { "epoch": 18.47808, "grad_norm": 0.9988115429878235, "learning_rate": 2.1142456982793117e-05, "loss": 0.4639, "step": 14436 }, { "epoch": 
18.47936, "grad_norm": 1.0403028726577759, "learning_rate": 2.114045618247299e-05, "loss": 0.4858, "step": 14437 }, { "epoch": 18.48064, "grad_norm": 1.0715668201446533, "learning_rate": 2.113845538215286e-05, "loss": 0.5176, "step": 14438 }, { "epoch": 18.48192, "grad_norm": 0.9925726652145386, "learning_rate": 2.1136454581832736e-05, "loss": 0.4261, "step": 14439 }, { "epoch": 18.4832, "grad_norm": 1.02934730052948, "learning_rate": 2.1134453781512605e-05, "loss": 0.491, "step": 14440 }, { "epoch": 18.48448, "grad_norm": 1.0200088024139404, "learning_rate": 2.1132452981192476e-05, "loss": 0.4953, "step": 14441 }, { "epoch": 18.48576, "grad_norm": 1.080019235610962, "learning_rate": 2.1130452180872352e-05, "loss": 0.5132, "step": 14442 }, { "epoch": 18.48704, "grad_norm": 1.0620362758636475, "learning_rate": 2.1128451380552224e-05, "loss": 0.5103, "step": 14443 }, { "epoch": 18.48832, "grad_norm": 1.0634804964065552, "learning_rate": 2.1126450580232092e-05, "loss": 0.5085, "step": 14444 }, { "epoch": 18.4896, "grad_norm": 1.110755443572998, "learning_rate": 2.1124449779911964e-05, "loss": 0.532, "step": 14445 }, { "epoch": 18.49088, "grad_norm": 1.0549490451812744, "learning_rate": 2.112244897959184e-05, "loss": 0.4641, "step": 14446 }, { "epoch": 18.49216, "grad_norm": 1.0575416088104248, "learning_rate": 2.112044817927171e-05, "loss": 0.4892, "step": 14447 }, { "epoch": 18.49344, "grad_norm": 1.1010053157806396, "learning_rate": 2.111844737895158e-05, "loss": 0.4996, "step": 14448 }, { "epoch": 18.49472, "grad_norm": 1.0477495193481445, "learning_rate": 2.1116446578631455e-05, "loss": 0.4811, "step": 14449 }, { "epoch": 18.496, "grad_norm": 1.125105619430542, "learning_rate": 2.1114445778311327e-05, "loss": 0.5519, "step": 14450 }, { "epoch": 18.49728, "grad_norm": 1.1108721494674683, "learning_rate": 2.11124449779912e-05, "loss": 0.5117, "step": 14451 }, { "epoch": 18.49856, "grad_norm": 1.0684090852737427, "learning_rate": 2.1110444177671067e-05, "loss": 
0.4912, "step": 14452 }, { "epoch": 18.49984, "grad_norm": 1.1287049055099487, "learning_rate": 2.1108443377350942e-05, "loss": 0.5469, "step": 14453 }, { "epoch": 18.50112, "grad_norm": 1.048940658569336, "learning_rate": 2.1106442577030814e-05, "loss": 0.4936, "step": 14454 }, { "epoch": 18.5024, "grad_norm": 1.0817766189575195, "learning_rate": 2.1104441776710686e-05, "loss": 0.5038, "step": 14455 }, { "epoch": 18.50368, "grad_norm": 1.0768336057662964, "learning_rate": 2.1102440976390558e-05, "loss": 0.5887, "step": 14456 }, { "epoch": 18.50496, "grad_norm": 1.0072085857391357, "learning_rate": 2.110044017607043e-05, "loss": 0.478, "step": 14457 }, { "epoch": 18.50624, "grad_norm": 0.9800832867622375, "learning_rate": 2.10984393757503e-05, "loss": 0.4799, "step": 14458 }, { "epoch": 18.50752, "grad_norm": 1.042318344116211, "learning_rate": 2.1096438575430173e-05, "loss": 0.5065, "step": 14459 }, { "epoch": 18.5088, "grad_norm": 1.1013107299804688, "learning_rate": 2.1094437775110045e-05, "loss": 0.6008, "step": 14460 }, { "epoch": 18.51008, "grad_norm": 0.9804616570472717, "learning_rate": 2.1092436974789917e-05, "loss": 0.4585, "step": 14461 }, { "epoch": 18.51136, "grad_norm": 1.0614451169967651, "learning_rate": 2.109043617446979e-05, "loss": 0.4966, "step": 14462 }, { "epoch": 18.51264, "grad_norm": 0.9891878962516785, "learning_rate": 2.108843537414966e-05, "loss": 0.516, "step": 14463 }, { "epoch": 18.51392, "grad_norm": 1.0645172595977783, "learning_rate": 2.1086434573829533e-05, "loss": 0.5074, "step": 14464 }, { "epoch": 18.5152, "grad_norm": 1.0641040802001953, "learning_rate": 2.1084433773509404e-05, "loss": 0.5201, "step": 14465 }, { "epoch": 18.51648, "grad_norm": 0.9872971177101135, "learning_rate": 2.1082432973189276e-05, "loss": 0.4698, "step": 14466 }, { "epoch": 18.51776, "grad_norm": 1.167199730873108, "learning_rate": 2.1080432172869148e-05, "loss": 0.5102, "step": 14467 }, { "epoch": 18.51904, "grad_norm": 1.1635416746139526, 
"learning_rate": 2.107843137254902e-05, "loss": 0.5385, "step": 14468 }, { "epoch": 18.52032, "grad_norm": 1.1156731843948364, "learning_rate": 2.1076430572228892e-05, "loss": 0.5442, "step": 14469 }, { "epoch": 18.5216, "grad_norm": 1.0268969535827637, "learning_rate": 2.1074429771908767e-05, "loss": 0.4385, "step": 14470 }, { "epoch": 18.52288, "grad_norm": 1.0554258823394775, "learning_rate": 2.1072428971588636e-05, "loss": 0.5172, "step": 14471 }, { "epoch": 18.52416, "grad_norm": 1.07488214969635, "learning_rate": 2.1070428171268507e-05, "loss": 0.5373, "step": 14472 }, { "epoch": 18.52544, "grad_norm": 0.9701617360115051, "learning_rate": 2.106842737094838e-05, "loss": 0.4699, "step": 14473 }, { "epoch": 18.52672, "grad_norm": 1.0536491870880127, "learning_rate": 2.1066426570628255e-05, "loss": 0.5075, "step": 14474 }, { "epoch": 18.528, "grad_norm": 1.1077998876571655, "learning_rate": 2.1064425770308123e-05, "loss": 0.559, "step": 14475 }, { "epoch": 18.52928, "grad_norm": 1.043217658996582, "learning_rate": 2.1062424969987995e-05, "loss": 0.4921, "step": 14476 }, { "epoch": 18.53056, "grad_norm": 1.0634691715240479, "learning_rate": 2.106042416966787e-05, "loss": 0.4999, "step": 14477 }, { "epoch": 18.53184, "grad_norm": 1.024617075920105, "learning_rate": 2.1058423369347742e-05, "loss": 0.5021, "step": 14478 }, { "epoch": 18.53312, "grad_norm": 1.026159405708313, "learning_rate": 2.105642256902761e-05, "loss": 0.5201, "step": 14479 }, { "epoch": 18.5344, "grad_norm": 1.085721731185913, "learning_rate": 2.1054421768707482e-05, "loss": 0.5542, "step": 14480 }, { "epoch": 18.53568, "grad_norm": 1.0542292594909668, "learning_rate": 2.1052420968387358e-05, "loss": 0.5239, "step": 14481 }, { "epoch": 18.53696, "grad_norm": 1.0141193866729736, "learning_rate": 2.105042016806723e-05, "loss": 0.5116, "step": 14482 }, { "epoch": 18.538240000000002, "grad_norm": 1.106048822402954, "learning_rate": 2.1048419367747098e-05, "loss": 0.5843, "step": 14483 }, { "epoch": 
18.53952, "grad_norm": 1.0620492696762085, "learning_rate": 2.1046418567426973e-05, "loss": 0.5049, "step": 14484 }, { "epoch": 18.5408, "grad_norm": 1.0204299688339233, "learning_rate": 2.1044417767106845e-05, "loss": 0.507, "step": 14485 }, { "epoch": 18.54208, "grad_norm": 1.0955986976623535, "learning_rate": 2.1042416966786717e-05, "loss": 0.5323, "step": 14486 }, { "epoch": 18.54336, "grad_norm": 1.0099011659622192, "learning_rate": 2.1040416166466585e-05, "loss": 0.5007, "step": 14487 }, { "epoch": 18.54464, "grad_norm": 0.9762060642242432, "learning_rate": 2.103841536614646e-05, "loss": 0.4201, "step": 14488 }, { "epoch": 18.54592, "grad_norm": 1.060850739479065, "learning_rate": 2.1036414565826332e-05, "loss": 0.4815, "step": 14489 }, { "epoch": 18.5472, "grad_norm": 1.077972173690796, "learning_rate": 2.1034413765506204e-05, "loss": 0.5148, "step": 14490 }, { "epoch": 18.54848, "grad_norm": 1.1198941469192505, "learning_rate": 2.1032412965186073e-05, "loss": 0.5361, "step": 14491 }, { "epoch": 18.54976, "grad_norm": 1.1299846172332764, "learning_rate": 2.1030412164865948e-05, "loss": 0.5398, "step": 14492 }, { "epoch": 18.55104, "grad_norm": 0.9961510896682739, "learning_rate": 2.102841136454582e-05, "loss": 0.425, "step": 14493 }, { "epoch": 18.55232, "grad_norm": 1.0336405038833618, "learning_rate": 2.102641056422569e-05, "loss": 0.5029, "step": 14494 }, { "epoch": 18.5536, "grad_norm": 1.073876142501831, "learning_rate": 2.1024409763905564e-05, "loss": 0.4941, "step": 14495 }, { "epoch": 18.55488, "grad_norm": 1.0787442922592163, "learning_rate": 2.1022408963585435e-05, "loss": 0.5394, "step": 14496 }, { "epoch": 18.55616, "grad_norm": 1.0297425985336304, "learning_rate": 2.1020408163265307e-05, "loss": 0.4841, "step": 14497 }, { "epoch": 18.55744, "grad_norm": 1.0471930503845215, "learning_rate": 2.101840736294518e-05, "loss": 0.4956, "step": 14498 }, { "epoch": 18.55872, "grad_norm": 1.0681778192520142, "learning_rate": 2.101640656262505e-05, "loss": 
0.5614, "step": 14499 }, { "epoch": 18.56, "grad_norm": 1.0257924795150757, "learning_rate": 2.1014405762304923e-05, "loss": 0.4542, "step": 14500 }, { "epoch": 18.56128, "grad_norm": 1.028685450553894, "learning_rate": 2.1012404961984795e-05, "loss": 0.4824, "step": 14501 }, { "epoch": 18.56256, "grad_norm": 1.0713627338409424, "learning_rate": 2.1010404161664667e-05, "loss": 0.543, "step": 14502 }, { "epoch": 18.56384, "grad_norm": 1.0989772081375122, "learning_rate": 2.100840336134454e-05, "loss": 0.5345, "step": 14503 }, { "epoch": 18.56512, "grad_norm": 1.0844435691833496, "learning_rate": 2.100640256102441e-05, "loss": 0.514, "step": 14504 }, { "epoch": 18.5664, "grad_norm": 1.0661566257476807, "learning_rate": 2.1004401760704282e-05, "loss": 0.5177, "step": 14505 }, { "epoch": 18.56768, "grad_norm": 1.0676720142364502, "learning_rate": 2.1002400960384154e-05, "loss": 0.5007, "step": 14506 }, { "epoch": 18.56896, "grad_norm": 1.0638859272003174, "learning_rate": 2.1000400160064026e-05, "loss": 0.5185, "step": 14507 }, { "epoch": 18.57024, "grad_norm": 1.020068883895874, "learning_rate": 2.0998399359743898e-05, "loss": 0.4734, "step": 14508 }, { "epoch": 18.57152, "grad_norm": 1.103440523147583, "learning_rate": 2.0996398559423773e-05, "loss": 0.5404, "step": 14509 }, { "epoch": 18.5728, "grad_norm": 1.064695119857788, "learning_rate": 2.099439775910364e-05, "loss": 0.5363, "step": 14510 }, { "epoch": 18.57408, "grad_norm": 1.0438659191131592, "learning_rate": 2.0992396958783513e-05, "loss": 0.534, "step": 14511 }, { "epoch": 18.57536, "grad_norm": 1.0252081155776978, "learning_rate": 2.0990396158463385e-05, "loss": 0.4815, "step": 14512 }, { "epoch": 18.57664, "grad_norm": 1.0469212532043457, "learning_rate": 2.098839535814326e-05, "loss": 0.5046, "step": 14513 }, { "epoch": 18.57792, "grad_norm": 1.011010766029358, "learning_rate": 2.098639455782313e-05, "loss": 0.4726, "step": 14514 }, { "epoch": 18.5792, "grad_norm": 1.0593057870864868, "learning_rate": 
2.0984393757503e-05, "loss": 0.5198, "step": 14515 }, { "epoch": 18.58048, "grad_norm": 1.0215234756469727, "learning_rate": 2.0982392957182876e-05, "loss": 0.5168, "step": 14516 }, { "epoch": 18.58176, "grad_norm": 1.0414518117904663, "learning_rate": 2.0980392156862748e-05, "loss": 0.5049, "step": 14517 }, { "epoch": 18.58304, "grad_norm": 1.1356487274169922, "learning_rate": 2.0978391356542616e-05, "loss": 0.5467, "step": 14518 }, { "epoch": 18.584319999999998, "grad_norm": 1.0748487710952759, "learning_rate": 2.0976390556222488e-05, "loss": 0.5602, "step": 14519 }, { "epoch": 18.5856, "grad_norm": 1.0394898653030396, "learning_rate": 2.0974389755902363e-05, "loss": 0.5286, "step": 14520 }, { "epoch": 18.58688, "grad_norm": 1.0443836450576782, "learning_rate": 2.0972388955582235e-05, "loss": 0.5007, "step": 14521 }, { "epoch": 18.58816, "grad_norm": 1.083796739578247, "learning_rate": 2.0970388155262104e-05, "loss": 0.5159, "step": 14522 }, { "epoch": 18.58944, "grad_norm": 1.0455292463302612, "learning_rate": 2.096838735494198e-05, "loss": 0.4783, "step": 14523 }, { "epoch": 18.59072, "grad_norm": 1.0481806993484497, "learning_rate": 2.096638655462185e-05, "loss": 0.5356, "step": 14524 }, { "epoch": 18.592, "grad_norm": 0.9991112947463989, "learning_rate": 2.0964385754301723e-05, "loss": 0.479, "step": 14525 }, { "epoch": 18.59328, "grad_norm": 1.079669713973999, "learning_rate": 2.096238495398159e-05, "loss": 0.5138, "step": 14526 }, { "epoch": 18.59456, "grad_norm": 1.0523369312286377, "learning_rate": 2.0960384153661466e-05, "loss": 0.4889, "step": 14527 }, { "epoch": 18.59584, "grad_norm": 1.1188772916793823, "learning_rate": 2.0958383353341338e-05, "loss": 0.5201, "step": 14528 }, { "epoch": 18.59712, "grad_norm": 1.111080527305603, "learning_rate": 2.095638255302121e-05, "loss": 0.4918, "step": 14529 }, { "epoch": 18.5984, "grad_norm": 1.019318699836731, "learning_rate": 2.0954381752701082e-05, "loss": 0.4547, "step": 14530 }, { "epoch": 18.59968, 
"grad_norm": 1.0236701965332031, "learning_rate": 2.0952380952380954e-05, "loss": 0.4956, "step": 14531 }, { "epoch": 18.60096, "grad_norm": 1.06638503074646, "learning_rate": 2.0950380152060826e-05, "loss": 0.4941, "step": 14532 }, { "epoch": 18.60224, "grad_norm": 1.0441734790802002, "learning_rate": 2.0948379351740697e-05, "loss": 0.4714, "step": 14533 }, { "epoch": 18.60352, "grad_norm": 1.0851494073867798, "learning_rate": 2.094637855142057e-05, "loss": 0.5392, "step": 14534 }, { "epoch": 18.6048, "grad_norm": 1.0086928606033325, "learning_rate": 2.094437775110044e-05, "loss": 0.4795, "step": 14535 }, { "epoch": 18.60608, "grad_norm": 1.0636036396026611, "learning_rate": 2.0942376950780313e-05, "loss": 0.5342, "step": 14536 }, { "epoch": 18.60736, "grad_norm": 1.0684483051300049, "learning_rate": 2.0940376150460185e-05, "loss": 0.5094, "step": 14537 }, { "epoch": 18.60864, "grad_norm": 1.0535985231399536, "learning_rate": 2.0938375350140057e-05, "loss": 0.4892, "step": 14538 }, { "epoch": 18.60992, "grad_norm": 1.0609737634658813, "learning_rate": 2.093637454981993e-05, "loss": 0.5016, "step": 14539 }, { "epoch": 18.6112, "grad_norm": 1.0694406032562256, "learning_rate": 2.09343737494998e-05, "loss": 0.5134, "step": 14540 }, { "epoch": 18.61248, "grad_norm": 1.0116039514541626, "learning_rate": 2.0932372949179672e-05, "loss": 0.4585, "step": 14541 }, { "epoch": 18.61376, "grad_norm": 1.125255823135376, "learning_rate": 2.0930372148859544e-05, "loss": 0.5355, "step": 14542 }, { "epoch": 18.61504, "grad_norm": 1.111659288406372, "learning_rate": 2.0928371348539416e-05, "loss": 0.5312, "step": 14543 }, { "epoch": 18.61632, "grad_norm": 1.1183501482009888, "learning_rate": 2.092637054821929e-05, "loss": 0.5408, "step": 14544 }, { "epoch": 18.6176, "grad_norm": 1.0673730373382568, "learning_rate": 2.092436974789916e-05, "loss": 0.5031, "step": 14545 }, { "epoch": 18.61888, "grad_norm": 1.039713978767395, "learning_rate": 2.092236894757903e-05, "loss": 0.5115, 
"step": 14546 }, { "epoch": 18.62016, "grad_norm": 1.0131070613861084, "learning_rate": 2.0920368147258903e-05, "loss": 0.4742, "step": 14547 }, { "epoch": 18.62144, "grad_norm": 1.0833195447921753, "learning_rate": 2.091836734693878e-05, "loss": 0.5393, "step": 14548 }, { "epoch": 18.62272, "grad_norm": 1.193190097808838, "learning_rate": 2.0916366546618647e-05, "loss": 0.5043, "step": 14549 }, { "epoch": 18.624, "grad_norm": 1.0585458278656006, "learning_rate": 2.091436574629852e-05, "loss": 0.4841, "step": 14550 }, { "epoch": 18.62528, "grad_norm": 1.0493146181106567, "learning_rate": 2.091236494597839e-05, "loss": 0.5084, "step": 14551 }, { "epoch": 18.62656, "grad_norm": 1.0405904054641724, "learning_rate": 2.0910364145658266e-05, "loss": 0.5273, "step": 14552 }, { "epoch": 18.62784, "grad_norm": 1.0221456289291382, "learning_rate": 2.0908363345338135e-05, "loss": 0.4846, "step": 14553 }, { "epoch": 18.62912, "grad_norm": 1.0705711841583252, "learning_rate": 2.0906362545018006e-05, "loss": 0.5271, "step": 14554 }, { "epoch": 18.6304, "grad_norm": 1.0186147689819336, "learning_rate": 2.0904361744697882e-05, "loss": 0.4886, "step": 14555 }, { "epoch": 18.63168, "grad_norm": 1.0867236852645874, "learning_rate": 2.0902360944377754e-05, "loss": 0.517, "step": 14556 }, { "epoch": 18.63296, "grad_norm": 1.061066746711731, "learning_rate": 2.0900360144057622e-05, "loss": 0.5189, "step": 14557 }, { "epoch": 18.63424, "grad_norm": 1.0720415115356445, "learning_rate": 2.0898359343737494e-05, "loss": 0.5382, "step": 14558 }, { "epoch": 18.63552, "grad_norm": 1.061384916305542, "learning_rate": 2.089635854341737e-05, "loss": 0.5331, "step": 14559 }, { "epoch": 18.6368, "grad_norm": 1.0454514026641846, "learning_rate": 2.089435774309724e-05, "loss": 0.4793, "step": 14560 }, { "epoch": 18.63808, "grad_norm": 1.0604616403579712, "learning_rate": 2.089235694277711e-05, "loss": 0.4637, "step": 14561 }, { "epoch": 18.63936, "grad_norm": 1.0945936441421509, "learning_rate": 
2.0890356142456985e-05, "loss": 0.5413, "step": 14562 }, { "epoch": 18.64064, "grad_norm": 1.0144293308258057, "learning_rate": 2.0888355342136857e-05, "loss": 0.5071, "step": 14563 }, { "epoch": 18.64192, "grad_norm": 1.0849329233169556, "learning_rate": 2.088635454181673e-05, "loss": 0.5343, "step": 14564 }, { "epoch": 18.6432, "grad_norm": 0.9900054931640625, "learning_rate": 2.0884353741496597e-05, "loss": 0.4709, "step": 14565 }, { "epoch": 18.64448, "grad_norm": 1.0568320751190186, "learning_rate": 2.0882352941176472e-05, "loss": 0.4829, "step": 14566 }, { "epoch": 18.64576, "grad_norm": 1.1182734966278076, "learning_rate": 2.0880352140856344e-05, "loss": 0.5742, "step": 14567 }, { "epoch": 18.64704, "grad_norm": 1.1429896354675293, "learning_rate": 2.0878351340536216e-05, "loss": 0.5731, "step": 14568 }, { "epoch": 18.64832, "grad_norm": 1.0689339637756348, "learning_rate": 2.0876350540216088e-05, "loss": 0.496, "step": 14569 }, { "epoch": 18.6496, "grad_norm": 1.0403820276260376, "learning_rate": 2.087434973989596e-05, "loss": 0.4286, "step": 14570 }, { "epoch": 18.65088, "grad_norm": 1.0727622509002686, "learning_rate": 2.087234893957583e-05, "loss": 0.5124, "step": 14571 }, { "epoch": 18.65216, "grad_norm": 1.0290236473083496, "learning_rate": 2.0870348139255703e-05, "loss": 0.4925, "step": 14572 }, { "epoch": 18.65344, "grad_norm": 1.0462875366210938, "learning_rate": 2.0868347338935575e-05, "loss": 0.4666, "step": 14573 }, { "epoch": 18.65472, "grad_norm": 1.0664476156234741, "learning_rate": 2.0866346538615447e-05, "loss": 0.5112, "step": 14574 }, { "epoch": 18.656, "grad_norm": 1.0481314659118652, "learning_rate": 2.086434573829532e-05, "loss": 0.4763, "step": 14575 }, { "epoch": 18.65728, "grad_norm": 1.0135314464569092, "learning_rate": 2.086234493797519e-05, "loss": 0.4885, "step": 14576 }, { "epoch": 18.65856, "grad_norm": 0.9977083206176758, "learning_rate": 2.0860344137655063e-05, "loss": 0.4949, "step": 14577 }, { "epoch": 18.65984, 
"grad_norm": 1.043670892715454, "learning_rate": 2.0858343337334934e-05, "loss": 0.5031, "step": 14578 }, { "epoch": 18.66112, "grad_norm": 1.0433337688446045, "learning_rate": 2.0856342537014806e-05, "loss": 0.5061, "step": 14579 }, { "epoch": 18.6624, "grad_norm": 1.006962537765503, "learning_rate": 2.0854341736694678e-05, "loss": 0.5039, "step": 14580 }, { "epoch": 18.66368, "grad_norm": 1.046007513999939, "learning_rate": 2.085234093637455e-05, "loss": 0.5434, "step": 14581 }, { "epoch": 18.66496, "grad_norm": 1.0263768434524536, "learning_rate": 2.0850340136054422e-05, "loss": 0.5117, "step": 14582 }, { "epoch": 18.66624, "grad_norm": 1.0261712074279785, "learning_rate": 2.0848339335734297e-05, "loss": 0.4705, "step": 14583 }, { "epoch": 18.66752, "grad_norm": 1.0606975555419922, "learning_rate": 2.0846338535414166e-05, "loss": 0.4931, "step": 14584 }, { "epoch": 18.6688, "grad_norm": 1.0955727100372314, "learning_rate": 2.0844337735094037e-05, "loss": 0.5149, "step": 14585 }, { "epoch": 18.67008, "grad_norm": 0.9943707585334778, "learning_rate": 2.084233693477391e-05, "loss": 0.4668, "step": 14586 }, { "epoch": 18.67136, "grad_norm": 1.082153081893921, "learning_rate": 2.0840336134453785e-05, "loss": 0.5313, "step": 14587 }, { "epoch": 18.67264, "grad_norm": 1.0203741788864136, "learning_rate": 2.0838335334133653e-05, "loss": 0.4759, "step": 14588 }, { "epoch": 18.67392, "grad_norm": 1.0411763191223145, "learning_rate": 2.0836334533813525e-05, "loss": 0.5068, "step": 14589 }, { "epoch": 18.6752, "grad_norm": 1.0571635961532593, "learning_rate": 2.08343337334934e-05, "loss": 0.5386, "step": 14590 }, { "epoch": 18.67648, "grad_norm": 1.0805469751358032, "learning_rate": 2.0832332933173272e-05, "loss": 0.5289, "step": 14591 }, { "epoch": 18.67776, "grad_norm": 1.01336669921875, "learning_rate": 2.083033213285314e-05, "loss": 0.4634, "step": 14592 }, { "epoch": 18.67904, "grad_norm": 1.0346022844314575, "learning_rate": 2.0828331332533012e-05, "loss": 0.504, 
"step": 14593 }, { "epoch": 18.680320000000002, "grad_norm": 1.0586795806884766, "learning_rate": 2.0826330532212888e-05, "loss": 0.4977, "step": 14594 }, { "epoch": 18.6816, "grad_norm": 1.0512717962265015, "learning_rate": 2.082432973189276e-05, "loss": 0.4652, "step": 14595 }, { "epoch": 18.68288, "grad_norm": 1.0366772413253784, "learning_rate": 2.0822328931572628e-05, "loss": 0.4688, "step": 14596 }, { "epoch": 18.68416, "grad_norm": 1.096342921257019, "learning_rate": 2.0820328131252503e-05, "loss": 0.5036, "step": 14597 }, { "epoch": 18.68544, "grad_norm": 1.0564435720443726, "learning_rate": 2.0818327330932375e-05, "loss": 0.4801, "step": 14598 }, { "epoch": 18.68672, "grad_norm": 1.0218154191970825, "learning_rate": 2.0816326530612247e-05, "loss": 0.5102, "step": 14599 }, { "epoch": 18.688, "grad_norm": 1.0529659986495972, "learning_rate": 2.0814325730292115e-05, "loss": 0.5007, "step": 14600 }, { "epoch": 18.68928, "grad_norm": 1.0413590669631958, "learning_rate": 2.081232492997199e-05, "loss": 0.4678, "step": 14601 }, { "epoch": 18.69056, "grad_norm": 1.0885825157165527, "learning_rate": 2.0810324129651862e-05, "loss": 0.5116, "step": 14602 }, { "epoch": 18.69184, "grad_norm": 1.0246473550796509, "learning_rate": 2.0808323329331734e-05, "loss": 0.4818, "step": 14603 }, { "epoch": 18.69312, "grad_norm": 1.1003787517547607, "learning_rate": 2.0806322529011603e-05, "loss": 0.5168, "step": 14604 }, { "epoch": 18.6944, "grad_norm": 1.0684752464294434, "learning_rate": 2.0804321728691478e-05, "loss": 0.4637, "step": 14605 }, { "epoch": 18.69568, "grad_norm": 1.1062880754470825, "learning_rate": 2.080232092837135e-05, "loss": 0.5151, "step": 14606 }, { "epoch": 18.69696, "grad_norm": 1.0668532848358154, "learning_rate": 2.080032012805122e-05, "loss": 0.5465, "step": 14607 }, { "epoch": 18.69824, "grad_norm": 1.0295997858047485, "learning_rate": 2.0798319327731094e-05, "loss": 0.4645, "step": 14608 }, { "epoch": 18.69952, "grad_norm": 1.020206093788147, 
"learning_rate": 2.0796318527410965e-05, "loss": 0.4971, "step": 14609 }, { "epoch": 18.7008, "grad_norm": 1.0235021114349365, "learning_rate": 2.0794317727090837e-05, "loss": 0.5198, "step": 14610 }, { "epoch": 18.70208, "grad_norm": 1.0551282167434692, "learning_rate": 2.079231692677071e-05, "loss": 0.5083, "step": 14611 }, { "epoch": 18.70336, "grad_norm": 1.0582573413848877, "learning_rate": 2.079031612645058e-05, "loss": 0.5274, "step": 14612 }, { "epoch": 18.70464, "grad_norm": 1.0594959259033203, "learning_rate": 2.0788315326130453e-05, "loss": 0.4954, "step": 14613 }, { "epoch": 18.70592, "grad_norm": 1.0503864288330078, "learning_rate": 2.0786314525810325e-05, "loss": 0.5453, "step": 14614 }, { "epoch": 18.7072, "grad_norm": 1.0637823343276978, "learning_rate": 2.0784313725490197e-05, "loss": 0.5736, "step": 14615 }, { "epoch": 18.70848, "grad_norm": 1.0056936740875244, "learning_rate": 2.078231292517007e-05, "loss": 0.4663, "step": 14616 }, { "epoch": 18.70976, "grad_norm": 1.081607699394226, "learning_rate": 2.078031212484994e-05, "loss": 0.5333, "step": 14617 }, { "epoch": 18.71104, "grad_norm": 1.1000036001205444, "learning_rate": 2.0778311324529812e-05, "loss": 0.5505, "step": 14618 }, { "epoch": 18.71232, "grad_norm": 1.0323201417922974, "learning_rate": 2.0776310524209684e-05, "loss": 0.4896, "step": 14619 }, { "epoch": 18.7136, "grad_norm": 1.0627686977386475, "learning_rate": 2.0774309723889556e-05, "loss": 0.5104, "step": 14620 }, { "epoch": 18.71488, "grad_norm": 1.0364760160446167, "learning_rate": 2.0772308923569428e-05, "loss": 0.5054, "step": 14621 }, { "epoch": 18.71616, "grad_norm": 1.0415117740631104, "learning_rate": 2.0770308123249303e-05, "loss": 0.4796, "step": 14622 }, { "epoch": 18.71744, "grad_norm": 1.0772327184677124, "learning_rate": 2.076830732292917e-05, "loss": 0.527, "step": 14623 }, { "epoch": 18.71872, "grad_norm": 1.140191674232483, "learning_rate": 2.0766306522609043e-05, "loss": 0.5161, "step": 14624 }, { "epoch": 
18.72, "grad_norm": 1.1282105445861816, "learning_rate": 2.0764305722288915e-05, "loss": 0.5677, "step": 14625 }, { "epoch": 18.72128, "grad_norm": 1.060404658317566, "learning_rate": 2.076230492196879e-05, "loss": 0.5116, "step": 14626 }, { "epoch": 18.72256, "grad_norm": 1.0670077800750732, "learning_rate": 2.076030412164866e-05, "loss": 0.4953, "step": 14627 }, { "epoch": 18.72384, "grad_norm": 1.036635398864746, "learning_rate": 2.075830332132853e-05, "loss": 0.4991, "step": 14628 }, { "epoch": 18.72512, "grad_norm": 1.0683354139328003, "learning_rate": 2.0756302521008406e-05, "loss": 0.5511, "step": 14629 }, { "epoch": 18.7264, "grad_norm": 1.094277024269104, "learning_rate": 2.0754301720688278e-05, "loss": 0.5233, "step": 14630 }, { "epoch": 18.72768, "grad_norm": 1.093468189239502, "learning_rate": 2.0752300920368146e-05, "loss": 0.5616, "step": 14631 }, { "epoch": 18.72896, "grad_norm": 1.0831431150436401, "learning_rate": 2.0750300120048018e-05, "loss": 0.5404, "step": 14632 }, { "epoch": 18.73024, "grad_norm": 1.1620758771896362, "learning_rate": 2.0748299319727893e-05, "loss": 0.5539, "step": 14633 }, { "epoch": 18.73152, "grad_norm": 1.077682614326477, "learning_rate": 2.0746298519407765e-05, "loss": 0.4842, "step": 14634 }, { "epoch": 18.7328, "grad_norm": 1.0813980102539062, "learning_rate": 2.0744297719087634e-05, "loss": 0.4992, "step": 14635 }, { "epoch": 18.73408, "grad_norm": 1.1097936630249023, "learning_rate": 2.074229691876751e-05, "loss": 0.569, "step": 14636 }, { "epoch": 18.73536, "grad_norm": 1.0540353059768677, "learning_rate": 2.074029611844738e-05, "loss": 0.4809, "step": 14637 }, { "epoch": 18.73664, "grad_norm": 1.0522795915603638, "learning_rate": 2.0738295318127253e-05, "loss": 0.5611, "step": 14638 }, { "epoch": 18.73792, "grad_norm": 1.0796175003051758, "learning_rate": 2.073629451780712e-05, "loss": 0.522, "step": 14639 }, { "epoch": 18.7392, "grad_norm": 1.0437400341033936, "learning_rate": 2.0734293717486996e-05, "loss": 0.503, 
"step": 14640 }, { "epoch": 18.74048, "grad_norm": 1.0430114269256592, "learning_rate": 2.0732292917166868e-05, "loss": 0.4834, "step": 14641 }, { "epoch": 18.74176, "grad_norm": 1.015177845954895, "learning_rate": 2.073029211684674e-05, "loss": 0.4529, "step": 14642 }, { "epoch": 18.74304, "grad_norm": 1.0426579713821411, "learning_rate": 2.0728291316526612e-05, "loss": 0.4777, "step": 14643 }, { "epoch": 18.74432, "grad_norm": 1.106436848640442, "learning_rate": 2.0726290516206484e-05, "loss": 0.5592, "step": 14644 }, { "epoch": 18.7456, "grad_norm": 1.096053123474121, "learning_rate": 2.0724289715886356e-05, "loss": 0.5066, "step": 14645 }, { "epoch": 18.74688, "grad_norm": 1.077257752418518, "learning_rate": 2.0722288915566227e-05, "loss": 0.4936, "step": 14646 }, { "epoch": 18.74816, "grad_norm": 1.0427172183990479, "learning_rate": 2.07202881152461e-05, "loss": 0.5277, "step": 14647 }, { "epoch": 18.74944, "grad_norm": 1.0899598598480225, "learning_rate": 2.071828731492597e-05, "loss": 0.5181, "step": 14648 }, { "epoch": 18.75072, "grad_norm": 1.05521559715271, "learning_rate": 2.0716286514605843e-05, "loss": 0.5036, "step": 14649 }, { "epoch": 18.752, "grad_norm": 1.0131468772888184, "learning_rate": 2.0714285714285718e-05, "loss": 0.4956, "step": 14650 }, { "epoch": 18.75328, "grad_norm": 1.0845153331756592, "learning_rate": 2.0712284913965587e-05, "loss": 0.5164, "step": 14651 }, { "epoch": 18.75456, "grad_norm": 1.0508294105529785, "learning_rate": 2.071028411364546e-05, "loss": 0.4958, "step": 14652 }, { "epoch": 18.75584, "grad_norm": 1.0884485244750977, "learning_rate": 2.070828331332533e-05, "loss": 0.5257, "step": 14653 }, { "epoch": 18.75712, "grad_norm": 1.0835343599319458, "learning_rate": 2.0706282513005206e-05, "loss": 0.5102, "step": 14654 }, { "epoch": 18.7584, "grad_norm": 1.0350970029830933, "learning_rate": 2.0704281712685074e-05, "loss": 0.4823, "step": 14655 }, { "epoch": 18.75968, "grad_norm": 1.0485219955444336, "learning_rate": 
2.0702280912364946e-05, "loss": 0.4848, "step": 14656 }, { "epoch": 18.76096, "grad_norm": 1.053740382194519, "learning_rate": 2.0700280112044818e-05, "loss": 0.5062, "step": 14657 }, { "epoch": 18.76224, "grad_norm": 1.0165501832962036, "learning_rate": 2.0698279311724693e-05, "loss": 0.4833, "step": 14658 }, { "epoch": 18.76352, "grad_norm": 1.0042489767074585, "learning_rate": 2.069627851140456e-05, "loss": 0.4763, "step": 14659 }, { "epoch": 18.7648, "grad_norm": 1.03883695602417, "learning_rate": 2.0694277711084433e-05, "loss": 0.5079, "step": 14660 }, { "epoch": 18.76608, "grad_norm": 1.0785133838653564, "learning_rate": 2.069227691076431e-05, "loss": 0.5352, "step": 14661 }, { "epoch": 18.76736, "grad_norm": 1.0216038227081299, "learning_rate": 2.069027611044418e-05, "loss": 0.4591, "step": 14662 }, { "epoch": 18.76864, "grad_norm": 1.0611780881881714, "learning_rate": 2.068827531012405e-05, "loss": 0.5309, "step": 14663 }, { "epoch": 18.76992, "grad_norm": 1.055109977722168, "learning_rate": 2.068627450980392e-05, "loss": 0.5455, "step": 14664 }, { "epoch": 18.7712, "grad_norm": 1.064369559288025, "learning_rate": 2.0684273709483796e-05, "loss": 0.5099, "step": 14665 }, { "epoch": 18.77248, "grad_norm": 1.0237936973571777, "learning_rate": 2.0682272909163668e-05, "loss": 0.4916, "step": 14666 }, { "epoch": 18.77376, "grad_norm": 1.1315503120422363, "learning_rate": 2.0680272108843536e-05, "loss": 0.5178, "step": 14667 }, { "epoch": 18.77504, "grad_norm": 1.0587965250015259, "learning_rate": 2.0678271308523412e-05, "loss": 0.5022, "step": 14668 }, { "epoch": 18.77632, "grad_norm": 1.0879102945327759, "learning_rate": 2.0676270508203284e-05, "loss": 0.4688, "step": 14669 }, { "epoch": 18.7776, "grad_norm": 1.0429378747940063, "learning_rate": 2.0674269707883155e-05, "loss": 0.483, "step": 14670 }, { "epoch": 18.77888, "grad_norm": 1.0255515575408936, "learning_rate": 2.0672268907563024e-05, "loss": 0.49, "step": 14671 }, { "epoch": 18.78016, "grad_norm": 
1.1541240215301514, "learning_rate": 2.06702681072429e-05, "loss": 0.5842, "step": 14672 }, { "epoch": 18.78144, "grad_norm": 1.0616282224655151, "learning_rate": 2.066826730692277e-05, "loss": 0.4856, "step": 14673 }, { "epoch": 18.78272, "grad_norm": 1.0617082118988037, "learning_rate": 2.0666266506602643e-05, "loss": 0.4908, "step": 14674 }, { "epoch": 18.784, "grad_norm": 1.1243386268615723, "learning_rate": 2.0664265706282515e-05, "loss": 0.5216, "step": 14675 }, { "epoch": 18.78528, "grad_norm": 1.103163480758667, "learning_rate": 2.0662264905962387e-05, "loss": 0.5228, "step": 14676 }, { "epoch": 18.78656, "grad_norm": 1.0311192274093628, "learning_rate": 2.066026410564226e-05, "loss": 0.5204, "step": 14677 }, { "epoch": 18.78784, "grad_norm": 1.1256968975067139, "learning_rate": 2.065826330532213e-05, "loss": 0.5192, "step": 14678 }, { "epoch": 18.78912, "grad_norm": 1.0992389917373657, "learning_rate": 2.0656262505002002e-05, "loss": 0.5015, "step": 14679 }, { "epoch": 18.790399999999998, "grad_norm": 1.0932484865188599, "learning_rate": 2.0654261704681874e-05, "loss": 0.5712, "step": 14680 }, { "epoch": 18.79168, "grad_norm": 1.0706785917282104, "learning_rate": 2.0652260904361746e-05, "loss": 0.5222, "step": 14681 }, { "epoch": 18.79296, "grad_norm": 1.0165241956710815, "learning_rate": 2.0650260104041618e-05, "loss": 0.4922, "step": 14682 }, { "epoch": 18.79424, "grad_norm": 1.0056289434432983, "learning_rate": 2.064825930372149e-05, "loss": 0.4751, "step": 14683 }, { "epoch": 18.79552, "grad_norm": 1.0656893253326416, "learning_rate": 2.064625850340136e-05, "loss": 0.4966, "step": 14684 }, { "epoch": 18.7968, "grad_norm": 1.0573127269744873, "learning_rate": 2.0644257703081233e-05, "loss": 0.4845, "step": 14685 }, { "epoch": 18.79808, "grad_norm": 1.0371816158294678, "learning_rate": 2.0642256902761105e-05, "loss": 0.4863, "step": 14686 }, { "epoch": 18.79936, "grad_norm": 1.0891573429107666, "learning_rate": 2.0640256102440977e-05, "loss": 0.5613, 
"step": 14687 }, { "epoch": 18.80064, "grad_norm": 1.053895115852356, "learning_rate": 2.063825530212085e-05, "loss": 0.5053, "step": 14688 }, { "epoch": 18.80192, "grad_norm": 1.0640217065811157, "learning_rate": 2.0636254501800724e-05, "loss": 0.5254, "step": 14689 }, { "epoch": 18.8032, "grad_norm": 1.1202378273010254, "learning_rate": 2.0634253701480593e-05, "loss": 0.5121, "step": 14690 }, { "epoch": 18.80448, "grad_norm": 1.0097864866256714, "learning_rate": 2.0632252901160464e-05, "loss": 0.4796, "step": 14691 }, { "epoch": 18.80576, "grad_norm": 1.1119987964630127, "learning_rate": 2.0630252100840336e-05, "loss": 0.5685, "step": 14692 }, { "epoch": 18.80704, "grad_norm": 1.0607649087905884, "learning_rate": 2.062825130052021e-05, "loss": 0.519, "step": 14693 }, { "epoch": 18.80832, "grad_norm": 1.0458481311798096, "learning_rate": 2.062625050020008e-05, "loss": 0.4971, "step": 14694 }, { "epoch": 18.8096, "grad_norm": 1.0886461734771729, "learning_rate": 2.0624249699879952e-05, "loss": 0.5249, "step": 14695 }, { "epoch": 18.81088, "grad_norm": 1.0727638006210327, "learning_rate": 2.0622248899559827e-05, "loss": 0.5262, "step": 14696 }, { "epoch": 18.81216, "grad_norm": 1.00741446018219, "learning_rate": 2.06202480992397e-05, "loss": 0.5311, "step": 14697 }, { "epoch": 18.81344, "grad_norm": 1.0804330110549927, "learning_rate": 2.0618247298919567e-05, "loss": 0.547, "step": 14698 }, { "epoch": 18.81472, "grad_norm": 1.0355829000473022, "learning_rate": 2.061624649859944e-05, "loss": 0.5129, "step": 14699 }, { "epoch": 18.816, "grad_norm": 1.0258690118789673, "learning_rate": 2.0614245698279315e-05, "loss": 0.4925, "step": 14700 }, { "epoch": 18.81728, "grad_norm": 1.0901426076889038, "learning_rate": 2.0612244897959186e-05, "loss": 0.4997, "step": 14701 }, { "epoch": 18.81856, "grad_norm": 1.072962760925293, "learning_rate": 2.0610244097639055e-05, "loss": 0.5183, "step": 14702 }, { "epoch": 18.81984, "grad_norm": 1.004193902015686, "learning_rate": 
2.060824329731893e-05, "loss": 0.5215, "step": 14703 }, { "epoch": 18.82112, "grad_norm": 1.0526185035705566, "learning_rate": 2.0606242496998802e-05, "loss": 0.5038, "step": 14704 }, { "epoch": 18.822400000000002, "grad_norm": 1.0493122339248657, "learning_rate": 2.0604241696678674e-05, "loss": 0.5235, "step": 14705 }, { "epoch": 18.82368, "grad_norm": 0.9919229745864868, "learning_rate": 2.0602240896358542e-05, "loss": 0.496, "step": 14706 }, { "epoch": 18.82496, "grad_norm": 1.027870535850525, "learning_rate": 2.0600240096038418e-05, "loss": 0.4837, "step": 14707 }, { "epoch": 18.82624, "grad_norm": 1.0324792861938477, "learning_rate": 2.059823929571829e-05, "loss": 0.5041, "step": 14708 }, { "epoch": 18.82752, "grad_norm": 1.0613089799880981, "learning_rate": 2.059623849539816e-05, "loss": 0.5293, "step": 14709 }, { "epoch": 18.8288, "grad_norm": 1.0319715738296509, "learning_rate": 2.0594237695078033e-05, "loss": 0.51, "step": 14710 }, { "epoch": 18.83008, "grad_norm": 1.0587095022201538, "learning_rate": 2.0592236894757905e-05, "loss": 0.5019, "step": 14711 }, { "epoch": 18.83136, "grad_norm": 1.0132025480270386, "learning_rate": 2.0590236094437777e-05, "loss": 0.4797, "step": 14712 }, { "epoch": 18.83264, "grad_norm": 0.9891220331192017, "learning_rate": 2.058823529411765e-05, "loss": 0.4877, "step": 14713 }, { "epoch": 18.83392, "grad_norm": 1.0485026836395264, "learning_rate": 2.058623449379752e-05, "loss": 0.5021, "step": 14714 }, { "epoch": 18.8352, "grad_norm": 1.0996410846710205, "learning_rate": 2.0584233693477392e-05, "loss": 0.5152, "step": 14715 }, { "epoch": 18.83648, "grad_norm": 1.065560221672058, "learning_rate": 2.0582232893157264e-05, "loss": 0.5149, "step": 14716 }, { "epoch": 18.83776, "grad_norm": 1.12687349319458, "learning_rate": 2.0580232092837136e-05, "loss": 0.5378, "step": 14717 }, { "epoch": 18.83904, "grad_norm": 1.1263388395309448, "learning_rate": 2.0578231292517008e-05, "loss": 0.5573, "step": 14718 }, { "epoch": 18.84032, 
"grad_norm": 1.0918395519256592, "learning_rate": 2.057623049219688e-05, "loss": 0.5072, "step": 14719 }, { "epoch": 18.8416, "grad_norm": 1.0265675783157349, "learning_rate": 2.057422969187675e-05, "loss": 0.506, "step": 14720 }, { "epoch": 18.84288, "grad_norm": 0.9929207563400269, "learning_rate": 2.0572228891556623e-05, "loss": 0.4728, "step": 14721 }, { "epoch": 18.84416, "grad_norm": 1.0715802907943726, "learning_rate": 2.0570228091236495e-05, "loss": 0.4897, "step": 14722 }, { "epoch": 18.84544, "grad_norm": 0.9983776211738586, "learning_rate": 2.0568227290916367e-05, "loss": 0.481, "step": 14723 }, { "epoch": 18.84672, "grad_norm": 1.0175827741622925, "learning_rate": 2.056622649059624e-05, "loss": 0.5271, "step": 14724 }, { "epoch": 18.848, "grad_norm": 1.0712765455245972, "learning_rate": 2.056422569027611e-05, "loss": 0.5686, "step": 14725 }, { "epoch": 18.84928, "grad_norm": 1.0037692785263062, "learning_rate": 2.0562224889955983e-05, "loss": 0.4798, "step": 14726 }, { "epoch": 18.85056, "grad_norm": 0.9978422522544861, "learning_rate": 2.0560224089635855e-05, "loss": 0.44, "step": 14727 }, { "epoch": 18.85184, "grad_norm": 1.021209955215454, "learning_rate": 2.055822328931573e-05, "loss": 0.4934, "step": 14728 }, { "epoch": 18.85312, "grad_norm": 1.0505516529083252, "learning_rate": 2.05562224889956e-05, "loss": 0.5043, "step": 14729 }, { "epoch": 18.8544, "grad_norm": 1.0562058687210083, "learning_rate": 2.055422168867547e-05, "loss": 0.5147, "step": 14730 }, { "epoch": 18.85568, "grad_norm": 1.0676435232162476, "learning_rate": 2.0552220888355342e-05, "loss": 0.5092, "step": 14731 }, { "epoch": 18.85696, "grad_norm": 1.0419723987579346, "learning_rate": 2.0550220088035217e-05, "loss": 0.529, "step": 14732 }, { "epoch": 18.85824, "grad_norm": 1.0583957433700562, "learning_rate": 2.0548219287715086e-05, "loss": 0.4971, "step": 14733 }, { "epoch": 18.85952, "grad_norm": 1.0169494152069092, "learning_rate": 2.0546218487394958e-05, "loss": 0.508, "step": 
14734 }, { "epoch": 18.8608, "grad_norm": 1.019565224647522, "learning_rate": 2.0544217687074833e-05, "loss": 0.4782, "step": 14735 }, { "epoch": 18.86208, "grad_norm": 1.087472915649414, "learning_rate": 2.0542216886754705e-05, "loss": 0.5106, "step": 14736 }, { "epoch": 18.86336, "grad_norm": 1.0848655700683594, "learning_rate": 2.0540216086434573e-05, "loss": 0.4834, "step": 14737 }, { "epoch": 18.86464, "grad_norm": 1.049041509628296, "learning_rate": 2.0538215286114445e-05, "loss": 0.5162, "step": 14738 }, { "epoch": 18.86592, "grad_norm": 1.0465503931045532, "learning_rate": 2.053621448579432e-05, "loss": 0.5309, "step": 14739 }, { "epoch": 18.8672, "grad_norm": 1.0960649251937866, "learning_rate": 2.0534213685474192e-05, "loss": 0.4926, "step": 14740 }, { "epoch": 18.86848, "grad_norm": 1.0588141679763794, "learning_rate": 2.053221288515406e-05, "loss": 0.5142, "step": 14741 }, { "epoch": 18.86976, "grad_norm": 1.0418102741241455, "learning_rate": 2.0530212084833936e-05, "loss": 0.4991, "step": 14742 }, { "epoch": 18.87104, "grad_norm": 1.0588616132736206, "learning_rate": 2.0528211284513808e-05, "loss": 0.5113, "step": 14743 }, { "epoch": 18.87232, "grad_norm": 1.0481520891189575, "learning_rate": 2.052621048419368e-05, "loss": 0.474, "step": 14744 }, { "epoch": 18.8736, "grad_norm": 1.0571566820144653, "learning_rate": 2.0524209683873548e-05, "loss": 0.5072, "step": 14745 }, { "epoch": 18.87488, "grad_norm": 1.0196195840835571, "learning_rate": 2.0522208883553423e-05, "loss": 0.535, "step": 14746 }, { "epoch": 18.87616, "grad_norm": 1.1058493852615356, "learning_rate": 2.0520208083233295e-05, "loss": 0.5374, "step": 14747 }, { "epoch": 18.87744, "grad_norm": 1.0693318843841553, "learning_rate": 2.0518207282913167e-05, "loss": 0.4748, "step": 14748 }, { "epoch": 18.87872, "grad_norm": 1.0735576152801514, "learning_rate": 2.051620648259304e-05, "loss": 0.5005, "step": 14749 }, { "epoch": 18.88, "grad_norm": 1.0688385963439941, "learning_rate": 
2.051420568227291e-05, "loss": 0.5158, "step": 14750 }, { "epoch": 18.88128, "grad_norm": 1.0036412477493286, "learning_rate": 2.0512204881952783e-05, "loss": 0.5041, "step": 14751 }, { "epoch": 18.88256, "grad_norm": 1.089450478553772, "learning_rate": 2.0510204081632654e-05, "loss": 0.5485, "step": 14752 }, { "epoch": 18.88384, "grad_norm": 1.050765872001648, "learning_rate": 2.0508203281312526e-05, "loss": 0.496, "step": 14753 }, { "epoch": 18.88512, "grad_norm": 1.0228477716445923, "learning_rate": 2.0506202480992398e-05, "loss": 0.5171, "step": 14754 }, { "epoch": 18.8864, "grad_norm": 1.0970298051834106, "learning_rate": 2.050420168067227e-05, "loss": 0.5334, "step": 14755 }, { "epoch": 18.88768, "grad_norm": 1.0475279092788696, "learning_rate": 2.0502200880352142e-05, "loss": 0.4951, "step": 14756 }, { "epoch": 18.88896, "grad_norm": 1.0471172332763672, "learning_rate": 2.0500200080032014e-05, "loss": 0.5046, "step": 14757 }, { "epoch": 18.89024, "grad_norm": 1.0250929594039917, "learning_rate": 2.0498199279711886e-05, "loss": 0.4712, "step": 14758 }, { "epoch": 18.89152, "grad_norm": 1.0119768381118774, "learning_rate": 2.0496198479391757e-05, "loss": 0.5076, "step": 14759 }, { "epoch": 18.8928, "grad_norm": 1.1097235679626465, "learning_rate": 2.049419767907163e-05, "loss": 0.5243, "step": 14760 }, { "epoch": 18.89408, "grad_norm": 1.0308051109313965, "learning_rate": 2.04921968787515e-05, "loss": 0.5459, "step": 14761 }, { "epoch": 18.89536, "grad_norm": 1.027444839477539, "learning_rate": 2.0490196078431373e-05, "loss": 0.532, "step": 14762 }, { "epoch": 18.89664, "grad_norm": 1.0802733898162842, "learning_rate": 2.0488195278111248e-05, "loss": 0.5034, "step": 14763 }, { "epoch": 18.89792, "grad_norm": 1.0943602323532104, "learning_rate": 2.0486194477791117e-05, "loss": 0.5108, "step": 14764 }, { "epoch": 18.8992, "grad_norm": 1.0663763284683228, "learning_rate": 2.048419367747099e-05, "loss": 0.4789, "step": 14765 }, { "epoch": 18.90048, "grad_norm": 
1.0287508964538574, "learning_rate": 2.048219287715086e-05, "loss": 0.4964, "step": 14766 }, { "epoch": 18.90176, "grad_norm": 1.0889275074005127, "learning_rate": 2.0480192076830736e-05, "loss": 0.513, "step": 14767 }, { "epoch": 18.90304, "grad_norm": 1.0352449417114258, "learning_rate": 2.0478191276510604e-05, "loss": 0.4871, "step": 14768 }, { "epoch": 18.90432, "grad_norm": 1.0806697607040405, "learning_rate": 2.0476190476190476e-05, "loss": 0.5069, "step": 14769 }, { "epoch": 18.9056, "grad_norm": 1.0483494997024536, "learning_rate": 2.0474189675870348e-05, "loss": 0.493, "step": 14770 }, { "epoch": 18.90688, "grad_norm": 0.9899161458015442, "learning_rate": 2.0472188875550223e-05, "loss": 0.4689, "step": 14771 }, { "epoch": 18.90816, "grad_norm": 0.9991376399993896, "learning_rate": 2.047018807523009e-05, "loss": 0.441, "step": 14772 }, { "epoch": 18.90944, "grad_norm": 1.0828230381011963, "learning_rate": 2.0468187274909963e-05, "loss": 0.4842, "step": 14773 }, { "epoch": 18.91072, "grad_norm": 1.0319201946258545, "learning_rate": 2.046618647458984e-05, "loss": 0.4966, "step": 14774 }, { "epoch": 18.912, "grad_norm": 1.1114946603775024, "learning_rate": 2.046418567426971e-05, "loss": 0.5483, "step": 14775 }, { "epoch": 18.91328, "grad_norm": 1.1752065420150757, "learning_rate": 2.046218487394958e-05, "loss": 0.5939, "step": 14776 }, { "epoch": 18.91456, "grad_norm": 1.0463483333587646, "learning_rate": 2.046018407362945e-05, "loss": 0.509, "step": 14777 }, { "epoch": 18.91584, "grad_norm": 1.0437871217727661, "learning_rate": 2.0458183273309326e-05, "loss": 0.4781, "step": 14778 }, { "epoch": 18.91712, "grad_norm": 1.054076910018921, "learning_rate": 2.0456182472989198e-05, "loss": 0.5121, "step": 14779 }, { "epoch": 18.9184, "grad_norm": 1.0515813827514648, "learning_rate": 2.0454181672669066e-05, "loss": 0.5137, "step": 14780 }, { "epoch": 18.91968, "grad_norm": 1.0603755712509155, "learning_rate": 2.045218087234894e-05, "loss": 0.4898, "step": 14781 }, { 
"epoch": 18.92096, "grad_norm": 1.0575968027114868, "learning_rate": 2.0450180072028814e-05, "loss": 0.497, "step": 14782 }, { "epoch": 18.92224, "grad_norm": 1.0913869142532349, "learning_rate": 2.0448179271708685e-05, "loss": 0.5125, "step": 14783 }, { "epoch": 18.92352, "grad_norm": 1.018833041191101, "learning_rate": 2.0446178471388554e-05, "loss": 0.4733, "step": 14784 }, { "epoch": 18.9248, "grad_norm": 1.0295926332473755, "learning_rate": 2.044417767106843e-05, "loss": 0.4817, "step": 14785 }, { "epoch": 18.92608, "grad_norm": 1.0770633220672607, "learning_rate": 2.04421768707483e-05, "loss": 0.4856, "step": 14786 }, { "epoch": 18.92736, "grad_norm": 1.0353119373321533, "learning_rate": 2.0440176070428173e-05, "loss": 0.5007, "step": 14787 }, { "epoch": 18.92864, "grad_norm": 1.082660436630249, "learning_rate": 2.0438175270108045e-05, "loss": 0.5117, "step": 14788 }, { "epoch": 18.92992, "grad_norm": 1.0934934616088867, "learning_rate": 2.0436174469787917e-05, "loss": 0.5001, "step": 14789 }, { "epoch": 18.9312, "grad_norm": 1.0217633247375488, "learning_rate": 2.043417366946779e-05, "loss": 0.5105, "step": 14790 }, { "epoch": 18.932479999999998, "grad_norm": 0.9955496788024902, "learning_rate": 2.043217286914766e-05, "loss": 0.4753, "step": 14791 }, { "epoch": 18.93376, "grad_norm": 1.0870311260223389, "learning_rate": 2.0430172068827532e-05, "loss": 0.5214, "step": 14792 }, { "epoch": 18.93504, "grad_norm": 1.0062708854675293, "learning_rate": 2.0428171268507404e-05, "loss": 0.471, "step": 14793 }, { "epoch": 18.93632, "grad_norm": 1.0651724338531494, "learning_rate": 2.0426170468187276e-05, "loss": 0.5747, "step": 14794 }, { "epoch": 18.9376, "grad_norm": 0.9868699908256531, "learning_rate": 2.0424169667867148e-05, "loss": 0.4606, "step": 14795 }, { "epoch": 18.93888, "grad_norm": 1.0331077575683594, "learning_rate": 2.042216886754702e-05, "loss": 0.5139, "step": 14796 }, { "epoch": 18.94016, "grad_norm": 1.0682381391525269, "learning_rate": 
2.042016806722689e-05, "loss": 0.4899, "step": 14797 }, { "epoch": 18.94144, "grad_norm": 1.1549947261810303, "learning_rate": 2.0418167266906763e-05, "loss": 0.5366, "step": 14798 }, { "epoch": 18.94272, "grad_norm": 1.0490201711654663, "learning_rate": 2.0416166466586635e-05, "loss": 0.5038, "step": 14799 }, { "epoch": 18.944, "grad_norm": 1.097759485244751, "learning_rate": 2.0414165666266507e-05, "loss": 0.5136, "step": 14800 }, { "epoch": 18.94528, "grad_norm": 1.0571385622024536, "learning_rate": 2.041216486594638e-05, "loss": 0.4652, "step": 14801 }, { "epoch": 18.94656, "grad_norm": 1.071684718132019, "learning_rate": 2.0410164065626254e-05, "loss": 0.5625, "step": 14802 }, { "epoch": 18.94784, "grad_norm": 1.0939278602600098, "learning_rate": 2.0408163265306123e-05, "loss": 0.5401, "step": 14803 }, { "epoch": 18.94912, "grad_norm": 1.045653223991394, "learning_rate": 2.0406162464985994e-05, "loss": 0.5289, "step": 14804 }, { "epoch": 18.9504, "grad_norm": 1.070016622543335, "learning_rate": 2.0404161664665866e-05, "loss": 0.542, "step": 14805 }, { "epoch": 18.95168, "grad_norm": 0.992372989654541, "learning_rate": 2.040216086434574e-05, "loss": 0.4731, "step": 14806 }, { "epoch": 18.95296, "grad_norm": 1.062925100326538, "learning_rate": 2.040016006402561e-05, "loss": 0.5159, "step": 14807 }, { "epoch": 18.95424, "grad_norm": 1.0661933422088623, "learning_rate": 2.0398159263705482e-05, "loss": 0.5155, "step": 14808 }, { "epoch": 18.95552, "grad_norm": 1.0385395288467407, "learning_rate": 2.0396158463385357e-05, "loss": 0.4745, "step": 14809 }, { "epoch": 18.9568, "grad_norm": 1.0102157592773438, "learning_rate": 2.039415766306523e-05, "loss": 0.4902, "step": 14810 }, { "epoch": 18.95808, "grad_norm": 1.0179786682128906, "learning_rate": 2.0392156862745097e-05, "loss": 0.4641, "step": 14811 }, { "epoch": 18.95936, "grad_norm": 1.062633991241455, "learning_rate": 2.039015606242497e-05, "loss": 0.5406, "step": 14812 }, { "epoch": 18.96064, "grad_norm": 
1.0476435422897339, "learning_rate": 2.0388155262104844e-05, "loss": 0.5176, "step": 14813 }, { "epoch": 18.96192, "grad_norm": 1.0489333868026733, "learning_rate": 2.0386154461784716e-05, "loss": 0.5355, "step": 14814 }, { "epoch": 18.9632, "grad_norm": 1.0147716999053955, "learning_rate": 2.0384153661464585e-05, "loss": 0.5249, "step": 14815 }, { "epoch": 18.964480000000002, "grad_norm": 1.0759072303771973, "learning_rate": 2.038215286114446e-05, "loss": 0.4902, "step": 14816 }, { "epoch": 18.96576, "grad_norm": 1.095579743385315, "learning_rate": 2.0380152060824332e-05, "loss": 0.5045, "step": 14817 }, { "epoch": 18.96704, "grad_norm": 1.0580558776855469, "learning_rate": 2.0378151260504204e-05, "loss": 0.5302, "step": 14818 }, { "epoch": 18.96832, "grad_norm": 1.0755257606506348, "learning_rate": 2.0376150460184072e-05, "loss": 0.5294, "step": 14819 }, { "epoch": 18.9696, "grad_norm": 1.0983492136001587, "learning_rate": 2.0374149659863947e-05, "loss": 0.5071, "step": 14820 }, { "epoch": 18.97088, "grad_norm": 1.050110936164856, "learning_rate": 2.037214885954382e-05, "loss": 0.4988, "step": 14821 }, { "epoch": 18.97216, "grad_norm": 1.052294135093689, "learning_rate": 2.037014805922369e-05, "loss": 0.5414, "step": 14822 }, { "epoch": 18.97344, "grad_norm": 1.1228139400482178, "learning_rate": 2.036814725890356e-05, "loss": 0.535, "step": 14823 }, { "epoch": 18.97472, "grad_norm": 1.0272384881973267, "learning_rate": 2.0366146458583435e-05, "loss": 0.4597, "step": 14824 }, { "epoch": 18.976, "grad_norm": 1.0405932664871216, "learning_rate": 2.0364145658263307e-05, "loss": 0.4597, "step": 14825 }, { "epoch": 18.97728, "grad_norm": 1.1259437799453735, "learning_rate": 2.036214485794318e-05, "loss": 0.5336, "step": 14826 }, { "epoch": 18.97856, "grad_norm": 1.0937503576278687, "learning_rate": 2.036014405762305e-05, "loss": 0.4824, "step": 14827 }, { "epoch": 18.97984, "grad_norm": 1.0712480545043945, "learning_rate": 2.0358143257302922e-05, "loss": 0.5119, 
"step": 14828 }, { "epoch": 18.98112, "grad_norm": 1.1139438152313232, "learning_rate": 2.0356142456982794e-05, "loss": 0.5282, "step": 14829 }, { "epoch": 18.9824, "grad_norm": 1.103958010673523, "learning_rate": 2.0354141656662666e-05, "loss": 0.5595, "step": 14830 }, { "epoch": 18.98368, "grad_norm": 1.0063518285751343, "learning_rate": 2.0352140856342538e-05, "loss": 0.4573, "step": 14831 }, { "epoch": 18.98496, "grad_norm": 1.033522605895996, "learning_rate": 2.035014005602241e-05, "loss": 0.4847, "step": 14832 }, { "epoch": 18.98624, "grad_norm": 1.0984948873519897, "learning_rate": 2.034813925570228e-05, "loss": 0.5332, "step": 14833 }, { "epoch": 18.98752, "grad_norm": 1.1030117273330688, "learning_rate": 2.0346138455382153e-05, "loss": 0.538, "step": 14834 }, { "epoch": 18.9888, "grad_norm": 1.097759485244751, "learning_rate": 2.0344137655062025e-05, "loss": 0.5191, "step": 14835 }, { "epoch": 18.99008, "grad_norm": 1.111032485961914, "learning_rate": 2.0342136854741897e-05, "loss": 0.4796, "step": 14836 }, { "epoch": 18.99136, "grad_norm": 1.1036945581436157, "learning_rate": 2.034013605442177e-05, "loss": 0.5219, "step": 14837 }, { "epoch": 18.99264, "grad_norm": 1.1179360151290894, "learning_rate": 2.033813525410164e-05, "loss": 0.5537, "step": 14838 }, { "epoch": 18.99392, "grad_norm": 1.048097848892212, "learning_rate": 2.0336134453781513e-05, "loss": 0.5053, "step": 14839 }, { "epoch": 18.9952, "grad_norm": 1.0695916414260864, "learning_rate": 2.0334133653461385e-05, "loss": 0.5526, "step": 14840 }, { "epoch": 18.99648, "grad_norm": 1.0661946535110474, "learning_rate": 2.033213285314126e-05, "loss": 0.5055, "step": 14841 }, { "epoch": 18.99776, "grad_norm": 1.0300498008728027, "learning_rate": 2.033013205282113e-05, "loss": 0.4859, "step": 14842 }, { "epoch": 18.99904, "grad_norm": 1.1209156513214111, "learning_rate": 2.0328131252501e-05, "loss": 0.5599, "step": 14843 }, { "epoch": 19.00032, "grad_norm": Infinity, "learning_rate": 
2.0328131252501e-05, "loss": 0.9375, "step": 14844 }, { "epoch": 19.0016, "grad_norm": 0.9681119918823242, "learning_rate": 2.0326130452180872e-05, "loss": 0.501, "step": 14845 }, { "epoch": 19.00288, "grad_norm": 1.0563879013061523, "learning_rate": 2.0324129651860747e-05, "loss": 0.4601, "step": 14846 }, { "epoch": 19.00416, "grad_norm": 1.0467544794082642, "learning_rate": 2.0322128851540616e-05, "loss": 0.5335, "step": 14847 }, { "epoch": 19.00544, "grad_norm": 1.031529188156128, "learning_rate": 2.0320128051220488e-05, "loss": 0.463, "step": 14848 }, { "epoch": 19.00672, "grad_norm": 1.036137580871582, "learning_rate": 2.0318127250900363e-05, "loss": 0.4777, "step": 14849 }, { "epoch": 19.008, "grad_norm": 1.0127391815185547, "learning_rate": 2.0316126450580235e-05, "loss": 0.4845, "step": 14850 }, { "epoch": 19.00928, "grad_norm": 1.0757827758789062, "learning_rate": 2.0314125650260103e-05, "loss": 0.5228, "step": 14851 }, { "epoch": 19.01056, "grad_norm": 1.035569190979004, "learning_rate": 2.0312124849939975e-05, "loss": 0.4674, "step": 14852 }, { "epoch": 19.01184, "grad_norm": 1.0188312530517578, "learning_rate": 2.031012404961985e-05, "loss": 0.486, "step": 14853 }, { "epoch": 19.01312, "grad_norm": 1.087680697441101, "learning_rate": 2.0308123249299722e-05, "loss": 0.4803, "step": 14854 }, { "epoch": 19.0144, "grad_norm": 1.090846061706543, "learning_rate": 2.030612244897959e-05, "loss": 0.473, "step": 14855 }, { "epoch": 19.01568, "grad_norm": 1.1319047212600708, "learning_rate": 2.0304121648659466e-05, "loss": 0.5024, "step": 14856 }, { "epoch": 19.01696, "grad_norm": 1.1232948303222656, "learning_rate": 2.0302120848339338e-05, "loss": 0.5176, "step": 14857 }, { "epoch": 19.01824, "grad_norm": 1.08295476436615, "learning_rate": 2.030012004801921e-05, "loss": 0.4821, "step": 14858 }, { "epoch": 19.01952, "grad_norm": 1.0505188703536987, "learning_rate": 2.0298119247699078e-05, "loss": 0.4906, "step": 14859 }, { "epoch": 19.0208, "grad_norm": 
1.0271227359771729, "learning_rate": 2.0296118447378953e-05, "loss": 0.4796, "step": 14860 }, { "epoch": 19.02208, "grad_norm": 1.0881240367889404, "learning_rate": 2.0294117647058825e-05, "loss": 0.4729, "step": 14861 }, { "epoch": 19.02336, "grad_norm": 1.1086496114730835, "learning_rate": 2.0292116846738697e-05, "loss": 0.4996, "step": 14862 }, { "epoch": 19.02464, "grad_norm": 1.087936520576477, "learning_rate": 2.029011604641857e-05, "loss": 0.4926, "step": 14863 }, { "epoch": 19.02592, "grad_norm": 1.062474250793457, "learning_rate": 2.028811524609844e-05, "loss": 0.495, "step": 14864 }, { "epoch": 19.0272, "grad_norm": 1.049436092376709, "learning_rate": 2.0286114445778313e-05, "loss": 0.4552, "step": 14865 }, { "epoch": 19.02848, "grad_norm": 1.0556187629699707, "learning_rate": 2.0284113645458184e-05, "loss": 0.4789, "step": 14866 }, { "epoch": 19.02976, "grad_norm": 0.9977188110351562, "learning_rate": 2.0282112845138056e-05, "loss": 0.4788, "step": 14867 }, { "epoch": 19.03104, "grad_norm": 1.0484732389450073, "learning_rate": 2.0280112044817928e-05, "loss": 0.4596, "step": 14868 }, { "epoch": 19.03232, "grad_norm": 1.135587453842163, "learning_rate": 2.02781112444978e-05, "loss": 0.4965, "step": 14869 }, { "epoch": 19.0336, "grad_norm": 1.0908023118972778, "learning_rate": 2.0276110444177672e-05, "loss": 0.4776, "step": 14870 }, { "epoch": 19.03488, "grad_norm": 1.1048251390457153, "learning_rate": 2.0274109643857544e-05, "loss": 0.5342, "step": 14871 }, { "epoch": 19.03616, "grad_norm": 1.1822048425674438, "learning_rate": 2.0272108843537416e-05, "loss": 0.5458, "step": 14872 }, { "epoch": 19.03744, "grad_norm": 1.0560060739517212, "learning_rate": 2.0270108043217287e-05, "loss": 0.477, "step": 14873 }, { "epoch": 19.03872, "grad_norm": 1.0473270416259766, "learning_rate": 2.026810724289716e-05, "loss": 0.519, "step": 14874 }, { "epoch": 19.04, "grad_norm": 1.0593985319137573, "learning_rate": 2.026610644257703e-05, "loss": 0.4724, "step": 14875 }, { 
"epoch": 19.04128, "grad_norm": 1.1364285945892334, "learning_rate": 2.0264105642256903e-05, "loss": 0.4902, "step": 14876 }, { "epoch": 19.04256, "grad_norm": 1.1284868717193604, "learning_rate": 2.0262104841936778e-05, "loss": 0.5167, "step": 14877 }, { "epoch": 19.04384, "grad_norm": 1.0828670263290405, "learning_rate": 2.0260104041616647e-05, "loss": 0.4799, "step": 14878 }, { "epoch": 19.04512, "grad_norm": 1.0774503946304321, "learning_rate": 2.025810324129652e-05, "loss": 0.516, "step": 14879 }, { "epoch": 19.0464, "grad_norm": 1.074471354484558, "learning_rate": 2.025610244097639e-05, "loss": 0.5001, "step": 14880 }, { "epoch": 19.04768, "grad_norm": 1.0600991249084473, "learning_rate": 2.0254101640656266e-05, "loss": 0.475, "step": 14881 }, { "epoch": 19.04896, "grad_norm": 1.0403833389282227, "learning_rate": 2.0252100840336134e-05, "loss": 0.4729, "step": 14882 }, { "epoch": 19.05024, "grad_norm": 1.0715597867965698, "learning_rate": 2.0250100040016006e-05, "loss": 0.5526, "step": 14883 }, { "epoch": 19.05152, "grad_norm": 1.0039360523223877, "learning_rate": 2.0248099239695878e-05, "loss": 0.4799, "step": 14884 }, { "epoch": 19.0528, "grad_norm": 0.990351676940918, "learning_rate": 2.0246098439375753e-05, "loss": 0.4442, "step": 14885 }, { "epoch": 19.05408, "grad_norm": 1.0488803386688232, "learning_rate": 2.024409763905562e-05, "loss": 0.4884, "step": 14886 }, { "epoch": 19.05536, "grad_norm": 1.0708200931549072, "learning_rate": 2.0242096838735493e-05, "loss": 0.5011, "step": 14887 }, { "epoch": 19.05664, "grad_norm": 1.0574580430984497, "learning_rate": 2.024009603841537e-05, "loss": 0.4919, "step": 14888 }, { "epoch": 19.05792, "grad_norm": 1.0483922958374023, "learning_rate": 2.023809523809524e-05, "loss": 0.5121, "step": 14889 }, { "epoch": 19.0592, "grad_norm": 1.039752721786499, "learning_rate": 2.023609443777511e-05, "loss": 0.4642, "step": 14890 }, { "epoch": 19.06048, "grad_norm": 1.0242259502410889, "learning_rate": 2.023409363745498e-05, 
"loss": 0.4452, "step": 14891 }, { "epoch": 19.06176, "grad_norm": 1.0347176790237427, "learning_rate": 2.0232092837134856e-05, "loss": 0.5101, "step": 14892 }, { "epoch": 19.06304, "grad_norm": 1.0924898386001587, "learning_rate": 2.0230092036814728e-05, "loss": 0.4904, "step": 14893 }, { "epoch": 19.06432, "grad_norm": 1.0558232069015503, "learning_rate": 2.0228091236494596e-05, "loss": 0.4689, "step": 14894 }, { "epoch": 19.0656, "grad_norm": 1.082149863243103, "learning_rate": 2.022609043617447e-05, "loss": 0.5111, "step": 14895 }, { "epoch": 19.06688, "grad_norm": 1.0433650016784668, "learning_rate": 2.0224089635854344e-05, "loss": 0.4458, "step": 14896 }, { "epoch": 19.06816, "grad_norm": 1.053171157836914, "learning_rate": 2.0222088835534215e-05, "loss": 0.4724, "step": 14897 }, { "epoch": 19.06944, "grad_norm": 1.0355948209762573, "learning_rate": 2.0220088035214084e-05, "loss": 0.5265, "step": 14898 }, { "epoch": 19.07072, "grad_norm": 1.0293118953704834, "learning_rate": 2.021808723489396e-05, "loss": 0.4963, "step": 14899 }, { "epoch": 19.072, "grad_norm": 1.065530776977539, "learning_rate": 2.021608643457383e-05, "loss": 0.4682, "step": 14900 }, { "epoch": 19.07328, "grad_norm": 1.0934525728225708, "learning_rate": 2.0214085634253703e-05, "loss": 0.4689, "step": 14901 }, { "epoch": 19.07456, "grad_norm": 0.997524082660675, "learning_rate": 2.0212084833933575e-05, "loss": 0.4693, "step": 14902 }, { "epoch": 19.07584, "grad_norm": 1.1037408113479614, "learning_rate": 2.0210084033613447e-05, "loss": 0.5157, "step": 14903 }, { "epoch": 19.07712, "grad_norm": 1.0279779434204102, "learning_rate": 2.020808323329332e-05, "loss": 0.4996, "step": 14904 }, { "epoch": 19.0784, "grad_norm": 1.0512694120407104, "learning_rate": 2.020608243297319e-05, "loss": 0.4589, "step": 14905 }, { "epoch": 19.07968, "grad_norm": 1.048998475074768, "learning_rate": 2.0204081632653062e-05, "loss": 0.4715, "step": 14906 }, { "epoch": 19.08096, "grad_norm": 1.1140145063400269, 
"learning_rate": 2.0202080832332934e-05, "loss": 0.5188, "step": 14907 }, { "epoch": 19.08224, "grad_norm": 1.1645013093948364, "learning_rate": 2.0200080032012806e-05, "loss": 0.5306, "step": 14908 }, { "epoch": 19.08352, "grad_norm": 1.1035782098770142, "learning_rate": 2.0198079231692678e-05, "loss": 0.5419, "step": 14909 }, { "epoch": 19.0848, "grad_norm": 1.1250030994415283, "learning_rate": 2.019607843137255e-05, "loss": 0.4853, "step": 14910 }, { "epoch": 19.08608, "grad_norm": 1.019386887550354, "learning_rate": 2.019407763105242e-05, "loss": 0.4822, "step": 14911 }, { "epoch": 19.08736, "grad_norm": 1.0403432846069336, "learning_rate": 2.0192076830732293e-05, "loss": 0.4773, "step": 14912 }, { "epoch": 19.08864, "grad_norm": 1.073562741279602, "learning_rate": 2.0190076030412165e-05, "loss": 0.5333, "step": 14913 }, { "epoch": 19.08992, "grad_norm": 1.0300809144973755, "learning_rate": 2.0188075230092037e-05, "loss": 0.4463, "step": 14914 }, { "epoch": 19.0912, "grad_norm": 1.0957305431365967, "learning_rate": 2.018607442977191e-05, "loss": 0.5052, "step": 14915 }, { "epoch": 19.09248, "grad_norm": 1.155129313468933, "learning_rate": 2.0184073629451784e-05, "loss": 0.5286, "step": 14916 }, { "epoch": 19.09376, "grad_norm": 1.0918629169464111, "learning_rate": 2.0182072829131653e-05, "loss": 0.5577, "step": 14917 }, { "epoch": 19.09504, "grad_norm": 1.0622458457946777, "learning_rate": 2.0180072028811524e-05, "loss": 0.4989, "step": 14918 }, { "epoch": 19.09632, "grad_norm": 1.0885812044143677, "learning_rate": 2.0178071228491396e-05, "loss": 0.522, "step": 14919 }, { "epoch": 19.0976, "grad_norm": 1.0788886547088623, "learning_rate": 2.017607042817127e-05, "loss": 0.5262, "step": 14920 }, { "epoch": 19.09888, "grad_norm": 1.0732629299163818, "learning_rate": 2.017406962785114e-05, "loss": 0.4768, "step": 14921 }, { "epoch": 19.10016, "grad_norm": 1.0360231399536133, "learning_rate": 2.0172068827531012e-05, "loss": 0.4685, "step": 14922 }, { "epoch": 
19.10144, "grad_norm": 1.0716302394866943, "learning_rate": 2.0170068027210887e-05, "loss": 0.497, "step": 14923 }, { "epoch": 19.10272, "grad_norm": 1.0575581789016724, "learning_rate": 2.016806722689076e-05, "loss": 0.4984, "step": 14924 }, { "epoch": 19.104, "grad_norm": 1.0178310871124268, "learning_rate": 2.0166066426570627e-05, "loss": 0.4571, "step": 14925 }, { "epoch": 19.10528, "grad_norm": 1.0144866704940796, "learning_rate": 2.01640656262505e-05, "loss": 0.4566, "step": 14926 }, { "epoch": 19.10656, "grad_norm": 1.0763506889343262, "learning_rate": 2.0162064825930374e-05, "loss": 0.4842, "step": 14927 }, { "epoch": 19.10784, "grad_norm": 1.0165890455245972, "learning_rate": 2.0160064025610246e-05, "loss": 0.4678, "step": 14928 }, { "epoch": 19.10912, "grad_norm": 1.0530028343200684, "learning_rate": 2.0158063225290115e-05, "loss": 0.4977, "step": 14929 }, { "epoch": 19.1104, "grad_norm": 1.109286904335022, "learning_rate": 2.015606242496999e-05, "loss": 0.5079, "step": 14930 }, { "epoch": 19.11168, "grad_norm": 1.081906795501709, "learning_rate": 2.0154061624649862e-05, "loss": 0.4783, "step": 14931 }, { "epoch": 19.11296, "grad_norm": 1.0961225032806396, "learning_rate": 2.0152060824329734e-05, "loss": 0.5479, "step": 14932 }, { "epoch": 19.11424, "grad_norm": 1.105312705039978, "learning_rate": 2.0150060024009602e-05, "loss": 0.5046, "step": 14933 }, { "epoch": 19.11552, "grad_norm": 1.146582841873169, "learning_rate": 2.0148059223689477e-05, "loss": 0.5447, "step": 14934 }, { "epoch": 19.1168, "grad_norm": 1.0598522424697876, "learning_rate": 2.014605842336935e-05, "loss": 0.4873, "step": 14935 }, { "epoch": 19.11808, "grad_norm": 1.0884562730789185, "learning_rate": 2.014405762304922e-05, "loss": 0.49, "step": 14936 }, { "epoch": 19.11936, "grad_norm": 1.1092275381088257, "learning_rate": 2.014205682272909e-05, "loss": 0.5157, "step": 14937 }, { "epoch": 19.12064, "grad_norm": 1.0648193359375, "learning_rate": 2.0140056022408965e-05, "loss": 0.4773, 
"step": 14938 }, { "epoch": 19.12192, "grad_norm": 1.0710915327072144, "learning_rate": 2.0138055222088837e-05, "loss": 0.5003, "step": 14939 }, { "epoch": 19.1232, "grad_norm": 1.0918967723846436, "learning_rate": 2.013605442176871e-05, "loss": 0.4891, "step": 14940 }, { "epoch": 19.12448, "grad_norm": 1.0287532806396484, "learning_rate": 2.013405362144858e-05, "loss": 0.4392, "step": 14941 }, { "epoch": 19.12576, "grad_norm": 1.050429105758667, "learning_rate": 2.0132052821128452e-05, "loss": 0.4599, "step": 14942 }, { "epoch": 19.12704, "grad_norm": 1.1075915098190308, "learning_rate": 2.0130052020808324e-05, "loss": 0.5304, "step": 14943 }, { "epoch": 19.12832, "grad_norm": 1.040389060974121, "learning_rate": 2.0128051220488196e-05, "loss": 0.5047, "step": 14944 }, { "epoch": 19.1296, "grad_norm": 1.0519245862960815, "learning_rate": 2.0126050420168068e-05, "loss": 0.4767, "step": 14945 }, { "epoch": 19.13088, "grad_norm": 1.1048765182495117, "learning_rate": 2.012404961984794e-05, "loss": 0.5269, "step": 14946 }, { "epoch": 19.13216, "grad_norm": 1.0859326124191284, "learning_rate": 2.012204881952781e-05, "loss": 0.5006, "step": 14947 }, { "epoch": 19.13344, "grad_norm": 1.0915594100952148, "learning_rate": 2.0120048019207683e-05, "loss": 0.4768, "step": 14948 }, { "epoch": 19.13472, "grad_norm": 1.1056619882583618, "learning_rate": 2.0118047218887555e-05, "loss": 0.5421, "step": 14949 }, { "epoch": 19.136, "grad_norm": 1.124221920967102, "learning_rate": 2.0116046418567427e-05, "loss": 0.4826, "step": 14950 }, { "epoch": 19.13728, "grad_norm": 1.0759928226470947, "learning_rate": 2.01140456182473e-05, "loss": 0.5218, "step": 14951 }, { "epoch": 19.13856, "grad_norm": 1.0738402605056763, "learning_rate": 2.011204481792717e-05, "loss": 0.4804, "step": 14952 }, { "epoch": 19.13984, "grad_norm": 1.070008397102356, "learning_rate": 2.0110044017607043e-05, "loss": 0.5434, "step": 14953 }, { "epoch": 19.14112, "grad_norm": 1.1064351797103882, "learning_rate": 
2.0108043217286915e-05, "loss": 0.5187, "step": 14954 }, { "epoch": 19.1424, "grad_norm": 1.0281625986099243, "learning_rate": 2.010604241696679e-05, "loss": 0.4896, "step": 14955 }, { "epoch": 19.14368, "grad_norm": 1.088550329208374, "learning_rate": 2.010404161664666e-05, "loss": 0.5228, "step": 14956 }, { "epoch": 19.14496, "grad_norm": 1.1894739866256714, "learning_rate": 2.010204081632653e-05, "loss": 0.5813, "step": 14957 }, { "epoch": 19.14624, "grad_norm": 1.1209919452667236, "learning_rate": 2.0100040016006402e-05, "loss": 0.483, "step": 14958 }, { "epoch": 19.14752, "grad_norm": 1.055732011795044, "learning_rate": 2.0098039215686277e-05, "loss": 0.5229, "step": 14959 }, { "epoch": 19.1488, "grad_norm": 1.0904511213302612, "learning_rate": 2.0096038415366146e-05, "loss": 0.521, "step": 14960 }, { "epoch": 19.15008, "grad_norm": 1.0930554866790771, "learning_rate": 2.0094037615046018e-05, "loss": 0.4945, "step": 14961 }, { "epoch": 19.15136, "grad_norm": 1.0355908870697021, "learning_rate": 2.0092036814725893e-05, "loss": 0.5065, "step": 14962 }, { "epoch": 19.15264, "grad_norm": 1.0164462327957153, "learning_rate": 2.0090036014405765e-05, "loss": 0.4642, "step": 14963 }, { "epoch": 19.15392, "grad_norm": 1.0890769958496094, "learning_rate": 2.0088035214085633e-05, "loss": 0.4984, "step": 14964 }, { "epoch": 19.1552, "grad_norm": 1.0881518125534058, "learning_rate": 2.0086034413765505e-05, "loss": 0.5328, "step": 14965 }, { "epoch": 19.15648, "grad_norm": 1.0909157991409302, "learning_rate": 2.008403361344538e-05, "loss": 0.4777, "step": 14966 }, { "epoch": 19.15776, "grad_norm": 1.0730060338974, "learning_rate": 2.0082032813125252e-05, "loss": 0.4747, "step": 14967 }, { "epoch": 19.15904, "grad_norm": 1.0788432359695435, "learning_rate": 2.008003201280512e-05, "loss": 0.4639, "step": 14968 }, { "epoch": 19.16032, "grad_norm": 1.0355757474899292, "learning_rate": 2.0078031212484996e-05, "loss": 0.4812, "step": 14969 }, { "epoch": 19.1616, "grad_norm": 
1.1290099620819092, "learning_rate": 2.0076030412164868e-05, "loss": 0.5178, "step": 14970 }, { "epoch": 19.16288, "grad_norm": 1.0541107654571533, "learning_rate": 2.007402961184474e-05, "loss": 0.4936, "step": 14971 }, { "epoch": 19.16416, "grad_norm": 1.0474649667739868, "learning_rate": 2.0072028811524608e-05, "loss": 0.5262, "step": 14972 }, { "epoch": 19.16544, "grad_norm": 1.0278681516647339, "learning_rate": 2.0070028011204483e-05, "loss": 0.4955, "step": 14973 }, { "epoch": 19.16672, "grad_norm": 1.0625478029251099, "learning_rate": 2.0068027210884355e-05, "loss": 0.4842, "step": 14974 }, { "epoch": 19.168, "grad_norm": 1.0970510244369507, "learning_rate": 2.0066026410564227e-05, "loss": 0.5195, "step": 14975 }, { "epoch": 19.16928, "grad_norm": 1.0882428884506226, "learning_rate": 2.00640256102441e-05, "loss": 0.5226, "step": 14976 }, { "epoch": 19.17056, "grad_norm": 1.0383604764938354, "learning_rate": 2.006202480992397e-05, "loss": 0.4514, "step": 14977 }, { "epoch": 19.17184, "grad_norm": 1.1663484573364258, "learning_rate": 2.0060024009603843e-05, "loss": 0.5614, "step": 14978 }, { "epoch": 19.17312, "grad_norm": 1.0208680629730225, "learning_rate": 2.0058023209283714e-05, "loss": 0.4491, "step": 14979 }, { "epoch": 19.1744, "grad_norm": 1.0704641342163086, "learning_rate": 2.0056022408963586e-05, "loss": 0.5215, "step": 14980 }, { "epoch": 19.17568, "grad_norm": 1.1021260023117065, "learning_rate": 2.0054021608643458e-05, "loss": 0.4645, "step": 14981 }, { "epoch": 19.17696, "grad_norm": 1.0811761617660522, "learning_rate": 2.005202080832333e-05, "loss": 0.5263, "step": 14982 }, { "epoch": 19.17824, "grad_norm": 1.1112900972366333, "learning_rate": 2.0050020008003205e-05, "loss": 0.4977, "step": 14983 }, { "epoch": 19.17952, "grad_norm": 1.1169459819793701, "learning_rate": 2.0048019207683074e-05, "loss": 0.4697, "step": 14984 }, { "epoch": 19.1808, "grad_norm": 1.056740164756775, "learning_rate": 2.0046018407362946e-05, "loss": 0.4776, "step": 
14985 }, { "epoch": 19.18208, "grad_norm": 1.0898573398590088, "learning_rate": 2.0044017607042817e-05, "loss": 0.5211, "step": 14986 }, { "epoch": 19.18336, "grad_norm": 1.0396350622177124, "learning_rate": 2.0042016806722693e-05, "loss": 0.4809, "step": 14987 }, { "epoch": 19.18464, "grad_norm": 1.1024669408798218, "learning_rate": 2.004001600640256e-05, "loss": 0.4961, "step": 14988 }, { "epoch": 19.18592, "grad_norm": 1.1737849712371826, "learning_rate": 2.0038015206082433e-05, "loss": 0.5079, "step": 14989 }, { "epoch": 19.1872, "grad_norm": 1.1294634342193604, "learning_rate": 2.0036014405762308e-05, "loss": 0.5342, "step": 14990 }, { "epoch": 19.18848, "grad_norm": 1.021791696548462, "learning_rate": 2.003401360544218e-05, "loss": 0.497, "step": 14991 }, { "epoch": 19.18976, "grad_norm": 1.0725295543670654, "learning_rate": 2.003201280512205e-05, "loss": 0.4671, "step": 14992 }, { "epoch": 19.19104, "grad_norm": 1.098502278327942, "learning_rate": 2.003001200480192e-05, "loss": 0.5108, "step": 14993 }, { "epoch": 19.19232, "grad_norm": 1.060045599937439, "learning_rate": 2.0028011204481796e-05, "loss": 0.4673, "step": 14994 }, { "epoch": 19.1936, "grad_norm": 1.056288242340088, "learning_rate": 2.0026010404161667e-05, "loss": 0.4897, "step": 14995 }, { "epoch": 19.19488, "grad_norm": 1.0819458961486816, "learning_rate": 2.0024009603841536e-05, "loss": 0.5011, "step": 14996 }, { "epoch": 19.19616, "grad_norm": 1.0354195833206177, "learning_rate": 2.0022008803521408e-05, "loss": 0.4803, "step": 14997 }, { "epoch": 19.19744, "grad_norm": 1.0913057327270508, "learning_rate": 2.0020008003201283e-05, "loss": 0.4986, "step": 14998 }, { "epoch": 19.19872, "grad_norm": 1.101427674293518, "learning_rate": 2.0018007202881155e-05, "loss": 0.4715, "step": 14999 }, { "epoch": 19.2, "grad_norm": 0.9858012199401855, "learning_rate": 2.0016006402561023e-05, "loss": 0.4406, "step": 15000 }, { "epoch": 19.20128, "grad_norm": 1.187526822090149, "learning_rate": 
2.00140056022409e-05, "loss": 0.5555, "step": 15001 }, { "epoch": 19.20256, "grad_norm": 1.087991714477539, "learning_rate": 2.001200480192077e-05, "loss": 0.5228, "step": 15002 }, { "epoch": 19.20384, "grad_norm": 1.0227059125900269, "learning_rate": 2.0010004001600642e-05, "loss": 0.4722, "step": 15003 }, { "epoch": 19.20512, "grad_norm": 1.0761233568191528, "learning_rate": 2.000800320128051e-05, "loss": 0.5027, "step": 15004 }, { "epoch": 19.2064, "grad_norm": 1.0522879362106323, "learning_rate": 2.0006002400960386e-05, "loss": 0.5058, "step": 15005 }, { "epoch": 19.20768, "grad_norm": 1.104921579360962, "learning_rate": 2.0004001600640258e-05, "loss": 0.4954, "step": 15006 }, { "epoch": 19.20896, "grad_norm": 1.0269452333450317, "learning_rate": 2.000200080032013e-05, "loss": 0.4919, "step": 15007 }, { "epoch": 19.21024, "grad_norm": 1.1063481569290161, "learning_rate": 2e-05, "loss": 0.5197, "step": 15008 }, { "epoch": 19.21152, "grad_norm": 1.0551246404647827, "learning_rate": 1.9997999199679873e-05, "loss": 0.5092, "step": 15009 }, { "epoch": 19.2128, "grad_norm": 1.0208076238632202, "learning_rate": 1.9995998399359745e-05, "loss": 0.4658, "step": 15010 }, { "epoch": 19.21408, "grad_norm": 1.0514510869979858, "learning_rate": 1.9993997599039617e-05, "loss": 0.4883, "step": 15011 }, { "epoch": 19.21536, "grad_norm": 1.0635521411895752, "learning_rate": 1.999199679871949e-05, "loss": 0.4738, "step": 15012 }, { "epoch": 19.21664, "grad_norm": 1.0084950923919678, "learning_rate": 1.998999599839936e-05, "loss": 0.4461, "step": 15013 }, { "epoch": 19.21792, "grad_norm": 1.0877736806869507, "learning_rate": 1.9987995198079233e-05, "loss": 0.5056, "step": 15014 }, { "epoch": 19.2192, "grad_norm": 1.0358392000198364, "learning_rate": 1.9985994397759105e-05, "loss": 0.4948, "step": 15015 }, { "epoch": 19.22048, "grad_norm": 1.0706851482391357, "learning_rate": 1.9983993597438976e-05, "loss": 0.5167, "step": 15016 }, { "epoch": 19.22176, "grad_norm": 
1.0379141569137573, "learning_rate": 1.998199279711885e-05, "loss": 0.5038, "step": 15017 }, { "epoch": 19.22304, "grad_norm": 1.045020580291748, "learning_rate": 1.997999199679872e-05, "loss": 0.4624, "step": 15018 }, { "epoch": 19.22432, "grad_norm": 1.0447577238082886, "learning_rate": 1.9977991196478592e-05, "loss": 0.494, "step": 15019 }, { "epoch": 19.2256, "grad_norm": 1.057941198348999, "learning_rate": 1.9975990396158464e-05, "loss": 0.4793, "step": 15020 }, { "epoch": 19.22688, "grad_norm": 1.0662115812301636, "learning_rate": 1.9973989595838336e-05, "loss": 0.4976, "step": 15021 }, { "epoch": 19.22816, "grad_norm": 1.040855050086975, "learning_rate": 1.997198879551821e-05, "loss": 0.4681, "step": 15022 }, { "epoch": 19.22944, "grad_norm": 1.0539730787277222, "learning_rate": 1.996998799519808e-05, "loss": 0.4807, "step": 15023 }, { "epoch": 19.23072, "grad_norm": 1.0343507528305054, "learning_rate": 1.996798719487795e-05, "loss": 0.4931, "step": 15024 }, { "epoch": 19.232, "grad_norm": 1.0538980960845947, "learning_rate": 1.9965986394557823e-05, "loss": 0.4921, "step": 15025 }, { "epoch": 19.23328, "grad_norm": 1.0909112691879272, "learning_rate": 1.99639855942377e-05, "loss": 0.4804, "step": 15026 }, { "epoch": 19.23456, "grad_norm": 0.9821348786354065, "learning_rate": 1.9961984793917567e-05, "loss": 0.445, "step": 15027 }, { "epoch": 19.23584, "grad_norm": 0.9998717904090881, "learning_rate": 1.995998399359744e-05, "loss": 0.4519, "step": 15028 }, { "epoch": 19.23712, "grad_norm": 1.1057769060134888, "learning_rate": 1.9957983193277314e-05, "loss": 0.5291, "step": 15029 }, { "epoch": 19.2384, "grad_norm": 1.1260607242584229, "learning_rate": 1.9955982392957186e-05, "loss": 0.4936, "step": 15030 }, { "epoch": 19.23968, "grad_norm": 1.0743900537490845, "learning_rate": 1.9953981592637054e-05, "loss": 0.5174, "step": 15031 }, { "epoch": 19.24096, "grad_norm": 1.0485973358154297, "learning_rate": 1.9951980792316926e-05, "loss": 0.4953, "step": 15032 }, { 
"epoch": 19.24224, "grad_norm": 1.104485273361206, "learning_rate": 1.99499799919968e-05, "loss": 0.5035, "step": 15033 }, { "epoch": 19.24352, "grad_norm": 1.1098350286483765, "learning_rate": 1.9947979191676673e-05, "loss": 0.5428, "step": 15034 }, { "epoch": 19.2448, "grad_norm": 1.1056627035140991, "learning_rate": 1.9945978391356542e-05, "loss": 0.5034, "step": 15035 }, { "epoch": 19.24608, "grad_norm": 1.1753102540969849, "learning_rate": 1.9943977591036417e-05, "loss": 0.559, "step": 15036 }, { "epoch": 19.24736, "grad_norm": 1.1483614444732666, "learning_rate": 1.994197679071629e-05, "loss": 0.5179, "step": 15037 }, { "epoch": 19.24864, "grad_norm": 1.0651777982711792, "learning_rate": 1.993997599039616e-05, "loss": 0.4794, "step": 15038 }, { "epoch": 19.24992, "grad_norm": 1.1044447422027588, "learning_rate": 1.993797519007603e-05, "loss": 0.4939, "step": 15039 }, { "epoch": 19.2512, "grad_norm": 1.1543649435043335, "learning_rate": 1.9935974389755904e-05, "loss": 0.5337, "step": 15040 }, { "epoch": 19.25248, "grad_norm": 1.0800881385803223, "learning_rate": 1.9933973589435776e-05, "loss": 0.4684, "step": 15041 }, { "epoch": 19.25376, "grad_norm": 1.0753213167190552, "learning_rate": 1.9931972789115648e-05, "loss": 0.5192, "step": 15042 }, { "epoch": 19.25504, "grad_norm": 1.0312221050262451, "learning_rate": 1.992997198879552e-05, "loss": 0.484, "step": 15043 }, { "epoch": 19.25632, "grad_norm": 1.0647677183151245, "learning_rate": 1.9927971188475392e-05, "loss": 0.4847, "step": 15044 }, { "epoch": 19.2576, "grad_norm": 1.090179443359375, "learning_rate": 1.9925970388155264e-05, "loss": 0.4858, "step": 15045 }, { "epoch": 19.25888, "grad_norm": 1.055359125137329, "learning_rate": 1.9923969587835136e-05, "loss": 0.46, "step": 15046 }, { "epoch": 19.26016, "grad_norm": 1.0518981218338013, "learning_rate": 1.9921968787515007e-05, "loss": 0.4574, "step": 15047 }, { "epoch": 19.26144, "grad_norm": 1.1169776916503906, "learning_rate": 1.991996798719488e-05, 
"loss": 0.4914, "step": 15048 }, { "epoch": 19.26272, "grad_norm": 1.026633620262146, "learning_rate": 1.991796718687475e-05, "loss": 0.4853, "step": 15049 }, { "epoch": 19.264, "grad_norm": 1.0704096555709839, "learning_rate": 1.9915966386554623e-05, "loss": 0.5028, "step": 15050 }, { "epoch": 19.26528, "grad_norm": 1.075821042060852, "learning_rate": 1.9913965586234495e-05, "loss": 0.4707, "step": 15051 }, { "epoch": 19.26656, "grad_norm": 1.025315523147583, "learning_rate": 1.9911964785914367e-05, "loss": 0.4675, "step": 15052 }, { "epoch": 19.26784, "grad_norm": 1.0183051824569702, "learning_rate": 1.990996398559424e-05, "loss": 0.4757, "step": 15053 }, { "epoch": 19.26912, "grad_norm": 1.0708404779434204, "learning_rate": 1.990796318527411e-05, "loss": 0.5087, "step": 15054 }, { "epoch": 19.2704, "grad_norm": 1.080012559890747, "learning_rate": 1.9905962384953982e-05, "loss": 0.5115, "step": 15055 }, { "epoch": 19.27168, "grad_norm": 1.0551676750183105, "learning_rate": 1.9903961584633854e-05, "loss": 0.4924, "step": 15056 }, { "epoch": 19.27296, "grad_norm": 1.0453039407730103, "learning_rate": 1.9901960784313726e-05, "loss": 0.501, "step": 15057 }, { "epoch": 19.27424, "grad_norm": 1.0461045503616333, "learning_rate": 1.9899959983993598e-05, "loss": 0.4755, "step": 15058 }, { "epoch": 19.27552, "grad_norm": 1.0412358045578003, "learning_rate": 1.989795918367347e-05, "loss": 0.4581, "step": 15059 }, { "epoch": 19.2768, "grad_norm": 1.027969479560852, "learning_rate": 1.989595838335334e-05, "loss": 0.4646, "step": 15060 }, { "epoch": 19.27808, "grad_norm": 1.1006345748901367, "learning_rate": 1.9893957583033217e-05, "loss": 0.4882, "step": 15061 }, { "epoch": 19.27936, "grad_norm": 1.0730010271072388, "learning_rate": 1.9891956782713085e-05, "loss": 0.4754, "step": 15062 }, { "epoch": 19.28064, "grad_norm": 0.998073399066925, "learning_rate": 1.9889955982392957e-05, "loss": 0.4791, "step": 15063 }, { "epoch": 19.28192, "grad_norm": 1.0365452766418457, 
"learning_rate": 1.988795518207283e-05, "loss": 0.4787, "step": 15064 }, { "epoch": 19.2832, "grad_norm": 1.0752921104431152, "learning_rate": 1.9885954381752704e-05, "loss": 0.5146, "step": 15065 }, { "epoch": 19.28448, "grad_norm": 0.9972426295280457, "learning_rate": 1.9883953581432573e-05, "loss": 0.4596, "step": 15066 }, { "epoch": 19.28576, "grad_norm": 1.1111432313919067, "learning_rate": 1.9881952781112445e-05, "loss": 0.4724, "step": 15067 }, { "epoch": 19.28704, "grad_norm": 1.0094146728515625, "learning_rate": 1.987995198079232e-05, "loss": 0.4806, "step": 15068 }, { "epoch": 19.28832, "grad_norm": 1.0732576847076416, "learning_rate": 1.987795118047219e-05, "loss": 0.4957, "step": 15069 }, { "epoch": 19.2896, "grad_norm": 1.131595253944397, "learning_rate": 1.987595038015206e-05, "loss": 0.518, "step": 15070 }, { "epoch": 19.29088, "grad_norm": 1.0386974811553955, "learning_rate": 1.9873949579831932e-05, "loss": 0.4767, "step": 15071 }, { "epoch": 19.29216, "grad_norm": 1.0035613775253296, "learning_rate": 1.9871948779511807e-05, "loss": 0.4594, "step": 15072 }, { "epoch": 19.29344, "grad_norm": 1.0777531862258911, "learning_rate": 1.986994797919168e-05, "loss": 0.4983, "step": 15073 }, { "epoch": 19.29472, "grad_norm": 1.0419715642929077, "learning_rate": 1.9867947178871548e-05, "loss": 0.4667, "step": 15074 }, { "epoch": 19.296, "grad_norm": 1.090698003768921, "learning_rate": 1.9865946378551423e-05, "loss": 0.5553, "step": 15075 }, { "epoch": 19.29728, "grad_norm": 1.0526976585388184, "learning_rate": 1.9863945578231295e-05, "loss": 0.5112, "step": 15076 }, { "epoch": 19.29856, "grad_norm": 1.0763030052185059, "learning_rate": 1.9861944777911167e-05, "loss": 0.4721, "step": 15077 }, { "epoch": 19.29984, "grad_norm": 1.032845377922058, "learning_rate": 1.9859943977591035e-05, "loss": 0.5084, "step": 15078 }, { "epoch": 19.30112, "grad_norm": 1.0398248434066772, "learning_rate": 1.985794317727091e-05, "loss": 0.4519, "step": 15079 }, { "epoch": 19.3024, 
"grad_norm": 1.0678154230117798, "learning_rate": 1.9855942376950782e-05, "loss": 0.5367, "step": 15080 }, { "epoch": 19.30368, "grad_norm": 1.0743778944015503, "learning_rate": 1.9853941576630654e-05, "loss": 0.4997, "step": 15081 }, { "epoch": 19.30496, "grad_norm": 1.0013165473937988, "learning_rate": 1.9851940776310526e-05, "loss": 0.4498, "step": 15082 }, { "epoch": 19.30624, "grad_norm": 1.052975058555603, "learning_rate": 1.9849939975990398e-05, "loss": 0.4922, "step": 15083 }, { "epoch": 19.30752, "grad_norm": 1.0732128620147705, "learning_rate": 1.984793917567027e-05, "loss": 0.4821, "step": 15084 }, { "epoch": 19.3088, "grad_norm": 1.0122027397155762, "learning_rate": 1.984593837535014e-05, "loss": 0.4702, "step": 15085 }, { "epoch": 19.31008, "grad_norm": 1.1038622856140137, "learning_rate": 1.9843937575030013e-05, "loss": 0.5435, "step": 15086 }, { "epoch": 19.31136, "grad_norm": 1.1267859935760498, "learning_rate": 1.9841936774709885e-05, "loss": 0.5358, "step": 15087 }, { "epoch": 19.31264, "grad_norm": 1.109407663345337, "learning_rate": 1.9839935974389757e-05, "loss": 0.4993, "step": 15088 }, { "epoch": 19.31392, "grad_norm": 1.0586917400360107, "learning_rate": 1.983793517406963e-05, "loss": 0.541, "step": 15089 }, { "epoch": 19.3152, "grad_norm": 1.0636937618255615, "learning_rate": 1.98359343737495e-05, "loss": 0.5048, "step": 15090 }, { "epoch": 19.31648, "grad_norm": 1.0607376098632812, "learning_rate": 1.9833933573429373e-05, "loss": 0.485, "step": 15091 }, { "epoch": 19.31776, "grad_norm": 1.100722074508667, "learning_rate": 1.9831932773109244e-05, "loss": 0.5225, "step": 15092 }, { "epoch": 19.31904, "grad_norm": 1.06232750415802, "learning_rate": 1.9829931972789116e-05, "loss": 0.5353, "step": 15093 }, { "epoch": 19.32032, "grad_norm": 1.0231244564056396, "learning_rate": 1.9827931172468988e-05, "loss": 0.5138, "step": 15094 }, { "epoch": 19.3216, "grad_norm": 1.1289072036743164, "learning_rate": 1.982593037214886e-05, "loss": 0.5095, 
"step": 15095 }, { "epoch": 19.32288, "grad_norm": 1.0838286876678467, "learning_rate": 1.9823929571828735e-05, "loss": 0.5244, "step": 15096 }, { "epoch": 19.32416, "grad_norm": 0.9947683811187744, "learning_rate": 1.9821928771508604e-05, "loss": 0.4371, "step": 15097 }, { "epoch": 19.32544, "grad_norm": 0.9954429864883423, "learning_rate": 1.9819927971188476e-05, "loss": 0.4654, "step": 15098 }, { "epoch": 19.32672, "grad_norm": 0.9967473149299622, "learning_rate": 1.9817927170868347e-05, "loss": 0.4414, "step": 15099 }, { "epoch": 19.328, "grad_norm": 1.0796631574630737, "learning_rate": 1.9815926370548223e-05, "loss": 0.4671, "step": 15100 }, { "epoch": 19.32928, "grad_norm": 1.1318116188049316, "learning_rate": 1.981392557022809e-05, "loss": 0.487, "step": 15101 }, { "epoch": 19.33056, "grad_norm": 1.1136324405670166, "learning_rate": 1.9811924769907963e-05, "loss": 0.502, "step": 15102 }, { "epoch": 19.33184, "grad_norm": 1.093737244606018, "learning_rate": 1.9809923969587835e-05, "loss": 0.5176, "step": 15103 }, { "epoch": 19.33312, "grad_norm": 1.1789205074310303, "learning_rate": 1.980792316926771e-05, "loss": 0.494, "step": 15104 }, { "epoch": 19.3344, "grad_norm": 1.1485955715179443, "learning_rate": 1.980592236894758e-05, "loss": 0.5493, "step": 15105 }, { "epoch": 19.33568, "grad_norm": 1.1409004926681519, "learning_rate": 1.980392156862745e-05, "loss": 0.5134, "step": 15106 }, { "epoch": 19.33696, "grad_norm": 1.1257143020629883, "learning_rate": 1.9801920768307326e-05, "loss": 0.5045, "step": 15107 }, { "epoch": 19.33824, "grad_norm": 1.071676254272461, "learning_rate": 1.9799919967987197e-05, "loss": 0.4601, "step": 15108 }, { "epoch": 19.33952, "grad_norm": 1.0796996355056763, "learning_rate": 1.9797919167667066e-05, "loss": 0.5009, "step": 15109 }, { "epoch": 19.3408, "grad_norm": 1.1070489883422852, "learning_rate": 1.9795918367346938e-05, "loss": 0.5274, "step": 15110 }, { "epoch": 19.34208, "grad_norm": 1.0755043029785156, "learning_rate": 
1.9793917567026813e-05, "loss": 0.5438, "step": 15111 }, { "epoch": 19.34336, "grad_norm": 1.077373743057251, "learning_rate": 1.9791916766706685e-05, "loss": 0.4636, "step": 15112 }, { "epoch": 19.34464, "grad_norm": 1.0781618356704712, "learning_rate": 1.9789915966386553e-05, "loss": 0.5117, "step": 15113 }, { "epoch": 19.34592, "grad_norm": 1.0397669076919556, "learning_rate": 1.978791516606643e-05, "loss": 0.4753, "step": 15114 }, { "epoch": 19.3472, "grad_norm": 1.0098005533218384, "learning_rate": 1.97859143657463e-05, "loss": 0.4595, "step": 15115 }, { "epoch": 19.34848, "grad_norm": 1.054352879524231, "learning_rate": 1.9783913565426172e-05, "loss": 0.5363, "step": 15116 }, { "epoch": 19.34976, "grad_norm": 1.1141222715377808, "learning_rate": 1.978191276510604e-05, "loss": 0.5044, "step": 15117 }, { "epoch": 19.35104, "grad_norm": 1.093037486076355, "learning_rate": 1.9779911964785916e-05, "loss": 0.5358, "step": 15118 }, { "epoch": 19.35232, "grad_norm": 1.0386536121368408, "learning_rate": 1.9777911164465788e-05, "loss": 0.4794, "step": 15119 }, { "epoch": 19.3536, "grad_norm": 1.0772889852523804, "learning_rate": 1.977591036414566e-05, "loss": 0.4993, "step": 15120 }, { "epoch": 19.35488, "grad_norm": 1.0881242752075195, "learning_rate": 1.977390956382553e-05, "loss": 0.5044, "step": 15121 }, { "epoch": 19.35616, "grad_norm": 1.0567618608474731, "learning_rate": 1.9771908763505403e-05, "loss": 0.497, "step": 15122 }, { "epoch": 19.35744, "grad_norm": 1.0052014589309692, "learning_rate": 1.9769907963185275e-05, "loss": 0.4805, "step": 15123 }, { "epoch": 19.35872, "grad_norm": 1.0756340026855469, "learning_rate": 1.9767907162865147e-05, "loss": 0.4962, "step": 15124 }, { "epoch": 19.36, "grad_norm": 1.0965667963027954, "learning_rate": 1.976590636254502e-05, "loss": 0.4453, "step": 15125 }, { "epoch": 19.36128, "grad_norm": 1.1271300315856934, "learning_rate": 1.976390556222489e-05, "loss": 0.4642, "step": 15126 }, { "epoch": 19.36256, "grad_norm": 
1.1453865766525269, "learning_rate": 1.9761904761904763e-05, "loss": 0.5407, "step": 15127 }, { "epoch": 19.36384, "grad_norm": 1.1263774633407593, "learning_rate": 1.9759903961584635e-05, "loss": 0.5207, "step": 15128 }, { "epoch": 19.36512, "grad_norm": 1.1071971654891968, "learning_rate": 1.9757903161264506e-05, "loss": 0.5006, "step": 15129 }, { "epoch": 19.3664, "grad_norm": 1.0227842330932617, "learning_rate": 1.975590236094438e-05, "loss": 0.4618, "step": 15130 }, { "epoch": 19.36768, "grad_norm": 1.094109058380127, "learning_rate": 1.975390156062425e-05, "loss": 0.4808, "step": 15131 }, { "epoch": 19.36896, "grad_norm": 1.1060868501663208, "learning_rate": 1.9751900760304122e-05, "loss": 0.4996, "step": 15132 }, { "epoch": 19.37024, "grad_norm": 1.075696587562561, "learning_rate": 1.9749899959983994e-05, "loss": 0.5082, "step": 15133 }, { "epoch": 19.37152, "grad_norm": 1.0798813104629517, "learning_rate": 1.9747899159663866e-05, "loss": 0.4954, "step": 15134 }, { "epoch": 19.3728, "grad_norm": 1.1250864267349243, "learning_rate": 1.974589835934374e-05, "loss": 0.5539, "step": 15135 }, { "epoch": 19.37408, "grad_norm": 1.0421397686004639, "learning_rate": 1.974389755902361e-05, "loss": 0.4999, "step": 15136 }, { "epoch": 19.37536, "grad_norm": 1.0826746225357056, "learning_rate": 1.974189675870348e-05, "loss": 0.5566, "step": 15137 }, { "epoch": 19.37664, "grad_norm": 1.078742504119873, "learning_rate": 1.9739895958383353e-05, "loss": 0.5022, "step": 15138 }, { "epoch": 19.37792, "grad_norm": 1.0308053493499756, "learning_rate": 1.973789515806323e-05, "loss": 0.4586, "step": 15139 }, { "epoch": 19.3792, "grad_norm": 1.0350420475006104, "learning_rate": 1.9735894357743097e-05, "loss": 0.5138, "step": 15140 }, { "epoch": 19.38048, "grad_norm": 1.1027657985687256, "learning_rate": 1.973389355742297e-05, "loss": 0.538, "step": 15141 }, { "epoch": 19.38176, "grad_norm": 1.0577800273895264, "learning_rate": 1.9731892757102844e-05, "loss": 0.522, "step": 15142 }, 
{ "epoch": 19.38304, "grad_norm": 1.043058156967163, "learning_rate": 1.9729891956782716e-05, "loss": 0.467, "step": 15143 }, { "epoch": 19.38432, "grad_norm": 1.0484575033187866, "learning_rate": 1.9727891156462584e-05, "loss": 0.524, "step": 15144 }, { "epoch": 19.3856, "grad_norm": 1.0222641229629517, "learning_rate": 1.9725890356142456e-05, "loss": 0.5035, "step": 15145 }, { "epoch": 19.38688, "grad_norm": 0.9348340630531311, "learning_rate": 1.972388955582233e-05, "loss": 0.4127, "step": 15146 }, { "epoch": 19.38816, "grad_norm": 1.0297220945358276, "learning_rate": 1.9721888755502203e-05, "loss": 0.4698, "step": 15147 }, { "epoch": 19.38944, "grad_norm": 1.1119678020477295, "learning_rate": 1.9719887955182072e-05, "loss": 0.5864, "step": 15148 }, { "epoch": 19.39072, "grad_norm": 1.0301072597503662, "learning_rate": 1.9717887154861947e-05, "loss": 0.4696, "step": 15149 }, { "epoch": 19.392, "grad_norm": 1.0419156551361084, "learning_rate": 1.971588635454182e-05, "loss": 0.4809, "step": 15150 }, { "epoch": 19.39328, "grad_norm": 1.1309643983840942, "learning_rate": 1.971388555422169e-05, "loss": 0.5239, "step": 15151 }, { "epoch": 19.39456, "grad_norm": 1.0294731855392456, "learning_rate": 1.971188475390156e-05, "loss": 0.4653, "step": 15152 }, { "epoch": 19.39584, "grad_norm": 1.0680971145629883, "learning_rate": 1.9709883953581434e-05, "loss": 0.5049, "step": 15153 }, { "epoch": 19.39712, "grad_norm": 1.0883816480636597, "learning_rate": 1.9707883153261306e-05, "loss": 0.4632, "step": 15154 }, { "epoch": 19.3984, "grad_norm": 1.0957615375518799, "learning_rate": 1.9705882352941178e-05, "loss": 0.4984, "step": 15155 }, { "epoch": 19.39968, "grad_norm": 1.0659648180007935, "learning_rate": 1.970388155262105e-05, "loss": 0.5045, "step": 15156 }, { "epoch": 19.40096, "grad_norm": 1.0844171047210693, "learning_rate": 1.9701880752300922e-05, "loss": 0.4732, "step": 15157 }, { "epoch": 19.40224, "grad_norm": 1.1332968473434448, "learning_rate": 
1.9699879951980794e-05, "loss": 0.5328, "step": 15158 }, { "epoch": 19.40352, "grad_norm": 1.1378060579299927, "learning_rate": 1.9697879151660666e-05, "loss": 0.5104, "step": 15159 }, { "epoch": 19.4048, "grad_norm": 1.137235164642334, "learning_rate": 1.9695878351340537e-05, "loss": 0.5345, "step": 15160 }, { "epoch": 19.40608, "grad_norm": 1.0622361898422241, "learning_rate": 1.969387755102041e-05, "loss": 0.4613, "step": 15161 }, { "epoch": 19.40736, "grad_norm": 1.0199161767959595, "learning_rate": 1.969187675070028e-05, "loss": 0.4496, "step": 15162 }, { "epoch": 19.40864, "grad_norm": 1.075202226638794, "learning_rate": 1.9689875950380153e-05, "loss": 0.521, "step": 15163 }, { "epoch": 19.40992, "grad_norm": 1.0620388984680176, "learning_rate": 1.9687875150060025e-05, "loss": 0.4758, "step": 15164 }, { "epoch": 19.4112, "grad_norm": 1.073757529258728, "learning_rate": 1.9685874349739897e-05, "loss": 0.4867, "step": 15165 }, { "epoch": 19.41248, "grad_norm": 1.063657522201538, "learning_rate": 1.968387354941977e-05, "loss": 0.5093, "step": 15166 }, { "epoch": 19.41376, "grad_norm": 1.1278645992279053, "learning_rate": 1.968187274909964e-05, "loss": 0.4889, "step": 15167 }, { "epoch": 19.41504, "grad_norm": 1.0807961225509644, "learning_rate": 1.9679871948779512e-05, "loss": 0.5224, "step": 15168 }, { "epoch": 19.41632, "grad_norm": 1.0827388763427734, "learning_rate": 1.9677871148459384e-05, "loss": 0.5286, "step": 15169 }, { "epoch": 19.4176, "grad_norm": 1.0594857931137085, "learning_rate": 1.9675870348139256e-05, "loss": 0.4851, "step": 15170 }, { "epoch": 19.41888, "grad_norm": 1.0610501766204834, "learning_rate": 1.9673869547819128e-05, "loss": 0.4941, "step": 15171 }, { "epoch": 19.42016, "grad_norm": 1.06877863407135, "learning_rate": 1.9671868747499e-05, "loss": 0.4593, "step": 15172 }, { "epoch": 19.42144, "grad_norm": 1.0728821754455566, "learning_rate": 1.966986794717887e-05, "loss": 0.5086, "step": 15173 }, { "epoch": 19.422719999999998, 
"grad_norm": 1.062900185585022, "learning_rate": 1.9667867146858747e-05, "loss": 0.4906, "step": 15174 }, { "epoch": 19.424, "grad_norm": 1.0392909049987793, "learning_rate": 1.9665866346538615e-05, "loss": 0.4578, "step": 15175 }, { "epoch": 19.42528, "grad_norm": 1.0775206089019775, "learning_rate": 1.9663865546218487e-05, "loss": 0.4938, "step": 15176 }, { "epoch": 19.42656, "grad_norm": 1.0761048793792725, "learning_rate": 1.966186474589836e-05, "loss": 0.5025, "step": 15177 }, { "epoch": 19.42784, "grad_norm": 1.1409354209899902, "learning_rate": 1.9659863945578234e-05, "loss": 0.545, "step": 15178 }, { "epoch": 19.42912, "grad_norm": 1.0487301349639893, "learning_rate": 1.9657863145258103e-05, "loss": 0.4613, "step": 15179 }, { "epoch": 19.4304, "grad_norm": 1.12277090549469, "learning_rate": 1.9655862344937975e-05, "loss": 0.5116, "step": 15180 }, { "epoch": 19.43168, "grad_norm": 1.0349456071853638, "learning_rate": 1.965386154461785e-05, "loss": 0.4875, "step": 15181 }, { "epoch": 19.43296, "grad_norm": 1.0730128288269043, "learning_rate": 1.965186074429772e-05, "loss": 0.4988, "step": 15182 }, { "epoch": 19.43424, "grad_norm": 1.111451268196106, "learning_rate": 1.964985994397759e-05, "loss": 0.4995, "step": 15183 }, { "epoch": 19.43552, "grad_norm": 1.0789837837219238, "learning_rate": 1.9647859143657462e-05, "loss": 0.5073, "step": 15184 }, { "epoch": 19.4368, "grad_norm": 1.0748100280761719, "learning_rate": 1.9645858343337337e-05, "loss": 0.4898, "step": 15185 }, { "epoch": 19.43808, "grad_norm": 1.031887173652649, "learning_rate": 1.964385754301721e-05, "loss": 0.4586, "step": 15186 }, { "epoch": 19.43936, "grad_norm": 1.1339378356933594, "learning_rate": 1.9641856742697078e-05, "loss": 0.5464, "step": 15187 }, { "epoch": 19.44064, "grad_norm": 1.0379207134246826, "learning_rate": 1.9639855942376953e-05, "loss": 0.4321, "step": 15188 }, { "epoch": 19.44192, "grad_norm": 1.0902132987976074, "learning_rate": 1.9637855142056825e-05, "loss": 0.5081, 
"step": 15189 }, { "epoch": 19.4432, "grad_norm": 1.0853605270385742, "learning_rate": 1.9635854341736697e-05, "loss": 0.4636, "step": 15190 }, { "epoch": 19.44448, "grad_norm": 1.0282881259918213, "learning_rate": 1.9633853541416565e-05, "loss": 0.5072, "step": 15191 }, { "epoch": 19.44576, "grad_norm": 1.1072465181350708, "learning_rate": 1.963185274109644e-05, "loss": 0.5032, "step": 15192 }, { "epoch": 19.44704, "grad_norm": 1.0915943384170532, "learning_rate": 1.9629851940776312e-05, "loss": 0.5344, "step": 15193 }, { "epoch": 19.44832, "grad_norm": 1.1000624895095825, "learning_rate": 1.9627851140456184e-05, "loss": 0.449, "step": 15194 }, { "epoch": 19.4496, "grad_norm": 1.1032449007034302, "learning_rate": 1.9625850340136056e-05, "loss": 0.5057, "step": 15195 }, { "epoch": 19.45088, "grad_norm": 1.0936930179595947, "learning_rate": 1.9623849539815928e-05, "loss": 0.5342, "step": 15196 }, { "epoch": 19.45216, "grad_norm": 1.1406618356704712, "learning_rate": 1.96218487394958e-05, "loss": 0.514, "step": 15197 }, { "epoch": 19.45344, "grad_norm": 1.1138516664505005, "learning_rate": 1.961984793917567e-05, "loss": 0.4917, "step": 15198 }, { "epoch": 19.454720000000002, "grad_norm": 1.051370620727539, "learning_rate": 1.9617847138855543e-05, "loss": 0.497, "step": 15199 }, { "epoch": 19.456, "grad_norm": 1.126387357711792, "learning_rate": 1.9615846338535415e-05, "loss": 0.5121, "step": 15200 }, { "epoch": 19.45728, "grad_norm": 1.0682495832443237, "learning_rate": 1.9613845538215287e-05, "loss": 0.5013, "step": 15201 }, { "epoch": 19.45856, "grad_norm": 1.1218750476837158, "learning_rate": 1.961184473789516e-05, "loss": 0.5307, "step": 15202 }, { "epoch": 19.45984, "grad_norm": 1.0630134344100952, "learning_rate": 1.960984393757503e-05, "loss": 0.4964, "step": 15203 }, { "epoch": 19.46112, "grad_norm": 1.0802773237228394, "learning_rate": 1.9607843137254903e-05, "loss": 0.493, "step": 15204 }, { "epoch": 19.4624, "grad_norm": 1.061515212059021, "learning_rate": 
1.9605842336934774e-05, "loss": 0.5217, "step": 15205 }, { "epoch": 19.46368, "grad_norm": 1.0091540813446045, "learning_rate": 1.9603841536614646e-05, "loss": 0.4742, "step": 15206 }, { "epoch": 19.46496, "grad_norm": 1.0679512023925781, "learning_rate": 1.9601840736294518e-05, "loss": 0.5139, "step": 15207 }, { "epoch": 19.46624, "grad_norm": 1.019675850868225, "learning_rate": 1.959983993597439e-05, "loss": 0.4558, "step": 15208 }, { "epoch": 19.46752, "grad_norm": 1.1162679195404053, "learning_rate": 1.9597839135654265e-05, "loss": 0.4691, "step": 15209 }, { "epoch": 19.4688, "grad_norm": 1.0296701192855835, "learning_rate": 1.9595838335334134e-05, "loss": 0.4811, "step": 15210 }, { "epoch": 19.47008, "grad_norm": 1.0854653120040894, "learning_rate": 1.9593837535014005e-05, "loss": 0.5192, "step": 15211 }, { "epoch": 19.47136, "grad_norm": 1.088734745979309, "learning_rate": 1.9591836734693877e-05, "loss": 0.507, "step": 15212 }, { "epoch": 19.47264, "grad_norm": 1.077596664428711, "learning_rate": 1.9589835934373753e-05, "loss": 0.4915, "step": 15213 }, { "epoch": 19.47392, "grad_norm": 1.1279915571212769, "learning_rate": 1.958783513405362e-05, "loss": 0.5085, "step": 15214 }, { "epoch": 19.4752, "grad_norm": 1.0682718753814697, "learning_rate": 1.9585834333733493e-05, "loss": 0.4905, "step": 15215 }, { "epoch": 19.47648, "grad_norm": 1.0742157697677612, "learning_rate": 1.9583833533413365e-05, "loss": 0.4806, "step": 15216 }, { "epoch": 19.47776, "grad_norm": 1.042440414428711, "learning_rate": 1.958183273309324e-05, "loss": 0.5013, "step": 15217 }, { "epoch": 19.47904, "grad_norm": 1.071058750152588, "learning_rate": 1.957983193277311e-05, "loss": 0.5188, "step": 15218 }, { "epoch": 19.48032, "grad_norm": 1.0596333742141724, "learning_rate": 1.957783113245298e-05, "loss": 0.5147, "step": 15219 }, { "epoch": 19.4816, "grad_norm": 1.0863871574401855, "learning_rate": 1.9575830332132856e-05, "loss": 0.5211, "step": 15220 }, { "epoch": 19.48288, "grad_norm": 
1.0600311756134033, "learning_rate": 1.9573829531812727e-05, "loss": 0.4686, "step": 15221 }, { "epoch": 19.48416, "grad_norm": 1.0868990421295166, "learning_rate": 1.9571828731492596e-05, "loss": 0.5152, "step": 15222 }, { "epoch": 19.48544, "grad_norm": 1.065700888633728, "learning_rate": 1.9569827931172468e-05, "loss": 0.4934, "step": 15223 }, { "epoch": 19.48672, "grad_norm": 1.0733686685562134, "learning_rate": 1.9567827130852343e-05, "loss": 0.4662, "step": 15224 }, { "epoch": 19.488, "grad_norm": 1.0808594226837158, "learning_rate": 1.9565826330532215e-05, "loss": 0.497, "step": 15225 }, { "epoch": 19.48928, "grad_norm": 1.066211223602295, "learning_rate": 1.9563825530212083e-05, "loss": 0.549, "step": 15226 }, { "epoch": 19.49056, "grad_norm": 1.0072721242904663, "learning_rate": 1.956182472989196e-05, "loss": 0.4312, "step": 15227 }, { "epoch": 19.49184, "grad_norm": 1.107842206954956, "learning_rate": 1.955982392957183e-05, "loss": 0.4793, "step": 15228 }, { "epoch": 19.49312, "grad_norm": 1.1411617994308472, "learning_rate": 1.9557823129251702e-05, "loss": 0.534, "step": 15229 }, { "epoch": 19.4944, "grad_norm": 1.0612369775772095, "learning_rate": 1.955582232893157e-05, "loss": 0.5087, "step": 15230 }, { "epoch": 19.49568, "grad_norm": 1.0148245096206665, "learning_rate": 1.9553821528611446e-05, "loss": 0.4518, "step": 15231 }, { "epoch": 19.49696, "grad_norm": 1.133119821548462, "learning_rate": 1.9551820728291318e-05, "loss": 0.5301, "step": 15232 }, { "epoch": 19.49824, "grad_norm": 1.0392405986785889, "learning_rate": 1.954981992797119e-05, "loss": 0.4912, "step": 15233 }, { "epoch": 19.49952, "grad_norm": 1.0500421524047852, "learning_rate": 1.954781912765106e-05, "loss": 0.4866, "step": 15234 }, { "epoch": 19.5008, "grad_norm": 1.0911407470703125, "learning_rate": 1.9545818327330933e-05, "loss": 0.5114, "step": 15235 }, { "epoch": 19.50208, "grad_norm": 1.0419782400131226, "learning_rate": 1.9543817527010805e-05, "loss": 0.4925, "step": 15236 }, { 
"epoch": 19.50336, "grad_norm": 1.1287118196487427, "learning_rate": 1.9541816726690677e-05, "loss": 0.5297, "step": 15237 }, { "epoch": 19.50464, "grad_norm": 1.0417295694351196, "learning_rate": 1.953981592637055e-05, "loss": 0.4834, "step": 15238 }, { "epoch": 19.50592, "grad_norm": 1.098235011100769, "learning_rate": 1.953781512605042e-05, "loss": 0.5075, "step": 15239 }, { "epoch": 19.5072, "grad_norm": 1.0871201753616333, "learning_rate": 1.9535814325730293e-05, "loss": 0.4985, "step": 15240 }, { "epoch": 19.50848, "grad_norm": 1.1194583177566528, "learning_rate": 1.9533813525410165e-05, "loss": 0.5293, "step": 15241 }, { "epoch": 19.50976, "grad_norm": 1.0736286640167236, "learning_rate": 1.9531812725090036e-05, "loss": 0.488, "step": 15242 }, { "epoch": 19.51104, "grad_norm": 1.0650372505187988, "learning_rate": 1.9529811924769908e-05, "loss": 0.4828, "step": 15243 }, { "epoch": 19.51232, "grad_norm": 1.0762523412704468, "learning_rate": 1.952781112444978e-05, "loss": 0.5154, "step": 15244 }, { "epoch": 19.5136, "grad_norm": 1.1131715774536133, "learning_rate": 1.9525810324129652e-05, "loss": 0.4885, "step": 15245 }, { "epoch": 19.51488, "grad_norm": 1.0484470129013062, "learning_rate": 1.9523809523809524e-05, "loss": 0.4878, "step": 15246 }, { "epoch": 19.51616, "grad_norm": 1.0681511163711548, "learning_rate": 1.9521808723489396e-05, "loss": 0.5208, "step": 15247 }, { "epoch": 19.51744, "grad_norm": 1.0821027755737305, "learning_rate": 1.951980792316927e-05, "loss": 0.53, "step": 15248 }, { "epoch": 19.51872, "grad_norm": 1.14222252368927, "learning_rate": 1.951780712284914e-05, "loss": 0.5255, "step": 15249 }, { "epoch": 19.52, "grad_norm": 1.0315016508102417, "learning_rate": 1.951580632252901e-05, "loss": 0.4661, "step": 15250 }, { "epoch": 19.52128, "grad_norm": 1.0619421005249023, "learning_rate": 1.9513805522208883e-05, "loss": 0.5088, "step": 15251 }, { "epoch": 19.52256, "grad_norm": 1.1743899583816528, "learning_rate": 1.951180472188876e-05, 
"loss": 0.5224, "step": 15252 }, { "epoch": 19.52384, "grad_norm": 1.0858947038650513, "learning_rate": 1.9509803921568627e-05, "loss": 0.4706, "step": 15253 }, { "epoch": 19.52512, "grad_norm": 1.1015316247940063, "learning_rate": 1.95078031212485e-05, "loss": 0.5343, "step": 15254 }, { "epoch": 19.5264, "grad_norm": 1.0959560871124268, "learning_rate": 1.9505802320928374e-05, "loss": 0.4969, "step": 15255 }, { "epoch": 19.52768, "grad_norm": 1.0881457328796387, "learning_rate": 1.9503801520608246e-05, "loss": 0.5351, "step": 15256 }, { "epoch": 19.52896, "grad_norm": 1.0661768913269043, "learning_rate": 1.9501800720288114e-05, "loss": 0.4861, "step": 15257 }, { "epoch": 19.53024, "grad_norm": 1.1012307405471802, "learning_rate": 1.9499799919967986e-05, "loss": 0.5191, "step": 15258 }, { "epoch": 19.53152, "grad_norm": 1.0949671268463135, "learning_rate": 1.949779911964786e-05, "loss": 0.5407, "step": 15259 }, { "epoch": 19.5328, "grad_norm": 1.0634304285049438, "learning_rate": 1.9495798319327733e-05, "loss": 0.4695, "step": 15260 }, { "epoch": 19.53408, "grad_norm": 1.0616800785064697, "learning_rate": 1.9493797519007602e-05, "loss": 0.4984, "step": 15261 }, { "epoch": 19.53536, "grad_norm": 1.1313798427581787, "learning_rate": 1.9491796718687477e-05, "loss": 0.5477, "step": 15262 }, { "epoch": 19.53664, "grad_norm": 1.0650761127471924, "learning_rate": 1.948979591836735e-05, "loss": 0.5243, "step": 15263 }, { "epoch": 19.53792, "grad_norm": 1.0772321224212646, "learning_rate": 1.948779511804722e-05, "loss": 0.4764, "step": 15264 }, { "epoch": 19.5392, "grad_norm": 1.0623836517333984, "learning_rate": 1.948579431772709e-05, "loss": 0.5006, "step": 15265 }, { "epoch": 19.54048, "grad_norm": 0.9955456852912903, "learning_rate": 1.9483793517406964e-05, "loss": 0.4675, "step": 15266 }, { "epoch": 19.54176, "grad_norm": 1.0686817169189453, "learning_rate": 1.9481792717086836e-05, "loss": 0.5182, "step": 15267 }, { "epoch": 19.54304, "grad_norm": 1.1312791109085083, 
"learning_rate": 1.9479791916766708e-05, "loss": 0.5157, "step": 15268 }, { "epoch": 19.54432, "grad_norm": 1.1050148010253906, "learning_rate": 1.947779111644658e-05, "loss": 0.5053, "step": 15269 }, { "epoch": 19.5456, "grad_norm": 1.0860576629638672, "learning_rate": 1.9475790316126452e-05, "loss": 0.4846, "step": 15270 }, { "epoch": 19.54688, "grad_norm": 1.0629215240478516, "learning_rate": 1.9473789515806324e-05, "loss": 0.5131, "step": 15271 }, { "epoch": 19.54816, "grad_norm": 1.05284583568573, "learning_rate": 1.9471788715486196e-05, "loss": 0.4852, "step": 15272 }, { "epoch": 19.54944, "grad_norm": 1.0399121046066284, "learning_rate": 1.9469787915166067e-05, "loss": 0.4853, "step": 15273 }, { "epoch": 19.55072, "grad_norm": 1.0178717374801636, "learning_rate": 1.946778711484594e-05, "loss": 0.4905, "step": 15274 }, { "epoch": 19.552, "grad_norm": 1.0732840299606323, "learning_rate": 1.946578631452581e-05, "loss": 0.5017, "step": 15275 }, { "epoch": 19.55328, "grad_norm": 1.1350263357162476, "learning_rate": 1.9463785514205683e-05, "loss": 0.5214, "step": 15276 }, { "epoch": 19.55456, "grad_norm": 1.050378441810608, "learning_rate": 1.9461784713885555e-05, "loss": 0.4801, "step": 15277 }, { "epoch": 19.55584, "grad_norm": 1.0464566946029663, "learning_rate": 1.9459783913565427e-05, "loss": 0.4978, "step": 15278 }, { "epoch": 19.55712, "grad_norm": 1.0169779062271118, "learning_rate": 1.94577831132453e-05, "loss": 0.4731, "step": 15279 }, { "epoch": 19.5584, "grad_norm": 1.0549458265304565, "learning_rate": 1.945578231292517e-05, "loss": 0.498, "step": 15280 }, { "epoch": 19.55968, "grad_norm": 1.0301837921142578, "learning_rate": 1.9453781512605042e-05, "loss": 0.5407, "step": 15281 }, { "epoch": 19.56096, "grad_norm": 1.0875555276870728, "learning_rate": 1.9451780712284914e-05, "loss": 0.509, "step": 15282 }, { "epoch": 19.56224, "grad_norm": 1.072562336921692, "learning_rate": 1.9449779911964786e-05, "loss": 0.4804, "step": 15283 }, { "epoch": 19.56352, 
"grad_norm": 1.0816986560821533, "learning_rate": 1.9447779111644658e-05, "loss": 0.5124, "step": 15284 }, { "epoch": 19.564799999999998, "grad_norm": 1.0577350854873657, "learning_rate": 1.944577831132453e-05, "loss": 0.5007, "step": 15285 }, { "epoch": 19.56608, "grad_norm": 1.0548467636108398, "learning_rate": 1.94437775110044e-05, "loss": 0.4651, "step": 15286 }, { "epoch": 19.56736, "grad_norm": 1.0653223991394043, "learning_rate": 1.9441776710684277e-05, "loss": 0.4804, "step": 15287 }, { "epoch": 19.56864, "grad_norm": 1.1180814504623413, "learning_rate": 1.9439775910364145e-05, "loss": 0.4905, "step": 15288 }, { "epoch": 19.56992, "grad_norm": 1.0680838823318481, "learning_rate": 1.9437775110044017e-05, "loss": 0.4631, "step": 15289 }, { "epoch": 19.5712, "grad_norm": 1.106482982635498, "learning_rate": 1.943577430972389e-05, "loss": 0.4835, "step": 15290 }, { "epoch": 19.57248, "grad_norm": 1.105340838432312, "learning_rate": 1.9433773509403764e-05, "loss": 0.5132, "step": 15291 }, { "epoch": 19.57376, "grad_norm": 1.0390416383743286, "learning_rate": 1.9431772709083633e-05, "loss": 0.4297, "step": 15292 }, { "epoch": 19.57504, "grad_norm": 1.0809773206710815, "learning_rate": 1.9429771908763505e-05, "loss": 0.5219, "step": 15293 }, { "epoch": 19.57632, "grad_norm": 0.9835126996040344, "learning_rate": 1.942777110844338e-05, "loss": 0.4615, "step": 15294 }, { "epoch": 19.5776, "grad_norm": 1.0638185739517212, "learning_rate": 1.942577030812325e-05, "loss": 0.511, "step": 15295 }, { "epoch": 19.57888, "grad_norm": 1.027719259262085, "learning_rate": 1.942376950780312e-05, "loss": 0.4703, "step": 15296 }, { "epoch": 19.58016, "grad_norm": 1.035800814628601, "learning_rate": 1.9421768707482992e-05, "loss": 0.4904, "step": 15297 }, { "epoch": 19.58144, "grad_norm": 1.0650843381881714, "learning_rate": 1.9419767907162867e-05, "loss": 0.512, "step": 15298 }, { "epoch": 19.58272, "grad_norm": 1.0700833797454834, "learning_rate": 1.941776710684274e-05, "loss": 
0.494, "step": 15299 }, { "epoch": 19.584, "grad_norm": 1.0548943281173706, "learning_rate": 1.9415766306522608e-05, "loss": 0.4706, "step": 15300 }, { "epoch": 19.58528, "grad_norm": 1.083795189857483, "learning_rate": 1.9413765506202483e-05, "loss": 0.4939, "step": 15301 }, { "epoch": 19.58656, "grad_norm": 1.0601279735565186, "learning_rate": 1.9411764705882355e-05, "loss": 0.4932, "step": 15302 }, { "epoch": 19.58784, "grad_norm": 1.109287977218628, "learning_rate": 1.9409763905562226e-05, "loss": 0.5211, "step": 15303 }, { "epoch": 19.58912, "grad_norm": 1.0838240385055542, "learning_rate": 1.9407763105242095e-05, "loss": 0.4992, "step": 15304 }, { "epoch": 19.5904, "grad_norm": 1.0001522302627563, "learning_rate": 1.940576230492197e-05, "loss": 0.4945, "step": 15305 }, { "epoch": 19.59168, "grad_norm": 1.0223277807235718, "learning_rate": 1.9403761504601842e-05, "loss": 0.4509, "step": 15306 }, { "epoch": 19.59296, "grad_norm": 1.0435092449188232, "learning_rate": 1.9401760704281714e-05, "loss": 0.4684, "step": 15307 }, { "epoch": 19.59424, "grad_norm": 1.0610374212265015, "learning_rate": 1.9399759903961586e-05, "loss": 0.5055, "step": 15308 }, { "epoch": 19.59552, "grad_norm": 1.1668132543563843, "learning_rate": 1.9397759103641458e-05, "loss": 0.5376, "step": 15309 }, { "epoch": 19.5968, "grad_norm": 1.0593454837799072, "learning_rate": 1.939575830332133e-05, "loss": 0.4956, "step": 15310 }, { "epoch": 19.59808, "grad_norm": 1.0181503295898438, "learning_rate": 1.93937575030012e-05, "loss": 0.4587, "step": 15311 }, { "epoch": 19.59936, "grad_norm": 1.0744041204452515, "learning_rate": 1.9391756702681073e-05, "loss": 0.4853, "step": 15312 }, { "epoch": 19.60064, "grad_norm": 1.075843334197998, "learning_rate": 1.9389755902360945e-05, "loss": 0.5086, "step": 15313 }, { "epoch": 19.60192, "grad_norm": 1.0893293619155884, "learning_rate": 1.9387755102040817e-05, "loss": 0.5133, "step": 15314 }, { "epoch": 19.6032, "grad_norm": 1.1128917932510376, 
"learning_rate": 1.9385754301720692e-05, "loss": 0.524, "step": 15315 }, { "epoch": 19.60448, "grad_norm": 1.0819975137710571, "learning_rate": 1.938375350140056e-05, "loss": 0.4789, "step": 15316 }, { "epoch": 19.60576, "grad_norm": 1.0744059085845947, "learning_rate": 1.9381752701080432e-05, "loss": 0.4927, "step": 15317 }, { "epoch": 19.60704, "grad_norm": 1.0915199518203735, "learning_rate": 1.9379751900760304e-05, "loss": 0.5235, "step": 15318 }, { "epoch": 19.60832, "grad_norm": 1.0718075037002563, "learning_rate": 1.937775110044018e-05, "loss": 0.4807, "step": 15319 }, { "epoch": 19.6096, "grad_norm": 1.1020848751068115, "learning_rate": 1.9375750300120048e-05, "loss": 0.5598, "step": 15320 }, { "epoch": 19.61088, "grad_norm": 1.1813325881958008, "learning_rate": 1.937374949979992e-05, "loss": 0.5096, "step": 15321 }, { "epoch": 19.61216, "grad_norm": 1.097054362297058, "learning_rate": 1.9371748699479795e-05, "loss": 0.4994, "step": 15322 }, { "epoch": 19.61344, "grad_norm": 1.1227426528930664, "learning_rate": 1.9369747899159667e-05, "loss": 0.517, "step": 15323 }, { "epoch": 19.61472, "grad_norm": 1.0609750747680664, "learning_rate": 1.9367747098839535e-05, "loss": 0.4834, "step": 15324 }, { "epoch": 19.616, "grad_norm": 1.1133859157562256, "learning_rate": 1.9365746298519407e-05, "loss": 0.5274, "step": 15325 }, { "epoch": 19.61728, "grad_norm": 1.035995602607727, "learning_rate": 1.9363745498199283e-05, "loss": 0.4571, "step": 15326 }, { "epoch": 19.61856, "grad_norm": 1.0632821321487427, "learning_rate": 1.9361744697879154e-05, "loss": 0.4931, "step": 15327 }, { "epoch": 19.61984, "grad_norm": 1.1032476425170898, "learning_rate": 1.9359743897559023e-05, "loss": 0.4829, "step": 15328 }, { "epoch": 19.62112, "grad_norm": 1.1157118082046509, "learning_rate": 1.9357743097238895e-05, "loss": 0.5142, "step": 15329 }, { "epoch": 19.6224, "grad_norm": 1.0863420963287354, "learning_rate": 1.935574229691877e-05, "loss": 0.5191, "step": 15330 }, { "epoch": 
19.62368, "grad_norm": 1.044219970703125, "learning_rate": 1.9353741496598642e-05, "loss": 0.4757, "step": 15331 }, { "epoch": 19.62496, "grad_norm": 1.0467369556427002, "learning_rate": 1.935174069627851e-05, "loss": 0.5285, "step": 15332 }, { "epoch": 19.62624, "grad_norm": 1.1456965208053589, "learning_rate": 1.9349739895958386e-05, "loss": 0.5265, "step": 15333 }, { "epoch": 19.62752, "grad_norm": 1.1731479167938232, "learning_rate": 1.9347739095638257e-05, "loss": 0.5279, "step": 15334 }, { "epoch": 19.6288, "grad_norm": 1.0172317028045654, "learning_rate": 1.934573829531813e-05, "loss": 0.4484, "step": 15335 }, { "epoch": 19.63008, "grad_norm": 1.009181022644043, "learning_rate": 1.9343737494997998e-05, "loss": 0.4591, "step": 15336 }, { "epoch": 19.63136, "grad_norm": 1.1361030340194702, "learning_rate": 1.9341736694677873e-05, "loss": 0.5232, "step": 15337 }, { "epoch": 19.63264, "grad_norm": 1.0451850891113281, "learning_rate": 1.9339735894357745e-05, "loss": 0.4996, "step": 15338 }, { "epoch": 19.63392, "grad_norm": 1.0648033618927002, "learning_rate": 1.9337735094037617e-05, "loss": 0.5315, "step": 15339 }, { "epoch": 19.6352, "grad_norm": 1.0276292562484741, "learning_rate": 1.933573429371749e-05, "loss": 0.4351, "step": 15340 }, { "epoch": 19.63648, "grad_norm": 1.0635597705841064, "learning_rate": 1.933373349339736e-05, "loss": 0.5164, "step": 15341 }, { "epoch": 19.63776, "grad_norm": 1.0696072578430176, "learning_rate": 1.9331732693077232e-05, "loss": 0.4814, "step": 15342 }, { "epoch": 19.63904, "grad_norm": 1.0718597173690796, "learning_rate": 1.9329731892757104e-05, "loss": 0.5205, "step": 15343 }, { "epoch": 19.64032, "grad_norm": 1.0730284452438354, "learning_rate": 1.9327731092436976e-05, "loss": 0.5375, "step": 15344 }, { "epoch": 19.6416, "grad_norm": 1.0293726921081543, "learning_rate": 1.9325730292116848e-05, "loss": 0.4832, "step": 15345 }, { "epoch": 19.64288, "grad_norm": 1.0186909437179565, "learning_rate": 1.932372949179672e-05, 
"loss": 0.4701, "step": 15346 }, { "epoch": 19.64416, "grad_norm": 1.0497504472732544, "learning_rate": 1.932172869147659e-05, "loss": 0.4562, "step": 15347 }, { "epoch": 19.64544, "grad_norm": 1.1007732152938843, "learning_rate": 1.9319727891156463e-05, "loss": 0.5467, "step": 15348 }, { "epoch": 19.64672, "grad_norm": 1.1249552965164185, "learning_rate": 1.9317727090836335e-05, "loss": 0.4875, "step": 15349 }, { "epoch": 19.648, "grad_norm": 1.0523337125778198, "learning_rate": 1.9315726290516207e-05, "loss": 0.4888, "step": 15350 }, { "epoch": 19.64928, "grad_norm": 1.131332516670227, "learning_rate": 1.931372549019608e-05, "loss": 0.5072, "step": 15351 }, { "epoch": 19.65056, "grad_norm": 1.137013554573059, "learning_rate": 1.931172468987595e-05, "loss": 0.4821, "step": 15352 }, { "epoch": 19.65184, "grad_norm": 1.0466101169586182, "learning_rate": 1.9309723889555823e-05, "loss": 0.4602, "step": 15353 }, { "epoch": 19.65312, "grad_norm": 0.9879499673843384, "learning_rate": 1.9307723089235698e-05, "loss": 0.4225, "step": 15354 }, { "epoch": 19.6544, "grad_norm": 1.0248045921325684, "learning_rate": 1.9305722288915566e-05, "loss": 0.4888, "step": 15355 }, { "epoch": 19.65568, "grad_norm": 1.0874507427215576, "learning_rate": 1.9303721488595438e-05, "loss": 0.5084, "step": 15356 }, { "epoch": 19.65696, "grad_norm": 1.0725950002670288, "learning_rate": 1.930172068827531e-05, "loss": 0.5525, "step": 15357 }, { "epoch": 19.65824, "grad_norm": 1.0607742071151733, "learning_rate": 1.9299719887955185e-05, "loss": 0.5004, "step": 15358 }, { "epoch": 19.65952, "grad_norm": 1.1186076402664185, "learning_rate": 1.9297719087635054e-05, "loss": 0.5398, "step": 15359 }, { "epoch": 19.660800000000002, "grad_norm": 1.0648303031921387, "learning_rate": 1.9295718287314926e-05, "loss": 0.4627, "step": 15360 }, { "epoch": 19.66208, "grad_norm": 1.0890082120895386, "learning_rate": 1.92937174869948e-05, "loss": 0.5041, "step": 15361 }, { "epoch": 19.66336, "grad_norm": 
1.079013705253601, "learning_rate": 1.9291716686674673e-05, "loss": 0.5148, "step": 15362 }, { "epoch": 19.66464, "grad_norm": 1.1085528135299683, "learning_rate": 1.928971588635454e-05, "loss": 0.534, "step": 15363 }, { "epoch": 19.66592, "grad_norm": 1.06136953830719, "learning_rate": 1.9287715086034413e-05, "loss": 0.4668, "step": 15364 }, { "epoch": 19.6672, "grad_norm": 1.0138074159622192, "learning_rate": 1.928571428571429e-05, "loss": 0.4922, "step": 15365 }, { "epoch": 19.66848, "grad_norm": 1.0837033987045288, "learning_rate": 1.928371348539416e-05, "loss": 0.5442, "step": 15366 }, { "epoch": 19.66976, "grad_norm": 1.1243685483932495, "learning_rate": 1.928171268507403e-05, "loss": 0.4993, "step": 15367 }, { "epoch": 19.67104, "grad_norm": 1.0624139308929443, "learning_rate": 1.9279711884753904e-05, "loss": 0.4684, "step": 15368 }, { "epoch": 19.67232, "grad_norm": 1.0839632749557495, "learning_rate": 1.9277711084433776e-05, "loss": 0.4872, "step": 15369 }, { "epoch": 19.6736, "grad_norm": 1.078340768814087, "learning_rate": 1.9275710284113648e-05, "loss": 0.4678, "step": 15370 }, { "epoch": 19.67488, "grad_norm": 1.0516289472579956, "learning_rate": 1.9273709483793516e-05, "loss": 0.4567, "step": 15371 }, { "epoch": 19.67616, "grad_norm": 1.0610841512680054, "learning_rate": 1.927170868347339e-05, "loss": 0.4731, "step": 15372 }, { "epoch": 19.67744, "grad_norm": 1.0811412334442139, "learning_rate": 1.9269707883153263e-05, "loss": 0.4978, "step": 15373 }, { "epoch": 19.67872, "grad_norm": 1.0839446783065796, "learning_rate": 1.9267707082833135e-05, "loss": 0.4874, "step": 15374 }, { "epoch": 19.68, "grad_norm": 1.06401526927948, "learning_rate": 1.9265706282513007e-05, "loss": 0.508, "step": 15375 }, { "epoch": 19.68128, "grad_norm": 1.0685803890228271, "learning_rate": 1.926370548219288e-05, "loss": 0.507, "step": 15376 }, { "epoch": 19.68256, "grad_norm": 1.075534462928772, "learning_rate": 1.926170468187275e-05, "loss": 0.5059, "step": 15377 }, { 
"epoch": 19.68384, "grad_norm": 1.0783944129943848, "learning_rate": 1.9259703881552623e-05, "loss": 0.4845, "step": 15378 }, { "epoch": 19.68512, "grad_norm": 1.0071983337402344, "learning_rate": 1.9257703081232494e-05, "loss": 0.4961, "step": 15379 }, { "epoch": 19.6864, "grad_norm": 1.0298351049423218, "learning_rate": 1.9255702280912366e-05, "loss": 0.512, "step": 15380 }, { "epoch": 19.68768, "grad_norm": 1.0420536994934082, "learning_rate": 1.9253701480592238e-05, "loss": 0.5143, "step": 15381 }, { "epoch": 19.68896, "grad_norm": 1.153039813041687, "learning_rate": 1.925170068027211e-05, "loss": 0.5164, "step": 15382 }, { "epoch": 19.69024, "grad_norm": 1.0602798461914062, "learning_rate": 1.9249699879951982e-05, "loss": 0.5358, "step": 15383 }, { "epoch": 19.69152, "grad_norm": 1.0772628784179688, "learning_rate": 1.9247699079631854e-05, "loss": 0.4898, "step": 15384 }, { "epoch": 19.6928, "grad_norm": 1.1267281770706177, "learning_rate": 1.9245698279311726e-05, "loss": 0.5324, "step": 15385 }, { "epoch": 19.69408, "grad_norm": 1.059523344039917, "learning_rate": 1.9243697478991597e-05, "loss": 0.4948, "step": 15386 }, { "epoch": 19.69536, "grad_norm": 1.0987871885299683, "learning_rate": 1.924169667867147e-05, "loss": 0.4947, "step": 15387 }, { "epoch": 19.69664, "grad_norm": 1.0897448062896729, "learning_rate": 1.923969587835134e-05, "loss": 0.5271, "step": 15388 }, { "epoch": 19.69792, "grad_norm": 1.0368708372116089, "learning_rate": 1.9237695078031213e-05, "loss": 0.4986, "step": 15389 }, { "epoch": 19.6992, "grad_norm": 1.063676357269287, "learning_rate": 1.9235694277711085e-05, "loss": 0.4886, "step": 15390 }, { "epoch": 19.70048, "grad_norm": 1.1547781229019165, "learning_rate": 1.9233693477390957e-05, "loss": 0.5081, "step": 15391 }, { "epoch": 19.70176, "grad_norm": 1.0699111223220825, "learning_rate": 1.923169267707083e-05, "loss": 0.4777, "step": 15392 }, { "epoch": 19.70304, "grad_norm": 1.0858879089355469, "learning_rate": 
1.9229691876750704e-05, "loss": 0.5082, "step": 15393 }, { "epoch": 19.70432, "grad_norm": 1.145923376083374, "learning_rate": 1.9227691076430572e-05, "loss": 0.5587, "step": 15394 }, { "epoch": 19.7056, "grad_norm": 1.0954890251159668, "learning_rate": 1.9225690276110444e-05, "loss": 0.5212, "step": 15395 }, { "epoch": 19.706879999999998, "grad_norm": 1.0601500272750854, "learning_rate": 1.9223689475790316e-05, "loss": 0.4863, "step": 15396 }, { "epoch": 19.70816, "grad_norm": 1.0982835292816162, "learning_rate": 1.922168867547019e-05, "loss": 0.4968, "step": 15397 }, { "epoch": 19.70944, "grad_norm": 1.0785760879516602, "learning_rate": 1.921968787515006e-05, "loss": 0.5218, "step": 15398 }, { "epoch": 19.71072, "grad_norm": 1.069139003753662, "learning_rate": 1.921768707482993e-05, "loss": 0.4671, "step": 15399 }, { "epoch": 19.712, "grad_norm": 1.085194706916809, "learning_rate": 1.9215686274509807e-05, "loss": 0.5103, "step": 15400 }, { "epoch": 19.71328, "grad_norm": 1.1250481605529785, "learning_rate": 1.921368547418968e-05, "loss": 0.5061, "step": 15401 }, { "epoch": 19.71456, "grad_norm": 1.0836914777755737, "learning_rate": 1.9211684673869547e-05, "loss": 0.4801, "step": 15402 }, { "epoch": 19.71584, "grad_norm": 1.1382417678833008, "learning_rate": 1.920968387354942e-05, "loss": 0.5333, "step": 15403 }, { "epoch": 19.71712, "grad_norm": 1.0880017280578613, "learning_rate": 1.9207683073229294e-05, "loss": 0.5191, "step": 15404 }, { "epoch": 19.7184, "grad_norm": 1.089227557182312, "learning_rate": 1.9205682272909166e-05, "loss": 0.4923, "step": 15405 }, { "epoch": 19.71968, "grad_norm": 1.0711054801940918, "learning_rate": 1.9203681472589035e-05, "loss": 0.5067, "step": 15406 }, { "epoch": 19.72096, "grad_norm": 1.0419178009033203, "learning_rate": 1.920168067226891e-05, "loss": 0.4903, "step": 15407 }, { "epoch": 19.72224, "grad_norm": 1.0852587223052979, "learning_rate": 1.919967987194878e-05, "loss": 0.5343, "step": 15408 }, { "epoch": 19.72352, 
"grad_norm": 1.0830180644989014, "learning_rate": 1.9197679071628653e-05, "loss": 0.5289, "step": 15409 }, { "epoch": 19.7248, "grad_norm": 1.050003170967102, "learning_rate": 1.9195678271308522e-05, "loss": 0.4923, "step": 15410 }, { "epoch": 19.72608, "grad_norm": 1.1027849912643433, "learning_rate": 1.9193677470988397e-05, "loss": 0.5719, "step": 15411 }, { "epoch": 19.72736, "grad_norm": 1.0838650465011597, "learning_rate": 1.919167667066827e-05, "loss": 0.5167, "step": 15412 }, { "epoch": 19.72864, "grad_norm": 1.0345534086227417, "learning_rate": 1.918967587034814e-05, "loss": 0.4831, "step": 15413 }, { "epoch": 19.72992, "grad_norm": 1.0454968214035034, "learning_rate": 1.9187675070028013e-05, "loss": 0.5205, "step": 15414 }, { "epoch": 19.7312, "grad_norm": 1.0153090953826904, "learning_rate": 1.9185674269707885e-05, "loss": 0.5044, "step": 15415 }, { "epoch": 19.73248, "grad_norm": 1.0405809879302979, "learning_rate": 1.9183673469387756e-05, "loss": 0.4734, "step": 15416 }, { "epoch": 19.73376, "grad_norm": 1.0736857652664185, "learning_rate": 1.918167266906763e-05, "loss": 0.5145, "step": 15417 }, { "epoch": 19.73504, "grad_norm": 1.0757614374160767, "learning_rate": 1.91796718687475e-05, "loss": 0.5051, "step": 15418 }, { "epoch": 19.73632, "grad_norm": 1.074829339981079, "learning_rate": 1.9177671068427372e-05, "loss": 0.5238, "step": 15419 }, { "epoch": 19.7376, "grad_norm": 1.0481175184249878, "learning_rate": 1.9175670268107244e-05, "loss": 0.501, "step": 15420 }, { "epoch": 19.73888, "grad_norm": 1.1353001594543457, "learning_rate": 1.9173669467787116e-05, "loss": 0.4727, "step": 15421 }, { "epoch": 19.74016, "grad_norm": 1.078800082206726, "learning_rate": 1.9171668667466988e-05, "loss": 0.4876, "step": 15422 }, { "epoch": 19.74144, "grad_norm": 1.0725644826889038, "learning_rate": 1.916966786714686e-05, "loss": 0.489, "step": 15423 }, { "epoch": 19.74272, "grad_norm": 1.0209654569625854, "learning_rate": 1.916766706682673e-05, "loss": 0.4757, 
"step": 15424 }, { "epoch": 19.744, "grad_norm": 1.0331294536590576, "learning_rate": 1.9165666266506603e-05, "loss": 0.5106, "step": 15425 }, { "epoch": 19.74528, "grad_norm": 1.0943084955215454, "learning_rate": 1.9163665466186475e-05, "loss": 0.4626, "step": 15426 }, { "epoch": 19.74656, "grad_norm": 1.099189281463623, "learning_rate": 1.9161664665866347e-05, "loss": 0.4718, "step": 15427 }, { "epoch": 19.74784, "grad_norm": 1.0979981422424316, "learning_rate": 1.9159663865546222e-05, "loss": 0.4987, "step": 15428 }, { "epoch": 19.74912, "grad_norm": 1.0483365058898926, "learning_rate": 1.915766306522609e-05, "loss": 0.4775, "step": 15429 }, { "epoch": 19.7504, "grad_norm": 1.0271607637405396, "learning_rate": 1.9155662264905962e-05, "loss": 0.4741, "step": 15430 }, { "epoch": 19.75168, "grad_norm": 1.0611716508865356, "learning_rate": 1.9153661464585834e-05, "loss": 0.5134, "step": 15431 }, { "epoch": 19.75296, "grad_norm": 1.0353494882583618, "learning_rate": 1.915166066426571e-05, "loss": 0.4661, "step": 15432 }, { "epoch": 19.75424, "grad_norm": 1.0064977407455444, "learning_rate": 1.9149659863945578e-05, "loss": 0.4732, "step": 15433 }, { "epoch": 19.75552, "grad_norm": 1.0152562856674194, "learning_rate": 1.914765906362545e-05, "loss": 0.4847, "step": 15434 }, { "epoch": 19.7568, "grad_norm": 1.1019326448440552, "learning_rate": 1.9145658263305325e-05, "loss": 0.5283, "step": 15435 }, { "epoch": 19.75808, "grad_norm": 1.0624425411224365, "learning_rate": 1.9143657462985197e-05, "loss": 0.4991, "step": 15436 }, { "epoch": 19.75936, "grad_norm": 1.075663685798645, "learning_rate": 1.9141656662665065e-05, "loss": 0.5066, "step": 15437 }, { "epoch": 19.76064, "grad_norm": 1.1295933723449707, "learning_rate": 1.9139655862344937e-05, "loss": 0.575, "step": 15438 }, { "epoch": 19.76192, "grad_norm": 1.036911129951477, "learning_rate": 1.9137655062024813e-05, "loss": 0.4768, "step": 15439 }, { "epoch": 19.7632, "grad_norm": 1.107503056526184, "learning_rate": 
1.9135654261704684e-05, "loss": 0.5262, "step": 15440 }, { "epoch": 19.76448, "grad_norm": 1.0643945932388306, "learning_rate": 1.9133653461384553e-05, "loss": 0.4138, "step": 15441 }, { "epoch": 19.76576, "grad_norm": 1.0519750118255615, "learning_rate": 1.9131652661064425e-05, "loss": 0.4887, "step": 15442 }, { "epoch": 19.76704, "grad_norm": 1.0428272485733032, "learning_rate": 1.91296518607443e-05, "loss": 0.4986, "step": 15443 }, { "epoch": 19.76832, "grad_norm": 1.0842911005020142, "learning_rate": 1.9127651060424172e-05, "loss": 0.5026, "step": 15444 }, { "epoch": 19.7696, "grad_norm": 1.1030406951904297, "learning_rate": 1.912565026010404e-05, "loss": 0.4867, "step": 15445 }, { "epoch": 19.77088, "grad_norm": 1.0993808507919312, "learning_rate": 1.9123649459783916e-05, "loss": 0.4984, "step": 15446 }, { "epoch": 19.77216, "grad_norm": 1.1211193799972534, "learning_rate": 1.9121648659463787e-05, "loss": 0.503, "step": 15447 }, { "epoch": 19.77344, "grad_norm": 1.1349544525146484, "learning_rate": 1.911964785914366e-05, "loss": 0.5267, "step": 15448 }, { "epoch": 19.77472, "grad_norm": 1.1778333187103271, "learning_rate": 1.9117647058823528e-05, "loss": 0.5236, "step": 15449 }, { "epoch": 19.776, "grad_norm": 1.1410446166992188, "learning_rate": 1.9115646258503403e-05, "loss": 0.4975, "step": 15450 }, { "epoch": 19.77728, "grad_norm": 1.0365501642227173, "learning_rate": 1.9113645458183275e-05, "loss": 0.4599, "step": 15451 }, { "epoch": 19.77856, "grad_norm": 1.1023868322372437, "learning_rate": 1.9111644657863147e-05, "loss": 0.5273, "step": 15452 }, { "epoch": 19.77984, "grad_norm": 1.103507399559021, "learning_rate": 1.910964385754302e-05, "loss": 0.4749, "step": 15453 }, { "epoch": 19.78112, "grad_norm": 1.0735512971878052, "learning_rate": 1.910764305722289e-05, "loss": 0.4875, "step": 15454 }, { "epoch": 19.7824, "grad_norm": 1.0677052736282349, "learning_rate": 1.9105642256902762e-05, "loss": 0.5173, "step": 15455 }, { "epoch": 19.78368, "grad_norm": 
1.0987626314163208, "learning_rate": 1.9103641456582634e-05, "loss": 0.5271, "step": 15456 }, { "epoch": 19.78496, "grad_norm": 1.0896143913269043, "learning_rate": 1.9101640656262506e-05, "loss": 0.533, "step": 15457 }, { "epoch": 19.78624, "grad_norm": 1.1035916805267334, "learning_rate": 1.9099639855942378e-05, "loss": 0.5359, "step": 15458 }, { "epoch": 19.78752, "grad_norm": 1.1171396970748901, "learning_rate": 1.909763905562225e-05, "loss": 0.4939, "step": 15459 }, { "epoch": 19.7888, "grad_norm": 1.0793712139129639, "learning_rate": 1.909563825530212e-05, "loss": 0.5262, "step": 15460 }, { "epoch": 19.79008, "grad_norm": 1.0609002113342285, "learning_rate": 1.9093637454981993e-05, "loss": 0.4997, "step": 15461 }, { "epoch": 19.79136, "grad_norm": 1.010659098625183, "learning_rate": 1.9091636654661865e-05, "loss": 0.4642, "step": 15462 }, { "epoch": 19.79264, "grad_norm": 1.043384075164795, "learning_rate": 1.9089635854341737e-05, "loss": 0.5313, "step": 15463 }, { "epoch": 19.79392, "grad_norm": 1.0543464422225952, "learning_rate": 1.908763505402161e-05, "loss": 0.4636, "step": 15464 }, { "epoch": 19.7952, "grad_norm": 1.094262957572937, "learning_rate": 1.908563425370148e-05, "loss": 0.5249, "step": 15465 }, { "epoch": 19.79648, "grad_norm": 1.1226550340652466, "learning_rate": 1.9083633453381353e-05, "loss": 0.55, "step": 15466 }, { "epoch": 19.79776, "grad_norm": 1.1305582523345947, "learning_rate": 1.9081632653061228e-05, "loss": 0.5418, "step": 15467 }, { "epoch": 19.79904, "grad_norm": 1.1155486106872559, "learning_rate": 1.9079631852741096e-05, "loss": 0.5226, "step": 15468 }, { "epoch": 19.80032, "grad_norm": 1.1939311027526855, "learning_rate": 1.9077631052420968e-05, "loss": 0.5122, "step": 15469 }, { "epoch": 19.8016, "grad_norm": 1.0629066228866577, "learning_rate": 1.907563025210084e-05, "loss": 0.4641, "step": 15470 }, { "epoch": 19.802880000000002, "grad_norm": 1.05988609790802, "learning_rate": 1.9073629451780715e-05, "loss": 0.5368, "step": 
15471 }, { "epoch": 19.80416, "grad_norm": 1.1533082723617554, "learning_rate": 1.9071628651460584e-05, "loss": 0.578, "step": 15472 }, { "epoch": 19.80544, "grad_norm": 1.1095813512802124, "learning_rate": 1.9069627851140456e-05, "loss": 0.4966, "step": 15473 }, { "epoch": 19.80672, "grad_norm": 1.067834734916687, "learning_rate": 1.906762705082033e-05, "loss": 0.5486, "step": 15474 }, { "epoch": 19.808, "grad_norm": 1.0650924444198608, "learning_rate": 1.9065626250500203e-05, "loss": 0.5034, "step": 15475 }, { "epoch": 19.80928, "grad_norm": 1.0607784986495972, "learning_rate": 1.906362545018007e-05, "loss": 0.5092, "step": 15476 }, { "epoch": 19.81056, "grad_norm": 1.0503934621810913, "learning_rate": 1.9061624649859943e-05, "loss": 0.4871, "step": 15477 }, { "epoch": 19.81184, "grad_norm": 1.1574764251708984, "learning_rate": 1.905962384953982e-05, "loss": 0.5786, "step": 15478 }, { "epoch": 19.81312, "grad_norm": 1.142246127128601, "learning_rate": 1.905762304921969e-05, "loss": 0.4866, "step": 15479 }, { "epoch": 19.8144, "grad_norm": 1.0628846883773804, "learning_rate": 1.905562224889956e-05, "loss": 0.4879, "step": 15480 }, { "epoch": 19.81568, "grad_norm": 1.1084834337234497, "learning_rate": 1.9053621448579434e-05, "loss": 0.5393, "step": 15481 }, { "epoch": 19.81696, "grad_norm": 1.2051427364349365, "learning_rate": 1.9051620648259306e-05, "loss": 0.5179, "step": 15482 }, { "epoch": 19.81824, "grad_norm": 1.0791866779327393, "learning_rate": 1.9049619847939178e-05, "loss": 0.463, "step": 15483 }, { "epoch": 19.81952, "grad_norm": 1.0924525260925293, "learning_rate": 1.9047619047619046e-05, "loss": 0.4755, "step": 15484 }, { "epoch": 19.8208, "grad_norm": 1.0845119953155518, "learning_rate": 1.904561824729892e-05, "loss": 0.5183, "step": 15485 }, { "epoch": 19.82208, "grad_norm": 1.0582520961761475, "learning_rate": 1.9043617446978793e-05, "loss": 0.4608, "step": 15486 }, { "epoch": 19.82336, "grad_norm": 1.0958303213119507, "learning_rate": 
1.9041616646658665e-05, "loss": 0.4605, "step": 15487 }, { "epoch": 19.82464, "grad_norm": 1.0015331506729126, "learning_rate": 1.9039615846338537e-05, "loss": 0.4579, "step": 15488 }, { "epoch": 19.82592, "grad_norm": 1.1324008703231812, "learning_rate": 1.903761504601841e-05, "loss": 0.5423, "step": 15489 }, { "epoch": 19.8272, "grad_norm": 1.0755281448364258, "learning_rate": 1.903561424569828e-05, "loss": 0.4549, "step": 15490 }, { "epoch": 19.82848, "grad_norm": 1.1091095209121704, "learning_rate": 1.9033613445378152e-05, "loss": 0.5028, "step": 15491 }, { "epoch": 19.82976, "grad_norm": 1.0952885150909424, "learning_rate": 1.9031612645058024e-05, "loss": 0.479, "step": 15492 }, { "epoch": 19.83104, "grad_norm": 1.0651510953903198, "learning_rate": 1.9029611844737896e-05, "loss": 0.5052, "step": 15493 }, { "epoch": 19.83232, "grad_norm": 1.1032085418701172, "learning_rate": 1.9027611044417768e-05, "loss": 0.5447, "step": 15494 }, { "epoch": 19.8336, "grad_norm": 1.0811024904251099, "learning_rate": 1.902561024409764e-05, "loss": 0.4788, "step": 15495 }, { "epoch": 19.83488, "grad_norm": 1.0974313020706177, "learning_rate": 1.9023609443777512e-05, "loss": 0.5162, "step": 15496 }, { "epoch": 19.83616, "grad_norm": 1.0592910051345825, "learning_rate": 1.9021608643457384e-05, "loss": 0.4786, "step": 15497 }, { "epoch": 19.83744, "grad_norm": 1.1197059154510498, "learning_rate": 1.9019607843137255e-05, "loss": 0.5087, "step": 15498 }, { "epoch": 19.83872, "grad_norm": 1.0962718725204468, "learning_rate": 1.9017607042817127e-05, "loss": 0.5393, "step": 15499 }, { "epoch": 19.84, "grad_norm": 1.1129200458526611, "learning_rate": 1.9015606242497e-05, "loss": 0.5287, "step": 15500 }, { "epoch": 19.84128, "grad_norm": 1.049560785293579, "learning_rate": 1.901360544217687e-05, "loss": 0.4888, "step": 15501 }, { "epoch": 19.84256, "grad_norm": 1.0872515439987183, "learning_rate": 1.9011604641856743e-05, "loss": 0.4652, "step": 15502 }, { "epoch": 19.84384, "grad_norm": 
1.112954020500183, "learning_rate": 1.9009603841536615e-05, "loss": 0.5514, "step": 15503 }, { "epoch": 19.84512, "grad_norm": 1.0758328437805176, "learning_rate": 1.9007603041216487e-05, "loss": 0.4836, "step": 15504 }, { "epoch": 19.8464, "grad_norm": 1.0734128952026367, "learning_rate": 1.900560224089636e-05, "loss": 0.5046, "step": 15505 }, { "epoch": 19.84768, "grad_norm": 1.1305335760116577, "learning_rate": 1.9003601440576234e-05, "loss": 0.5106, "step": 15506 }, { "epoch": 19.84896, "grad_norm": 1.1036813259124756, "learning_rate": 1.9001600640256102e-05, "loss": 0.5227, "step": 15507 }, { "epoch": 19.85024, "grad_norm": 1.1305910348892212, "learning_rate": 1.8999599839935974e-05, "loss": 0.5204, "step": 15508 }, { "epoch": 19.85152, "grad_norm": 1.0744602680206299, "learning_rate": 1.8997599039615846e-05, "loss": 0.4987, "step": 15509 }, { "epoch": 19.8528, "grad_norm": 1.0705610513687134, "learning_rate": 1.899559823929572e-05, "loss": 0.5388, "step": 15510 }, { "epoch": 19.85408, "grad_norm": 1.0107085704803467, "learning_rate": 1.899359743897559e-05, "loss": 0.4689, "step": 15511 }, { "epoch": 19.85536, "grad_norm": 1.0287785530090332, "learning_rate": 1.899159663865546e-05, "loss": 0.4932, "step": 15512 }, { "epoch": 19.85664, "grad_norm": 1.0263909101486206, "learning_rate": 1.8989595838335337e-05, "loss": 0.4614, "step": 15513 }, { "epoch": 19.85792, "grad_norm": 1.0720847845077515, "learning_rate": 1.898759503801521e-05, "loss": 0.5024, "step": 15514 }, { "epoch": 19.8592, "grad_norm": 1.1379327774047852, "learning_rate": 1.8985594237695077e-05, "loss": 0.4958, "step": 15515 }, { "epoch": 19.86048, "grad_norm": 1.0998201370239258, "learning_rate": 1.898359343737495e-05, "loss": 0.5257, "step": 15516 }, { "epoch": 19.86176, "grad_norm": 1.068842887878418, "learning_rate": 1.8981592637054824e-05, "loss": 0.4743, "step": 15517 }, { "epoch": 19.86304, "grad_norm": 1.0820146799087524, "learning_rate": 1.8979591836734696e-05, "loss": 0.5448, "step": 15518 
}, { "epoch": 19.86432, "grad_norm": 1.0579636096954346, "learning_rate": 1.8977591036414564e-05, "loss": 0.5176, "step": 15519 }, { "epoch": 19.8656, "grad_norm": 1.0649138689041138, "learning_rate": 1.897559023609444e-05, "loss": 0.4429, "step": 15520 }, { "epoch": 19.86688, "grad_norm": 1.0768613815307617, "learning_rate": 1.897358943577431e-05, "loss": 0.4847, "step": 15521 }, { "epoch": 19.86816, "grad_norm": 1.0212260484695435, "learning_rate": 1.8971588635454183e-05, "loss": 0.4501, "step": 15522 }, { "epoch": 19.86944, "grad_norm": 1.0247191190719604, "learning_rate": 1.8969587835134052e-05, "loss": 0.4715, "step": 15523 }, { "epoch": 19.87072, "grad_norm": 1.0731992721557617, "learning_rate": 1.8967587034813927e-05, "loss": 0.4961, "step": 15524 }, { "epoch": 19.872, "grad_norm": 1.0773158073425293, "learning_rate": 1.89655862344938e-05, "loss": 0.4927, "step": 15525 }, { "epoch": 19.87328, "grad_norm": 1.0747638940811157, "learning_rate": 1.896358543417367e-05, "loss": 0.5224, "step": 15526 }, { "epoch": 19.87456, "grad_norm": 1.0612146854400635, "learning_rate": 1.8961584633853543e-05, "loss": 0.5373, "step": 15527 }, { "epoch": 19.87584, "grad_norm": 1.1125435829162598, "learning_rate": 1.8959583833533415e-05, "loss": 0.5768, "step": 15528 }, { "epoch": 19.87712, "grad_norm": 1.0910027027130127, "learning_rate": 1.8957583033213286e-05, "loss": 0.5322, "step": 15529 }, { "epoch": 19.8784, "grad_norm": 1.1110457181930542, "learning_rate": 1.8955582232893158e-05, "loss": 0.5189, "step": 15530 }, { "epoch": 19.87968, "grad_norm": 1.0447136163711548, "learning_rate": 1.895358143257303e-05, "loss": 0.4795, "step": 15531 }, { "epoch": 19.88096, "grad_norm": 1.0677382946014404, "learning_rate": 1.8951580632252902e-05, "loss": 0.4929, "step": 15532 }, { "epoch": 19.88224, "grad_norm": 1.038959264755249, "learning_rate": 1.8949579831932774e-05, "loss": 0.4779, "step": 15533 }, { "epoch": 19.88352, "grad_norm": 1.0871193408966064, "learning_rate": 
1.8947579031612646e-05, "loss": 0.48, "step": 15534 }, { "epoch": 19.8848, "grad_norm": 1.1005915403366089, "learning_rate": 1.8945578231292518e-05, "loss": 0.5181, "step": 15535 }, { "epoch": 19.88608, "grad_norm": 1.0428812503814697, "learning_rate": 1.894357743097239e-05, "loss": 0.4534, "step": 15536 }, { "epoch": 19.88736, "grad_norm": 1.0860538482666016, "learning_rate": 1.894157663065226e-05, "loss": 0.5012, "step": 15537 }, { "epoch": 19.88864, "grad_norm": 1.0798299312591553, "learning_rate": 1.8939575830332133e-05, "loss": 0.5091, "step": 15538 }, { "epoch": 19.88992, "grad_norm": 1.0804452896118164, "learning_rate": 1.8937575030012005e-05, "loss": 0.4978, "step": 15539 }, { "epoch": 19.8912, "grad_norm": 1.069659948348999, "learning_rate": 1.8935574229691877e-05, "loss": 0.4902, "step": 15540 }, { "epoch": 19.89248, "grad_norm": 1.1372871398925781, "learning_rate": 1.8933573429371752e-05, "loss": 0.5438, "step": 15541 }, { "epoch": 19.89376, "grad_norm": 1.079403281211853, "learning_rate": 1.893157262905162e-05, "loss": 0.4882, "step": 15542 }, { "epoch": 19.89504, "grad_norm": 1.0226234197616577, "learning_rate": 1.8929571828731492e-05, "loss": 0.4323, "step": 15543 }, { "epoch": 19.89632, "grad_norm": 1.0797722339630127, "learning_rate": 1.8927571028411364e-05, "loss": 0.507, "step": 15544 }, { "epoch": 19.8976, "grad_norm": 1.0766708850860596, "learning_rate": 1.892557022809124e-05, "loss": 0.5058, "step": 15545 }, { "epoch": 19.89888, "grad_norm": 1.0884517431259155, "learning_rate": 1.8923569427771108e-05, "loss": 0.5136, "step": 15546 }, { "epoch": 19.90016, "grad_norm": 1.1171706914901733, "learning_rate": 1.892156862745098e-05, "loss": 0.5082, "step": 15547 }, { "epoch": 19.90144, "grad_norm": 1.065116286277771, "learning_rate": 1.8919567827130855e-05, "loss": 0.5113, "step": 15548 }, { "epoch": 19.90272, "grad_norm": 1.0674129724502563, "learning_rate": 1.8917567026810727e-05, "loss": 0.4844, "step": 15549 }, { "epoch": 19.904, "grad_norm": 
1.123174786567688, "learning_rate": 1.8915566226490595e-05, "loss": 0.527, "step": 15550 }, { "epoch": 19.90528, "grad_norm": 1.113181710243225, "learning_rate": 1.8913565426170467e-05, "loss": 0.5275, "step": 15551 }, { "epoch": 19.90656, "grad_norm": 1.080833077430725, "learning_rate": 1.8911564625850343e-05, "loss": 0.5218, "step": 15552 }, { "epoch": 19.90784, "grad_norm": 1.1273305416107178, "learning_rate": 1.8909563825530214e-05, "loss": 0.5157, "step": 15553 }, { "epoch": 19.90912, "grad_norm": 1.1139534711837769, "learning_rate": 1.8907563025210083e-05, "loss": 0.5408, "step": 15554 }, { "epoch": 19.9104, "grad_norm": 1.0617810487747192, "learning_rate": 1.8905562224889955e-05, "loss": 0.4972, "step": 15555 }, { "epoch": 19.91168, "grad_norm": 1.0944743156433105, "learning_rate": 1.890356142456983e-05, "loss": 0.5016, "step": 15556 }, { "epoch": 19.912959999999998, "grad_norm": 1.0922056436538696, "learning_rate": 1.8901560624249702e-05, "loss": 0.5228, "step": 15557 }, { "epoch": 19.91424, "grad_norm": 1.060390830039978, "learning_rate": 1.889955982392957e-05, "loss": 0.5006, "step": 15558 }, { "epoch": 19.91552, "grad_norm": 1.0017435550689697, "learning_rate": 1.8897559023609446e-05, "loss": 0.4723, "step": 15559 }, { "epoch": 19.9168, "grad_norm": 1.0966525077819824, "learning_rate": 1.8895558223289317e-05, "loss": 0.5312, "step": 15560 }, { "epoch": 19.91808, "grad_norm": 1.037953495979309, "learning_rate": 1.889355742296919e-05, "loss": 0.5364, "step": 15561 }, { "epoch": 19.91936, "grad_norm": 1.0990053415298462, "learning_rate": 1.8891556622649058e-05, "loss": 0.515, "step": 15562 }, { "epoch": 19.92064, "grad_norm": 1.1318585872650146, "learning_rate": 1.8889555822328933e-05, "loss": 0.5202, "step": 15563 }, { "epoch": 19.92192, "grad_norm": 1.0594831705093384, "learning_rate": 1.8887555022008805e-05, "loss": 0.5013, "step": 15564 }, { "epoch": 19.9232, "grad_norm": 1.0701175928115845, "learning_rate": 1.8885554221688677e-05, "loss": 0.4641, 
"step": 15565 }, { "epoch": 19.92448, "grad_norm": 1.095017433166504, "learning_rate": 1.888355342136855e-05, "loss": 0.5022, "step": 15566 }, { "epoch": 19.92576, "grad_norm": 1.1008120775222778, "learning_rate": 1.888155262104842e-05, "loss": 0.5144, "step": 15567 }, { "epoch": 19.92704, "grad_norm": 1.0026847124099731, "learning_rate": 1.8879551820728292e-05, "loss": 0.4596, "step": 15568 }, { "epoch": 19.92832, "grad_norm": 1.0756937265396118, "learning_rate": 1.8877551020408164e-05, "loss": 0.4879, "step": 15569 }, { "epoch": 19.9296, "grad_norm": 1.0815765857696533, "learning_rate": 1.8875550220088036e-05, "loss": 0.533, "step": 15570 }, { "epoch": 19.93088, "grad_norm": 1.0457710027694702, "learning_rate": 1.8873549419767908e-05, "loss": 0.5204, "step": 15571 }, { "epoch": 19.93216, "grad_norm": 1.008851408958435, "learning_rate": 1.887154861944778e-05, "loss": 0.5, "step": 15572 }, { "epoch": 19.93344, "grad_norm": 1.0376039743423462, "learning_rate": 1.886954781912765e-05, "loss": 0.5194, "step": 15573 }, { "epoch": 19.93472, "grad_norm": 1.0573559999465942, "learning_rate": 1.8867547018807523e-05, "loss": 0.4944, "step": 15574 }, { "epoch": 19.936, "grad_norm": 1.035343885421753, "learning_rate": 1.8865546218487395e-05, "loss": 0.4731, "step": 15575 }, { "epoch": 19.93728, "grad_norm": 1.033947229385376, "learning_rate": 1.8863545418167267e-05, "loss": 0.4792, "step": 15576 }, { "epoch": 19.93856, "grad_norm": 1.0770082473754883, "learning_rate": 1.886154461784714e-05, "loss": 0.5264, "step": 15577 }, { "epoch": 19.93984, "grad_norm": 1.0262564420700073, "learning_rate": 1.885954381752701e-05, "loss": 0.5304, "step": 15578 }, { "epoch": 19.94112, "grad_norm": 1.0958056449890137, "learning_rate": 1.8857543017206883e-05, "loss": 0.5294, "step": 15579 }, { "epoch": 19.9424, "grad_norm": 1.1022257804870605, "learning_rate": 1.8855542216886758e-05, "loss": 0.4546, "step": 15580 }, { "epoch": 19.94368, "grad_norm": 1.0951906442642212, "learning_rate": 
1.8853541416566626e-05, "loss": 0.5054, "step": 15581 }, { "epoch": 19.944960000000002, "grad_norm": 1.0561892986297607, "learning_rate": 1.8851540616246498e-05, "loss": 0.4944, "step": 15582 }, { "epoch": 19.94624, "grad_norm": 1.084155559539795, "learning_rate": 1.884953981592637e-05, "loss": 0.5098, "step": 15583 }, { "epoch": 19.94752, "grad_norm": 1.0809601545333862, "learning_rate": 1.8847539015606245e-05, "loss": 0.5657, "step": 15584 }, { "epoch": 19.9488, "grad_norm": 1.118355393409729, "learning_rate": 1.8845538215286114e-05, "loss": 0.5551, "step": 15585 }, { "epoch": 19.95008, "grad_norm": 1.0390595197677612, "learning_rate": 1.8843537414965986e-05, "loss": 0.476, "step": 15586 }, { "epoch": 19.95136, "grad_norm": 1.0548921823501587, "learning_rate": 1.884153661464586e-05, "loss": 0.5211, "step": 15587 }, { "epoch": 19.95264, "grad_norm": 1.0728892087936401, "learning_rate": 1.8839535814325733e-05, "loss": 0.4795, "step": 15588 }, { "epoch": 19.95392, "grad_norm": 1.0571420192718506, "learning_rate": 1.88375350140056e-05, "loss": 0.5225, "step": 15589 }, { "epoch": 19.9552, "grad_norm": 1.0640194416046143, "learning_rate": 1.8835534213685473e-05, "loss": 0.4759, "step": 15590 }, { "epoch": 19.95648, "grad_norm": 1.0471768379211426, "learning_rate": 1.883353341336535e-05, "loss": 0.4839, "step": 15591 }, { "epoch": 19.95776, "grad_norm": 1.0839452743530273, "learning_rate": 1.883153261304522e-05, "loss": 0.522, "step": 15592 }, { "epoch": 19.95904, "grad_norm": 1.0934643745422363, "learning_rate": 1.882953181272509e-05, "loss": 0.5176, "step": 15593 }, { "epoch": 19.96032, "grad_norm": 1.0967458486557007, "learning_rate": 1.8827531012404964e-05, "loss": 0.5198, "step": 15594 }, { "epoch": 19.9616, "grad_norm": 1.0761650800704956, "learning_rate": 1.8825530212084836e-05, "loss": 0.5091, "step": 15595 }, { "epoch": 19.96288, "grad_norm": 1.0557310581207275, "learning_rate": 1.8823529411764708e-05, "loss": 0.496, "step": 15596 }, { "epoch": 19.96416, 
"grad_norm": 1.0711382627487183, "learning_rate": 1.8821528611444576e-05, "loss": 0.4703, "step": 15597 }, { "epoch": 19.96544, "grad_norm": 1.1365162134170532, "learning_rate": 1.881952781112445e-05, "loss": 0.4936, "step": 15598 }, { "epoch": 19.96672, "grad_norm": 1.141715168952942, "learning_rate": 1.8817527010804323e-05, "loss": 0.4871, "step": 15599 }, { "epoch": 19.968, "grad_norm": 1.146036148071289, "learning_rate": 1.8815526210484195e-05, "loss": 0.5403, "step": 15600 }, { "epoch": 19.96928, "grad_norm": 1.123439073562622, "learning_rate": 1.8813525410164067e-05, "loss": 0.5526, "step": 15601 }, { "epoch": 19.97056, "grad_norm": 1.0493305921554565, "learning_rate": 1.881152460984394e-05, "loss": 0.4881, "step": 15602 }, { "epoch": 19.97184, "grad_norm": 1.0788578987121582, "learning_rate": 1.880952380952381e-05, "loss": 0.481, "step": 15603 }, { "epoch": 19.97312, "grad_norm": 1.089673399925232, "learning_rate": 1.8807523009203682e-05, "loss": 0.5252, "step": 15604 }, { "epoch": 19.9744, "grad_norm": 1.0892081260681152, "learning_rate": 1.8805522208883554e-05, "loss": 0.4939, "step": 15605 }, { "epoch": 19.97568, "grad_norm": 1.0614848136901855, "learning_rate": 1.8803521408563426e-05, "loss": 0.5036, "step": 15606 }, { "epoch": 19.97696, "grad_norm": 1.028151512145996, "learning_rate": 1.8801520608243298e-05, "loss": 0.4862, "step": 15607 }, { "epoch": 19.97824, "grad_norm": 1.0651766061782837, "learning_rate": 1.879951980792317e-05, "loss": 0.4842, "step": 15608 }, { "epoch": 19.97952, "grad_norm": 1.0863500833511353, "learning_rate": 1.8797519007603042e-05, "loss": 0.4905, "step": 15609 }, { "epoch": 19.9808, "grad_norm": 1.0720291137695312, "learning_rate": 1.8795518207282914e-05, "loss": 0.4991, "step": 15610 }, { "epoch": 19.98208, "grad_norm": 1.0834654569625854, "learning_rate": 1.8793517406962785e-05, "loss": 0.5061, "step": 15611 }, { "epoch": 19.98336, "grad_norm": 1.0607835054397583, "learning_rate": 1.8791516606642657e-05, "loss": 0.4861, 
"step": 15612 }, { "epoch": 19.98464, "grad_norm": 1.1219713687896729, "learning_rate": 1.878951580632253e-05, "loss": 0.5357, "step": 15613 }, { "epoch": 19.98592, "grad_norm": 1.078185796737671, "learning_rate": 1.87875150060024e-05, "loss": 0.5251, "step": 15614 }, { "epoch": 19.9872, "grad_norm": 1.1310948133468628, "learning_rate": 1.8785514205682273e-05, "loss": 0.5558, "step": 15615 }, { "epoch": 19.98848, "grad_norm": 1.0436378717422485, "learning_rate": 1.8783513405362145e-05, "loss": 0.4855, "step": 15616 }, { "epoch": 19.98976, "grad_norm": 1.074711799621582, "learning_rate": 1.8781512605042017e-05, "loss": 0.4971, "step": 15617 }, { "epoch": 19.99104, "grad_norm": 1.123464584350586, "learning_rate": 1.877951180472189e-05, "loss": 0.5288, "step": 15618 }, { "epoch": 19.99232, "grad_norm": 1.015176773071289, "learning_rate": 1.8777511004401764e-05, "loss": 0.5137, "step": 15619 }, { "epoch": 19.9936, "grad_norm": 1.0888524055480957, "learning_rate": 1.8775510204081632e-05, "loss": 0.4858, "step": 15620 }, { "epoch": 19.99488, "grad_norm": 1.0319979190826416, "learning_rate": 1.8773509403761504e-05, "loss": 0.4687, "step": 15621 }, { "epoch": 19.99616, "grad_norm": 1.116672396659851, "learning_rate": 1.8771508603441376e-05, "loss": 0.5169, "step": 15622 }, { "epoch": 19.99744, "grad_norm": 1.0477895736694336, "learning_rate": 1.876950780312125e-05, "loss": 0.5392, "step": 15623 }, { "epoch": 19.99872, "grad_norm": 1.069499135017395, "learning_rate": 1.876750700280112e-05, "loss": 0.4781, "step": 15624 }, { "epoch": 20.0, "grad_norm": 2.609760046005249, "learning_rate": 1.876550620248099e-05, "loss": 1.014, "step": 15625 }, { "epoch": 20.00128, "grad_norm": 1.0402753353118896, "learning_rate": 1.8763505402160867e-05, "loss": 0.4348, "step": 15626 }, { "epoch": 20.00256, "grad_norm": 1.0104725360870361, "learning_rate": 1.876150460184074e-05, "loss": 0.4641, "step": 15627 }, { "epoch": 20.00384, "grad_norm": 1.1191972494125366, "learning_rate": 
1.8759503801520607e-05, "loss": 0.5, "step": 15628 }, { "epoch": 20.00512, "grad_norm": 1.0969229936599731, "learning_rate": 1.875750300120048e-05, "loss": 0.5265, "step": 15629 }, { "epoch": 20.0064, "grad_norm": 1.0212663412094116, "learning_rate": 1.8755502200880354e-05, "loss": 0.4381, "step": 15630 }, { "epoch": 20.00768, "grad_norm": 1.1064248085021973, "learning_rate": 1.8753501400560226e-05, "loss": 0.5045, "step": 15631 }, { "epoch": 20.00896, "grad_norm": 1.134947419166565, "learning_rate": 1.8751500600240094e-05, "loss": 0.5008, "step": 15632 }, { "epoch": 20.01024, "grad_norm": 1.0822664499282837, "learning_rate": 1.874949979991997e-05, "loss": 0.4941, "step": 15633 }, { "epoch": 20.01152, "grad_norm": 1.0499417781829834, "learning_rate": 1.874749899959984e-05, "loss": 0.4508, "step": 15634 }, { "epoch": 20.0128, "grad_norm": 1.050943374633789, "learning_rate": 1.8745498199279713e-05, "loss": 0.5099, "step": 15635 }, { "epoch": 20.01408, "grad_norm": 1.016408085823059, "learning_rate": 1.8743497398959582e-05, "loss": 0.4515, "step": 15636 }, { "epoch": 20.01536, "grad_norm": 1.0423884391784668, "learning_rate": 1.8741496598639457e-05, "loss": 0.5248, "step": 15637 }, { "epoch": 20.01664, "grad_norm": 0.9838659763336182, "learning_rate": 1.873949579831933e-05, "loss": 0.4533, "step": 15638 }, { "epoch": 20.01792, "grad_norm": 1.0517929792404175, "learning_rate": 1.87374949979992e-05, "loss": 0.4678, "step": 15639 }, { "epoch": 20.0192, "grad_norm": 1.0576074123382568, "learning_rate": 1.8735494197679073e-05, "loss": 0.471, "step": 15640 }, { "epoch": 20.02048, "grad_norm": 1.119922161102295, "learning_rate": 1.8733493397358945e-05, "loss": 0.5088, "step": 15641 }, { "epoch": 20.02176, "grad_norm": 1.0552582740783691, "learning_rate": 1.8731492597038816e-05, "loss": 0.477, "step": 15642 }, { "epoch": 20.02304, "grad_norm": 1.0699156522750854, "learning_rate": 1.8729491796718688e-05, "loss": 0.5483, "step": 15643 }, { "epoch": 20.02432, "grad_norm": 
1.020215630531311, "learning_rate": 1.872749099639856e-05, "loss": 0.4916, "step": 15644 }, { "epoch": 20.0256, "grad_norm": 1.036571741104126, "learning_rate": 1.8725490196078432e-05, "loss": 0.4523, "step": 15645 }, { "epoch": 20.02688, "grad_norm": 1.095461130142212, "learning_rate": 1.8723489395758304e-05, "loss": 0.4832, "step": 15646 }, { "epoch": 20.02816, "grad_norm": 1.053385853767395, "learning_rate": 1.872148859543818e-05, "loss": 0.4627, "step": 15647 }, { "epoch": 20.02944, "grad_norm": 1.0644296407699585, "learning_rate": 1.8719487795118048e-05, "loss": 0.4749, "step": 15648 }, { "epoch": 20.03072, "grad_norm": 1.083106517791748, "learning_rate": 1.871748699479792e-05, "loss": 0.4926, "step": 15649 }, { "epoch": 20.032, "grad_norm": 1.06119704246521, "learning_rate": 1.871548619447779e-05, "loss": 0.4924, "step": 15650 }, { "epoch": 20.03328, "grad_norm": 1.0661925077438354, "learning_rate": 1.8713485394157667e-05, "loss": 0.4581, "step": 15651 }, { "epoch": 20.03456, "grad_norm": 1.1228007078170776, "learning_rate": 1.8711484593837535e-05, "loss": 0.5334, "step": 15652 }, { "epoch": 20.03584, "grad_norm": 1.0884519815444946, "learning_rate": 1.8709483793517407e-05, "loss": 0.4621, "step": 15653 }, { "epoch": 20.03712, "grad_norm": 1.0557575225830078, "learning_rate": 1.8707482993197282e-05, "loss": 0.4675, "step": 15654 }, { "epoch": 20.0384, "grad_norm": 1.0507951974868774, "learning_rate": 1.8705482192877154e-05, "loss": 0.4968, "step": 15655 }, { "epoch": 20.03968, "grad_norm": 1.0624488592147827, "learning_rate": 1.8703481392557022e-05, "loss": 0.4994, "step": 15656 }, { "epoch": 20.04096, "grad_norm": 1.0612828731536865, "learning_rate": 1.8701480592236894e-05, "loss": 0.4935, "step": 15657 }, { "epoch": 20.04224, "grad_norm": 1.0293620824813843, "learning_rate": 1.869947979191677e-05, "loss": 0.4321, "step": 15658 }, { "epoch": 20.04352, "grad_norm": 1.074039101600647, "learning_rate": 1.869747899159664e-05, "loss": 0.474, "step": 15659 }, { 
"epoch": 20.0448, "grad_norm": 1.0148733854293823, "learning_rate": 1.869547819127651e-05, "loss": 0.478, "step": 15660 }, { "epoch": 20.04608, "grad_norm": 1.1181195974349976, "learning_rate": 1.8693477390956382e-05, "loss": 0.5195, "step": 15661 }, { "epoch": 20.04736, "grad_norm": 1.0959631204605103, "learning_rate": 1.8691476590636257e-05, "loss": 0.5102, "step": 15662 }, { "epoch": 20.04864, "grad_norm": 1.070541262626648, "learning_rate": 1.868947579031613e-05, "loss": 0.4831, "step": 15663 }, { "epoch": 20.04992, "grad_norm": 1.032403826713562, "learning_rate": 1.8687474989995997e-05, "loss": 0.4576, "step": 15664 }, { "epoch": 20.0512, "grad_norm": 0.9710387587547302, "learning_rate": 1.8685474189675873e-05, "loss": 0.464, "step": 15665 }, { "epoch": 20.05248, "grad_norm": 1.1018273830413818, "learning_rate": 1.8683473389355744e-05, "loss": 0.4972, "step": 15666 }, { "epoch": 20.05376, "grad_norm": 0.9961934089660645, "learning_rate": 1.8681472589035616e-05, "loss": 0.4921, "step": 15667 }, { "epoch": 20.05504, "grad_norm": 1.1243575811386108, "learning_rate": 1.8679471788715485e-05, "loss": 0.4818, "step": 15668 }, { "epoch": 20.05632, "grad_norm": 1.05106520652771, "learning_rate": 1.867747098839536e-05, "loss": 0.4697, "step": 15669 }, { "epoch": 20.0576, "grad_norm": 1.0926669836044312, "learning_rate": 1.8675470188075232e-05, "loss": 0.5248, "step": 15670 }, { "epoch": 20.05888, "grad_norm": 1.1060155630111694, "learning_rate": 1.8673469387755104e-05, "loss": 0.4986, "step": 15671 }, { "epoch": 20.06016, "grad_norm": 1.0984371900558472, "learning_rate": 1.8671468587434976e-05, "loss": 0.5017, "step": 15672 }, { "epoch": 20.06144, "grad_norm": 1.0190678834915161, "learning_rate": 1.8669467787114847e-05, "loss": 0.4497, "step": 15673 }, { "epoch": 20.06272, "grad_norm": 1.1226532459259033, "learning_rate": 1.866746698679472e-05, "loss": 0.5061, "step": 15674 }, { "epoch": 20.064, "grad_norm": 1.016611099243164, "learning_rate": 1.866546618647459e-05, 
"loss": 0.4425, "step": 15675 }, { "epoch": 20.06528, "grad_norm": 1.1587402820587158, "learning_rate": 1.8663465386154463e-05, "loss": 0.4797, "step": 15676 }, { "epoch": 20.06656, "grad_norm": 1.0697815418243408, "learning_rate": 1.8661464585834335e-05, "loss": 0.4623, "step": 15677 }, { "epoch": 20.06784, "grad_norm": 1.0413057804107666, "learning_rate": 1.8659463785514207e-05, "loss": 0.4997, "step": 15678 }, { "epoch": 20.06912, "grad_norm": 1.0416499376296997, "learning_rate": 1.865746298519408e-05, "loss": 0.4707, "step": 15679 }, { "epoch": 20.0704, "grad_norm": 1.034729242324829, "learning_rate": 1.865546218487395e-05, "loss": 0.4702, "step": 15680 }, { "epoch": 20.07168, "grad_norm": 1.1289206743240356, "learning_rate": 1.8653461384553822e-05, "loss": 0.4843, "step": 15681 }, { "epoch": 20.07296, "grad_norm": 1.0934054851531982, "learning_rate": 1.8651460584233694e-05, "loss": 0.4927, "step": 15682 }, { "epoch": 20.07424, "grad_norm": 1.0436177253723145, "learning_rate": 1.8649459783913566e-05, "loss": 0.49, "step": 15683 }, { "epoch": 20.07552, "grad_norm": 1.0504745244979858, "learning_rate": 1.8647458983593438e-05, "loss": 0.4885, "step": 15684 }, { "epoch": 20.0768, "grad_norm": 1.0551018714904785, "learning_rate": 1.864545818327331e-05, "loss": 0.4789, "step": 15685 }, { "epoch": 20.07808, "grad_norm": 1.0579137802124023, "learning_rate": 1.8643457382953185e-05, "loss": 0.4515, "step": 15686 }, { "epoch": 20.07936, "grad_norm": 1.1196202039718628, "learning_rate": 1.8641456582633053e-05, "loss": 0.5234, "step": 15687 }, { "epoch": 20.08064, "grad_norm": 1.0107567310333252, "learning_rate": 1.8639455782312925e-05, "loss": 0.4665, "step": 15688 }, { "epoch": 20.08192, "grad_norm": 1.0458314418792725, "learning_rate": 1.8637454981992797e-05, "loss": 0.4644, "step": 15689 }, { "epoch": 20.0832, "grad_norm": 1.0873018503189087, "learning_rate": 1.8635454181672672e-05, "loss": 0.4421, "step": 15690 }, { "epoch": 20.08448, "grad_norm": 1.0975297689437866, 
"learning_rate": 1.863345338135254e-05, "loss": 0.5099, "step": 15691 }, { "epoch": 20.08576, "grad_norm": 1.0815826654434204, "learning_rate": 1.8631452581032413e-05, "loss": 0.4522, "step": 15692 }, { "epoch": 20.087040000000002, "grad_norm": 1.0389448404312134, "learning_rate": 1.8629451780712288e-05, "loss": 0.4587, "step": 15693 }, { "epoch": 20.08832, "grad_norm": 1.0936323404312134, "learning_rate": 1.862745098039216e-05, "loss": 0.5339, "step": 15694 }, { "epoch": 20.0896, "grad_norm": 1.065900444984436, "learning_rate": 1.8625450180072028e-05, "loss": 0.4701, "step": 15695 }, { "epoch": 20.09088, "grad_norm": 1.0863001346588135, "learning_rate": 1.86234493797519e-05, "loss": 0.5019, "step": 15696 }, { "epoch": 20.09216, "grad_norm": 1.1467665433883667, "learning_rate": 1.8621448579431775e-05, "loss": 0.5244, "step": 15697 }, { "epoch": 20.09344, "grad_norm": 1.0635558366775513, "learning_rate": 1.8619447779111647e-05, "loss": 0.4646, "step": 15698 }, { "epoch": 20.09472, "grad_norm": 1.0072330236434937, "learning_rate": 1.8617446978791516e-05, "loss": 0.4558, "step": 15699 }, { "epoch": 20.096, "grad_norm": 1.0939109325408936, "learning_rate": 1.861544617847139e-05, "loss": 0.5301, "step": 15700 }, { "epoch": 20.09728, "grad_norm": 1.1008801460266113, "learning_rate": 1.8613445378151263e-05, "loss": 0.4859, "step": 15701 }, { "epoch": 20.09856, "grad_norm": 1.1168580055236816, "learning_rate": 1.8611444577831135e-05, "loss": 0.4658, "step": 15702 }, { "epoch": 20.09984, "grad_norm": 1.0580940246582031, "learning_rate": 1.8609443777511003e-05, "loss": 0.4434, "step": 15703 }, { "epoch": 20.10112, "grad_norm": 1.0258312225341797, "learning_rate": 1.860744297719088e-05, "loss": 0.4644, "step": 15704 }, { "epoch": 20.1024, "grad_norm": 1.0801990032196045, "learning_rate": 1.860544217687075e-05, "loss": 0.4959, "step": 15705 }, { "epoch": 20.10368, "grad_norm": 1.0814316272735596, "learning_rate": 1.8603441376550622e-05, "loss": 0.4898, "step": 15706 }, { 
"epoch": 20.10496, "grad_norm": 1.116225004196167, "learning_rate": 1.8601440576230494e-05, "loss": 0.4865, "step": 15707 }, { "epoch": 20.10624, "grad_norm": 1.147642970085144, "learning_rate": 1.8599439775910366e-05, "loss": 0.5007, "step": 15708 }, { "epoch": 20.10752, "grad_norm": 1.1778041124343872, "learning_rate": 1.8597438975590238e-05, "loss": 0.5123, "step": 15709 }, { "epoch": 20.1088, "grad_norm": 1.0506484508514404, "learning_rate": 1.859543817527011e-05, "loss": 0.4695, "step": 15710 }, { "epoch": 20.11008, "grad_norm": 1.0645596981048584, "learning_rate": 1.859343737494998e-05, "loss": 0.4784, "step": 15711 }, { "epoch": 20.11136, "grad_norm": 1.086012840270996, "learning_rate": 1.8591436574629853e-05, "loss": 0.4975, "step": 15712 }, { "epoch": 20.11264, "grad_norm": 1.0438339710235596, "learning_rate": 1.8589435774309725e-05, "loss": 0.4973, "step": 15713 }, { "epoch": 20.11392, "grad_norm": 1.1068918704986572, "learning_rate": 1.8587434973989597e-05, "loss": 0.4846, "step": 15714 }, { "epoch": 20.1152, "grad_norm": 1.1290284395217896, "learning_rate": 1.858543417366947e-05, "loss": 0.5336, "step": 15715 }, { "epoch": 20.11648, "grad_norm": 1.1011111736297607, "learning_rate": 1.858343337334934e-05, "loss": 0.4703, "step": 15716 }, { "epoch": 20.11776, "grad_norm": 1.1040534973144531, "learning_rate": 1.8581432573029212e-05, "loss": 0.4649, "step": 15717 }, { "epoch": 20.11904, "grad_norm": 1.0694252252578735, "learning_rate": 1.8579431772709084e-05, "loss": 0.4411, "step": 15718 }, { "epoch": 20.12032, "grad_norm": 1.0621991157531738, "learning_rate": 1.8577430972388956e-05, "loss": 0.4636, "step": 15719 }, { "epoch": 20.1216, "grad_norm": 1.0658694505691528, "learning_rate": 1.8575430172068828e-05, "loss": 0.4973, "step": 15720 }, { "epoch": 20.12288, "grad_norm": 1.0541012287139893, "learning_rate": 1.85734293717487e-05, "loss": 0.492, "step": 15721 }, { "epoch": 20.12416, "grad_norm": 1.139356255531311, "learning_rate": 1.8571428571428572e-05, 
"loss": 0.4854, "step": 15722 }, { "epoch": 20.12544, "grad_norm": 1.0738463401794434, "learning_rate": 1.8569427771108444e-05, "loss": 0.5184, "step": 15723 }, { "epoch": 20.12672, "grad_norm": 1.042602777481079, "learning_rate": 1.8567426970788315e-05, "loss": 0.4965, "step": 15724 }, { "epoch": 20.128, "grad_norm": 1.100381851196289, "learning_rate": 1.856542617046819e-05, "loss": 0.52, "step": 15725 }, { "epoch": 20.12928, "grad_norm": 1.0966511964797974, "learning_rate": 1.856342537014806e-05, "loss": 0.5214, "step": 15726 }, { "epoch": 20.13056, "grad_norm": 1.1034797430038452, "learning_rate": 1.856142456982793e-05, "loss": 0.4959, "step": 15727 }, { "epoch": 20.13184, "grad_norm": 1.1695610284805298, "learning_rate": 1.8559423769507803e-05, "loss": 0.5517, "step": 15728 }, { "epoch": 20.13312, "grad_norm": 1.1129649877548218, "learning_rate": 1.8557422969187678e-05, "loss": 0.4974, "step": 15729 }, { "epoch": 20.1344, "grad_norm": 1.130495309829712, "learning_rate": 1.8555422168867547e-05, "loss": 0.4873, "step": 15730 }, { "epoch": 20.13568, "grad_norm": 1.099576473236084, "learning_rate": 1.855342136854742e-05, "loss": 0.4751, "step": 15731 }, { "epoch": 20.13696, "grad_norm": 1.0953304767608643, "learning_rate": 1.8551420568227294e-05, "loss": 0.5135, "step": 15732 }, { "epoch": 20.13824, "grad_norm": 1.0430271625518799, "learning_rate": 1.8549419767907166e-05, "loss": 0.4849, "step": 15733 }, { "epoch": 20.13952, "grad_norm": 1.1032804250717163, "learning_rate": 1.8547418967587034e-05, "loss": 0.5093, "step": 15734 }, { "epoch": 20.1408, "grad_norm": 1.1103456020355225, "learning_rate": 1.8545418167266906e-05, "loss": 0.5148, "step": 15735 }, { "epoch": 20.14208, "grad_norm": 1.1591553688049316, "learning_rate": 1.854341736694678e-05, "loss": 0.5335, "step": 15736 }, { "epoch": 20.14336, "grad_norm": 1.0609407424926758, "learning_rate": 1.8541416566626653e-05, "loss": 0.469, "step": 15737 }, { "epoch": 20.14464, "grad_norm": 1.0772405862808228, 
"learning_rate": 1.853941576630652e-05, "loss": 0.522, "step": 15738 }, { "epoch": 20.14592, "grad_norm": 1.2019360065460205, "learning_rate": 1.8537414965986397e-05, "loss": 0.5215, "step": 15739 }, { "epoch": 20.1472, "grad_norm": 1.0603306293487549, "learning_rate": 1.853541416566627e-05, "loss": 0.4832, "step": 15740 }, { "epoch": 20.14848, "grad_norm": 1.1025315523147583, "learning_rate": 1.853341336534614e-05, "loss": 0.5234, "step": 15741 }, { "epoch": 20.14976, "grad_norm": 1.0431936979293823, "learning_rate": 1.853141256502601e-05, "loss": 0.464, "step": 15742 }, { "epoch": 20.15104, "grad_norm": 1.0916297435760498, "learning_rate": 1.8529411764705884e-05, "loss": 0.4768, "step": 15743 }, { "epoch": 20.15232, "grad_norm": 1.0731911659240723, "learning_rate": 1.8527410964385756e-05, "loss": 0.4871, "step": 15744 }, { "epoch": 20.1536, "grad_norm": 1.060876488685608, "learning_rate": 1.8525410164065628e-05, "loss": 0.4926, "step": 15745 }, { "epoch": 20.15488, "grad_norm": 1.0614646673202515, "learning_rate": 1.85234093637455e-05, "loss": 0.4415, "step": 15746 }, { "epoch": 20.15616, "grad_norm": 1.1642632484436035, "learning_rate": 1.852140856342537e-05, "loss": 0.5138, "step": 15747 }, { "epoch": 20.15744, "grad_norm": 1.0110687017440796, "learning_rate": 1.8519407763105243e-05, "loss": 0.4631, "step": 15748 }, { "epoch": 20.15872, "grad_norm": 1.130932331085205, "learning_rate": 1.8517406962785115e-05, "loss": 0.4553, "step": 15749 }, { "epoch": 20.16, "grad_norm": 0.9995177388191223, "learning_rate": 1.8515406162464987e-05, "loss": 0.4915, "step": 15750 }, { "epoch": 20.16128, "grad_norm": 1.0752531290054321, "learning_rate": 1.851340536214486e-05, "loss": 0.5099, "step": 15751 }, { "epoch": 20.16256, "grad_norm": 1.0507272481918335, "learning_rate": 1.851140456182473e-05, "loss": 0.4755, "step": 15752 }, { "epoch": 20.16384, "grad_norm": 1.1398614645004272, "learning_rate": 1.8509403761504603e-05, "loss": 0.5324, "step": 15753 }, { "epoch": 20.16512, 
"grad_norm": 0.984160840511322, "learning_rate": 1.8507402961184475e-05, "loss": 0.4188, "step": 15754 }, { "epoch": 20.1664, "grad_norm": 1.1006124019622803, "learning_rate": 1.8505402160864346e-05, "loss": 0.5033, "step": 15755 }, { "epoch": 20.16768, "grad_norm": 1.0734970569610596, "learning_rate": 1.8503401360544218e-05, "loss": 0.4785, "step": 15756 }, { "epoch": 20.16896, "grad_norm": 1.1505695581436157, "learning_rate": 1.850140056022409e-05, "loss": 0.5547, "step": 15757 }, { "epoch": 20.17024, "grad_norm": 1.147254228591919, "learning_rate": 1.8499399759903962e-05, "loss": 0.5077, "step": 15758 }, { "epoch": 20.17152, "grad_norm": 1.0394471883773804, "learning_rate": 1.8497398959583834e-05, "loss": 0.465, "step": 15759 }, { "epoch": 20.1728, "grad_norm": 1.0904779434204102, "learning_rate": 1.849539815926371e-05, "loss": 0.4909, "step": 15760 }, { "epoch": 20.17408, "grad_norm": 1.2574462890625, "learning_rate": 1.8493397358943578e-05, "loss": 0.4928, "step": 15761 }, { "epoch": 20.17536, "grad_norm": 1.0924097299575806, "learning_rate": 1.849139655862345e-05, "loss": 0.4794, "step": 15762 }, { "epoch": 20.17664, "grad_norm": 1.0930646657943726, "learning_rate": 1.848939575830332e-05, "loss": 0.4895, "step": 15763 }, { "epoch": 20.17792, "grad_norm": 1.0378696918487549, "learning_rate": 1.8487394957983196e-05, "loss": 0.5068, "step": 15764 }, { "epoch": 20.1792, "grad_norm": 1.121130347251892, "learning_rate": 1.8485394157663065e-05, "loss": 0.49, "step": 15765 }, { "epoch": 20.18048, "grad_norm": 1.1667362451553345, "learning_rate": 1.8483393357342937e-05, "loss": 0.4976, "step": 15766 }, { "epoch": 20.18176, "grad_norm": 1.0337846279144287, "learning_rate": 1.8481392557022812e-05, "loss": 0.476, "step": 15767 }, { "epoch": 20.18304, "grad_norm": 1.0466781854629517, "learning_rate": 1.8479391756702684e-05, "loss": 0.4418, "step": 15768 }, { "epoch": 20.18432, "grad_norm": 1.1099622249603271, "learning_rate": 1.8477390956382552e-05, "loss": 0.5132, 
"step": 15769 }, { "epoch": 20.1856, "grad_norm": 1.1434911489486694, "learning_rate": 1.8475390156062424e-05, "loss": 0.5056, "step": 15770 }, { "epoch": 20.18688, "grad_norm": 1.0886666774749756, "learning_rate": 1.84733893557423e-05, "loss": 0.447, "step": 15771 }, { "epoch": 20.18816, "grad_norm": 1.051747441291809, "learning_rate": 1.847138855542217e-05, "loss": 0.4715, "step": 15772 }, { "epoch": 20.18944, "grad_norm": 1.1362502574920654, "learning_rate": 1.846938775510204e-05, "loss": 0.4843, "step": 15773 }, { "epoch": 20.19072, "grad_norm": 1.0374624729156494, "learning_rate": 1.846738695478191e-05, "loss": 0.4653, "step": 15774 }, { "epoch": 20.192, "grad_norm": 1.105280876159668, "learning_rate": 1.8465386154461787e-05, "loss": 0.4934, "step": 15775 }, { "epoch": 20.19328, "grad_norm": 1.1130280494689941, "learning_rate": 1.846338535414166e-05, "loss": 0.5158, "step": 15776 }, { "epoch": 20.19456, "grad_norm": 1.0840026140213013, "learning_rate": 1.8461384553821527e-05, "loss": 0.4837, "step": 15777 }, { "epoch": 20.19584, "grad_norm": 1.0840171575546265, "learning_rate": 1.8459383753501402e-05, "loss": 0.4841, "step": 15778 }, { "epoch": 20.19712, "grad_norm": 1.044407844543457, "learning_rate": 1.8457382953181274e-05, "loss": 0.4895, "step": 15779 }, { "epoch": 20.1984, "grad_norm": 1.110978603363037, "learning_rate": 1.8455382152861146e-05, "loss": 0.4946, "step": 15780 }, { "epoch": 20.19968, "grad_norm": 1.154561161994934, "learning_rate": 1.8453381352541015e-05, "loss": 0.5201, "step": 15781 }, { "epoch": 20.20096, "grad_norm": 1.1208866834640503, "learning_rate": 1.845138055222089e-05, "loss": 0.4845, "step": 15782 }, { "epoch": 20.20224, "grad_norm": 1.111349105834961, "learning_rate": 1.8449379751900762e-05, "loss": 0.4799, "step": 15783 }, { "epoch": 20.20352, "grad_norm": 1.1092686653137207, "learning_rate": 1.8447378951580634e-05, "loss": 0.4641, "step": 15784 }, { "epoch": 20.2048, "grad_norm": 1.198625922203064, "learning_rate": 
1.8445378151260505e-05, "loss": 0.498, "step": 15785 }, { "epoch": 20.20608, "grad_norm": 1.128433108329773, "learning_rate": 1.8443377350940377e-05, "loss": 0.4709, "step": 15786 }, { "epoch": 20.20736, "grad_norm": 1.02711820602417, "learning_rate": 1.844137655062025e-05, "loss": 0.4623, "step": 15787 }, { "epoch": 20.20864, "grad_norm": 1.1294431686401367, "learning_rate": 1.843937575030012e-05, "loss": 0.4966, "step": 15788 }, { "epoch": 20.20992, "grad_norm": 1.0591932535171509, "learning_rate": 1.8437374949979993e-05, "loss": 0.4802, "step": 15789 }, { "epoch": 20.2112, "grad_norm": 1.1523479223251343, "learning_rate": 1.8435374149659865e-05, "loss": 0.5155, "step": 15790 }, { "epoch": 20.21248, "grad_norm": 1.1084343194961548, "learning_rate": 1.8433373349339737e-05, "loss": 0.5098, "step": 15791 }, { "epoch": 20.21376, "grad_norm": 1.1509208679199219, "learning_rate": 1.843137254901961e-05, "loss": 0.5099, "step": 15792 }, { "epoch": 20.21504, "grad_norm": 1.1288859844207764, "learning_rate": 1.842937174869948e-05, "loss": 0.482, "step": 15793 }, { "epoch": 20.21632, "grad_norm": 1.039110779762268, "learning_rate": 1.8427370948379352e-05, "loss": 0.4622, "step": 15794 }, { "epoch": 20.2176, "grad_norm": 1.1868407726287842, "learning_rate": 1.8425370148059224e-05, "loss": 0.5312, "step": 15795 }, { "epoch": 20.21888, "grad_norm": 1.0792521238327026, "learning_rate": 1.8423369347739096e-05, "loss": 0.4415, "step": 15796 }, { "epoch": 20.22016, "grad_norm": 1.0646893978118896, "learning_rate": 1.8421368547418968e-05, "loss": 0.4616, "step": 15797 }, { "epoch": 20.22144, "grad_norm": 1.0488739013671875, "learning_rate": 1.841936774709884e-05, "loss": 0.4913, "step": 15798 }, { "epoch": 20.22272, "grad_norm": 1.152377963066101, "learning_rate": 1.8417366946778715e-05, "loss": 0.5146, "step": 15799 }, { "epoch": 20.224, "grad_norm": 1.0589985847473145, "learning_rate": 1.8415366146458583e-05, "loss": 0.4561, "step": 15800 }, { "epoch": 20.22528, "grad_norm": 
1.1091581583023071, "learning_rate": 1.8413365346138455e-05, "loss": 0.5, "step": 15801 }, { "epoch": 20.22656, "grad_norm": 1.1588554382324219, "learning_rate": 1.8411364545818327e-05, "loss": 0.4701, "step": 15802 }, { "epoch": 20.22784, "grad_norm": 1.0531977415084839, "learning_rate": 1.8409363745498202e-05, "loss": 0.4541, "step": 15803 }, { "epoch": 20.22912, "grad_norm": 1.1038832664489746, "learning_rate": 1.840736294517807e-05, "loss": 0.4773, "step": 15804 }, { "epoch": 20.2304, "grad_norm": 1.0699522495269775, "learning_rate": 1.8405362144857943e-05, "loss": 0.4831, "step": 15805 }, { "epoch": 20.23168, "grad_norm": 1.085437297821045, "learning_rate": 1.8403361344537818e-05, "loss": 0.4914, "step": 15806 }, { "epoch": 20.23296, "grad_norm": 1.104048728942871, "learning_rate": 1.840136054421769e-05, "loss": 0.4827, "step": 15807 }, { "epoch": 20.23424, "grad_norm": 1.0480152368545532, "learning_rate": 1.8399359743897558e-05, "loss": 0.4724, "step": 15808 }, { "epoch": 20.23552, "grad_norm": 1.1070879697799683, "learning_rate": 1.839735894357743e-05, "loss": 0.485, "step": 15809 }, { "epoch": 20.2368, "grad_norm": 1.0816839933395386, "learning_rate": 1.8395358143257305e-05, "loss": 0.4814, "step": 15810 }, { "epoch": 20.23808, "grad_norm": 1.0380091667175293, "learning_rate": 1.8393357342937177e-05, "loss": 0.483, "step": 15811 }, { "epoch": 20.23936, "grad_norm": 1.0764389038085938, "learning_rate": 1.8391356542617046e-05, "loss": 0.4797, "step": 15812 }, { "epoch": 20.24064, "grad_norm": 1.1139464378356934, "learning_rate": 1.838935574229692e-05, "loss": 0.5232, "step": 15813 }, { "epoch": 20.24192, "grad_norm": 1.0700820684432983, "learning_rate": 1.8387354941976793e-05, "loss": 0.434, "step": 15814 }, { "epoch": 20.2432, "grad_norm": 1.0398091077804565, "learning_rate": 1.8385354141656665e-05, "loss": 0.4793, "step": 15815 }, { "epoch": 20.24448, "grad_norm": 1.1150001287460327, "learning_rate": 1.8383353341336533e-05, "loss": 0.4747, "step": 15816 }, 
{ "epoch": 20.24576, "grad_norm": 1.1604424715042114, "learning_rate": 1.8381352541016408e-05, "loss": 0.5042, "step": 15817 }, { "epoch": 20.24704, "grad_norm": 1.0625085830688477, "learning_rate": 1.837935174069628e-05, "loss": 0.4773, "step": 15818 }, { "epoch": 20.24832, "grad_norm": 1.1010764837265015, "learning_rate": 1.8377350940376152e-05, "loss": 0.458, "step": 15819 }, { "epoch": 20.2496, "grad_norm": 1.1224896907806396, "learning_rate": 1.8375350140056024e-05, "loss": 0.4832, "step": 15820 }, { "epoch": 20.25088, "grad_norm": 1.1224029064178467, "learning_rate": 1.8373349339735896e-05, "loss": 0.4807, "step": 15821 }, { "epoch": 20.25216, "grad_norm": 1.0098991394042969, "learning_rate": 1.8371348539415768e-05, "loss": 0.4576, "step": 15822 }, { "epoch": 20.25344, "grad_norm": 1.065501093864441, "learning_rate": 1.836934773909564e-05, "loss": 0.4252, "step": 15823 }, { "epoch": 20.25472, "grad_norm": 1.1191290616989136, "learning_rate": 1.836734693877551e-05, "loss": 0.4997, "step": 15824 }, { "epoch": 20.256, "grad_norm": 1.107366919517517, "learning_rate": 1.8365346138455383e-05, "loss": 0.4822, "step": 15825 }, { "epoch": 20.25728, "grad_norm": 1.0112433433532715, "learning_rate": 1.8363345338135255e-05, "loss": 0.4796, "step": 15826 }, { "epoch": 20.25856, "grad_norm": 1.03781259059906, "learning_rate": 1.8361344537815127e-05, "loss": 0.4841, "step": 15827 }, { "epoch": 20.25984, "grad_norm": 1.0869375467300415, "learning_rate": 1.8359343737495e-05, "loss": 0.5392, "step": 15828 }, { "epoch": 20.26112, "grad_norm": 1.1285794973373413, "learning_rate": 1.835734293717487e-05, "loss": 0.4978, "step": 15829 }, { "epoch": 20.2624, "grad_norm": 1.1054590940475464, "learning_rate": 1.8355342136854742e-05, "loss": 0.4876, "step": 15830 }, { "epoch": 20.26368, "grad_norm": 1.0893003940582275, "learning_rate": 1.8353341336534614e-05, "loss": 0.4894, "step": 15831 }, { "epoch": 20.26496, "grad_norm": 1.1795381307601929, "learning_rate": 1.8351340536214486e-05, 
"loss": 0.4894, "step": 15832 }, { "epoch": 20.26624, "grad_norm": 1.0786329507827759, "learning_rate": 1.8349339735894358e-05, "loss": 0.4816, "step": 15833 }, { "epoch": 20.26752, "grad_norm": 1.0577181577682495, "learning_rate": 1.834733893557423e-05, "loss": 0.4729, "step": 15834 }, { "epoch": 20.2688, "grad_norm": 1.0976567268371582, "learning_rate": 1.8345338135254102e-05, "loss": 0.5178, "step": 15835 }, { "epoch": 20.27008, "grad_norm": 1.0734076499938965, "learning_rate": 1.8343337334933974e-05, "loss": 0.4922, "step": 15836 }, { "epoch": 20.27136, "grad_norm": 1.062116265296936, "learning_rate": 1.8341336534613845e-05, "loss": 0.4958, "step": 15837 }, { "epoch": 20.27264, "grad_norm": 1.0870201587677002, "learning_rate": 1.833933573429372e-05, "loss": 0.4803, "step": 15838 }, { "epoch": 20.27392, "grad_norm": 1.0811505317687988, "learning_rate": 1.833733493397359e-05, "loss": 0.5093, "step": 15839 }, { "epoch": 20.2752, "grad_norm": 1.1151394844055176, "learning_rate": 1.833533413365346e-05, "loss": 0.5054, "step": 15840 }, { "epoch": 20.27648, "grad_norm": 1.1511766910552979, "learning_rate": 1.8333333333333333e-05, "loss": 0.4998, "step": 15841 }, { "epoch": 20.27776, "grad_norm": 1.0190060138702393, "learning_rate": 1.8331332533013208e-05, "loss": 0.4492, "step": 15842 }, { "epoch": 20.27904, "grad_norm": 1.0209211111068726, "learning_rate": 1.8329331732693077e-05, "loss": 0.4544, "step": 15843 }, { "epoch": 20.28032, "grad_norm": 1.0691982507705688, "learning_rate": 1.832733093237295e-05, "loss": 0.4621, "step": 15844 }, { "epoch": 20.2816, "grad_norm": 1.1807547807693481, "learning_rate": 1.8325330132052824e-05, "loss": 0.5496, "step": 15845 }, { "epoch": 20.28288, "grad_norm": 1.0593502521514893, "learning_rate": 1.8323329331732696e-05, "loss": 0.439, "step": 15846 }, { "epoch": 20.28416, "grad_norm": 1.0794156789779663, "learning_rate": 1.8321328531412564e-05, "loss": 0.5034, "step": 15847 }, { "epoch": 20.28544, "grad_norm": 1.1507887840270996, 
"learning_rate": 1.8319327731092436e-05, "loss": 0.5477, "step": 15848 }, { "epoch": 20.28672, "grad_norm": 0.9900954365730286, "learning_rate": 1.831732693077231e-05, "loss": 0.4778, "step": 15849 }, { "epoch": 20.288, "grad_norm": 1.0622221231460571, "learning_rate": 1.8315326130452183e-05, "loss": 0.4582, "step": 15850 }, { "epoch": 20.28928, "grad_norm": 1.0999888181686401, "learning_rate": 1.831332533013205e-05, "loss": 0.4872, "step": 15851 }, { "epoch": 20.29056, "grad_norm": 1.0076533555984497, "learning_rate": 1.8311324529811927e-05, "loss": 0.4649, "step": 15852 }, { "epoch": 20.29184, "grad_norm": 1.1547635793685913, "learning_rate": 1.83093237294918e-05, "loss": 0.4873, "step": 15853 }, { "epoch": 20.29312, "grad_norm": 1.1193164587020874, "learning_rate": 1.830732292917167e-05, "loss": 0.4748, "step": 15854 }, { "epoch": 20.2944, "grad_norm": 1.1040410995483398, "learning_rate": 1.830532212885154e-05, "loss": 0.4852, "step": 15855 }, { "epoch": 20.29568, "grad_norm": 1.0710041522979736, "learning_rate": 1.8303321328531414e-05, "loss": 0.4598, "step": 15856 }, { "epoch": 20.29696, "grad_norm": 1.1397336721420288, "learning_rate": 1.8301320528211286e-05, "loss": 0.5641, "step": 15857 }, { "epoch": 20.29824, "grad_norm": 1.1837016344070435, "learning_rate": 1.8299319727891158e-05, "loss": 0.4928, "step": 15858 }, { "epoch": 20.29952, "grad_norm": 1.112987756729126, "learning_rate": 1.829731892757103e-05, "loss": 0.4575, "step": 15859 }, { "epoch": 20.3008, "grad_norm": 1.0400470495224, "learning_rate": 1.82953181272509e-05, "loss": 0.4527, "step": 15860 }, { "epoch": 20.30208, "grad_norm": 1.0332235097885132, "learning_rate": 1.8293317326930773e-05, "loss": 0.4948, "step": 15861 }, { "epoch": 20.30336, "grad_norm": 1.1508287191390991, "learning_rate": 1.8291316526610645e-05, "loss": 0.519, "step": 15862 }, { "epoch": 20.30464, "grad_norm": 1.0921905040740967, "learning_rate": 1.8289315726290517e-05, "loss": 0.465, "step": 15863 }, { "epoch": 20.30592, 
"grad_norm": 1.1255881786346436, "learning_rate": 1.828731492597039e-05, "loss": 0.5087, "step": 15864 }, { "epoch": 20.3072, "grad_norm": 1.13408362865448, "learning_rate": 1.828531412565026e-05, "loss": 0.5263, "step": 15865 }, { "epoch": 20.30848, "grad_norm": 1.1082432270050049, "learning_rate": 1.8283313325330133e-05, "loss": 0.477, "step": 15866 }, { "epoch": 20.30976, "grad_norm": 1.0318244695663452, "learning_rate": 1.8281312525010005e-05, "loss": 0.4939, "step": 15867 }, { "epoch": 20.31104, "grad_norm": 1.0345925092697144, "learning_rate": 1.8279311724689876e-05, "loss": 0.4818, "step": 15868 }, { "epoch": 20.31232, "grad_norm": 1.1669683456420898, "learning_rate": 1.8277310924369748e-05, "loss": 0.5096, "step": 15869 }, { "epoch": 20.3136, "grad_norm": 1.1364049911499023, "learning_rate": 1.827531012404962e-05, "loss": 0.4886, "step": 15870 }, { "epoch": 20.31488, "grad_norm": 1.096252202987671, "learning_rate": 1.8273309323729492e-05, "loss": 0.4738, "step": 15871 }, { "epoch": 20.31616, "grad_norm": 1.098500370979309, "learning_rate": 1.8271308523409364e-05, "loss": 0.5058, "step": 15872 }, { "epoch": 20.31744, "grad_norm": 1.0478065013885498, "learning_rate": 1.826930772308924e-05, "loss": 0.4372, "step": 15873 }, { "epoch": 20.31872, "grad_norm": 1.1461261510849, "learning_rate": 1.8267306922769108e-05, "loss": 0.5234, "step": 15874 }, { "epoch": 20.32, "grad_norm": 1.1147645711898804, "learning_rate": 1.826530612244898e-05, "loss": 0.4923, "step": 15875 }, { "epoch": 20.32128, "grad_norm": 1.076523780822754, "learning_rate": 1.826330532212885e-05, "loss": 0.4899, "step": 15876 }, { "epoch": 20.32256, "grad_norm": 1.062248706817627, "learning_rate": 1.8261304521808726e-05, "loss": 0.4891, "step": 15877 }, { "epoch": 20.32384, "grad_norm": 1.0742130279541016, "learning_rate": 1.8259303721488595e-05, "loss": 0.5267, "step": 15878 }, { "epoch": 20.32512, "grad_norm": 1.046400547027588, "learning_rate": 1.8257302921168467e-05, "loss": 0.4743, "step": 
15879 }, { "epoch": 20.3264, "grad_norm": 1.1239981651306152, "learning_rate": 1.8255302120848342e-05, "loss": 0.4765, "step": 15880 }, { "epoch": 20.32768, "grad_norm": 1.074862003326416, "learning_rate": 1.8253301320528214e-05, "loss": 0.4618, "step": 15881 }, { "epoch": 20.32896, "grad_norm": 1.0675029754638672, "learning_rate": 1.8251300520208082e-05, "loss": 0.501, "step": 15882 }, { "epoch": 20.33024, "grad_norm": 1.0445536375045776, "learning_rate": 1.8249299719887954e-05, "loss": 0.5028, "step": 15883 }, { "epoch": 20.33152, "grad_norm": 1.1353448629379272, "learning_rate": 1.824729891956783e-05, "loss": 0.4808, "step": 15884 }, { "epoch": 20.3328, "grad_norm": 1.0861884355545044, "learning_rate": 1.82452981192477e-05, "loss": 0.4717, "step": 15885 }, { "epoch": 20.33408, "grad_norm": 1.0767872333526611, "learning_rate": 1.824329731892757e-05, "loss": 0.4831, "step": 15886 }, { "epoch": 20.33536, "grad_norm": 1.1319317817687988, "learning_rate": 1.824129651860744e-05, "loss": 0.5856, "step": 15887 }, { "epoch": 20.33664, "grad_norm": 1.1070101261138916, "learning_rate": 1.8239295718287317e-05, "loss": 0.4876, "step": 15888 }, { "epoch": 20.33792, "grad_norm": 1.0813465118408203, "learning_rate": 1.823729491796719e-05, "loss": 0.4761, "step": 15889 }, { "epoch": 20.3392, "grad_norm": 1.1044111251831055, "learning_rate": 1.8235294117647057e-05, "loss": 0.5044, "step": 15890 }, { "epoch": 20.34048, "grad_norm": 1.0086759328842163, "learning_rate": 1.8233293317326932e-05, "loss": 0.5094, "step": 15891 }, { "epoch": 20.34176, "grad_norm": 1.0328330993652344, "learning_rate": 1.8231292517006804e-05, "loss": 0.4408, "step": 15892 }, { "epoch": 20.34304, "grad_norm": 1.073428988456726, "learning_rate": 1.8229291716686676e-05, "loss": 0.4879, "step": 15893 }, { "epoch": 20.34432, "grad_norm": 1.0707123279571533, "learning_rate": 1.8227290916366545e-05, "loss": 0.4873, "step": 15894 }, { "epoch": 20.3456, "grad_norm": 0.9961058497428894, "learning_rate": 
1.822529011604642e-05, "loss": 0.4728, "step": 15895 }, { "epoch": 20.34688, "grad_norm": 1.1604456901550293, "learning_rate": 1.8223289315726292e-05, "loss": 0.4744, "step": 15896 }, { "epoch": 20.34816, "grad_norm": 1.1048893928527832, "learning_rate": 1.8221288515406164e-05, "loss": 0.4744, "step": 15897 }, { "epoch": 20.34944, "grad_norm": 1.0889872312545776, "learning_rate": 1.8219287715086035e-05, "loss": 0.4792, "step": 15898 }, { "epoch": 20.35072, "grad_norm": 1.1309471130371094, "learning_rate": 1.8217286914765907e-05, "loss": 0.5117, "step": 15899 }, { "epoch": 20.352, "grad_norm": 1.1387243270874023, "learning_rate": 1.821528611444578e-05, "loss": 0.5127, "step": 15900 }, { "epoch": 20.35328, "grad_norm": 1.1357046365737915, "learning_rate": 1.821328531412565e-05, "loss": 0.5016, "step": 15901 }, { "epoch": 20.35456, "grad_norm": 1.1048427820205688, "learning_rate": 1.8211284513805523e-05, "loss": 0.4902, "step": 15902 }, { "epoch": 20.35584, "grad_norm": 1.0928595066070557, "learning_rate": 1.8209283713485395e-05, "loss": 0.4721, "step": 15903 }, { "epoch": 20.35712, "grad_norm": 1.0246015787124634, "learning_rate": 1.8207282913165267e-05, "loss": 0.4882, "step": 15904 }, { "epoch": 20.3584, "grad_norm": 1.0948463678359985, "learning_rate": 1.820528211284514e-05, "loss": 0.4531, "step": 15905 }, { "epoch": 20.35968, "grad_norm": 1.0775697231292725, "learning_rate": 1.820328131252501e-05, "loss": 0.4587, "step": 15906 }, { "epoch": 20.36096, "grad_norm": 1.0711140632629395, "learning_rate": 1.8201280512204882e-05, "loss": 0.4533, "step": 15907 }, { "epoch": 20.36224, "grad_norm": 1.0861189365386963, "learning_rate": 1.8199279711884754e-05, "loss": 0.4906, "step": 15908 }, { "epoch": 20.36352, "grad_norm": 1.1456248760223389, "learning_rate": 1.8197278911564626e-05, "loss": 0.4666, "step": 15909 }, { "epoch": 20.3648, "grad_norm": 1.126597285270691, "learning_rate": 1.8195278111244498e-05, "loss": 0.5546, "step": 15910 }, { "epoch": 20.36608, 
"grad_norm": 1.1376280784606934, "learning_rate": 1.819327731092437e-05, "loss": 0.497, "step": 15911 }, { "epoch": 20.36736, "grad_norm": 1.084020972251892, "learning_rate": 1.8191276510604245e-05, "loss": 0.4715, "step": 15912 }, { "epoch": 20.36864, "grad_norm": 1.0613341331481934, "learning_rate": 1.8189275710284113e-05, "loss": 0.4906, "step": 15913 }, { "epoch": 20.36992, "grad_norm": 1.0506778955459595, "learning_rate": 1.8187274909963985e-05, "loss": 0.4739, "step": 15914 }, { "epoch": 20.3712, "grad_norm": 1.0626603364944458, "learning_rate": 1.8185274109643857e-05, "loss": 0.4892, "step": 15915 }, { "epoch": 20.37248, "grad_norm": 1.02902090549469, "learning_rate": 1.8183273309323732e-05, "loss": 0.5032, "step": 15916 }, { "epoch": 20.37376, "grad_norm": 1.0012882947921753, "learning_rate": 1.81812725090036e-05, "loss": 0.4414, "step": 15917 }, { "epoch": 20.37504, "grad_norm": 1.0998730659484863, "learning_rate": 1.8179271708683473e-05, "loss": 0.5046, "step": 15918 }, { "epoch": 20.37632, "grad_norm": 1.138268232345581, "learning_rate": 1.8177270908363348e-05, "loss": 0.5413, "step": 15919 }, { "epoch": 20.3776, "grad_norm": 1.1356502771377563, "learning_rate": 1.817527010804322e-05, "loss": 0.5459, "step": 15920 }, { "epoch": 20.37888, "grad_norm": 1.0949009656906128, "learning_rate": 1.8173269307723088e-05, "loss": 0.481, "step": 15921 }, { "epoch": 20.38016, "grad_norm": 1.0819274187088013, "learning_rate": 1.817126850740296e-05, "loss": 0.4975, "step": 15922 }, { "epoch": 20.38144, "grad_norm": 1.084596037864685, "learning_rate": 1.8169267707082835e-05, "loss": 0.5315, "step": 15923 }, { "epoch": 20.38272, "grad_norm": 1.1176440715789795, "learning_rate": 1.8167266906762707e-05, "loss": 0.4893, "step": 15924 }, { "epoch": 20.384, "grad_norm": 1.1294463872909546, "learning_rate": 1.8165266106442576e-05, "loss": 0.4869, "step": 15925 }, { "epoch": 20.38528, "grad_norm": 1.177349328994751, "learning_rate": 1.816326530612245e-05, "loss": 0.476, "step": 
15926 }, { "epoch": 20.38656, "grad_norm": 1.15972101688385, "learning_rate": 1.8161264505802323e-05, "loss": 0.5329, "step": 15927 }, { "epoch": 20.38784, "grad_norm": 1.0567160844802856, "learning_rate": 1.8159263705482195e-05, "loss": 0.4679, "step": 15928 }, { "epoch": 20.38912, "grad_norm": 1.0844935178756714, "learning_rate": 1.8157262905162063e-05, "loss": 0.4588, "step": 15929 }, { "epoch": 20.3904, "grad_norm": 1.057117223739624, "learning_rate": 1.8155262104841938e-05, "loss": 0.4974, "step": 15930 }, { "epoch": 20.39168, "grad_norm": 1.1074366569519043, "learning_rate": 1.815326130452181e-05, "loss": 0.5375, "step": 15931 }, { "epoch": 20.39296, "grad_norm": 1.081174612045288, "learning_rate": 1.8151260504201682e-05, "loss": 0.5087, "step": 15932 }, { "epoch": 20.39424, "grad_norm": 1.090725064277649, "learning_rate": 1.8149259703881554e-05, "loss": 0.5162, "step": 15933 }, { "epoch": 20.39552, "grad_norm": 1.1822116374969482, "learning_rate": 1.8147258903561426e-05, "loss": 0.4763, "step": 15934 }, { "epoch": 20.3968, "grad_norm": 1.0998727083206177, "learning_rate": 1.8145258103241298e-05, "loss": 0.481, "step": 15935 }, { "epoch": 20.39808, "grad_norm": 1.1375815868377686, "learning_rate": 1.814325730292117e-05, "loss": 0.5338, "step": 15936 }, { "epoch": 20.39936, "grad_norm": 1.1332123279571533, "learning_rate": 1.814125650260104e-05, "loss": 0.5234, "step": 15937 }, { "epoch": 20.40064, "grad_norm": 1.1074390411376953, "learning_rate": 1.8139255702280913e-05, "loss": 0.4858, "step": 15938 }, { "epoch": 20.40192, "grad_norm": 1.059393286705017, "learning_rate": 1.8137254901960785e-05, "loss": 0.4912, "step": 15939 }, { "epoch": 20.4032, "grad_norm": 1.132832646369934, "learning_rate": 1.8135254101640657e-05, "loss": 0.5001, "step": 15940 }, { "epoch": 20.40448, "grad_norm": 1.0979952812194824, "learning_rate": 1.813325330132053e-05, "loss": 0.5107, "step": 15941 }, { "epoch": 20.40576, "grad_norm": 1.1360996961593628, "learning_rate": 
1.81312525010004e-05, "loss": 0.4633, "step": 15942 }, { "epoch": 20.40704, "grad_norm": 1.0994714498519897, "learning_rate": 1.8129251700680272e-05, "loss": 0.4984, "step": 15943 }, { "epoch": 20.40832, "grad_norm": 1.0364211797714233, "learning_rate": 1.8127250900360144e-05, "loss": 0.4605, "step": 15944 }, { "epoch": 20.4096, "grad_norm": 1.0742835998535156, "learning_rate": 1.8125250100040016e-05, "loss": 0.4562, "step": 15945 }, { "epoch": 20.41088, "grad_norm": 1.151170015335083, "learning_rate": 1.8123249299719888e-05, "loss": 0.4731, "step": 15946 }, { "epoch": 20.41216, "grad_norm": 1.179980993270874, "learning_rate": 1.812124849939976e-05, "loss": 0.4855, "step": 15947 }, { "epoch": 20.41344, "grad_norm": 1.1587342023849487, "learning_rate": 1.8119247699079632e-05, "loss": 0.5534, "step": 15948 }, { "epoch": 20.41472, "grad_norm": 1.099782109260559, "learning_rate": 1.8117246898759504e-05, "loss": 0.4801, "step": 15949 }, { "epoch": 20.416, "grad_norm": 1.149043321609497, "learning_rate": 1.8115246098439375e-05, "loss": 0.5156, "step": 15950 }, { "epoch": 20.41728, "grad_norm": 1.0656511783599854, "learning_rate": 1.811324529811925e-05, "loss": 0.4889, "step": 15951 }, { "epoch": 20.41856, "grad_norm": 1.1039444208145142, "learning_rate": 1.811124449779912e-05, "loss": 0.4805, "step": 15952 }, { "epoch": 20.41984, "grad_norm": 1.2415475845336914, "learning_rate": 1.810924369747899e-05, "loss": 0.5199, "step": 15953 }, { "epoch": 20.42112, "grad_norm": 1.1294299364089966, "learning_rate": 1.8107242897158863e-05, "loss": 0.5036, "step": 15954 }, { "epoch": 20.4224, "grad_norm": 1.0674455165863037, "learning_rate": 1.8105242096838738e-05, "loss": 0.5114, "step": 15955 }, { "epoch": 20.42368, "grad_norm": 1.1650712490081787, "learning_rate": 1.8103241296518607e-05, "loss": 0.4882, "step": 15956 }, { "epoch": 20.42496, "grad_norm": 1.055827260017395, "learning_rate": 1.810124049619848e-05, "loss": 0.4596, "step": 15957 }, { "epoch": 20.42624, "grad_norm": 
1.0579880475997925, "learning_rate": 1.8099239695878354e-05, "loss": 0.4844, "step": 15958 }, { "epoch": 20.42752, "grad_norm": 1.1154268980026245, "learning_rate": 1.8097238895558226e-05, "loss": 0.5422, "step": 15959 }, { "epoch": 20.4288, "grad_norm": 1.1454224586486816, "learning_rate": 1.8095238095238094e-05, "loss": 0.5215, "step": 15960 }, { "epoch": 20.43008, "grad_norm": 1.1174713373184204, "learning_rate": 1.8093237294917966e-05, "loss": 0.5044, "step": 15961 }, { "epoch": 20.43136, "grad_norm": 1.1929234266281128, "learning_rate": 1.809123649459784e-05, "loss": 0.5606, "step": 15962 }, { "epoch": 20.43264, "grad_norm": 1.2372697591781616, "learning_rate": 1.8089235694277713e-05, "loss": 0.535, "step": 15963 }, { "epoch": 20.43392, "grad_norm": 1.1086331605911255, "learning_rate": 1.808723489395758e-05, "loss": 0.5191, "step": 15964 }, { "epoch": 20.4352, "grad_norm": 1.1372125148773193, "learning_rate": 1.8085234093637457e-05, "loss": 0.5431, "step": 15965 }, { "epoch": 20.43648, "grad_norm": 1.093470811843872, "learning_rate": 1.808323329331733e-05, "loss": 0.4876, "step": 15966 }, { "epoch": 20.43776, "grad_norm": 1.1044193506240845, "learning_rate": 1.80812324929972e-05, "loss": 0.4605, "step": 15967 }, { "epoch": 20.43904, "grad_norm": 1.069149136543274, "learning_rate": 1.807923169267707e-05, "loss": 0.4948, "step": 15968 }, { "epoch": 20.44032, "grad_norm": 1.0932533740997314, "learning_rate": 1.8077230892356944e-05, "loss": 0.4867, "step": 15969 }, { "epoch": 20.4416, "grad_norm": 1.1302661895751953, "learning_rate": 1.8075230092036816e-05, "loss": 0.4979, "step": 15970 }, { "epoch": 20.44288, "grad_norm": 1.0727626085281372, "learning_rate": 1.8073229291716688e-05, "loss": 0.4647, "step": 15971 }, { "epoch": 20.44416, "grad_norm": 1.1052957773208618, "learning_rate": 1.807122849139656e-05, "loss": 0.5308, "step": 15972 }, { "epoch": 20.44544, "grad_norm": 1.0423941612243652, "learning_rate": 1.806922769107643e-05, "loss": 0.4499, "step": 15973 }, 
{ "epoch": 20.44672, "grad_norm": 1.0597472190856934, "learning_rate": 1.8067226890756303e-05, "loss": 0.4767, "step": 15974 }, { "epoch": 20.448, "grad_norm": 1.0862598419189453, "learning_rate": 1.8065226090436175e-05, "loss": 0.5064, "step": 15975 }, { "epoch": 20.44928, "grad_norm": 1.124342441558838, "learning_rate": 1.8063225290116047e-05, "loss": 0.5078, "step": 15976 }, { "epoch": 20.45056, "grad_norm": 1.1150842905044556, "learning_rate": 1.806122448979592e-05, "loss": 0.5201, "step": 15977 }, { "epoch": 20.45184, "grad_norm": 1.101412296295166, "learning_rate": 1.805922368947579e-05, "loss": 0.5034, "step": 15978 }, { "epoch": 20.45312, "grad_norm": 1.0810297727584839, "learning_rate": 1.8057222889155666e-05, "loss": 0.5046, "step": 15979 }, { "epoch": 20.4544, "grad_norm": 1.1513960361480713, "learning_rate": 1.8055222088835534e-05, "loss": 0.5528, "step": 15980 }, { "epoch": 20.45568, "grad_norm": 1.0624333620071411, "learning_rate": 1.8053221288515406e-05, "loss": 0.4878, "step": 15981 }, { "epoch": 20.45696, "grad_norm": 1.1114479303359985, "learning_rate": 1.8051220488195278e-05, "loss": 0.5168, "step": 15982 }, { "epoch": 20.45824, "grad_norm": 1.1178675889968872, "learning_rate": 1.8049219687875153e-05, "loss": 0.5037, "step": 15983 }, { "epoch": 20.45952, "grad_norm": 1.145005226135254, "learning_rate": 1.8047218887555022e-05, "loss": 0.5133, "step": 15984 }, { "epoch": 20.4608, "grad_norm": 1.016296625137329, "learning_rate": 1.8045218087234894e-05, "loss": 0.4644, "step": 15985 }, { "epoch": 20.46208, "grad_norm": 1.0797942876815796, "learning_rate": 1.804321728691477e-05, "loss": 0.457, "step": 15986 }, { "epoch": 20.46336, "grad_norm": 1.0780278444290161, "learning_rate": 1.804121648659464e-05, "loss": 0.5116, "step": 15987 }, { "epoch": 20.46464, "grad_norm": 1.1817333698272705, "learning_rate": 1.803921568627451e-05, "loss": 0.4981, "step": 15988 }, { "epoch": 20.46592, "grad_norm": 1.0998475551605225, "learning_rate": 1.803721488595438e-05, 
"loss": 0.4816, "step": 15989 }, { "epoch": 20.4672, "grad_norm": 1.1314204931259155, "learning_rate": 1.8035214085634256e-05, "loss": 0.5686, "step": 15990 }, { "epoch": 20.46848, "grad_norm": 1.0639474391937256, "learning_rate": 1.803321328531413e-05, "loss": 0.4855, "step": 15991 }, { "epoch": 20.46976, "grad_norm": 0.9945549964904785, "learning_rate": 1.8031212484993997e-05, "loss": 0.4438, "step": 15992 }, { "epoch": 20.47104, "grad_norm": 1.022700548171997, "learning_rate": 1.8029211684673872e-05, "loss": 0.47, "step": 15993 }, { "epoch": 20.47232, "grad_norm": 1.0951484441757202, "learning_rate": 1.8027210884353744e-05, "loss": 0.5013, "step": 15994 }, { "epoch": 20.4736, "grad_norm": 1.067020058631897, "learning_rate": 1.8025210084033616e-05, "loss": 0.5355, "step": 15995 }, { "epoch": 20.47488, "grad_norm": 1.0288721323013306, "learning_rate": 1.8023209283713484e-05, "loss": 0.4643, "step": 15996 }, { "epoch": 20.47616, "grad_norm": 1.031306505203247, "learning_rate": 1.802120848339336e-05, "loss": 0.4628, "step": 15997 }, { "epoch": 20.47744, "grad_norm": 1.183764100074768, "learning_rate": 1.801920768307323e-05, "loss": 0.5168, "step": 15998 }, { "epoch": 20.47872, "grad_norm": 0.9885333180427551, "learning_rate": 1.8017206882753103e-05, "loss": 0.4841, "step": 15999 }, { "epoch": 20.48, "grad_norm": 1.01718008518219, "learning_rate": 1.801520608243297e-05, "loss": 0.5065, "step": 16000 }, { "epoch": 20.48128, "grad_norm": 1.058000922203064, "learning_rate": 1.8013205282112847e-05, "loss": 0.4702, "step": 16001 }, { "epoch": 20.48256, "grad_norm": 1.0549604892730713, "learning_rate": 1.801120448179272e-05, "loss": 0.4522, "step": 16002 }, { "epoch": 20.48384, "grad_norm": 1.2326083183288574, "learning_rate": 1.800920368147259e-05, "loss": 0.5391, "step": 16003 }, { "epoch": 20.48512, "grad_norm": 1.0707967281341553, "learning_rate": 1.8007202881152462e-05, "loss": 0.4603, "step": 16004 }, { "epoch": 20.4864, "grad_norm": 1.141228199005127, 
"learning_rate": 1.8005202080832334e-05, "loss": 0.5162, "step": 16005 }, { "epoch": 20.48768, "grad_norm": 1.074081540107727, "learning_rate": 1.8003201280512206e-05, "loss": 0.4736, "step": 16006 }, { "epoch": 20.48896, "grad_norm": 1.0647474527359009, "learning_rate": 1.8001200480192078e-05, "loss": 0.4778, "step": 16007 }, { "epoch": 20.49024, "grad_norm": 1.1163427829742432, "learning_rate": 1.799919967987195e-05, "loss": 0.5002, "step": 16008 }, { "epoch": 20.49152, "grad_norm": 1.0871421098709106, "learning_rate": 1.7997198879551822e-05, "loss": 0.485, "step": 16009 }, { "epoch": 20.4928, "grad_norm": 1.0807521343231201, "learning_rate": 1.7995198079231694e-05, "loss": 0.4932, "step": 16010 }, { "epoch": 20.49408, "grad_norm": 1.119227409362793, "learning_rate": 1.7993197278911565e-05, "loss": 0.5201, "step": 16011 }, { "epoch": 20.49536, "grad_norm": 1.0895061492919922, "learning_rate": 1.7991196478591437e-05, "loss": 0.495, "step": 16012 }, { "epoch": 20.49664, "grad_norm": 1.0847043991088867, "learning_rate": 1.798919567827131e-05, "loss": 0.484, "step": 16013 }, { "epoch": 20.49792, "grad_norm": 1.0497854948043823, "learning_rate": 1.798719487795118e-05, "loss": 0.4332, "step": 16014 }, { "epoch": 20.4992, "grad_norm": 1.1417521238327026, "learning_rate": 1.7985194077631053e-05, "loss": 0.518, "step": 16015 }, { "epoch": 20.50048, "grad_norm": 1.1823201179504395, "learning_rate": 1.7983193277310925e-05, "loss": 0.4732, "step": 16016 }, { "epoch": 20.50176, "grad_norm": 1.0755385160446167, "learning_rate": 1.7981192476990797e-05, "loss": 0.4858, "step": 16017 }, { "epoch": 20.50304, "grad_norm": 1.1058756113052368, "learning_rate": 1.7979191676670672e-05, "loss": 0.4979, "step": 16018 }, { "epoch": 20.50432, "grad_norm": 1.109181523323059, "learning_rate": 1.797719087635054e-05, "loss": 0.5062, "step": 16019 }, { "epoch": 20.5056, "grad_norm": 1.0815677642822266, "learning_rate": 1.7975190076030412e-05, "loss": 0.5043, "step": 16020 }, { "epoch": 
20.50688, "grad_norm": 1.1085656881332397, "learning_rate": 1.7973189275710284e-05, "loss": 0.4851, "step": 16021 }, { "epoch": 20.50816, "grad_norm": 1.077195644378662, "learning_rate": 1.797118847539016e-05, "loss": 0.4885, "step": 16022 }, { "epoch": 20.50944, "grad_norm": 0.9872754812240601, "learning_rate": 1.7969187675070028e-05, "loss": 0.4926, "step": 16023 }, { "epoch": 20.51072, "grad_norm": 1.0659494400024414, "learning_rate": 1.79671868747499e-05, "loss": 0.447, "step": 16024 }, { "epoch": 20.512, "grad_norm": 1.0598735809326172, "learning_rate": 1.7965186074429775e-05, "loss": 0.487, "step": 16025 }, { "epoch": 20.51328, "grad_norm": 1.0480681657791138, "learning_rate": 1.7963185274109647e-05, "loss": 0.4795, "step": 16026 }, { "epoch": 20.51456, "grad_norm": 1.0445165634155273, "learning_rate": 1.7961184473789515e-05, "loss": 0.4792, "step": 16027 }, { "epoch": 20.51584, "grad_norm": 1.084743857383728, "learning_rate": 1.7959183673469387e-05, "loss": 0.5358, "step": 16028 }, { "epoch": 20.51712, "grad_norm": 1.127386212348938, "learning_rate": 1.7957182873149262e-05, "loss": 0.514, "step": 16029 }, { "epoch": 20.5184, "grad_norm": 1.1147735118865967, "learning_rate": 1.7955182072829134e-05, "loss": 0.53, "step": 16030 }, { "epoch": 20.51968, "grad_norm": 1.0256705284118652, "learning_rate": 1.7953181272509003e-05, "loss": 0.4541, "step": 16031 }, { "epoch": 20.52096, "grad_norm": 1.1226087808609009, "learning_rate": 1.7951180472188878e-05, "loss": 0.5, "step": 16032 }, { "epoch": 20.52224, "grad_norm": 1.1469954252243042, "learning_rate": 1.794917967186875e-05, "loss": 0.5799, "step": 16033 }, { "epoch": 20.52352, "grad_norm": 1.1341044902801514, "learning_rate": 1.794717887154862e-05, "loss": 0.547, "step": 16034 }, { "epoch": 20.5248, "grad_norm": 1.123470425605774, "learning_rate": 1.794517807122849e-05, "loss": 0.4843, "step": 16035 }, { "epoch": 20.52608, "grad_norm": 1.1044238805770874, "learning_rate": 1.7943177270908365e-05, "loss": 0.5044, 
"step": 16036 }, { "epoch": 20.52736, "grad_norm": 1.021741271018982, "learning_rate": 1.7941176470588237e-05, "loss": 0.4652, "step": 16037 }, { "epoch": 20.52864, "grad_norm": 1.1132144927978516, "learning_rate": 1.793917567026811e-05, "loss": 0.486, "step": 16038 }, { "epoch": 20.52992, "grad_norm": 1.0857055187225342, "learning_rate": 1.793717486994798e-05, "loss": 0.5323, "step": 16039 }, { "epoch": 20.5312, "grad_norm": 1.1283453702926636, "learning_rate": 1.7935174069627853e-05, "loss": 0.4848, "step": 16040 }, { "epoch": 20.53248, "grad_norm": 1.175715684890747, "learning_rate": 1.7933173269307725e-05, "loss": 0.5741, "step": 16041 }, { "epoch": 20.53376, "grad_norm": 1.1087825298309326, "learning_rate": 1.7931172468987596e-05, "loss": 0.4799, "step": 16042 }, { "epoch": 20.53504, "grad_norm": 1.1053529977798462, "learning_rate": 1.7929171668667468e-05, "loss": 0.5016, "step": 16043 }, { "epoch": 20.53632, "grad_norm": 1.1228110790252686, "learning_rate": 1.792717086834734e-05, "loss": 0.5231, "step": 16044 }, { "epoch": 20.5376, "grad_norm": 1.127894639968872, "learning_rate": 1.7925170068027212e-05, "loss": 0.5069, "step": 16045 }, { "epoch": 20.53888, "grad_norm": 1.0892380475997925, "learning_rate": 1.7923169267707084e-05, "loss": 0.5249, "step": 16046 }, { "epoch": 20.54016, "grad_norm": 1.0959818363189697, "learning_rate": 1.7921168467386956e-05, "loss": 0.511, "step": 16047 }, { "epoch": 20.54144, "grad_norm": 1.0303337574005127, "learning_rate": 1.7919167667066828e-05, "loss": 0.4818, "step": 16048 }, { "epoch": 20.54272, "grad_norm": 1.0872350931167603, "learning_rate": 1.79171668667467e-05, "loss": 0.4872, "step": 16049 }, { "epoch": 20.544, "grad_norm": 1.132896065711975, "learning_rate": 1.791516606642657e-05, "loss": 0.5216, "step": 16050 }, { "epoch": 20.545279999999998, "grad_norm": 1.0700019598007202, "learning_rate": 1.7913165266106443e-05, "loss": 0.4918, "step": 16051 }, { "epoch": 20.54656, "grad_norm": 1.0675747394561768, 
"learning_rate": 1.7911164465786315e-05, "loss": 0.4883, "step": 16052 }, { "epoch": 20.54784, "grad_norm": 1.1021859645843506, "learning_rate": 1.7909163665466187e-05, "loss": 0.4742, "step": 16053 }, { "epoch": 20.54912, "grad_norm": 1.0696967840194702, "learning_rate": 1.790716286514606e-05, "loss": 0.4873, "step": 16054 }, { "epoch": 20.5504, "grad_norm": 1.0992937088012695, "learning_rate": 1.790516206482593e-05, "loss": 0.5003, "step": 16055 }, { "epoch": 20.55168, "grad_norm": 1.0972262620925903, "learning_rate": 1.7903161264505802e-05, "loss": 0.562, "step": 16056 }, { "epoch": 20.55296, "grad_norm": 1.1256171464920044, "learning_rate": 1.7901160464185678e-05, "loss": 0.4714, "step": 16057 }, { "epoch": 20.55424, "grad_norm": 1.1551443338394165, "learning_rate": 1.7899159663865546e-05, "loss": 0.5156, "step": 16058 }, { "epoch": 20.55552, "grad_norm": 1.0866180658340454, "learning_rate": 1.7897158863545418e-05, "loss": 0.4726, "step": 16059 }, { "epoch": 20.5568, "grad_norm": 1.1982043981552124, "learning_rate": 1.789515806322529e-05, "loss": 0.5541, "step": 16060 }, { "epoch": 20.55808, "grad_norm": 1.107646107673645, "learning_rate": 1.7893157262905165e-05, "loss": 0.5068, "step": 16061 }, { "epoch": 20.55936, "grad_norm": 1.1011683940887451, "learning_rate": 1.7891156462585034e-05, "loss": 0.5374, "step": 16062 }, { "epoch": 20.56064, "grad_norm": 1.1166385412216187, "learning_rate": 1.7889155662264905e-05, "loss": 0.4849, "step": 16063 }, { "epoch": 20.56192, "grad_norm": 1.1044502258300781, "learning_rate": 1.788715486194478e-05, "loss": 0.4947, "step": 16064 }, { "epoch": 20.5632, "grad_norm": 1.126716136932373, "learning_rate": 1.7885154061624652e-05, "loss": 0.5107, "step": 16065 }, { "epoch": 20.56448, "grad_norm": 1.119673252105713, "learning_rate": 1.788315326130452e-05, "loss": 0.5369, "step": 16066 }, { "epoch": 20.56576, "grad_norm": 1.074339747428894, "learning_rate": 1.7881152460984393e-05, "loss": 0.4935, "step": 16067 }, { "epoch": 
20.56704, "grad_norm": 1.1229872703552246, "learning_rate": 1.7879151660664268e-05, "loss": 0.5122, "step": 16068 }, { "epoch": 20.56832, "grad_norm": 1.1117804050445557, "learning_rate": 1.787715086034414e-05, "loss": 0.4864, "step": 16069 }, { "epoch": 20.5696, "grad_norm": 1.0915405750274658, "learning_rate": 1.787515006002401e-05, "loss": 0.5351, "step": 16070 }, { "epoch": 20.57088, "grad_norm": 1.1257494688034058, "learning_rate": 1.7873149259703884e-05, "loss": 0.4874, "step": 16071 }, { "epoch": 20.57216, "grad_norm": 1.1443331241607666, "learning_rate": 1.7871148459383755e-05, "loss": 0.4771, "step": 16072 }, { "epoch": 20.57344, "grad_norm": 1.1253244876861572, "learning_rate": 1.7869147659063627e-05, "loss": 0.492, "step": 16073 }, { "epoch": 20.57472, "grad_norm": 1.1300221681594849, "learning_rate": 1.7867146858743496e-05, "loss": 0.5306, "step": 16074 }, { "epoch": 20.576, "grad_norm": 1.1364995241165161, "learning_rate": 1.786514605842337e-05, "loss": 0.5069, "step": 16075 }, { "epoch": 20.577280000000002, "grad_norm": 1.0387669801712036, "learning_rate": 1.7863145258103243e-05, "loss": 0.4726, "step": 16076 }, { "epoch": 20.57856, "grad_norm": 1.049291968345642, "learning_rate": 1.7861144457783115e-05, "loss": 0.4852, "step": 16077 }, { "epoch": 20.57984, "grad_norm": 1.0444282293319702, "learning_rate": 1.7859143657462987e-05, "loss": 0.4718, "step": 16078 }, { "epoch": 20.58112, "grad_norm": 1.095975399017334, "learning_rate": 1.785714285714286e-05, "loss": 0.5015, "step": 16079 }, { "epoch": 20.5824, "grad_norm": 1.104600191116333, "learning_rate": 1.785514205682273e-05, "loss": 0.5515, "step": 16080 }, { "epoch": 20.58368, "grad_norm": 1.0863333940505981, "learning_rate": 1.7853141256502602e-05, "loss": 0.453, "step": 16081 }, { "epoch": 20.58496, "grad_norm": 1.1413263082504272, "learning_rate": 1.7851140456182474e-05, "loss": 0.5646, "step": 16082 }, { "epoch": 20.58624, "grad_norm": 1.149473786354065, "learning_rate": 1.7849139655862346e-05, 
"loss": 0.4853, "step": 16083 }, { "epoch": 20.58752, "grad_norm": 1.069388747215271, "learning_rate": 1.7847138855542218e-05, "loss": 0.4969, "step": 16084 }, { "epoch": 20.5888, "grad_norm": 1.156481385231018, "learning_rate": 1.784513805522209e-05, "loss": 0.5106, "step": 16085 }, { "epoch": 20.59008, "grad_norm": 1.0707303285598755, "learning_rate": 1.784313725490196e-05, "loss": 0.467, "step": 16086 }, { "epoch": 20.59136, "grad_norm": 1.1706613302230835, "learning_rate": 1.7841136454581833e-05, "loss": 0.5354, "step": 16087 }, { "epoch": 20.59264, "grad_norm": 1.0919955968856812, "learning_rate": 1.7839135654261705e-05, "loss": 0.4906, "step": 16088 }, { "epoch": 20.59392, "grad_norm": 1.0173168182373047, "learning_rate": 1.7837134853941577e-05, "loss": 0.4586, "step": 16089 }, { "epoch": 20.5952, "grad_norm": 1.1111178398132324, "learning_rate": 1.783513405362145e-05, "loss": 0.5209, "step": 16090 }, { "epoch": 20.59648, "grad_norm": 1.0708633661270142, "learning_rate": 1.783313325330132e-05, "loss": 0.4752, "step": 16091 }, { "epoch": 20.59776, "grad_norm": 1.0484986305236816, "learning_rate": 1.7831132452981196e-05, "loss": 0.491, "step": 16092 }, { "epoch": 20.59904, "grad_norm": 1.205859899520874, "learning_rate": 1.7829131652661064e-05, "loss": 0.4858, "step": 16093 }, { "epoch": 20.60032, "grad_norm": 1.1603915691375732, "learning_rate": 1.7827130852340936e-05, "loss": 0.4818, "step": 16094 }, { "epoch": 20.6016, "grad_norm": 1.0573688745498657, "learning_rate": 1.7825130052020808e-05, "loss": 0.4992, "step": 16095 }, { "epoch": 20.60288, "grad_norm": 1.1221954822540283, "learning_rate": 1.7823129251700683e-05, "loss": 0.4625, "step": 16096 }, { "epoch": 20.60416, "grad_norm": 1.1069375276565552, "learning_rate": 1.7821128451380552e-05, "loss": 0.502, "step": 16097 }, { "epoch": 20.60544, "grad_norm": 1.0597623586654663, "learning_rate": 1.7819127651060424e-05, "loss": 0.4757, "step": 16098 }, { "epoch": 20.60672, "grad_norm": 1.1148048639297485, 
"learning_rate": 1.78171268507403e-05, "loss": 0.521, "step": 16099 }, { "epoch": 20.608, "grad_norm": 1.1164958477020264, "learning_rate": 1.781512605042017e-05, "loss": 0.4964, "step": 16100 }, { "epoch": 20.60928, "grad_norm": 1.063112735748291, "learning_rate": 1.781312525010004e-05, "loss": 0.51, "step": 16101 }, { "epoch": 20.61056, "grad_norm": 1.1305928230285645, "learning_rate": 1.781112444977991e-05, "loss": 0.5106, "step": 16102 }, { "epoch": 20.61184, "grad_norm": 1.0931042432785034, "learning_rate": 1.7809123649459786e-05, "loss": 0.482, "step": 16103 }, { "epoch": 20.61312, "grad_norm": 1.0625600814819336, "learning_rate": 1.7807122849139658e-05, "loss": 0.4919, "step": 16104 }, { "epoch": 20.6144, "grad_norm": 1.0991266965866089, "learning_rate": 1.7805122048819527e-05, "loss": 0.481, "step": 16105 }, { "epoch": 20.61568, "grad_norm": 1.0887682437896729, "learning_rate": 1.7803121248499402e-05, "loss": 0.5264, "step": 16106 }, { "epoch": 20.61696, "grad_norm": 1.150978446006775, "learning_rate": 1.7801120448179274e-05, "loss": 0.5575, "step": 16107 }, { "epoch": 20.61824, "grad_norm": 1.1687663793563843, "learning_rate": 1.7799119647859146e-05, "loss": 0.4825, "step": 16108 }, { "epoch": 20.61952, "grad_norm": 1.1047236919403076, "learning_rate": 1.7797118847539014e-05, "loss": 0.513, "step": 16109 }, { "epoch": 20.6208, "grad_norm": 1.1841906309127808, "learning_rate": 1.779511804721889e-05, "loss": 0.448, "step": 16110 }, { "epoch": 20.62208, "grad_norm": 1.0986437797546387, "learning_rate": 1.779311724689876e-05, "loss": 0.5195, "step": 16111 }, { "epoch": 20.62336, "grad_norm": 1.110182762145996, "learning_rate": 1.7791116446578633e-05, "loss": 0.5159, "step": 16112 }, { "epoch": 20.62464, "grad_norm": 1.0949082374572754, "learning_rate": 1.77891156462585e-05, "loss": 0.5092, "step": 16113 }, { "epoch": 20.62592, "grad_norm": 1.1522455215454102, "learning_rate": 1.7787114845938377e-05, "loss": 0.5514, "step": 16114 }, { "epoch": 20.6272, 
"grad_norm": 1.0845671892166138, "learning_rate": 1.778511404561825e-05, "loss": 0.4713, "step": 16115 }, { "epoch": 20.62848, "grad_norm": 1.1189041137695312, "learning_rate": 1.778311324529812e-05, "loss": 0.5218, "step": 16116 }, { "epoch": 20.62976, "grad_norm": 1.1408662796020508, "learning_rate": 1.7781112444977992e-05, "loss": 0.531, "step": 16117 }, { "epoch": 20.63104, "grad_norm": 1.071902871131897, "learning_rate": 1.7779111644657864e-05, "loss": 0.47, "step": 16118 }, { "epoch": 20.63232, "grad_norm": 1.0264390707015991, "learning_rate": 1.7777110844337736e-05, "loss": 0.4506, "step": 16119 }, { "epoch": 20.6336, "grad_norm": 1.073315143585205, "learning_rate": 1.7775110044017608e-05, "loss": 0.4636, "step": 16120 }, { "epoch": 20.63488, "grad_norm": 1.1272789239883423, "learning_rate": 1.777310924369748e-05, "loss": 0.5256, "step": 16121 }, { "epoch": 20.63616, "grad_norm": 1.1192529201507568, "learning_rate": 1.7771108443377352e-05, "loss": 0.5349, "step": 16122 }, { "epoch": 20.63744, "grad_norm": 1.0640686750411987, "learning_rate": 1.7769107643057224e-05, "loss": 0.4645, "step": 16123 }, { "epoch": 20.63872, "grad_norm": 1.095218539237976, "learning_rate": 1.7767106842737095e-05, "loss": 0.5333, "step": 16124 }, { "epoch": 20.64, "grad_norm": 1.1425113677978516, "learning_rate": 1.7765106042416967e-05, "loss": 0.5289, "step": 16125 }, { "epoch": 20.64128, "grad_norm": 1.095801830291748, "learning_rate": 1.776310524209684e-05, "loss": 0.496, "step": 16126 }, { "epoch": 20.64256, "grad_norm": 1.1665611267089844, "learning_rate": 1.776110444177671e-05, "loss": 0.5176, "step": 16127 }, { "epoch": 20.64384, "grad_norm": 1.109303593635559, "learning_rate": 1.7759103641456583e-05, "loss": 0.5234, "step": 16128 }, { "epoch": 20.64512, "grad_norm": 1.0539172887802124, "learning_rate": 1.7757102841136455e-05, "loss": 0.4982, "step": 16129 }, { "epoch": 20.6464, "grad_norm": 1.068564772605896, "learning_rate": 1.7755102040816327e-05, "loss": 0.5223, "step": 
16130 }, { "epoch": 20.64768, "grad_norm": 1.0728812217712402, "learning_rate": 1.7753101240496202e-05, "loss": 0.4921, "step": 16131 }, { "epoch": 20.64896, "grad_norm": 1.1256548166275024, "learning_rate": 1.775110044017607e-05, "loss": 0.5624, "step": 16132 }, { "epoch": 20.65024, "grad_norm": 1.0447193384170532, "learning_rate": 1.7749099639855942e-05, "loss": 0.5047, "step": 16133 }, { "epoch": 20.65152, "grad_norm": 1.0693058967590332, "learning_rate": 1.7747098839535814e-05, "loss": 0.4991, "step": 16134 }, { "epoch": 20.6528, "grad_norm": 1.0980228185653687, "learning_rate": 1.774509803921569e-05, "loss": 0.5151, "step": 16135 }, { "epoch": 20.65408, "grad_norm": 1.066780924797058, "learning_rate": 1.7743097238895558e-05, "loss": 0.4867, "step": 16136 }, { "epoch": 20.65536, "grad_norm": 1.225781798362732, "learning_rate": 1.774109643857543e-05, "loss": 0.5333, "step": 16137 }, { "epoch": 20.65664, "grad_norm": 1.0326478481292725, "learning_rate": 1.7739095638255305e-05, "loss": 0.4637, "step": 16138 }, { "epoch": 20.65792, "grad_norm": 1.1340293884277344, "learning_rate": 1.7737094837935177e-05, "loss": 0.5045, "step": 16139 }, { "epoch": 20.6592, "grad_norm": 1.1338059902191162, "learning_rate": 1.7735094037615045e-05, "loss": 0.5238, "step": 16140 }, { "epoch": 20.66048, "grad_norm": 1.1640844345092773, "learning_rate": 1.7733093237294917e-05, "loss": 0.5039, "step": 16141 }, { "epoch": 20.66176, "grad_norm": 1.0547772645950317, "learning_rate": 1.7731092436974792e-05, "loss": 0.4398, "step": 16142 }, { "epoch": 20.66304, "grad_norm": 1.1220600605010986, "learning_rate": 1.7729091636654664e-05, "loss": 0.5248, "step": 16143 }, { "epoch": 20.66432, "grad_norm": 1.0587539672851562, "learning_rate": 1.7727090836334533e-05, "loss": 0.4715, "step": 16144 }, { "epoch": 20.6656, "grad_norm": 1.1665568351745605, "learning_rate": 1.7725090036014408e-05, "loss": 0.5386, "step": 16145 }, { "epoch": 20.66688, "grad_norm": 1.0843427181243896, "learning_rate": 
1.772308923569428e-05, "loss": 0.5055, "step": 16146 }, { "epoch": 20.66816, "grad_norm": 1.0697423219680786, "learning_rate": 1.772108843537415e-05, "loss": 0.5219, "step": 16147 }, { "epoch": 20.66944, "grad_norm": 1.1367213726043701, "learning_rate": 1.771908763505402e-05, "loss": 0.5201, "step": 16148 }, { "epoch": 20.67072, "grad_norm": 1.1785510778427124, "learning_rate": 1.7717086834733895e-05, "loss": 0.4896, "step": 16149 }, { "epoch": 20.672, "grad_norm": 1.1276311874389648, "learning_rate": 1.7715086034413767e-05, "loss": 0.4954, "step": 16150 }, { "epoch": 20.67328, "grad_norm": 1.1062501668930054, "learning_rate": 1.771308523409364e-05, "loss": 0.4945, "step": 16151 }, { "epoch": 20.67456, "grad_norm": 1.0566152334213257, "learning_rate": 1.771108443377351e-05, "loss": 0.471, "step": 16152 }, { "epoch": 20.67584, "grad_norm": 1.0588206052780151, "learning_rate": 1.7709083633453383e-05, "loss": 0.5359, "step": 16153 }, { "epoch": 20.67712, "grad_norm": 1.1501905918121338, "learning_rate": 1.7707082833133255e-05, "loss": 0.4935, "step": 16154 }, { "epoch": 20.6784, "grad_norm": 1.0157536268234253, "learning_rate": 1.7705082032813126e-05, "loss": 0.4498, "step": 16155 }, { "epoch": 20.67968, "grad_norm": 1.1221078634262085, "learning_rate": 1.7703081232492998e-05, "loss": 0.4926, "step": 16156 }, { "epoch": 20.68096, "grad_norm": 1.0495266914367676, "learning_rate": 1.770108043217287e-05, "loss": 0.471, "step": 16157 }, { "epoch": 20.68224, "grad_norm": 1.1005525588989258, "learning_rate": 1.7699079631852742e-05, "loss": 0.499, "step": 16158 }, { "epoch": 20.68352, "grad_norm": 1.0935386419296265, "learning_rate": 1.7697078831532614e-05, "loss": 0.4897, "step": 16159 }, { "epoch": 20.6848, "grad_norm": 1.0938061475753784, "learning_rate": 1.7695078031212486e-05, "loss": 0.4632, "step": 16160 }, { "epoch": 20.68608, "grad_norm": 1.104549765586853, "learning_rate": 1.7693077230892358e-05, "loss": 0.4732, "step": 16161 }, { "epoch": 20.687359999999998, 
"grad_norm": 1.0983625650405884, "learning_rate": 1.769107643057223e-05, "loss": 0.4978, "step": 16162 }, { "epoch": 20.68864, "grad_norm": 1.1382209062576294, "learning_rate": 1.76890756302521e-05, "loss": 0.5318, "step": 16163 }, { "epoch": 20.68992, "grad_norm": 1.1396292448043823, "learning_rate": 1.7687074829931973e-05, "loss": 0.5042, "step": 16164 }, { "epoch": 20.6912, "grad_norm": 1.1234673261642456, "learning_rate": 1.7685074029611845e-05, "loss": 0.5071, "step": 16165 }, { "epoch": 20.69248, "grad_norm": 1.0681381225585938, "learning_rate": 1.7683073229291717e-05, "loss": 0.5042, "step": 16166 }, { "epoch": 20.69376, "grad_norm": 1.0953648090362549, "learning_rate": 1.768107242897159e-05, "loss": 0.4874, "step": 16167 }, { "epoch": 20.69504, "grad_norm": 1.1817059516906738, "learning_rate": 1.767907162865146e-05, "loss": 0.5278, "step": 16168 }, { "epoch": 20.69632, "grad_norm": 1.130669116973877, "learning_rate": 1.7677070828331332e-05, "loss": 0.5081, "step": 16169 }, { "epoch": 20.6976, "grad_norm": 1.0833494663238525, "learning_rate": 1.7675070028011208e-05, "loss": 0.4866, "step": 16170 }, { "epoch": 20.69888, "grad_norm": 1.0703330039978027, "learning_rate": 1.7673069227691076e-05, "loss": 0.4909, "step": 16171 }, { "epoch": 20.70016, "grad_norm": 1.13280189037323, "learning_rate": 1.7671068427370948e-05, "loss": 0.5435, "step": 16172 }, { "epoch": 20.70144, "grad_norm": 1.023118495941162, "learning_rate": 1.766906762705082e-05, "loss": 0.4848, "step": 16173 }, { "epoch": 20.70272, "grad_norm": 1.0794446468353271, "learning_rate": 1.7667066826730695e-05, "loss": 0.5403, "step": 16174 }, { "epoch": 20.704, "grad_norm": 1.007285237312317, "learning_rate": 1.7665066026410564e-05, "loss": 0.4658, "step": 16175 }, { "epoch": 20.70528, "grad_norm": 1.0927997827529907, "learning_rate": 1.7663065226090435e-05, "loss": 0.5081, "step": 16176 }, { "epoch": 20.70656, "grad_norm": 1.104650855064392, "learning_rate": 1.766106442577031e-05, "loss": 0.4623, 
"step": 16177 }, { "epoch": 20.70784, "grad_norm": 1.0667095184326172, "learning_rate": 1.7659063625450182e-05, "loss": 0.4841, "step": 16178 }, { "epoch": 20.70912, "grad_norm": 1.0377103090286255, "learning_rate": 1.765706282513005e-05, "loss": 0.4809, "step": 16179 }, { "epoch": 20.7104, "grad_norm": 1.1214765310287476, "learning_rate": 1.7655062024809923e-05, "loss": 0.4913, "step": 16180 }, { "epoch": 20.71168, "grad_norm": 1.079932689666748, "learning_rate": 1.7653061224489798e-05, "loss": 0.4694, "step": 16181 }, { "epoch": 20.71296, "grad_norm": 1.179184913635254, "learning_rate": 1.765106042416967e-05, "loss": 0.4864, "step": 16182 }, { "epoch": 20.71424, "grad_norm": 1.0123862028121948, "learning_rate": 1.764905962384954e-05, "loss": 0.4597, "step": 16183 }, { "epoch": 20.71552, "grad_norm": 1.1281754970550537, "learning_rate": 1.7647058823529414e-05, "loss": 0.4937, "step": 16184 }, { "epoch": 20.7168, "grad_norm": 1.0788217782974243, "learning_rate": 1.7645058023209285e-05, "loss": 0.4898, "step": 16185 }, { "epoch": 20.71808, "grad_norm": 1.1888701915740967, "learning_rate": 1.7643057222889157e-05, "loss": 0.4891, "step": 16186 }, { "epoch": 20.71936, "grad_norm": 1.1022487878799438, "learning_rate": 1.7641056422569026e-05, "loss": 0.5248, "step": 16187 }, { "epoch": 20.72064, "grad_norm": 1.0947836637496948, "learning_rate": 1.76390556222489e-05, "loss": 0.4841, "step": 16188 }, { "epoch": 20.72192, "grad_norm": 1.101966142654419, "learning_rate": 1.7637054821928773e-05, "loss": 0.5178, "step": 16189 }, { "epoch": 20.7232, "grad_norm": 1.065540075302124, "learning_rate": 1.7635054021608645e-05, "loss": 0.452, "step": 16190 }, { "epoch": 20.72448, "grad_norm": 1.024093508720398, "learning_rate": 1.7633053221288517e-05, "loss": 0.4519, "step": 16191 }, { "epoch": 20.72576, "grad_norm": 1.0961694717407227, "learning_rate": 1.763105242096839e-05, "loss": 0.5014, "step": 16192 }, { "epoch": 20.72704, "grad_norm": 1.0572614669799805, "learning_rate": 
1.762905162064826e-05, "loss": 0.4712, "step": 16193 }, { "epoch": 20.72832, "grad_norm": 1.2695916891098022, "learning_rate": 1.7627050820328132e-05, "loss": 0.5001, "step": 16194 }, { "epoch": 20.7296, "grad_norm": 1.0590322017669678, "learning_rate": 1.7625050020008004e-05, "loss": 0.4618, "step": 16195 }, { "epoch": 20.73088, "grad_norm": 1.0804553031921387, "learning_rate": 1.7623049219687876e-05, "loss": 0.4603, "step": 16196 }, { "epoch": 20.73216, "grad_norm": 1.037230372428894, "learning_rate": 1.7621048419367748e-05, "loss": 0.4547, "step": 16197 }, { "epoch": 20.73344, "grad_norm": 1.1001992225646973, "learning_rate": 1.761904761904762e-05, "loss": 0.4539, "step": 16198 }, { "epoch": 20.73472, "grad_norm": 1.0365502834320068, "learning_rate": 1.761704681872749e-05, "loss": 0.4636, "step": 16199 }, { "epoch": 20.736, "grad_norm": 1.0428606271743774, "learning_rate": 1.7615046018407363e-05, "loss": 0.4721, "step": 16200 }, { "epoch": 20.73728, "grad_norm": 1.1415473222732544, "learning_rate": 1.7613045218087235e-05, "loss": 0.4861, "step": 16201 }, { "epoch": 20.73856, "grad_norm": 1.120957612991333, "learning_rate": 1.7611044417767107e-05, "loss": 0.4992, "step": 16202 }, { "epoch": 20.73984, "grad_norm": 1.0169925689697266, "learning_rate": 1.760904361744698e-05, "loss": 0.4415, "step": 16203 }, { "epoch": 20.74112, "grad_norm": 1.1539958715438843, "learning_rate": 1.760704281712685e-05, "loss": 0.539, "step": 16204 }, { "epoch": 20.7424, "grad_norm": 1.0903569459915161, "learning_rate": 1.7605042016806726e-05, "loss": 0.4769, "step": 16205 }, { "epoch": 20.74368, "grad_norm": 1.1013376712799072, "learning_rate": 1.7603041216486594e-05, "loss": 0.4809, "step": 16206 }, { "epoch": 20.74496, "grad_norm": 1.1020969152450562, "learning_rate": 1.7601040416166466e-05, "loss": 0.4534, "step": 16207 }, { "epoch": 20.74624, "grad_norm": 1.0721830129623413, "learning_rate": 1.7599039615846338e-05, "loss": 0.4651, "step": 16208 }, { "epoch": 20.74752, "grad_norm": 
1.0913660526275635, "learning_rate": 1.7597038815526213e-05, "loss": 0.482, "step": 16209 }, { "epoch": 20.7488, "grad_norm": 1.0793043375015259, "learning_rate": 1.7595038015206082e-05, "loss": 0.4536, "step": 16210 }, { "epoch": 20.75008, "grad_norm": 1.0972751379013062, "learning_rate": 1.7593037214885954e-05, "loss": 0.5117, "step": 16211 }, { "epoch": 20.75136, "grad_norm": 1.1126095056533813, "learning_rate": 1.759103641456583e-05, "loss": 0.5348, "step": 16212 }, { "epoch": 20.75264, "grad_norm": 1.1272165775299072, "learning_rate": 1.75890356142457e-05, "loss": 0.515, "step": 16213 }, { "epoch": 20.75392, "grad_norm": 1.1290925741195679, "learning_rate": 1.758703481392557e-05, "loss": 0.5332, "step": 16214 }, { "epoch": 20.7552, "grad_norm": 1.0821077823638916, "learning_rate": 1.758503401360544e-05, "loss": 0.5224, "step": 16215 }, { "epoch": 20.75648, "grad_norm": 1.0680049657821655, "learning_rate": 1.7583033213285316e-05, "loss": 0.5149, "step": 16216 }, { "epoch": 20.75776, "grad_norm": 1.059745192527771, "learning_rate": 1.7581032412965188e-05, "loss": 0.5058, "step": 16217 }, { "epoch": 20.75904, "grad_norm": 1.0059226751327515, "learning_rate": 1.7579031612645057e-05, "loss": 0.5011, "step": 16218 }, { "epoch": 20.76032, "grad_norm": 1.1066895723342896, "learning_rate": 1.757703081232493e-05, "loss": 0.5139, "step": 16219 }, { "epoch": 20.7616, "grad_norm": 1.0385751724243164, "learning_rate": 1.7575030012004804e-05, "loss": 0.4992, "step": 16220 }, { "epoch": 20.76288, "grad_norm": 1.0588712692260742, "learning_rate": 1.7573029211684676e-05, "loss": 0.4881, "step": 16221 }, { "epoch": 20.76416, "grad_norm": 1.1335150003433228, "learning_rate": 1.7571028411364544e-05, "loss": 0.5135, "step": 16222 }, { "epoch": 20.76544, "grad_norm": 1.06964111328125, "learning_rate": 1.756902761104442e-05, "loss": 0.474, "step": 16223 }, { "epoch": 20.76672, "grad_norm": 1.0755014419555664, "learning_rate": 1.756702681072429e-05, "loss": 0.5125, "step": 16224 }, { 
"epoch": 20.768, "grad_norm": 1.129836082458496, "learning_rate": 1.7565026010404163e-05, "loss": 0.4916, "step": 16225 }, { "epoch": 20.76928, "grad_norm": 1.106661319732666, "learning_rate": 1.756302521008403e-05, "loss": 0.5316, "step": 16226 }, { "epoch": 20.77056, "grad_norm": 1.123422384262085, "learning_rate": 1.7561024409763907e-05, "loss": 0.5241, "step": 16227 }, { "epoch": 20.77184, "grad_norm": 1.0029109716415405, "learning_rate": 1.755902360944378e-05, "loss": 0.4284, "step": 16228 }, { "epoch": 20.77312, "grad_norm": 1.0901793241500854, "learning_rate": 1.755702280912365e-05, "loss": 0.4894, "step": 16229 }, { "epoch": 20.7744, "grad_norm": 1.003308653831482, "learning_rate": 1.7555022008803522e-05, "loss": 0.4551, "step": 16230 }, { "epoch": 20.77568, "grad_norm": 1.1484678983688354, "learning_rate": 1.7553021208483394e-05, "loss": 0.4894, "step": 16231 }, { "epoch": 20.77696, "grad_norm": 1.0901198387145996, "learning_rate": 1.7551020408163266e-05, "loss": 0.4932, "step": 16232 }, { "epoch": 20.77824, "grad_norm": 1.1623092889785767, "learning_rate": 1.7549019607843138e-05, "loss": 0.5136, "step": 16233 }, { "epoch": 20.77952, "grad_norm": 1.0638892650604248, "learning_rate": 1.754701880752301e-05, "loss": 0.4726, "step": 16234 }, { "epoch": 20.7808, "grad_norm": 1.0341891050338745, "learning_rate": 1.754501800720288e-05, "loss": 0.4597, "step": 16235 }, { "epoch": 20.78208, "grad_norm": 1.0782972574234009, "learning_rate": 1.7543017206882754e-05, "loss": 0.5077, "step": 16236 }, { "epoch": 20.78336, "grad_norm": 1.1353981494903564, "learning_rate": 1.7541016406562625e-05, "loss": 0.518, "step": 16237 }, { "epoch": 20.78464, "grad_norm": 1.0707870721817017, "learning_rate": 1.7539015606242497e-05, "loss": 0.5071, "step": 16238 }, { "epoch": 20.78592, "grad_norm": 1.0623449087142944, "learning_rate": 1.753701480592237e-05, "loss": 0.4881, "step": 16239 }, { "epoch": 20.7872, "grad_norm": 1.0724384784698486, "learning_rate": 1.753501400560224e-05, 
"loss": 0.5197, "step": 16240 }, { "epoch": 20.78848, "grad_norm": 1.1770350933074951, "learning_rate": 1.7533013205282113e-05, "loss": 0.5326, "step": 16241 }, { "epoch": 20.78976, "grad_norm": 1.1036503314971924, "learning_rate": 1.7531012404961985e-05, "loss": 0.5241, "step": 16242 }, { "epoch": 20.79104, "grad_norm": 1.0659743547439575, "learning_rate": 1.7529011604641857e-05, "loss": 0.5047, "step": 16243 }, { "epoch": 20.79232, "grad_norm": 1.0868451595306396, "learning_rate": 1.7527010804321732e-05, "loss": 0.4819, "step": 16244 }, { "epoch": 20.7936, "grad_norm": 1.1210837364196777, "learning_rate": 1.75250100040016e-05, "loss": 0.5214, "step": 16245 }, { "epoch": 20.79488, "grad_norm": 1.065202236175537, "learning_rate": 1.7523009203681472e-05, "loss": 0.4913, "step": 16246 }, { "epoch": 20.79616, "grad_norm": 1.1045325994491577, "learning_rate": 1.7521008403361344e-05, "loss": 0.493, "step": 16247 }, { "epoch": 20.79744, "grad_norm": 1.029994249343872, "learning_rate": 1.751900760304122e-05, "loss": 0.4934, "step": 16248 }, { "epoch": 20.79872, "grad_norm": 1.1114221811294556, "learning_rate": 1.7517006802721088e-05, "loss": 0.5183, "step": 16249 }, { "epoch": 20.8, "grad_norm": 1.0240123271942139, "learning_rate": 1.751500600240096e-05, "loss": 0.4572, "step": 16250 }, { "epoch": 20.80128, "grad_norm": 1.0750398635864258, "learning_rate": 1.7513005202080835e-05, "loss": 0.4603, "step": 16251 }, { "epoch": 20.80256, "grad_norm": 1.1186068058013916, "learning_rate": 1.7511004401760707e-05, "loss": 0.4698, "step": 16252 }, { "epoch": 20.80384, "grad_norm": 1.1130450963974, "learning_rate": 1.7509003601440575e-05, "loss": 0.5152, "step": 16253 }, { "epoch": 20.80512, "grad_norm": 1.104473352432251, "learning_rate": 1.7507002801120447e-05, "loss": 0.5139, "step": 16254 }, { "epoch": 20.8064, "grad_norm": 1.1033751964569092, "learning_rate": 1.7505002000800322e-05, "loss": 0.4785, "step": 16255 }, { "epoch": 20.80768, "grad_norm": 1.0060745477676392, 
"learning_rate": 1.7503001200480194e-05, "loss": 0.4259, "step": 16256 }, { "epoch": 20.80896, "grad_norm": 1.1276196241378784, "learning_rate": 1.7501000400160063e-05, "loss": 0.5602, "step": 16257 }, { "epoch": 20.81024, "grad_norm": 1.1201550960540771, "learning_rate": 1.7498999599839938e-05, "loss": 0.5034, "step": 16258 }, { "epoch": 20.81152, "grad_norm": 1.0943266153335571, "learning_rate": 1.749699879951981e-05, "loss": 0.509, "step": 16259 }, { "epoch": 20.8128, "grad_norm": 1.0522860288619995, "learning_rate": 1.749499799919968e-05, "loss": 0.4813, "step": 16260 }, { "epoch": 20.81408, "grad_norm": 1.0484342575073242, "learning_rate": 1.749299719887955e-05, "loss": 0.4743, "step": 16261 }, { "epoch": 20.81536, "grad_norm": 1.0669806003570557, "learning_rate": 1.7490996398559425e-05, "loss": 0.4943, "step": 16262 }, { "epoch": 20.81664, "grad_norm": 1.063136339187622, "learning_rate": 1.7488995598239297e-05, "loss": 0.4856, "step": 16263 }, { "epoch": 20.81792, "grad_norm": 1.091721534729004, "learning_rate": 1.748699479791917e-05, "loss": 0.481, "step": 16264 }, { "epoch": 20.8192, "grad_norm": 1.0925869941711426, "learning_rate": 1.748499399759904e-05, "loss": 0.5033, "step": 16265 }, { "epoch": 20.82048, "grad_norm": 1.1635428667068481, "learning_rate": 1.7482993197278913e-05, "loss": 0.5144, "step": 16266 }, { "epoch": 20.82176, "grad_norm": 1.0611239671707153, "learning_rate": 1.7480992396958784e-05, "loss": 0.4716, "step": 16267 }, { "epoch": 20.82304, "grad_norm": 1.1461703777313232, "learning_rate": 1.7478991596638656e-05, "loss": 0.5118, "step": 16268 }, { "epoch": 20.82432, "grad_norm": 1.0631418228149414, "learning_rate": 1.7476990796318528e-05, "loss": 0.4622, "step": 16269 }, { "epoch": 20.8256, "grad_norm": 1.0580074787139893, "learning_rate": 1.74749899959984e-05, "loss": 0.5003, "step": 16270 }, { "epoch": 20.82688, "grad_norm": 1.1611815690994263, "learning_rate": 1.7472989195678272e-05, "loss": 0.5646, "step": 16271 }, { "epoch": 
20.82816, "grad_norm": 1.1409611701965332, "learning_rate": 1.7470988395358144e-05, "loss": 0.4652, "step": 16272 }, { "epoch": 20.829439999999998, "grad_norm": 1.1084599494934082, "learning_rate": 1.7468987595038016e-05, "loss": 0.5027, "step": 16273 }, { "epoch": 20.83072, "grad_norm": 1.1274982690811157, "learning_rate": 1.7466986794717887e-05, "loss": 0.486, "step": 16274 }, { "epoch": 20.832, "grad_norm": 1.1236320734024048, "learning_rate": 1.746498599439776e-05, "loss": 0.5091, "step": 16275 }, { "epoch": 20.83328, "grad_norm": 1.1362968683242798, "learning_rate": 1.746298519407763e-05, "loss": 0.4902, "step": 16276 }, { "epoch": 20.83456, "grad_norm": 1.092063069343567, "learning_rate": 1.7460984393757503e-05, "loss": 0.5271, "step": 16277 }, { "epoch": 20.83584, "grad_norm": 1.1088082790374756, "learning_rate": 1.7458983593437375e-05, "loss": 0.4834, "step": 16278 }, { "epoch": 20.83712, "grad_norm": 1.081045150756836, "learning_rate": 1.7456982793117247e-05, "loss": 0.4928, "step": 16279 }, { "epoch": 20.8384, "grad_norm": 1.1172006130218506, "learning_rate": 1.745498199279712e-05, "loss": 0.5346, "step": 16280 }, { "epoch": 20.83968, "grad_norm": 1.0738575458526611, "learning_rate": 1.745298119247699e-05, "loss": 0.4861, "step": 16281 }, { "epoch": 20.84096, "grad_norm": 1.0271522998809814, "learning_rate": 1.7450980392156862e-05, "loss": 0.5072, "step": 16282 }, { "epoch": 20.84224, "grad_norm": 1.1056361198425293, "learning_rate": 1.7448979591836738e-05, "loss": 0.5387, "step": 16283 }, { "epoch": 20.84352, "grad_norm": 1.07204270362854, "learning_rate": 1.7446978791516606e-05, "loss": 0.4802, "step": 16284 }, { "epoch": 20.8448, "grad_norm": 1.0948388576507568, "learning_rate": 1.7444977991196478e-05, "loss": 0.4963, "step": 16285 }, { "epoch": 20.84608, "grad_norm": 1.1299151182174683, "learning_rate": 1.744297719087635e-05, "loss": 0.5061, "step": 16286 }, { "epoch": 20.84736, "grad_norm": 1.0689219236373901, "learning_rate": 1.7440976390556225e-05, 
"loss": 0.4855, "step": 16287 }, { "epoch": 20.84864, "grad_norm": 1.1455039978027344, "learning_rate": 1.7438975590236093e-05, "loss": 0.5598, "step": 16288 }, { "epoch": 20.84992, "grad_norm": 1.1063296794891357, "learning_rate": 1.7436974789915965e-05, "loss": 0.4986, "step": 16289 }, { "epoch": 20.8512, "grad_norm": 1.1937898397445679, "learning_rate": 1.743497398959584e-05, "loss": 0.513, "step": 16290 }, { "epoch": 20.85248, "grad_norm": 1.005609154701233, "learning_rate": 1.7432973189275712e-05, "loss": 0.4549, "step": 16291 }, { "epoch": 20.85376, "grad_norm": 1.0839899778366089, "learning_rate": 1.743097238895558e-05, "loss": 0.4952, "step": 16292 }, { "epoch": 20.85504, "grad_norm": 1.1058770418167114, "learning_rate": 1.7428971588635453e-05, "loss": 0.4729, "step": 16293 }, { "epoch": 20.85632, "grad_norm": 1.0722662210464478, "learning_rate": 1.7426970788315328e-05, "loss": 0.4774, "step": 16294 }, { "epoch": 20.8576, "grad_norm": 1.0828754901885986, "learning_rate": 1.74249699879952e-05, "loss": 0.5036, "step": 16295 }, { "epoch": 20.85888, "grad_norm": 1.0864346027374268, "learning_rate": 1.742296918767507e-05, "loss": 0.5187, "step": 16296 }, { "epoch": 20.86016, "grad_norm": 1.0293843746185303, "learning_rate": 1.7420968387354944e-05, "loss": 0.4526, "step": 16297 }, { "epoch": 20.86144, "grad_norm": 1.0849385261535645, "learning_rate": 1.7418967587034815e-05, "loss": 0.4959, "step": 16298 }, { "epoch": 20.86272, "grad_norm": 1.0619242191314697, "learning_rate": 1.7416966786714687e-05, "loss": 0.504, "step": 16299 }, { "epoch": 20.864, "grad_norm": 1.0861916542053223, "learning_rate": 1.7414965986394556e-05, "loss": 0.4619, "step": 16300 }, { "epoch": 20.86528, "grad_norm": 1.0871127843856812, "learning_rate": 1.741296518607443e-05, "loss": 0.4972, "step": 16301 }, { "epoch": 20.86656, "grad_norm": 1.1219013929367065, "learning_rate": 1.7410964385754303e-05, "loss": 0.4946, "step": 16302 }, { "epoch": 20.86784, "grad_norm": 1.0501264333724976, 
"learning_rate": 1.7408963585434175e-05, "loss": 0.5039, "step": 16303 }, { "epoch": 20.86912, "grad_norm": 1.1329530477523804, "learning_rate": 1.7406962785114047e-05, "loss": 0.5356, "step": 16304 }, { "epoch": 20.8704, "grad_norm": 1.0762802362442017, "learning_rate": 1.740496198479392e-05, "loss": 0.5171, "step": 16305 }, { "epoch": 20.87168, "grad_norm": 1.0678480863571167, "learning_rate": 1.740296118447379e-05, "loss": 0.4828, "step": 16306 }, { "epoch": 20.87296, "grad_norm": 1.1031467914581299, "learning_rate": 1.7400960384153662e-05, "loss": 0.4672, "step": 16307 }, { "epoch": 20.87424, "grad_norm": 1.0675780773162842, "learning_rate": 1.7398959583833534e-05, "loss": 0.5382, "step": 16308 }, { "epoch": 20.87552, "grad_norm": 1.0954948663711548, "learning_rate": 1.7396958783513406e-05, "loss": 0.5027, "step": 16309 }, { "epoch": 20.8768, "grad_norm": 1.0903716087341309, "learning_rate": 1.7394957983193278e-05, "loss": 0.4796, "step": 16310 }, { "epoch": 20.87808, "grad_norm": 1.147133708000183, "learning_rate": 1.7392957182873153e-05, "loss": 0.5004, "step": 16311 }, { "epoch": 20.87936, "grad_norm": 1.1145248413085938, "learning_rate": 1.739095638255302e-05, "loss": 0.5235, "step": 16312 }, { "epoch": 20.88064, "grad_norm": 1.0918245315551758, "learning_rate": 1.7388955582232893e-05, "loss": 0.4825, "step": 16313 }, { "epoch": 20.88192, "grad_norm": 1.1098552942276, "learning_rate": 1.7386954781912765e-05, "loss": 0.4949, "step": 16314 }, { "epoch": 20.8832, "grad_norm": 1.1097396612167358, "learning_rate": 1.738495398159264e-05, "loss": 0.49, "step": 16315 }, { "epoch": 20.88448, "grad_norm": 1.1164050102233887, "learning_rate": 1.738295318127251e-05, "loss": 0.5204, "step": 16316 }, { "epoch": 20.88576, "grad_norm": 1.1462451219558716, "learning_rate": 1.738095238095238e-05, "loss": 0.5449, "step": 16317 }, { "epoch": 20.88704, "grad_norm": 1.052255392074585, "learning_rate": 1.7378951580632256e-05, "loss": 0.481, "step": 16318 }, { "epoch": 20.88832, 
"grad_norm": 1.0906624794006348, "learning_rate": 1.7376950780312128e-05, "loss": 0.4848, "step": 16319 }, { "epoch": 20.8896, "grad_norm": 1.0164730548858643, "learning_rate": 1.7374949979991996e-05, "loss": 0.4646, "step": 16320 }, { "epoch": 20.89088, "grad_norm": 1.1174423694610596, "learning_rate": 1.7372949179671868e-05, "loss": 0.5213, "step": 16321 }, { "epoch": 20.89216, "grad_norm": 1.0460309982299805, "learning_rate": 1.7370948379351743e-05, "loss": 0.5021, "step": 16322 }, { "epoch": 20.89344, "grad_norm": 1.0785523653030396, "learning_rate": 1.7368947579031615e-05, "loss": 0.4775, "step": 16323 }, { "epoch": 20.89472, "grad_norm": 1.173263430595398, "learning_rate": 1.7366946778711484e-05, "loss": 0.5126, "step": 16324 }, { "epoch": 20.896, "grad_norm": 1.0594722032546997, "learning_rate": 1.736494597839136e-05, "loss": 0.4804, "step": 16325 }, { "epoch": 20.89728, "grad_norm": 1.1119956970214844, "learning_rate": 1.736294517807123e-05, "loss": 0.4952, "step": 16326 }, { "epoch": 20.89856, "grad_norm": 1.0069000720977783, "learning_rate": 1.7360944377751103e-05, "loss": 0.4821, "step": 16327 }, { "epoch": 20.89984, "grad_norm": 1.118168830871582, "learning_rate": 1.735894357743097e-05, "loss": 0.513, "step": 16328 }, { "epoch": 20.90112, "grad_norm": 1.0356378555297852, "learning_rate": 1.7356942777110846e-05, "loss": 0.435, "step": 16329 }, { "epoch": 20.9024, "grad_norm": 1.0259177684783936, "learning_rate": 1.7354941976790718e-05, "loss": 0.4688, "step": 16330 }, { "epoch": 20.90368, "grad_norm": 1.057941198348999, "learning_rate": 1.735294117647059e-05, "loss": 0.4662, "step": 16331 }, { "epoch": 20.90496, "grad_norm": 1.1319576501846313, "learning_rate": 1.735094037615046e-05, "loss": 0.5064, "step": 16332 }, { "epoch": 20.90624, "grad_norm": 1.1116087436676025, "learning_rate": 1.7348939575830334e-05, "loss": 0.5034, "step": 16333 }, { "epoch": 20.90752, "grad_norm": 1.1020821332931519, "learning_rate": 1.7346938775510206e-05, "loss": 0.509, 
"step": 16334 }, { "epoch": 20.9088, "grad_norm": 1.024755835533142, "learning_rate": 1.7344937975190078e-05, "loss": 0.4732, "step": 16335 }, { "epoch": 20.91008, "grad_norm": 1.1245591640472412, "learning_rate": 1.734293717486995e-05, "loss": 0.5083, "step": 16336 }, { "epoch": 20.91136, "grad_norm": 1.0561660528182983, "learning_rate": 1.734093637454982e-05, "loss": 0.4551, "step": 16337 }, { "epoch": 20.91264, "grad_norm": 1.017407774925232, "learning_rate": 1.7338935574229693e-05, "loss": 0.4703, "step": 16338 }, { "epoch": 20.91392, "grad_norm": 1.145080804824829, "learning_rate": 1.7336934773909565e-05, "loss": 0.5039, "step": 16339 }, { "epoch": 20.9152, "grad_norm": 1.074860692024231, "learning_rate": 1.7334933973589437e-05, "loss": 0.4906, "step": 16340 }, { "epoch": 20.91648, "grad_norm": 1.1369560956954956, "learning_rate": 1.733293317326931e-05, "loss": 0.5045, "step": 16341 }, { "epoch": 20.91776, "grad_norm": 1.0858222246170044, "learning_rate": 1.733093237294918e-05, "loss": 0.4916, "step": 16342 }, { "epoch": 20.91904, "grad_norm": 1.1016911268234253, "learning_rate": 1.7328931572629052e-05, "loss": 0.521, "step": 16343 }, { "epoch": 20.92032, "grad_norm": 1.0700989961624146, "learning_rate": 1.7326930772308924e-05, "loss": 0.4737, "step": 16344 }, { "epoch": 20.9216, "grad_norm": 1.066236972808838, "learning_rate": 1.7324929971988796e-05, "loss": 0.4873, "step": 16345 }, { "epoch": 20.92288, "grad_norm": 1.1762385368347168, "learning_rate": 1.7322929171668668e-05, "loss": 0.5469, "step": 16346 }, { "epoch": 20.92416, "grad_norm": 1.086787462234497, "learning_rate": 1.732092837134854e-05, "loss": 0.5013, "step": 16347 }, { "epoch": 20.925440000000002, "grad_norm": 1.0452648401260376, "learning_rate": 1.731892757102841e-05, "loss": 0.4687, "step": 16348 }, { "epoch": 20.92672, "grad_norm": 1.1296664476394653, "learning_rate": 1.7316926770708284e-05, "loss": 0.5306, "step": 16349 }, { "epoch": 20.928, "grad_norm": 1.0765169858932495, "learning_rate": 
1.731492597038816e-05, "loss": 0.4865, "step": 16350 }, { "epoch": 20.92928, "grad_norm": 1.122309684753418, "learning_rate": 1.7312925170068027e-05, "loss": 0.4692, "step": 16351 }, { "epoch": 20.93056, "grad_norm": 1.1600003242492676, "learning_rate": 1.73109243697479e-05, "loss": 0.545, "step": 16352 }, { "epoch": 20.93184, "grad_norm": 1.06452214717865, "learning_rate": 1.730892356942777e-05, "loss": 0.4528, "step": 16353 }, { "epoch": 20.93312, "grad_norm": 1.0116653442382812, "learning_rate": 1.7306922769107646e-05, "loss": 0.444, "step": 16354 }, { "epoch": 20.9344, "grad_norm": 1.0115692615509033, "learning_rate": 1.7304921968787515e-05, "loss": 0.4983, "step": 16355 }, { "epoch": 20.93568, "grad_norm": 1.1115832328796387, "learning_rate": 1.7302921168467387e-05, "loss": 0.4955, "step": 16356 }, { "epoch": 20.93696, "grad_norm": 1.0773097276687622, "learning_rate": 1.7300920368147262e-05, "loss": 0.4978, "step": 16357 }, { "epoch": 20.93824, "grad_norm": 1.102440595626831, "learning_rate": 1.7298919567827134e-05, "loss": 0.5104, "step": 16358 }, { "epoch": 20.93952, "grad_norm": 1.0837904214859009, "learning_rate": 1.7296918767507002e-05, "loss": 0.4988, "step": 16359 }, { "epoch": 20.9408, "grad_norm": 1.091955542564392, "learning_rate": 1.7294917967186874e-05, "loss": 0.5182, "step": 16360 }, { "epoch": 20.94208, "grad_norm": 1.13055419921875, "learning_rate": 1.729291716686675e-05, "loss": 0.5187, "step": 16361 }, { "epoch": 20.94336, "grad_norm": 1.1152286529541016, "learning_rate": 1.729091636654662e-05, "loss": 0.498, "step": 16362 }, { "epoch": 20.94464, "grad_norm": 1.0456947088241577, "learning_rate": 1.728891556622649e-05, "loss": 0.5033, "step": 16363 }, { "epoch": 20.94592, "grad_norm": 1.0851963758468628, "learning_rate": 1.7286914765906365e-05, "loss": 0.5229, "step": 16364 }, { "epoch": 20.9472, "grad_norm": 1.03732168674469, "learning_rate": 1.7284913965586237e-05, "loss": 0.4809, "step": 16365 }, { "epoch": 20.94848, "grad_norm": 
1.0764819383621216, "learning_rate": 1.728291316526611e-05, "loss": 0.4754, "step": 16366 }, { "epoch": 20.94976, "grad_norm": 1.1544440984725952, "learning_rate": 1.7280912364945977e-05, "loss": 0.5008, "step": 16367 }, { "epoch": 20.95104, "grad_norm": 1.1239932775497437, "learning_rate": 1.7278911564625852e-05, "loss": 0.4808, "step": 16368 }, { "epoch": 20.95232, "grad_norm": 1.1171079874038696, "learning_rate": 1.7276910764305724e-05, "loss": 0.4859, "step": 16369 }, { "epoch": 20.9536, "grad_norm": 1.1345586776733398, "learning_rate": 1.7274909963985596e-05, "loss": 0.4935, "step": 16370 }, { "epoch": 20.95488, "grad_norm": 1.1230216026306152, "learning_rate": 1.7272909163665468e-05, "loss": 0.5274, "step": 16371 }, { "epoch": 20.95616, "grad_norm": 1.2072840929031372, "learning_rate": 1.727090836334534e-05, "loss": 0.5538, "step": 16372 }, { "epoch": 20.95744, "grad_norm": 1.1466865539550781, "learning_rate": 1.726890756302521e-05, "loss": 0.5492, "step": 16373 }, { "epoch": 20.95872, "grad_norm": 1.017502784729004, "learning_rate": 1.7266906762705083e-05, "loss": 0.4899, "step": 16374 }, { "epoch": 20.96, "grad_norm": 1.1090508699417114, "learning_rate": 1.7264905962384955e-05, "loss": 0.4935, "step": 16375 }, { "epoch": 20.96128, "grad_norm": 1.078791618347168, "learning_rate": 1.7262905162064827e-05, "loss": 0.4829, "step": 16376 }, { "epoch": 20.96256, "grad_norm": 1.1684792041778564, "learning_rate": 1.72609043617447e-05, "loss": 0.5098, "step": 16377 }, { "epoch": 20.96384, "grad_norm": 1.1406118869781494, "learning_rate": 1.725890356142457e-05, "loss": 0.5024, "step": 16378 }, { "epoch": 20.96512, "grad_norm": 1.0845208168029785, "learning_rate": 1.7256902761104443e-05, "loss": 0.4965, "step": 16379 }, { "epoch": 20.9664, "grad_norm": 1.04744291305542, "learning_rate": 1.7254901960784314e-05, "loss": 0.4817, "step": 16380 }, { "epoch": 20.96768, "grad_norm": 1.1002355813980103, "learning_rate": 1.7252901160464186e-05, "loss": 0.4886, "step": 16381 }, 
{ "epoch": 20.96896, "grad_norm": 1.0374670028686523, "learning_rate": 1.7250900360144058e-05, "loss": 0.4639, "step": 16382 }, { "epoch": 20.97024, "grad_norm": 1.0553470849990845, "learning_rate": 1.724889955982393e-05, "loss": 0.4705, "step": 16383 }, { "epoch": 20.97152, "grad_norm": 1.1559804677963257, "learning_rate": 1.7246898759503802e-05, "loss": 0.5046, "step": 16384 }, { "epoch": 20.9728, "grad_norm": 1.1546366214752197, "learning_rate": 1.7244897959183677e-05, "loss": 0.5199, "step": 16385 }, { "epoch": 20.97408, "grad_norm": 1.0644662380218506, "learning_rate": 1.7242897158863546e-05, "loss": 0.4589, "step": 16386 }, { "epoch": 20.97536, "grad_norm": 1.0808895826339722, "learning_rate": 1.7240896358543417e-05, "loss": 0.5159, "step": 16387 }, { "epoch": 20.97664, "grad_norm": 1.1637095212936401, "learning_rate": 1.723889555822329e-05, "loss": 0.5412, "step": 16388 }, { "epoch": 20.97792, "grad_norm": 1.0618444681167603, "learning_rate": 1.7236894757903165e-05, "loss": 0.5048, "step": 16389 }, { "epoch": 20.9792, "grad_norm": 1.1485847234725952, "learning_rate": 1.7234893957583033e-05, "loss": 0.4985, "step": 16390 }, { "epoch": 20.98048, "grad_norm": 1.1214594841003418, "learning_rate": 1.7232893157262905e-05, "loss": 0.4636, "step": 16391 }, { "epoch": 20.98176, "grad_norm": 1.1146374940872192, "learning_rate": 1.7230892356942777e-05, "loss": 0.5329, "step": 16392 }, { "epoch": 20.98304, "grad_norm": 1.092591404914856, "learning_rate": 1.7228891556622652e-05, "loss": 0.5354, "step": 16393 }, { "epoch": 20.98432, "grad_norm": 1.1593587398529053, "learning_rate": 1.722689075630252e-05, "loss": 0.4776, "step": 16394 }, { "epoch": 20.9856, "grad_norm": 1.0387076139450073, "learning_rate": 1.7224889955982392e-05, "loss": 0.4753, "step": 16395 }, { "epoch": 20.98688, "grad_norm": 1.0778547525405884, "learning_rate": 1.7222889155662268e-05, "loss": 0.4997, "step": 16396 }, { "epoch": 20.98816, "grad_norm": 1.0431793928146362, "learning_rate": 
1.722088835534214e-05, "loss": 0.4463, "step": 16397 }, { "epoch": 20.98944, "grad_norm": 1.0882987976074219, "learning_rate": 1.7218887555022008e-05, "loss": 0.4593, "step": 16398 }, { "epoch": 20.99072, "grad_norm": 1.1702048778533936, "learning_rate": 1.721688675470188e-05, "loss": 0.5429, "step": 16399 }, { "epoch": 20.992, "grad_norm": 1.0901694297790527, "learning_rate": 1.7214885954381755e-05, "loss": 0.5139, "step": 16400 }, { "epoch": 20.99328, "grad_norm": 1.1674569845199585, "learning_rate": 1.7212885154061627e-05, "loss": 0.5385, "step": 16401 }, { "epoch": 20.99456, "grad_norm": 1.1179885864257812, "learning_rate": 1.7210884353741495e-05, "loss": 0.4636, "step": 16402 }, { "epoch": 20.99584, "grad_norm": 1.1010475158691406, "learning_rate": 1.720888355342137e-05, "loss": 0.5093, "step": 16403 }, { "epoch": 20.99712, "grad_norm": 1.157423734664917, "learning_rate": 1.7206882753101242e-05, "loss": 0.5069, "step": 16404 }, { "epoch": 20.9984, "grad_norm": 1.0208972692489624, "learning_rate": 1.7204881952781114e-05, "loss": 0.4783, "step": 16405 }, { "epoch": 20.99968, "grad_norm": 1.0992622375488281, "learning_rate": 1.7202881152460983e-05, "loss": 0.4992, "step": 16406 }, { "epoch": 21.00096, "grad_norm": 2.2934412956237793, "learning_rate": 1.7200880352140858e-05, "loss": 0.8021, "step": 16407 }, { "epoch": 21.00224, "grad_norm": 1.0393415689468384, "learning_rate": 1.719887955182073e-05, "loss": 0.4699, "step": 16408 }, { "epoch": 21.00352, "grad_norm": 1.0242365598678589, "learning_rate": 1.7196878751500602e-05, "loss": 0.4762, "step": 16409 }, { "epoch": 21.0048, "grad_norm": 1.0307321548461914, "learning_rate": 1.7194877951180474e-05, "loss": 0.4812, "step": 16410 }, { "epoch": 21.00608, "grad_norm": 1.07740318775177, "learning_rate": 1.7192877150860345e-05, "loss": 0.4985, "step": 16411 }, { "epoch": 21.00736, "grad_norm": 1.0476981401443481, "learning_rate": 1.7190876350540217e-05, "loss": 0.4478, "step": 16412 }, { "epoch": 21.00864, "grad_norm": 
1.041973352432251, "learning_rate": 1.718887555022009e-05, "loss": 0.4452, "step": 16413 }, { "epoch": 21.00992, "grad_norm": 1.1095020771026611, "learning_rate": 1.718687474989996e-05, "loss": 0.5099, "step": 16414 }, { "epoch": 21.0112, "grad_norm": 1.123745083808899, "learning_rate": 1.7184873949579833e-05, "loss": 0.4954, "step": 16415 }, { "epoch": 21.01248, "grad_norm": 1.1033985614776611, "learning_rate": 1.7182873149259705e-05, "loss": 0.4863, "step": 16416 }, { "epoch": 21.01376, "grad_norm": 1.1041131019592285, "learning_rate": 1.7180872348939577e-05, "loss": 0.4556, "step": 16417 }, { "epoch": 21.01504, "grad_norm": 1.0607541799545288, "learning_rate": 1.717887154861945e-05, "loss": 0.4738, "step": 16418 }, { "epoch": 21.01632, "grad_norm": 1.0398081541061401, "learning_rate": 1.717687074829932e-05, "loss": 0.4566, "step": 16419 }, { "epoch": 21.0176, "grad_norm": 1.0724793672561646, "learning_rate": 1.7174869947979192e-05, "loss": 0.4586, "step": 16420 }, { "epoch": 21.01888, "grad_norm": 1.109832525253296, "learning_rate": 1.7172869147659064e-05, "loss": 0.4661, "step": 16421 }, { "epoch": 21.02016, "grad_norm": 1.0665154457092285, "learning_rate": 1.7170868347338936e-05, "loss": 0.4561, "step": 16422 }, { "epoch": 21.02144, "grad_norm": 1.094376564025879, "learning_rate": 1.7168867547018808e-05, "loss": 0.4649, "step": 16423 }, { "epoch": 21.02272, "grad_norm": 1.061568021774292, "learning_rate": 1.7166866746698683e-05, "loss": 0.4897, "step": 16424 }, { "epoch": 21.024, "grad_norm": 1.0756134986877441, "learning_rate": 1.716486594637855e-05, "loss": 0.4575, "step": 16425 }, { "epoch": 21.02528, "grad_norm": 1.0889450311660767, "learning_rate": 1.7162865146058423e-05, "loss": 0.5075, "step": 16426 }, { "epoch": 21.02656, "grad_norm": 1.0604745149612427, "learning_rate": 1.7160864345738295e-05, "loss": 0.4671, "step": 16427 }, { "epoch": 21.02784, "grad_norm": 1.0580859184265137, "learning_rate": 1.715886354541817e-05, "loss": 0.4691, "step": 16428 }, 
{ "epoch": 21.02912, "grad_norm": 1.1256431341171265, "learning_rate": 1.715686274509804e-05, "loss": 0.5068, "step": 16429 }, { "epoch": 21.0304, "grad_norm": 1.1371206045150757, "learning_rate": 1.715486194477791e-05, "loss": 0.484, "step": 16430 }, { "epoch": 21.03168, "grad_norm": 1.045066237449646, "learning_rate": 1.7152861144457786e-05, "loss": 0.4497, "step": 16431 }, { "epoch": 21.03296, "grad_norm": 1.1001946926116943, "learning_rate": 1.7150860344137658e-05, "loss": 0.4488, "step": 16432 }, { "epoch": 21.03424, "grad_norm": 1.087947964668274, "learning_rate": 1.7148859543817526e-05, "loss": 0.475, "step": 16433 }, { "epoch": 21.03552, "grad_norm": 1.0743104219436646, "learning_rate": 1.7146858743497398e-05, "loss": 0.4484, "step": 16434 }, { "epoch": 21.0368, "grad_norm": 1.0668704509735107, "learning_rate": 1.7144857943177273e-05, "loss": 0.4431, "step": 16435 }, { "epoch": 21.03808, "grad_norm": 1.1153063774108887, "learning_rate": 1.7142857142857145e-05, "loss": 0.4967, "step": 16436 }, { "epoch": 21.03936, "grad_norm": 1.122804045677185, "learning_rate": 1.7140856342537014e-05, "loss": 0.4632, "step": 16437 }, { "epoch": 21.04064, "grad_norm": 1.1385273933410645, "learning_rate": 1.713885554221689e-05, "loss": 0.4943, "step": 16438 }, { "epoch": 21.04192, "grad_norm": 1.0751020908355713, "learning_rate": 1.713685474189676e-05, "loss": 0.4541, "step": 16439 }, { "epoch": 21.0432, "grad_norm": 1.0615102052688599, "learning_rate": 1.7134853941576633e-05, "loss": 0.48, "step": 16440 }, { "epoch": 21.04448, "grad_norm": 1.1338032484054565, "learning_rate": 1.71328531412565e-05, "loss": 0.4953, "step": 16441 }, { "epoch": 21.04576, "grad_norm": 1.0364360809326172, "learning_rate": 1.7130852340936376e-05, "loss": 0.4364, "step": 16442 }, { "epoch": 21.04704, "grad_norm": 1.0834779739379883, "learning_rate": 1.7128851540616248e-05, "loss": 0.5213, "step": 16443 }, { "epoch": 21.04832, "grad_norm": 1.0926162004470825, "learning_rate": 1.712685074029612e-05, 
"loss": 0.498, "step": 16444 }, { "epoch": 21.0496, "grad_norm": 1.0313501358032227, "learning_rate": 1.712484993997599e-05, "loss": 0.4867, "step": 16445 }, { "epoch": 21.05088, "grad_norm": 1.0741840600967407, "learning_rate": 1.7122849139655864e-05, "loss": 0.5016, "step": 16446 }, { "epoch": 21.05216, "grad_norm": 1.1211599111557007, "learning_rate": 1.7120848339335736e-05, "loss": 0.5157, "step": 16447 }, { "epoch": 21.05344, "grad_norm": 1.0967553853988647, "learning_rate": 1.7118847539015608e-05, "loss": 0.4737, "step": 16448 }, { "epoch": 21.05472, "grad_norm": 1.204904317855835, "learning_rate": 1.711684673869548e-05, "loss": 0.5274, "step": 16449 }, { "epoch": 21.056, "grad_norm": 1.0651218891143799, "learning_rate": 1.711484593837535e-05, "loss": 0.4312, "step": 16450 }, { "epoch": 21.05728, "grad_norm": 1.0928947925567627, "learning_rate": 1.7112845138055223e-05, "loss": 0.4495, "step": 16451 }, { "epoch": 21.05856, "grad_norm": 1.0684500932693481, "learning_rate": 1.7110844337735095e-05, "loss": 0.4851, "step": 16452 }, { "epoch": 21.05984, "grad_norm": 1.095968246459961, "learning_rate": 1.7108843537414967e-05, "loss": 0.452, "step": 16453 }, { "epoch": 21.06112, "grad_norm": 1.140784502029419, "learning_rate": 1.710684273709484e-05, "loss": 0.5047, "step": 16454 }, { "epoch": 21.0624, "grad_norm": 1.1561800241470337, "learning_rate": 1.710484193677471e-05, "loss": 0.5386, "step": 16455 }, { "epoch": 21.06368, "grad_norm": 1.110007643699646, "learning_rate": 1.7102841136454582e-05, "loss": 0.4753, "step": 16456 }, { "epoch": 21.06496, "grad_norm": 1.0784857273101807, "learning_rate": 1.7100840336134454e-05, "loss": 0.4374, "step": 16457 }, { "epoch": 21.06624, "grad_norm": 1.0909050703048706, "learning_rate": 1.7098839535814326e-05, "loss": 0.4375, "step": 16458 }, { "epoch": 21.06752, "grad_norm": 1.12933349609375, "learning_rate": 1.7096838735494198e-05, "loss": 0.4876, "step": 16459 }, { "epoch": 21.0688, "grad_norm": 1.1255989074707031, 
"learning_rate": 1.709483793517407e-05, "loss": 0.4721, "step": 16460 }, { "epoch": 21.07008, "grad_norm": 1.059699535369873, "learning_rate": 1.709283713485394e-05, "loss": 0.4565, "step": 16461 }, { "epoch": 21.07136, "grad_norm": 1.0751200914382935, "learning_rate": 1.7090836334533814e-05, "loss": 0.464, "step": 16462 }, { "epoch": 21.07264, "grad_norm": 1.1244473457336426, "learning_rate": 1.708883553421369e-05, "loss": 0.4842, "step": 16463 }, { "epoch": 21.07392, "grad_norm": 1.1796867847442627, "learning_rate": 1.7086834733893557e-05, "loss": 0.5435, "step": 16464 }, { "epoch": 21.0752, "grad_norm": 1.0969107151031494, "learning_rate": 1.708483393357343e-05, "loss": 0.479, "step": 16465 }, { "epoch": 21.07648, "grad_norm": 1.1163842678070068, "learning_rate": 1.70828331332533e-05, "loss": 0.4498, "step": 16466 }, { "epoch": 21.07776, "grad_norm": 1.1781340837478638, "learning_rate": 1.7080832332933176e-05, "loss": 0.5003, "step": 16467 }, { "epoch": 21.07904, "grad_norm": 1.1196157932281494, "learning_rate": 1.7078831532613045e-05, "loss": 0.5118, "step": 16468 }, { "epoch": 21.08032, "grad_norm": 1.1168984174728394, "learning_rate": 1.7076830732292917e-05, "loss": 0.4505, "step": 16469 }, { "epoch": 21.0816, "grad_norm": 1.1065702438354492, "learning_rate": 1.7074829931972792e-05, "loss": 0.5232, "step": 16470 }, { "epoch": 21.08288, "grad_norm": 1.0598688125610352, "learning_rate": 1.7072829131652664e-05, "loss": 0.4615, "step": 16471 }, { "epoch": 21.08416, "grad_norm": 1.0742090940475464, "learning_rate": 1.7070828331332532e-05, "loss": 0.4675, "step": 16472 }, { "epoch": 21.08544, "grad_norm": 1.0973572731018066, "learning_rate": 1.7068827531012404e-05, "loss": 0.4869, "step": 16473 }, { "epoch": 21.08672, "grad_norm": 1.0886359214782715, "learning_rate": 1.706682673069228e-05, "loss": 0.4684, "step": 16474 }, { "epoch": 21.088, "grad_norm": 1.0763150453567505, "learning_rate": 1.706482593037215e-05, "loss": 0.4558, "step": 16475 }, { "epoch": 21.08928, 
"grad_norm": 1.0928971767425537, "learning_rate": 1.706282513005202e-05, "loss": 0.4765, "step": 16476 }, { "epoch": 21.09056, "grad_norm": 1.1339545249938965, "learning_rate": 1.7060824329731895e-05, "loss": 0.5047, "step": 16477 }, { "epoch": 21.09184, "grad_norm": 1.0725475549697876, "learning_rate": 1.7058823529411767e-05, "loss": 0.4662, "step": 16478 }, { "epoch": 21.09312, "grad_norm": 1.1563411951065063, "learning_rate": 1.705682272909164e-05, "loss": 0.4984, "step": 16479 }, { "epoch": 21.0944, "grad_norm": 1.0735126733779907, "learning_rate": 1.7054821928771507e-05, "loss": 0.4125, "step": 16480 }, { "epoch": 21.09568, "grad_norm": 1.1152347326278687, "learning_rate": 1.7052821128451382e-05, "loss": 0.505, "step": 16481 }, { "epoch": 21.09696, "grad_norm": 1.0870592594146729, "learning_rate": 1.7050820328131254e-05, "loss": 0.5002, "step": 16482 }, { "epoch": 21.09824, "grad_norm": 1.116709589958191, "learning_rate": 1.7048819527811126e-05, "loss": 0.462, "step": 16483 }, { "epoch": 21.09952, "grad_norm": 1.1316735744476318, "learning_rate": 1.7046818727490998e-05, "loss": 0.5289, "step": 16484 }, { "epoch": 21.1008, "grad_norm": 1.127074122428894, "learning_rate": 1.704481792717087e-05, "loss": 0.4713, "step": 16485 }, { "epoch": 21.10208, "grad_norm": 1.1193740367889404, "learning_rate": 1.704281712685074e-05, "loss": 0.4759, "step": 16486 }, { "epoch": 21.10336, "grad_norm": 1.0757756233215332, "learning_rate": 1.7040816326530613e-05, "loss": 0.5199, "step": 16487 }, { "epoch": 21.10464, "grad_norm": 1.0991090536117554, "learning_rate": 1.7038815526210485e-05, "loss": 0.523, "step": 16488 }, { "epoch": 21.10592, "grad_norm": 1.0792702436447144, "learning_rate": 1.7036814725890357e-05, "loss": 0.4515, "step": 16489 }, { "epoch": 21.1072, "grad_norm": 1.1250958442687988, "learning_rate": 1.703481392557023e-05, "loss": 0.4843, "step": 16490 }, { "epoch": 21.10848, "grad_norm": 1.1384851932525635, "learning_rate": 1.70328131252501e-05, "loss": 0.523, 
"step": 16491 }, { "epoch": 21.10976, "grad_norm": 1.0551738739013672, "learning_rate": 1.7030812324929973e-05, "loss": 0.454, "step": 16492 }, { "epoch": 21.11104, "grad_norm": 1.0307825803756714, "learning_rate": 1.7028811524609844e-05, "loss": 0.4157, "step": 16493 }, { "epoch": 21.11232, "grad_norm": 1.1250662803649902, "learning_rate": 1.7026810724289716e-05, "loss": 0.5015, "step": 16494 }, { "epoch": 21.1136, "grad_norm": 1.0940858125686646, "learning_rate": 1.7024809923969588e-05, "loss": 0.4605, "step": 16495 }, { "epoch": 21.11488, "grad_norm": 1.0869758129119873, "learning_rate": 1.702280912364946e-05, "loss": 0.544, "step": 16496 }, { "epoch": 21.11616, "grad_norm": 1.0667223930358887, "learning_rate": 1.7020808323329332e-05, "loss": 0.4369, "step": 16497 }, { "epoch": 21.11744, "grad_norm": 1.0950666666030884, "learning_rate": 1.7018807523009204e-05, "loss": 0.5017, "step": 16498 }, { "epoch": 21.11872, "grad_norm": 1.0515073537826538, "learning_rate": 1.7016806722689076e-05, "loss": 0.4371, "step": 16499 }, { "epoch": 21.12, "grad_norm": 1.133811354637146, "learning_rate": 1.7014805922368947e-05, "loss": 0.5154, "step": 16500 }, { "epoch": 21.12128, "grad_norm": 1.1043317317962646, "learning_rate": 1.701280512204882e-05, "loss": 0.4841, "step": 16501 }, { "epoch": 21.12256, "grad_norm": 1.1179126501083374, "learning_rate": 1.7010804321728695e-05, "loss": 0.5082, "step": 16502 }, { "epoch": 21.12384, "grad_norm": 1.0235763788223267, "learning_rate": 1.7008803521408563e-05, "loss": 0.4659, "step": 16503 }, { "epoch": 21.12512, "grad_norm": 1.1011422872543335, "learning_rate": 1.7006802721088435e-05, "loss": 0.4946, "step": 16504 }, { "epoch": 21.1264, "grad_norm": 1.1294410228729248, "learning_rate": 1.7004801920768307e-05, "loss": 0.4528, "step": 16505 }, { "epoch": 21.12768, "grad_norm": 1.088690996170044, "learning_rate": 1.7002801120448182e-05, "loss": 0.4638, "step": 16506 }, { "epoch": 21.12896, "grad_norm": 1.1901614665985107, "learning_rate": 
1.700080032012805e-05, "loss": 0.4745, "step": 16507 }, { "epoch": 21.13024, "grad_norm": 1.177269458770752, "learning_rate": 1.6998799519807922e-05, "loss": 0.4941, "step": 16508 }, { "epoch": 21.13152, "grad_norm": 1.117848515510559, "learning_rate": 1.6996798719487798e-05, "loss": 0.4575, "step": 16509 }, { "epoch": 21.1328, "grad_norm": 1.1764754056930542, "learning_rate": 1.699479791916767e-05, "loss": 0.4967, "step": 16510 }, { "epoch": 21.13408, "grad_norm": 1.094519853591919, "learning_rate": 1.6992797118847538e-05, "loss": 0.4677, "step": 16511 }, { "epoch": 21.13536, "grad_norm": 1.1102967262268066, "learning_rate": 1.699079631852741e-05, "loss": 0.4894, "step": 16512 }, { "epoch": 21.13664, "grad_norm": 1.0887529850006104, "learning_rate": 1.6988795518207285e-05, "loss": 0.4896, "step": 16513 }, { "epoch": 21.13792, "grad_norm": 1.1254242658615112, "learning_rate": 1.6986794717887157e-05, "loss": 0.4719, "step": 16514 }, { "epoch": 21.1392, "grad_norm": 1.1468827724456787, "learning_rate": 1.6984793917567025e-05, "loss": 0.4629, "step": 16515 }, { "epoch": 21.14048, "grad_norm": 1.1172226667404175, "learning_rate": 1.69827931172469e-05, "loss": 0.4621, "step": 16516 }, { "epoch": 21.14176, "grad_norm": 1.1248224973678589, "learning_rate": 1.6980792316926772e-05, "loss": 0.4801, "step": 16517 }, { "epoch": 21.14304, "grad_norm": 1.124162197113037, "learning_rate": 1.6978791516606644e-05, "loss": 0.4939, "step": 16518 }, { "epoch": 21.14432, "grad_norm": 1.0810459852218628, "learning_rate": 1.6976790716286513e-05, "loss": 0.48, "step": 16519 }, { "epoch": 21.1456, "grad_norm": 1.0889114141464233, "learning_rate": 1.6974789915966388e-05, "loss": 0.4854, "step": 16520 }, { "epoch": 21.14688, "grad_norm": 1.0843100547790527, "learning_rate": 1.697278911564626e-05, "loss": 0.4766, "step": 16521 }, { "epoch": 21.14816, "grad_norm": 1.0566636323928833, "learning_rate": 1.697078831532613e-05, "loss": 0.4388, "step": 16522 }, { "epoch": 21.14944, "grad_norm": 
1.102766513824463, "learning_rate": 1.6968787515006004e-05, "loss": 0.4835, "step": 16523 }, { "epoch": 21.15072, "grad_norm": 1.0825378894805908, "learning_rate": 1.6966786714685875e-05, "loss": 0.4682, "step": 16524 }, { "epoch": 21.152, "grad_norm": 1.1190485954284668, "learning_rate": 1.6964785914365747e-05, "loss": 0.4853, "step": 16525 }, { "epoch": 21.15328, "grad_norm": 1.0945488214492798, "learning_rate": 1.696278511404562e-05, "loss": 0.5039, "step": 16526 }, { "epoch": 21.15456, "grad_norm": 1.0824321508407593, "learning_rate": 1.696078431372549e-05, "loss": 0.4791, "step": 16527 }, { "epoch": 21.15584, "grad_norm": 1.0835765600204468, "learning_rate": 1.6958783513405363e-05, "loss": 0.4846, "step": 16528 }, { "epoch": 21.15712, "grad_norm": 1.0822176933288574, "learning_rate": 1.6956782713085235e-05, "loss": 0.4914, "step": 16529 }, { "epoch": 21.1584, "grad_norm": 1.0641273260116577, "learning_rate": 1.6954781912765107e-05, "loss": 0.4497, "step": 16530 }, { "epoch": 21.15968, "grad_norm": 1.1108111143112183, "learning_rate": 1.695278111244498e-05, "loss": 0.4962, "step": 16531 }, { "epoch": 21.16096, "grad_norm": 1.0912580490112305, "learning_rate": 1.695078031212485e-05, "loss": 0.4981, "step": 16532 }, { "epoch": 21.16224, "grad_norm": 1.1157331466674805, "learning_rate": 1.6948779511804722e-05, "loss": 0.4682, "step": 16533 }, { "epoch": 21.16352, "grad_norm": 1.1047836542129517, "learning_rate": 1.6946778711484594e-05, "loss": 0.479, "step": 16534 }, { "epoch": 21.1648, "grad_norm": 1.1271506547927856, "learning_rate": 1.6944777911164466e-05, "loss": 0.5429, "step": 16535 }, { "epoch": 21.16608, "grad_norm": 1.075492024421692, "learning_rate": 1.6942777110844338e-05, "loss": 0.4732, "step": 16536 }, { "epoch": 21.16736, "grad_norm": 1.1787910461425781, "learning_rate": 1.6940776310524213e-05, "loss": 0.5362, "step": 16537 }, { "epoch": 21.16864, "grad_norm": 1.0927817821502686, "learning_rate": 1.693877551020408e-05, "loss": 0.4621, "step": 16538 
}, { "epoch": 21.16992, "grad_norm": 1.1615148782730103, "learning_rate": 1.6936774709883953e-05, "loss": 0.5075, "step": 16539 }, { "epoch": 21.1712, "grad_norm": 1.115074634552002, "learning_rate": 1.6934773909563825e-05, "loss": 0.4464, "step": 16540 }, { "epoch": 21.17248, "grad_norm": 1.1260422468185425, "learning_rate": 1.69327731092437e-05, "loss": 0.4683, "step": 16541 }, { "epoch": 21.17376, "grad_norm": 1.0940788984298706, "learning_rate": 1.693077230892357e-05, "loss": 0.5185, "step": 16542 }, { "epoch": 21.17504, "grad_norm": 1.095339059829712, "learning_rate": 1.692877150860344e-05, "loss": 0.4741, "step": 16543 }, { "epoch": 21.17632, "grad_norm": 1.0709000825881958, "learning_rate": 1.6926770708283316e-05, "loss": 0.4933, "step": 16544 }, { "epoch": 21.1776, "grad_norm": 1.1118566989898682, "learning_rate": 1.6924769907963188e-05, "loss": 0.5025, "step": 16545 }, { "epoch": 21.17888, "grad_norm": 1.1093112230300903, "learning_rate": 1.6922769107643056e-05, "loss": 0.5049, "step": 16546 }, { "epoch": 21.18016, "grad_norm": 1.0397313833236694, "learning_rate": 1.6920768307322928e-05, "loss": 0.4659, "step": 16547 }, { "epoch": 21.18144, "grad_norm": 1.1054112911224365, "learning_rate": 1.6918767507002803e-05, "loss": 0.4411, "step": 16548 }, { "epoch": 21.18272, "grad_norm": 1.1328340768814087, "learning_rate": 1.6916766706682675e-05, "loss": 0.4802, "step": 16549 }, { "epoch": 21.184, "grad_norm": 1.0890549421310425, "learning_rate": 1.6914765906362544e-05, "loss": 0.4922, "step": 16550 }, { "epoch": 21.18528, "grad_norm": 1.1533173322677612, "learning_rate": 1.691276510604242e-05, "loss": 0.515, "step": 16551 }, { "epoch": 21.18656, "grad_norm": 1.0926942825317383, "learning_rate": 1.691076430572229e-05, "loss": 0.4696, "step": 16552 }, { "epoch": 21.18784, "grad_norm": 1.0669362545013428, "learning_rate": 1.6908763505402163e-05, "loss": 0.4982, "step": 16553 }, { "epoch": 21.18912, "grad_norm": 1.0951491594314575, "learning_rate": 
1.690676270508203e-05, "loss": 0.4715, "step": 16554 }, { "epoch": 21.1904, "grad_norm": 1.072893500328064, "learning_rate": 1.6904761904761906e-05, "loss": 0.4721, "step": 16555 }, { "epoch": 21.19168, "grad_norm": 1.0877991914749146, "learning_rate": 1.6902761104441778e-05, "loss": 0.4584, "step": 16556 }, { "epoch": 21.19296, "grad_norm": 1.12388014793396, "learning_rate": 1.690076030412165e-05, "loss": 0.4523, "step": 16557 }, { "epoch": 21.19424, "grad_norm": 1.1295223236083984, "learning_rate": 1.689875950380152e-05, "loss": 0.4936, "step": 16558 }, { "epoch": 21.19552, "grad_norm": 1.1079462766647339, "learning_rate": 1.6896758703481394e-05, "loss": 0.4463, "step": 16559 }, { "epoch": 21.1968, "grad_norm": 1.1547305583953857, "learning_rate": 1.6894757903161266e-05, "loss": 0.4691, "step": 16560 }, { "epoch": 21.19808, "grad_norm": 1.1566081047058105, "learning_rate": 1.6892757102841137e-05, "loss": 0.5163, "step": 16561 }, { "epoch": 21.19936, "grad_norm": 1.1385910511016846, "learning_rate": 1.689075630252101e-05, "loss": 0.4941, "step": 16562 }, { "epoch": 21.20064, "grad_norm": 1.1214505434036255, "learning_rate": 1.688875550220088e-05, "loss": 0.5138, "step": 16563 }, { "epoch": 21.20192, "grad_norm": 1.089640498161316, "learning_rate": 1.6886754701880753e-05, "loss": 0.4491, "step": 16564 }, { "epoch": 21.2032, "grad_norm": 1.0873479843139648, "learning_rate": 1.6884753901560625e-05, "loss": 0.4711, "step": 16565 }, { "epoch": 21.20448, "grad_norm": 1.0695767402648926, "learning_rate": 1.6882753101240497e-05, "loss": 0.4575, "step": 16566 }, { "epoch": 21.20576, "grad_norm": 1.1109639406204224, "learning_rate": 1.688075230092037e-05, "loss": 0.5093, "step": 16567 }, { "epoch": 21.20704, "grad_norm": 1.10574471950531, "learning_rate": 1.687875150060024e-05, "loss": 0.5033, "step": 16568 }, { "epoch": 21.20832, "grad_norm": 1.0415626764297485, "learning_rate": 1.6876750700280112e-05, "loss": 0.4781, "step": 16569 }, { "epoch": 21.209600000000002, 
"grad_norm": 1.1042823791503906, "learning_rate": 1.6874749899959984e-05, "loss": 0.4813, "step": 16570 }, { "epoch": 21.21088, "grad_norm": 1.1033847332000732, "learning_rate": 1.6872749099639856e-05, "loss": 0.4547, "step": 16571 }, { "epoch": 21.21216, "grad_norm": 1.0810890197753906, "learning_rate": 1.6870748299319728e-05, "loss": 0.5008, "step": 16572 }, { "epoch": 21.21344, "grad_norm": 1.111965537071228, "learning_rate": 1.68687474989996e-05, "loss": 0.4542, "step": 16573 }, { "epoch": 21.21472, "grad_norm": 1.168083906173706, "learning_rate": 1.686674669867947e-05, "loss": 0.4931, "step": 16574 }, { "epoch": 21.216, "grad_norm": 1.1118204593658447, "learning_rate": 1.6864745898359343e-05, "loss": 0.4682, "step": 16575 }, { "epoch": 21.21728, "grad_norm": 1.1220096349716187, "learning_rate": 1.686274509803922e-05, "loss": 0.5033, "step": 16576 }, { "epoch": 21.21856, "grad_norm": 1.061000943183899, "learning_rate": 1.6860744297719087e-05, "loss": 0.5032, "step": 16577 }, { "epoch": 21.21984, "grad_norm": 1.1358115673065186, "learning_rate": 1.685874349739896e-05, "loss": 0.4291, "step": 16578 }, { "epoch": 21.22112, "grad_norm": 1.0819488763809204, "learning_rate": 1.685674269707883e-05, "loss": 0.4413, "step": 16579 }, { "epoch": 21.2224, "grad_norm": 1.0310840606689453, "learning_rate": 1.6854741896758706e-05, "loss": 0.436, "step": 16580 }, { "epoch": 21.22368, "grad_norm": 1.0708487033843994, "learning_rate": 1.6852741096438575e-05, "loss": 0.4718, "step": 16581 }, { "epoch": 21.22496, "grad_norm": 1.1317819356918335, "learning_rate": 1.6850740296118446e-05, "loss": 0.528, "step": 16582 }, { "epoch": 21.22624, "grad_norm": 1.1294492483139038, "learning_rate": 1.6848739495798322e-05, "loss": 0.4881, "step": 16583 }, { "epoch": 21.22752, "grad_norm": 1.1252028942108154, "learning_rate": 1.6846738695478194e-05, "loss": 0.4865, "step": 16584 }, { "epoch": 21.2288, "grad_norm": 1.187008261680603, "learning_rate": 1.6844737895158062e-05, "loss": 0.5291, 
"step": 16585 }, { "epoch": 21.23008, "grad_norm": 1.1691592931747437, "learning_rate": 1.6842737094837934e-05, "loss": 0.5374, "step": 16586 }, { "epoch": 21.23136, "grad_norm": 1.116336464881897, "learning_rate": 1.684073629451781e-05, "loss": 0.5126, "step": 16587 }, { "epoch": 21.23264, "grad_norm": 1.038357138633728, "learning_rate": 1.683873549419768e-05, "loss": 0.4979, "step": 16588 }, { "epoch": 21.23392, "grad_norm": 1.0832995176315308, "learning_rate": 1.683673469387755e-05, "loss": 0.4649, "step": 16589 }, { "epoch": 21.2352, "grad_norm": 1.0173653364181519, "learning_rate": 1.6834733893557425e-05, "loss": 0.4228, "step": 16590 }, { "epoch": 21.23648, "grad_norm": 1.0729964971542358, "learning_rate": 1.6832733093237297e-05, "loss": 0.4692, "step": 16591 }, { "epoch": 21.23776, "grad_norm": 1.036226749420166, "learning_rate": 1.683073229291717e-05, "loss": 0.4212, "step": 16592 }, { "epoch": 21.23904, "grad_norm": 1.1111071109771729, "learning_rate": 1.6828731492597037e-05, "loss": 0.5403, "step": 16593 }, { "epoch": 21.24032, "grad_norm": 1.0485975742340088, "learning_rate": 1.6826730692276912e-05, "loss": 0.4712, "step": 16594 }, { "epoch": 21.2416, "grad_norm": 0.9971339106559753, "learning_rate": 1.6824729891956784e-05, "loss": 0.3999, "step": 16595 }, { "epoch": 21.24288, "grad_norm": 1.1354920864105225, "learning_rate": 1.6822729091636656e-05, "loss": 0.533, "step": 16596 }, { "epoch": 21.24416, "grad_norm": 1.1078267097473145, "learning_rate": 1.6820728291316528e-05, "loss": 0.4979, "step": 16597 }, { "epoch": 21.24544, "grad_norm": 1.1035330295562744, "learning_rate": 1.68187274909964e-05, "loss": 0.4989, "step": 16598 }, { "epoch": 21.24672, "grad_norm": 1.091853380203247, "learning_rate": 1.681672669067627e-05, "loss": 0.4654, "step": 16599 }, { "epoch": 21.248, "grad_norm": 1.0949825048446655, "learning_rate": 1.6814725890356143e-05, "loss": 0.4446, "step": 16600 }, { "epoch": 21.24928, "grad_norm": 1.165686011314392, "learning_rate": 
1.6812725090036015e-05, "loss": 0.5106, "step": 16601 }, { "epoch": 21.25056, "grad_norm": 1.0916712284088135, "learning_rate": 1.6810724289715887e-05, "loss": 0.4743, "step": 16602 }, { "epoch": 21.25184, "grad_norm": 1.0554473400115967, "learning_rate": 1.680872348939576e-05, "loss": 0.459, "step": 16603 }, { "epoch": 21.25312, "grad_norm": 1.1290833950042725, "learning_rate": 1.6806722689075634e-05, "loss": 0.5159, "step": 16604 }, { "epoch": 21.2544, "grad_norm": 1.066178560256958, "learning_rate": 1.6804721888755503e-05, "loss": 0.4695, "step": 16605 }, { "epoch": 21.25568, "grad_norm": 1.0555775165557861, "learning_rate": 1.6802721088435374e-05, "loss": 0.4638, "step": 16606 }, { "epoch": 21.25696, "grad_norm": 1.1663188934326172, "learning_rate": 1.6800720288115246e-05, "loss": 0.5057, "step": 16607 }, { "epoch": 21.25824, "grad_norm": 1.0777236223220825, "learning_rate": 1.679871948779512e-05, "loss": 0.457, "step": 16608 }, { "epoch": 21.25952, "grad_norm": 1.185142993927002, "learning_rate": 1.679671868747499e-05, "loss": 0.4742, "step": 16609 }, { "epoch": 21.2608, "grad_norm": 1.0730408430099487, "learning_rate": 1.6794717887154862e-05, "loss": 0.5001, "step": 16610 }, { "epoch": 21.26208, "grad_norm": 1.0346087217330933, "learning_rate": 1.6792717086834734e-05, "loss": 0.4242, "step": 16611 }, { "epoch": 21.26336, "grad_norm": 1.0942175388336182, "learning_rate": 1.6790716286514606e-05, "loss": 0.4677, "step": 16612 }, { "epoch": 21.26464, "grad_norm": 1.15269935131073, "learning_rate": 1.6788715486194477e-05, "loss": 0.5306, "step": 16613 }, { "epoch": 21.26592, "grad_norm": 1.0454847812652588, "learning_rate": 1.678671468587435e-05, "loss": 0.4561, "step": 16614 }, { "epoch": 21.2672, "grad_norm": 1.16476309299469, "learning_rate": 1.6784713885554225e-05, "loss": 0.53, "step": 16615 }, { "epoch": 21.26848, "grad_norm": 1.286267638206482, "learning_rate": 1.6782713085234093e-05, "loss": 0.5394, "step": 16616 }, { "epoch": 21.26976, "grad_norm": 
1.114345908164978, "learning_rate": 1.6780712284913965e-05, "loss": 0.4361, "step": 16617 }, { "epoch": 21.27104, "grad_norm": 1.149522304534912, "learning_rate": 1.6778711484593837e-05, "loss": 0.4684, "step": 16618 }, { "epoch": 21.27232, "grad_norm": 1.143465280532837, "learning_rate": 1.6776710684273712e-05, "loss": 0.5119, "step": 16619 }, { "epoch": 21.2736, "grad_norm": 1.0972861051559448, "learning_rate": 1.677470988395358e-05, "loss": 0.482, "step": 16620 }, { "epoch": 21.27488, "grad_norm": 1.0499613285064697, "learning_rate": 1.6772709083633452e-05, "loss": 0.465, "step": 16621 }, { "epoch": 21.27616, "grad_norm": 1.0492812395095825, "learning_rate": 1.6770708283313328e-05, "loss": 0.4625, "step": 16622 }, { "epoch": 21.27744, "grad_norm": 1.0873419046401978, "learning_rate": 1.67687074829932e-05, "loss": 0.4993, "step": 16623 }, { "epoch": 21.27872, "grad_norm": 1.1490446329116821, "learning_rate": 1.6766706682673068e-05, "loss": 0.4968, "step": 16624 }, { "epoch": 21.28, "grad_norm": 1.1244101524353027, "learning_rate": 1.676470588235294e-05, "loss": 0.5022, "step": 16625 }, { "epoch": 21.28128, "grad_norm": 1.0936533212661743, "learning_rate": 1.6762705082032815e-05, "loss": 0.4537, "step": 16626 }, { "epoch": 21.28256, "grad_norm": 1.1399465799331665, "learning_rate": 1.6760704281712687e-05, "loss": 0.4794, "step": 16627 }, { "epoch": 21.28384, "grad_norm": 1.14096999168396, "learning_rate": 1.6758703481392555e-05, "loss": 0.4995, "step": 16628 }, { "epoch": 21.28512, "grad_norm": 1.1549474000930786, "learning_rate": 1.675670268107243e-05, "loss": 0.4909, "step": 16629 }, { "epoch": 21.2864, "grad_norm": 1.088848352432251, "learning_rate": 1.6754701880752302e-05, "loss": 0.4626, "step": 16630 }, { "epoch": 21.28768, "grad_norm": 1.114532709121704, "learning_rate": 1.6752701080432174e-05, "loss": 0.4492, "step": 16631 }, { "epoch": 21.28896, "grad_norm": 1.1168134212493896, "learning_rate": 1.6750700280112043e-05, "loss": 0.505, "step": 16632 }, { 
"epoch": 21.29024, "grad_norm": 1.1612701416015625, "learning_rate": 1.6748699479791918e-05, "loss": 0.519, "step": 16633 }, { "epoch": 21.29152, "grad_norm": 1.0742192268371582, "learning_rate": 1.674669867947179e-05, "loss": 0.4603, "step": 16634 }, { "epoch": 21.2928, "grad_norm": 1.1029329299926758, "learning_rate": 1.674469787915166e-05, "loss": 0.4851, "step": 16635 }, { "epoch": 21.29408, "grad_norm": 1.106626033782959, "learning_rate": 1.6742697078831534e-05, "loss": 0.5193, "step": 16636 }, { "epoch": 21.29536, "grad_norm": 1.117716908454895, "learning_rate": 1.6740696278511405e-05, "loss": 0.5412, "step": 16637 }, { "epoch": 21.29664, "grad_norm": 1.0421884059906006, "learning_rate": 1.6738695478191277e-05, "loss": 0.446, "step": 16638 }, { "epoch": 21.29792, "grad_norm": 1.0915087461471558, "learning_rate": 1.673669467787115e-05, "loss": 0.5246, "step": 16639 }, { "epoch": 21.2992, "grad_norm": 1.1888045072555542, "learning_rate": 1.673469387755102e-05, "loss": 0.4996, "step": 16640 }, { "epoch": 21.30048, "grad_norm": 1.0700676441192627, "learning_rate": 1.6732693077230893e-05, "loss": 0.4903, "step": 16641 }, { "epoch": 21.30176, "grad_norm": 1.1567354202270508, "learning_rate": 1.6730692276910765e-05, "loss": 0.525, "step": 16642 }, { "epoch": 21.30304, "grad_norm": 1.1310677528381348, "learning_rate": 1.672869147659064e-05, "loss": 0.5157, "step": 16643 }, { "epoch": 21.30432, "grad_norm": 1.101754903793335, "learning_rate": 1.672669067627051e-05, "loss": 0.4957, "step": 16644 }, { "epoch": 21.3056, "grad_norm": 1.1254335641860962, "learning_rate": 1.672468987595038e-05, "loss": 0.4955, "step": 16645 }, { "epoch": 21.30688, "grad_norm": 1.1134346723556519, "learning_rate": 1.6722689075630252e-05, "loss": 0.4501, "step": 16646 }, { "epoch": 21.30816, "grad_norm": 1.1738286018371582, "learning_rate": 1.6720688275310127e-05, "loss": 0.4818, "step": 16647 }, { "epoch": 21.30944, "grad_norm": 1.0866729021072388, "learning_rate": 1.6718687474989996e-05, 
"loss": 0.4542, "step": 16648 }, { "epoch": 21.31072, "grad_norm": 1.1546677350997925, "learning_rate": 1.6716686674669868e-05, "loss": 0.4684, "step": 16649 }, { "epoch": 21.312, "grad_norm": 1.077710747718811, "learning_rate": 1.6714685874349743e-05, "loss": 0.4649, "step": 16650 }, { "epoch": 21.31328, "grad_norm": 1.1036674976348877, "learning_rate": 1.6712685074029615e-05, "loss": 0.498, "step": 16651 }, { "epoch": 21.31456, "grad_norm": 1.1394011974334717, "learning_rate": 1.6710684273709483e-05, "loss": 0.4669, "step": 16652 }, { "epoch": 21.31584, "grad_norm": 1.1064188480377197, "learning_rate": 1.6708683473389355e-05, "loss": 0.4772, "step": 16653 }, { "epoch": 21.31712, "grad_norm": 1.1202365159988403, "learning_rate": 1.670668267306923e-05, "loss": 0.4958, "step": 16654 }, { "epoch": 21.3184, "grad_norm": 1.156451940536499, "learning_rate": 1.6704681872749102e-05, "loss": 0.5121, "step": 16655 }, { "epoch": 21.31968, "grad_norm": 1.1051130294799805, "learning_rate": 1.670268107242897e-05, "loss": 0.5077, "step": 16656 }, { "epoch": 21.32096, "grad_norm": 1.0539714097976685, "learning_rate": 1.6700680272108846e-05, "loss": 0.4521, "step": 16657 }, { "epoch": 21.32224, "grad_norm": 1.1256710290908813, "learning_rate": 1.6698679471788718e-05, "loss": 0.4814, "step": 16658 }, { "epoch": 21.32352, "grad_norm": 1.1350125074386597, "learning_rate": 1.669667867146859e-05, "loss": 0.509, "step": 16659 }, { "epoch": 21.3248, "grad_norm": 0.9891694784164429, "learning_rate": 1.6694677871148458e-05, "loss": 0.4096, "step": 16660 }, { "epoch": 21.32608, "grad_norm": 1.1129761934280396, "learning_rate": 1.6692677070828333e-05, "loss": 0.4794, "step": 16661 }, { "epoch": 21.32736, "grad_norm": 1.0066285133361816, "learning_rate": 1.6690676270508205e-05, "loss": 0.4442, "step": 16662 }, { "epoch": 21.32864, "grad_norm": 1.067672848701477, "learning_rate": 1.6688675470188077e-05, "loss": 0.4584, "step": 16663 }, { "epoch": 21.32992, "grad_norm": 1.1201221942901611, 
"learning_rate": 1.668667466986795e-05, "loss": 0.4726, "step": 16664 }, { "epoch": 21.3312, "grad_norm": 1.104931116104126, "learning_rate": 1.668467386954782e-05, "loss": 0.4799, "step": 16665 }, { "epoch": 21.33248, "grad_norm": 1.0619887113571167, "learning_rate": 1.6682673069227693e-05, "loss": 0.4639, "step": 16666 }, { "epoch": 21.33376, "grad_norm": 1.1233429908752441, "learning_rate": 1.6680672268907564e-05, "loss": 0.4904, "step": 16667 }, { "epoch": 21.33504, "grad_norm": 1.137324333190918, "learning_rate": 1.6678671468587436e-05, "loss": 0.4927, "step": 16668 }, { "epoch": 21.33632, "grad_norm": 1.0907856225967407, "learning_rate": 1.6676670668267308e-05, "loss": 0.4698, "step": 16669 }, { "epoch": 21.3376, "grad_norm": 1.1330994367599487, "learning_rate": 1.667466986794718e-05, "loss": 0.5419, "step": 16670 }, { "epoch": 21.33888, "grad_norm": 1.1534572839736938, "learning_rate": 1.6672669067627052e-05, "loss": 0.4913, "step": 16671 }, { "epoch": 21.34016, "grad_norm": 1.070062279701233, "learning_rate": 1.6670668267306924e-05, "loss": 0.4565, "step": 16672 }, { "epoch": 21.34144, "grad_norm": 1.0346976518630981, "learning_rate": 1.6668667466986796e-05, "loss": 0.4802, "step": 16673 }, { "epoch": 21.34272, "grad_norm": 1.0610109567642212, "learning_rate": 1.6666666666666667e-05, "loss": 0.4705, "step": 16674 }, { "epoch": 21.344, "grad_norm": 1.103748083114624, "learning_rate": 1.666466586634654e-05, "loss": 0.4516, "step": 16675 }, { "epoch": 21.34528, "grad_norm": 1.1500375270843506, "learning_rate": 1.666266506602641e-05, "loss": 0.5045, "step": 16676 }, { "epoch": 21.34656, "grad_norm": 1.0690054893493652, "learning_rate": 1.6660664265706283e-05, "loss": 0.4846, "step": 16677 }, { "epoch": 21.34784, "grad_norm": 1.1877983808517456, "learning_rate": 1.6658663465386155e-05, "loss": 0.5534, "step": 16678 }, { "epoch": 21.34912, "grad_norm": 1.092886209487915, "learning_rate": 1.6656662665066027e-05, "loss": 0.4751, "step": 16679 }, { "epoch": 21.3504, 
"grad_norm": 1.1173007488250732, "learning_rate": 1.66546618647459e-05, "loss": 0.5033, "step": 16680 }, { "epoch": 21.35168, "grad_norm": 1.1595876216888428, "learning_rate": 1.665266106442577e-05, "loss": 0.5458, "step": 16681 }, { "epoch": 21.35296, "grad_norm": 1.059984803199768, "learning_rate": 1.6650660264105646e-05, "loss": 0.4982, "step": 16682 }, { "epoch": 21.35424, "grad_norm": 1.0998493432998657, "learning_rate": 1.6648659463785514e-05, "loss": 0.5241, "step": 16683 }, { "epoch": 21.35552, "grad_norm": 1.1519036293029785, "learning_rate": 1.6646658663465386e-05, "loss": 0.5199, "step": 16684 }, { "epoch": 21.3568, "grad_norm": 1.141817331314087, "learning_rate": 1.6644657863145258e-05, "loss": 0.5133, "step": 16685 }, { "epoch": 21.35808, "grad_norm": 1.0783271789550781, "learning_rate": 1.6642657062825133e-05, "loss": 0.4728, "step": 16686 }, { "epoch": 21.35936, "grad_norm": 1.1302711963653564, "learning_rate": 1.6640656262505e-05, "loss": 0.5232, "step": 16687 }, { "epoch": 21.36064, "grad_norm": 1.1916934251785278, "learning_rate": 1.6638655462184873e-05, "loss": 0.5078, "step": 16688 }, { "epoch": 21.36192, "grad_norm": 1.1410053968429565, "learning_rate": 1.663665466186475e-05, "loss": 0.4956, "step": 16689 }, { "epoch": 21.3632, "grad_norm": 1.1169198751449585, "learning_rate": 1.663465386154462e-05, "loss": 0.4981, "step": 16690 }, { "epoch": 21.36448, "grad_norm": 1.0799909830093384, "learning_rate": 1.663265306122449e-05, "loss": 0.4978, "step": 16691 }, { "epoch": 21.36576, "grad_norm": 1.1033358573913574, "learning_rate": 1.663065226090436e-05, "loss": 0.4631, "step": 16692 }, { "epoch": 21.36704, "grad_norm": 1.146831750869751, "learning_rate": 1.6628651460584236e-05, "loss": 0.4868, "step": 16693 }, { "epoch": 21.36832, "grad_norm": 1.1381210088729858, "learning_rate": 1.6626650660264108e-05, "loss": 0.4916, "step": 16694 }, { "epoch": 21.3696, "grad_norm": 1.1156655550003052, "learning_rate": 1.6624649859943976e-05, "loss": 0.4718, 
"step": 16695 }, { "epoch": 21.37088, "grad_norm": 1.1188489198684692, "learning_rate": 1.6622649059623852e-05, "loss": 0.481, "step": 16696 }, { "epoch": 21.37216, "grad_norm": 1.0974246263504028, "learning_rate": 1.6620648259303724e-05, "loss": 0.4989, "step": 16697 }, { "epoch": 21.37344, "grad_norm": 1.2472598552703857, "learning_rate": 1.6618647458983595e-05, "loss": 0.621, "step": 16698 }, { "epoch": 21.37472, "grad_norm": 1.074589490890503, "learning_rate": 1.6616646658663464e-05, "loss": 0.4802, "step": 16699 }, { "epoch": 21.376, "grad_norm": 1.0904700756072998, "learning_rate": 1.661464585834334e-05, "loss": 0.4855, "step": 16700 }, { "epoch": 21.37728, "grad_norm": 1.093536376953125, "learning_rate": 1.661264505802321e-05, "loss": 0.4616, "step": 16701 }, { "epoch": 21.37856, "grad_norm": 1.0414737462997437, "learning_rate": 1.6610644257703083e-05, "loss": 0.4223, "step": 16702 }, { "epoch": 21.37984, "grad_norm": 1.1210976839065552, "learning_rate": 1.6608643457382955e-05, "loss": 0.5263, "step": 16703 }, { "epoch": 21.38112, "grad_norm": 1.0790395736694336, "learning_rate": 1.6606642657062827e-05, "loss": 0.4368, "step": 16704 }, { "epoch": 21.3824, "grad_norm": 1.146754503250122, "learning_rate": 1.66046418567427e-05, "loss": 0.501, "step": 16705 }, { "epoch": 21.38368, "grad_norm": 1.1394240856170654, "learning_rate": 1.660264105642257e-05, "loss": 0.53, "step": 16706 }, { "epoch": 21.38496, "grad_norm": 1.1505063772201538, "learning_rate": 1.6600640256102442e-05, "loss": 0.4901, "step": 16707 }, { "epoch": 21.38624, "grad_norm": 1.1096645593643188, "learning_rate": 1.6598639455782314e-05, "loss": 0.4851, "step": 16708 }, { "epoch": 21.38752, "grad_norm": 1.1275689601898193, "learning_rate": 1.6596638655462186e-05, "loss": 0.5176, "step": 16709 }, { "epoch": 21.3888, "grad_norm": 1.1523791551589966, "learning_rate": 1.6594637855142058e-05, "loss": 0.5758, "step": 16710 }, { "epoch": 21.39008, "grad_norm": 1.1662170886993408, "learning_rate": 
1.659263705482193e-05, "loss": 0.5227, "step": 16711 }, { "epoch": 21.39136, "grad_norm": 1.178915023803711, "learning_rate": 1.65906362545018e-05, "loss": 0.4966, "step": 16712 }, { "epoch": 21.39264, "grad_norm": 1.153525471687317, "learning_rate": 1.6588635454181673e-05, "loss": 0.509, "step": 16713 }, { "epoch": 21.39392, "grad_norm": 1.0840404033660889, "learning_rate": 1.6586634653861545e-05, "loss": 0.45, "step": 16714 }, { "epoch": 21.3952, "grad_norm": 1.0108500719070435, "learning_rate": 1.6584633853541417e-05, "loss": 0.4983, "step": 16715 }, { "epoch": 21.39648, "grad_norm": 1.0287790298461914, "learning_rate": 1.658263305322129e-05, "loss": 0.447, "step": 16716 }, { "epoch": 21.39776, "grad_norm": 1.1262785196304321, "learning_rate": 1.6580632252901164e-05, "loss": 0.5199, "step": 16717 }, { "epoch": 21.39904, "grad_norm": 1.1663216352462769, "learning_rate": 1.6578631452581033e-05, "loss": 0.5389, "step": 16718 }, { "epoch": 21.40032, "grad_norm": 1.050490379333496, "learning_rate": 1.6576630652260904e-05, "loss": 0.4721, "step": 16719 }, { "epoch": 21.4016, "grad_norm": 1.0672640800476074, "learning_rate": 1.6574629851940776e-05, "loss": 0.4858, "step": 16720 }, { "epoch": 21.40288, "grad_norm": 1.0625916719436646, "learning_rate": 1.657262905162065e-05, "loss": 0.4438, "step": 16721 }, { "epoch": 21.40416, "grad_norm": 1.169717788696289, "learning_rate": 1.657062825130052e-05, "loss": 0.4998, "step": 16722 }, { "epoch": 21.40544, "grad_norm": 1.1702299118041992, "learning_rate": 1.6568627450980392e-05, "loss": 0.4903, "step": 16723 }, { "epoch": 21.40672, "grad_norm": 1.062159538269043, "learning_rate": 1.6566626650660264e-05, "loss": 0.4464, "step": 16724 }, { "epoch": 21.408, "grad_norm": 1.1465506553649902, "learning_rate": 1.656462585034014e-05, "loss": 0.511, "step": 16725 }, { "epoch": 21.40928, "grad_norm": 1.079226016998291, "learning_rate": 1.6562625050020007e-05, "loss": 0.4868, "step": 16726 }, { "epoch": 21.41056, "grad_norm": 
1.0724486112594604, "learning_rate": 1.656062424969988e-05, "loss": 0.4862, "step": 16727 }, { "epoch": 21.41184, "grad_norm": 1.142055869102478, "learning_rate": 1.6558623449379755e-05, "loss": 0.5397, "step": 16728 }, { "epoch": 21.41312, "grad_norm": 1.0549299716949463, "learning_rate": 1.6556622649059626e-05, "loss": 0.44, "step": 16729 }, { "epoch": 21.4144, "grad_norm": 1.177176833152771, "learning_rate": 1.6554621848739495e-05, "loss": 0.5188, "step": 16730 }, { "epoch": 21.41568, "grad_norm": 1.174822449684143, "learning_rate": 1.6552621048419367e-05, "loss": 0.502, "step": 16731 }, { "epoch": 21.41696, "grad_norm": 1.1037957668304443, "learning_rate": 1.6550620248099242e-05, "loss": 0.486, "step": 16732 }, { "epoch": 21.41824, "grad_norm": 1.1217528581619263, "learning_rate": 1.6548619447779114e-05, "loss": 0.4969, "step": 16733 }, { "epoch": 21.41952, "grad_norm": 1.146602749824524, "learning_rate": 1.6546618647458982e-05, "loss": 0.4549, "step": 16734 }, { "epoch": 21.4208, "grad_norm": 1.062936782836914, "learning_rate": 1.6544617847138858e-05, "loss": 0.467, "step": 16735 }, { "epoch": 21.42208, "grad_norm": 1.1040713787078857, "learning_rate": 1.654261704681873e-05, "loss": 0.4766, "step": 16736 }, { "epoch": 21.42336, "grad_norm": 1.055647373199463, "learning_rate": 1.65406162464986e-05, "loss": 0.4662, "step": 16737 }, { "epoch": 21.42464, "grad_norm": 1.1249991655349731, "learning_rate": 1.653861544617847e-05, "loss": 0.5188, "step": 16738 }, { "epoch": 21.42592, "grad_norm": 1.0827454328536987, "learning_rate": 1.6536614645858345e-05, "loss": 0.4846, "step": 16739 }, { "epoch": 21.4272, "grad_norm": 1.1406104564666748, "learning_rate": 1.6534613845538217e-05, "loss": 0.5539, "step": 16740 }, { "epoch": 21.42848, "grad_norm": 1.1139311790466309, "learning_rate": 1.653261304521809e-05, "loss": 0.4687, "step": 16741 }, { "epoch": 21.42976, "grad_norm": 1.1412529945373535, "learning_rate": 1.653061224489796e-05, "loss": 0.5136, "step": 16742 }, { 
"epoch": 21.43104, "grad_norm": 1.1076371669769287, "learning_rate": 1.6528611444577832e-05, "loss": 0.4687, "step": 16743 }, { "epoch": 21.43232, "grad_norm": 1.0816609859466553, "learning_rate": 1.6526610644257704e-05, "loss": 0.4853, "step": 16744 }, { "epoch": 21.4336, "grad_norm": 1.0708239078521729, "learning_rate": 1.6524609843937576e-05, "loss": 0.4779, "step": 16745 }, { "epoch": 21.43488, "grad_norm": 1.1649770736694336, "learning_rate": 1.6522609043617448e-05, "loss": 0.498, "step": 16746 }, { "epoch": 21.43616, "grad_norm": 1.1269488334655762, "learning_rate": 1.652060824329732e-05, "loss": 0.4976, "step": 16747 }, { "epoch": 21.43744, "grad_norm": 1.0875318050384521, "learning_rate": 1.651860744297719e-05, "loss": 0.514, "step": 16748 }, { "epoch": 21.43872, "grad_norm": 1.0458475351333618, "learning_rate": 1.6516606642657063e-05, "loss": 0.4691, "step": 16749 }, { "epoch": 21.44, "grad_norm": 1.1326485872268677, "learning_rate": 1.6514605842336935e-05, "loss": 0.4746, "step": 16750 }, { "epoch": 21.44128, "grad_norm": 1.032772183418274, "learning_rate": 1.6512605042016807e-05, "loss": 0.4662, "step": 16751 }, { "epoch": 21.44256, "grad_norm": 1.1388492584228516, "learning_rate": 1.651060424169668e-05, "loss": 0.49, "step": 16752 }, { "epoch": 21.44384, "grad_norm": 1.1491458415985107, "learning_rate": 1.650860344137655e-05, "loss": 0.486, "step": 16753 }, { "epoch": 21.44512, "grad_norm": 1.0875811576843262, "learning_rate": 1.6506602641056423e-05, "loss": 0.4755, "step": 16754 }, { "epoch": 21.4464, "grad_norm": 1.056416630744934, "learning_rate": 1.6504601840736295e-05, "loss": 0.4494, "step": 16755 }, { "epoch": 21.44768, "grad_norm": 1.0903964042663574, "learning_rate": 1.650260104041617e-05, "loss": 0.5011, "step": 16756 }, { "epoch": 21.44896, "grad_norm": 1.0801723003387451, "learning_rate": 1.650060024009604e-05, "loss": 0.492, "step": 16757 }, { "epoch": 21.45024, "grad_norm": 1.1389845609664917, "learning_rate": 1.649859943977591e-05, 
"loss": 0.51, "step": 16758 }, { "epoch": 21.45152, "grad_norm": 1.1262261867523193, "learning_rate": 1.6496598639455782e-05, "loss": 0.5022, "step": 16759 }, { "epoch": 21.4528, "grad_norm": 1.123065710067749, "learning_rate": 1.6494597839135657e-05, "loss": 0.5005, "step": 16760 }, { "epoch": 21.45408, "grad_norm": 1.099016785621643, "learning_rate": 1.6492597038815526e-05, "loss": 0.5023, "step": 16761 }, { "epoch": 21.45536, "grad_norm": 1.1234606504440308, "learning_rate": 1.6490596238495398e-05, "loss": 0.4718, "step": 16762 }, { "epoch": 21.45664, "grad_norm": 1.1033679246902466, "learning_rate": 1.6488595438175273e-05, "loss": 0.4908, "step": 16763 }, { "epoch": 21.45792, "grad_norm": 1.0978657007217407, "learning_rate": 1.6486594637855145e-05, "loss": 0.5028, "step": 16764 }, { "epoch": 21.4592, "grad_norm": 1.0921522378921509, "learning_rate": 1.6484593837535013e-05, "loss": 0.4639, "step": 16765 }, { "epoch": 21.46048, "grad_norm": 1.0589871406555176, "learning_rate": 1.6482593037214885e-05, "loss": 0.4662, "step": 16766 }, { "epoch": 21.46176, "grad_norm": 1.120967149734497, "learning_rate": 1.648059223689476e-05, "loss": 0.4901, "step": 16767 }, { "epoch": 21.46304, "grad_norm": 1.1276214122772217, "learning_rate": 1.6478591436574632e-05, "loss": 0.4807, "step": 16768 }, { "epoch": 21.46432, "grad_norm": 1.0669405460357666, "learning_rate": 1.64765906362545e-05, "loss": 0.4683, "step": 16769 }, { "epoch": 21.4656, "grad_norm": 1.0860059261322021, "learning_rate": 1.6474589835934376e-05, "loss": 0.5079, "step": 16770 }, { "epoch": 21.46688, "grad_norm": 1.073190689086914, "learning_rate": 1.6472589035614248e-05, "loss": 0.4723, "step": 16771 }, { "epoch": 21.46816, "grad_norm": 1.1541026830673218, "learning_rate": 1.647058823529412e-05, "loss": 0.5063, "step": 16772 }, { "epoch": 21.46944, "grad_norm": 1.0719937086105347, "learning_rate": 1.6468587434973988e-05, "loss": 0.4436, "step": 16773 }, { "epoch": 21.47072, "grad_norm": 1.048323631286621, 
"learning_rate": 1.6466586634653863e-05, "loss": 0.439, "step": 16774 }, { "epoch": 21.472, "grad_norm": 1.0535809993743896, "learning_rate": 1.6464585834333735e-05, "loss": 0.4863, "step": 16775 }, { "epoch": 21.47328, "grad_norm": 1.0624300241470337, "learning_rate": 1.6462585034013607e-05, "loss": 0.5091, "step": 16776 }, { "epoch": 21.47456, "grad_norm": 1.1307200193405151, "learning_rate": 1.6460584233693475e-05, "loss": 0.4733, "step": 16777 }, { "epoch": 21.47584, "grad_norm": 1.1632894277572632, "learning_rate": 1.645858343337335e-05, "loss": 0.4962, "step": 16778 }, { "epoch": 21.47712, "grad_norm": 1.1236180067062378, "learning_rate": 1.6456582633053223e-05, "loss": 0.4846, "step": 16779 }, { "epoch": 21.4784, "grad_norm": 1.083125114440918, "learning_rate": 1.6454581832733094e-05, "loss": 0.4867, "step": 16780 }, { "epoch": 21.47968, "grad_norm": 1.0312892198562622, "learning_rate": 1.6452581032412966e-05, "loss": 0.4648, "step": 16781 }, { "epoch": 21.48096, "grad_norm": 1.0965737104415894, "learning_rate": 1.6450580232092838e-05, "loss": 0.4893, "step": 16782 }, { "epoch": 21.48224, "grad_norm": 1.157957673072815, "learning_rate": 1.644857943177271e-05, "loss": 0.5055, "step": 16783 }, { "epoch": 21.48352, "grad_norm": 1.0796724557876587, "learning_rate": 1.6446578631452582e-05, "loss": 0.4762, "step": 16784 }, { "epoch": 21.4848, "grad_norm": 1.1314095258712769, "learning_rate": 1.6444577831132454e-05, "loss": 0.49, "step": 16785 }, { "epoch": 21.48608, "grad_norm": 1.070644736289978, "learning_rate": 1.6442577030812326e-05, "loss": 0.4916, "step": 16786 }, { "epoch": 21.48736, "grad_norm": 1.101872205734253, "learning_rate": 1.6440576230492197e-05, "loss": 0.4969, "step": 16787 }, { "epoch": 21.48864, "grad_norm": 1.0998544692993164, "learning_rate": 1.643857543017207e-05, "loss": 0.4433, "step": 16788 }, { "epoch": 21.48992, "grad_norm": 1.1133356094360352, "learning_rate": 1.643657462985194e-05, "loss": 0.4838, "step": 16789 }, { "epoch": 21.4912, 
"grad_norm": 1.0400280952453613, "learning_rate": 1.6434573829531813e-05, "loss": 0.4781, "step": 16790 }, { "epoch": 21.49248, "grad_norm": 1.1187825202941895, "learning_rate": 1.6432573029211685e-05, "loss": 0.4502, "step": 16791 }, { "epoch": 21.49376, "grad_norm": 1.09933340549469, "learning_rate": 1.6430572228891557e-05, "loss": 0.4945, "step": 16792 }, { "epoch": 21.49504, "grad_norm": 1.0932108163833618, "learning_rate": 1.642857142857143e-05, "loss": 0.4643, "step": 16793 }, { "epoch": 21.49632, "grad_norm": 1.1463568210601807, "learning_rate": 1.64265706282513e-05, "loss": 0.5326, "step": 16794 }, { "epoch": 21.4976, "grad_norm": 1.0978707075119019, "learning_rate": 1.6424569827931176e-05, "loss": 0.5083, "step": 16795 }, { "epoch": 21.49888, "grad_norm": 1.1023026704788208, "learning_rate": 1.6422569027611044e-05, "loss": 0.4388, "step": 16796 }, { "epoch": 21.50016, "grad_norm": 1.119019865989685, "learning_rate": 1.6420568227290916e-05, "loss": 0.4728, "step": 16797 }, { "epoch": 21.50144, "grad_norm": 1.1367695331573486, "learning_rate": 1.6418567426970788e-05, "loss": 0.4819, "step": 16798 }, { "epoch": 21.50272, "grad_norm": 1.096197485923767, "learning_rate": 1.6416566626650663e-05, "loss": 0.459, "step": 16799 }, { "epoch": 21.504, "grad_norm": 1.1106514930725098, "learning_rate": 1.641456582633053e-05, "loss": 0.4894, "step": 16800 }, { "epoch": 21.50528, "grad_norm": 1.0969781875610352, "learning_rate": 1.6412565026010403e-05, "loss": 0.5059, "step": 16801 }, { "epoch": 21.50656, "grad_norm": 1.158473014831543, "learning_rate": 1.641056422569028e-05, "loss": 0.4895, "step": 16802 }, { "epoch": 21.50784, "grad_norm": 1.147180199623108, "learning_rate": 1.640856342537015e-05, "loss": 0.5291, "step": 16803 }, { "epoch": 21.50912, "grad_norm": 1.0704094171524048, "learning_rate": 1.640656262505002e-05, "loss": 0.4578, "step": 16804 }, { "epoch": 21.5104, "grad_norm": 1.1559474468231201, "learning_rate": 1.640456182472989e-05, "loss": 0.5084, "step": 
16805 }, { "epoch": 21.51168, "grad_norm": 1.14545738697052, "learning_rate": 1.6402561024409766e-05, "loss": 0.5353, "step": 16806 }, { "epoch": 21.51296, "grad_norm": 1.1325570344924927, "learning_rate": 1.6400560224089638e-05, "loss": 0.4731, "step": 16807 }, { "epoch": 21.51424, "grad_norm": 1.1258713006973267, "learning_rate": 1.6398559423769506e-05, "loss": 0.4792, "step": 16808 }, { "epoch": 21.51552, "grad_norm": 1.1287716627120972, "learning_rate": 1.639655862344938e-05, "loss": 0.4661, "step": 16809 }, { "epoch": 21.5168, "grad_norm": 1.1289795637130737, "learning_rate": 1.6394557823129254e-05, "loss": 0.4774, "step": 16810 }, { "epoch": 21.51808, "grad_norm": 1.1608408689498901, "learning_rate": 1.6392557022809125e-05, "loss": 0.4932, "step": 16811 }, { "epoch": 21.51936, "grad_norm": 1.1531096696853638, "learning_rate": 1.6390556222488994e-05, "loss": 0.5028, "step": 16812 }, { "epoch": 21.52064, "grad_norm": 1.1289916038513184, "learning_rate": 1.638855542216887e-05, "loss": 0.4894, "step": 16813 }, { "epoch": 21.52192, "grad_norm": 1.131188988685608, "learning_rate": 1.638655462184874e-05, "loss": 0.5029, "step": 16814 }, { "epoch": 21.5232, "grad_norm": 1.0534837245941162, "learning_rate": 1.6384553821528613e-05, "loss": 0.4684, "step": 16815 }, { "epoch": 21.52448, "grad_norm": 1.1278927326202393, "learning_rate": 1.6382553021208485e-05, "loss": 0.5104, "step": 16816 }, { "epoch": 21.52576, "grad_norm": 1.177605152130127, "learning_rate": 1.6380552220888357e-05, "loss": 0.4751, "step": 16817 }, { "epoch": 21.52704, "grad_norm": 1.1548734903335571, "learning_rate": 1.637855142056823e-05, "loss": 0.502, "step": 16818 }, { "epoch": 21.52832, "grad_norm": 1.145601749420166, "learning_rate": 1.63765506202481e-05, "loss": 0.5197, "step": 16819 }, { "epoch": 21.5296, "grad_norm": 1.058839201927185, "learning_rate": 1.6374549819927972e-05, "loss": 0.4424, "step": 16820 }, { "epoch": 21.53088, "grad_norm": 1.1241230964660645, "learning_rate": 
1.6372549019607844e-05, "loss": 0.4831, "step": 16821 }, { "epoch": 21.53216, "grad_norm": 1.130538821220398, "learning_rate": 1.6370548219287716e-05, "loss": 0.4936, "step": 16822 }, { "epoch": 21.53344, "grad_norm": 1.1460503339767456, "learning_rate": 1.6368547418967588e-05, "loss": 0.4879, "step": 16823 }, { "epoch": 21.53472, "grad_norm": 1.1377558708190918, "learning_rate": 1.636654661864746e-05, "loss": 0.5494, "step": 16824 }, { "epoch": 21.536, "grad_norm": 1.1145014762878418, "learning_rate": 1.636454581832733e-05, "loss": 0.5045, "step": 16825 }, { "epoch": 21.53728, "grad_norm": 1.0829366445541382, "learning_rate": 1.6362545018007203e-05, "loss": 0.4957, "step": 16826 }, { "epoch": 21.53856, "grad_norm": 1.0551543235778809, "learning_rate": 1.6360544217687075e-05, "loss": 0.4979, "step": 16827 }, { "epoch": 21.53984, "grad_norm": 1.0742313861846924, "learning_rate": 1.6358543417366947e-05, "loss": 0.5055, "step": 16828 }, { "epoch": 21.54112, "grad_norm": 1.1106947660446167, "learning_rate": 1.635654261704682e-05, "loss": 0.5044, "step": 16829 }, { "epoch": 21.5424, "grad_norm": 1.0783088207244873, "learning_rate": 1.6354541816726694e-05, "loss": 0.5078, "step": 16830 }, { "epoch": 21.54368, "grad_norm": 1.1662565469741821, "learning_rate": 1.6352541016406563e-05, "loss": 0.5252, "step": 16831 }, { "epoch": 21.54496, "grad_norm": 1.0940940380096436, "learning_rate": 1.6350540216086434e-05, "loss": 0.4984, "step": 16832 }, { "epoch": 21.54624, "grad_norm": 1.0466233491897583, "learning_rate": 1.6348539415766306e-05, "loss": 0.498, "step": 16833 }, { "epoch": 21.54752, "grad_norm": 1.097116470336914, "learning_rate": 1.634653861544618e-05, "loss": 0.4793, "step": 16834 }, { "epoch": 21.5488, "grad_norm": 1.19340980052948, "learning_rate": 1.634453781512605e-05, "loss": 0.5476, "step": 16835 }, { "epoch": 21.55008, "grad_norm": 1.0786470174789429, "learning_rate": 1.6342537014805922e-05, "loss": 0.4835, "step": 16836 }, { "epoch": 21.55136, "grad_norm": 
1.1250004768371582, "learning_rate": 1.6340536214485794e-05, "loss": 0.4834, "step": 16837 }, { "epoch": 21.55264, "grad_norm": 1.1461224555969238, "learning_rate": 1.633853541416567e-05, "loss": 0.5397, "step": 16838 }, { "epoch": 21.55392, "grad_norm": 1.153669834136963, "learning_rate": 1.6336534613845537e-05, "loss": 0.5246, "step": 16839 }, { "epoch": 21.5552, "grad_norm": 1.1360396146774292, "learning_rate": 1.633453381352541e-05, "loss": 0.4887, "step": 16840 }, { "epoch": 21.55648, "grad_norm": 1.1669892072677612, "learning_rate": 1.6332533013205284e-05, "loss": 0.5377, "step": 16841 }, { "epoch": 21.557760000000002, "grad_norm": 1.094748854637146, "learning_rate": 1.6330532212885156e-05, "loss": 0.4918, "step": 16842 }, { "epoch": 21.55904, "grad_norm": 1.1243646144866943, "learning_rate": 1.6328531412565025e-05, "loss": 0.4567, "step": 16843 }, { "epoch": 21.56032, "grad_norm": 1.0712041854858398, "learning_rate": 1.6326530612244897e-05, "loss": 0.4915, "step": 16844 }, { "epoch": 21.5616, "grad_norm": 1.2099251747131348, "learning_rate": 1.6324529811924772e-05, "loss": 0.5528, "step": 16845 }, { "epoch": 21.56288, "grad_norm": 1.102573037147522, "learning_rate": 1.6322529011604644e-05, "loss": 0.4806, "step": 16846 }, { "epoch": 21.56416, "grad_norm": 1.1252408027648926, "learning_rate": 1.6320528211284512e-05, "loss": 0.48, "step": 16847 }, { "epoch": 21.56544, "grad_norm": 1.112750768661499, "learning_rate": 1.6318527410964387e-05, "loss": 0.5078, "step": 16848 }, { "epoch": 21.56672, "grad_norm": 1.0940823554992676, "learning_rate": 1.631652661064426e-05, "loss": 0.499, "step": 16849 }, { "epoch": 21.568, "grad_norm": 1.04874849319458, "learning_rate": 1.631452581032413e-05, "loss": 0.4649, "step": 16850 }, { "epoch": 21.56928, "grad_norm": 1.0783095359802246, "learning_rate": 1.6312525010004e-05, "loss": 0.5148, "step": 16851 }, { "epoch": 21.57056, "grad_norm": 1.1552876234054565, "learning_rate": 1.6310524209683875e-05, "loss": 0.5087, "step": 
16852 }, { "epoch": 21.57184, "grad_norm": 1.1094108819961548, "learning_rate": 1.6308523409363747e-05, "loss": 0.4625, "step": 16853 }, { "epoch": 21.57312, "grad_norm": 1.1274285316467285, "learning_rate": 1.630652260904362e-05, "loss": 0.5366, "step": 16854 }, { "epoch": 21.5744, "grad_norm": 1.1100611686706543, "learning_rate": 1.630452180872349e-05, "loss": 0.5105, "step": 16855 }, { "epoch": 21.57568, "grad_norm": 1.108077883720398, "learning_rate": 1.6302521008403362e-05, "loss": 0.4801, "step": 16856 }, { "epoch": 21.57696, "grad_norm": 1.1188905239105225, "learning_rate": 1.6300520208083234e-05, "loss": 0.4432, "step": 16857 }, { "epoch": 21.57824, "grad_norm": 1.2001028060913086, "learning_rate": 1.6298519407763106e-05, "loss": 0.5044, "step": 16858 }, { "epoch": 21.57952, "grad_norm": 1.151809573173523, "learning_rate": 1.6296518607442978e-05, "loss": 0.4696, "step": 16859 }, { "epoch": 21.5808, "grad_norm": 1.1924430131912231, "learning_rate": 1.629451780712285e-05, "loss": 0.5198, "step": 16860 }, { "epoch": 21.58208, "grad_norm": 1.1216545104980469, "learning_rate": 1.629251700680272e-05, "loss": 0.4543, "step": 16861 }, { "epoch": 21.58336, "grad_norm": 1.150597333908081, "learning_rate": 1.6290516206482593e-05, "loss": 0.5262, "step": 16862 }, { "epoch": 21.58464, "grad_norm": 1.1352801322937012, "learning_rate": 1.6288515406162465e-05, "loss": 0.4916, "step": 16863 }, { "epoch": 21.58592, "grad_norm": 1.0616695880889893, "learning_rate": 1.6286514605842337e-05, "loss": 0.4861, "step": 16864 }, { "epoch": 21.5872, "grad_norm": 1.079972743988037, "learning_rate": 1.628451380552221e-05, "loss": 0.4737, "step": 16865 }, { "epoch": 21.58848, "grad_norm": 1.0440356731414795, "learning_rate": 1.628251300520208e-05, "loss": 0.499, "step": 16866 }, { "epoch": 21.58976, "grad_norm": 1.1359690427780151, "learning_rate": 1.6280512204881953e-05, "loss": 0.5054, "step": 16867 }, { "epoch": 21.59104, "grad_norm": 1.1668392419815063, "learning_rate": 
1.6278511404561825e-05, "loss": 0.5426, "step": 16868 }, { "epoch": 21.59232, "grad_norm": 1.2051527500152588, "learning_rate": 1.62765106042417e-05, "loss": 0.4973, "step": 16869 }, { "epoch": 21.5936, "grad_norm": 1.0257412195205688, "learning_rate": 1.627450980392157e-05, "loss": 0.4355, "step": 16870 }, { "epoch": 21.59488, "grad_norm": 1.0660890340805054, "learning_rate": 1.627250900360144e-05, "loss": 0.4721, "step": 16871 }, { "epoch": 21.59616, "grad_norm": 1.1143311262130737, "learning_rate": 1.6270508203281312e-05, "loss": 0.4979, "step": 16872 }, { "epoch": 21.59744, "grad_norm": 1.1307166814804077, "learning_rate": 1.6268507402961187e-05, "loss": 0.516, "step": 16873 }, { "epoch": 21.59872, "grad_norm": 1.0710972547531128, "learning_rate": 1.6266506602641056e-05, "loss": 0.4866, "step": 16874 }, { "epoch": 21.6, "grad_norm": 1.1194899082183838, "learning_rate": 1.6264505802320928e-05, "loss": 0.4978, "step": 16875 }, { "epoch": 21.60128, "grad_norm": 1.1724941730499268, "learning_rate": 1.6262505002000803e-05, "loss": 0.5422, "step": 16876 }, { "epoch": 21.60256, "grad_norm": 1.1926066875457764, "learning_rate": 1.6260504201680675e-05, "loss": 0.5132, "step": 16877 }, { "epoch": 21.60384, "grad_norm": 1.1342614889144897, "learning_rate": 1.6258503401360543e-05, "loss": 0.4642, "step": 16878 }, { "epoch": 21.60512, "grad_norm": 1.0815118551254272, "learning_rate": 1.6256502601040415e-05, "loss": 0.4648, "step": 16879 }, { "epoch": 21.6064, "grad_norm": 1.073204517364502, "learning_rate": 1.625450180072029e-05, "loss": 0.4942, "step": 16880 }, { "epoch": 21.60768, "grad_norm": 1.1120681762695312, "learning_rate": 1.6252501000400162e-05, "loss": 0.5087, "step": 16881 }, { "epoch": 21.60896, "grad_norm": 1.085679292678833, "learning_rate": 1.625050020008003e-05, "loss": 0.4773, "step": 16882 }, { "epoch": 21.61024, "grad_norm": 1.0857542753219604, "learning_rate": 1.6248499399759906e-05, "loss": 0.4978, "step": 16883 }, { "epoch": 21.61152, "grad_norm": 
1.0734546184539795, "learning_rate": 1.6246498599439778e-05, "loss": 0.4487, "step": 16884 }, { "epoch": 21.6128, "grad_norm": 1.1066250801086426, "learning_rate": 1.624449779911965e-05, "loss": 0.5063, "step": 16885 }, { "epoch": 21.61408, "grad_norm": 1.0745172500610352, "learning_rate": 1.6242496998799518e-05, "loss": 0.4654, "step": 16886 }, { "epoch": 21.61536, "grad_norm": 1.1120097637176514, "learning_rate": 1.6240496198479393e-05, "loss": 0.4833, "step": 16887 }, { "epoch": 21.61664, "grad_norm": 1.0820595026016235, "learning_rate": 1.6238495398159265e-05, "loss": 0.4938, "step": 16888 }, { "epoch": 21.61792, "grad_norm": 1.0889438390731812, "learning_rate": 1.6236494597839137e-05, "loss": 0.4847, "step": 16889 }, { "epoch": 21.6192, "grad_norm": 1.1894570589065552, "learning_rate": 1.6234493797519005e-05, "loss": 0.5127, "step": 16890 }, { "epoch": 21.62048, "grad_norm": 1.123768925666809, "learning_rate": 1.623249299719888e-05, "loss": 0.4727, "step": 16891 }, { "epoch": 21.62176, "grad_norm": 1.0649784803390503, "learning_rate": 1.6230492196878753e-05, "loss": 0.4568, "step": 16892 }, { "epoch": 21.62304, "grad_norm": 1.1826279163360596, "learning_rate": 1.6228491396558624e-05, "loss": 0.4925, "step": 16893 }, { "epoch": 21.62432, "grad_norm": 1.1905529499053955, "learning_rate": 1.6226490596238496e-05, "loss": 0.495, "step": 16894 }, { "epoch": 21.6256, "grad_norm": 1.206997275352478, "learning_rate": 1.6224489795918368e-05, "loss": 0.4867, "step": 16895 }, { "epoch": 21.62688, "grad_norm": 1.1951205730438232, "learning_rate": 1.622248899559824e-05, "loss": 0.5134, "step": 16896 }, { "epoch": 21.62816, "grad_norm": 1.1452414989471436, "learning_rate": 1.6220488195278112e-05, "loss": 0.4927, "step": 16897 }, { "epoch": 21.62944, "grad_norm": 1.1808103322982788, "learning_rate": 1.6218487394957984e-05, "loss": 0.5282, "step": 16898 }, { "epoch": 21.63072, "grad_norm": 1.0460494756698608, "learning_rate": 1.6216486594637856e-05, "loss": 0.4496, "step": 
16899 }, { "epoch": 21.632, "grad_norm": 1.1484510898590088, "learning_rate": 1.6214485794317727e-05, "loss": 0.4895, "step": 16900 }, { "epoch": 21.63328, "grad_norm": 1.153800129890442, "learning_rate": 1.62124849939976e-05, "loss": 0.5, "step": 16901 }, { "epoch": 21.63456, "grad_norm": 1.1488093137741089, "learning_rate": 1.621048419367747e-05, "loss": 0.5194, "step": 16902 }, { "epoch": 21.63584, "grad_norm": 1.0916675329208374, "learning_rate": 1.6208483393357343e-05, "loss": 0.4648, "step": 16903 }, { "epoch": 21.63712, "grad_norm": 1.0833429098129272, "learning_rate": 1.6206482593037215e-05, "loss": 0.4548, "step": 16904 }, { "epoch": 21.6384, "grad_norm": 1.1267287731170654, "learning_rate": 1.6204481792717087e-05, "loss": 0.5134, "step": 16905 }, { "epoch": 21.63968, "grad_norm": 1.2103078365325928, "learning_rate": 1.620248099239696e-05, "loss": 0.5598, "step": 16906 }, { "epoch": 21.64096, "grad_norm": 1.0767333507537842, "learning_rate": 1.620048019207683e-05, "loss": 0.4599, "step": 16907 }, { "epoch": 21.64224, "grad_norm": 1.12871515750885, "learning_rate": 1.6198479391756706e-05, "loss": 0.5094, "step": 16908 }, { "epoch": 21.64352, "grad_norm": 1.0893502235412598, "learning_rate": 1.6196478591436574e-05, "loss": 0.4617, "step": 16909 }, { "epoch": 21.6448, "grad_norm": 1.1467949151992798, "learning_rate": 1.6194477791116446e-05, "loss": 0.4717, "step": 16910 }, { "epoch": 21.64608, "grad_norm": 1.151532530784607, "learning_rate": 1.6192476990796318e-05, "loss": 0.4958, "step": 16911 }, { "epoch": 21.64736, "grad_norm": 1.151410698890686, "learning_rate": 1.6190476190476193e-05, "loss": 0.5203, "step": 16912 }, { "epoch": 21.64864, "grad_norm": 1.044360876083374, "learning_rate": 1.618847539015606e-05, "loss": 0.4547, "step": 16913 }, { "epoch": 21.64992, "grad_norm": 1.1051994562149048, "learning_rate": 1.6186474589835933e-05, "loss": 0.5285, "step": 16914 }, { "epoch": 21.6512, "grad_norm": 1.0989066362380981, "learning_rate": 
1.618447378951581e-05, "loss": 0.4682, "step": 16915 }, { "epoch": 21.65248, "grad_norm": 1.1165852546691895, "learning_rate": 1.618247298919568e-05, "loss": 0.5312, "step": 16916 }, { "epoch": 21.65376, "grad_norm": 1.1108812093734741, "learning_rate": 1.618047218887555e-05, "loss": 0.5191, "step": 16917 }, { "epoch": 21.65504, "grad_norm": 1.1192561388015747, "learning_rate": 1.617847138855542e-05, "loss": 0.4527, "step": 16918 }, { "epoch": 21.65632, "grad_norm": 1.137689232826233, "learning_rate": 1.6176470588235296e-05, "loss": 0.4527, "step": 16919 }, { "epoch": 21.6576, "grad_norm": 1.1603665351867676, "learning_rate": 1.6174469787915168e-05, "loss": 0.5427, "step": 16920 }, { "epoch": 21.65888, "grad_norm": 1.0487416982650757, "learning_rate": 1.6172468987595036e-05, "loss": 0.4543, "step": 16921 }, { "epoch": 21.66016, "grad_norm": 1.0956952571868896, "learning_rate": 1.617046818727491e-05, "loss": 0.4952, "step": 16922 }, { "epoch": 21.66144, "grad_norm": 1.0673739910125732, "learning_rate": 1.6168467386954784e-05, "loss": 0.4378, "step": 16923 }, { "epoch": 21.66272, "grad_norm": 1.1492853164672852, "learning_rate": 1.6166466586634655e-05, "loss": 0.5302, "step": 16924 }, { "epoch": 21.664, "grad_norm": 1.058276653289795, "learning_rate": 1.6164465786314524e-05, "loss": 0.4846, "step": 16925 }, { "epoch": 21.66528, "grad_norm": 1.1349074840545654, "learning_rate": 1.61624649859944e-05, "loss": 0.5074, "step": 16926 }, { "epoch": 21.66656, "grad_norm": 1.1578072309494019, "learning_rate": 1.616046418567427e-05, "loss": 0.5537, "step": 16927 }, { "epoch": 21.667839999999998, "grad_norm": 1.1258023977279663, "learning_rate": 1.6158463385354143e-05, "loss": 0.5015, "step": 16928 }, { "epoch": 21.66912, "grad_norm": 1.1199811697006226, "learning_rate": 1.6156462585034015e-05, "loss": 0.5215, "step": 16929 }, { "epoch": 21.6704, "grad_norm": 1.0602338314056396, "learning_rate": 1.6154461784713887e-05, "loss": 0.4738, "step": 16930 }, { "epoch": 21.67168, 
"grad_norm": 1.1654423475265503, "learning_rate": 1.615246098439376e-05, "loss": 0.5589, "step": 16931 }, { "epoch": 21.67296, "grad_norm": 1.081931710243225, "learning_rate": 1.615046018407363e-05, "loss": 0.4466, "step": 16932 }, { "epoch": 21.67424, "grad_norm": 1.1227881908416748, "learning_rate": 1.6148459383753502e-05, "loss": 0.5191, "step": 16933 }, { "epoch": 21.67552, "grad_norm": 1.1291007995605469, "learning_rate": 1.6146458583433374e-05, "loss": 0.5003, "step": 16934 }, { "epoch": 21.6768, "grad_norm": 1.0886553525924683, "learning_rate": 1.6144457783113246e-05, "loss": 0.4575, "step": 16935 }, { "epoch": 21.67808, "grad_norm": 1.2142690420150757, "learning_rate": 1.614245698279312e-05, "loss": 0.5728, "step": 16936 }, { "epoch": 21.67936, "grad_norm": 1.0875095129013062, "learning_rate": 1.614045618247299e-05, "loss": 0.4926, "step": 16937 }, { "epoch": 21.68064, "grad_norm": 1.092563509941101, "learning_rate": 1.613845538215286e-05, "loss": 0.4254, "step": 16938 }, { "epoch": 21.68192, "grad_norm": 1.0964988470077515, "learning_rate": 1.6136454581832733e-05, "loss": 0.4502, "step": 16939 }, { "epoch": 21.6832, "grad_norm": 1.0522211790084839, "learning_rate": 1.613445378151261e-05, "loss": 0.4743, "step": 16940 }, { "epoch": 21.68448, "grad_norm": 1.0800302028656006, "learning_rate": 1.6132452981192477e-05, "loss": 0.4934, "step": 16941 }, { "epoch": 21.68576, "grad_norm": 1.1527082920074463, "learning_rate": 1.613045218087235e-05, "loss": 0.5126, "step": 16942 }, { "epoch": 21.68704, "grad_norm": 1.0669933557510376, "learning_rate": 1.6128451380552224e-05, "loss": 0.4885, "step": 16943 }, { "epoch": 21.68832, "grad_norm": 1.1759413480758667, "learning_rate": 1.6126450580232096e-05, "loss": 0.5232, "step": 16944 }, { "epoch": 21.6896, "grad_norm": 1.2183908224105835, "learning_rate": 1.6124449779911964e-05, "loss": 0.5372, "step": 16945 }, { "epoch": 21.69088, "grad_norm": 1.1375067234039307, "learning_rate": 1.6122448979591836e-05, "loss": 0.5206, 
"step": 16946 }, { "epoch": 21.69216, "grad_norm": 1.2400755882263184, "learning_rate": 1.612044817927171e-05, "loss": 0.5103, "step": 16947 }, { "epoch": 21.69344, "grad_norm": 1.172790288925171, "learning_rate": 1.6118447378951583e-05, "loss": 0.4674, "step": 16948 }, { "epoch": 21.69472, "grad_norm": 1.0851976871490479, "learning_rate": 1.6116446578631452e-05, "loss": 0.4606, "step": 16949 }, { "epoch": 21.696, "grad_norm": 1.1035497188568115, "learning_rate": 1.6114445778311324e-05, "loss": 0.4504, "step": 16950 }, { "epoch": 21.69728, "grad_norm": 1.1531620025634766, "learning_rate": 1.61124449779912e-05, "loss": 0.5273, "step": 16951 }, { "epoch": 21.69856, "grad_norm": 1.0832749605178833, "learning_rate": 1.611044417767107e-05, "loss": 0.4824, "step": 16952 }, { "epoch": 21.699840000000002, "grad_norm": 1.1482528448104858, "learning_rate": 1.610844337735094e-05, "loss": 0.5408, "step": 16953 }, { "epoch": 21.70112, "grad_norm": 1.0921155214309692, "learning_rate": 1.6106442577030814e-05, "loss": 0.4788, "step": 16954 }, { "epoch": 21.7024, "grad_norm": 1.0282621383666992, "learning_rate": 1.6104441776710686e-05, "loss": 0.422, "step": 16955 }, { "epoch": 21.70368, "grad_norm": 1.1054280996322632, "learning_rate": 1.6102440976390558e-05, "loss": 0.4924, "step": 16956 }, { "epoch": 21.70496, "grad_norm": 1.1694878339767456, "learning_rate": 1.6100440176070427e-05, "loss": 0.5017, "step": 16957 }, { "epoch": 21.70624, "grad_norm": 1.0920751094818115, "learning_rate": 1.6098439375750302e-05, "loss": 0.5053, "step": 16958 }, { "epoch": 21.70752, "grad_norm": 1.0558879375457764, "learning_rate": 1.6096438575430174e-05, "loss": 0.4535, "step": 16959 }, { "epoch": 21.7088, "grad_norm": 1.084283709526062, "learning_rate": 1.6094437775110046e-05, "loss": 0.4792, "step": 16960 }, { "epoch": 21.71008, "grad_norm": 1.0939773321151733, "learning_rate": 1.6092436974789917e-05, "loss": 0.466, "step": 16961 }, { "epoch": 21.71136, "grad_norm": 1.151667594909668, 
"learning_rate": 1.609043617446979e-05, "loss": 0.5377, "step": 16962 }, { "epoch": 21.71264, "grad_norm": 1.173499345779419, "learning_rate": 1.608843537414966e-05, "loss": 0.5031, "step": 16963 }, { "epoch": 21.71392, "grad_norm": 1.102555751800537, "learning_rate": 1.6086434573829533e-05, "loss": 0.457, "step": 16964 }, { "epoch": 21.7152, "grad_norm": 1.1618655920028687, "learning_rate": 1.6084433773509405e-05, "loss": 0.513, "step": 16965 }, { "epoch": 21.71648, "grad_norm": 1.0982638597488403, "learning_rate": 1.6082432973189277e-05, "loss": 0.4711, "step": 16966 }, { "epoch": 21.71776, "grad_norm": 1.179809808731079, "learning_rate": 1.608043217286915e-05, "loss": 0.5369, "step": 16967 }, { "epoch": 21.71904, "grad_norm": 1.1830394268035889, "learning_rate": 1.607843137254902e-05, "loss": 0.5041, "step": 16968 }, { "epoch": 21.72032, "grad_norm": 1.1552484035491943, "learning_rate": 1.6076430572228892e-05, "loss": 0.5089, "step": 16969 }, { "epoch": 21.7216, "grad_norm": 1.0561984777450562, "learning_rate": 1.6074429771908764e-05, "loss": 0.4571, "step": 16970 }, { "epoch": 21.72288, "grad_norm": 1.2352055311203003, "learning_rate": 1.6072428971588636e-05, "loss": 0.5468, "step": 16971 }, { "epoch": 21.72416, "grad_norm": 1.0787138938903809, "learning_rate": 1.6070428171268508e-05, "loss": 0.4551, "step": 16972 }, { "epoch": 21.72544, "grad_norm": 1.0198187828063965, "learning_rate": 1.606842737094838e-05, "loss": 0.4421, "step": 16973 }, { "epoch": 21.72672, "grad_norm": 1.1738202571868896, "learning_rate": 1.606642657062825e-05, "loss": 0.5602, "step": 16974 }, { "epoch": 21.728, "grad_norm": 1.08427095413208, "learning_rate": 1.6064425770308127e-05, "loss": 0.5051, "step": 16975 }, { "epoch": 21.72928, "grad_norm": 1.0601693391799927, "learning_rate": 1.6062424969987995e-05, "loss": 0.4876, "step": 16976 }, { "epoch": 21.73056, "grad_norm": 1.016387701034546, "learning_rate": 1.6060424169667867e-05, "loss": 0.4689, "step": 16977 }, { "epoch": 21.73184, 
"grad_norm": 1.1087530851364136, "learning_rate": 1.605842336934774e-05, "loss": 0.5092, "step": 16978 }, { "epoch": 21.73312, "grad_norm": 1.0978829860687256, "learning_rate": 1.6056422569027614e-05, "loss": 0.4665, "step": 16979 }, { "epoch": 21.7344, "grad_norm": 1.1613261699676514, "learning_rate": 1.6054421768707483e-05, "loss": 0.5061, "step": 16980 }, { "epoch": 21.73568, "grad_norm": 1.1227811574935913, "learning_rate": 1.6052420968387355e-05, "loss": 0.5022, "step": 16981 }, { "epoch": 21.73696, "grad_norm": 1.1268730163574219, "learning_rate": 1.605042016806723e-05, "loss": 0.4788, "step": 16982 }, { "epoch": 21.73824, "grad_norm": 1.1465482711791992, "learning_rate": 1.6048419367747102e-05, "loss": 0.4893, "step": 16983 }, { "epoch": 21.73952, "grad_norm": 1.1241455078125, "learning_rate": 1.604641856742697e-05, "loss": 0.5033, "step": 16984 }, { "epoch": 21.7408, "grad_norm": 1.1142101287841797, "learning_rate": 1.6044417767106842e-05, "loss": 0.5004, "step": 16985 }, { "epoch": 21.74208, "grad_norm": 1.125550389289856, "learning_rate": 1.6042416966786717e-05, "loss": 0.5053, "step": 16986 }, { "epoch": 21.74336, "grad_norm": 1.0979461669921875, "learning_rate": 1.604041616646659e-05, "loss": 0.514, "step": 16987 }, { "epoch": 21.74464, "grad_norm": 1.0835514068603516, "learning_rate": 1.6038415366146458e-05, "loss": 0.4502, "step": 16988 }, { "epoch": 21.74592, "grad_norm": 1.1370912790298462, "learning_rate": 1.6036414565826333e-05, "loss": 0.5205, "step": 16989 }, { "epoch": 21.7472, "grad_norm": 1.076702356338501, "learning_rate": 1.6034413765506205e-05, "loss": 0.4769, "step": 16990 }, { "epoch": 21.74848, "grad_norm": 1.1314976215362549, "learning_rate": 1.6032412965186077e-05, "loss": 0.473, "step": 16991 }, { "epoch": 21.74976, "grad_norm": 1.1105777025222778, "learning_rate": 1.6030412164865945e-05, "loss": 0.5232, "step": 16992 }, { "epoch": 21.75104, "grad_norm": 1.0831820964813232, "learning_rate": 1.602841136454582e-05, "loss": 0.4784, 
"step": 16993 }, { "epoch": 21.75232, "grad_norm": 1.2100034952163696, "learning_rate": 1.6026410564225692e-05, "loss": 0.4977, "step": 16994 }, { "epoch": 21.7536, "grad_norm": 1.1598615646362305, "learning_rate": 1.6024409763905564e-05, "loss": 0.5115, "step": 16995 }, { "epoch": 21.75488, "grad_norm": 1.0910362005233765, "learning_rate": 1.6022408963585436e-05, "loss": 0.5047, "step": 16996 }, { "epoch": 21.75616, "grad_norm": 1.12237548828125, "learning_rate": 1.6020408163265308e-05, "loss": 0.4748, "step": 16997 }, { "epoch": 21.75744, "grad_norm": 1.175667643547058, "learning_rate": 1.601840736294518e-05, "loss": 0.5035, "step": 16998 }, { "epoch": 21.75872, "grad_norm": 1.1076536178588867, "learning_rate": 1.601640656262505e-05, "loss": 0.5181, "step": 16999 }, { "epoch": 21.76, "grad_norm": 1.105377197265625, "learning_rate": 1.6014405762304923e-05, "loss": 0.4939, "step": 17000 }, { "epoch": 21.76128, "grad_norm": 1.052820086479187, "learning_rate": 1.6012404961984795e-05, "loss": 0.4644, "step": 17001 }, { "epoch": 21.76256, "grad_norm": 1.0277769565582275, "learning_rate": 1.6010404161664667e-05, "loss": 0.4438, "step": 17002 }, { "epoch": 21.76384, "grad_norm": 1.1701537370681763, "learning_rate": 1.600840336134454e-05, "loss": 0.5511, "step": 17003 }, { "epoch": 21.76512, "grad_norm": 1.1238141059875488, "learning_rate": 1.600640256102441e-05, "loss": 0.5343, "step": 17004 }, { "epoch": 21.7664, "grad_norm": 1.1096354722976685, "learning_rate": 1.6004401760704283e-05, "loss": 0.576, "step": 17005 }, { "epoch": 21.76768, "grad_norm": 1.1454472541809082, "learning_rate": 1.6002400960384154e-05, "loss": 0.5276, "step": 17006 }, { "epoch": 21.76896, "grad_norm": 1.095155954360962, "learning_rate": 1.6000400160064026e-05, "loss": 0.4968, "step": 17007 }, { "epoch": 21.77024, "grad_norm": 1.0994304418563843, "learning_rate": 1.5998399359743898e-05, "loss": 0.4959, "step": 17008 }, { "epoch": 21.77152, "grad_norm": 1.117372989654541, "learning_rate": 
1.599639855942377e-05, "loss": 0.5205, "step": 17009 }, { "epoch": 21.7728, "grad_norm": 1.0571292638778687, "learning_rate": 1.5994397759103642e-05, "loss": 0.4693, "step": 17010 }, { "epoch": 21.77408, "grad_norm": 1.0867185592651367, "learning_rate": 1.5992396958783514e-05, "loss": 0.4867, "step": 17011 }, { "epoch": 21.77536, "grad_norm": 1.0914740562438965, "learning_rate": 1.5990396158463386e-05, "loss": 0.4593, "step": 17012 }, { "epoch": 21.77664, "grad_norm": 1.1249064207077026, "learning_rate": 1.5988395358143257e-05, "loss": 0.4779, "step": 17013 }, { "epoch": 21.77792, "grad_norm": 1.1130808591842651, "learning_rate": 1.5986394557823133e-05, "loss": 0.4954, "step": 17014 }, { "epoch": 21.7792, "grad_norm": 1.0818630456924438, "learning_rate": 1.5984393757503e-05, "loss": 0.4842, "step": 17015 }, { "epoch": 21.78048, "grad_norm": 1.1350693702697754, "learning_rate": 1.5982392957182873e-05, "loss": 0.4944, "step": 17016 }, { "epoch": 21.78176, "grad_norm": 1.1261863708496094, "learning_rate": 1.5980392156862745e-05, "loss": 0.4952, "step": 17017 }, { "epoch": 21.78304, "grad_norm": 1.1032289266586304, "learning_rate": 1.597839135654262e-05, "loss": 0.505, "step": 17018 }, { "epoch": 21.78432, "grad_norm": 1.1199618577957153, "learning_rate": 1.597639055622249e-05, "loss": 0.4496, "step": 17019 }, { "epoch": 21.7856, "grad_norm": 1.105373740196228, "learning_rate": 1.597438975590236e-05, "loss": 0.5072, "step": 17020 }, { "epoch": 21.78688, "grad_norm": 1.0840733051300049, "learning_rate": 1.5972388955582236e-05, "loss": 0.5159, "step": 17021 }, { "epoch": 21.78816, "grad_norm": 1.0092965364456177, "learning_rate": 1.5970388155262107e-05, "loss": 0.4299, "step": 17022 }, { "epoch": 21.78944, "grad_norm": 1.0409196615219116, "learning_rate": 1.5968387354941976e-05, "loss": 0.4441, "step": 17023 }, { "epoch": 21.79072, "grad_norm": 1.1237512826919556, "learning_rate": 1.5966386554621848e-05, "loss": 0.5225, "step": 17024 }, { "epoch": 21.792, "grad_norm": 
1.1367634534835815, "learning_rate": 1.5964385754301723e-05, "loss": 0.4826, "step": 17025 }, { "epoch": 21.79328, "grad_norm": 1.146195650100708, "learning_rate": 1.5962384953981595e-05, "loss": 0.5016, "step": 17026 }, { "epoch": 21.79456, "grad_norm": 1.0935816764831543, "learning_rate": 1.5960384153661463e-05, "loss": 0.4626, "step": 17027 }, { "epoch": 21.79584, "grad_norm": 1.169556975364685, "learning_rate": 1.595838335334134e-05, "loss": 0.5184, "step": 17028 }, { "epoch": 21.79712, "grad_norm": 1.1683391332626343, "learning_rate": 1.595638255302121e-05, "loss": 0.5, "step": 17029 }, { "epoch": 21.7984, "grad_norm": 1.1834429502487183, "learning_rate": 1.5954381752701082e-05, "loss": 0.5633, "step": 17030 }, { "epoch": 21.79968, "grad_norm": 1.1966886520385742, "learning_rate": 1.595238095238095e-05, "loss": 0.5166, "step": 17031 }, { "epoch": 21.80096, "grad_norm": 1.1592307090759277, "learning_rate": 1.5950380152060826e-05, "loss": 0.5007, "step": 17032 }, { "epoch": 21.80224, "grad_norm": 1.0894125699996948, "learning_rate": 1.5948379351740698e-05, "loss": 0.4405, "step": 17033 }, { "epoch": 21.80352, "grad_norm": 1.1028647422790527, "learning_rate": 1.594637855142057e-05, "loss": 0.4736, "step": 17034 }, { "epoch": 21.8048, "grad_norm": 1.01607084274292, "learning_rate": 1.594437775110044e-05, "loss": 0.4253, "step": 17035 }, { "epoch": 21.80608, "grad_norm": 1.1347633600234985, "learning_rate": 1.5942376950780313e-05, "loss": 0.5113, "step": 17036 }, { "epoch": 21.80736, "grad_norm": 1.1249445676803589, "learning_rate": 1.5940376150460185e-05, "loss": 0.4994, "step": 17037 }, { "epoch": 21.80864, "grad_norm": 1.192696452140808, "learning_rate": 1.5938375350140057e-05, "loss": 0.5374, "step": 17038 }, { "epoch": 21.809919999999998, "grad_norm": 1.092063307762146, "learning_rate": 1.593637454981993e-05, "loss": 0.4376, "step": 17039 }, { "epoch": 21.8112, "grad_norm": 1.0697550773620605, "learning_rate": 1.59343737494998e-05, "loss": 0.4639, "step": 
17040 }, { "epoch": 21.81248, "grad_norm": 1.066964864730835, "learning_rate": 1.5932372949179673e-05, "loss": 0.4451, "step": 17041 }, { "epoch": 21.81376, "grad_norm": 1.099763035774231, "learning_rate": 1.5930372148859545e-05, "loss": 0.4918, "step": 17042 }, { "epoch": 21.81504, "grad_norm": 1.1521753072738647, "learning_rate": 1.5928371348539416e-05, "loss": 0.5071, "step": 17043 }, { "epoch": 21.81632, "grad_norm": 1.0961254835128784, "learning_rate": 1.592637054821929e-05, "loss": 0.496, "step": 17044 }, { "epoch": 21.8176, "grad_norm": 1.0640747547149658, "learning_rate": 1.592436974789916e-05, "loss": 0.4542, "step": 17045 }, { "epoch": 21.81888, "grad_norm": 1.2090388536453247, "learning_rate": 1.5922368947579032e-05, "loss": 0.5355, "step": 17046 }, { "epoch": 21.82016, "grad_norm": 1.0608830451965332, "learning_rate": 1.5920368147258904e-05, "loss": 0.4686, "step": 17047 }, { "epoch": 21.82144, "grad_norm": 1.1006875038146973, "learning_rate": 1.5918367346938776e-05, "loss": 0.5012, "step": 17048 }, { "epoch": 21.82272, "grad_norm": 1.132010817527771, "learning_rate": 1.591636654661865e-05, "loss": 0.5102, "step": 17049 }, { "epoch": 21.824, "grad_norm": 1.0770601034164429, "learning_rate": 1.591436574629852e-05, "loss": 0.4736, "step": 17050 }, { "epoch": 21.82528, "grad_norm": 1.1132420301437378, "learning_rate": 1.591236494597839e-05, "loss": 0.512, "step": 17051 }, { "epoch": 21.82656, "grad_norm": 1.1608811616897583, "learning_rate": 1.5910364145658263e-05, "loss": 0.4529, "step": 17052 }, { "epoch": 21.82784, "grad_norm": 1.1335759162902832, "learning_rate": 1.590836334533814e-05, "loss": 0.501, "step": 17053 }, { "epoch": 21.82912, "grad_norm": 1.0787115097045898, "learning_rate": 1.5906362545018007e-05, "loss": 0.478, "step": 17054 }, { "epoch": 21.8304, "grad_norm": 1.0791386365890503, "learning_rate": 1.590436174469788e-05, "loss": 0.4745, "step": 17055 }, { "epoch": 21.83168, "grad_norm": 1.0508241653442383, "learning_rate": 
1.590236094437775e-05, "loss": 0.4423, "step": 17056 }, { "epoch": 21.83296, "grad_norm": 1.061378836631775, "learning_rate": 1.5900360144057626e-05, "loss": 0.4637, "step": 17057 }, { "epoch": 21.83424, "grad_norm": 1.1394723653793335, "learning_rate": 1.5898359343737494e-05, "loss": 0.4634, "step": 17058 }, { "epoch": 21.83552, "grad_norm": 1.1414905786514282, "learning_rate": 1.5896358543417366e-05, "loss": 0.4838, "step": 17059 }, { "epoch": 21.8368, "grad_norm": 1.127100944519043, "learning_rate": 1.589435774309724e-05, "loss": 0.5292, "step": 17060 }, { "epoch": 21.83808, "grad_norm": 1.1181409358978271, "learning_rate": 1.5892356942777113e-05, "loss": 0.5023, "step": 17061 }, { "epoch": 21.83936, "grad_norm": 1.1021265983581543, "learning_rate": 1.5890356142456982e-05, "loss": 0.4538, "step": 17062 }, { "epoch": 21.84064, "grad_norm": 1.042770504951477, "learning_rate": 1.5888355342136854e-05, "loss": 0.4804, "step": 17063 }, { "epoch": 21.841920000000002, "grad_norm": 1.1434727907180786, "learning_rate": 1.588635454181673e-05, "loss": 0.5127, "step": 17064 }, { "epoch": 21.8432, "grad_norm": 1.111856460571289, "learning_rate": 1.58843537414966e-05, "loss": 0.5108, "step": 17065 }, { "epoch": 21.84448, "grad_norm": 1.0827891826629639, "learning_rate": 1.588235294117647e-05, "loss": 0.4725, "step": 17066 }, { "epoch": 21.84576, "grad_norm": 1.1117992401123047, "learning_rate": 1.5880352140856344e-05, "loss": 0.474, "step": 17067 }, { "epoch": 21.84704, "grad_norm": 1.1064029932022095, "learning_rate": 1.5878351340536216e-05, "loss": 0.4948, "step": 17068 }, { "epoch": 21.84832, "grad_norm": 1.0790382623672485, "learning_rate": 1.5876350540216088e-05, "loss": 0.5079, "step": 17069 }, { "epoch": 21.8496, "grad_norm": 1.1318869590759277, "learning_rate": 1.5874349739895957e-05, "loss": 0.5018, "step": 17070 }, { "epoch": 21.85088, "grad_norm": 1.1790701150894165, "learning_rate": 1.5872348939575832e-05, "loss": 0.5168, "step": 17071 }, { "epoch": 21.85216, 
"grad_norm": 1.1890596151351929, "learning_rate": 1.5870348139255704e-05, "loss": 0.5259, "step": 17072 }, { "epoch": 21.85344, "grad_norm": 1.1023216247558594, "learning_rate": 1.5868347338935576e-05, "loss": 0.5027, "step": 17073 }, { "epoch": 21.85472, "grad_norm": 1.2085516452789307, "learning_rate": 1.5866346538615447e-05, "loss": 0.5627, "step": 17074 }, { "epoch": 21.856, "grad_norm": 1.0791484117507935, "learning_rate": 1.586434573829532e-05, "loss": 0.5077, "step": 17075 }, { "epoch": 21.85728, "grad_norm": 1.1096769571304321, "learning_rate": 1.586234493797519e-05, "loss": 0.4913, "step": 17076 }, { "epoch": 21.85856, "grad_norm": 1.1315524578094482, "learning_rate": 1.5860344137655063e-05, "loss": 0.4864, "step": 17077 }, { "epoch": 21.85984, "grad_norm": 1.126545786857605, "learning_rate": 1.5858343337334935e-05, "loss": 0.4946, "step": 17078 }, { "epoch": 21.86112, "grad_norm": 1.0645451545715332, "learning_rate": 1.5856342537014807e-05, "loss": 0.4946, "step": 17079 }, { "epoch": 21.8624, "grad_norm": 1.1454330682754517, "learning_rate": 1.585434173669468e-05, "loss": 0.4943, "step": 17080 }, { "epoch": 21.86368, "grad_norm": 1.156087040901184, "learning_rate": 1.585234093637455e-05, "loss": 0.493, "step": 17081 }, { "epoch": 21.86496, "grad_norm": 1.093726634979248, "learning_rate": 1.5850340136054422e-05, "loss": 0.4829, "step": 17082 }, { "epoch": 21.86624, "grad_norm": 1.1023703813552856, "learning_rate": 1.5848339335734294e-05, "loss": 0.5139, "step": 17083 }, { "epoch": 21.86752, "grad_norm": 1.0602704286575317, "learning_rate": 1.5846338535414166e-05, "loss": 0.4675, "step": 17084 }, { "epoch": 21.8688, "grad_norm": 1.1112686395645142, "learning_rate": 1.5844337735094038e-05, "loss": 0.4874, "step": 17085 }, { "epoch": 21.87008, "grad_norm": 1.07467520236969, "learning_rate": 1.584233693477391e-05, "loss": 0.4669, "step": 17086 }, { "epoch": 21.87136, "grad_norm": 1.1608061790466309, "learning_rate": 1.584033613445378e-05, "loss": 0.5248, 
"step": 17087 }, { "epoch": 21.87264, "grad_norm": 1.1059867143630981, "learning_rate": 1.5838335334133657e-05, "loss": 0.5052, "step": 17088 }, { "epoch": 21.87392, "grad_norm": 1.120044231414795, "learning_rate": 1.5836334533813525e-05, "loss": 0.4999, "step": 17089 }, { "epoch": 21.8752, "grad_norm": 1.1224958896636963, "learning_rate": 1.5834333733493397e-05, "loss": 0.5023, "step": 17090 }, { "epoch": 21.87648, "grad_norm": 1.1289787292480469, "learning_rate": 1.583233293317327e-05, "loss": 0.5192, "step": 17091 }, { "epoch": 21.87776, "grad_norm": 1.0791914463043213, "learning_rate": 1.5830332132853144e-05, "loss": 0.4934, "step": 17092 }, { "epoch": 21.87904, "grad_norm": 1.1275081634521484, "learning_rate": 1.5828331332533013e-05, "loss": 0.5088, "step": 17093 }, { "epoch": 21.88032, "grad_norm": 1.1475367546081543, "learning_rate": 1.5826330532212885e-05, "loss": 0.5101, "step": 17094 }, { "epoch": 21.8816, "grad_norm": 1.1276719570159912, "learning_rate": 1.582432973189276e-05, "loss": 0.5062, "step": 17095 }, { "epoch": 21.88288, "grad_norm": 1.0605647563934326, "learning_rate": 1.582232893157263e-05, "loss": 0.465, "step": 17096 }, { "epoch": 21.88416, "grad_norm": 1.1123402118682861, "learning_rate": 1.58203281312525e-05, "loss": 0.5084, "step": 17097 }, { "epoch": 21.88544, "grad_norm": 1.1372113227844238, "learning_rate": 1.5818327330932372e-05, "loss": 0.5107, "step": 17098 }, { "epoch": 21.88672, "grad_norm": 1.104132890701294, "learning_rate": 1.5816326530612247e-05, "loss": 0.4793, "step": 17099 }, { "epoch": 21.888, "grad_norm": 1.0645356178283691, "learning_rate": 1.581432573029212e-05, "loss": 0.4723, "step": 17100 }, { "epoch": 21.88928, "grad_norm": 1.1151045560836792, "learning_rate": 1.5812324929971988e-05, "loss": 0.4642, "step": 17101 }, { "epoch": 21.89056, "grad_norm": 1.1927440166473389, "learning_rate": 1.5810324129651863e-05, "loss": 0.5306, "step": 17102 }, { "epoch": 21.89184, "grad_norm": 1.148725986480713, "learning_rate": 
1.5808323329331735e-05, "loss": 0.5302, "step": 17103 }, { "epoch": 21.89312, "grad_norm": 1.1260452270507812, "learning_rate": 1.5806322529011607e-05, "loss": 0.5168, "step": 17104 }, { "epoch": 21.8944, "grad_norm": 1.1221392154693604, "learning_rate": 1.5804321728691475e-05, "loss": 0.5141, "step": 17105 }, { "epoch": 21.89568, "grad_norm": 1.172025442123413, "learning_rate": 1.580232092837135e-05, "loss": 0.5186, "step": 17106 }, { "epoch": 21.89696, "grad_norm": 1.1051148176193237, "learning_rate": 1.5800320128051222e-05, "loss": 0.4797, "step": 17107 }, { "epoch": 21.89824, "grad_norm": 1.0419796705245972, "learning_rate": 1.5798319327731094e-05, "loss": 0.4483, "step": 17108 }, { "epoch": 21.89952, "grad_norm": 1.0614566802978516, "learning_rate": 1.5796318527410966e-05, "loss": 0.4695, "step": 17109 }, { "epoch": 21.9008, "grad_norm": 1.111011266708374, "learning_rate": 1.5794317727090838e-05, "loss": 0.4986, "step": 17110 }, { "epoch": 21.90208, "grad_norm": 1.1020454168319702, "learning_rate": 1.579231692677071e-05, "loss": 0.5104, "step": 17111 }, { "epoch": 21.90336, "grad_norm": 1.1246579885482788, "learning_rate": 1.579031612645058e-05, "loss": 0.5063, "step": 17112 }, { "epoch": 21.90464, "grad_norm": 1.0644440650939941, "learning_rate": 1.5788315326130453e-05, "loss": 0.4876, "step": 17113 }, { "epoch": 21.90592, "grad_norm": 1.0820109844207764, "learning_rate": 1.5786314525810325e-05, "loss": 0.5087, "step": 17114 }, { "epoch": 21.9072, "grad_norm": 1.0998029708862305, "learning_rate": 1.5784313725490197e-05, "loss": 0.4956, "step": 17115 }, { "epoch": 21.90848, "grad_norm": 1.1597471237182617, "learning_rate": 1.578231292517007e-05, "loss": 0.5432, "step": 17116 }, { "epoch": 21.90976, "grad_norm": 1.1102598905563354, "learning_rate": 1.578031212484994e-05, "loss": 0.528, "step": 17117 }, { "epoch": 21.91104, "grad_norm": 1.1272238492965698, "learning_rate": 1.5778311324529813e-05, "loss": 0.4563, "step": 17118 }, { "epoch": 21.91232, "grad_norm": 
1.0742859840393066, "learning_rate": 1.5776310524209684e-05, "loss": 0.4609, "step": 17119 }, { "epoch": 21.9136, "grad_norm": 1.1245981454849243, "learning_rate": 1.5774309723889556e-05, "loss": 0.5284, "step": 17120 }, { "epoch": 21.91488, "grad_norm": 1.1140938997268677, "learning_rate": 1.5772308923569428e-05, "loss": 0.4731, "step": 17121 }, { "epoch": 21.91616, "grad_norm": 1.1234639883041382, "learning_rate": 1.57703081232493e-05, "loss": 0.4937, "step": 17122 }, { "epoch": 21.91744, "grad_norm": 1.1589488983154297, "learning_rate": 1.5768307322929172e-05, "loss": 0.5186, "step": 17123 }, { "epoch": 21.91872, "grad_norm": 1.1331297159194946, "learning_rate": 1.5766306522609044e-05, "loss": 0.4893, "step": 17124 }, { "epoch": 21.92, "grad_norm": 1.1356956958770752, "learning_rate": 1.5764305722288916e-05, "loss": 0.4674, "step": 17125 }, { "epoch": 21.92128, "grad_norm": 1.0896925926208496, "learning_rate": 1.5762304921968787e-05, "loss": 0.462, "step": 17126 }, { "epoch": 21.92256, "grad_norm": 1.1289901733398438, "learning_rate": 1.5760304121648663e-05, "loss": 0.5332, "step": 17127 }, { "epoch": 21.92384, "grad_norm": 1.0392855405807495, "learning_rate": 1.575830332132853e-05, "loss": 0.4433, "step": 17128 }, { "epoch": 21.92512, "grad_norm": 1.1507699489593506, "learning_rate": 1.5756302521008403e-05, "loss": 0.5408, "step": 17129 }, { "epoch": 21.9264, "grad_norm": 1.0881316661834717, "learning_rate": 1.5754301720688275e-05, "loss": 0.5205, "step": 17130 }, { "epoch": 21.92768, "grad_norm": 1.1345676183700562, "learning_rate": 1.575230092036815e-05, "loss": 0.4812, "step": 17131 }, { "epoch": 21.92896, "grad_norm": 1.1051517724990845, "learning_rate": 1.575030012004802e-05, "loss": 0.5129, "step": 17132 }, { "epoch": 21.93024, "grad_norm": 1.1058467626571655, "learning_rate": 1.574829931972789e-05, "loss": 0.4401, "step": 17133 }, { "epoch": 21.93152, "grad_norm": 1.1275383234024048, "learning_rate": 1.5746298519407766e-05, "loss": 0.5036, "step": 17134 
}, { "epoch": 21.9328, "grad_norm": 1.1110889911651611, "learning_rate": 1.5744297719087637e-05, "loss": 0.4844, "step": 17135 }, { "epoch": 21.93408, "grad_norm": 1.1405858993530273, "learning_rate": 1.5742296918767506e-05, "loss": 0.5322, "step": 17136 }, { "epoch": 21.93536, "grad_norm": 1.1357598304748535, "learning_rate": 1.5740296118447378e-05, "loss": 0.4834, "step": 17137 }, { "epoch": 21.93664, "grad_norm": 1.1561405658721924, "learning_rate": 1.5738295318127253e-05, "loss": 0.5107, "step": 17138 }, { "epoch": 21.93792, "grad_norm": 1.1202070713043213, "learning_rate": 1.5736294517807125e-05, "loss": 0.5176, "step": 17139 }, { "epoch": 21.9392, "grad_norm": 1.141701579093933, "learning_rate": 1.5734293717486993e-05, "loss": 0.5304, "step": 17140 }, { "epoch": 21.94048, "grad_norm": 1.0925021171569824, "learning_rate": 1.573229291716687e-05, "loss": 0.4811, "step": 17141 }, { "epoch": 21.94176, "grad_norm": 1.147238850593567, "learning_rate": 1.573029211684674e-05, "loss": 0.5093, "step": 17142 }, { "epoch": 21.94304, "grad_norm": 1.1315157413482666, "learning_rate": 1.5728291316526612e-05, "loss": 0.4744, "step": 17143 }, { "epoch": 21.94432, "grad_norm": 1.1635738611221313, "learning_rate": 1.572629051620648e-05, "loss": 0.5085, "step": 17144 }, { "epoch": 21.9456, "grad_norm": 1.1979985237121582, "learning_rate": 1.5724289715886356e-05, "loss": 0.5047, "step": 17145 }, { "epoch": 21.94688, "grad_norm": 1.1460307836532593, "learning_rate": 1.5722288915566228e-05, "loss": 0.487, "step": 17146 }, { "epoch": 21.94816, "grad_norm": 1.087184190750122, "learning_rate": 1.57202881152461e-05, "loss": 0.524, "step": 17147 }, { "epoch": 21.94944, "grad_norm": 1.1512465476989746, "learning_rate": 1.571828731492597e-05, "loss": 0.4737, "step": 17148 }, { "epoch": 21.95072, "grad_norm": 1.1650768518447876, "learning_rate": 1.5716286514605843e-05, "loss": 0.5084, "step": 17149 }, { "epoch": 21.951999999999998, "grad_norm": 1.0953246355056763, "learning_rate": 
1.5714285714285715e-05, "loss": 0.5255, "step": 17150 }, { "epoch": 21.95328, "grad_norm": 1.0524414777755737, "learning_rate": 1.5712284913965587e-05, "loss": 0.485, "step": 17151 }, { "epoch": 21.95456, "grad_norm": 1.0316898822784424, "learning_rate": 1.571028411364546e-05, "loss": 0.4774, "step": 17152 }, { "epoch": 21.95584, "grad_norm": 1.0071184635162354, "learning_rate": 1.570828331332533e-05, "loss": 0.4582, "step": 17153 }, { "epoch": 21.95712, "grad_norm": 1.1721525192260742, "learning_rate": 1.5706282513005203e-05, "loss": 0.5407, "step": 17154 }, { "epoch": 21.9584, "grad_norm": 1.0725557804107666, "learning_rate": 1.5704281712685075e-05, "loss": 0.4728, "step": 17155 }, { "epoch": 21.95968, "grad_norm": 1.0535809993743896, "learning_rate": 1.5702280912364946e-05, "loss": 0.455, "step": 17156 }, { "epoch": 21.96096, "grad_norm": 1.1127283573150635, "learning_rate": 1.570028011204482e-05, "loss": 0.5051, "step": 17157 }, { "epoch": 21.96224, "grad_norm": 1.1096875667572021, "learning_rate": 1.569827931172469e-05, "loss": 0.4694, "step": 17158 }, { "epoch": 21.96352, "grad_norm": 1.105588436126709, "learning_rate": 1.5696278511404562e-05, "loss": 0.4654, "step": 17159 }, { "epoch": 21.9648, "grad_norm": 1.1018520593643188, "learning_rate": 1.5694277711084434e-05, "loss": 0.4964, "step": 17160 }, { "epoch": 21.96608, "grad_norm": 1.1417279243469238, "learning_rate": 1.5692276910764306e-05, "loss": 0.5166, "step": 17161 }, { "epoch": 21.96736, "grad_norm": 1.0698052644729614, "learning_rate": 1.569027611044418e-05, "loss": 0.5072, "step": 17162 }, { "epoch": 21.96864, "grad_norm": 1.1088814735412598, "learning_rate": 1.568827531012405e-05, "loss": 0.5034, "step": 17163 }, { "epoch": 21.96992, "grad_norm": 1.0650029182434082, "learning_rate": 1.568627450980392e-05, "loss": 0.4262, "step": 17164 }, { "epoch": 21.9712, "grad_norm": 1.0559251308441162, "learning_rate": 1.5684273709483793e-05, "loss": 0.4375, "step": 17165 }, { "epoch": 21.97248, "grad_norm": 
1.107661485671997, "learning_rate": 1.568227290916367e-05, "loss": 0.4978, "step": 17166 }, { "epoch": 21.97376, "grad_norm": 1.109234094619751, "learning_rate": 1.5680272108843537e-05, "loss": 0.5112, "step": 17167 }, { "epoch": 21.97504, "grad_norm": 1.1099008321762085, "learning_rate": 1.567827130852341e-05, "loss": 0.4588, "step": 17168 }, { "epoch": 21.97632, "grad_norm": 1.1135979890823364, "learning_rate": 1.567627050820328e-05, "loss": 0.4801, "step": 17169 }, { "epoch": 21.9776, "grad_norm": 1.1482584476470947, "learning_rate": 1.5674269707883156e-05, "loss": 0.4637, "step": 17170 }, { "epoch": 21.97888, "grad_norm": 1.0247658491134644, "learning_rate": 1.5672268907563024e-05, "loss": 0.4485, "step": 17171 }, { "epoch": 21.98016, "grad_norm": 1.0352832078933716, "learning_rate": 1.5670268107242896e-05, "loss": 0.4452, "step": 17172 }, { "epoch": 21.98144, "grad_norm": 1.1640503406524658, "learning_rate": 1.566826730692277e-05, "loss": 0.5008, "step": 17173 }, { "epoch": 21.98272, "grad_norm": 1.086647629737854, "learning_rate": 1.5666266506602643e-05, "loss": 0.4714, "step": 17174 }, { "epoch": 21.984, "grad_norm": 1.1195262670516968, "learning_rate": 1.5664265706282512e-05, "loss": 0.5151, "step": 17175 }, { "epoch": 21.98528, "grad_norm": 1.1734910011291504, "learning_rate": 1.5662264905962384e-05, "loss": 0.5262, "step": 17176 }, { "epoch": 21.98656, "grad_norm": 1.1472947597503662, "learning_rate": 1.566026410564226e-05, "loss": 0.4849, "step": 17177 }, { "epoch": 21.98784, "grad_norm": 1.114270806312561, "learning_rate": 1.565826330532213e-05, "loss": 0.5082, "step": 17178 }, { "epoch": 21.98912, "grad_norm": 1.0618884563446045, "learning_rate": 1.5656262505002e-05, "loss": 0.4387, "step": 17179 }, { "epoch": 21.9904, "grad_norm": 1.098699927330017, "learning_rate": 1.5654261704681874e-05, "loss": 0.5168, "step": 17180 }, { "epoch": 21.99168, "grad_norm": 1.1803953647613525, "learning_rate": 1.5652260904361746e-05, "loss": 0.5424, "step": 17181 }, { 
"epoch": 21.99296, "grad_norm": 1.1238080263137817, "learning_rate": 1.5650260104041618e-05, "loss": 0.4951, "step": 17182 }, { "epoch": 21.99424, "grad_norm": 1.0754061937332153, "learning_rate": 1.5648259303721487e-05, "loss": 0.4725, "step": 17183 }, { "epoch": 21.99552, "grad_norm": 1.0654562711715698, "learning_rate": 1.5646258503401362e-05, "loss": 0.486, "step": 17184 }, { "epoch": 21.9968, "grad_norm": 1.1009260416030884, "learning_rate": 1.5644257703081234e-05, "loss": 0.5057, "step": 17185 }, { "epoch": 21.99808, "grad_norm": 1.091380000114441, "learning_rate": 1.5642256902761106e-05, "loss": 0.4666, "step": 17186 }, { "epoch": 21.99936, "grad_norm": 1.0999908447265625, "learning_rate": 1.5640256102440977e-05, "loss": 0.4635, "step": 17187 }, { "epoch": 22.00064, "grad_norm": null, "learning_rate": 1.5640256102440977e-05, "loss": 0.957, "step": 17188 }, { "epoch": 22.00192, "grad_norm": 1.0367313623428345, "learning_rate": 1.563825530212085e-05, "loss": 0.4574, "step": 17189 }, { "epoch": 22.0032, "grad_norm": 1.0084816217422485, "learning_rate": 1.563625450180072e-05, "loss": 0.4657, "step": 17190 }, { "epoch": 22.00448, "grad_norm": 1.096815824508667, "learning_rate": 1.5634253701480593e-05, "loss": 0.4837, "step": 17191 }, { "epoch": 22.00576, "grad_norm": 1.0867575407028198, "learning_rate": 1.5632252901160465e-05, "loss": 0.4679, "step": 17192 }, { "epoch": 22.00704, "grad_norm": 1.0765024423599243, "learning_rate": 1.5630252100840337e-05, "loss": 0.4796, "step": 17193 }, { "epoch": 22.00832, "grad_norm": 1.0676796436309814, "learning_rate": 1.562825130052021e-05, "loss": 0.4513, "step": 17194 }, { "epoch": 22.0096, "grad_norm": 1.0853677988052368, "learning_rate": 1.562625050020008e-05, "loss": 0.4669, "step": 17195 }, { "epoch": 22.01088, "grad_norm": 1.1179803609848022, "learning_rate": 1.5624249699879952e-05, "loss": 0.4737, "step": 17196 }, { "epoch": 22.01216, "grad_norm": 1.1370209455490112, "learning_rate": 1.5622248899559824e-05, "loss": 
0.4873, "step": 17197 }, { "epoch": 22.01344, "grad_norm": 1.0936213731765747, "learning_rate": 1.5620248099239696e-05, "loss": 0.4791, "step": 17198 }, { "epoch": 22.01472, "grad_norm": 1.1142305135726929, "learning_rate": 1.5618247298919568e-05, "loss": 0.4956, "step": 17199 }, { "epoch": 22.016, "grad_norm": 1.0316848754882812, "learning_rate": 1.561624649859944e-05, "loss": 0.3966, "step": 17200 }, { "epoch": 22.01728, "grad_norm": 1.1402348279953003, "learning_rate": 1.561424569827931e-05, "loss": 0.4791, "step": 17201 }, { "epoch": 22.01856, "grad_norm": 1.1665503978729248, "learning_rate": 1.5612244897959187e-05, "loss": 0.5207, "step": 17202 }, { "epoch": 22.01984, "grad_norm": 1.1318738460540771, "learning_rate": 1.5610244097639055e-05, "loss": 0.4978, "step": 17203 }, { "epoch": 22.02112, "grad_norm": 1.0995848178863525, "learning_rate": 1.5608243297318927e-05, "loss": 0.4897, "step": 17204 }, { "epoch": 22.0224, "grad_norm": 1.0910522937774658, "learning_rate": 1.56062424969988e-05, "loss": 0.4392, "step": 17205 }, { "epoch": 22.02368, "grad_norm": 1.0444594621658325, "learning_rate": 1.5604241696678674e-05, "loss": 0.428, "step": 17206 }, { "epoch": 22.02496, "grad_norm": 1.1223368644714355, "learning_rate": 1.5602240896358543e-05, "loss": 0.4473, "step": 17207 }, { "epoch": 22.02624, "grad_norm": 1.035774827003479, "learning_rate": 1.5600240096038415e-05, "loss": 0.4201, "step": 17208 }, { "epoch": 22.02752, "grad_norm": 1.155383586883545, "learning_rate": 1.559823929571829e-05, "loss": 0.4817, "step": 17209 }, { "epoch": 22.0288, "grad_norm": 1.180153727531433, "learning_rate": 1.559623849539816e-05, "loss": 0.5142, "step": 17210 }, { "epoch": 22.03008, "grad_norm": 1.0396239757537842, "learning_rate": 1.559423769507803e-05, "loss": 0.4348, "step": 17211 }, { "epoch": 22.03136, "grad_norm": 1.1152554750442505, "learning_rate": 1.5592236894757902e-05, "loss": 0.5252, "step": 17212 }, { "epoch": 22.03264, "grad_norm": 1.0972347259521484, 
"learning_rate": 1.5590236094437777e-05, "loss": 0.4627, "step": 17213 }, { "epoch": 22.03392, "grad_norm": 1.1150223016738892, "learning_rate": 1.558823529411765e-05, "loss": 0.4778, "step": 17214 }, { "epoch": 22.0352, "grad_norm": 1.1251050233840942, "learning_rate": 1.5586234493797518e-05, "loss": 0.5056, "step": 17215 }, { "epoch": 22.03648, "grad_norm": 1.1292921304702759, "learning_rate": 1.5584233693477393e-05, "loss": 0.4896, "step": 17216 }, { "epoch": 22.03776, "grad_norm": 1.169345736503601, "learning_rate": 1.5582232893157265e-05, "loss": 0.4869, "step": 17217 }, { "epoch": 22.03904, "grad_norm": 1.10861074924469, "learning_rate": 1.5580232092837137e-05, "loss": 0.4686, "step": 17218 }, { "epoch": 22.04032, "grad_norm": 1.080115556716919, "learning_rate": 1.5578231292517005e-05, "loss": 0.4709, "step": 17219 }, { "epoch": 22.0416, "grad_norm": 1.0956518650054932, "learning_rate": 1.557623049219688e-05, "loss": 0.4785, "step": 17220 }, { "epoch": 22.04288, "grad_norm": 1.1570404767990112, "learning_rate": 1.5574229691876752e-05, "loss": 0.539, "step": 17221 }, { "epoch": 22.04416, "grad_norm": 1.1641499996185303, "learning_rate": 1.5572228891556624e-05, "loss": 0.5051, "step": 17222 }, { "epoch": 22.04544, "grad_norm": 1.096850872039795, "learning_rate": 1.5570228091236496e-05, "loss": 0.4491, "step": 17223 }, { "epoch": 22.04672, "grad_norm": 1.103550910949707, "learning_rate": 1.5568227290916368e-05, "loss": 0.5001, "step": 17224 }, { "epoch": 22.048, "grad_norm": 1.1057217121124268, "learning_rate": 1.556622649059624e-05, "loss": 0.4451, "step": 17225 }, { "epoch": 22.04928, "grad_norm": 1.1657497882843018, "learning_rate": 1.556422569027611e-05, "loss": 0.5181, "step": 17226 }, { "epoch": 22.05056, "grad_norm": 1.1382044553756714, "learning_rate": 1.5562224889955983e-05, "loss": 0.4842, "step": 17227 }, { "epoch": 22.05184, "grad_norm": 1.1846833229064941, "learning_rate": 1.5560224089635855e-05, "loss": 0.5218, "step": 17228 }, { "epoch": 22.05312, 
"grad_norm": 1.137743592262268, "learning_rate": 1.5558223289315727e-05, "loss": 0.4667, "step": 17229 }, { "epoch": 22.0544, "grad_norm": 1.1043981313705444, "learning_rate": 1.55562224889956e-05, "loss": 0.4647, "step": 17230 }, { "epoch": 22.05568, "grad_norm": 1.121191143989563, "learning_rate": 1.555422168867547e-05, "loss": 0.4709, "step": 17231 }, { "epoch": 22.05696, "grad_norm": 1.0988284349441528, "learning_rate": 1.5552220888355343e-05, "loss": 0.4861, "step": 17232 }, { "epoch": 22.05824, "grad_norm": 1.153282880783081, "learning_rate": 1.5550220088035214e-05, "loss": 0.4873, "step": 17233 }, { "epoch": 22.05952, "grad_norm": 1.1652368307113647, "learning_rate": 1.5548219287715086e-05, "loss": 0.4896, "step": 17234 }, { "epoch": 22.0608, "grad_norm": 1.1135512590408325, "learning_rate": 1.5546218487394958e-05, "loss": 0.4406, "step": 17235 }, { "epoch": 22.06208, "grad_norm": 1.166584849357605, "learning_rate": 1.554421768707483e-05, "loss": 0.507, "step": 17236 }, { "epoch": 22.06336, "grad_norm": 1.1483736038208008, "learning_rate": 1.5542216886754702e-05, "loss": 0.5168, "step": 17237 }, { "epoch": 22.06464, "grad_norm": 1.1366267204284668, "learning_rate": 1.5540216086434574e-05, "loss": 0.5365, "step": 17238 }, { "epoch": 22.06592, "grad_norm": 1.1409132480621338, "learning_rate": 1.5538215286114446e-05, "loss": 0.4238, "step": 17239 }, { "epoch": 22.0672, "grad_norm": 1.1683720350265503, "learning_rate": 1.5536214485794317e-05, "loss": 0.5112, "step": 17240 }, { "epoch": 22.06848, "grad_norm": 1.0807946920394897, "learning_rate": 1.5534213685474193e-05, "loss": 0.4384, "step": 17241 }, { "epoch": 22.06976, "grad_norm": 1.2099087238311768, "learning_rate": 1.553221288515406e-05, "loss": 0.523, "step": 17242 }, { "epoch": 22.07104, "grad_norm": 1.1151604652404785, "learning_rate": 1.5530212084833933e-05, "loss": 0.4653, "step": 17243 }, { "epoch": 22.07232, "grad_norm": 1.0875353813171387, "learning_rate": 1.5528211284513805e-05, "loss": 0.4273, 
"step": 17244 }, { "epoch": 22.0736, "grad_norm": 1.2180532217025757, "learning_rate": 1.552621048419368e-05, "loss": 0.5473, "step": 17245 }, { "epoch": 22.07488, "grad_norm": 1.1339551210403442, "learning_rate": 1.552420968387355e-05, "loss": 0.4595, "step": 17246 }, { "epoch": 22.07616, "grad_norm": 1.1073286533355713, "learning_rate": 1.552220888355342e-05, "loss": 0.4464, "step": 17247 }, { "epoch": 22.07744, "grad_norm": 1.136877417564392, "learning_rate": 1.5520208083233296e-05, "loss": 0.494, "step": 17248 }, { "epoch": 22.07872, "grad_norm": 1.1355657577514648, "learning_rate": 1.5518207282913167e-05, "loss": 0.4883, "step": 17249 }, { "epoch": 22.08, "grad_norm": 1.1080483198165894, "learning_rate": 1.5516206482593036e-05, "loss": 0.4671, "step": 17250 }, { "epoch": 22.08128, "grad_norm": 1.1329385042190552, "learning_rate": 1.5514205682272908e-05, "loss": 0.536, "step": 17251 }, { "epoch": 22.08256, "grad_norm": 1.0859447717666626, "learning_rate": 1.5512204881952783e-05, "loss": 0.4559, "step": 17252 }, { "epoch": 22.08384, "grad_norm": 1.134740948677063, "learning_rate": 1.5510204081632655e-05, "loss": 0.4539, "step": 17253 }, { "epoch": 22.08512, "grad_norm": 1.1605035066604614, "learning_rate": 1.5508203281312523e-05, "loss": 0.4898, "step": 17254 }, { "epoch": 22.0864, "grad_norm": 1.0824528932571411, "learning_rate": 1.55062024809924e-05, "loss": 0.4827, "step": 17255 }, { "epoch": 22.08768, "grad_norm": 1.0842760801315308, "learning_rate": 1.550420168067227e-05, "loss": 0.4643, "step": 17256 }, { "epoch": 22.08896, "grad_norm": 1.152551531791687, "learning_rate": 1.5502200880352142e-05, "loss": 0.5292, "step": 17257 }, { "epoch": 22.09024, "grad_norm": 1.086955189704895, "learning_rate": 1.550020008003201e-05, "loss": 0.4563, "step": 17258 }, { "epoch": 22.09152, "grad_norm": 1.1958043575286865, "learning_rate": 1.5498199279711886e-05, "loss": 0.5174, "step": 17259 }, { "epoch": 22.0928, "grad_norm": 1.111706256866455, "learning_rate": 
1.5496198479391758e-05, "loss": 0.4607, "step": 17260 }, { "epoch": 22.09408, "grad_norm": 1.1034555435180664, "learning_rate": 1.549419767907163e-05, "loss": 0.4702, "step": 17261 }, { "epoch": 22.09536, "grad_norm": 1.1045020818710327, "learning_rate": 1.54921968787515e-05, "loss": 0.4442, "step": 17262 }, { "epoch": 22.09664, "grad_norm": 1.1338021755218506, "learning_rate": 1.5490196078431373e-05, "loss": 0.4941, "step": 17263 }, { "epoch": 22.09792, "grad_norm": 1.1852363348007202, "learning_rate": 1.5488195278111245e-05, "loss": 0.5476, "step": 17264 }, { "epoch": 22.0992, "grad_norm": 1.109511137008667, "learning_rate": 1.5486194477791117e-05, "loss": 0.4669, "step": 17265 }, { "epoch": 22.10048, "grad_norm": 1.1228883266448975, "learning_rate": 1.548419367747099e-05, "loss": 0.4729, "step": 17266 }, { "epoch": 22.10176, "grad_norm": 1.1240496635437012, "learning_rate": 1.548219287715086e-05, "loss": 0.5041, "step": 17267 }, { "epoch": 22.10304, "grad_norm": 1.1427851915359497, "learning_rate": 1.5480192076830733e-05, "loss": 0.5104, "step": 17268 }, { "epoch": 22.10432, "grad_norm": 1.1009935140609741, "learning_rate": 1.5478191276510608e-05, "loss": 0.4835, "step": 17269 }, { "epoch": 22.1056, "grad_norm": 1.2154606580734253, "learning_rate": 1.5476190476190476e-05, "loss": 0.5254, "step": 17270 }, { "epoch": 22.10688, "grad_norm": 1.1332104206085205, "learning_rate": 1.5474189675870348e-05, "loss": 0.475, "step": 17271 }, { "epoch": 22.10816, "grad_norm": 1.156253457069397, "learning_rate": 1.547218887555022e-05, "loss": 0.4589, "step": 17272 }, { "epoch": 22.10944, "grad_norm": 1.0867305994033813, "learning_rate": 1.5470188075230095e-05, "loss": 0.4505, "step": 17273 }, { "epoch": 22.11072, "grad_norm": 1.1919612884521484, "learning_rate": 1.5468187274909964e-05, "loss": 0.4891, "step": 17274 }, { "epoch": 22.112, "grad_norm": 1.1696399450302124, "learning_rate": 1.5466186474589836e-05, "loss": 0.5135, "step": 17275 }, { "epoch": 22.11328, "grad_norm": 
1.1965242624282837, "learning_rate": 1.546418567426971e-05, "loss": 0.5167, "step": 17276 }, { "epoch": 22.11456, "grad_norm": 1.1492869853973389, "learning_rate": 1.5462184873949583e-05, "loss": 0.5027, "step": 17277 }, { "epoch": 22.11584, "grad_norm": 1.1493687629699707, "learning_rate": 1.546018407362945e-05, "loss": 0.4764, "step": 17278 }, { "epoch": 22.11712, "grad_norm": 1.1883156299591064, "learning_rate": 1.5458183273309323e-05, "loss": 0.4979, "step": 17279 }, { "epoch": 22.1184, "grad_norm": 1.198495864868164, "learning_rate": 1.54561824729892e-05, "loss": 0.5273, "step": 17280 }, { "epoch": 22.11968, "grad_norm": 1.1346237659454346, "learning_rate": 1.545418167266907e-05, "loss": 0.4574, "step": 17281 }, { "epoch": 22.12096, "grad_norm": 1.1258900165557861, "learning_rate": 1.545218087234894e-05, "loss": 0.4931, "step": 17282 }, { "epoch": 22.12224, "grad_norm": 1.1091734170913696, "learning_rate": 1.545018007202881e-05, "loss": 0.4793, "step": 17283 }, { "epoch": 22.12352, "grad_norm": 1.0940433740615845, "learning_rate": 1.5448179271708686e-05, "loss": 0.4683, "step": 17284 }, { "epoch": 22.1248, "grad_norm": 1.120511531829834, "learning_rate": 1.5446178471388558e-05, "loss": 0.4579, "step": 17285 }, { "epoch": 22.12608, "grad_norm": 1.1699022054672241, "learning_rate": 1.5444177671068426e-05, "loss": 0.4941, "step": 17286 }, { "epoch": 22.12736, "grad_norm": 1.0578786134719849, "learning_rate": 1.54421768707483e-05, "loss": 0.4419, "step": 17287 }, { "epoch": 22.12864, "grad_norm": 1.104691982269287, "learning_rate": 1.5440176070428173e-05, "loss": 0.4954, "step": 17288 }, { "epoch": 22.12992, "grad_norm": 1.1554813385009766, "learning_rate": 1.5438175270108045e-05, "loss": 0.5006, "step": 17289 }, { "epoch": 22.1312, "grad_norm": 1.1532070636749268, "learning_rate": 1.5436174469787914e-05, "loss": 0.4952, "step": 17290 }, { "epoch": 22.13248, "grad_norm": 1.0986944437026978, "learning_rate": 1.543417366946779e-05, "loss": 0.4291, "step": 17291 }, { 
"epoch": 22.13376, "grad_norm": 1.1358639001846313, "learning_rate": 1.543217286914766e-05, "loss": 0.4807, "step": 17292 }, { "epoch": 22.13504, "grad_norm": 1.113168716430664, "learning_rate": 1.5430172068827533e-05, "loss": 0.4816, "step": 17293 }, { "epoch": 22.13632, "grad_norm": 1.1050689220428467, "learning_rate": 1.5428171268507404e-05, "loss": 0.4404, "step": 17294 }, { "epoch": 22.1376, "grad_norm": 1.091439962387085, "learning_rate": 1.5426170468187276e-05, "loss": 0.5036, "step": 17295 }, { "epoch": 22.13888, "grad_norm": 1.1166634559631348, "learning_rate": 1.5424169667867148e-05, "loss": 0.5347, "step": 17296 }, { "epoch": 22.14016, "grad_norm": 1.1514959335327148, "learning_rate": 1.542216886754702e-05, "loss": 0.494, "step": 17297 }, { "epoch": 22.14144, "grad_norm": 1.070124864578247, "learning_rate": 1.5420168067226892e-05, "loss": 0.4533, "step": 17298 }, { "epoch": 22.14272, "grad_norm": 1.0914841890335083, "learning_rate": 1.5418167266906764e-05, "loss": 0.509, "step": 17299 }, { "epoch": 22.144, "grad_norm": 1.1017416715621948, "learning_rate": 1.5416166466586636e-05, "loss": 0.4744, "step": 17300 }, { "epoch": 22.14528, "grad_norm": 1.1203486919403076, "learning_rate": 1.5414165666266507e-05, "loss": 0.4563, "step": 17301 }, { "epoch": 22.14656, "grad_norm": 1.1463284492492676, "learning_rate": 1.541216486594638e-05, "loss": 0.4892, "step": 17302 }, { "epoch": 22.14784, "grad_norm": 1.1561495065689087, "learning_rate": 1.541016406562625e-05, "loss": 0.4916, "step": 17303 }, { "epoch": 22.14912, "grad_norm": 1.1047677993774414, "learning_rate": 1.5408163265306123e-05, "loss": 0.4706, "step": 17304 }, { "epoch": 22.1504, "grad_norm": 1.1294938325881958, "learning_rate": 1.5406162464985995e-05, "loss": 0.4652, "step": 17305 }, { "epoch": 22.15168, "grad_norm": 1.166673183441162, "learning_rate": 1.5404161664665867e-05, "loss": 0.478, "step": 17306 }, { "epoch": 22.15296, "grad_norm": 1.1008706092834473, "learning_rate": 1.540216086434574e-05, 
"loss": 0.4525, "step": 17307 }, { "epoch": 22.15424, "grad_norm": 1.1509610414505005, "learning_rate": 1.5400160064025614e-05, "loss": 0.5268, "step": 17308 }, { "epoch": 22.15552, "grad_norm": 1.1664763689041138, "learning_rate": 1.5398159263705482e-05, "loss": 0.5145, "step": 17309 }, { "epoch": 22.1568, "grad_norm": 1.1705671548843384, "learning_rate": 1.5396158463385354e-05, "loss": 0.4866, "step": 17310 }, { "epoch": 22.158079999999998, "grad_norm": 1.0866199731826782, "learning_rate": 1.5394157663065226e-05, "loss": 0.4587, "step": 17311 }, { "epoch": 22.15936, "grad_norm": 1.0654592514038086, "learning_rate": 1.53921568627451e-05, "loss": 0.4489, "step": 17312 }, { "epoch": 22.16064, "grad_norm": 1.1411796808242798, "learning_rate": 1.539015606242497e-05, "loss": 0.5293, "step": 17313 }, { "epoch": 22.16192, "grad_norm": 1.1217213869094849, "learning_rate": 1.538815526210484e-05, "loss": 0.4792, "step": 17314 }, { "epoch": 22.1632, "grad_norm": 1.133726954460144, "learning_rate": 1.5386154461784717e-05, "loss": 0.4706, "step": 17315 }, { "epoch": 22.16448, "grad_norm": 1.0698765516281128, "learning_rate": 1.538415366146459e-05, "loss": 0.4544, "step": 17316 }, { "epoch": 22.16576, "grad_norm": 1.112635850906372, "learning_rate": 1.5382152861144457e-05, "loss": 0.4675, "step": 17317 }, { "epoch": 22.16704, "grad_norm": 1.0733355283737183, "learning_rate": 1.538015206082433e-05, "loss": 0.4737, "step": 17318 }, { "epoch": 22.16832, "grad_norm": 1.1422197818756104, "learning_rate": 1.5378151260504204e-05, "loss": 0.4691, "step": 17319 }, { "epoch": 22.1696, "grad_norm": 1.1713340282440186, "learning_rate": 1.5376150460184076e-05, "loss": 0.4922, "step": 17320 }, { "epoch": 22.17088, "grad_norm": 1.114890217781067, "learning_rate": 1.5374149659863945e-05, "loss": 0.4539, "step": 17321 }, { "epoch": 22.17216, "grad_norm": 1.1028982400894165, "learning_rate": 1.537214885954382e-05, "loss": 0.4329, "step": 17322 }, { "epoch": 22.17344, "grad_norm": 
1.1502256393432617, "learning_rate": 1.537014805922369e-05, "loss": 0.486, "step": 17323 }, { "epoch": 22.17472, "grad_norm": 1.1716766357421875, "learning_rate": 1.5368147258903563e-05, "loss": 0.5172, "step": 17324 }, { "epoch": 22.176, "grad_norm": 1.1179568767547607, "learning_rate": 1.5366146458583432e-05, "loss": 0.481, "step": 17325 }, { "epoch": 22.17728, "grad_norm": 1.0973176956176758, "learning_rate": 1.5364145658263307e-05, "loss": 0.4588, "step": 17326 }, { "epoch": 22.17856, "grad_norm": 1.1206368207931519, "learning_rate": 1.536214485794318e-05, "loss": 0.4673, "step": 17327 }, { "epoch": 22.17984, "grad_norm": 1.128866195678711, "learning_rate": 1.536014405762305e-05, "loss": 0.4766, "step": 17328 }, { "epoch": 22.18112, "grad_norm": 1.0996205806732178, "learning_rate": 1.5358143257302923e-05, "loss": 0.4679, "step": 17329 }, { "epoch": 22.1824, "grad_norm": 1.087931752204895, "learning_rate": 1.5356142456982795e-05, "loss": 0.4724, "step": 17330 }, { "epoch": 22.18368, "grad_norm": 1.1762522459030151, "learning_rate": 1.5354141656662666e-05, "loss": 0.5389, "step": 17331 }, { "epoch": 22.18496, "grad_norm": 1.0654993057250977, "learning_rate": 1.535214085634254e-05, "loss": 0.4851, "step": 17332 }, { "epoch": 22.18624, "grad_norm": 1.0894544124603271, "learning_rate": 1.535014005602241e-05, "loss": 0.5082, "step": 17333 }, { "epoch": 22.18752, "grad_norm": 1.0549466609954834, "learning_rate": 1.5348139255702282e-05, "loss": 0.4361, "step": 17334 }, { "epoch": 22.1888, "grad_norm": 1.16023588180542, "learning_rate": 1.5346138455382154e-05, "loss": 0.5082, "step": 17335 }, { "epoch": 22.19008, "grad_norm": 1.1312403678894043, "learning_rate": 1.5344137655062026e-05, "loss": 0.4884, "step": 17336 }, { "epoch": 22.19136, "grad_norm": 1.1789931058883667, "learning_rate": 1.5342136854741898e-05, "loss": 0.4722, "step": 17337 }, { "epoch": 22.19264, "grad_norm": 1.1645036935806274, "learning_rate": 1.534013605442177e-05, "loss": 0.5172, "step": 17338 }, { 
"epoch": 22.19392, "grad_norm": 1.106508731842041, "learning_rate": 1.533813525410164e-05, "loss": 0.466, "step": 17339 }, { "epoch": 22.1952, "grad_norm": 1.1362861394882202, "learning_rate": 1.5336134453781513e-05, "loss": 0.4865, "step": 17340 }, { "epoch": 22.19648, "grad_norm": 1.10444974899292, "learning_rate": 1.5334133653461385e-05, "loss": 0.4501, "step": 17341 }, { "epoch": 22.19776, "grad_norm": 1.1265275478363037, "learning_rate": 1.5332132853141257e-05, "loss": 0.4673, "step": 17342 }, { "epoch": 22.19904, "grad_norm": 1.1572017669677734, "learning_rate": 1.533013205282113e-05, "loss": 0.4611, "step": 17343 }, { "epoch": 22.20032, "grad_norm": 1.2215920686721802, "learning_rate": 1.5328131252501e-05, "loss": 0.5035, "step": 17344 }, { "epoch": 22.2016, "grad_norm": 1.2130608558654785, "learning_rate": 1.5326130452180872e-05, "loss": 0.4942, "step": 17345 }, { "epoch": 22.20288, "grad_norm": 1.133594036102295, "learning_rate": 1.5324129651860744e-05, "loss": 0.4936, "step": 17346 }, { "epoch": 22.20416, "grad_norm": 1.1092051267623901, "learning_rate": 1.532212885154062e-05, "loss": 0.5127, "step": 17347 }, { "epoch": 22.20544, "grad_norm": 1.145732045173645, "learning_rate": 1.5320128051220488e-05, "loss": 0.4747, "step": 17348 }, { "epoch": 22.20672, "grad_norm": 1.0972932577133179, "learning_rate": 1.531812725090036e-05, "loss": 0.4997, "step": 17349 }, { "epoch": 22.208, "grad_norm": 1.0953388214111328, "learning_rate": 1.5316126450580232e-05, "loss": 0.4583, "step": 17350 }, { "epoch": 22.20928, "grad_norm": 1.086438775062561, "learning_rate": 1.5314125650260107e-05, "loss": 0.4578, "step": 17351 }, { "epoch": 22.21056, "grad_norm": 1.1954177618026733, "learning_rate": 1.5312124849939975e-05, "loss": 0.5342, "step": 17352 }, { "epoch": 22.21184, "grad_norm": 1.1370768547058105, "learning_rate": 1.5310124049619847e-05, "loss": 0.5075, "step": 17353 }, { "epoch": 22.21312, "grad_norm": 1.1684788465499878, "learning_rate": 1.5308123249299723e-05, 
"loss": 0.4789, "step": 17354 }, { "epoch": 22.2144, "grad_norm": 1.108345866203308, "learning_rate": 1.5306122448979594e-05, "loss": 0.4686, "step": 17355 }, { "epoch": 22.21568, "grad_norm": 1.0191106796264648, "learning_rate": 1.5304121648659463e-05, "loss": 0.4258, "step": 17356 }, { "epoch": 22.21696, "grad_norm": 1.1205805540084839, "learning_rate": 1.5302120848339335e-05, "loss": 0.4757, "step": 17357 }, { "epoch": 22.21824, "grad_norm": 1.1553397178649902, "learning_rate": 1.530012004801921e-05, "loss": 0.4878, "step": 17358 }, { "epoch": 22.21952, "grad_norm": 1.1630548238754272, "learning_rate": 1.5298119247699082e-05, "loss": 0.4906, "step": 17359 }, { "epoch": 22.2208, "grad_norm": 1.0727124214172363, "learning_rate": 1.529611844737895e-05, "loss": 0.4877, "step": 17360 }, { "epoch": 22.22208, "grad_norm": 1.0958082675933838, "learning_rate": 1.5294117647058826e-05, "loss": 0.4236, "step": 17361 }, { "epoch": 22.22336, "grad_norm": 1.1463395357131958, "learning_rate": 1.5292116846738697e-05, "loss": 0.4354, "step": 17362 }, { "epoch": 22.22464, "grad_norm": 1.125491976737976, "learning_rate": 1.529011604641857e-05, "loss": 0.4664, "step": 17363 }, { "epoch": 22.22592, "grad_norm": 1.1401169300079346, "learning_rate": 1.5288115246098438e-05, "loss": 0.4903, "step": 17364 }, { "epoch": 22.2272, "grad_norm": 1.1379910707473755, "learning_rate": 1.5286114445778313e-05, "loss": 0.4734, "step": 17365 }, { "epoch": 22.22848, "grad_norm": 1.0853389501571655, "learning_rate": 1.5284113645458185e-05, "loss": 0.4546, "step": 17366 }, { "epoch": 22.22976, "grad_norm": 1.176604151725769, "learning_rate": 1.5282112845138057e-05, "loss": 0.5138, "step": 17367 }, { "epoch": 22.23104, "grad_norm": 1.2261618375778198, "learning_rate": 1.528011204481793e-05, "loss": 0.5195, "step": 17368 }, { "epoch": 22.23232, "grad_norm": 1.1438370943069458, "learning_rate": 1.52781112444978e-05, "loss": 0.5111, "step": 17369 }, { "epoch": 22.2336, "grad_norm": 1.0962119102478027, 
"learning_rate": 1.5276110444177672e-05, "loss": 0.4471, "step": 17370 }, { "epoch": 22.23488, "grad_norm": 1.1545382738113403, "learning_rate": 1.5274109643857544e-05, "loss": 0.4741, "step": 17371 }, { "epoch": 22.23616, "grad_norm": 1.122843861579895, "learning_rate": 1.5272108843537416e-05, "loss": 0.4445, "step": 17372 }, { "epoch": 22.23744, "grad_norm": 1.114371418952942, "learning_rate": 1.5270108043217288e-05, "loss": 0.4685, "step": 17373 }, { "epoch": 22.23872, "grad_norm": 1.1114271879196167, "learning_rate": 1.526810724289716e-05, "loss": 0.5048, "step": 17374 }, { "epoch": 22.24, "grad_norm": 1.1175719499588013, "learning_rate": 1.526610644257703e-05, "loss": 0.4901, "step": 17375 }, { "epoch": 22.24128, "grad_norm": 1.1345455646514893, "learning_rate": 1.5264105642256903e-05, "loss": 0.4525, "step": 17376 }, { "epoch": 22.24256, "grad_norm": 1.138181447982788, "learning_rate": 1.5262104841936775e-05, "loss": 0.4898, "step": 17377 }, { "epoch": 22.24384, "grad_norm": 1.1441845893859863, "learning_rate": 1.5260104041616647e-05, "loss": 0.4646, "step": 17378 }, { "epoch": 22.24512, "grad_norm": 1.102541446685791, "learning_rate": 1.525810324129652e-05, "loss": 0.4854, "step": 17379 }, { "epoch": 22.2464, "grad_norm": 1.0756680965423584, "learning_rate": 1.5256102440976391e-05, "loss": 0.4716, "step": 17380 }, { "epoch": 22.24768, "grad_norm": 1.1094943284988403, "learning_rate": 1.5254101640656263e-05, "loss": 0.4975, "step": 17381 }, { "epoch": 22.24896, "grad_norm": 1.1272087097167969, "learning_rate": 1.5252100840336136e-05, "loss": 0.4882, "step": 17382 }, { "epoch": 22.25024, "grad_norm": 1.134270191192627, "learning_rate": 1.5250100040016008e-05, "loss": 0.4806, "step": 17383 }, { "epoch": 22.25152, "grad_norm": 1.1215940713882446, "learning_rate": 1.5248099239695878e-05, "loss": 0.4555, "step": 17384 }, { "epoch": 22.2528, "grad_norm": 1.0712652206420898, "learning_rate": 1.524609843937575e-05, "loss": 0.4371, "step": 17385 }, { "epoch": 
22.25408, "grad_norm": 1.0350494384765625, "learning_rate": 1.5244097639055624e-05, "loss": 0.4513, "step": 17386 }, { "epoch": 22.25536, "grad_norm": 1.1774425506591797, "learning_rate": 1.5242096838735496e-05, "loss": 0.5087, "step": 17387 }, { "epoch": 22.25664, "grad_norm": 1.137933611869812, "learning_rate": 1.5240096038415366e-05, "loss": 0.5026, "step": 17388 }, { "epoch": 22.25792, "grad_norm": 1.1481642723083496, "learning_rate": 1.5238095238095241e-05, "loss": 0.474, "step": 17389 }, { "epoch": 22.2592, "grad_norm": 1.067002296447754, "learning_rate": 1.5236094437775111e-05, "loss": 0.4291, "step": 17390 }, { "epoch": 22.26048, "grad_norm": 1.1963670253753662, "learning_rate": 1.5234093637454983e-05, "loss": 0.5206, "step": 17391 }, { "epoch": 22.26176, "grad_norm": 1.1314671039581299, "learning_rate": 1.5232092837134853e-05, "loss": 0.4769, "step": 17392 }, { "epoch": 22.26304, "grad_norm": 1.0662577152252197, "learning_rate": 1.5230092036814728e-05, "loss": 0.4752, "step": 17393 }, { "epoch": 22.26432, "grad_norm": 1.1250959634780884, "learning_rate": 1.5228091236494599e-05, "loss": 0.5012, "step": 17394 }, { "epoch": 22.2656, "grad_norm": 1.1448113918304443, "learning_rate": 1.522609043617447e-05, "loss": 0.5028, "step": 17395 }, { "epoch": 22.26688, "grad_norm": 1.1659822463989258, "learning_rate": 1.522408963585434e-05, "loss": 0.4969, "step": 17396 }, { "epoch": 22.26816, "grad_norm": 1.1037496328353882, "learning_rate": 1.5222088835534216e-05, "loss": 0.501, "step": 17397 }, { "epoch": 22.26944, "grad_norm": 1.1459519863128662, "learning_rate": 1.5220088035214086e-05, "loss": 0.4836, "step": 17398 }, { "epoch": 22.27072, "grad_norm": 1.0809226036071777, "learning_rate": 1.5218087234893958e-05, "loss": 0.4574, "step": 17399 }, { "epoch": 22.272, "grad_norm": 1.1643927097320557, "learning_rate": 1.5216086434573831e-05, "loss": 0.5028, "step": 17400 }, { "epoch": 22.27328, "grad_norm": 1.097196102142334, "learning_rate": 1.5214085634253703e-05, 
"loss": 0.4355, "step": 17401 }, { "epoch": 22.27456, "grad_norm": 1.1606805324554443, "learning_rate": 1.5212084833933573e-05, "loss": 0.4937, "step": 17402 }, { "epoch": 22.27584, "grad_norm": 1.1250141859054565, "learning_rate": 1.5210084033613445e-05, "loss": 0.4682, "step": 17403 }, { "epoch": 22.27712, "grad_norm": 1.1419126987457275, "learning_rate": 1.5208083233293319e-05, "loss": 0.4867, "step": 17404 }, { "epoch": 22.2784, "grad_norm": 1.1214076280593872, "learning_rate": 1.520608243297319e-05, "loss": 0.4733, "step": 17405 }, { "epoch": 22.27968, "grad_norm": 1.1369352340698242, "learning_rate": 1.520408163265306e-05, "loss": 0.4595, "step": 17406 }, { "epoch": 22.28096, "grad_norm": 1.0881363153457642, "learning_rate": 1.5202080832332934e-05, "loss": 0.479, "step": 17407 }, { "epoch": 22.28224, "grad_norm": 1.1420772075653076, "learning_rate": 1.5200080032012806e-05, "loss": 0.4535, "step": 17408 }, { "epoch": 22.28352, "grad_norm": 1.1348367929458618, "learning_rate": 1.5198079231692678e-05, "loss": 0.4644, "step": 17409 }, { "epoch": 22.2848, "grad_norm": 1.0882881879806519, "learning_rate": 1.5196078431372548e-05, "loss": 0.4883, "step": 17410 }, { "epoch": 22.28608, "grad_norm": 1.1225090026855469, "learning_rate": 1.5194077631052422e-05, "loss": 0.4732, "step": 17411 }, { "epoch": 22.28736, "grad_norm": 1.195477843284607, "learning_rate": 1.5192076830732294e-05, "loss": 0.5217, "step": 17412 }, { "epoch": 22.28864, "grad_norm": 1.1594408750534058, "learning_rate": 1.5190076030412166e-05, "loss": 0.5314, "step": 17413 }, { "epoch": 22.28992, "grad_norm": 1.1369751691818237, "learning_rate": 1.5188075230092039e-05, "loss": 0.4771, "step": 17414 }, { "epoch": 22.2912, "grad_norm": 1.1328139305114746, "learning_rate": 1.518607442977191e-05, "loss": 0.5051, "step": 17415 }, { "epoch": 22.29248, "grad_norm": 1.1395426988601685, "learning_rate": 1.5184073629451781e-05, "loss": 0.4516, "step": 17416 }, { "epoch": 22.29376, "grad_norm": 1.1057137250900269, 
"learning_rate": 1.5182072829131653e-05, "loss": 0.5055, "step": 17417 }, { "epoch": 22.29504, "grad_norm": 1.0750411748886108, "learning_rate": 1.5180072028811526e-05, "loss": 0.4643, "step": 17418 }, { "epoch": 22.29632, "grad_norm": 1.134002685546875, "learning_rate": 1.5178071228491397e-05, "loss": 0.4959, "step": 17419 }, { "epoch": 22.2976, "grad_norm": 1.1353780031204224, "learning_rate": 1.5176070428171269e-05, "loss": 0.4992, "step": 17420 }, { "epoch": 22.29888, "grad_norm": 1.131705641746521, "learning_rate": 1.5174069627851142e-05, "loss": 0.4595, "step": 17421 }, { "epoch": 22.300159999999998, "grad_norm": 1.1513729095458984, "learning_rate": 1.5172068827531014e-05, "loss": 0.4984, "step": 17422 }, { "epoch": 22.30144, "grad_norm": 1.126017689704895, "learning_rate": 1.5170068027210884e-05, "loss": 0.4743, "step": 17423 }, { "epoch": 22.30272, "grad_norm": 1.056380271911621, "learning_rate": 1.5168067226890756e-05, "loss": 0.4702, "step": 17424 }, { "epoch": 22.304, "grad_norm": 1.079280972480774, "learning_rate": 1.516606642657063e-05, "loss": 0.4556, "step": 17425 }, { "epoch": 22.30528, "grad_norm": 1.1339120864868164, "learning_rate": 1.5164065626250501e-05, "loss": 0.4703, "step": 17426 }, { "epoch": 22.30656, "grad_norm": 1.1718413829803467, "learning_rate": 1.5162064825930372e-05, "loss": 0.4925, "step": 17427 }, { "epoch": 22.30784, "grad_norm": 1.1251111030578613, "learning_rate": 1.5160064025610247e-05, "loss": 0.4401, "step": 17428 }, { "epoch": 22.30912, "grad_norm": 1.1180959939956665, "learning_rate": 1.5158063225290117e-05, "loss": 0.5187, "step": 17429 }, { "epoch": 22.3104, "grad_norm": 1.1028566360473633, "learning_rate": 1.5156062424969989e-05, "loss": 0.4702, "step": 17430 }, { "epoch": 22.31168, "grad_norm": 1.1383028030395508, "learning_rate": 1.5154061624649859e-05, "loss": 0.4931, "step": 17431 }, { "epoch": 22.31296, "grad_norm": 1.0866107940673828, "learning_rate": 1.5152060824329734e-05, "loss": 0.5136, "step": 17432 }, { 
"epoch": 22.31424, "grad_norm": 1.1053640842437744, "learning_rate": 1.5150060024009604e-05, "loss": 0.5046, "step": 17433 }, { "epoch": 22.31552, "grad_norm": 1.0949057340621948, "learning_rate": 1.5148059223689476e-05, "loss": 0.524, "step": 17434 }, { "epoch": 22.3168, "grad_norm": 1.101889729499817, "learning_rate": 1.514605842336935e-05, "loss": 0.4857, "step": 17435 }, { "epoch": 22.31808, "grad_norm": 1.0940123796463013, "learning_rate": 1.5144057623049222e-05, "loss": 0.4603, "step": 17436 }, { "epoch": 22.31936, "grad_norm": 1.0854915380477905, "learning_rate": 1.5142056822729092e-05, "loss": 0.506, "step": 17437 }, { "epoch": 22.32064, "grad_norm": 1.1346979141235352, "learning_rate": 1.5140056022408964e-05, "loss": 0.5258, "step": 17438 }, { "epoch": 22.32192, "grad_norm": 1.0600947141647339, "learning_rate": 1.5138055222088837e-05, "loss": 0.4525, "step": 17439 }, { "epoch": 22.3232, "grad_norm": 1.082271695137024, "learning_rate": 1.5136054421768709e-05, "loss": 0.4732, "step": 17440 }, { "epoch": 22.32448, "grad_norm": 1.145754098892212, "learning_rate": 1.513405362144858e-05, "loss": 0.4547, "step": 17441 }, { "epoch": 22.32576, "grad_norm": 1.162156581878662, "learning_rate": 1.5132052821128453e-05, "loss": 0.4697, "step": 17442 }, { "epoch": 22.32704, "grad_norm": 1.1728843450546265, "learning_rate": 1.5130052020808325e-05, "loss": 0.4654, "step": 17443 }, { "epoch": 22.32832, "grad_norm": 1.1649385690689087, "learning_rate": 1.5128051220488196e-05, "loss": 0.5223, "step": 17444 }, { "epoch": 22.3296, "grad_norm": 1.1600626707077026, "learning_rate": 1.5126050420168067e-05, "loss": 0.4684, "step": 17445 }, { "epoch": 22.33088, "grad_norm": 1.1548811197280884, "learning_rate": 1.512404961984794e-05, "loss": 0.4642, "step": 17446 }, { "epoch": 22.332160000000002, "grad_norm": 1.2105642557144165, "learning_rate": 1.5122048819527812e-05, "loss": 0.4687, "step": 17447 }, { "epoch": 22.33344, "grad_norm": 1.204110860824585, "learning_rate": 
1.5120048019207684e-05, "loss": 0.4745, "step": 17448 }, { "epoch": 22.33472, "grad_norm": 1.189799189567566, "learning_rate": 1.5118047218887554e-05, "loss": 0.4882, "step": 17449 }, { "epoch": 22.336, "grad_norm": 1.0634477138519287, "learning_rate": 1.5116046418567428e-05, "loss": 0.4457, "step": 17450 }, { "epoch": 22.33728, "grad_norm": 1.1369130611419678, "learning_rate": 1.51140456182473e-05, "loss": 0.5071, "step": 17451 }, { "epoch": 22.33856, "grad_norm": 1.1142311096191406, "learning_rate": 1.5112044817927171e-05, "loss": 0.4664, "step": 17452 }, { "epoch": 22.33984, "grad_norm": 1.1078466176986694, "learning_rate": 1.5110044017607045e-05, "loss": 0.4767, "step": 17453 }, { "epoch": 22.34112, "grad_norm": 1.1485053300857544, "learning_rate": 1.5108043217286915e-05, "loss": 0.4765, "step": 17454 }, { "epoch": 22.3424, "grad_norm": 1.105195164680481, "learning_rate": 1.5106042416966787e-05, "loss": 0.4429, "step": 17455 }, { "epoch": 22.34368, "grad_norm": 1.1941795349121094, "learning_rate": 1.5104041616646659e-05, "loss": 0.5371, "step": 17456 }, { "epoch": 22.34496, "grad_norm": 1.0616157054901123, "learning_rate": 1.5102040816326532e-05, "loss": 0.421, "step": 17457 }, { "epoch": 22.34624, "grad_norm": 1.1069750785827637, "learning_rate": 1.5100040016006402e-05, "loss": 0.4859, "step": 17458 }, { "epoch": 22.34752, "grad_norm": 1.1655443906784058, "learning_rate": 1.5098039215686274e-05, "loss": 0.519, "step": 17459 }, { "epoch": 22.3488, "grad_norm": 1.1244604587554932, "learning_rate": 1.5096038415366148e-05, "loss": 0.4697, "step": 17460 }, { "epoch": 22.35008, "grad_norm": 1.1628854274749756, "learning_rate": 1.509403761504602e-05, "loss": 0.5347, "step": 17461 }, { "epoch": 22.35136, "grad_norm": 1.0745189189910889, "learning_rate": 1.509203681472589e-05, "loss": 0.4332, "step": 17462 }, { "epoch": 22.35264, "grad_norm": 1.117801308631897, "learning_rate": 1.5090036014405762e-05, "loss": 0.439, "step": 17463 }, { "epoch": 22.35392, "grad_norm": 
1.1143686771392822, "learning_rate": 1.5088035214085635e-05, "loss": 0.4714, "step": 17464 }, { "epoch": 22.3552, "grad_norm": 1.1027582883834839, "learning_rate": 1.5086034413765507e-05, "loss": 0.4566, "step": 17465 }, { "epoch": 22.35648, "grad_norm": 1.114611268043518, "learning_rate": 1.5084033613445377e-05, "loss": 0.5007, "step": 17466 }, { "epoch": 22.35776, "grad_norm": 1.12099027633667, "learning_rate": 1.5082032813125253e-05, "loss": 0.4968, "step": 17467 }, { "epoch": 22.35904, "grad_norm": 1.1713063716888428, "learning_rate": 1.5080032012805123e-05, "loss": 0.5194, "step": 17468 }, { "epoch": 22.36032, "grad_norm": 1.1638789176940918, "learning_rate": 1.5078031212484995e-05, "loss": 0.5699, "step": 17469 }, { "epoch": 22.3616, "grad_norm": 1.118815541267395, "learning_rate": 1.5076030412164865e-05, "loss": 0.4611, "step": 17470 }, { "epoch": 22.36288, "grad_norm": 1.087066650390625, "learning_rate": 1.507402961184474e-05, "loss": 0.4478, "step": 17471 }, { "epoch": 22.36416, "grad_norm": 1.1488027572631836, "learning_rate": 1.507202881152461e-05, "loss": 0.4589, "step": 17472 }, { "epoch": 22.36544, "grad_norm": 1.1603463888168335, "learning_rate": 1.5070028011204482e-05, "loss": 0.5202, "step": 17473 }, { "epoch": 22.36672, "grad_norm": 1.1120957136154175, "learning_rate": 1.5068027210884356e-05, "loss": 0.4665, "step": 17474 }, { "epoch": 22.368, "grad_norm": 1.0825188159942627, "learning_rate": 1.5066026410564227e-05, "loss": 0.4775, "step": 17475 }, { "epoch": 22.36928, "grad_norm": 1.1528738737106323, "learning_rate": 1.5064025610244098e-05, "loss": 0.529, "step": 17476 }, { "epoch": 22.37056, "grad_norm": 1.176680326461792, "learning_rate": 1.506202480992397e-05, "loss": 0.4932, "step": 17477 }, { "epoch": 22.37184, "grad_norm": 1.1375823020935059, "learning_rate": 1.5060024009603843e-05, "loss": 0.484, "step": 17478 }, { "epoch": 22.37312, "grad_norm": 1.109999179840088, "learning_rate": 1.5058023209283715e-05, "loss": 0.4834, "step": 17479 }, { 
"epoch": 22.3744, "grad_norm": 1.1392533779144287, "learning_rate": 1.5056022408963585e-05, "loss": 0.4511, "step": 17480 }, { "epoch": 22.37568, "grad_norm": 1.074696660041809, "learning_rate": 1.5054021608643459e-05, "loss": 0.4497, "step": 17481 }, { "epoch": 22.37696, "grad_norm": 1.1152911186218262, "learning_rate": 1.505202080832333e-05, "loss": 0.456, "step": 17482 }, { "epoch": 22.37824, "grad_norm": 1.1230454444885254, "learning_rate": 1.5050020008003202e-05, "loss": 0.4766, "step": 17483 }, { "epoch": 22.37952, "grad_norm": 1.068898320198059, "learning_rate": 1.5048019207683072e-05, "loss": 0.425, "step": 17484 }, { "epoch": 22.3808, "grad_norm": 1.1210873126983643, "learning_rate": 1.5046018407362946e-05, "loss": 0.5016, "step": 17485 }, { "epoch": 22.38208, "grad_norm": 1.0686876773834229, "learning_rate": 1.5044017607042818e-05, "loss": 0.4212, "step": 17486 }, { "epoch": 22.38336, "grad_norm": 1.0619049072265625, "learning_rate": 1.504201680672269e-05, "loss": 0.45, "step": 17487 }, { "epoch": 22.38464, "grad_norm": 1.1653971672058105, "learning_rate": 1.5040016006402563e-05, "loss": 0.4943, "step": 17488 }, { "epoch": 22.38592, "grad_norm": 1.1745988130569458, "learning_rate": 1.5038015206082433e-05, "loss": 0.4886, "step": 17489 }, { "epoch": 22.3872, "grad_norm": 1.1317180395126343, "learning_rate": 1.5036014405762305e-05, "loss": 0.4911, "step": 17490 }, { "epoch": 22.38848, "grad_norm": 1.1715854406356812, "learning_rate": 1.5034013605442177e-05, "loss": 0.4806, "step": 17491 }, { "epoch": 22.38976, "grad_norm": 1.1153987646102905, "learning_rate": 1.503201280512205e-05, "loss": 0.5112, "step": 17492 }, { "epoch": 22.39104, "grad_norm": 1.1248096227645874, "learning_rate": 1.503001200480192e-05, "loss": 0.4911, "step": 17493 }, { "epoch": 22.39232, "grad_norm": 1.1459614038467407, "learning_rate": 1.5028011204481793e-05, "loss": 0.4958, "step": 17494 }, { "epoch": 22.3936, "grad_norm": 1.153074860572815, "learning_rate": 1.5026010404161666e-05, 
"loss": 0.4822, "step": 17495 }, { "epoch": 22.39488, "grad_norm": 1.1298890113830566, "learning_rate": 1.5024009603841538e-05, "loss": 0.4773, "step": 17496 }, { "epoch": 22.39616, "grad_norm": 1.125186800956726, "learning_rate": 1.5022008803521408e-05, "loss": 0.4854, "step": 17497 }, { "epoch": 22.39744, "grad_norm": 1.1420212984085083, "learning_rate": 1.502000800320128e-05, "loss": 0.5225, "step": 17498 }, { "epoch": 22.39872, "grad_norm": 1.0702089071273804, "learning_rate": 1.5018007202881154e-05, "loss": 0.4647, "step": 17499 }, { "epoch": 22.4, "grad_norm": 1.172632098197937, "learning_rate": 1.5016006402561026e-05, "loss": 0.5238, "step": 17500 }, { "epoch": 22.40128, "grad_norm": 1.1643844842910767, "learning_rate": 1.5014005602240896e-05, "loss": 0.5163, "step": 17501 }, { "epoch": 22.40256, "grad_norm": 1.044021725654602, "learning_rate": 1.5012004801920771e-05, "loss": 0.4328, "step": 17502 }, { "epoch": 22.40384, "grad_norm": 1.0835877656936646, "learning_rate": 1.5010004001600641e-05, "loss": 0.4673, "step": 17503 }, { "epoch": 22.40512, "grad_norm": 1.095995306968689, "learning_rate": 1.5008003201280513e-05, "loss": 0.5019, "step": 17504 }, { "epoch": 22.4064, "grad_norm": 1.1427990198135376, "learning_rate": 1.5006002400960383e-05, "loss": 0.4884, "step": 17505 }, { "epoch": 22.40768, "grad_norm": 1.1321765184402466, "learning_rate": 1.5004001600640258e-05, "loss": 0.4775, "step": 17506 }, { "epoch": 22.40896, "grad_norm": 1.107414722442627, "learning_rate": 1.5002000800320129e-05, "loss": 0.4403, "step": 17507 }, { "epoch": 22.41024, "grad_norm": 1.1043665409088135, "learning_rate": 1.5e-05, "loss": 0.4972, "step": 17508 }, { "epoch": 22.41152, "grad_norm": 1.120118260383606, "learning_rate": 1.499799919967987e-05, "loss": 0.4897, "step": 17509 }, { "epoch": 22.4128, "grad_norm": 1.11980140209198, "learning_rate": 1.4995998399359746e-05, "loss": 0.4861, "step": 17510 }, { "epoch": 22.41408, "grad_norm": 1.1767655611038208, "learning_rate": 
1.4993997599039616e-05, "loss": 0.518, "step": 17511 }, { "epoch": 22.41536, "grad_norm": 1.1098543405532837, "learning_rate": 1.4991996798719488e-05, "loss": 0.472, "step": 17512 }, { "epoch": 22.41664, "grad_norm": 1.2055127620697021, "learning_rate": 1.4989995998399361e-05, "loss": 0.5145, "step": 17513 }, { "epoch": 22.41792, "grad_norm": 1.160184621810913, "learning_rate": 1.4987995198079233e-05, "loss": 0.5199, "step": 17514 }, { "epoch": 22.4192, "grad_norm": 1.1769219636917114, "learning_rate": 1.4985994397759103e-05, "loss": 0.5057, "step": 17515 }, { "epoch": 22.42048, "grad_norm": 1.1221567392349243, "learning_rate": 1.4983993597438975e-05, "loss": 0.4786, "step": 17516 }, { "epoch": 22.42176, "grad_norm": 1.167410135269165, "learning_rate": 1.4981992797118849e-05, "loss": 0.4793, "step": 17517 }, { "epoch": 22.42304, "grad_norm": 1.1696871519088745, "learning_rate": 1.497999199679872e-05, "loss": 0.4783, "step": 17518 }, { "epoch": 22.42432, "grad_norm": 1.1420179605484009, "learning_rate": 1.497799119647859e-05, "loss": 0.47, "step": 17519 }, { "epoch": 22.4256, "grad_norm": 1.1280018091201782, "learning_rate": 1.4975990396158466e-05, "loss": 0.5005, "step": 17520 }, { "epoch": 22.42688, "grad_norm": 1.126597285270691, "learning_rate": 1.4973989595838336e-05, "loss": 0.4808, "step": 17521 }, { "epoch": 22.42816, "grad_norm": 1.1016881465911865, "learning_rate": 1.4971988795518208e-05, "loss": 0.4692, "step": 17522 }, { "epoch": 22.42944, "grad_norm": 1.1398890018463135, "learning_rate": 1.4969987995198078e-05, "loss": 0.5017, "step": 17523 }, { "epoch": 22.43072, "grad_norm": 1.1369463205337524, "learning_rate": 1.4967987194877953e-05, "loss": 0.4744, "step": 17524 }, { "epoch": 22.432, "grad_norm": 1.1468337774276733, "learning_rate": 1.4965986394557824e-05, "loss": 0.4802, "step": 17525 }, { "epoch": 22.43328, "grad_norm": 1.184463620185852, "learning_rate": 1.4963985594237695e-05, "loss": 0.5436, "step": 17526 }, { "epoch": 22.43456, "grad_norm": 
1.0913583040237427, "learning_rate": 1.4961984793917569e-05, "loss": 0.4606, "step": 17527 }, { "epoch": 22.43584, "grad_norm": 1.1201003789901733, "learning_rate": 1.4959983993597441e-05, "loss": 0.4782, "step": 17528 }, { "epoch": 22.43712, "grad_norm": 1.1170686483383179, "learning_rate": 1.4957983193277311e-05, "loss": 0.4602, "step": 17529 }, { "epoch": 22.4384, "grad_norm": 1.1606577634811401, "learning_rate": 1.4955982392957183e-05, "loss": 0.501, "step": 17530 }, { "epoch": 22.43968, "grad_norm": 1.1875215768814087, "learning_rate": 1.4953981592637056e-05, "loss": 0.4986, "step": 17531 }, { "epoch": 22.44096, "grad_norm": 1.1248286962509155, "learning_rate": 1.4951980792316928e-05, "loss": 0.51, "step": 17532 }, { "epoch": 22.44224, "grad_norm": 1.106138825416565, "learning_rate": 1.4949979991996798e-05, "loss": 0.4864, "step": 17533 }, { "epoch": 22.44352, "grad_norm": 1.1583833694458008, "learning_rate": 1.4947979191676672e-05, "loss": 0.4824, "step": 17534 }, { "epoch": 22.4448, "grad_norm": 1.1970548629760742, "learning_rate": 1.4945978391356544e-05, "loss": 0.5043, "step": 17535 }, { "epoch": 22.44608, "grad_norm": 1.1255377531051636, "learning_rate": 1.4943977591036416e-05, "loss": 0.4703, "step": 17536 }, { "epoch": 22.44736, "grad_norm": 1.137693166732788, "learning_rate": 1.4941976790716286e-05, "loss": 0.5076, "step": 17537 }, { "epoch": 22.44864, "grad_norm": 1.099455714225769, "learning_rate": 1.493997599039616e-05, "loss": 0.4662, "step": 17538 }, { "epoch": 22.44992, "grad_norm": 1.1596556901931763, "learning_rate": 1.4937975190076031e-05, "loss": 0.4969, "step": 17539 }, { "epoch": 22.4512, "grad_norm": 1.1630852222442627, "learning_rate": 1.4935974389755903e-05, "loss": 0.5131, "step": 17540 }, { "epoch": 22.45248, "grad_norm": 1.1254620552062988, "learning_rate": 1.4933973589435777e-05, "loss": 0.5012, "step": 17541 }, { "epoch": 22.45376, "grad_norm": 1.1461201906204224, "learning_rate": 1.4931972789115647e-05, "loss": 0.4934, "step": 
17542 }, { "epoch": 22.45504, "grad_norm": 1.1740776300430298, "learning_rate": 1.4929971988795519e-05, "loss": 0.4746, "step": 17543 }, { "epoch": 22.45632, "grad_norm": 1.1553022861480713, "learning_rate": 1.492797118847539e-05, "loss": 0.485, "step": 17544 }, { "epoch": 22.4576, "grad_norm": 1.1020857095718384, "learning_rate": 1.4925970388155264e-05, "loss": 0.4326, "step": 17545 }, { "epoch": 22.45888, "grad_norm": 1.125576376914978, "learning_rate": 1.4923969587835134e-05, "loss": 0.4976, "step": 17546 }, { "epoch": 22.46016, "grad_norm": 1.1211215257644653, "learning_rate": 1.4921968787515006e-05, "loss": 0.4824, "step": 17547 }, { "epoch": 22.46144, "grad_norm": 1.1188104152679443, "learning_rate": 1.491996798719488e-05, "loss": 0.4772, "step": 17548 }, { "epoch": 22.46272, "grad_norm": 1.074813723564148, "learning_rate": 1.4917967186874752e-05, "loss": 0.4494, "step": 17549 }, { "epoch": 22.464, "grad_norm": 1.0762443542480469, "learning_rate": 1.4915966386554622e-05, "loss": 0.4373, "step": 17550 }, { "epoch": 22.46528, "grad_norm": 1.1819483041763306, "learning_rate": 1.4913965586234494e-05, "loss": 0.5661, "step": 17551 }, { "epoch": 22.46656, "grad_norm": 1.104282259941101, "learning_rate": 1.4911964785914367e-05, "loss": 0.4914, "step": 17552 }, { "epoch": 22.46784, "grad_norm": 1.1156835556030273, "learning_rate": 1.4909963985594239e-05, "loss": 0.4791, "step": 17553 }, { "epoch": 22.46912, "grad_norm": 1.198472023010254, "learning_rate": 1.490796318527411e-05, "loss": 0.4995, "step": 17554 }, { "epoch": 22.4704, "grad_norm": 1.1563735008239746, "learning_rate": 1.4905962384953984e-05, "loss": 0.4815, "step": 17555 }, { "epoch": 22.47168, "grad_norm": 1.1188043355941772, "learning_rate": 1.4903961584633855e-05, "loss": 0.4759, "step": 17556 }, { "epoch": 22.47296, "grad_norm": 1.130955457687378, "learning_rate": 1.4901960784313726e-05, "loss": 0.4884, "step": 17557 }, { "epoch": 22.47424, "grad_norm": 1.1355321407318115, "learning_rate": 
1.4899959983993597e-05, "loss": 0.4986, "step": 17558 }, { "epoch": 22.47552, "grad_norm": 1.1446267366409302, "learning_rate": 1.4897959183673472e-05, "loss": 0.4902, "step": 17559 }, { "epoch": 22.4768, "grad_norm": 1.1643836498260498, "learning_rate": 1.4895958383353342e-05, "loss": 0.4702, "step": 17560 }, { "epoch": 22.47808, "grad_norm": 1.1795804500579834, "learning_rate": 1.4893957583033214e-05, "loss": 0.4978, "step": 17561 }, { "epoch": 22.47936, "grad_norm": 1.0704967975616455, "learning_rate": 1.4891956782713084e-05, "loss": 0.4907, "step": 17562 }, { "epoch": 22.48064, "grad_norm": 1.0626331567764282, "learning_rate": 1.488995598239296e-05, "loss": 0.4522, "step": 17563 }, { "epoch": 22.48192, "grad_norm": 1.0941236019134521, "learning_rate": 1.488795518207283e-05, "loss": 0.4381, "step": 17564 }, { "epoch": 22.4832, "grad_norm": 1.100813388824463, "learning_rate": 1.4885954381752701e-05, "loss": 0.5071, "step": 17565 }, { "epoch": 22.48448, "grad_norm": 1.2816932201385498, "learning_rate": 1.4883953581432575e-05, "loss": 0.4937, "step": 17566 }, { "epoch": 22.48576, "grad_norm": 1.2014461755752563, "learning_rate": 1.4881952781112447e-05, "loss": 0.4908, "step": 17567 }, { "epoch": 22.48704, "grad_norm": 1.0948677062988281, "learning_rate": 1.4879951980792317e-05, "loss": 0.4935, "step": 17568 }, { "epoch": 22.48832, "grad_norm": 1.114828109741211, "learning_rate": 1.4877951180472189e-05, "loss": 0.4639, "step": 17569 }, { "epoch": 22.4896, "grad_norm": 1.118323802947998, "learning_rate": 1.4875950380152062e-05, "loss": 0.5029, "step": 17570 }, { "epoch": 22.49088, "grad_norm": 1.11355721950531, "learning_rate": 1.4873949579831934e-05, "loss": 0.4878, "step": 17571 }, { "epoch": 22.49216, "grad_norm": 1.1527410745620728, "learning_rate": 1.4871948779511804e-05, "loss": 0.5242, "step": 17572 }, { "epoch": 22.49344, "grad_norm": 1.1299474239349365, "learning_rate": 1.4869947979191678e-05, "loss": 0.4836, "step": 17573 }, { "epoch": 22.49472, 
"grad_norm": 1.0945117473602295, "learning_rate": 1.486794717887155e-05, "loss": 0.4503, "step": 17574 }, { "epoch": 22.496, "grad_norm": 1.0921000242233276, "learning_rate": 1.4865946378551422e-05, "loss": 0.5056, "step": 17575 }, { "epoch": 22.49728, "grad_norm": 1.1385952234268188, "learning_rate": 1.4863945578231292e-05, "loss": 0.5017, "step": 17576 }, { "epoch": 22.49856, "grad_norm": 1.1188937425613403, "learning_rate": 1.4861944777911165e-05, "loss": 0.4629, "step": 17577 }, { "epoch": 22.49984, "grad_norm": 1.1394176483154297, "learning_rate": 1.4859943977591037e-05, "loss": 0.483, "step": 17578 }, { "epoch": 22.50112, "grad_norm": 1.0991370677947998, "learning_rate": 1.4857943177270909e-05, "loss": 0.49, "step": 17579 }, { "epoch": 22.5024, "grad_norm": 1.1252307891845703, "learning_rate": 1.4855942376950783e-05, "loss": 0.4859, "step": 17580 }, { "epoch": 22.50368, "grad_norm": 1.1384406089782715, "learning_rate": 1.4853941576630653e-05, "loss": 0.4985, "step": 17581 }, { "epoch": 22.50496, "grad_norm": 1.1817059516906738, "learning_rate": 1.4851940776310525e-05, "loss": 0.4953, "step": 17582 }, { "epoch": 22.50624, "grad_norm": 1.1219290494918823, "learning_rate": 1.4849939975990396e-05, "loss": 0.4729, "step": 17583 }, { "epoch": 22.50752, "grad_norm": 1.1384577751159668, "learning_rate": 1.484793917567027e-05, "loss": 0.5415, "step": 17584 }, { "epoch": 22.5088, "grad_norm": 1.1150089502334595, "learning_rate": 1.484593837535014e-05, "loss": 0.4845, "step": 17585 }, { "epoch": 22.51008, "grad_norm": 1.1471636295318604, "learning_rate": 1.4843937575030012e-05, "loss": 0.4882, "step": 17586 }, { "epoch": 22.51136, "grad_norm": 1.1583935022354126, "learning_rate": 1.4841936774709886e-05, "loss": 0.5159, "step": 17587 }, { "epoch": 22.51264, "grad_norm": 1.1290314197540283, "learning_rate": 1.4839935974389757e-05, "loss": 0.4779, "step": 17588 }, { "epoch": 22.51392, "grad_norm": 1.116462230682373, "learning_rate": 1.4837935174069628e-05, "loss": 0.4735, 
"step": 17589 }, { "epoch": 22.5152, "grad_norm": 1.1295406818389893, "learning_rate": 1.48359343737495e-05, "loss": 0.4985, "step": 17590 }, { "epoch": 22.51648, "grad_norm": 1.0936286449432373, "learning_rate": 1.4833933573429373e-05, "loss": 0.4902, "step": 17591 }, { "epoch": 22.51776, "grad_norm": 1.1139616966247559, "learning_rate": 1.4831932773109245e-05, "loss": 0.5094, "step": 17592 }, { "epoch": 22.51904, "grad_norm": 1.0469192266464233, "learning_rate": 1.4829931972789115e-05, "loss": 0.4364, "step": 17593 }, { "epoch": 22.52032, "grad_norm": 1.0828360319137573, "learning_rate": 1.482793117246899e-05, "loss": 0.483, "step": 17594 }, { "epoch": 22.5216, "grad_norm": 1.1085882186889648, "learning_rate": 1.482593037214886e-05, "loss": 0.5008, "step": 17595 }, { "epoch": 22.52288, "grad_norm": 1.1540615558624268, "learning_rate": 1.4823929571828732e-05, "loss": 0.4861, "step": 17596 }, { "epoch": 22.52416, "grad_norm": 1.106768012046814, "learning_rate": 1.4821928771508602e-05, "loss": 0.4946, "step": 17597 }, { "epoch": 22.52544, "grad_norm": 1.1145246028900146, "learning_rate": 1.4819927971188478e-05, "loss": 0.4877, "step": 17598 }, { "epoch": 22.52672, "grad_norm": 1.1328237056732178, "learning_rate": 1.4817927170868348e-05, "loss": 0.4733, "step": 17599 }, { "epoch": 22.528, "grad_norm": 1.1732732057571411, "learning_rate": 1.481592637054822e-05, "loss": 0.4773, "step": 17600 }, { "epoch": 22.52928, "grad_norm": 1.093930721282959, "learning_rate": 1.4813925570228093e-05, "loss": 0.4651, "step": 17601 }, { "epoch": 22.53056, "grad_norm": 1.0944005250930786, "learning_rate": 1.4811924769907965e-05, "loss": 0.4874, "step": 17602 }, { "epoch": 22.53184, "grad_norm": 1.1620426177978516, "learning_rate": 1.4809923969587835e-05, "loss": 0.4759, "step": 17603 }, { "epoch": 22.53312, "grad_norm": 1.131779670715332, "learning_rate": 1.4807923169267707e-05, "loss": 0.4777, "step": 17604 }, { "epoch": 22.5344, "grad_norm": 1.1112638711929321, "learning_rate": 
1.480592236894758e-05, "loss": 0.4639, "step": 17605 }, { "epoch": 22.53568, "grad_norm": 1.039440393447876, "learning_rate": 1.4803921568627453e-05, "loss": 0.4314, "step": 17606 }, { "epoch": 22.53696, "grad_norm": 1.0729459524154663, "learning_rate": 1.4801920768307323e-05, "loss": 0.4708, "step": 17607 }, { "epoch": 22.538240000000002, "grad_norm": 1.1430833339691162, "learning_rate": 1.4799919967987196e-05, "loss": 0.4659, "step": 17608 }, { "epoch": 22.53952, "grad_norm": 1.1749787330627441, "learning_rate": 1.4797919167667068e-05, "loss": 0.5147, "step": 17609 }, { "epoch": 22.5408, "grad_norm": 1.1035168170928955, "learning_rate": 1.479591836734694e-05, "loss": 0.4272, "step": 17610 }, { "epoch": 22.54208, "grad_norm": 1.131881594657898, "learning_rate": 1.479391756702681e-05, "loss": 0.4769, "step": 17611 }, { "epoch": 22.54336, "grad_norm": 1.0516685247421265, "learning_rate": 1.4791916766706684e-05, "loss": 0.4525, "step": 17612 }, { "epoch": 22.54464, "grad_norm": 1.0958613157272339, "learning_rate": 1.4789915966386556e-05, "loss": 0.4753, "step": 17613 }, { "epoch": 22.54592, "grad_norm": 1.1349154710769653, "learning_rate": 1.4787915166066427e-05, "loss": 0.4946, "step": 17614 }, { "epoch": 22.5472, "grad_norm": 1.055051565170288, "learning_rate": 1.4785914365746298e-05, "loss": 0.4331, "step": 17615 }, { "epoch": 22.54848, "grad_norm": 1.1691598892211914, "learning_rate": 1.4783913565426171e-05, "loss": 0.5193, "step": 17616 }, { "epoch": 22.54976, "grad_norm": 1.0969427824020386, "learning_rate": 1.4781912765106043e-05, "loss": 0.4621, "step": 17617 }, { "epoch": 22.55104, "grad_norm": 1.1640121936798096, "learning_rate": 1.4779911964785915e-05, "loss": 0.4855, "step": 17618 }, { "epoch": 22.55232, "grad_norm": 1.2234057188034058, "learning_rate": 1.4777911164465788e-05, "loss": 0.4956, "step": 17619 }, { "epoch": 22.5536, "grad_norm": 1.1525243520736694, "learning_rate": 1.4775910364145659e-05, "loss": 0.4738, "step": 17620 }, { "epoch": 22.55488, 
"grad_norm": 1.1218520402908325, "learning_rate": 1.477390956382553e-05, "loss": 0.4954, "step": 17621 }, { "epoch": 22.55616, "grad_norm": 1.1429941654205322, "learning_rate": 1.4771908763505402e-05, "loss": 0.4926, "step": 17622 }, { "epoch": 22.55744, "grad_norm": 1.072890043258667, "learning_rate": 1.4769907963185276e-05, "loss": 0.4441, "step": 17623 }, { "epoch": 22.55872, "grad_norm": 1.085209608078003, "learning_rate": 1.4767907162865146e-05, "loss": 0.4751, "step": 17624 }, { "epoch": 22.56, "grad_norm": 1.085693597793579, "learning_rate": 1.4765906362545018e-05, "loss": 0.4678, "step": 17625 }, { "epoch": 22.56128, "grad_norm": 1.1705639362335205, "learning_rate": 1.4763905562224891e-05, "loss": 0.4686, "step": 17626 }, { "epoch": 22.56256, "grad_norm": 1.130325198173523, "learning_rate": 1.4761904761904763e-05, "loss": 0.5101, "step": 17627 }, { "epoch": 22.56384, "grad_norm": 1.1805752515792847, "learning_rate": 1.4759903961584633e-05, "loss": 0.5458, "step": 17628 }, { "epoch": 22.56512, "grad_norm": 1.1645885705947876, "learning_rate": 1.4757903161264505e-05, "loss": 0.5055, "step": 17629 }, { "epoch": 22.5664, "grad_norm": 1.2138627767562866, "learning_rate": 1.4755902360944379e-05, "loss": 0.5324, "step": 17630 }, { "epoch": 22.56768, "grad_norm": 1.119138240814209, "learning_rate": 1.475390156062425e-05, "loss": 0.4681, "step": 17631 }, { "epoch": 22.56896, "grad_norm": 1.09098482131958, "learning_rate": 1.475190076030412e-05, "loss": 0.4839, "step": 17632 }, { "epoch": 22.57024, "grad_norm": 1.1301281452178955, "learning_rate": 1.4749899959983996e-05, "loss": 0.4629, "step": 17633 }, { "epoch": 22.57152, "grad_norm": 1.1431349515914917, "learning_rate": 1.4747899159663866e-05, "loss": 0.5064, "step": 17634 }, { "epoch": 22.5728, "grad_norm": 1.023766279220581, "learning_rate": 1.4745898359343738e-05, "loss": 0.4402, "step": 17635 }, { "epoch": 22.57408, "grad_norm": 1.1107232570648193, "learning_rate": 1.4743897559023608e-05, "loss": 0.4486, 
"step": 17636 }, { "epoch": 22.57536, "grad_norm": 1.0915488004684448, "learning_rate": 1.4741896758703483e-05, "loss": 0.4598, "step": 17637 }, { "epoch": 22.57664, "grad_norm": 1.1454777717590332, "learning_rate": 1.4739895958383354e-05, "loss": 0.5209, "step": 17638 }, { "epoch": 22.57792, "grad_norm": 1.0833492279052734, "learning_rate": 1.4737895158063225e-05, "loss": 0.5036, "step": 17639 }, { "epoch": 22.5792, "grad_norm": 1.115124225616455, "learning_rate": 1.4735894357743099e-05, "loss": 0.5157, "step": 17640 }, { "epoch": 22.58048, "grad_norm": 1.121090292930603, "learning_rate": 1.4733893557422971e-05, "loss": 0.5237, "step": 17641 }, { "epoch": 22.58176, "grad_norm": 1.0920593738555908, "learning_rate": 1.4731892757102841e-05, "loss": 0.4775, "step": 17642 }, { "epoch": 22.58304, "grad_norm": 1.0497217178344727, "learning_rate": 1.4729891956782713e-05, "loss": 0.4622, "step": 17643 }, { "epoch": 22.584319999999998, "grad_norm": 1.1190520524978638, "learning_rate": 1.4727891156462586e-05, "loss": 0.4555, "step": 17644 }, { "epoch": 22.5856, "grad_norm": 1.1475340127944946, "learning_rate": 1.4725890356142458e-05, "loss": 0.5076, "step": 17645 }, { "epoch": 22.58688, "grad_norm": 1.2062084674835205, "learning_rate": 1.4723889555822328e-05, "loss": 0.4723, "step": 17646 }, { "epoch": 22.58816, "grad_norm": 1.1473878622055054, "learning_rate": 1.4721888755502202e-05, "loss": 0.522, "step": 17647 }, { "epoch": 22.58944, "grad_norm": 1.0924144983291626, "learning_rate": 1.4719887955182074e-05, "loss": 0.4567, "step": 17648 }, { "epoch": 22.59072, "grad_norm": 1.141308069229126, "learning_rate": 1.4717887154861946e-05, "loss": 0.4708, "step": 17649 }, { "epoch": 22.592, "grad_norm": 1.0392951965332031, "learning_rate": 1.4715886354541816e-05, "loss": 0.4772, "step": 17650 }, { "epoch": 22.59328, "grad_norm": 1.0937608480453491, "learning_rate": 1.471388555422169e-05, "loss": 0.4681, "step": 17651 }, { "epoch": 22.59456, "grad_norm": 1.1153881549835205, 
"learning_rate": 1.4711884753901561e-05, "loss": 0.5083, "step": 17652 }, { "epoch": 22.59584, "grad_norm": 1.1007659435272217, "learning_rate": 1.4709883953581433e-05, "loss": 0.4735, "step": 17653 }, { "epoch": 22.59712, "grad_norm": 1.126693844795227, "learning_rate": 1.4707883153261307e-05, "loss": 0.4602, "step": 17654 }, { "epoch": 22.5984, "grad_norm": 1.103529453277588, "learning_rate": 1.4705882352941177e-05, "loss": 0.4932, "step": 17655 }, { "epoch": 22.59968, "grad_norm": 1.045106053352356, "learning_rate": 1.4703881552621049e-05, "loss": 0.4576, "step": 17656 }, { "epoch": 22.60096, "grad_norm": 1.0801410675048828, "learning_rate": 1.470188075230092e-05, "loss": 0.4534, "step": 17657 }, { "epoch": 22.60224, "grad_norm": 1.1122349500656128, "learning_rate": 1.4699879951980794e-05, "loss": 0.4549, "step": 17658 }, { "epoch": 22.60352, "grad_norm": 1.1067173480987549, "learning_rate": 1.4697879151660664e-05, "loss": 0.478, "step": 17659 }, { "epoch": 22.6048, "grad_norm": 1.1408361196517944, "learning_rate": 1.4695878351340536e-05, "loss": 0.4798, "step": 17660 }, { "epoch": 22.60608, "grad_norm": 1.1362731456756592, "learning_rate": 1.469387755102041e-05, "loss": 0.4567, "step": 17661 }, { "epoch": 22.60736, "grad_norm": 1.2117960453033447, "learning_rate": 1.4691876750700282e-05, "loss": 0.5187, "step": 17662 }, { "epoch": 22.60864, "grad_norm": 1.155263066291809, "learning_rate": 1.4689875950380152e-05, "loss": 0.531, "step": 17663 }, { "epoch": 22.60992, "grad_norm": 1.1107439994812012, "learning_rate": 1.4687875150060024e-05, "loss": 0.471, "step": 17664 }, { "epoch": 22.6112, "grad_norm": 1.1272735595703125, "learning_rate": 1.4685874349739897e-05, "loss": 0.4807, "step": 17665 }, { "epoch": 22.61248, "grad_norm": 1.142755150794983, "learning_rate": 1.4683873549419769e-05, "loss": 0.4807, "step": 17666 }, { "epoch": 22.61376, "grad_norm": 1.160695195198059, "learning_rate": 1.468187274909964e-05, "loss": 0.5241, "step": 17667 }, { "epoch": 22.61504, 
"grad_norm": 1.1584571599960327, "learning_rate": 1.4679871948779514e-05, "loss": 0.4977, "step": 17668 }, { "epoch": 22.61632, "grad_norm": 1.08279287815094, "learning_rate": 1.4677871148459385e-05, "loss": 0.4404, "step": 17669 }, { "epoch": 22.6176, "grad_norm": 1.0838192701339722, "learning_rate": 1.4675870348139256e-05, "loss": 0.4565, "step": 17670 }, { "epoch": 22.61888, "grad_norm": 1.1284055709838867, "learning_rate": 1.4673869547819127e-05, "loss": 0.5155, "step": 17671 }, { "epoch": 22.62016, "grad_norm": 1.0866196155548096, "learning_rate": 1.4671868747499002e-05, "loss": 0.4731, "step": 17672 }, { "epoch": 22.62144, "grad_norm": 1.1500025987625122, "learning_rate": 1.4669867947178872e-05, "loss": 0.5287, "step": 17673 }, { "epoch": 22.62272, "grad_norm": 1.1272227764129639, "learning_rate": 1.4667867146858744e-05, "loss": 0.4799, "step": 17674 }, { "epoch": 22.624, "grad_norm": 1.1563876867294312, "learning_rate": 1.4665866346538614e-05, "loss": 0.4674, "step": 17675 }, { "epoch": 22.62528, "grad_norm": 1.1425659656524658, "learning_rate": 1.466386554621849e-05, "loss": 0.4739, "step": 17676 }, { "epoch": 22.62656, "grad_norm": 1.1223922967910767, "learning_rate": 1.466186474589836e-05, "loss": 0.4803, "step": 17677 }, { "epoch": 22.62784, "grad_norm": 1.1119972467422485, "learning_rate": 1.4659863945578231e-05, "loss": 0.5031, "step": 17678 }, { "epoch": 22.62912, "grad_norm": 1.1872590780258179, "learning_rate": 1.4657863145258105e-05, "loss": 0.5111, "step": 17679 }, { "epoch": 22.6304, "grad_norm": 1.173486351966858, "learning_rate": 1.4655862344937977e-05, "loss": 0.5107, "step": 17680 }, { "epoch": 22.63168, "grad_norm": 1.1515588760375977, "learning_rate": 1.4653861544617847e-05, "loss": 0.4845, "step": 17681 }, { "epoch": 22.63296, "grad_norm": 1.1229499578475952, "learning_rate": 1.4651860744297719e-05, "loss": 0.4891, "step": 17682 }, { "epoch": 22.63424, "grad_norm": 1.1395021677017212, "learning_rate": 1.4649859943977592e-05, "loss": 
0.4833, "step": 17683 }, { "epoch": 22.63552, "grad_norm": 1.1398979425430298, "learning_rate": 1.4647859143657464e-05, "loss": 0.4845, "step": 17684 }, { "epoch": 22.6368, "grad_norm": 1.1859970092773438, "learning_rate": 1.4645858343337334e-05, "loss": 0.5003, "step": 17685 }, { "epoch": 22.63808, "grad_norm": 1.1473771333694458, "learning_rate": 1.464385754301721e-05, "loss": 0.4632, "step": 17686 }, { "epoch": 22.63936, "grad_norm": 1.087823748588562, "learning_rate": 1.464185674269708e-05, "loss": 0.4444, "step": 17687 }, { "epoch": 22.64064, "grad_norm": 1.0971521139144897, "learning_rate": 1.4639855942376952e-05, "loss": 0.4446, "step": 17688 }, { "epoch": 22.64192, "grad_norm": 1.1230698823928833, "learning_rate": 1.4637855142056822e-05, "loss": 0.45, "step": 17689 }, { "epoch": 22.6432, "grad_norm": 1.176695704460144, "learning_rate": 1.4635854341736697e-05, "loss": 0.5211, "step": 17690 }, { "epoch": 22.64448, "grad_norm": 1.1348381042480469, "learning_rate": 1.4633853541416567e-05, "loss": 0.487, "step": 17691 }, { "epoch": 22.64576, "grad_norm": 1.1356598138809204, "learning_rate": 1.4631852741096439e-05, "loss": 0.5139, "step": 17692 }, { "epoch": 22.64704, "grad_norm": 1.204661250114441, "learning_rate": 1.4629851940776313e-05, "loss": 0.5212, "step": 17693 }, { "epoch": 22.64832, "grad_norm": 1.1288728713989258, "learning_rate": 1.4627851140456184e-05, "loss": 0.5026, "step": 17694 }, { "epoch": 22.6496, "grad_norm": 1.1189838647842407, "learning_rate": 1.4625850340136055e-05, "loss": 0.4806, "step": 17695 }, { "epoch": 22.65088, "grad_norm": 1.099488615989685, "learning_rate": 1.4623849539815926e-05, "loss": 0.4657, "step": 17696 }, { "epoch": 22.65216, "grad_norm": 1.1469444036483765, "learning_rate": 1.46218487394958e-05, "loss": 0.5038, "step": 17697 }, { "epoch": 22.65344, "grad_norm": 1.1008341312408447, "learning_rate": 1.4619847939175672e-05, "loss": 0.4696, "step": 17698 }, { "epoch": 22.65472, "grad_norm": 1.1267304420471191, 
"learning_rate": 1.4617847138855542e-05, "loss": 0.4798, "step": 17699 }, { "epoch": 22.656, "grad_norm": 1.0712333917617798, "learning_rate": 1.4615846338535416e-05, "loss": 0.4875, "step": 17700 }, { "epoch": 22.65728, "grad_norm": 1.090133547782898, "learning_rate": 1.4613845538215287e-05, "loss": 0.5221, "step": 17701 }, { "epoch": 22.65856, "grad_norm": 1.0911118984222412, "learning_rate": 1.461184473789516e-05, "loss": 0.4545, "step": 17702 }, { "epoch": 22.65984, "grad_norm": 1.1019871234893799, "learning_rate": 1.460984393757503e-05, "loss": 0.5021, "step": 17703 }, { "epoch": 22.66112, "grad_norm": 1.1478427648544312, "learning_rate": 1.4607843137254903e-05, "loss": 0.4791, "step": 17704 }, { "epoch": 22.6624, "grad_norm": 1.038714051246643, "learning_rate": 1.4605842336934775e-05, "loss": 0.4635, "step": 17705 }, { "epoch": 22.66368, "grad_norm": 1.1068960428237915, "learning_rate": 1.4603841536614647e-05, "loss": 0.4915, "step": 17706 }, { "epoch": 22.66496, "grad_norm": 1.1241267919540405, "learning_rate": 1.460184073629452e-05, "loss": 0.4938, "step": 17707 }, { "epoch": 22.66624, "grad_norm": 1.1513806581497192, "learning_rate": 1.459983993597439e-05, "loss": 0.4996, "step": 17708 }, { "epoch": 22.66752, "grad_norm": 1.0984781980514526, "learning_rate": 1.4597839135654262e-05, "loss": 0.4556, "step": 17709 }, { "epoch": 22.6688, "grad_norm": 1.1513564586639404, "learning_rate": 1.4595838335334134e-05, "loss": 0.4871, "step": 17710 }, { "epoch": 22.67008, "grad_norm": 1.0893858671188354, "learning_rate": 1.4593837535014008e-05, "loss": 0.418, "step": 17711 }, { "epoch": 22.67136, "grad_norm": 1.1374493837356567, "learning_rate": 1.4591836734693878e-05, "loss": 0.4628, "step": 17712 }, { "epoch": 22.67264, "grad_norm": 1.066206932067871, "learning_rate": 1.458983593437375e-05, "loss": 0.4407, "step": 17713 }, { "epoch": 22.67392, "grad_norm": 1.120012640953064, "learning_rate": 1.4587835134053623e-05, "loss": 0.4844, "step": 17714 }, { "epoch": 22.6752, 
"grad_norm": 1.1568934917449951, "learning_rate": 1.4585834333733495e-05, "loss": 0.4558, "step": 17715 }, { "epoch": 22.67648, "grad_norm": 1.1683546304702759, "learning_rate": 1.4583833533413365e-05, "loss": 0.5141, "step": 17716 }, { "epoch": 22.67776, "grad_norm": 1.1002466678619385, "learning_rate": 1.4581832733093237e-05, "loss": 0.483, "step": 17717 }, { "epoch": 22.67904, "grad_norm": 1.1254520416259766, "learning_rate": 1.457983193277311e-05, "loss": 0.4755, "step": 17718 }, { "epoch": 22.680320000000002, "grad_norm": 1.0948314666748047, "learning_rate": 1.4577831132452982e-05, "loss": 0.4998, "step": 17719 }, { "epoch": 22.6816, "grad_norm": 1.126795768737793, "learning_rate": 1.4575830332132853e-05, "loss": 0.4927, "step": 17720 }, { "epoch": 22.68288, "grad_norm": 1.096273422241211, "learning_rate": 1.4573829531812728e-05, "loss": 0.4373, "step": 17721 }, { "epoch": 22.68416, "grad_norm": 1.1614435911178589, "learning_rate": 1.4571828731492598e-05, "loss": 0.5023, "step": 17722 }, { "epoch": 22.68544, "grad_norm": 1.181004524230957, "learning_rate": 1.456982793117247e-05, "loss": 0.5358, "step": 17723 }, { "epoch": 22.68672, "grad_norm": 1.0640144348144531, "learning_rate": 1.456782713085234e-05, "loss": 0.4442, "step": 17724 }, { "epoch": 22.688, "grad_norm": 1.1106035709381104, "learning_rate": 1.4565826330532215e-05, "loss": 0.4776, "step": 17725 }, { "epoch": 22.68928, "grad_norm": 1.104239583015442, "learning_rate": 1.4563825530212085e-05, "loss": 0.4939, "step": 17726 }, { "epoch": 22.69056, "grad_norm": 1.165892481803894, "learning_rate": 1.4561824729891957e-05, "loss": 0.5086, "step": 17727 }, { "epoch": 22.69184, "grad_norm": 1.1454297304153442, "learning_rate": 1.4559823929571828e-05, "loss": 0.4816, "step": 17728 }, { "epoch": 22.69312, "grad_norm": 1.103267788887024, "learning_rate": 1.4557823129251703e-05, "loss": 0.4574, "step": 17729 }, { "epoch": 22.6944, "grad_norm": 1.1371041536331177, "learning_rate": 1.4555822328931573e-05, "loss": 
0.4844, "step": 17730 }, { "epoch": 22.69568, "grad_norm": 1.0860483646392822, "learning_rate": 1.4553821528611445e-05, "loss": 0.4337, "step": 17731 }, { "epoch": 22.69696, "grad_norm": 1.1786482334136963, "learning_rate": 1.4551820728291318e-05, "loss": 0.5068, "step": 17732 }, { "epoch": 22.69824, "grad_norm": 1.1752735376358032, "learning_rate": 1.454981992797119e-05, "loss": 0.5256, "step": 17733 }, { "epoch": 22.69952, "grad_norm": 1.1000226736068726, "learning_rate": 1.454781912765106e-05, "loss": 0.4766, "step": 17734 }, { "epoch": 22.7008, "grad_norm": 1.137929081916809, "learning_rate": 1.4545818327330932e-05, "loss": 0.4936, "step": 17735 }, { "epoch": 22.70208, "grad_norm": 1.1989151239395142, "learning_rate": 1.4543817527010806e-05, "loss": 0.5027, "step": 17736 }, { "epoch": 22.70336, "grad_norm": 1.180206537246704, "learning_rate": 1.4541816726690678e-05, "loss": 0.5085, "step": 17737 }, { "epoch": 22.70464, "grad_norm": 1.0755740404129028, "learning_rate": 1.4539815926370548e-05, "loss": 0.4829, "step": 17738 }, { "epoch": 22.70592, "grad_norm": 1.0281373262405396, "learning_rate": 1.4537815126050421e-05, "loss": 0.4234, "step": 17739 }, { "epoch": 22.7072, "grad_norm": 1.0628975629806519, "learning_rate": 1.4535814325730293e-05, "loss": 0.4498, "step": 17740 }, { "epoch": 22.70848, "grad_norm": 1.0640840530395508, "learning_rate": 1.4533813525410165e-05, "loss": 0.4457, "step": 17741 }, { "epoch": 22.70976, "grad_norm": 1.1275110244750977, "learning_rate": 1.4531812725090035e-05, "loss": 0.5072, "step": 17742 }, { "epoch": 22.71104, "grad_norm": 1.1140217781066895, "learning_rate": 1.4529811924769909e-05, "loss": 0.4612, "step": 17743 }, { "epoch": 22.71232, "grad_norm": 1.083038568496704, "learning_rate": 1.452781112444978e-05, "loss": 0.4893, "step": 17744 }, { "epoch": 22.7136, "grad_norm": 1.105229377746582, "learning_rate": 1.4525810324129652e-05, "loss": 0.461, "step": 17745 }, { "epoch": 22.71488, "grad_norm": 1.2414989471435547, 
"learning_rate": 1.4523809523809526e-05, "loss": 0.5307, "step": 17746 }, { "epoch": 22.71616, "grad_norm": 1.2143381834030151, "learning_rate": 1.4521808723489396e-05, "loss": 0.5498, "step": 17747 }, { "epoch": 22.71744, "grad_norm": 1.171492099761963, "learning_rate": 1.4519807923169268e-05, "loss": 0.4874, "step": 17748 }, { "epoch": 22.71872, "grad_norm": 1.1219180822372437, "learning_rate": 1.451780712284914e-05, "loss": 0.4637, "step": 17749 }, { "epoch": 22.72, "grad_norm": 1.1720901727676392, "learning_rate": 1.4515806322529013e-05, "loss": 0.5322, "step": 17750 }, { "epoch": 22.72128, "grad_norm": 1.137251615524292, "learning_rate": 1.4513805522208884e-05, "loss": 0.5004, "step": 17751 }, { "epoch": 22.72256, "grad_norm": 1.147051215171814, "learning_rate": 1.4511804721888755e-05, "loss": 0.4769, "step": 17752 }, { "epoch": 22.72384, "grad_norm": 1.1876957416534424, "learning_rate": 1.4509803921568629e-05, "loss": 0.4828, "step": 17753 }, { "epoch": 22.72512, "grad_norm": 1.1537209749221802, "learning_rate": 1.4507803121248501e-05, "loss": 0.5377, "step": 17754 }, { "epoch": 22.7264, "grad_norm": 1.0867969989776611, "learning_rate": 1.4505802320928371e-05, "loss": 0.4288, "step": 17755 }, { "epoch": 22.72768, "grad_norm": 1.1193979978561401, "learning_rate": 1.4503801520608243e-05, "loss": 0.4479, "step": 17756 }, { "epoch": 22.72896, "grad_norm": 1.113378882408142, "learning_rate": 1.4501800720288116e-05, "loss": 0.4729, "step": 17757 }, { "epoch": 22.73024, "grad_norm": 1.0698660612106323, "learning_rate": 1.4499799919967988e-05, "loss": 0.454, "step": 17758 }, { "epoch": 22.73152, "grad_norm": 1.0980463027954102, "learning_rate": 1.4497799119647858e-05, "loss": 0.512, "step": 17759 }, { "epoch": 22.7328, "grad_norm": 1.0734245777130127, "learning_rate": 1.4495798319327734e-05, "loss": 0.4737, "step": 17760 }, { "epoch": 22.73408, "grad_norm": 1.1146806478500366, "learning_rate": 1.4493797519007604e-05, "loss": 0.4867, "step": 17761 }, { "epoch": 
22.73536, "grad_norm": 1.1204739809036255, "learning_rate": 1.4491796718687476e-05, "loss": 0.4844, "step": 17762 }, { "epoch": 22.73664, "grad_norm": 1.0988640785217285, "learning_rate": 1.4489795918367346e-05, "loss": 0.4449, "step": 17763 }, { "epoch": 22.73792, "grad_norm": 1.1634745597839355, "learning_rate": 1.4487795118047221e-05, "loss": 0.5171, "step": 17764 }, { "epoch": 22.7392, "grad_norm": 1.177647352218628, "learning_rate": 1.4485794317727091e-05, "loss": 0.5184, "step": 17765 }, { "epoch": 22.74048, "grad_norm": 1.12690007686615, "learning_rate": 1.4483793517406963e-05, "loss": 0.4881, "step": 17766 }, { "epoch": 22.74176, "grad_norm": 1.1667804718017578, "learning_rate": 1.4481792717086837e-05, "loss": 0.4587, "step": 17767 }, { "epoch": 22.74304, "grad_norm": 1.1464014053344727, "learning_rate": 1.4479791916766709e-05, "loss": 0.4792, "step": 17768 }, { "epoch": 22.74432, "grad_norm": 1.0807876586914062, "learning_rate": 1.4477791116446579e-05, "loss": 0.4861, "step": 17769 }, { "epoch": 22.7456, "grad_norm": 1.1696687936782837, "learning_rate": 1.447579031612645e-05, "loss": 0.5222, "step": 17770 }, { "epoch": 22.74688, "grad_norm": 1.1905460357666016, "learning_rate": 1.4473789515806324e-05, "loss": 0.5166, "step": 17771 }, { "epoch": 22.74816, "grad_norm": 1.1877039670944214, "learning_rate": 1.4471788715486196e-05, "loss": 0.4591, "step": 17772 }, { "epoch": 22.74944, "grad_norm": 1.1958463191986084, "learning_rate": 1.4469787915166066e-05, "loss": 0.4868, "step": 17773 }, { "epoch": 22.75072, "grad_norm": 1.1075440645217896, "learning_rate": 1.446778711484594e-05, "loss": 0.4828, "step": 17774 }, { "epoch": 22.752, "grad_norm": 1.0892101526260376, "learning_rate": 1.4465786314525812e-05, "loss": 0.4635, "step": 17775 }, { "epoch": 22.75328, "grad_norm": 1.1559746265411377, "learning_rate": 1.4463785514205683e-05, "loss": 0.5233, "step": 17776 }, { "epoch": 22.75456, "grad_norm": 1.0793570280075073, "learning_rate": 1.4461784713885554e-05, 
"loss": 0.4376, "step": 17777 }, { "epoch": 22.75584, "grad_norm": 1.12493896484375, "learning_rate": 1.4459783913565427e-05, "loss": 0.5247, "step": 17778 }, { "epoch": 22.75712, "grad_norm": 1.1282217502593994, "learning_rate": 1.4457783113245299e-05, "loss": 0.4513, "step": 17779 }, { "epoch": 22.7584, "grad_norm": 1.1162893772125244, "learning_rate": 1.445578231292517e-05, "loss": 0.4873, "step": 17780 }, { "epoch": 22.75968, "grad_norm": 1.1070144176483154, "learning_rate": 1.4453781512605044e-05, "loss": 0.4985, "step": 17781 }, { "epoch": 22.76096, "grad_norm": 1.0941122770309448, "learning_rate": 1.4451780712284915e-05, "loss": 0.4902, "step": 17782 }, { "epoch": 22.76224, "grad_norm": 1.1269056797027588, "learning_rate": 1.4449779911964786e-05, "loss": 0.469, "step": 17783 }, { "epoch": 22.76352, "grad_norm": 1.0950514078140259, "learning_rate": 1.4447779111644658e-05, "loss": 0.4394, "step": 17784 }, { "epoch": 22.7648, "grad_norm": 1.1223610639572144, "learning_rate": 1.4445778311324532e-05, "loss": 0.4732, "step": 17785 }, { "epoch": 22.76608, "grad_norm": 1.0896393060684204, "learning_rate": 1.4443777511004402e-05, "loss": 0.4444, "step": 17786 }, { "epoch": 22.76736, "grad_norm": 1.1810468435287476, "learning_rate": 1.4441776710684274e-05, "loss": 0.5342, "step": 17787 }, { "epoch": 22.76864, "grad_norm": 1.1601206064224243, "learning_rate": 1.4439775910364146e-05, "loss": 0.5354, "step": 17788 }, { "epoch": 22.76992, "grad_norm": 1.1780500411987305, "learning_rate": 1.443777511004402e-05, "loss": 0.4826, "step": 17789 }, { "epoch": 22.7712, "grad_norm": 1.1130772829055786, "learning_rate": 1.443577430972389e-05, "loss": 0.4948, "step": 17790 }, { "epoch": 22.77248, "grad_norm": 1.1274909973144531, "learning_rate": 1.4433773509403761e-05, "loss": 0.4979, "step": 17791 }, { "epoch": 22.77376, "grad_norm": 1.1447163820266724, "learning_rate": 1.4431772709083635e-05, "loss": 0.4735, "step": 17792 }, { "epoch": 22.77504, "grad_norm": 1.0660210847854614, 
"learning_rate": 1.4429771908763507e-05, "loss": 0.4555, "step": 17793 }, { "epoch": 22.77632, "grad_norm": 1.0781277418136597, "learning_rate": 1.4427771108443377e-05, "loss": 0.4619, "step": 17794 }, { "epoch": 22.7776, "grad_norm": 1.1187148094177246, "learning_rate": 1.4425770308123249e-05, "loss": 0.4552, "step": 17795 }, { "epoch": 22.77888, "grad_norm": 1.1533479690551758, "learning_rate": 1.4423769507803122e-05, "loss": 0.4782, "step": 17796 }, { "epoch": 22.78016, "grad_norm": 1.143250584602356, "learning_rate": 1.4421768707482994e-05, "loss": 0.5023, "step": 17797 }, { "epoch": 22.78144, "grad_norm": 1.1441926956176758, "learning_rate": 1.4419767907162864e-05, "loss": 0.5012, "step": 17798 }, { "epoch": 22.78272, "grad_norm": 1.1615264415740967, "learning_rate": 1.441776710684274e-05, "loss": 0.5388, "step": 17799 }, { "epoch": 22.784, "grad_norm": 1.1589363813400269, "learning_rate": 1.441576630652261e-05, "loss": 0.505, "step": 17800 }, { "epoch": 22.78528, "grad_norm": 1.1797236204147339, "learning_rate": 1.4413765506202482e-05, "loss": 0.502, "step": 17801 }, { "epoch": 22.78656, "grad_norm": 1.2213630676269531, "learning_rate": 1.4411764705882352e-05, "loss": 0.5167, "step": 17802 }, { "epoch": 22.78784, "grad_norm": 1.155617117881775, "learning_rate": 1.4409763905562227e-05, "loss": 0.4613, "step": 17803 }, { "epoch": 22.78912, "grad_norm": 1.1112602949142456, "learning_rate": 1.4407763105242097e-05, "loss": 0.4594, "step": 17804 }, { "epoch": 22.790399999999998, "grad_norm": 1.1475427150726318, "learning_rate": 1.4405762304921969e-05, "loss": 0.457, "step": 17805 }, { "epoch": 22.79168, "grad_norm": 1.1044026613235474, "learning_rate": 1.4403761504601842e-05, "loss": 0.48, "step": 17806 }, { "epoch": 22.79296, "grad_norm": 1.119400978088379, "learning_rate": 1.4401760704281714e-05, "loss": 0.481, "step": 17807 }, { "epoch": 22.79424, "grad_norm": 1.1394966840744019, "learning_rate": 1.4399759903961585e-05, "loss": 0.5108, "step": 17808 }, { 
"epoch": 22.79552, "grad_norm": 1.170421838760376, "learning_rate": 1.4397759103641456e-05, "loss": 0.4999, "step": 17809 }, { "epoch": 22.7968, "grad_norm": 1.134114384651184, "learning_rate": 1.439575830332133e-05, "loss": 0.4723, "step": 17810 }, { "epoch": 22.79808, "grad_norm": 1.180664300918579, "learning_rate": 1.4393757503001202e-05, "loss": 0.4983, "step": 17811 }, { "epoch": 22.79936, "grad_norm": 1.2017977237701416, "learning_rate": 1.4391756702681072e-05, "loss": 0.5217, "step": 17812 }, { "epoch": 22.80064, "grad_norm": 1.0838621854782104, "learning_rate": 1.4389755902360945e-05, "loss": 0.4477, "step": 17813 }, { "epoch": 22.80192, "grad_norm": 1.1182233095169067, "learning_rate": 1.4387755102040817e-05, "loss": 0.4882, "step": 17814 }, { "epoch": 22.8032, "grad_norm": 1.1456414461135864, "learning_rate": 1.438575430172069e-05, "loss": 0.4561, "step": 17815 }, { "epoch": 22.80448, "grad_norm": 1.0864543914794922, "learning_rate": 1.438375350140056e-05, "loss": 0.4516, "step": 17816 }, { "epoch": 22.80576, "grad_norm": 1.145043134689331, "learning_rate": 1.4381752701080433e-05, "loss": 0.4764, "step": 17817 }, { "epoch": 22.80704, "grad_norm": 1.1899402141571045, "learning_rate": 1.4379751900760305e-05, "loss": 0.564, "step": 17818 }, { "epoch": 22.80832, "grad_norm": 1.0811036825180054, "learning_rate": 1.4377751100440177e-05, "loss": 0.453, "step": 17819 }, { "epoch": 22.8096, "grad_norm": 1.1460031270980835, "learning_rate": 1.437575030012005e-05, "loss": 0.4835, "step": 17820 }, { "epoch": 22.81088, "grad_norm": 1.1830692291259766, "learning_rate": 1.437374949979992e-05, "loss": 0.4641, "step": 17821 }, { "epoch": 22.81216, "grad_norm": 1.204338550567627, "learning_rate": 1.4371748699479792e-05, "loss": 0.4938, "step": 17822 }, { "epoch": 22.81344, "grad_norm": 1.107739806175232, "learning_rate": 1.4369747899159664e-05, "loss": 0.459, "step": 17823 }, { "epoch": 22.81472, "grad_norm": 1.113206386566162, "learning_rate": 1.4367747098839538e-05, 
"loss": 0.459, "step": 17824 }, { "epoch": 22.816, "grad_norm": 1.164965033531189, "learning_rate": 1.4365746298519408e-05, "loss": 0.533, "step": 17825 }, { "epoch": 22.81728, "grad_norm": 1.1349233388900757, "learning_rate": 1.436374549819928e-05, "loss": 0.4582, "step": 17826 }, { "epoch": 22.81856, "grad_norm": 1.1366132497787476, "learning_rate": 1.4361744697879153e-05, "loss": 0.518, "step": 17827 }, { "epoch": 22.81984, "grad_norm": 1.064651370048523, "learning_rate": 1.4359743897559025e-05, "loss": 0.4514, "step": 17828 }, { "epoch": 22.82112, "grad_norm": 1.0854195356369019, "learning_rate": 1.4357743097238895e-05, "loss": 0.4654, "step": 17829 }, { "epoch": 22.822400000000002, "grad_norm": 1.1058958768844604, "learning_rate": 1.4355742296918767e-05, "loss": 0.4712, "step": 17830 }, { "epoch": 22.82368, "grad_norm": 1.1246731281280518, "learning_rate": 1.435374149659864e-05, "loss": 0.4796, "step": 17831 }, { "epoch": 22.82496, "grad_norm": 1.123199224472046, "learning_rate": 1.4351740696278512e-05, "loss": 0.521, "step": 17832 }, { "epoch": 22.82624, "grad_norm": 1.1568586826324463, "learning_rate": 1.4349739895958383e-05, "loss": 0.5103, "step": 17833 }, { "epoch": 22.82752, "grad_norm": 1.0084097385406494, "learning_rate": 1.4347739095638258e-05, "loss": 0.4231, "step": 17834 }, { "epoch": 22.8288, "grad_norm": 1.0467265844345093, "learning_rate": 1.4345738295318128e-05, "loss": 0.4596, "step": 17835 }, { "epoch": 22.83008, "grad_norm": 1.1111983060836792, "learning_rate": 1.4343737494998e-05, "loss": 0.4774, "step": 17836 }, { "epoch": 22.83136, "grad_norm": 1.1236525774002075, "learning_rate": 1.434173669467787e-05, "loss": 0.4637, "step": 17837 }, { "epoch": 22.83264, "grad_norm": 1.144333839416504, "learning_rate": 1.4339735894357745e-05, "loss": 0.5001, "step": 17838 }, { "epoch": 22.83392, "grad_norm": 1.1298198699951172, "learning_rate": 1.4337735094037615e-05, "loss": 0.4897, "step": 17839 }, { "epoch": 22.8352, "grad_norm": 1.0742158889770508, 
"learning_rate": 1.4335734293717487e-05, "loss": 0.4371, "step": 17840 }, { "epoch": 22.83648, "grad_norm": 1.1449413299560547, "learning_rate": 1.4333733493397357e-05, "loss": 0.4818, "step": 17841 }, { "epoch": 22.83776, "grad_norm": 1.1488326787948608, "learning_rate": 1.4331732693077233e-05, "loss": 0.4926, "step": 17842 }, { "epoch": 22.83904, "grad_norm": 1.1773475408554077, "learning_rate": 1.4329731892757103e-05, "loss": 0.5046, "step": 17843 }, { "epoch": 22.84032, "grad_norm": 1.1405889987945557, "learning_rate": 1.4327731092436975e-05, "loss": 0.4876, "step": 17844 }, { "epoch": 22.8416, "grad_norm": 1.0573958158493042, "learning_rate": 1.4325730292116848e-05, "loss": 0.4591, "step": 17845 }, { "epoch": 22.84288, "grad_norm": 1.1423496007919312, "learning_rate": 1.432372949179672e-05, "loss": 0.5063, "step": 17846 }, { "epoch": 22.84416, "grad_norm": 1.1753638982772827, "learning_rate": 1.432172869147659e-05, "loss": 0.5118, "step": 17847 }, { "epoch": 22.84544, "grad_norm": 1.114268183708191, "learning_rate": 1.4319727891156462e-05, "loss": 0.4528, "step": 17848 }, { "epoch": 22.84672, "grad_norm": 1.1355352401733398, "learning_rate": 1.4317727090836336e-05, "loss": 0.4828, "step": 17849 }, { "epoch": 22.848, "grad_norm": 1.199616551399231, "learning_rate": 1.4315726290516208e-05, "loss": 0.4811, "step": 17850 }, { "epoch": 22.84928, "grad_norm": 1.173221468925476, "learning_rate": 1.4313725490196078e-05, "loss": 0.5268, "step": 17851 }, { "epoch": 22.85056, "grad_norm": 1.1589332818984985, "learning_rate": 1.4311724689875953e-05, "loss": 0.4963, "step": 17852 }, { "epoch": 22.85184, "grad_norm": 1.137489676475525, "learning_rate": 1.4309723889555823e-05, "loss": 0.4951, "step": 17853 }, { "epoch": 22.85312, "grad_norm": 1.1129984855651855, "learning_rate": 1.4307723089235695e-05, "loss": 0.4814, "step": 17854 }, { "epoch": 22.8544, "grad_norm": 1.0853337049484253, "learning_rate": 1.4305722288915565e-05, "loss": 0.4432, "step": 17855 }, { "epoch": 
22.85568, "grad_norm": 1.1107585430145264, "learning_rate": 1.430372148859544e-05, "loss": 0.5175, "step": 17856 }, { "epoch": 22.85696, "grad_norm": 1.143998146057129, "learning_rate": 1.430172068827531e-05, "loss": 0.4882, "step": 17857 }, { "epoch": 22.85824, "grad_norm": 1.164641261100769, "learning_rate": 1.4299719887955182e-05, "loss": 0.53, "step": 17858 }, { "epoch": 22.85952, "grad_norm": 1.1436692476272583, "learning_rate": 1.4297719087635056e-05, "loss": 0.4866, "step": 17859 }, { "epoch": 22.8608, "grad_norm": 1.1730378866195679, "learning_rate": 1.4295718287314928e-05, "loss": 0.5107, "step": 17860 }, { "epoch": 22.86208, "grad_norm": 1.122793436050415, "learning_rate": 1.4293717486994798e-05, "loss": 0.5111, "step": 17861 }, { "epoch": 22.86336, "grad_norm": 1.0745701789855957, "learning_rate": 1.429171668667467e-05, "loss": 0.4485, "step": 17862 }, { "epoch": 22.86464, "grad_norm": 1.1484006643295288, "learning_rate": 1.4289715886354543e-05, "loss": 0.5085, "step": 17863 }, { "epoch": 22.86592, "grad_norm": 1.1889216899871826, "learning_rate": 1.4287715086034415e-05, "loss": 0.5249, "step": 17864 }, { "epoch": 22.8672, "grad_norm": 1.1078394651412964, "learning_rate": 1.4285714285714285e-05, "loss": 0.4893, "step": 17865 }, { "epoch": 22.86848, "grad_norm": 1.0795047283172607, "learning_rate": 1.4283713485394159e-05, "loss": 0.4388, "step": 17866 }, { "epoch": 22.86976, "grad_norm": 1.0773197412490845, "learning_rate": 1.428171268507403e-05, "loss": 0.4857, "step": 17867 }, { "epoch": 22.87104, "grad_norm": 1.1797206401824951, "learning_rate": 1.4279711884753903e-05, "loss": 0.5151, "step": 17868 }, { "epoch": 22.87232, "grad_norm": 1.1339863538742065, "learning_rate": 1.4277711084433773e-05, "loss": 0.457, "step": 17869 }, { "epoch": 22.8736, "grad_norm": 1.1581915616989136, "learning_rate": 1.4275710284113646e-05, "loss": 0.5057, "step": 17870 }, { "epoch": 22.87488, "grad_norm": 1.1262785196304321, "learning_rate": 1.4273709483793518e-05, "loss": 
0.5133, "step": 17871 }, { "epoch": 22.87616, "grad_norm": 1.1205724477767944, "learning_rate": 1.427170868347339e-05, "loss": 0.5048, "step": 17872 }, { "epoch": 22.87744, "grad_norm": 1.0860368013381958, "learning_rate": 1.4269707883153264e-05, "loss": 0.4702, "step": 17873 }, { "epoch": 22.87872, "grad_norm": 1.1230723857879639, "learning_rate": 1.4267707082833134e-05, "loss": 0.4754, "step": 17874 }, { "epoch": 22.88, "grad_norm": 1.1550164222717285, "learning_rate": 1.4265706282513006e-05, "loss": 0.5318, "step": 17875 }, { "epoch": 22.88128, "grad_norm": 1.0607014894485474, "learning_rate": 1.4263705482192878e-05, "loss": 0.4266, "step": 17876 }, { "epoch": 22.88256, "grad_norm": 1.1427565813064575, "learning_rate": 1.4261704681872751e-05, "loss": 0.5016, "step": 17877 }, { "epoch": 22.88384, "grad_norm": 1.082594394683838, "learning_rate": 1.4259703881552621e-05, "loss": 0.4745, "step": 17878 }, { "epoch": 22.88512, "grad_norm": 1.1618205308914185, "learning_rate": 1.4257703081232493e-05, "loss": 0.5358, "step": 17879 }, { "epoch": 22.8864, "grad_norm": 1.0929317474365234, "learning_rate": 1.4255702280912367e-05, "loss": 0.4662, "step": 17880 }, { "epoch": 22.88768, "grad_norm": 1.1140695810317993, "learning_rate": 1.4253701480592239e-05, "loss": 0.4729, "step": 17881 }, { "epoch": 22.88896, "grad_norm": 1.0910569429397583, "learning_rate": 1.4251700680272109e-05, "loss": 0.4473, "step": 17882 }, { "epoch": 22.89024, "grad_norm": 1.193246841430664, "learning_rate": 1.424969987995198e-05, "loss": 0.541, "step": 17883 }, { "epoch": 22.89152, "grad_norm": 1.0496735572814941, "learning_rate": 1.4247699079631854e-05, "loss": 0.494, "step": 17884 }, { "epoch": 22.8928, "grad_norm": 1.1013808250427246, "learning_rate": 1.4245698279311726e-05, "loss": 0.4842, "step": 17885 }, { "epoch": 22.89408, "grad_norm": 1.1200693845748901, "learning_rate": 1.4243697478991596e-05, "loss": 0.5158, "step": 17886 }, { "epoch": 22.89536, "grad_norm": 1.1038991212844849, 
"learning_rate": 1.4241696678671471e-05, "loss": 0.4855, "step": 17887 }, { "epoch": 22.89664, "grad_norm": 1.119197964668274, "learning_rate": 1.4239695878351342e-05, "loss": 0.5312, "step": 17888 }, { "epoch": 22.89792, "grad_norm": 1.0970196723937988, "learning_rate": 1.4237695078031213e-05, "loss": 0.481, "step": 17889 }, { "epoch": 22.8992, "grad_norm": 1.1380757093429565, "learning_rate": 1.4235694277711084e-05, "loss": 0.5077, "step": 17890 }, { "epoch": 22.90048, "grad_norm": 1.0854060649871826, "learning_rate": 1.4233693477390959e-05, "loss": 0.4607, "step": 17891 }, { "epoch": 22.90176, "grad_norm": 1.0660706758499146, "learning_rate": 1.4231692677070829e-05, "loss": 0.4662, "step": 17892 }, { "epoch": 22.90304, "grad_norm": 1.1607842445373535, "learning_rate": 1.42296918767507e-05, "loss": 0.5338, "step": 17893 }, { "epoch": 22.90432, "grad_norm": 1.1080985069274902, "learning_rate": 1.4227691076430571e-05, "loss": 0.4802, "step": 17894 }, { "epoch": 22.9056, "grad_norm": 1.1644606590270996, "learning_rate": 1.4225690276110446e-05, "loss": 0.5258, "step": 17895 }, { "epoch": 22.90688, "grad_norm": 1.1852686405181885, "learning_rate": 1.4223689475790316e-05, "loss": 0.4759, "step": 17896 }, { "epoch": 22.90816, "grad_norm": 1.1916351318359375, "learning_rate": 1.4221688675470188e-05, "loss": 0.4743, "step": 17897 }, { "epoch": 22.90944, "grad_norm": 1.1184988021850586, "learning_rate": 1.4219687875150062e-05, "loss": 0.5025, "step": 17898 }, { "epoch": 22.91072, "grad_norm": 1.147409439086914, "learning_rate": 1.4217687074829934e-05, "loss": 0.526, "step": 17899 }, { "epoch": 22.912, "grad_norm": 1.1477159261703491, "learning_rate": 1.4215686274509804e-05, "loss": 0.4882, "step": 17900 }, { "epoch": 22.91328, "grad_norm": 1.0916913747787476, "learning_rate": 1.4213685474189676e-05, "loss": 0.508, "step": 17901 }, { "epoch": 22.91456, "grad_norm": 1.0843031406402588, "learning_rate": 1.421168467386955e-05, "loss": 0.5053, "step": 17902 }, { "epoch": 
22.91584, "grad_norm": 1.1080353260040283, "learning_rate": 1.4209683873549421e-05, "loss": 0.5002, "step": 17903 }, { "epoch": 22.91712, "grad_norm": 1.1214649677276611, "learning_rate": 1.4207683073229291e-05, "loss": 0.5296, "step": 17904 }, { "epoch": 22.9184, "grad_norm": 1.0759986639022827, "learning_rate": 1.4205682272909165e-05, "loss": 0.4737, "step": 17905 }, { "epoch": 22.91968, "grad_norm": 1.1020501852035522, "learning_rate": 1.4203681472589037e-05, "loss": 0.4915, "step": 17906 }, { "epoch": 22.92096, "grad_norm": 1.1096168756484985, "learning_rate": 1.4201680672268908e-05, "loss": 0.4827, "step": 17907 }, { "epoch": 22.92224, "grad_norm": 1.0973726511001587, "learning_rate": 1.4199679871948779e-05, "loss": 0.4771, "step": 17908 }, { "epoch": 22.92352, "grad_norm": 1.143187403678894, "learning_rate": 1.4197679071628652e-05, "loss": 0.5155, "step": 17909 }, { "epoch": 22.9248, "grad_norm": 1.1421252489089966, "learning_rate": 1.4195678271308524e-05, "loss": 0.4643, "step": 17910 }, { "epoch": 22.92608, "grad_norm": 1.1452699899673462, "learning_rate": 1.4193677470988396e-05, "loss": 0.5185, "step": 17911 }, { "epoch": 22.92736, "grad_norm": 1.0854843854904175, "learning_rate": 1.419167667066827e-05, "loss": 0.4865, "step": 17912 }, { "epoch": 22.92864, "grad_norm": 1.117672324180603, "learning_rate": 1.418967587034814e-05, "loss": 0.4689, "step": 17913 }, { "epoch": 22.92992, "grad_norm": 1.135145902633667, "learning_rate": 1.4187675070028011e-05, "loss": 0.4971, "step": 17914 }, { "epoch": 22.9312, "grad_norm": 1.0750057697296143, "learning_rate": 1.4185674269707883e-05, "loss": 0.4357, "step": 17915 }, { "epoch": 22.932479999999998, "grad_norm": 1.1267346143722534, "learning_rate": 1.4183673469387757e-05, "loss": 0.4957, "step": 17916 }, { "epoch": 22.93376, "grad_norm": 1.1975816488265991, "learning_rate": 1.4181672669067627e-05, "loss": 0.5446, "step": 17917 }, { "epoch": 22.93504, "grad_norm": 1.12879478931427, "learning_rate": 
1.4179671868747499e-05, "loss": 0.4855, "step": 17918 }, { "epoch": 22.93632, "grad_norm": 1.1235387325286865, "learning_rate": 1.4177671068427372e-05, "loss": 0.5129, "step": 17919 }, { "epoch": 22.9376, "grad_norm": 1.1320890188217163, "learning_rate": 1.4175670268107244e-05, "loss": 0.4815, "step": 17920 }, { "epoch": 22.93888, "grad_norm": 1.1158738136291504, "learning_rate": 1.4173669467787114e-05, "loss": 0.4736, "step": 17921 }, { "epoch": 22.94016, "grad_norm": 1.0395013093948364, "learning_rate": 1.4171668667466986e-05, "loss": 0.4211, "step": 17922 }, { "epoch": 22.94144, "grad_norm": 1.0959734916687012, "learning_rate": 1.416966786714686e-05, "loss": 0.4874, "step": 17923 }, { "epoch": 22.94272, "grad_norm": 1.0824655294418335, "learning_rate": 1.4167667066826732e-05, "loss": 0.494, "step": 17924 }, { "epoch": 22.944, "grad_norm": 1.1469032764434814, "learning_rate": 1.4165666266506602e-05, "loss": 0.4917, "step": 17925 }, { "epoch": 22.94528, "grad_norm": 1.2326279878616333, "learning_rate": 1.4163665466186477e-05, "loss": 0.5198, "step": 17926 }, { "epoch": 22.94656, "grad_norm": 1.1711357831954956, "learning_rate": 1.4161664665866347e-05, "loss": 0.5073, "step": 17927 }, { "epoch": 22.94784, "grad_norm": 1.1785284280776978, "learning_rate": 1.415966386554622e-05, "loss": 0.4986, "step": 17928 }, { "epoch": 22.94912, "grad_norm": 1.1128637790679932, "learning_rate": 1.415766306522609e-05, "loss": 0.4626, "step": 17929 }, { "epoch": 22.9504, "grad_norm": 1.1575325727462769, "learning_rate": 1.4155662264905965e-05, "loss": 0.5185, "step": 17930 }, { "epoch": 22.95168, "grad_norm": 1.11526620388031, "learning_rate": 1.4153661464585835e-05, "loss": 0.47, "step": 17931 }, { "epoch": 22.95296, "grad_norm": 1.1603832244873047, "learning_rate": 1.4151660664265707e-05, "loss": 0.479, "step": 17932 }, { "epoch": 22.95424, "grad_norm": 1.1286890506744385, "learning_rate": 1.414965986394558e-05, "loss": 0.4544, "step": 17933 }, { "epoch": 22.95552, "grad_norm": 
1.0917226076126099, "learning_rate": 1.4147659063625452e-05, "loss": 0.4819, "step": 17934 }, { "epoch": 22.9568, "grad_norm": 1.1569414138793945, "learning_rate": 1.4145658263305322e-05, "loss": 0.5174, "step": 17935 }, { "epoch": 22.95808, "grad_norm": 1.1322275400161743, "learning_rate": 1.4143657462985194e-05, "loss": 0.5149, "step": 17936 }, { "epoch": 22.95936, "grad_norm": 1.1222246885299683, "learning_rate": 1.4141656662665068e-05, "loss": 0.5159, "step": 17937 }, { "epoch": 22.96064, "grad_norm": 1.0865634679794312, "learning_rate": 1.413965586234494e-05, "loss": 0.4598, "step": 17938 }, { "epoch": 22.96192, "grad_norm": 1.2040810585021973, "learning_rate": 1.413765506202481e-05, "loss": 0.5407, "step": 17939 }, { "epoch": 22.9632, "grad_norm": 1.1161025762557983, "learning_rate": 1.4135654261704683e-05, "loss": 0.52, "step": 17940 }, { "epoch": 22.964480000000002, "grad_norm": 1.1312873363494873, "learning_rate": 1.4133653461384555e-05, "loss": 0.5067, "step": 17941 }, { "epoch": 22.96576, "grad_norm": 1.0572233200073242, "learning_rate": 1.4131652661064427e-05, "loss": 0.4639, "step": 17942 }, { "epoch": 22.96704, "grad_norm": 1.0853049755096436, "learning_rate": 1.4129651860744297e-05, "loss": 0.4694, "step": 17943 }, { "epoch": 22.96832, "grad_norm": 1.133817434310913, "learning_rate": 1.412765106042417e-05, "loss": 0.5031, "step": 17944 }, { "epoch": 22.9696, "grad_norm": 1.1215367317199707, "learning_rate": 1.4125650260104042e-05, "loss": 0.4852, "step": 17945 }, { "epoch": 22.97088, "grad_norm": 1.1265122890472412, "learning_rate": 1.4123649459783914e-05, "loss": 0.4838, "step": 17946 }, { "epoch": 22.97216, "grad_norm": 1.1352272033691406, "learning_rate": 1.4121648659463788e-05, "loss": 0.4701, "step": 17947 }, { "epoch": 22.97344, "grad_norm": 1.0938243865966797, "learning_rate": 1.4119647859143658e-05, "loss": 0.4614, "step": 17948 }, { "epoch": 22.97472, "grad_norm": 1.147600769996643, "learning_rate": 1.411764705882353e-05, "loss": 0.4799, 
"step": 17949 }, { "epoch": 22.976, "grad_norm": 1.1563359498977661, "learning_rate": 1.4115646258503402e-05, "loss": 0.4585, "step": 17950 }, { "epoch": 22.97728, "grad_norm": 1.1627904176712036, "learning_rate": 1.4113645458183275e-05, "loss": 0.56, "step": 17951 }, { "epoch": 22.97856, "grad_norm": 1.1048862934112549, "learning_rate": 1.4111644657863145e-05, "loss": 0.5129, "step": 17952 }, { "epoch": 22.97984, "grad_norm": 1.1043338775634766, "learning_rate": 1.4109643857543017e-05, "loss": 0.4792, "step": 17953 }, { "epoch": 22.98112, "grad_norm": 1.151166319847107, "learning_rate": 1.4107643057222889e-05, "loss": 0.5038, "step": 17954 }, { "epoch": 22.9824, "grad_norm": 1.1335054636001587, "learning_rate": 1.4105642256902763e-05, "loss": 0.5076, "step": 17955 }, { "epoch": 22.98368, "grad_norm": 1.0865764617919922, "learning_rate": 1.4103641456582633e-05, "loss": 0.4719, "step": 17956 }, { "epoch": 22.98496, "grad_norm": 1.1240063905715942, "learning_rate": 1.4101640656262505e-05, "loss": 0.5044, "step": 17957 }, { "epoch": 22.98624, "grad_norm": 1.142352819442749, "learning_rate": 1.4099639855942378e-05, "loss": 0.5157, "step": 17958 }, { "epoch": 22.98752, "grad_norm": 1.0918866395950317, "learning_rate": 1.409763905562225e-05, "loss": 0.4706, "step": 17959 }, { "epoch": 22.9888, "grad_norm": 1.0810490846633911, "learning_rate": 1.409563825530212e-05, "loss": 0.4711, "step": 17960 }, { "epoch": 22.99008, "grad_norm": 1.1601754426956177, "learning_rate": 1.4093637454981992e-05, "loss": 0.5184, "step": 17961 }, { "epoch": 22.99136, "grad_norm": 1.089206576347351, "learning_rate": 1.4091636654661866e-05, "loss": 0.5103, "step": 17962 }, { "epoch": 22.99264, "grad_norm": 1.1088589429855347, "learning_rate": 1.4089635854341738e-05, "loss": 0.4609, "step": 17963 }, { "epoch": 22.99392, "grad_norm": 1.1909695863723755, "learning_rate": 1.4087635054021608e-05, "loss": 0.5124, "step": 17964 }, { "epoch": 22.9952, "grad_norm": 1.0958809852600098, "learning_rate": 
1.4085634253701483e-05, "loss": 0.4415, "step": 17965 }, { "epoch": 22.99648, "grad_norm": 1.120891809463501, "learning_rate": 1.4083633453381353e-05, "loss": 0.4856, "step": 17966 }, { "epoch": 22.99776, "grad_norm": 1.111769437789917, "learning_rate": 1.4081632653061225e-05, "loss": 0.5141, "step": 17967 }, { "epoch": 22.99904, "grad_norm": 1.1565723419189453, "learning_rate": 1.4079631852741095e-05, "loss": 0.5062, "step": 17968 }, { "epoch": 23.00032, "grad_norm": 2.3065271377563477, "learning_rate": 1.407763105242097e-05, "loss": 0.7523, "step": 17969 }, { "epoch": 23.0016, "grad_norm": 1.0669747591018677, "learning_rate": 1.407563025210084e-05, "loss": 0.4847, "step": 17970 }, { "epoch": 23.00288, "grad_norm": 1.0902814865112305, "learning_rate": 1.4073629451780712e-05, "loss": 0.4821, "step": 17971 }, { "epoch": 23.00416, "grad_norm": 1.1460057497024536, "learning_rate": 1.4071628651460586e-05, "loss": 0.4837, "step": 17972 }, { "epoch": 23.00544, "grad_norm": 1.0994794368743896, "learning_rate": 1.4069627851140458e-05, "loss": 0.4203, "step": 17973 }, { "epoch": 23.00672, "grad_norm": 1.093055009841919, "learning_rate": 1.4067627050820328e-05, "loss": 0.4731, "step": 17974 }, { "epoch": 23.008, "grad_norm": 1.139220952987671, "learning_rate": 1.40656262505002e-05, "loss": 0.4932, "step": 17975 }, { "epoch": 23.00928, "grad_norm": 1.0311874151229858, "learning_rate": 1.4063625450180073e-05, "loss": 0.4708, "step": 17976 }, { "epoch": 23.01056, "grad_norm": 1.0706690549850464, "learning_rate": 1.4061624649859945e-05, "loss": 0.4173, "step": 17977 }, { "epoch": 23.01184, "grad_norm": 1.192206621170044, "learning_rate": 1.4059623849539815e-05, "loss": 0.4742, "step": 17978 }, { "epoch": 23.01312, "grad_norm": 1.13650381565094, "learning_rate": 1.4057623049219689e-05, "loss": 0.4875, "step": 17979 }, { "epoch": 23.0144, "grad_norm": 1.1168575286865234, "learning_rate": 1.405562224889956e-05, "loss": 0.4488, "step": 17980 }, { "epoch": 23.01568, "grad_norm": 
1.1627042293548584, "learning_rate": 1.4053621448579433e-05, "loss": 0.4947, "step": 17981 }, { "epoch": 23.01696, "grad_norm": 1.2101789712905884, "learning_rate": 1.4051620648259303e-05, "loss": 0.5105, "step": 17982 }, { "epoch": 23.01824, "grad_norm": 1.080032467842102, "learning_rate": 1.4049619847939176e-05, "loss": 0.4658, "step": 17983 }, { "epoch": 23.01952, "grad_norm": 1.169611930847168, "learning_rate": 1.4047619047619048e-05, "loss": 0.4832, "step": 17984 }, { "epoch": 23.0208, "grad_norm": 1.147308588027954, "learning_rate": 1.404561824729892e-05, "loss": 0.4709, "step": 17985 }, { "epoch": 23.02208, "grad_norm": 1.1938717365264893, "learning_rate": 1.4043617446978794e-05, "loss": 0.5101, "step": 17986 }, { "epoch": 23.02336, "grad_norm": 1.174412488937378, "learning_rate": 1.4041616646658664e-05, "loss": 0.4884, "step": 17987 }, { "epoch": 23.02464, "grad_norm": 1.1288552284240723, "learning_rate": 1.4039615846338536e-05, "loss": 0.4617, "step": 17988 }, { "epoch": 23.02592, "grad_norm": 1.100049614906311, "learning_rate": 1.4037615046018408e-05, "loss": 0.4318, "step": 17989 }, { "epoch": 23.0272, "grad_norm": 1.1527884006500244, "learning_rate": 1.4035614245698281e-05, "loss": 0.4885, "step": 17990 }, { "epoch": 23.02848, "grad_norm": 1.0793335437774658, "learning_rate": 1.4033613445378151e-05, "loss": 0.4429, "step": 17991 }, { "epoch": 23.02976, "grad_norm": 1.1420992612838745, "learning_rate": 1.4031612645058023e-05, "loss": 0.4655, "step": 17992 }, { "epoch": 23.03104, "grad_norm": 1.115891695022583, "learning_rate": 1.4029611844737897e-05, "loss": 0.5128, "step": 17993 }, { "epoch": 23.03232, "grad_norm": 1.1061939001083374, "learning_rate": 1.4027611044417769e-05, "loss": 0.4666, "step": 17994 }, { "epoch": 23.0336, "grad_norm": 1.1033188104629517, "learning_rate": 1.4025610244097639e-05, "loss": 0.4384, "step": 17995 }, { "epoch": 23.03488, "grad_norm": 1.1487460136413574, "learning_rate": 1.402360944377751e-05, "loss": 0.4253, "step": 17996 
}, { "epoch": 23.03616, "grad_norm": 1.210303544998169, "learning_rate": 1.4021608643457384e-05, "loss": 0.4954, "step": 17997 }, { "epoch": 23.03744, "grad_norm": 1.119410753250122, "learning_rate": 1.4019607843137256e-05, "loss": 0.4598, "step": 17998 }, { "epoch": 23.03872, "grad_norm": 1.1100172996520996, "learning_rate": 1.4017607042817126e-05, "loss": 0.4887, "step": 17999 }, { "epoch": 23.04, "grad_norm": 1.1196397542953491, "learning_rate": 1.4015606242497001e-05, "loss": 0.4729, "step": 18000 }, { "epoch": 23.04128, "grad_norm": 1.1445229053497314, "learning_rate": 1.4013605442176872e-05, "loss": 0.4818, "step": 18001 }, { "epoch": 23.04256, "grad_norm": 1.0524245500564575, "learning_rate": 1.4011604641856743e-05, "loss": 0.45, "step": 18002 }, { "epoch": 23.04384, "grad_norm": 1.1181271076202393, "learning_rate": 1.4009603841536614e-05, "loss": 0.4498, "step": 18003 }, { "epoch": 23.04512, "grad_norm": 1.0702544450759888, "learning_rate": 1.4007603041216489e-05, "loss": 0.4782, "step": 18004 }, { "epoch": 23.0464, "grad_norm": 1.1783769130706787, "learning_rate": 1.4005602240896359e-05, "loss": 0.5008, "step": 18005 }, { "epoch": 23.04768, "grad_norm": 1.1337677240371704, "learning_rate": 1.400360144057623e-05, "loss": 0.465, "step": 18006 }, { "epoch": 23.04896, "grad_norm": 1.0862562656402588, "learning_rate": 1.4001600640256101e-05, "loss": 0.4675, "step": 18007 }, { "epoch": 23.05024, "grad_norm": 1.122215747833252, "learning_rate": 1.3999599839935976e-05, "loss": 0.4764, "step": 18008 }, { "epoch": 23.05152, "grad_norm": 1.1443709135055542, "learning_rate": 1.3997599039615846e-05, "loss": 0.496, "step": 18009 }, { "epoch": 23.0528, "grad_norm": 1.1884753704071045, "learning_rate": 1.3995598239295718e-05, "loss": 0.5125, "step": 18010 }, { "epoch": 23.05408, "grad_norm": 1.1133694648742676, "learning_rate": 1.3993597438975592e-05, "loss": 0.4876, "step": 18011 }, { "epoch": 23.05536, "grad_norm": 1.0614551305770874, "learning_rate": 
1.3991596638655464e-05, "loss": 0.4339, "step": 18012 }, { "epoch": 23.05664, "grad_norm": 1.1484719514846802, "learning_rate": 1.3989595838335334e-05, "loss": 0.4655, "step": 18013 }, { "epoch": 23.05792, "grad_norm": 1.1289807558059692, "learning_rate": 1.3987595038015206e-05, "loss": 0.4593, "step": 18014 }, { "epoch": 23.0592, "grad_norm": 1.1590949296951294, "learning_rate": 1.398559423769508e-05, "loss": 0.4633, "step": 18015 }, { "epoch": 23.06048, "grad_norm": 1.1496042013168335, "learning_rate": 1.3983593437374951e-05, "loss": 0.4818, "step": 18016 }, { "epoch": 23.06176, "grad_norm": 1.1318520307540894, "learning_rate": 1.3981592637054821e-05, "loss": 0.4942, "step": 18017 }, { "epoch": 23.06304, "grad_norm": 1.1314492225646973, "learning_rate": 1.3979591836734696e-05, "loss": 0.4815, "step": 18018 }, { "epoch": 23.06432, "grad_norm": 1.1428439617156982, "learning_rate": 1.3977591036414567e-05, "loss": 0.4794, "step": 18019 }, { "epoch": 23.0656, "grad_norm": 1.1494139432907104, "learning_rate": 1.3975590236094438e-05, "loss": 0.4554, "step": 18020 }, { "epoch": 23.06688, "grad_norm": 1.0700560808181763, "learning_rate": 1.3973589435774309e-05, "loss": 0.4345, "step": 18021 }, { "epoch": 23.06816, "grad_norm": 1.1106326580047607, "learning_rate": 1.3971588635454184e-05, "loss": 0.4747, "step": 18022 }, { "epoch": 23.06944, "grad_norm": 1.1512048244476318, "learning_rate": 1.3969587835134054e-05, "loss": 0.478, "step": 18023 }, { "epoch": 23.07072, "grad_norm": 1.0818241834640503, "learning_rate": 1.3967587034813926e-05, "loss": 0.4671, "step": 18024 }, { "epoch": 23.072, "grad_norm": 1.1178078651428223, "learning_rate": 1.39655862344938e-05, "loss": 0.4329, "step": 18025 }, { "epoch": 23.07328, "grad_norm": 1.1609102487564087, "learning_rate": 1.3963585434173671e-05, "loss": 0.4793, "step": 18026 }, { "epoch": 23.07456, "grad_norm": 1.1655328273773193, "learning_rate": 1.3961584633853541e-05, "loss": 0.4715, "step": 18027 }, { "epoch": 23.07584, 
"grad_norm": 1.150871753692627, "learning_rate": 1.3959583833533413e-05, "loss": 0.4812, "step": 18028 }, { "epoch": 23.07712, "grad_norm": 1.2036709785461426, "learning_rate": 1.3957583033213287e-05, "loss": 0.5125, "step": 18029 }, { "epoch": 23.0784, "grad_norm": 1.169793725013733, "learning_rate": 1.3955582232893159e-05, "loss": 0.4995, "step": 18030 }, { "epoch": 23.07968, "grad_norm": 1.1694694757461548, "learning_rate": 1.3953581432573029e-05, "loss": 0.5015, "step": 18031 }, { "epoch": 23.08096, "grad_norm": 1.1842191219329834, "learning_rate": 1.3951580632252902e-05, "loss": 0.5023, "step": 18032 }, { "epoch": 23.08224, "grad_norm": 1.1319433450698853, "learning_rate": 1.3949579831932774e-05, "loss": 0.4759, "step": 18033 }, { "epoch": 23.08352, "grad_norm": 1.0593634843826294, "learning_rate": 1.3947579031612646e-05, "loss": 0.4504, "step": 18034 }, { "epoch": 23.0848, "grad_norm": 1.1120679378509521, "learning_rate": 1.3945578231292516e-05, "loss": 0.5023, "step": 18035 }, { "epoch": 23.08608, "grad_norm": 1.1178866624832153, "learning_rate": 1.394357743097239e-05, "loss": 0.4568, "step": 18036 }, { "epoch": 23.08736, "grad_norm": 1.2306301593780518, "learning_rate": 1.3941576630652262e-05, "loss": 0.5722, "step": 18037 }, { "epoch": 23.08864, "grad_norm": 1.0901416540145874, "learning_rate": 1.3939575830332134e-05, "loss": 0.4703, "step": 18038 }, { "epoch": 23.08992, "grad_norm": 1.0788695812225342, "learning_rate": 1.3937575030012007e-05, "loss": 0.4225, "step": 18039 }, { "epoch": 23.0912, "grad_norm": 1.1180025339126587, "learning_rate": 1.3935574229691877e-05, "loss": 0.4538, "step": 18040 }, { "epoch": 23.09248, "grad_norm": 1.1626075506210327, "learning_rate": 1.393357342937175e-05, "loss": 0.4787, "step": 18041 }, { "epoch": 23.09376, "grad_norm": 1.1562151908874512, "learning_rate": 1.3931572629051621e-05, "loss": 0.4508, "step": 18042 }, { "epoch": 23.09504, "grad_norm": 1.0841056108474731, "learning_rate": 1.3929571828731495e-05, "loss": 
0.4401, "step": 18043 }, { "epoch": 23.09632, "grad_norm": 1.1172791719436646, "learning_rate": 1.3927571028411365e-05, "loss": 0.4876, "step": 18044 }, { "epoch": 23.0976, "grad_norm": 1.1132711172103882, "learning_rate": 1.3925570228091237e-05, "loss": 0.483, "step": 18045 }, { "epoch": 23.09888, "grad_norm": 1.1011528968811035, "learning_rate": 1.392356942777111e-05, "loss": 0.4601, "step": 18046 }, { "epoch": 23.10016, "grad_norm": 1.2015907764434814, "learning_rate": 1.3921568627450982e-05, "loss": 0.4793, "step": 18047 }, { "epoch": 23.10144, "grad_norm": 1.1870005130767822, "learning_rate": 1.3919567827130852e-05, "loss": 0.4781, "step": 18048 }, { "epoch": 23.10272, "grad_norm": 1.1651169061660767, "learning_rate": 1.3917567026810724e-05, "loss": 0.4782, "step": 18049 }, { "epoch": 23.104, "grad_norm": 1.1237787008285522, "learning_rate": 1.3915566226490598e-05, "loss": 0.4722, "step": 18050 }, { "epoch": 23.10528, "grad_norm": 1.1419413089752197, "learning_rate": 1.391356542617047e-05, "loss": 0.4772, "step": 18051 }, { "epoch": 23.10656, "grad_norm": 1.1415618658065796, "learning_rate": 1.391156462585034e-05, "loss": 0.4407, "step": 18052 }, { "epoch": 23.10784, "grad_norm": 1.1670557260513306, "learning_rate": 1.3909563825530215e-05, "loss": 0.5378, "step": 18053 }, { "epoch": 23.10912, "grad_norm": 1.1420172452926636, "learning_rate": 1.3907563025210085e-05, "loss": 0.4717, "step": 18054 }, { "epoch": 23.1104, "grad_norm": 1.1076371669769287, "learning_rate": 1.3905562224889957e-05, "loss": 0.4571, "step": 18055 }, { "epoch": 23.11168, "grad_norm": 1.1110548973083496, "learning_rate": 1.3903561424569827e-05, "loss": 0.4484, "step": 18056 }, { "epoch": 23.11296, "grad_norm": 1.1428343057632446, "learning_rate": 1.3901560624249702e-05, "loss": 0.5226, "step": 18057 }, { "epoch": 23.11424, "grad_norm": 1.1347376108169556, "learning_rate": 1.3899559823929572e-05, "loss": 0.4633, "step": 18058 }, { "epoch": 23.11552, "grad_norm": 1.1556316614151, 
"learning_rate": 1.3897559023609444e-05, "loss": 0.4751, "step": 18059 }, { "epoch": 23.1168, "grad_norm": 1.1056498289108276, "learning_rate": 1.3895558223289318e-05, "loss": 0.4446, "step": 18060 }, { "epoch": 23.11808, "grad_norm": 1.194538950920105, "learning_rate": 1.389355742296919e-05, "loss": 0.5121, "step": 18061 }, { "epoch": 23.11936, "grad_norm": 1.0786786079406738, "learning_rate": 1.389155662264906e-05, "loss": 0.482, "step": 18062 }, { "epoch": 23.12064, "grad_norm": 1.1537976264953613, "learning_rate": 1.3889555822328932e-05, "loss": 0.5048, "step": 18063 }, { "epoch": 23.12192, "grad_norm": 1.1101106405258179, "learning_rate": 1.3887555022008805e-05, "loss": 0.5427, "step": 18064 }, { "epoch": 23.1232, "grad_norm": 1.0096379518508911, "learning_rate": 1.3885554221688677e-05, "loss": 0.4264, "step": 18065 }, { "epoch": 23.12448, "grad_norm": 1.1329681873321533, "learning_rate": 1.3883553421368547e-05, "loss": 0.5005, "step": 18066 }, { "epoch": 23.12576, "grad_norm": 1.1219654083251953, "learning_rate": 1.3881552621048419e-05, "loss": 0.4677, "step": 18067 }, { "epoch": 23.12704, "grad_norm": 1.1316763162612915, "learning_rate": 1.3879551820728293e-05, "loss": 0.4857, "step": 18068 }, { "epoch": 23.12832, "grad_norm": 1.1240242719650269, "learning_rate": 1.3877551020408165e-05, "loss": 0.481, "step": 18069 }, { "epoch": 23.1296, "grad_norm": 1.0853866338729858, "learning_rate": 1.3875550220088035e-05, "loss": 0.4481, "step": 18070 }, { "epoch": 23.13088, "grad_norm": 1.1638892889022827, "learning_rate": 1.3873549419767908e-05, "loss": 0.5032, "step": 18071 }, { "epoch": 23.13216, "grad_norm": 1.1282097101211548, "learning_rate": 1.387154861944778e-05, "loss": 0.4717, "step": 18072 }, { "epoch": 23.13344, "grad_norm": 1.1175307035446167, "learning_rate": 1.3869547819127652e-05, "loss": 0.4375, "step": 18073 }, { "epoch": 23.13472, "grad_norm": 1.0587260723114014, "learning_rate": 1.3867547018807522e-05, "loss": 0.4765, "step": 18074 }, { "epoch": 
23.136, "grad_norm": 1.179371953010559, "learning_rate": 1.3865546218487396e-05, "loss": 0.5337, "step": 18075 }, { "epoch": 23.13728, "grad_norm": 1.153063178062439, "learning_rate": 1.3863545418167268e-05, "loss": 0.4959, "step": 18076 }, { "epoch": 23.13856, "grad_norm": 1.1271103620529175, "learning_rate": 1.386154461784714e-05, "loss": 0.478, "step": 18077 }, { "epoch": 23.13984, "grad_norm": 1.0915660858154297, "learning_rate": 1.3859543817527013e-05, "loss": 0.4598, "step": 18078 }, { "epoch": 23.14112, "grad_norm": 1.1732710599899292, "learning_rate": 1.3857543017206883e-05, "loss": 0.4857, "step": 18079 }, { "epoch": 23.1424, "grad_norm": 1.1222920417785645, "learning_rate": 1.3855542216886755e-05, "loss": 0.4344, "step": 18080 }, { "epoch": 23.14368, "grad_norm": 1.1178438663482666, "learning_rate": 1.3853541416566627e-05, "loss": 0.4794, "step": 18081 }, { "epoch": 23.14496, "grad_norm": 1.1883807182312012, "learning_rate": 1.38515406162465e-05, "loss": 0.4837, "step": 18082 }, { "epoch": 23.14624, "grad_norm": 1.1413466930389404, "learning_rate": 1.384953981592637e-05, "loss": 0.4826, "step": 18083 }, { "epoch": 23.14752, "grad_norm": 1.0779472589492798, "learning_rate": 1.3847539015606242e-05, "loss": 0.4544, "step": 18084 }, { "epoch": 23.1488, "grad_norm": 1.0848368406295776, "learning_rate": 1.3845538215286116e-05, "loss": 0.4832, "step": 18085 }, { "epoch": 23.15008, "grad_norm": 1.117047667503357, "learning_rate": 1.3843537414965988e-05, "loss": 0.4593, "step": 18086 }, { "epoch": 23.15136, "grad_norm": 1.113674283027649, "learning_rate": 1.3841536614645858e-05, "loss": 0.4669, "step": 18087 }, { "epoch": 23.15264, "grad_norm": 1.186633825302124, "learning_rate": 1.383953581432573e-05, "loss": 0.4913, "step": 18088 }, { "epoch": 23.15392, "grad_norm": 1.1024609804153442, "learning_rate": 1.3837535014005603e-05, "loss": 0.4779, "step": 18089 }, { "epoch": 23.1552, "grad_norm": 1.0634430646896362, "learning_rate": 1.3835534213685475e-05, "loss": 
0.4301, "step": 18090 }, { "epoch": 23.15648, "grad_norm": 1.10919988155365, "learning_rate": 1.3833533413365345e-05, "loss": 0.4733, "step": 18091 }, { "epoch": 23.15776, "grad_norm": 1.1104084253311157, "learning_rate": 1.383153261304522e-05, "loss": 0.4612, "step": 18092 }, { "epoch": 23.15904, "grad_norm": 1.1497199535369873, "learning_rate": 1.382953181272509e-05, "loss": 0.4389, "step": 18093 }, { "epoch": 23.16032, "grad_norm": 1.177474856376648, "learning_rate": 1.3827531012404963e-05, "loss": 0.4916, "step": 18094 }, { "epoch": 23.1616, "grad_norm": 1.1319663524627686, "learning_rate": 1.3825530212084833e-05, "loss": 0.4834, "step": 18095 }, { "epoch": 23.16288, "grad_norm": 1.1392722129821777, "learning_rate": 1.3823529411764708e-05, "loss": 0.4928, "step": 18096 }, { "epoch": 23.16416, "grad_norm": 1.1550222635269165, "learning_rate": 1.3821528611444578e-05, "loss": 0.4575, "step": 18097 }, { "epoch": 23.16544, "grad_norm": 1.17167067527771, "learning_rate": 1.381952781112445e-05, "loss": 0.5154, "step": 18098 }, { "epoch": 23.16672, "grad_norm": 1.167103886604309, "learning_rate": 1.3817527010804324e-05, "loss": 0.4935, "step": 18099 }, { "epoch": 23.168, "grad_norm": 1.1817306280136108, "learning_rate": 1.3815526210484195e-05, "loss": 0.4877, "step": 18100 }, { "epoch": 23.16928, "grad_norm": 1.1134272813796997, "learning_rate": 1.3813525410164066e-05, "loss": 0.4528, "step": 18101 }, { "epoch": 23.17056, "grad_norm": 1.1140645742416382, "learning_rate": 1.3811524609843938e-05, "loss": 0.4576, "step": 18102 }, { "epoch": 23.17184, "grad_norm": 1.140651822090149, "learning_rate": 1.3809523809523811e-05, "loss": 0.4729, "step": 18103 }, { "epoch": 23.17312, "grad_norm": 1.1449460983276367, "learning_rate": 1.3807523009203683e-05, "loss": 0.4636, "step": 18104 }, { "epoch": 23.1744, "grad_norm": 1.1074920892715454, "learning_rate": 1.3805522208883553e-05, "loss": 0.4774, "step": 18105 }, { "epoch": 23.17568, "grad_norm": 1.1994163990020752, 
"learning_rate": 1.3803521408563427e-05, "loss": 0.4314, "step": 18106 }, { "epoch": 23.17696, "grad_norm": 1.1801729202270508, "learning_rate": 1.3801520608243298e-05, "loss": 0.4872, "step": 18107 }, { "epoch": 23.17824, "grad_norm": 1.1620689630508423, "learning_rate": 1.379951980792317e-05, "loss": 0.473, "step": 18108 }, { "epoch": 23.17952, "grad_norm": 1.1328171491622925, "learning_rate": 1.379751900760304e-05, "loss": 0.4785, "step": 18109 }, { "epoch": 23.1808, "grad_norm": 1.1725462675094604, "learning_rate": 1.3795518207282914e-05, "loss": 0.4587, "step": 18110 }, { "epoch": 23.18208, "grad_norm": 1.1395995616912842, "learning_rate": 1.3793517406962786e-05, "loss": 0.4554, "step": 18111 }, { "epoch": 23.18336, "grad_norm": 1.0973244905471802, "learning_rate": 1.3791516606642658e-05, "loss": 0.4621, "step": 18112 }, { "epoch": 23.18464, "grad_norm": 1.1823533773422241, "learning_rate": 1.3789515806322531e-05, "loss": 0.5013, "step": 18113 }, { "epoch": 23.18592, "grad_norm": 1.124189019203186, "learning_rate": 1.3787515006002401e-05, "loss": 0.4375, "step": 18114 }, { "epoch": 23.1872, "grad_norm": 1.0450643301010132, "learning_rate": 1.3785514205682273e-05, "loss": 0.4448, "step": 18115 }, { "epoch": 23.18848, "grad_norm": 1.1765981912612915, "learning_rate": 1.3783513405362145e-05, "loss": 0.5302, "step": 18116 }, { "epoch": 23.18976, "grad_norm": 1.1082278490066528, "learning_rate": 1.3781512605042019e-05, "loss": 0.4354, "step": 18117 }, { "epoch": 23.19104, "grad_norm": 1.123656153678894, "learning_rate": 1.3779511804721889e-05, "loss": 0.4896, "step": 18118 }, { "epoch": 23.19232, "grad_norm": 1.1027441024780273, "learning_rate": 1.377751100440176e-05, "loss": 0.4553, "step": 18119 }, { "epoch": 23.1936, "grad_norm": 1.1921277046203613, "learning_rate": 1.3775510204081633e-05, "loss": 0.5235, "step": 18120 }, { "epoch": 23.19488, "grad_norm": 1.1635897159576416, "learning_rate": 1.3773509403761506e-05, "loss": 0.4774, "step": 18121 }, { "epoch": 
23.19616, "grad_norm": 1.1405202150344849, "learning_rate": 1.3771508603441376e-05, "loss": 0.4663, "step": 18122 }, { "epoch": 23.19744, "grad_norm": 1.0812584161758423, "learning_rate": 1.3769507803121248e-05, "loss": 0.4246, "step": 18123 }, { "epoch": 23.19872, "grad_norm": 1.1046404838562012, "learning_rate": 1.3767507002801122e-05, "loss": 0.444, "step": 18124 }, { "epoch": 23.2, "grad_norm": 1.1270830631256104, "learning_rate": 1.3765506202480994e-05, "loss": 0.4681, "step": 18125 }, { "epoch": 23.20128, "grad_norm": 1.092661738395691, "learning_rate": 1.3763505402160864e-05, "loss": 0.4537, "step": 18126 }, { "epoch": 23.20256, "grad_norm": 1.1461021900177002, "learning_rate": 1.3761504601840736e-05, "loss": 0.4697, "step": 18127 }, { "epoch": 23.20384, "grad_norm": 1.1606507301330566, "learning_rate": 1.375950380152061e-05, "loss": 0.5199, "step": 18128 }, { "epoch": 23.20512, "grad_norm": 1.1296125650405884, "learning_rate": 1.3757503001200481e-05, "loss": 0.4839, "step": 18129 }, { "epoch": 23.2064, "grad_norm": 1.2692511081695557, "learning_rate": 1.3755502200880351e-05, "loss": 0.4903, "step": 18130 }, { "epoch": 23.20768, "grad_norm": 1.1228126287460327, "learning_rate": 1.3753501400560226e-05, "loss": 0.4585, "step": 18131 }, { "epoch": 23.20896, "grad_norm": 1.1252394914627075, "learning_rate": 1.3751500600240097e-05, "loss": 0.5162, "step": 18132 }, { "epoch": 23.21024, "grad_norm": 1.128462791442871, "learning_rate": 1.3749499799919968e-05, "loss": 0.4823, "step": 18133 }, { "epoch": 23.21152, "grad_norm": 1.157885193824768, "learning_rate": 1.3747498999599839e-05, "loss": 0.5073, "step": 18134 }, { "epoch": 23.2128, "grad_norm": 1.0952425003051758, "learning_rate": 1.3745498199279714e-05, "loss": 0.5003, "step": 18135 }, { "epoch": 23.21408, "grad_norm": 1.1897740364074707, "learning_rate": 1.3743497398959584e-05, "loss": 0.5091, "step": 18136 }, { "epoch": 23.21536, "grad_norm": 1.1394944190979004, "learning_rate": 1.3741496598639456e-05, 
"loss": 0.4115, "step": 18137 }, { "epoch": 23.21664, "grad_norm": 1.1047616004943848, "learning_rate": 1.373949579831933e-05, "loss": 0.4888, "step": 18138 }, { "epoch": 23.21792, "grad_norm": 1.1248316764831543, "learning_rate": 1.3737494997999201e-05, "loss": 0.4505, "step": 18139 }, { "epoch": 23.2192, "grad_norm": 1.1958407163619995, "learning_rate": 1.3735494197679071e-05, "loss": 0.5051, "step": 18140 }, { "epoch": 23.22048, "grad_norm": 1.186632513999939, "learning_rate": 1.3733493397358943e-05, "loss": 0.4692, "step": 18141 }, { "epoch": 23.22176, "grad_norm": 1.1667543649673462, "learning_rate": 1.3731492597038817e-05, "loss": 0.5314, "step": 18142 }, { "epoch": 23.22304, "grad_norm": 1.1657756567001343, "learning_rate": 1.3729491796718689e-05, "loss": 0.5432, "step": 18143 }, { "epoch": 23.22432, "grad_norm": 1.1313673257827759, "learning_rate": 1.3727490996398559e-05, "loss": 0.4813, "step": 18144 }, { "epoch": 23.2256, "grad_norm": 1.1224007606506348, "learning_rate": 1.3725490196078432e-05, "loss": 0.4471, "step": 18145 }, { "epoch": 23.22688, "grad_norm": 1.171020746231079, "learning_rate": 1.3723489395758304e-05, "loss": 0.5398, "step": 18146 }, { "epoch": 23.22816, "grad_norm": 1.0536117553710938, "learning_rate": 1.3721488595438176e-05, "loss": 0.4308, "step": 18147 }, { "epoch": 23.22944, "grad_norm": 1.146742820739746, "learning_rate": 1.3719487795118046e-05, "loss": 0.4901, "step": 18148 }, { "epoch": 23.23072, "grad_norm": 1.09756338596344, "learning_rate": 1.371748699479792e-05, "loss": 0.4469, "step": 18149 }, { "epoch": 23.232, "grad_norm": 1.1287214756011963, "learning_rate": 1.3715486194477792e-05, "loss": 0.4648, "step": 18150 }, { "epoch": 23.23328, "grad_norm": 1.138611078262329, "learning_rate": 1.3713485394157664e-05, "loss": 0.4304, "step": 18151 }, { "epoch": 23.23456, "grad_norm": 1.092214822769165, "learning_rate": 1.3711484593837537e-05, "loss": 0.4395, "step": 18152 }, { "epoch": 23.23584, "grad_norm": 1.1681923866271973, 
"learning_rate": 1.3709483793517407e-05, "loss": 0.5203, "step": 18153 }, { "epoch": 23.23712, "grad_norm": 1.1423531770706177, "learning_rate": 1.3707482993197279e-05, "loss": 0.455, "step": 18154 }, { "epoch": 23.2384, "grad_norm": 1.1326360702514648, "learning_rate": 1.3705482192877151e-05, "loss": 0.4795, "step": 18155 }, { "epoch": 23.23968, "grad_norm": 1.1082360744476318, "learning_rate": 1.3703481392557025e-05, "loss": 0.4743, "step": 18156 }, { "epoch": 23.24096, "grad_norm": 1.1555867195129395, "learning_rate": 1.3701480592236895e-05, "loss": 0.4985, "step": 18157 }, { "epoch": 23.24224, "grad_norm": 1.1038802862167358, "learning_rate": 1.3699479791916767e-05, "loss": 0.4486, "step": 18158 }, { "epoch": 23.24352, "grad_norm": 1.1623843908309937, "learning_rate": 1.369747899159664e-05, "loss": 0.4817, "step": 18159 }, { "epoch": 23.2448, "grad_norm": 1.2444136142730713, "learning_rate": 1.3695478191276512e-05, "loss": 0.5373, "step": 18160 }, { "epoch": 23.24608, "grad_norm": 1.107471227645874, "learning_rate": 1.3693477390956382e-05, "loss": 0.463, "step": 18161 }, { "epoch": 23.24736, "grad_norm": 1.1224188804626465, "learning_rate": 1.3691476590636254e-05, "loss": 0.4517, "step": 18162 }, { "epoch": 23.24864, "grad_norm": 1.119936466217041, "learning_rate": 1.3689475790316128e-05, "loss": 0.4567, "step": 18163 }, { "epoch": 23.24992, "grad_norm": 1.1780643463134766, "learning_rate": 1.3687474989996e-05, "loss": 0.4465, "step": 18164 }, { "epoch": 23.2512, "grad_norm": 1.0980896949768066, "learning_rate": 1.368547418967587e-05, "loss": 0.4619, "step": 18165 }, { "epoch": 23.25248, "grad_norm": 1.1995065212249756, "learning_rate": 1.3683473389355745e-05, "loss": 0.5128, "step": 18166 }, { "epoch": 23.25376, "grad_norm": 1.0773276090621948, "learning_rate": 1.3681472589035615e-05, "loss": 0.4548, "step": 18167 }, { "epoch": 23.25504, "grad_norm": 1.1148123741149902, "learning_rate": 1.3679471788715487e-05, "loss": 0.4754, "step": 18168 }, { "epoch": 
23.25632, "grad_norm": 1.1616092920303345, "learning_rate": 1.3677470988395357e-05, "loss": 0.4963, "step": 18169 }, { "epoch": 23.2576, "grad_norm": 1.1255162954330444, "learning_rate": 1.3675470188075232e-05, "loss": 0.4957, "step": 18170 }, { "epoch": 23.25888, "grad_norm": 1.1024315357208252, "learning_rate": 1.3673469387755102e-05, "loss": 0.4764, "step": 18171 }, { "epoch": 23.26016, "grad_norm": 1.1717227697372437, "learning_rate": 1.3671468587434974e-05, "loss": 0.4885, "step": 18172 }, { "epoch": 23.26144, "grad_norm": 1.1702935695648193, "learning_rate": 1.3669467787114844e-05, "loss": 0.4821, "step": 18173 }, { "epoch": 23.26272, "grad_norm": 1.051859974861145, "learning_rate": 1.366746698679472e-05, "loss": 0.4485, "step": 18174 }, { "epoch": 23.264, "grad_norm": 1.042208194732666, "learning_rate": 1.366546618647459e-05, "loss": 0.4337, "step": 18175 }, { "epoch": 23.26528, "grad_norm": 1.1617704629898071, "learning_rate": 1.3663465386154462e-05, "loss": 0.5218, "step": 18176 }, { "epoch": 23.26656, "grad_norm": 1.151158332824707, "learning_rate": 1.3661464585834335e-05, "loss": 0.4289, "step": 18177 }, { "epoch": 23.26784, "grad_norm": 1.1818608045578003, "learning_rate": 1.3659463785514207e-05, "loss": 0.4554, "step": 18178 }, { "epoch": 23.26912, "grad_norm": 1.1779991388320923, "learning_rate": 1.3657462985194077e-05, "loss": 0.5049, "step": 18179 }, { "epoch": 23.2704, "grad_norm": 1.209188461303711, "learning_rate": 1.3655462184873949e-05, "loss": 0.539, "step": 18180 }, { "epoch": 23.27168, "grad_norm": 1.1615291833877563, "learning_rate": 1.3653461384553823e-05, "loss": 0.5058, "step": 18181 }, { "epoch": 23.27296, "grad_norm": 1.1432435512542725, "learning_rate": 1.3651460584233695e-05, "loss": 0.4513, "step": 18182 }, { "epoch": 23.27424, "grad_norm": 1.2107926607131958, "learning_rate": 1.3649459783913565e-05, "loss": 0.5399, "step": 18183 }, { "epoch": 23.27552, "grad_norm": 1.2299079895019531, "learning_rate": 1.364745898359344e-05, "loss": 
0.5395, "step": 18184 }, { "epoch": 23.2768, "grad_norm": 1.1999679803848267, "learning_rate": 1.364545818327331e-05, "loss": 0.4976, "step": 18185 }, { "epoch": 23.27808, "grad_norm": 1.1381257772445679, "learning_rate": 1.3643457382953182e-05, "loss": 0.507, "step": 18186 }, { "epoch": 23.27936, "grad_norm": 1.1756349802017212, "learning_rate": 1.3641456582633052e-05, "loss": 0.5124, "step": 18187 }, { "epoch": 23.28064, "grad_norm": 1.1597191095352173, "learning_rate": 1.3639455782312927e-05, "loss": 0.4758, "step": 18188 }, { "epoch": 23.28192, "grad_norm": 1.1272660493850708, "learning_rate": 1.3637454981992798e-05, "loss": 0.4734, "step": 18189 }, { "epoch": 23.2832, "grad_norm": 1.1996667385101318, "learning_rate": 1.363545418167267e-05, "loss": 0.4675, "step": 18190 }, { "epoch": 23.28448, "grad_norm": 1.1502827405929565, "learning_rate": 1.3633453381352543e-05, "loss": 0.5163, "step": 18191 }, { "epoch": 23.28576, "grad_norm": 1.1988962888717651, "learning_rate": 1.3631452581032415e-05, "loss": 0.4729, "step": 18192 }, { "epoch": 23.28704, "grad_norm": 1.0956463813781738, "learning_rate": 1.3629451780712285e-05, "loss": 0.4646, "step": 18193 }, { "epoch": 23.28832, "grad_norm": 1.0955307483673096, "learning_rate": 1.3627450980392157e-05, "loss": 0.4796, "step": 18194 }, { "epoch": 23.2896, "grad_norm": 1.2165777683258057, "learning_rate": 1.362545018007203e-05, "loss": 0.5016, "step": 18195 }, { "epoch": 23.29088, "grad_norm": 1.2035049200057983, "learning_rate": 1.3623449379751902e-05, "loss": 0.4793, "step": 18196 }, { "epoch": 23.29216, "grad_norm": 1.1386168003082275, "learning_rate": 1.3621448579431772e-05, "loss": 0.4938, "step": 18197 }, { "epoch": 23.29344, "grad_norm": 1.1671661138534546, "learning_rate": 1.3619447779111646e-05, "loss": 0.523, "step": 18198 }, { "epoch": 23.29472, "grad_norm": 1.096612811088562, "learning_rate": 1.3617446978791518e-05, "loss": 0.4371, "step": 18199 }, { "epoch": 23.296, "grad_norm": 1.1467458009719849, 
"learning_rate": 1.361544617847139e-05, "loss": 0.4933, "step": 18200 }, { "epoch": 23.29728, "grad_norm": 1.131923794746399, "learning_rate": 1.361344537815126e-05, "loss": 0.4217, "step": 18201 }, { "epoch": 23.29856, "grad_norm": 1.149901032447815, "learning_rate": 1.3611444577831133e-05, "loss": 0.4558, "step": 18202 }, { "epoch": 23.29984, "grad_norm": 1.162813663482666, "learning_rate": 1.3609443777511005e-05, "loss": 0.4712, "step": 18203 }, { "epoch": 23.30112, "grad_norm": 1.11738920211792, "learning_rate": 1.3607442977190877e-05, "loss": 0.4764, "step": 18204 }, { "epoch": 23.3024, "grad_norm": 1.2097214460372925, "learning_rate": 1.360544217687075e-05, "loss": 0.4811, "step": 18205 }, { "epoch": 23.30368, "grad_norm": 1.095917820930481, "learning_rate": 1.360344137655062e-05, "loss": 0.4281, "step": 18206 }, { "epoch": 23.30496, "grad_norm": 1.1422666311264038, "learning_rate": 1.3601440576230493e-05, "loss": 0.4902, "step": 18207 }, { "epoch": 23.30624, "grad_norm": 1.118310809135437, "learning_rate": 1.3599439775910364e-05, "loss": 0.4364, "step": 18208 }, { "epoch": 23.30752, "grad_norm": 1.0865938663482666, "learning_rate": 1.3597438975590238e-05, "loss": 0.4709, "step": 18209 }, { "epoch": 23.3088, "grad_norm": 1.0885086059570312, "learning_rate": 1.3595438175270108e-05, "loss": 0.489, "step": 18210 }, { "epoch": 23.31008, "grad_norm": 1.1577430963516235, "learning_rate": 1.359343737494998e-05, "loss": 0.5023, "step": 18211 }, { "epoch": 23.31136, "grad_norm": 1.162643551826477, "learning_rate": 1.3591436574629854e-05, "loss": 0.4846, "step": 18212 }, { "epoch": 23.31264, "grad_norm": 1.1046801805496216, "learning_rate": 1.3589435774309725e-05, "loss": 0.4425, "step": 18213 }, { "epoch": 23.31392, "grad_norm": 1.1700000762939453, "learning_rate": 1.3587434973989596e-05, "loss": 0.4799, "step": 18214 }, { "epoch": 23.3152, "grad_norm": 1.0989649295806885, "learning_rate": 1.3585434173669467e-05, "loss": 0.4446, "step": 18215 }, { "epoch": 23.31648, 
"grad_norm": 1.168190598487854, "learning_rate": 1.3583433373349341e-05, "loss": 0.5183, "step": 18216 }, { "epoch": 23.31776, "grad_norm": 1.081773281097412, "learning_rate": 1.3581432573029213e-05, "loss": 0.4795, "step": 18217 }, { "epoch": 23.31904, "grad_norm": 1.1346309185028076, "learning_rate": 1.3579431772709083e-05, "loss": 0.4431, "step": 18218 }, { "epoch": 23.32032, "grad_norm": 1.150037169456482, "learning_rate": 1.3577430972388958e-05, "loss": 0.4687, "step": 18219 }, { "epoch": 23.3216, "grad_norm": 1.1243027448654175, "learning_rate": 1.3575430172068828e-05, "loss": 0.4564, "step": 18220 }, { "epoch": 23.32288, "grad_norm": 1.1913601160049438, "learning_rate": 1.35734293717487e-05, "loss": 0.5253, "step": 18221 }, { "epoch": 23.32416, "grad_norm": 1.142009973526001, "learning_rate": 1.357142857142857e-05, "loss": 0.4782, "step": 18222 }, { "epoch": 23.32544, "grad_norm": 1.1879467964172363, "learning_rate": 1.3569427771108446e-05, "loss": 0.4572, "step": 18223 }, { "epoch": 23.32672, "grad_norm": 1.1096800565719604, "learning_rate": 1.3567426970788316e-05, "loss": 0.4476, "step": 18224 }, { "epoch": 23.328, "grad_norm": 1.2172260284423828, "learning_rate": 1.3565426170468188e-05, "loss": 0.4931, "step": 18225 }, { "epoch": 23.32928, "grad_norm": 1.1828625202178955, "learning_rate": 1.3563425370148061e-05, "loss": 0.4673, "step": 18226 }, { "epoch": 23.33056, "grad_norm": 1.162265419960022, "learning_rate": 1.3561424569827933e-05, "loss": 0.4658, "step": 18227 }, { "epoch": 23.33184, "grad_norm": 1.1894097328186035, "learning_rate": 1.3559423769507803e-05, "loss": 0.5361, "step": 18228 }, { "epoch": 23.33312, "grad_norm": 1.0926059484481812, "learning_rate": 1.3557422969187675e-05, "loss": 0.4379, "step": 18229 }, { "epoch": 23.3344, "grad_norm": 1.1234076023101807, "learning_rate": 1.3555422168867549e-05, "loss": 0.4882, "step": 18230 }, { "epoch": 23.33568, "grad_norm": 1.1318891048431396, "learning_rate": 1.355342136854742e-05, "loss": 0.4784, 
"step": 18231 }, { "epoch": 23.33696, "grad_norm": 1.2062749862670898, "learning_rate": 1.355142056822729e-05, "loss": 0.5137, "step": 18232 }, { "epoch": 23.33824, "grad_norm": 1.1662986278533936, "learning_rate": 1.3549419767907163e-05, "loss": 0.4596, "step": 18233 }, { "epoch": 23.33952, "grad_norm": 1.1790847778320312, "learning_rate": 1.3547418967587036e-05, "loss": 0.482, "step": 18234 }, { "epoch": 23.3408, "grad_norm": 1.079903244972229, "learning_rate": 1.3545418167266908e-05, "loss": 0.4931, "step": 18235 }, { "epoch": 23.34208, "grad_norm": 1.0966075658798218, "learning_rate": 1.3543417366946778e-05, "loss": 0.4332, "step": 18236 }, { "epoch": 23.34336, "grad_norm": 1.104612946510315, "learning_rate": 1.3541416566626652e-05, "loss": 0.4597, "step": 18237 }, { "epoch": 23.34464, "grad_norm": 1.1291465759277344, "learning_rate": 1.3539415766306524e-05, "loss": 0.4864, "step": 18238 }, { "epoch": 23.34592, "grad_norm": 1.205235242843628, "learning_rate": 1.3537414965986395e-05, "loss": 0.5488, "step": 18239 }, { "epoch": 23.3472, "grad_norm": 1.1906328201293945, "learning_rate": 1.3535414165666266e-05, "loss": 0.4874, "step": 18240 }, { "epoch": 23.34848, "grad_norm": 1.1368838548660278, "learning_rate": 1.3533413365346139e-05, "loss": 0.4336, "step": 18241 }, { "epoch": 23.34976, "grad_norm": 1.2331318855285645, "learning_rate": 1.3531412565026011e-05, "loss": 0.5252, "step": 18242 }, { "epoch": 23.35104, "grad_norm": 1.147335410118103, "learning_rate": 1.3529411764705883e-05, "loss": 0.4733, "step": 18243 }, { "epoch": 23.35232, "grad_norm": 1.1000919342041016, "learning_rate": 1.3527410964385756e-05, "loss": 0.4743, "step": 18244 }, { "epoch": 23.3536, "grad_norm": 1.0781689882278442, "learning_rate": 1.3525410164065627e-05, "loss": 0.4291, "step": 18245 }, { "epoch": 23.35488, "grad_norm": 1.218489170074463, "learning_rate": 1.3523409363745498e-05, "loss": 0.4633, "step": 18246 }, { "epoch": 23.35616, "grad_norm": 1.2166152000427246, "learning_rate": 
1.352140856342537e-05, "loss": 0.5128, "step": 18247 }, { "epoch": 23.35744, "grad_norm": 1.1179983615875244, "learning_rate": 1.3519407763105244e-05, "loss": 0.4939, "step": 18248 }, { "epoch": 23.35872, "grad_norm": 1.1847084760665894, "learning_rate": 1.3517406962785114e-05, "loss": 0.5327, "step": 18249 }, { "epoch": 23.36, "grad_norm": 1.1466864347457886, "learning_rate": 1.3515406162464986e-05, "loss": 0.4945, "step": 18250 }, { "epoch": 23.36128, "grad_norm": 1.1324851512908936, "learning_rate": 1.351340536214486e-05, "loss": 0.4895, "step": 18251 }, { "epoch": 23.36256, "grad_norm": 1.1512887477874756, "learning_rate": 1.3511404561824731e-05, "loss": 0.4707, "step": 18252 }, { "epoch": 23.36384, "grad_norm": 1.1378182172775269, "learning_rate": 1.3509403761504601e-05, "loss": 0.5104, "step": 18253 }, { "epoch": 23.36512, "grad_norm": 1.1355574131011963, "learning_rate": 1.3507402961184473e-05, "loss": 0.4774, "step": 18254 }, { "epoch": 23.3664, "grad_norm": 1.095694899559021, "learning_rate": 1.3505402160864347e-05, "loss": 0.4631, "step": 18255 }, { "epoch": 23.36768, "grad_norm": 1.0908712148666382, "learning_rate": 1.3503401360544219e-05, "loss": 0.5036, "step": 18256 }, { "epoch": 23.36896, "grad_norm": 1.1985548734664917, "learning_rate": 1.3501400560224089e-05, "loss": 0.5001, "step": 18257 }, { "epoch": 23.37024, "grad_norm": 1.1489444971084595, "learning_rate": 1.3499399759903964e-05, "loss": 0.4526, "step": 18258 }, { "epoch": 23.37152, "grad_norm": 1.183395266532898, "learning_rate": 1.3497398959583834e-05, "loss": 0.5232, "step": 18259 }, { "epoch": 23.3728, "grad_norm": 1.1368277072906494, "learning_rate": 1.3495398159263706e-05, "loss": 0.4497, "step": 18260 }, { "epoch": 23.37408, "grad_norm": 1.1291542053222656, "learning_rate": 1.3493397358943576e-05, "loss": 0.4875, "step": 18261 }, { "epoch": 23.37536, "grad_norm": 1.132755160331726, "learning_rate": 1.3491396558623452e-05, "loss": 0.4763, "step": 18262 }, { "epoch": 23.37664, 
"grad_norm": 1.107372522354126, "learning_rate": 1.3489395758303322e-05, "loss": 0.4821, "step": 18263 }, { "epoch": 23.37792, "grad_norm": 1.1369836330413818, "learning_rate": 1.3487394957983194e-05, "loss": 0.4666, "step": 18264 }, { "epoch": 23.3792, "grad_norm": 1.1723092794418335, "learning_rate": 1.3485394157663067e-05, "loss": 0.5537, "step": 18265 }, { "epoch": 23.38048, "grad_norm": 1.1863315105438232, "learning_rate": 1.3483393357342939e-05, "loss": 0.5479, "step": 18266 }, { "epoch": 23.38176, "grad_norm": 1.1658673286437988, "learning_rate": 1.3481392557022809e-05, "loss": 0.5388, "step": 18267 }, { "epoch": 23.38304, "grad_norm": 1.1437945365905762, "learning_rate": 1.3479391756702681e-05, "loss": 0.4794, "step": 18268 }, { "epoch": 23.38432, "grad_norm": 1.149343729019165, "learning_rate": 1.3477390956382555e-05, "loss": 0.4792, "step": 18269 }, { "epoch": 23.3856, "grad_norm": 1.0931353569030762, "learning_rate": 1.3475390156062426e-05, "loss": 0.4807, "step": 18270 }, { "epoch": 23.38688, "grad_norm": 1.122345209121704, "learning_rate": 1.3473389355742297e-05, "loss": 0.4718, "step": 18271 }, { "epoch": 23.38816, "grad_norm": 1.0672874450683594, "learning_rate": 1.347138855542217e-05, "loss": 0.4547, "step": 18272 }, { "epoch": 23.38944, "grad_norm": 1.1067068576812744, "learning_rate": 1.3469387755102042e-05, "loss": 0.4553, "step": 18273 }, { "epoch": 23.39072, "grad_norm": 1.193006992340088, "learning_rate": 1.3467386954781914e-05, "loss": 0.5064, "step": 18274 }, { "epoch": 23.392, "grad_norm": 1.1643834114074707, "learning_rate": 1.3465386154461784e-05, "loss": 0.4489, "step": 18275 }, { "epoch": 23.39328, "grad_norm": 1.0767834186553955, "learning_rate": 1.3463385354141658e-05, "loss": 0.4177, "step": 18276 }, { "epoch": 23.39456, "grad_norm": 1.1161274909973145, "learning_rate": 1.346138455382153e-05, "loss": 0.4918, "step": 18277 }, { "epoch": 23.39584, "grad_norm": 1.2007757425308228, "learning_rate": 1.3459383753501401e-05, "loss": 0.5226, 
"step": 18278 }, { "epoch": 23.39712, "grad_norm": 1.1643115282058716, "learning_rate": 1.3457382953181275e-05, "loss": 0.4366, "step": 18279 }, { "epoch": 23.3984, "grad_norm": 1.183032512664795, "learning_rate": 1.3455382152861145e-05, "loss": 0.4606, "step": 18280 }, { "epoch": 23.39968, "grad_norm": 1.0798239707946777, "learning_rate": 1.3453381352541017e-05, "loss": 0.4546, "step": 18281 }, { "epoch": 23.40096, "grad_norm": 1.117011547088623, "learning_rate": 1.3451380552220889e-05, "loss": 0.4951, "step": 18282 }, { "epoch": 23.40224, "grad_norm": 1.2203407287597656, "learning_rate": 1.3449379751900762e-05, "loss": 0.4415, "step": 18283 }, { "epoch": 23.40352, "grad_norm": 1.1237221956253052, "learning_rate": 1.3447378951580632e-05, "loss": 0.4756, "step": 18284 }, { "epoch": 23.4048, "grad_norm": 1.107589840888977, "learning_rate": 1.3445378151260504e-05, "loss": 0.4927, "step": 18285 }, { "epoch": 23.40608, "grad_norm": 1.1575672626495361, "learning_rate": 1.3443377350940376e-05, "loss": 0.4813, "step": 18286 }, { "epoch": 23.40736, "grad_norm": 1.074341893196106, "learning_rate": 1.344137655062025e-05, "loss": 0.4872, "step": 18287 }, { "epoch": 23.40864, "grad_norm": 1.0716972351074219, "learning_rate": 1.343937575030012e-05, "loss": 0.4199, "step": 18288 }, { "epoch": 23.40992, "grad_norm": 1.1021279096603394, "learning_rate": 1.3437374949979992e-05, "loss": 0.5047, "step": 18289 }, { "epoch": 23.4112, "grad_norm": 1.1311216354370117, "learning_rate": 1.3435374149659865e-05, "loss": 0.5052, "step": 18290 }, { "epoch": 23.41248, "grad_norm": 1.1251327991485596, "learning_rate": 1.3433373349339737e-05, "loss": 0.4459, "step": 18291 }, { "epoch": 23.41376, "grad_norm": 1.1681146621704102, "learning_rate": 1.3431372549019607e-05, "loss": 0.4603, "step": 18292 }, { "epoch": 23.41504, "grad_norm": 1.1599884033203125, "learning_rate": 1.3429371748699479e-05, "loss": 0.4869, "step": 18293 }, { "epoch": 23.41632, "grad_norm": 1.170707106590271, "learning_rate": 
1.3427370948379353e-05, "loss": 0.4833, "step": 18294 }, { "epoch": 23.4176, "grad_norm": 1.1624923944473267, "learning_rate": 1.3425370148059224e-05, "loss": 0.472, "step": 18295 }, { "epoch": 23.41888, "grad_norm": 1.1000187397003174, "learning_rate": 1.3423369347739095e-05, "loss": 0.4675, "step": 18296 }, { "epoch": 23.42016, "grad_norm": 1.2124167680740356, "learning_rate": 1.342136854741897e-05, "loss": 0.5053, "step": 18297 }, { "epoch": 23.42144, "grad_norm": 1.0960698127746582, "learning_rate": 1.341936774709884e-05, "loss": 0.4545, "step": 18298 }, { "epoch": 23.422719999999998, "grad_norm": 1.1535124778747559, "learning_rate": 1.3417366946778712e-05, "loss": 0.4919, "step": 18299 }, { "epoch": 23.424, "grad_norm": 1.0561548471450806, "learning_rate": 1.3415366146458582e-05, "loss": 0.4315, "step": 18300 }, { "epoch": 23.42528, "grad_norm": 1.136558175086975, "learning_rate": 1.3413365346138457e-05, "loss": 0.4742, "step": 18301 }, { "epoch": 23.42656, "grad_norm": 1.1573458909988403, "learning_rate": 1.3411364545818327e-05, "loss": 0.4753, "step": 18302 }, { "epoch": 23.42784, "grad_norm": 1.1722750663757324, "learning_rate": 1.34093637454982e-05, "loss": 0.4666, "step": 18303 }, { "epoch": 23.42912, "grad_norm": 1.1860383749008179, "learning_rate": 1.3407362945178073e-05, "loss": 0.4841, "step": 18304 }, { "epoch": 23.4304, "grad_norm": 1.1146502494812012, "learning_rate": 1.3405362144857945e-05, "loss": 0.4435, "step": 18305 }, { "epoch": 23.43168, "grad_norm": 1.1159223318099976, "learning_rate": 1.3403361344537815e-05, "loss": 0.4846, "step": 18306 }, { "epoch": 23.43296, "grad_norm": 1.122043490409851, "learning_rate": 1.3401360544217687e-05, "loss": 0.4639, "step": 18307 }, { "epoch": 23.43424, "grad_norm": 1.105041742324829, "learning_rate": 1.339935974389756e-05, "loss": 0.4886, "step": 18308 }, { "epoch": 23.43552, "grad_norm": 1.1351252794265747, "learning_rate": 1.3397358943577432e-05, "loss": 0.4366, "step": 18309 }, { "epoch": 23.4368, 
"grad_norm": 1.1929394006729126, "learning_rate": 1.3395358143257302e-05, "loss": 0.4917, "step": 18310 }, { "epoch": 23.43808, "grad_norm": 1.1288291215896606, "learning_rate": 1.3393357342937176e-05, "loss": 0.4498, "step": 18311 }, { "epoch": 23.43936, "grad_norm": 1.1538920402526855, "learning_rate": 1.3391356542617048e-05, "loss": 0.4655, "step": 18312 }, { "epoch": 23.44064, "grad_norm": 1.0849840641021729, "learning_rate": 1.338935574229692e-05, "loss": 0.472, "step": 18313 }, { "epoch": 23.44192, "grad_norm": 1.0807019472122192, "learning_rate": 1.338735494197679e-05, "loss": 0.4488, "step": 18314 }, { "epoch": 23.4432, "grad_norm": 1.1522600650787354, "learning_rate": 1.3385354141656663e-05, "loss": 0.5164, "step": 18315 }, { "epoch": 23.44448, "grad_norm": 1.2187221050262451, "learning_rate": 1.3383353341336535e-05, "loss": 0.5471, "step": 18316 }, { "epoch": 23.44576, "grad_norm": 1.1273307800292969, "learning_rate": 1.3381352541016407e-05, "loss": 0.4396, "step": 18317 }, { "epoch": 23.44704, "grad_norm": 1.0828683376312256, "learning_rate": 1.337935174069628e-05, "loss": 0.4686, "step": 18318 }, { "epoch": 23.44832, "grad_norm": 1.0796676874160767, "learning_rate": 1.337735094037615e-05, "loss": 0.4329, "step": 18319 }, { "epoch": 23.4496, "grad_norm": 1.2199808359146118, "learning_rate": 1.3375350140056023e-05, "loss": 0.5222, "step": 18320 }, { "epoch": 23.45088, "grad_norm": 1.096962571144104, "learning_rate": 1.3373349339735894e-05, "loss": 0.4696, "step": 18321 }, { "epoch": 23.45216, "grad_norm": 1.1377143859863281, "learning_rate": 1.3371348539415768e-05, "loss": 0.5229, "step": 18322 }, { "epoch": 23.45344, "grad_norm": 1.1810131072998047, "learning_rate": 1.3369347739095638e-05, "loss": 0.476, "step": 18323 }, { "epoch": 23.454720000000002, "grad_norm": 1.1354953050613403, "learning_rate": 1.336734693877551e-05, "loss": 0.4791, "step": 18324 }, { "epoch": 23.456, "grad_norm": 1.0756906270980835, "learning_rate": 1.3365346138455384e-05, "loss": 
0.44, "step": 18325 }, { "epoch": 23.45728, "grad_norm": 1.1659843921661377, "learning_rate": 1.3363345338135255e-05, "loss": 0.4829, "step": 18326 }, { "epoch": 23.45856, "grad_norm": 1.0880708694458008, "learning_rate": 1.3361344537815126e-05, "loss": 0.4278, "step": 18327 }, { "epoch": 23.45984, "grad_norm": 1.161241888999939, "learning_rate": 1.3359343737494997e-05, "loss": 0.4762, "step": 18328 }, { "epoch": 23.46112, "grad_norm": 1.1863901615142822, "learning_rate": 1.3357342937174871e-05, "loss": 0.4841, "step": 18329 }, { "epoch": 23.4624, "grad_norm": 1.1466909646987915, "learning_rate": 1.3355342136854743e-05, "loss": 0.5037, "step": 18330 }, { "epoch": 23.46368, "grad_norm": 1.2050119638442993, "learning_rate": 1.3353341336534613e-05, "loss": 0.4718, "step": 18331 }, { "epoch": 23.46496, "grad_norm": 1.173220157623291, "learning_rate": 1.3351340536214488e-05, "loss": 0.4654, "step": 18332 }, { "epoch": 23.46624, "grad_norm": 1.1541483402252197, "learning_rate": 1.3349339735894358e-05, "loss": 0.455, "step": 18333 }, { "epoch": 23.46752, "grad_norm": 1.1780107021331787, "learning_rate": 1.334733893557423e-05, "loss": 0.5024, "step": 18334 }, { "epoch": 23.4688, "grad_norm": 1.1591427326202393, "learning_rate": 1.33453381352541e-05, "loss": 0.4447, "step": 18335 }, { "epoch": 23.47008, "grad_norm": 1.058050274848938, "learning_rate": 1.3343337334933976e-05, "loss": 0.4141, "step": 18336 }, { "epoch": 23.47136, "grad_norm": 1.1635574102401733, "learning_rate": 1.3341336534613846e-05, "loss": 0.4718, "step": 18337 }, { "epoch": 23.47264, "grad_norm": 1.1959178447723389, "learning_rate": 1.3339335734293718e-05, "loss": 0.5097, "step": 18338 }, { "epoch": 23.47392, "grad_norm": 1.1117192506790161, "learning_rate": 1.3337334933973591e-05, "loss": 0.4703, "step": 18339 }, { "epoch": 23.4752, "grad_norm": 1.1207165718078613, "learning_rate": 1.3335334133653463e-05, "loss": 0.4286, "step": 18340 }, { "epoch": 23.47648, "grad_norm": 1.1271799802780151, 
"learning_rate": 1.3333333333333333e-05, "loss": 0.4686, "step": 18341 }, { "epoch": 23.47776, "grad_norm": 1.1951183080673218, "learning_rate": 1.3331332533013205e-05, "loss": 0.5247, "step": 18342 }, { "epoch": 23.47904, "grad_norm": 1.1584644317626953, "learning_rate": 1.3329331732693079e-05, "loss": 0.4629, "step": 18343 }, { "epoch": 23.48032, "grad_norm": 1.1701948642730713, "learning_rate": 1.332733093237295e-05, "loss": 0.4776, "step": 18344 }, { "epoch": 23.4816, "grad_norm": 1.1780883073806763, "learning_rate": 1.332533013205282e-05, "loss": 0.4967, "step": 18345 }, { "epoch": 23.48288, "grad_norm": 1.11570143699646, "learning_rate": 1.3323329331732693e-05, "loss": 0.486, "step": 18346 }, { "epoch": 23.48416, "grad_norm": 1.060181975364685, "learning_rate": 1.3321328531412566e-05, "loss": 0.4462, "step": 18347 }, { "epoch": 23.48544, "grad_norm": 1.0786004066467285, "learning_rate": 1.3319327731092438e-05, "loss": 0.4475, "step": 18348 }, { "epoch": 23.48672, "grad_norm": 1.0986952781677246, "learning_rate": 1.3317326930772308e-05, "loss": 0.4567, "step": 18349 }, { "epoch": 23.488, "grad_norm": 1.1361345052719116, "learning_rate": 1.3315326130452183e-05, "loss": 0.4504, "step": 18350 }, { "epoch": 23.48928, "grad_norm": 1.145846962928772, "learning_rate": 1.3313325330132054e-05, "loss": 0.4997, "step": 18351 }, { "epoch": 23.49056, "grad_norm": 1.1790693998336792, "learning_rate": 1.3311324529811925e-05, "loss": 0.504, "step": 18352 }, { "epoch": 23.49184, "grad_norm": 1.1805227994918823, "learning_rate": 1.3309323729491796e-05, "loss": 0.4856, "step": 18353 }, { "epoch": 23.49312, "grad_norm": 1.1480375528335571, "learning_rate": 1.330732292917167e-05, "loss": 0.4626, "step": 18354 }, { "epoch": 23.4944, "grad_norm": 1.1622354984283447, "learning_rate": 1.3305322128851541e-05, "loss": 0.5071, "step": 18355 }, { "epoch": 23.49568, "grad_norm": 1.1230262517929077, "learning_rate": 1.3303321328531413e-05, "loss": 0.4853, "step": 18356 }, { "epoch": 
23.49696, "grad_norm": 1.181242823600769, "learning_rate": 1.3301320528211286e-05, "loss": 0.4681, "step": 18357 }, { "epoch": 23.49824, "grad_norm": 1.1128627061843872, "learning_rate": 1.3299319727891158e-05, "loss": 0.5194, "step": 18358 }, { "epoch": 23.49952, "grad_norm": 1.192732572555542, "learning_rate": 1.3297318927571028e-05, "loss": 0.5138, "step": 18359 }, { "epoch": 23.5008, "grad_norm": 1.1375622749328613, "learning_rate": 1.32953181272509e-05, "loss": 0.5004, "step": 18360 }, { "epoch": 23.50208, "grad_norm": 1.13736093044281, "learning_rate": 1.3293317326930774e-05, "loss": 0.5064, "step": 18361 }, { "epoch": 23.50336, "grad_norm": 1.1759809255599976, "learning_rate": 1.3291316526610646e-05, "loss": 0.4812, "step": 18362 }, { "epoch": 23.50464, "grad_norm": 1.115228295326233, "learning_rate": 1.3289315726290516e-05, "loss": 0.4502, "step": 18363 }, { "epoch": 23.50592, "grad_norm": 1.1482048034667969, "learning_rate": 1.328731492597039e-05, "loss": 0.4791, "step": 18364 }, { "epoch": 23.5072, "grad_norm": 1.1504039764404297, "learning_rate": 1.3285314125650261e-05, "loss": 0.4734, "step": 18365 }, { "epoch": 23.50848, "grad_norm": 1.0897183418273926, "learning_rate": 1.3283313325330133e-05, "loss": 0.4615, "step": 18366 }, { "epoch": 23.50976, "grad_norm": 1.1377613544464111, "learning_rate": 1.3281312525010003e-05, "loss": 0.482, "step": 18367 }, { "epoch": 23.51104, "grad_norm": 1.1299495697021484, "learning_rate": 1.3279311724689877e-05, "loss": 0.4337, "step": 18368 }, { "epoch": 23.51232, "grad_norm": 1.1434617042541504, "learning_rate": 1.3277310924369749e-05, "loss": 0.5078, "step": 18369 }, { "epoch": 23.5136, "grad_norm": 1.1693207025527954, "learning_rate": 1.327531012404962e-05, "loss": 0.4764, "step": 18370 }, { "epoch": 23.51488, "grad_norm": 1.1742783784866333, "learning_rate": 1.3273309323729494e-05, "loss": 0.4831, "step": 18371 }, { "epoch": 23.51616, "grad_norm": 1.1278483867645264, "learning_rate": 1.3271308523409364e-05, "loss": 
0.4247, "step": 18372 }, { "epoch": 23.51744, "grad_norm": 1.1402909755706787, "learning_rate": 1.3269307723089236e-05, "loss": 0.4449, "step": 18373 }, { "epoch": 23.51872, "grad_norm": 1.1170767545700073, "learning_rate": 1.3267306922769108e-05, "loss": 0.4955, "step": 18374 }, { "epoch": 23.52, "grad_norm": 1.194703221321106, "learning_rate": 1.3265306122448982e-05, "loss": 0.5139, "step": 18375 }, { "epoch": 23.52128, "grad_norm": 1.1340879201889038, "learning_rate": 1.3263305322128852e-05, "loss": 0.4611, "step": 18376 }, { "epoch": 23.52256, "grad_norm": 1.1601988077163696, "learning_rate": 1.3261304521808724e-05, "loss": 0.4484, "step": 18377 }, { "epoch": 23.52384, "grad_norm": 1.1433758735656738, "learning_rate": 1.3259303721488597e-05, "loss": 0.4465, "step": 18378 }, { "epoch": 23.52512, "grad_norm": 1.2454715967178345, "learning_rate": 1.3257302921168469e-05, "loss": 0.528, "step": 18379 }, { "epoch": 23.5264, "grad_norm": 1.1691670417785645, "learning_rate": 1.3255302120848339e-05, "loss": 0.4814, "step": 18380 }, { "epoch": 23.52768, "grad_norm": 1.140670657157898, "learning_rate": 1.3253301320528211e-05, "loss": 0.4821, "step": 18381 }, { "epoch": 23.52896, "grad_norm": 1.1309688091278076, "learning_rate": 1.3251300520208085e-05, "loss": 0.4691, "step": 18382 }, { "epoch": 23.53024, "grad_norm": 1.1939188241958618, "learning_rate": 1.3249299719887956e-05, "loss": 0.5132, "step": 18383 }, { "epoch": 23.53152, "grad_norm": 1.214198112487793, "learning_rate": 1.3247298919567827e-05, "loss": 0.5292, "step": 18384 }, { "epoch": 23.5328, "grad_norm": 1.1498759984970093, "learning_rate": 1.3245298119247702e-05, "loss": 0.4803, "step": 18385 }, { "epoch": 23.53408, "grad_norm": 1.1071444749832153, "learning_rate": 1.3243297318927572e-05, "loss": 0.466, "step": 18386 }, { "epoch": 23.53536, "grad_norm": 1.1929941177368164, "learning_rate": 1.3241296518607444e-05, "loss": 0.4975, "step": 18387 }, { "epoch": 23.53664, "grad_norm": 1.1311265230178833, 
"learning_rate": 1.3239295718287314e-05, "loss": 0.4715, "step": 18388 }, { "epoch": 23.53792, "grad_norm": 1.0508058071136475, "learning_rate": 1.323729491796719e-05, "loss": 0.4312, "step": 18389 }, { "epoch": 23.5392, "grad_norm": 1.1475504636764526, "learning_rate": 1.323529411764706e-05, "loss": 0.5074, "step": 18390 }, { "epoch": 23.54048, "grad_norm": 1.1296319961547852, "learning_rate": 1.3233293317326931e-05, "loss": 0.5169, "step": 18391 }, { "epoch": 23.54176, "grad_norm": 1.1883400678634644, "learning_rate": 1.3231292517006805e-05, "loss": 0.4944, "step": 18392 }, { "epoch": 23.54304, "grad_norm": 1.1157552003860474, "learning_rate": 1.3229291716686677e-05, "loss": 0.4619, "step": 18393 }, { "epoch": 23.54432, "grad_norm": 1.1468461751937866, "learning_rate": 1.3227290916366547e-05, "loss": 0.5149, "step": 18394 }, { "epoch": 23.5456, "grad_norm": 1.0889732837677002, "learning_rate": 1.3225290116046419e-05, "loss": 0.4268, "step": 18395 }, { "epoch": 23.54688, "grad_norm": 1.142313838005066, "learning_rate": 1.3223289315726292e-05, "loss": 0.5009, "step": 18396 }, { "epoch": 23.54816, "grad_norm": 1.1910713911056519, "learning_rate": 1.3221288515406164e-05, "loss": 0.5089, "step": 18397 }, { "epoch": 23.54944, "grad_norm": 1.1199548244476318, "learning_rate": 1.3219287715086034e-05, "loss": 0.4684, "step": 18398 }, { "epoch": 23.55072, "grad_norm": 1.0918095111846924, "learning_rate": 1.3217286914765906e-05, "loss": 0.462, "step": 18399 }, { "epoch": 23.552, "grad_norm": 1.1307764053344727, "learning_rate": 1.321528611444578e-05, "loss": 0.4839, "step": 18400 }, { "epoch": 23.55328, "grad_norm": 1.1751610040664673, "learning_rate": 1.3213285314125651e-05, "loss": 0.4891, "step": 18401 }, { "epoch": 23.55456, "grad_norm": 1.1532877683639526, "learning_rate": 1.3211284513805522e-05, "loss": 0.4607, "step": 18402 }, { "epoch": 23.55584, "grad_norm": 1.1767176389694214, "learning_rate": 1.3209283713485395e-05, "loss": 0.5076, "step": 18403 }, { "epoch": 
23.55712, "grad_norm": 1.2673394680023193, "learning_rate": 1.3207282913165267e-05, "loss": 0.5038, "step": 18404 }, { "epoch": 23.5584, "grad_norm": 1.1632843017578125, "learning_rate": 1.3205282112845139e-05, "loss": 0.5004, "step": 18405 }, { "epoch": 23.55968, "grad_norm": 1.1043983697891235, "learning_rate": 1.3203281312525009e-05, "loss": 0.4669, "step": 18406 }, { "epoch": 23.56096, "grad_norm": 1.1622287034988403, "learning_rate": 1.3201280512204883e-05, "loss": 0.4538, "step": 18407 }, { "epoch": 23.56224, "grad_norm": 1.1268742084503174, "learning_rate": 1.3199279711884754e-05, "loss": 0.4563, "step": 18408 }, { "epoch": 23.56352, "grad_norm": 1.1703455448150635, "learning_rate": 1.3197278911564626e-05, "loss": 0.5168, "step": 18409 }, { "epoch": 23.564799999999998, "grad_norm": 1.1342973709106445, "learning_rate": 1.31952781112445e-05, "loss": 0.505, "step": 18410 }, { "epoch": 23.56608, "grad_norm": 1.1047439575195312, "learning_rate": 1.319327731092437e-05, "loss": 0.4839, "step": 18411 }, { "epoch": 23.56736, "grad_norm": 1.1439480781555176, "learning_rate": 1.3191276510604242e-05, "loss": 0.462, "step": 18412 }, { "epoch": 23.56864, "grad_norm": 1.1610522270202637, "learning_rate": 1.3189275710284114e-05, "loss": 0.498, "step": 18413 }, { "epoch": 23.56992, "grad_norm": 1.1288456916809082, "learning_rate": 1.3187274909963987e-05, "loss": 0.4751, "step": 18414 }, { "epoch": 23.5712, "grad_norm": 1.204026460647583, "learning_rate": 1.3185274109643857e-05, "loss": 0.517, "step": 18415 }, { "epoch": 23.57248, "grad_norm": 1.1533409357070923, "learning_rate": 1.318327330932373e-05, "loss": 0.4879, "step": 18416 }, { "epoch": 23.57376, "grad_norm": 1.1280791759490967, "learning_rate": 1.3181272509003603e-05, "loss": 0.4578, "step": 18417 }, { "epoch": 23.57504, "grad_norm": 1.1419196128845215, "learning_rate": 1.3179271708683475e-05, "loss": 0.4827, "step": 18418 }, { "epoch": 23.57632, "grad_norm": 1.1568984985351562, "learning_rate": 
1.3177270908363345e-05, "loss": 0.4784, "step": 18419 }, { "epoch": 23.5776, "grad_norm": 1.1242620944976807, "learning_rate": 1.3175270108043217e-05, "loss": 0.4755, "step": 18420 }, { "epoch": 23.57888, "grad_norm": 1.1650310754776, "learning_rate": 1.317326930772309e-05, "loss": 0.5058, "step": 18421 }, { "epoch": 23.58016, "grad_norm": 1.2178586721420288, "learning_rate": 1.3171268507402962e-05, "loss": 0.457, "step": 18422 }, { "epoch": 23.58144, "grad_norm": 1.105704665184021, "learning_rate": 1.3169267707082832e-05, "loss": 0.4333, "step": 18423 }, { "epoch": 23.58272, "grad_norm": 1.1392953395843506, "learning_rate": 1.3167266906762708e-05, "loss": 0.517, "step": 18424 }, { "epoch": 23.584, "grad_norm": 1.1252771615982056, "learning_rate": 1.3165266106442578e-05, "loss": 0.4966, "step": 18425 }, { "epoch": 23.58528, "grad_norm": 1.2022629976272583, "learning_rate": 1.316326530612245e-05, "loss": 0.4968, "step": 18426 }, { "epoch": 23.58656, "grad_norm": 1.1347490549087524, "learning_rate": 1.316126450580232e-05, "loss": 0.4637, "step": 18427 }, { "epoch": 23.58784, "grad_norm": 1.175969123840332, "learning_rate": 1.3159263705482195e-05, "loss": 0.4954, "step": 18428 }, { "epoch": 23.58912, "grad_norm": 1.1564298868179321, "learning_rate": 1.3157262905162065e-05, "loss": 0.5057, "step": 18429 }, { "epoch": 23.5904, "grad_norm": 1.1712132692337036, "learning_rate": 1.3155262104841937e-05, "loss": 0.49, "step": 18430 }, { "epoch": 23.59168, "grad_norm": 1.1701792478561401, "learning_rate": 1.315326130452181e-05, "loss": 0.485, "step": 18431 }, { "epoch": 23.59296, "grad_norm": 1.1181670427322388, "learning_rate": 1.3151260504201682e-05, "loss": 0.447, "step": 18432 }, { "epoch": 23.59424, "grad_norm": 1.1028802394866943, "learning_rate": 1.3149259703881553e-05, "loss": 0.4823, "step": 18433 }, { "epoch": 23.59552, "grad_norm": 1.191518783569336, "learning_rate": 1.3147258903561424e-05, "loss": 0.4561, "step": 18434 }, { "epoch": 23.5968, "grad_norm": 
1.1290074586868286, "learning_rate": 1.3145258103241298e-05, "loss": 0.4843, "step": 18435 }, { "epoch": 23.59808, "grad_norm": 1.1779063940048218, "learning_rate": 1.314325730292117e-05, "loss": 0.4708, "step": 18436 }, { "epoch": 23.59936, "grad_norm": 1.1097599267959595, "learning_rate": 1.314125650260104e-05, "loss": 0.4734, "step": 18437 }, { "epoch": 23.60064, "grad_norm": 1.1385622024536133, "learning_rate": 1.3139255702280914e-05, "loss": 0.4822, "step": 18438 }, { "epoch": 23.60192, "grad_norm": 1.0946968793869019, "learning_rate": 1.3137254901960785e-05, "loss": 0.4713, "step": 18439 }, { "epoch": 23.6032, "grad_norm": 1.1536378860473633, "learning_rate": 1.3135254101640657e-05, "loss": 0.5073, "step": 18440 }, { "epoch": 23.60448, "grad_norm": 1.1673475503921509, "learning_rate": 1.3133253301320527e-05, "loss": 0.4681, "step": 18441 }, { "epoch": 23.60576, "grad_norm": 1.163110375404358, "learning_rate": 1.3131252501000401e-05, "loss": 0.5031, "step": 18442 }, { "epoch": 23.60704, "grad_norm": 1.131412386894226, "learning_rate": 1.3129251700680273e-05, "loss": 0.4597, "step": 18443 }, { "epoch": 23.60832, "grad_norm": 1.1555569171905518, "learning_rate": 1.3127250900360145e-05, "loss": 0.5379, "step": 18444 }, { "epoch": 23.6096, "grad_norm": 1.1479874849319458, "learning_rate": 1.3125250100040018e-05, "loss": 0.4431, "step": 18445 }, { "epoch": 23.61088, "grad_norm": 1.140120029449463, "learning_rate": 1.3123249299719888e-05, "loss": 0.4916, "step": 18446 }, { "epoch": 23.61216, "grad_norm": 1.1265473365783691, "learning_rate": 1.312124849939976e-05, "loss": 0.5152, "step": 18447 }, { "epoch": 23.61344, "grad_norm": 1.043741226196289, "learning_rate": 1.3119247699079632e-05, "loss": 0.4195, "step": 18448 }, { "epoch": 23.61472, "grad_norm": 1.1078736782073975, "learning_rate": 1.3117246898759506e-05, "loss": 0.4793, "step": 18449 }, { "epoch": 23.616, "grad_norm": 1.1717230081558228, "learning_rate": 1.3115246098439376e-05, "loss": 0.4681, "step": 18450 
}, { "epoch": 23.61728, "grad_norm": 1.174607753753662, "learning_rate": 1.3113245298119248e-05, "loss": 0.4738, "step": 18451 }, { "epoch": 23.61856, "grad_norm": 1.1562086343765259, "learning_rate": 1.311124449779912e-05, "loss": 0.4716, "step": 18452 }, { "epoch": 23.61984, "grad_norm": 1.1882222890853882, "learning_rate": 1.3109243697478993e-05, "loss": 0.5002, "step": 18453 }, { "epoch": 23.62112, "grad_norm": 1.1770943403244019, "learning_rate": 1.3107242897158863e-05, "loss": 0.4787, "step": 18454 }, { "epoch": 23.6224, "grad_norm": 1.16677987575531, "learning_rate": 1.3105242096838735e-05, "loss": 0.494, "step": 18455 }, { "epoch": 23.62368, "grad_norm": 1.190652847290039, "learning_rate": 1.3103241296518609e-05, "loss": 0.4809, "step": 18456 }, { "epoch": 23.62496, "grad_norm": 1.1706172227859497, "learning_rate": 1.310124049619848e-05, "loss": 0.4633, "step": 18457 }, { "epoch": 23.62624, "grad_norm": 1.0963094234466553, "learning_rate": 1.309923969587835e-05, "loss": 0.4892, "step": 18458 }, { "epoch": 23.62752, "grad_norm": 1.1064554452896118, "learning_rate": 1.3097238895558223e-05, "loss": 0.4748, "step": 18459 }, { "epoch": 23.6288, "grad_norm": 1.1423083543777466, "learning_rate": 1.3095238095238096e-05, "loss": 0.4798, "step": 18460 }, { "epoch": 23.63008, "grad_norm": 1.1238229274749756, "learning_rate": 1.3093237294917968e-05, "loss": 0.4674, "step": 18461 }, { "epoch": 23.63136, "grad_norm": 1.1107577085494995, "learning_rate": 1.3091236494597838e-05, "loss": 0.438, "step": 18462 }, { "epoch": 23.63264, "grad_norm": 1.1175166368484497, "learning_rate": 1.3089235694277713e-05, "loss": 0.4601, "step": 18463 }, { "epoch": 23.63392, "grad_norm": 1.1670434474945068, "learning_rate": 1.3087234893957584e-05, "loss": 0.4494, "step": 18464 }, { "epoch": 23.6352, "grad_norm": 1.2020723819732666, "learning_rate": 1.3085234093637455e-05, "loss": 0.4967, "step": 18465 }, { "epoch": 23.63648, "grad_norm": 1.1246027946472168, "learning_rate": 
1.3083233293317326e-05, "loss": 0.4723, "step": 18466 }, { "epoch": 23.63776, "grad_norm": 1.1464457511901855, "learning_rate": 1.30812324929972e-05, "loss": 0.5051, "step": 18467 }, { "epoch": 23.63904, "grad_norm": 1.152032732963562, "learning_rate": 1.3079231692677071e-05, "loss": 0.4749, "step": 18468 }, { "epoch": 23.64032, "grad_norm": 1.150923490524292, "learning_rate": 1.3077230892356943e-05, "loss": 0.4738, "step": 18469 }, { "epoch": 23.6416, "grad_norm": 1.1413908004760742, "learning_rate": 1.3075230092036816e-05, "loss": 0.4809, "step": 18470 }, { "epoch": 23.64288, "grad_norm": 1.130707025527954, "learning_rate": 1.3073229291716688e-05, "loss": 0.4826, "step": 18471 }, { "epoch": 23.64416, "grad_norm": 1.115024209022522, "learning_rate": 1.3071228491396558e-05, "loss": 0.4714, "step": 18472 }, { "epoch": 23.64544, "grad_norm": 1.1373069286346436, "learning_rate": 1.306922769107643e-05, "loss": 0.4832, "step": 18473 }, { "epoch": 23.64672, "grad_norm": 1.1076675653457642, "learning_rate": 1.3067226890756304e-05, "loss": 0.466, "step": 18474 }, { "epoch": 23.648, "grad_norm": 1.190407156944275, "learning_rate": 1.3065226090436176e-05, "loss": 0.5365, "step": 18475 }, { "epoch": 23.64928, "grad_norm": 1.0926904678344727, "learning_rate": 1.3063225290116046e-05, "loss": 0.5145, "step": 18476 }, { "epoch": 23.65056, "grad_norm": 1.066358208656311, "learning_rate": 1.306122448979592e-05, "loss": 0.4459, "step": 18477 }, { "epoch": 23.65184, "grad_norm": 1.129427194595337, "learning_rate": 1.3059223689475791e-05, "loss": 0.4508, "step": 18478 }, { "epoch": 23.65312, "grad_norm": 1.1428444385528564, "learning_rate": 1.3057222889155663e-05, "loss": 0.4887, "step": 18479 }, { "epoch": 23.6544, "grad_norm": 1.1522812843322754, "learning_rate": 1.3055222088835533e-05, "loss": 0.4857, "step": 18480 }, { "epoch": 23.65568, "grad_norm": 1.1594774723052979, "learning_rate": 1.3053221288515407e-05, "loss": 0.483, "step": 18481 }, { "epoch": 23.65696, "grad_norm": 
1.1202539205551147, "learning_rate": 1.3051220488195279e-05, "loss": 0.4884, "step": 18482 }, { "epoch": 23.65824, "grad_norm": 1.184862732887268, "learning_rate": 1.304921968787515e-05, "loss": 0.5249, "step": 18483 }, { "epoch": 23.65952, "grad_norm": 1.2333064079284668, "learning_rate": 1.3047218887555024e-05, "loss": 0.5485, "step": 18484 }, { "epoch": 23.660800000000002, "grad_norm": 1.1727824211120605, "learning_rate": 1.3045218087234894e-05, "loss": 0.4687, "step": 18485 }, { "epoch": 23.66208, "grad_norm": 1.1470680236816406, "learning_rate": 1.3043217286914766e-05, "loss": 0.5179, "step": 18486 }, { "epoch": 23.66336, "grad_norm": 1.1859335899353027, "learning_rate": 1.3041216486594638e-05, "loss": 0.4971, "step": 18487 }, { "epoch": 23.66464, "grad_norm": 1.2277741432189941, "learning_rate": 1.3039215686274511e-05, "loss": 0.4887, "step": 18488 }, { "epoch": 23.66592, "grad_norm": 1.1842122077941895, "learning_rate": 1.3037214885954382e-05, "loss": 0.4908, "step": 18489 }, { "epoch": 23.6672, "grad_norm": 1.0600404739379883, "learning_rate": 1.3035214085634254e-05, "loss": 0.4354, "step": 18490 }, { "epoch": 23.66848, "grad_norm": 1.13839590549469, "learning_rate": 1.3033213285314127e-05, "loss": 0.4953, "step": 18491 }, { "epoch": 23.66976, "grad_norm": 1.1359751224517822, "learning_rate": 1.3031212484993999e-05, "loss": 0.4667, "step": 18492 }, { "epoch": 23.67104, "grad_norm": 1.168168067932129, "learning_rate": 1.3029211684673869e-05, "loss": 0.4623, "step": 18493 }, { "epoch": 23.67232, "grad_norm": 1.1284360885620117, "learning_rate": 1.3027210884353741e-05, "loss": 0.4694, "step": 18494 }, { "epoch": 23.6736, "grad_norm": 1.1159340143203735, "learning_rate": 1.3025210084033614e-05, "loss": 0.4493, "step": 18495 }, { "epoch": 23.67488, "grad_norm": 1.236001968383789, "learning_rate": 1.3023209283713486e-05, "loss": 0.508, "step": 18496 }, { "epoch": 23.67616, "grad_norm": 1.2399630546569824, "learning_rate": 1.3021208483393357e-05, "loss": 0.494, 
"step": 18497 }, { "epoch": 23.67744, "grad_norm": 1.181467890739441, "learning_rate": 1.3019207683073232e-05, "loss": 0.4886, "step": 18498 }, { "epoch": 23.67872, "grad_norm": 1.2008730173110962, "learning_rate": 1.3017206882753102e-05, "loss": 0.5143, "step": 18499 }, { "epoch": 23.68, "grad_norm": 1.1191836595535278, "learning_rate": 1.3015206082432974e-05, "loss": 0.4378, "step": 18500 }, { "epoch": 23.68128, "grad_norm": 1.0821129083633423, "learning_rate": 1.3013205282112844e-05, "loss": 0.4534, "step": 18501 }, { "epoch": 23.68256, "grad_norm": 1.1685720682144165, "learning_rate": 1.301120448179272e-05, "loss": 0.5053, "step": 18502 }, { "epoch": 23.68384, "grad_norm": 1.1220587491989136, "learning_rate": 1.300920368147259e-05, "loss": 0.5203, "step": 18503 }, { "epoch": 23.68512, "grad_norm": 1.1965205669403076, "learning_rate": 1.3007202881152461e-05, "loss": 0.535, "step": 18504 }, { "epoch": 23.6864, "grad_norm": 1.161190152168274, "learning_rate": 1.3005202080832335e-05, "loss": 0.4718, "step": 18505 }, { "epoch": 23.68768, "grad_norm": 1.1077537536621094, "learning_rate": 1.3003201280512207e-05, "loss": 0.4594, "step": 18506 }, { "epoch": 23.68896, "grad_norm": 1.136878490447998, "learning_rate": 1.3001200480192077e-05, "loss": 0.5092, "step": 18507 }, { "epoch": 23.69024, "grad_norm": 1.1334853172302246, "learning_rate": 1.2999199679871949e-05, "loss": 0.4772, "step": 18508 }, { "epoch": 23.69152, "grad_norm": 1.1531234979629517, "learning_rate": 1.2997198879551822e-05, "loss": 0.4892, "step": 18509 }, { "epoch": 23.6928, "grad_norm": 1.186574101448059, "learning_rate": 1.2995198079231694e-05, "loss": 0.5056, "step": 18510 }, { "epoch": 23.69408, "grad_norm": 1.0875732898712158, "learning_rate": 1.2993197278911564e-05, "loss": 0.458, "step": 18511 }, { "epoch": 23.69536, "grad_norm": 1.11167311668396, "learning_rate": 1.2991196478591436e-05, "loss": 0.4613, "step": 18512 }, { "epoch": 23.69664, "grad_norm": 1.0831806659698486, "learning_rate": 
1.298919567827131e-05, "loss": 0.4726, "step": 18513 }, { "epoch": 23.69792, "grad_norm": 1.1323453187942505, "learning_rate": 1.2987194877951181e-05, "loss": 0.449, "step": 18514 }, { "epoch": 23.6992, "grad_norm": 1.2333152294158936, "learning_rate": 1.2985194077631052e-05, "loss": 0.5372, "step": 18515 }, { "epoch": 23.70048, "grad_norm": 1.1051863431930542, "learning_rate": 1.2983193277310927e-05, "loss": 0.4527, "step": 18516 }, { "epoch": 23.70176, "grad_norm": 1.1067372560501099, "learning_rate": 1.2981192476990797e-05, "loss": 0.4401, "step": 18517 }, { "epoch": 23.70304, "grad_norm": 1.1563383340835571, "learning_rate": 1.2979191676670669e-05, "loss": 0.4822, "step": 18518 }, { "epoch": 23.70432, "grad_norm": 1.1573394536972046, "learning_rate": 1.2977190876350539e-05, "loss": 0.4711, "step": 18519 }, { "epoch": 23.7056, "grad_norm": 1.1465578079223633, "learning_rate": 1.2975190076030414e-05, "loss": 0.4794, "step": 18520 }, { "epoch": 23.706879999999998, "grad_norm": 1.093738317489624, "learning_rate": 1.2973189275710284e-05, "loss": 0.4742, "step": 18521 }, { "epoch": 23.70816, "grad_norm": 1.1131172180175781, "learning_rate": 1.2971188475390156e-05, "loss": 0.5315, "step": 18522 }, { "epoch": 23.70944, "grad_norm": 1.1380599737167358, "learning_rate": 1.296918767507003e-05, "loss": 0.4672, "step": 18523 }, { "epoch": 23.71072, "grad_norm": 1.1121565103530884, "learning_rate": 1.2967186874749902e-05, "loss": 0.4578, "step": 18524 }, { "epoch": 23.712, "grad_norm": 1.1249797344207764, "learning_rate": 1.2965186074429772e-05, "loss": 0.4734, "step": 18525 }, { "epoch": 23.71328, "grad_norm": 1.141965389251709, "learning_rate": 1.2963185274109644e-05, "loss": 0.4883, "step": 18526 }, { "epoch": 23.71456, "grad_norm": 1.1544233560562134, "learning_rate": 1.2961184473789517e-05, "loss": 0.4956, "step": 18527 }, { "epoch": 23.71584, "grad_norm": 1.1456880569458008, "learning_rate": 1.2959183673469389e-05, "loss": 0.4913, "step": 18528 }, { "epoch": 23.71712, 
"grad_norm": 1.1444575786590576, "learning_rate": 1.295718287314926e-05, "loss": 0.4707, "step": 18529 }, { "epoch": 23.7184, "grad_norm": 1.050844669342041, "learning_rate": 1.2955182072829133e-05, "loss": 0.4341, "step": 18530 }, { "epoch": 23.71968, "grad_norm": 1.1426994800567627, "learning_rate": 1.2953181272509005e-05, "loss": 0.4679, "step": 18531 }, { "epoch": 23.72096, "grad_norm": 1.1515729427337646, "learning_rate": 1.2951180472188877e-05, "loss": 0.4746, "step": 18532 }, { "epoch": 23.72224, "grad_norm": 1.2546969652175903, "learning_rate": 1.2949179671868747e-05, "loss": 0.4886, "step": 18533 }, { "epoch": 23.72352, "grad_norm": 1.2079882621765137, "learning_rate": 1.294717887154862e-05, "loss": 0.4708, "step": 18534 }, { "epoch": 23.7248, "grad_norm": 1.2106804847717285, "learning_rate": 1.2945178071228492e-05, "loss": 0.5217, "step": 18535 }, { "epoch": 23.72608, "grad_norm": 1.2002469301223755, "learning_rate": 1.2943177270908364e-05, "loss": 0.4444, "step": 18536 }, { "epoch": 23.72736, "grad_norm": 1.2250186204910278, "learning_rate": 1.2941176470588238e-05, "loss": 0.5409, "step": 18537 }, { "epoch": 23.72864, "grad_norm": 1.1869670152664185, "learning_rate": 1.2939175670268108e-05, "loss": 0.4845, "step": 18538 }, { "epoch": 23.72992, "grad_norm": 1.2370437383651733, "learning_rate": 1.293717486994798e-05, "loss": 0.4883, "step": 18539 }, { "epoch": 23.7312, "grad_norm": 1.15911066532135, "learning_rate": 1.2935174069627851e-05, "loss": 0.4697, "step": 18540 }, { "epoch": 23.73248, "grad_norm": 1.1930956840515137, "learning_rate": 1.2933173269307725e-05, "loss": 0.4824, "step": 18541 }, { "epoch": 23.73376, "grad_norm": 1.1702487468719482, "learning_rate": 1.2931172468987595e-05, "loss": 0.4525, "step": 18542 }, { "epoch": 23.73504, "grad_norm": 1.1612924337387085, "learning_rate": 1.2929171668667467e-05, "loss": 0.5488, "step": 18543 }, { "epoch": 23.73632, "grad_norm": 1.1951484680175781, "learning_rate": 1.292717086834734e-05, "loss": 0.4683, 
"step": 18544 }, { "epoch": 23.7376, "grad_norm": 1.1368860006332397, "learning_rate": 1.2925170068027212e-05, "loss": 0.4735, "step": 18545 }, { "epoch": 23.73888, "grad_norm": 1.100229263305664, "learning_rate": 1.2923169267707083e-05, "loss": 0.4805, "step": 18546 }, { "epoch": 23.74016, "grad_norm": 1.1962560415267944, "learning_rate": 1.2921168467386954e-05, "loss": 0.488, "step": 18547 }, { "epoch": 23.74144, "grad_norm": 1.1619665622711182, "learning_rate": 1.2919167667066828e-05, "loss": 0.4893, "step": 18548 }, { "epoch": 23.74272, "grad_norm": 1.1612014770507812, "learning_rate": 1.29171668667467e-05, "loss": 0.4735, "step": 18549 }, { "epoch": 23.744, "grad_norm": 1.1319211721420288, "learning_rate": 1.291516606642657e-05, "loss": 0.4683, "step": 18550 }, { "epoch": 23.74528, "grad_norm": 1.1409507989883423, "learning_rate": 1.2913165266106445e-05, "loss": 0.4702, "step": 18551 }, { "epoch": 23.74656, "grad_norm": 1.1675480604171753, "learning_rate": 1.2911164465786315e-05, "loss": 0.4782, "step": 18552 }, { "epoch": 23.74784, "grad_norm": 1.1573346853256226, "learning_rate": 1.2909163665466187e-05, "loss": 0.5024, "step": 18553 }, { "epoch": 23.74912, "grad_norm": 1.1228611469268799, "learning_rate": 1.2907162865146057e-05, "loss": 0.4761, "step": 18554 }, { "epoch": 23.7504, "grad_norm": 1.0913078784942627, "learning_rate": 1.2905162064825933e-05, "loss": 0.4681, "step": 18555 }, { "epoch": 23.75168, "grad_norm": 1.1557594537734985, "learning_rate": 1.2903161264505803e-05, "loss": 0.4563, "step": 18556 }, { "epoch": 23.75296, "grad_norm": 1.1187933683395386, "learning_rate": 1.2901160464185675e-05, "loss": 0.4953, "step": 18557 }, { "epoch": 23.75424, "grad_norm": 1.18485426902771, "learning_rate": 1.2899159663865548e-05, "loss": 0.5024, "step": 18558 }, { "epoch": 23.75552, "grad_norm": 1.158681035041809, "learning_rate": 1.289715886354542e-05, "loss": 0.471, "step": 18559 }, { "epoch": 23.7568, "grad_norm": 1.1719639301300049, "learning_rate": 
1.289515806322529e-05, "loss": 0.4878, "step": 18560 }, { "epoch": 23.75808, "grad_norm": 1.1346675157546997, "learning_rate": 1.2893157262905162e-05, "loss": 0.4737, "step": 18561 }, { "epoch": 23.75936, "grad_norm": 1.1762566566467285, "learning_rate": 1.2891156462585036e-05, "loss": 0.4824, "step": 18562 }, { "epoch": 23.76064, "grad_norm": 1.190079689025879, "learning_rate": 1.2889155662264908e-05, "loss": 0.4976, "step": 18563 }, { "epoch": 23.76192, "grad_norm": 1.1472417116165161, "learning_rate": 1.2887154861944778e-05, "loss": 0.4598, "step": 18564 }, { "epoch": 23.7632, "grad_norm": 1.1223433017730713, "learning_rate": 1.288515406162465e-05, "loss": 0.4866, "step": 18565 }, { "epoch": 23.76448, "grad_norm": 1.154817819595337, "learning_rate": 1.2883153261304523e-05, "loss": 0.4892, "step": 18566 }, { "epoch": 23.76576, "grad_norm": 1.1164178848266602, "learning_rate": 1.2881152460984395e-05, "loss": 0.5027, "step": 18567 }, { "epoch": 23.76704, "grad_norm": 1.171064019203186, "learning_rate": 1.2879151660664265e-05, "loss": 0.4635, "step": 18568 }, { "epoch": 23.76832, "grad_norm": 1.1405155658721924, "learning_rate": 1.2877150860344139e-05, "loss": 0.4721, "step": 18569 }, { "epoch": 23.7696, "grad_norm": 1.0519278049468994, "learning_rate": 1.287515006002401e-05, "loss": 0.4368, "step": 18570 }, { "epoch": 23.77088, "grad_norm": 1.1556754112243652, "learning_rate": 1.2873149259703882e-05, "loss": 0.5245, "step": 18571 }, { "epoch": 23.77216, "grad_norm": 1.1321344375610352, "learning_rate": 1.2871148459383753e-05, "loss": 0.4937, "step": 18572 }, { "epoch": 23.77344, "grad_norm": 1.14230477809906, "learning_rate": 1.2869147659063626e-05, "loss": 0.4789, "step": 18573 }, { "epoch": 23.77472, "grad_norm": 1.082296371459961, "learning_rate": 1.2867146858743498e-05, "loss": 0.4855, "step": 18574 }, { "epoch": 23.776, "grad_norm": 1.1363306045532227, "learning_rate": 1.286514605842337e-05, "loss": 0.4706, "step": 18575 }, { "epoch": 23.77728, "grad_norm": 
1.180460810661316, "learning_rate": 1.2863145258103243e-05, "loss": 0.503, "step": 18576 }, { "epoch": 23.77856, "grad_norm": 1.1838196516036987, "learning_rate": 1.2861144457783114e-05, "loss": 0.5241, "step": 18577 }, { "epoch": 23.77984, "grad_norm": 1.1658860445022583, "learning_rate": 1.2859143657462985e-05, "loss": 0.4915, "step": 18578 }, { "epoch": 23.78112, "grad_norm": 1.1486824750900269, "learning_rate": 1.2857142857142857e-05, "loss": 0.4803, "step": 18579 }, { "epoch": 23.7824, "grad_norm": 1.0898503065109253, "learning_rate": 1.285514205682273e-05, "loss": 0.4444, "step": 18580 }, { "epoch": 23.78368, "grad_norm": 1.1599661111831665, "learning_rate": 1.2853141256502601e-05, "loss": 0.539, "step": 18581 }, { "epoch": 23.78496, "grad_norm": 1.151951551437378, "learning_rate": 1.2851140456182473e-05, "loss": 0.4741, "step": 18582 }, { "epoch": 23.78624, "grad_norm": 1.2248902320861816, "learning_rate": 1.2849139655862346e-05, "loss": 0.4911, "step": 18583 }, { "epoch": 23.78752, "grad_norm": 1.1011284589767456, "learning_rate": 1.2847138855542218e-05, "loss": 0.491, "step": 18584 }, { "epoch": 23.7888, "grad_norm": 1.1119115352630615, "learning_rate": 1.2845138055222088e-05, "loss": 0.47, "step": 18585 }, { "epoch": 23.79008, "grad_norm": 1.1536645889282227, "learning_rate": 1.284313725490196e-05, "loss": 0.4986, "step": 18586 }, { "epoch": 23.79136, "grad_norm": 1.1889305114746094, "learning_rate": 1.2841136454581834e-05, "loss": 0.4693, "step": 18587 }, { "epoch": 23.79264, "grad_norm": 1.0987606048583984, "learning_rate": 1.2839135654261706e-05, "loss": 0.4626, "step": 18588 }, { "epoch": 23.79392, "grad_norm": 1.1800764799118042, "learning_rate": 1.2837134853941576e-05, "loss": 0.5212, "step": 18589 }, { "epoch": 23.7952, "grad_norm": 1.1225427389144897, "learning_rate": 1.2835134053621451e-05, "loss": 0.4598, "step": 18590 }, { "epoch": 23.79648, "grad_norm": 1.1654258966445923, "learning_rate": 1.2833133253301321e-05, "loss": 0.491, "step": 18591 
}, { "epoch": 23.79776, "grad_norm": 1.114014983177185, "learning_rate": 1.2831132452981193e-05, "loss": 0.4837, "step": 18592 }, { "epoch": 23.79904, "grad_norm": 1.117835521697998, "learning_rate": 1.2829131652661063e-05, "loss": 0.4643, "step": 18593 }, { "epoch": 23.80032, "grad_norm": 1.181115746498108, "learning_rate": 1.2827130852340938e-05, "loss": 0.5055, "step": 18594 }, { "epoch": 23.8016, "grad_norm": 1.112195611000061, "learning_rate": 1.2825130052020809e-05, "loss": 0.4422, "step": 18595 }, { "epoch": 23.802880000000002, "grad_norm": 1.1325654983520508, "learning_rate": 1.282312925170068e-05, "loss": 0.4486, "step": 18596 }, { "epoch": 23.80416, "grad_norm": 1.103108286857605, "learning_rate": 1.2821128451380554e-05, "loss": 0.4953, "step": 18597 }, { "epoch": 23.80544, "grad_norm": 1.0995981693267822, "learning_rate": 1.2819127651060426e-05, "loss": 0.4517, "step": 18598 }, { "epoch": 23.80672, "grad_norm": 1.0977530479431152, "learning_rate": 1.2817126850740296e-05, "loss": 0.4939, "step": 18599 }, { "epoch": 23.808, "grad_norm": 1.1470292806625366, "learning_rate": 1.2815126050420168e-05, "loss": 0.4998, "step": 18600 }, { "epoch": 23.80928, "grad_norm": 1.1226685047149658, "learning_rate": 1.2813125250100041e-05, "loss": 0.4792, "step": 18601 }, { "epoch": 23.81056, "grad_norm": 1.1221965551376343, "learning_rate": 1.2811124449779913e-05, "loss": 0.4889, "step": 18602 }, { "epoch": 23.81184, "grad_norm": 1.1775237321853638, "learning_rate": 1.2809123649459783e-05, "loss": 0.4841, "step": 18603 }, { "epoch": 23.81312, "grad_norm": 1.0999358892440796, "learning_rate": 1.2807122849139657e-05, "loss": 0.4482, "step": 18604 }, { "epoch": 23.8144, "grad_norm": 1.074433445930481, "learning_rate": 1.2805122048819529e-05, "loss": 0.4656, "step": 18605 }, { "epoch": 23.81568, "grad_norm": 1.0768699645996094, "learning_rate": 1.28031212484994e-05, "loss": 0.4976, "step": 18606 }, { "epoch": 23.81696, "grad_norm": 1.0591034889221191, "learning_rate": 
1.2801120448179271e-05, "loss": 0.4466, "step": 18607 }, { "epoch": 23.81824, "grad_norm": 1.2163914442062378, "learning_rate": 1.2799119647859144e-05, "loss": 0.5735, "step": 18608 }, { "epoch": 23.81952, "grad_norm": 1.1105905771255493, "learning_rate": 1.2797118847539016e-05, "loss": 0.4582, "step": 18609 }, { "epoch": 23.8208, "grad_norm": 1.1055454015731812, "learning_rate": 1.2795118047218888e-05, "loss": 0.4706, "step": 18610 }, { "epoch": 23.82208, "grad_norm": 1.1523383855819702, "learning_rate": 1.2793117246898762e-05, "loss": 0.4749, "step": 18611 }, { "epoch": 23.82336, "grad_norm": 1.1723835468292236, "learning_rate": 1.2791116446578632e-05, "loss": 0.4854, "step": 18612 }, { "epoch": 23.82464, "grad_norm": 1.1401787996292114, "learning_rate": 1.2789115646258504e-05, "loss": 0.5144, "step": 18613 }, { "epoch": 23.82592, "grad_norm": 1.1540292501449585, "learning_rate": 1.2787114845938376e-05, "loss": 0.4866, "step": 18614 }, { "epoch": 23.8272, "grad_norm": 1.1869522333145142, "learning_rate": 1.2785114045618249e-05, "loss": 0.4653, "step": 18615 }, { "epoch": 23.82848, "grad_norm": 1.1412826776504517, "learning_rate": 1.278311324529812e-05, "loss": 0.4968, "step": 18616 }, { "epoch": 23.82976, "grad_norm": 1.15664803981781, "learning_rate": 1.2781112444977991e-05, "loss": 0.4903, "step": 18617 }, { "epoch": 23.83104, "grad_norm": 1.1142537593841553, "learning_rate": 1.2779111644657865e-05, "loss": 0.4709, "step": 18618 }, { "epoch": 23.83232, "grad_norm": 1.0486506223678589, "learning_rate": 1.2777110844337737e-05, "loss": 0.4785, "step": 18619 }, { "epoch": 23.8336, "grad_norm": 1.0911049842834473, "learning_rate": 1.2775110044017607e-05, "loss": 0.4762, "step": 18620 }, { "epoch": 23.83488, "grad_norm": 1.146394968032837, "learning_rate": 1.2773109243697479e-05, "loss": 0.521, "step": 18621 }, { "epoch": 23.83616, "grad_norm": 1.053382396697998, "learning_rate": 1.2771108443377352e-05, "loss": 0.4642, "step": 18622 }, { "epoch": 23.83744, 
"grad_norm": 1.1303876638412476, "learning_rate": 1.2769107643057224e-05, "loss": 0.4674, "step": 18623 }, { "epoch": 23.83872, "grad_norm": 1.1761562824249268, "learning_rate": 1.2767106842737094e-05, "loss": 0.5115, "step": 18624 }, { "epoch": 23.84, "grad_norm": 1.120778203010559, "learning_rate": 1.2765106042416966e-05, "loss": 0.4848, "step": 18625 }, { "epoch": 23.84128, "grad_norm": 1.110851764678955, "learning_rate": 1.276310524209684e-05, "loss": 0.4846, "step": 18626 }, { "epoch": 23.84256, "grad_norm": 1.1596050262451172, "learning_rate": 1.2761104441776711e-05, "loss": 0.4922, "step": 18627 }, { "epoch": 23.84384, "grad_norm": 1.1609506607055664, "learning_rate": 1.2759103641456582e-05, "loss": 0.5059, "step": 18628 }, { "epoch": 23.84512, "grad_norm": 1.1542946100234985, "learning_rate": 1.2757102841136457e-05, "loss": 0.5106, "step": 18629 }, { "epoch": 23.8464, "grad_norm": 1.1738747358322144, "learning_rate": 1.2755102040816327e-05, "loss": 0.4977, "step": 18630 }, { "epoch": 23.84768, "grad_norm": 1.1270424127578735, "learning_rate": 1.2753101240496199e-05, "loss": 0.4735, "step": 18631 }, { "epoch": 23.84896, "grad_norm": 1.1536674499511719, "learning_rate": 1.2751100440176069e-05, "loss": 0.506, "step": 18632 }, { "epoch": 23.85024, "grad_norm": 1.1659085750579834, "learning_rate": 1.2749099639855944e-05, "loss": 0.5192, "step": 18633 }, { "epoch": 23.85152, "grad_norm": 1.1134508848190308, "learning_rate": 1.2747098839535814e-05, "loss": 0.4403, "step": 18634 }, { "epoch": 23.8528, "grad_norm": 1.1781237125396729, "learning_rate": 1.2745098039215686e-05, "loss": 0.4842, "step": 18635 }, { "epoch": 23.85408, "grad_norm": 1.1390173435211182, "learning_rate": 1.274309723889556e-05, "loss": 0.4649, "step": 18636 }, { "epoch": 23.85536, "grad_norm": 1.1824352741241455, "learning_rate": 1.2741096438575432e-05, "loss": 0.4635, "step": 18637 }, { "epoch": 23.85664, "grad_norm": 1.1931419372558594, "learning_rate": 1.2739095638255302e-05, "loss": 0.5115, 
"step": 18638 }, { "epoch": 23.85792, "grad_norm": 1.082920789718628, "learning_rate": 1.2737094837935174e-05, "loss": 0.4455, "step": 18639 }, { "epoch": 23.8592, "grad_norm": 1.1064939498901367, "learning_rate": 1.2735094037615047e-05, "loss": 0.4162, "step": 18640 }, { "epoch": 23.86048, "grad_norm": 1.1889894008636475, "learning_rate": 1.2733093237294919e-05, "loss": 0.4835, "step": 18641 }, { "epoch": 23.86176, "grad_norm": 1.2531040906906128, "learning_rate": 1.273109243697479e-05, "loss": 0.5775, "step": 18642 }, { "epoch": 23.86304, "grad_norm": 1.176379919052124, "learning_rate": 1.2729091636654663e-05, "loss": 0.5473, "step": 18643 }, { "epoch": 23.86432, "grad_norm": 1.1142444610595703, "learning_rate": 1.2727090836334535e-05, "loss": 0.4867, "step": 18644 }, { "epoch": 23.8656, "grad_norm": 1.1500768661499023, "learning_rate": 1.2725090036014407e-05, "loss": 0.4787, "step": 18645 }, { "epoch": 23.86688, "grad_norm": 1.1695806980133057, "learning_rate": 1.2723089235694277e-05, "loss": 0.4926, "step": 18646 }, { "epoch": 23.86816, "grad_norm": 1.1269073486328125, "learning_rate": 1.272108843537415e-05, "loss": 0.4831, "step": 18647 }, { "epoch": 23.86944, "grad_norm": 1.213214635848999, "learning_rate": 1.2719087635054022e-05, "loss": 0.5235, "step": 18648 }, { "epoch": 23.87072, "grad_norm": 1.134529709815979, "learning_rate": 1.2717086834733894e-05, "loss": 0.5038, "step": 18649 }, { "epoch": 23.872, "grad_norm": 1.142578363418579, "learning_rate": 1.2715086034413768e-05, "loss": 0.4529, "step": 18650 }, { "epoch": 23.87328, "grad_norm": 1.2363348007202148, "learning_rate": 1.2713085234093638e-05, "loss": 0.5431, "step": 18651 }, { "epoch": 23.87456, "grad_norm": 1.0975717306137085, "learning_rate": 1.271108443377351e-05, "loss": 0.4614, "step": 18652 }, { "epoch": 23.87584, "grad_norm": 1.1565914154052734, "learning_rate": 1.2709083633453381e-05, "loss": 0.5314, "step": 18653 }, { "epoch": 23.87712, "grad_norm": 1.1390055418014526, "learning_rate": 
1.2707082833133255e-05, "loss": 0.4763, "step": 18654 }, { "epoch": 23.8784, "grad_norm": 1.120939016342163, "learning_rate": 1.2705082032813125e-05, "loss": 0.4826, "step": 18655 }, { "epoch": 23.87968, "grad_norm": 1.1649311780929565, "learning_rate": 1.2703081232492997e-05, "loss": 0.5242, "step": 18656 }, { "epoch": 23.88096, "grad_norm": 1.1487033367156982, "learning_rate": 1.270108043217287e-05, "loss": 0.4823, "step": 18657 }, { "epoch": 23.88224, "grad_norm": 1.161218523979187, "learning_rate": 1.2699079631852742e-05, "loss": 0.4973, "step": 18658 }, { "epoch": 23.88352, "grad_norm": 1.1504417657852173, "learning_rate": 1.2697078831532613e-05, "loss": 0.4817, "step": 18659 }, { "epoch": 23.8848, "grad_norm": 1.204056978225708, "learning_rate": 1.2695078031212484e-05, "loss": 0.531, "step": 18660 }, { "epoch": 23.88608, "grad_norm": 1.180108666419983, "learning_rate": 1.2693077230892358e-05, "loss": 0.4767, "step": 18661 }, { "epoch": 23.88736, "grad_norm": 1.0854483842849731, "learning_rate": 1.269107643057223e-05, "loss": 0.4293, "step": 18662 }, { "epoch": 23.88864, "grad_norm": 1.11992609500885, "learning_rate": 1.26890756302521e-05, "loss": 0.4731, "step": 18663 }, { "epoch": 23.88992, "grad_norm": 1.1562358140945435, "learning_rate": 1.2687074829931975e-05, "loss": 0.5066, "step": 18664 }, { "epoch": 23.8912, "grad_norm": 1.1416101455688477, "learning_rate": 1.2685074029611845e-05, "loss": 0.517, "step": 18665 }, { "epoch": 23.89248, "grad_norm": 1.144882082939148, "learning_rate": 1.2683073229291717e-05, "loss": 0.4406, "step": 18666 }, { "epoch": 23.89376, "grad_norm": 1.1081516742706299, "learning_rate": 1.2681072428971587e-05, "loss": 0.4549, "step": 18667 }, { "epoch": 23.89504, "grad_norm": 1.1630297899246216, "learning_rate": 1.2679071628651463e-05, "loss": 0.4835, "step": 18668 }, { "epoch": 23.89632, "grad_norm": 1.1396640539169312, "learning_rate": 1.2677070828331333e-05, "loss": 0.4788, "step": 18669 }, { "epoch": 23.8976, "grad_norm": 
1.2122644186019897, "learning_rate": 1.2675070028011205e-05, "loss": 0.5103, "step": 18670 }, { "epoch": 23.89888, "grad_norm": 1.224592924118042, "learning_rate": 1.2673069227691078e-05, "loss": 0.5471, "step": 18671 }, { "epoch": 23.90016, "grad_norm": 1.184139370918274, "learning_rate": 1.267106842737095e-05, "loss": 0.5388, "step": 18672 }, { "epoch": 23.90144, "grad_norm": 1.2130299806594849, "learning_rate": 1.266906762705082e-05, "loss": 0.5117, "step": 18673 }, { "epoch": 23.90272, "grad_norm": 1.1781811714172363, "learning_rate": 1.2667066826730692e-05, "loss": 0.4856, "step": 18674 }, { "epoch": 23.904, "grad_norm": 1.1690351963043213, "learning_rate": 1.2665066026410566e-05, "loss": 0.4863, "step": 18675 }, { "epoch": 23.90528, "grad_norm": 1.0583910942077637, "learning_rate": 1.2663065226090437e-05, "loss": 0.4572, "step": 18676 }, { "epoch": 23.90656, "grad_norm": 1.0969510078430176, "learning_rate": 1.2661064425770308e-05, "loss": 0.4255, "step": 18677 }, { "epoch": 23.90784, "grad_norm": 1.1258211135864258, "learning_rate": 1.265906362545018e-05, "loss": 0.4855, "step": 18678 }, { "epoch": 23.90912, "grad_norm": 1.175527572631836, "learning_rate": 1.2657062825130053e-05, "loss": 0.5177, "step": 18679 }, { "epoch": 23.9104, "grad_norm": 1.1303937435150146, "learning_rate": 1.2655062024809925e-05, "loss": 0.4979, "step": 18680 }, { "epoch": 23.91168, "grad_norm": 1.0970633029937744, "learning_rate": 1.2653061224489795e-05, "loss": 0.4648, "step": 18681 }, { "epoch": 23.912959999999998, "grad_norm": 1.12120521068573, "learning_rate": 1.265106042416967e-05, "loss": 0.5126, "step": 18682 }, { "epoch": 23.91424, "grad_norm": 1.1222971677780151, "learning_rate": 1.264905962384954e-05, "loss": 0.4831, "step": 18683 }, { "epoch": 23.91552, "grad_norm": 1.146567702293396, "learning_rate": 1.2647058823529412e-05, "loss": 0.4479, "step": 18684 }, { "epoch": 23.9168, "grad_norm": 1.0587197542190552, "learning_rate": 1.2645058023209283e-05, "loss": 0.4675, "step": 
18685 }, { "epoch": 23.91808, "grad_norm": 1.0969828367233276, "learning_rate": 1.2643057222889158e-05, "loss": 0.5139, "step": 18686 }, { "epoch": 23.91936, "grad_norm": 1.1403480768203735, "learning_rate": 1.2641056422569028e-05, "loss": 0.5023, "step": 18687 }, { "epoch": 23.92064, "grad_norm": 1.137335181236267, "learning_rate": 1.26390556222489e-05, "loss": 0.4405, "step": 18688 }, { "epoch": 23.92192, "grad_norm": 1.1394611597061157, "learning_rate": 1.2637054821928773e-05, "loss": 0.5089, "step": 18689 }, { "epoch": 23.9232, "grad_norm": 1.1698044538497925, "learning_rate": 1.2635054021608645e-05, "loss": 0.436, "step": 18690 }, { "epoch": 23.92448, "grad_norm": 1.1128085851669312, "learning_rate": 1.2633053221288515e-05, "loss": 0.4677, "step": 18691 }, { "epoch": 23.92576, "grad_norm": 1.1220570802688599, "learning_rate": 1.2631052420968387e-05, "loss": 0.4621, "step": 18692 }, { "epoch": 23.92704, "grad_norm": 1.0818321704864502, "learning_rate": 1.262905162064826e-05, "loss": 0.4531, "step": 18693 }, { "epoch": 23.92832, "grad_norm": 1.1508997678756714, "learning_rate": 1.2627050820328133e-05, "loss": 0.4856, "step": 18694 }, { "epoch": 23.9296, "grad_norm": 1.146302580833435, "learning_rate": 1.2625050020008003e-05, "loss": 0.4544, "step": 18695 }, { "epoch": 23.93088, "grad_norm": 1.1530104875564575, "learning_rate": 1.2623049219687876e-05, "loss": 0.4572, "step": 18696 }, { "epoch": 23.93216, "grad_norm": 1.2042316198349, "learning_rate": 1.2621048419367748e-05, "loss": 0.521, "step": 18697 }, { "epoch": 23.93344, "grad_norm": 1.1437196731567383, "learning_rate": 1.261904761904762e-05, "loss": 0.4874, "step": 18698 }, { "epoch": 23.93472, "grad_norm": 1.0899137258529663, "learning_rate": 1.261704681872749e-05, "loss": 0.4506, "step": 18699 }, { "epoch": 23.936, "grad_norm": 1.153517723083496, "learning_rate": 1.2615046018407364e-05, "loss": 0.4816, "step": 18700 }, { "epoch": 23.93728, "grad_norm": 1.1165791749954224, "learning_rate": 
1.2613045218087236e-05, "loss": 0.4971, "step": 18701 }, { "epoch": 23.93856, "grad_norm": 1.0416091680526733, "learning_rate": 1.2611044417767107e-05, "loss": 0.4636, "step": 18702 }, { "epoch": 23.93984, "grad_norm": 1.1278401613235474, "learning_rate": 1.2609043617446981e-05, "loss": 0.4784, "step": 18703 }, { "epoch": 23.94112, "grad_norm": 1.0824394226074219, "learning_rate": 1.2607042817126851e-05, "loss": 0.4231, "step": 18704 }, { "epoch": 23.9424, "grad_norm": 1.1062698364257812, "learning_rate": 1.2605042016806723e-05, "loss": 0.4636, "step": 18705 }, { "epoch": 23.94368, "grad_norm": 1.1416337490081787, "learning_rate": 1.2603041216486595e-05, "loss": 0.4995, "step": 18706 }, { "epoch": 23.944960000000002, "grad_norm": 1.0663871765136719, "learning_rate": 1.2601040416166468e-05, "loss": 0.4747, "step": 18707 }, { "epoch": 23.94624, "grad_norm": 1.2235000133514404, "learning_rate": 1.2599039615846339e-05, "loss": 0.4992, "step": 18708 }, { "epoch": 23.94752, "grad_norm": 1.0971728563308716, "learning_rate": 1.259703881552621e-05, "loss": 0.4431, "step": 18709 }, { "epoch": 23.9488, "grad_norm": 1.1560825109481812, "learning_rate": 1.2595038015206084e-05, "loss": 0.4653, "step": 18710 }, { "epoch": 23.95008, "grad_norm": 1.0700130462646484, "learning_rate": 1.2593037214885956e-05, "loss": 0.4259, "step": 18711 }, { "epoch": 23.95136, "grad_norm": 1.120007038116455, "learning_rate": 1.2591036414565826e-05, "loss": 0.5081, "step": 18712 }, { "epoch": 23.95264, "grad_norm": 1.1066792011260986, "learning_rate": 1.2589035614245698e-05, "loss": 0.4736, "step": 18713 }, { "epoch": 23.95392, "grad_norm": 1.1956183910369873, "learning_rate": 1.2587034813925571e-05, "loss": 0.5158, "step": 18714 }, { "epoch": 23.9552, "grad_norm": 1.1309118270874023, "learning_rate": 1.2585034013605443e-05, "loss": 0.459, "step": 18715 }, { "epoch": 23.95648, "grad_norm": 1.1367197036743164, "learning_rate": 1.2583033213285313e-05, "loss": 0.5102, "step": 18716 }, { "epoch": 
23.95776, "grad_norm": 1.1677851676940918, "learning_rate": 1.2581032412965189e-05, "loss": 0.4909, "step": 18717 }, { "epoch": 23.95904, "grad_norm": 1.1207268238067627, "learning_rate": 1.2579031612645059e-05, "loss": 0.5059, "step": 18718 }, { "epoch": 23.96032, "grad_norm": 1.18141770362854, "learning_rate": 1.257703081232493e-05, "loss": 0.4715, "step": 18719 }, { "epoch": 23.9616, "grad_norm": 1.203596591949463, "learning_rate": 1.2575030012004801e-05, "loss": 0.5205, "step": 18720 }, { "epoch": 23.96288, "grad_norm": 1.124710202217102, "learning_rate": 1.2573029211684676e-05, "loss": 0.478, "step": 18721 }, { "epoch": 23.96416, "grad_norm": 1.1432015895843506, "learning_rate": 1.2571028411364546e-05, "loss": 0.4859, "step": 18722 }, { "epoch": 23.96544, "grad_norm": 1.1251646280288696, "learning_rate": 1.2569027611044418e-05, "loss": 0.4917, "step": 18723 }, { "epoch": 23.96672, "grad_norm": 1.1594064235687256, "learning_rate": 1.2567026810724292e-05, "loss": 0.4919, "step": 18724 }, { "epoch": 23.968, "grad_norm": 1.253121256828308, "learning_rate": 1.2565026010404164e-05, "loss": 0.467, "step": 18725 }, { "epoch": 23.96928, "grad_norm": 1.188087821006775, "learning_rate": 1.2563025210084034e-05, "loss": 0.5015, "step": 18726 }, { "epoch": 23.97056, "grad_norm": 1.1531556844711304, "learning_rate": 1.2561024409763906e-05, "loss": 0.4871, "step": 18727 }, { "epoch": 23.97184, "grad_norm": 1.1422969102859497, "learning_rate": 1.2559023609443779e-05, "loss": 0.4797, "step": 18728 }, { "epoch": 23.97312, "grad_norm": 1.1510505676269531, "learning_rate": 1.2557022809123651e-05, "loss": 0.4865, "step": 18729 }, { "epoch": 23.9744, "grad_norm": 1.1790677309036255, "learning_rate": 1.2555022008803521e-05, "loss": 0.5234, "step": 18730 }, { "epoch": 23.97568, "grad_norm": 1.141440987586975, "learning_rate": 1.2553021208483393e-05, "loss": 0.4965, "step": 18731 }, { "epoch": 23.97696, "grad_norm": 1.1835546493530273, "learning_rate": 1.2551020408163267e-05, "loss": 
0.4611, "step": 18732 }, { "epoch": 23.97824, "grad_norm": 1.1586164236068726, "learning_rate": 1.2549019607843138e-05, "loss": 0.5029, "step": 18733 }, { "epoch": 23.97952, "grad_norm": 1.1706522703170776, "learning_rate": 1.2547018807523009e-05, "loss": 0.4507, "step": 18734 }, { "epoch": 23.9808, "grad_norm": 1.1484572887420654, "learning_rate": 1.2545018007202882e-05, "loss": 0.5009, "step": 18735 }, { "epoch": 23.98208, "grad_norm": 1.1552718877792358, "learning_rate": 1.2543017206882754e-05, "loss": 0.4575, "step": 18736 }, { "epoch": 23.98336, "grad_norm": 1.15525221824646, "learning_rate": 1.2541016406562626e-05, "loss": 0.5026, "step": 18737 }, { "epoch": 23.98464, "grad_norm": 1.107552170753479, "learning_rate": 1.2539015606242496e-05, "loss": 0.4756, "step": 18738 }, { "epoch": 23.98592, "grad_norm": 1.2044583559036255, "learning_rate": 1.253701480592237e-05, "loss": 0.4688, "step": 18739 }, { "epoch": 23.9872, "grad_norm": 1.1863468885421753, "learning_rate": 1.2535014005602241e-05, "loss": 0.4841, "step": 18740 }, { "epoch": 23.98848, "grad_norm": 1.1284416913986206, "learning_rate": 1.2533013205282113e-05, "loss": 0.4416, "step": 18741 }, { "epoch": 23.98976, "grad_norm": 1.1152154207229614, "learning_rate": 1.2531012404961987e-05, "loss": 0.4756, "step": 18742 }, { "epoch": 23.99104, "grad_norm": 1.1176108121871948, "learning_rate": 1.2529011604641857e-05, "loss": 0.449, "step": 18743 }, { "epoch": 23.99232, "grad_norm": 1.2353702783584595, "learning_rate": 1.2527010804321729e-05, "loss": 0.513, "step": 18744 }, { "epoch": 23.9936, "grad_norm": 1.1160780191421509, "learning_rate": 1.25250100040016e-05, "loss": 0.4815, "step": 18745 }, { "epoch": 23.99488, "grad_norm": 1.2114685773849487, "learning_rate": 1.2523009203681474e-05, "loss": 0.5117, "step": 18746 }, { "epoch": 23.99616, "grad_norm": 1.226145625114441, "learning_rate": 1.2521008403361344e-05, "loss": 0.5135, "step": 18747 }, { "epoch": 23.99744, "grad_norm": 1.1571670770645142, 
"learning_rate": 1.2519007603041216e-05, "loss": 0.4824, "step": 18748 }, { "epoch": 23.99872, "grad_norm": 1.0984230041503906, "learning_rate": 1.251700680272109e-05, "loss": 0.4727, "step": 18749 }, { "epoch": 24.0, "grad_norm": 2.419872283935547, "learning_rate": 1.2515006002400962e-05, "loss": 0.8476, "step": 18750 }, { "epoch": 24.00128, "grad_norm": 1.1103284358978271, "learning_rate": 1.2513005202080832e-05, "loss": 0.5034, "step": 18751 }, { "epoch": 24.00256, "grad_norm": 1.077684760093689, "learning_rate": 1.2511004401760704e-05, "loss": 0.4662, "step": 18752 }, { "epoch": 24.00384, "grad_norm": 1.0543673038482666, "learning_rate": 1.2509003601440577e-05, "loss": 0.4556, "step": 18753 }, { "epoch": 24.00512, "grad_norm": 1.18900728225708, "learning_rate": 1.2507002801120449e-05, "loss": 0.5031, "step": 18754 }, { "epoch": 24.0064, "grad_norm": 1.0953742265701294, "learning_rate": 1.250500200080032e-05, "loss": 0.4672, "step": 18755 }, { "epoch": 24.00768, "grad_norm": 1.1897867918014526, "learning_rate": 1.2503001200480195e-05, "loss": 0.4988, "step": 18756 }, { "epoch": 24.00896, "grad_norm": 1.134912371635437, "learning_rate": 1.2501000400160065e-05, "loss": 0.5086, "step": 18757 }, { "epoch": 24.01024, "grad_norm": 1.1343998908996582, "learning_rate": 1.2498999599839937e-05, "loss": 0.4737, "step": 18758 }, { "epoch": 24.01152, "grad_norm": 1.0810625553131104, "learning_rate": 1.2496998799519808e-05, "loss": 0.4678, "step": 18759 }, { "epoch": 24.0128, "grad_norm": 1.1055142879486084, "learning_rate": 1.249499799919968e-05, "loss": 0.459, "step": 18760 }, { "epoch": 24.01408, "grad_norm": 1.1549721956253052, "learning_rate": 1.2492997198879552e-05, "loss": 0.4943, "step": 18761 }, { "epoch": 24.01536, "grad_norm": 1.121549129486084, "learning_rate": 1.2490996398559426e-05, "loss": 0.4437, "step": 18762 }, { "epoch": 24.01664, "grad_norm": 1.0666358470916748, "learning_rate": 1.2488995598239296e-05, "loss": 0.4201, "step": 18763 }, { "epoch": 24.01792, 
"grad_norm": 1.134900689125061, "learning_rate": 1.248699479791917e-05, "loss": 0.4853, "step": 18764 }, { "epoch": 24.0192, "grad_norm": 1.1466809511184692, "learning_rate": 1.248499399759904e-05, "loss": 0.4803, "step": 18765 }, { "epoch": 24.02048, "grad_norm": 1.1177642345428467, "learning_rate": 1.2482993197278913e-05, "loss": 0.4439, "step": 18766 }, { "epoch": 24.02176, "grad_norm": 1.1064014434814453, "learning_rate": 1.2480992396958783e-05, "loss": 0.4562, "step": 18767 }, { "epoch": 24.02304, "grad_norm": 1.103027105331421, "learning_rate": 1.2478991596638657e-05, "loss": 0.4495, "step": 18768 }, { "epoch": 24.02432, "grad_norm": 1.165516972541809, "learning_rate": 1.2476990796318529e-05, "loss": 0.4858, "step": 18769 }, { "epoch": 24.0256, "grad_norm": 1.037825107574463, "learning_rate": 1.24749899959984e-05, "loss": 0.4153, "step": 18770 }, { "epoch": 24.02688, "grad_norm": 1.0688804388046265, "learning_rate": 1.2472989195678272e-05, "loss": 0.4368, "step": 18771 }, { "epoch": 24.02816, "grad_norm": 1.1697540283203125, "learning_rate": 1.2470988395358144e-05, "loss": 0.4791, "step": 18772 }, { "epoch": 24.02944, "grad_norm": 1.1564234495162964, "learning_rate": 1.2468987595038016e-05, "loss": 0.4714, "step": 18773 }, { "epoch": 24.03072, "grad_norm": 1.1580915451049805, "learning_rate": 1.2466986794717888e-05, "loss": 0.4786, "step": 18774 }, { "epoch": 24.032, "grad_norm": 1.172911286354065, "learning_rate": 1.246498599439776e-05, "loss": 0.4536, "step": 18775 }, { "epoch": 24.03328, "grad_norm": 1.1058646440505981, "learning_rate": 1.2462985194077632e-05, "loss": 0.436, "step": 18776 }, { "epoch": 24.03456, "grad_norm": 1.1083683967590332, "learning_rate": 1.2460984393757503e-05, "loss": 0.4947, "step": 18777 }, { "epoch": 24.03584, "grad_norm": 1.1440143585205078, "learning_rate": 1.2458983593437375e-05, "loss": 0.4808, "step": 18778 }, { "epoch": 24.03712, "grad_norm": 1.11277174949646, "learning_rate": 1.2456982793117247e-05, "loss": 0.4342, 
"step": 18779 }, { "epoch": 24.0384, "grad_norm": 1.054457426071167, "learning_rate": 1.2454981992797119e-05, "loss": 0.4136, "step": 18780 }, { "epoch": 24.03968, "grad_norm": 1.1549383401870728, "learning_rate": 1.2452981192476991e-05, "loss": 0.4787, "step": 18781 }, { "epoch": 24.04096, "grad_norm": 1.158250093460083, "learning_rate": 1.2450980392156863e-05, "loss": 0.4401, "step": 18782 }, { "epoch": 24.04224, "grad_norm": 1.170989990234375, "learning_rate": 1.2448979591836735e-05, "loss": 0.4973, "step": 18783 }, { "epoch": 24.04352, "grad_norm": 1.1259979009628296, "learning_rate": 1.2446978791516606e-05, "loss": 0.4472, "step": 18784 }, { "epoch": 24.0448, "grad_norm": 1.147057056427002, "learning_rate": 1.244497799119648e-05, "loss": 0.5124, "step": 18785 }, { "epoch": 24.04608, "grad_norm": 1.2855243682861328, "learning_rate": 1.244297719087635e-05, "loss": 0.543, "step": 18786 }, { "epoch": 24.04736, "grad_norm": 1.1440112590789795, "learning_rate": 1.2440976390556224e-05, "loss": 0.4572, "step": 18787 }, { "epoch": 24.04864, "grad_norm": 1.1109403371810913, "learning_rate": 1.2438975590236094e-05, "loss": 0.4864, "step": 18788 }, { "epoch": 24.04992, "grad_norm": 1.129292368888855, "learning_rate": 1.2436974789915967e-05, "loss": 0.4461, "step": 18789 }, { "epoch": 24.0512, "grad_norm": 1.091012716293335, "learning_rate": 1.2434973989595838e-05, "loss": 0.4387, "step": 18790 }, { "epoch": 24.05248, "grad_norm": 1.1717545986175537, "learning_rate": 1.2432973189275711e-05, "loss": 0.461, "step": 18791 }, { "epoch": 24.05376, "grad_norm": 1.1226803064346313, "learning_rate": 1.2430972388955583e-05, "loss": 0.4271, "step": 18792 }, { "epoch": 24.05504, "grad_norm": 1.1514590978622437, "learning_rate": 1.2428971588635455e-05, "loss": 0.4739, "step": 18793 }, { "epoch": 24.05632, "grad_norm": 1.157591700553894, "learning_rate": 1.2426970788315327e-05, "loss": 0.4825, "step": 18794 }, { "epoch": 24.0576, "grad_norm": 1.17417311668396, "learning_rate": 
1.2424969987995199e-05, "loss": 0.5034, "step": 18795 }, { "epoch": 24.05888, "grad_norm": 1.0604926347732544, "learning_rate": 1.242296918767507e-05, "loss": 0.451, "step": 18796 }, { "epoch": 24.06016, "grad_norm": 1.0973352193832397, "learning_rate": 1.2420968387354942e-05, "loss": 0.4632, "step": 18797 }, { "epoch": 24.06144, "grad_norm": 1.1098352670669556, "learning_rate": 1.2418967587034814e-05, "loss": 0.4198, "step": 18798 }, { "epoch": 24.06272, "grad_norm": 1.117893934249878, "learning_rate": 1.2416966786714688e-05, "loss": 0.4585, "step": 18799 }, { "epoch": 24.064, "grad_norm": 1.1006295680999756, "learning_rate": 1.2414965986394558e-05, "loss": 0.474, "step": 18800 }, { "epoch": 24.06528, "grad_norm": 1.1561734676361084, "learning_rate": 1.2412965186074431e-05, "loss": 0.4768, "step": 18801 }, { "epoch": 24.06656, "grad_norm": 1.1429786682128906, "learning_rate": 1.2410964385754302e-05, "loss": 0.4457, "step": 18802 }, { "epoch": 24.06784, "grad_norm": 1.1339454650878906, "learning_rate": 1.2408963585434175e-05, "loss": 0.4879, "step": 18803 }, { "epoch": 24.06912, "grad_norm": 1.198805332183838, "learning_rate": 1.2406962785114045e-05, "loss": 0.5238, "step": 18804 }, { "epoch": 24.0704, "grad_norm": 1.101431131362915, "learning_rate": 1.2404961984793919e-05, "loss": 0.4727, "step": 18805 }, { "epoch": 24.07168, "grad_norm": 1.0786443948745728, "learning_rate": 1.2402961184473789e-05, "loss": 0.4213, "step": 18806 }, { "epoch": 24.07296, "grad_norm": 1.1256358623504639, "learning_rate": 1.2400960384153663e-05, "loss": 0.4796, "step": 18807 }, { "epoch": 24.07424, "grad_norm": 1.1662980318069458, "learning_rate": 1.2398959583833534e-05, "loss": 0.4701, "step": 18808 }, { "epoch": 24.07552, "grad_norm": 1.2283040285110474, "learning_rate": 1.2396958783513406e-05, "loss": 0.5541, "step": 18809 }, { "epoch": 24.0768, "grad_norm": 1.189424753189087, "learning_rate": 1.2394957983193278e-05, "loss": 0.4847, "step": 18810 }, { "epoch": 24.07808, "grad_norm": 
1.106939673423767, "learning_rate": 1.239295718287315e-05, "loss": 0.4491, "step": 18811 }, { "epoch": 24.07936, "grad_norm": 1.059758186340332, "learning_rate": 1.2390956382553022e-05, "loss": 0.4485, "step": 18812 }, { "epoch": 24.08064, "grad_norm": 1.2013357877731323, "learning_rate": 1.2388955582232894e-05, "loss": 0.502, "step": 18813 }, { "epoch": 24.08192, "grad_norm": 1.1451191902160645, "learning_rate": 1.2386954781912766e-05, "loss": 0.4661, "step": 18814 }, { "epoch": 24.0832, "grad_norm": 1.110666275024414, "learning_rate": 1.2384953981592637e-05, "loss": 0.453, "step": 18815 }, { "epoch": 24.08448, "grad_norm": 1.1258022785186768, "learning_rate": 1.238295318127251e-05, "loss": 0.4725, "step": 18816 }, { "epoch": 24.08576, "grad_norm": 1.1745010614395142, "learning_rate": 1.2380952380952381e-05, "loss": 0.4724, "step": 18817 }, { "epoch": 24.087040000000002, "grad_norm": 1.1408393383026123, "learning_rate": 1.2378951580632253e-05, "loss": 0.485, "step": 18818 }, { "epoch": 24.08832, "grad_norm": 1.2080360651016235, "learning_rate": 1.2376950780312125e-05, "loss": 0.5133, "step": 18819 }, { "epoch": 24.0896, "grad_norm": 1.2680752277374268, "learning_rate": 1.2374949979991997e-05, "loss": 0.4442, "step": 18820 }, { "epoch": 24.09088, "grad_norm": 1.0254223346710205, "learning_rate": 1.2372949179671869e-05, "loss": 0.4307, "step": 18821 }, { "epoch": 24.09216, "grad_norm": 1.1265034675598145, "learning_rate": 1.2370948379351742e-05, "loss": 0.468, "step": 18822 }, { "epoch": 24.09344, "grad_norm": 1.1478646993637085, "learning_rate": 1.2368947579031612e-05, "loss": 0.456, "step": 18823 }, { "epoch": 24.09472, "grad_norm": 1.2165275812149048, "learning_rate": 1.2366946778711486e-05, "loss": 0.4712, "step": 18824 }, { "epoch": 24.096, "grad_norm": 1.1693812608718872, "learning_rate": 1.2364945978391356e-05, "loss": 0.4956, "step": 18825 }, { "epoch": 24.09728, "grad_norm": 1.1482276916503906, "learning_rate": 1.236294517807123e-05, "loss": 0.4687, "step": 
18826 }, { "epoch": 24.09856, "grad_norm": 1.1738667488098145, "learning_rate": 1.23609443777511e-05, "loss": 0.4096, "step": 18827 }, { "epoch": 24.09984, "grad_norm": 1.1686372756958008, "learning_rate": 1.2358943577430973e-05, "loss": 0.4708, "step": 18828 }, { "epoch": 24.10112, "grad_norm": 1.1600852012634277, "learning_rate": 1.2356942777110843e-05, "loss": 0.5038, "step": 18829 }, { "epoch": 24.1024, "grad_norm": 1.1481022834777832, "learning_rate": 1.2354941976790717e-05, "loss": 0.4626, "step": 18830 }, { "epoch": 24.10368, "grad_norm": 1.158562421798706, "learning_rate": 1.2352941176470589e-05, "loss": 0.4819, "step": 18831 }, { "epoch": 24.10496, "grad_norm": 1.2289602756500244, "learning_rate": 1.235094037615046e-05, "loss": 0.4865, "step": 18832 }, { "epoch": 24.10624, "grad_norm": 1.2404357194900513, "learning_rate": 1.2348939575830333e-05, "loss": 0.4778, "step": 18833 }, { "epoch": 24.10752, "grad_norm": 1.0343823432922363, "learning_rate": 1.2346938775510204e-05, "loss": 0.4364, "step": 18834 }, { "epoch": 24.1088, "grad_norm": 1.1348611116409302, "learning_rate": 1.2344937975190076e-05, "loss": 0.4825, "step": 18835 }, { "epoch": 24.11008, "grad_norm": 1.1395764350891113, "learning_rate": 1.2342937174869948e-05, "loss": 0.4656, "step": 18836 }, { "epoch": 24.11136, "grad_norm": 1.1095606088638306, "learning_rate": 1.234093637454982e-05, "loss": 0.4568, "step": 18837 }, { "epoch": 24.11264, "grad_norm": 1.184478521347046, "learning_rate": 1.2338935574229694e-05, "loss": 0.4887, "step": 18838 }, { "epoch": 24.11392, "grad_norm": 1.1172800064086914, "learning_rate": 1.2336934773909564e-05, "loss": 0.4558, "step": 18839 }, { "epoch": 24.1152, "grad_norm": 1.2466896772384644, "learning_rate": 1.2334933973589437e-05, "loss": 0.5224, "step": 18840 }, { "epoch": 24.11648, "grad_norm": 1.1181126832962036, "learning_rate": 1.2332933173269307e-05, "loss": 0.4622, "step": 18841 }, { "epoch": 24.11776, "grad_norm": 1.1256959438323975, "learning_rate": 
1.2330932372949181e-05, "loss": 0.4646, "step": 18842 }, { "epoch": 24.11904, "grad_norm": 1.1790109872817993, "learning_rate": 1.2328931572629051e-05, "loss": 0.4708, "step": 18843 }, { "epoch": 24.12032, "grad_norm": 1.1937501430511475, "learning_rate": 1.2326930772308925e-05, "loss": 0.4791, "step": 18844 }, { "epoch": 24.1216, "grad_norm": 1.2300196886062622, "learning_rate": 1.2324929971988797e-05, "loss": 0.4995, "step": 18845 }, { "epoch": 24.12288, "grad_norm": 1.1216380596160889, "learning_rate": 1.2322929171668668e-05, "loss": 0.4832, "step": 18846 }, { "epoch": 24.12416, "grad_norm": 1.1680253744125366, "learning_rate": 1.232092837134854e-05, "loss": 0.4863, "step": 18847 }, { "epoch": 24.12544, "grad_norm": 1.191275715827942, "learning_rate": 1.2318927571028412e-05, "loss": 0.4561, "step": 18848 }, { "epoch": 24.12672, "grad_norm": 1.1473134756088257, "learning_rate": 1.2316926770708284e-05, "loss": 0.466, "step": 18849 }, { "epoch": 24.128, "grad_norm": 1.181347370147705, "learning_rate": 1.2314925970388156e-05, "loss": 0.478, "step": 18850 }, { "epoch": 24.12928, "grad_norm": 1.1565207242965698, "learning_rate": 1.2312925170068028e-05, "loss": 0.5205, "step": 18851 }, { "epoch": 24.13056, "grad_norm": 1.136231541633606, "learning_rate": 1.2310924369747901e-05, "loss": 0.4984, "step": 18852 }, { "epoch": 24.13184, "grad_norm": 1.1483358144760132, "learning_rate": 1.2308923569427771e-05, "loss": 0.4857, "step": 18853 }, { "epoch": 24.13312, "grad_norm": 1.1635894775390625, "learning_rate": 1.2306922769107645e-05, "loss": 0.4728, "step": 18854 }, { "epoch": 24.1344, "grad_norm": 1.1853755712509155, "learning_rate": 1.2304921968787515e-05, "loss": 0.4908, "step": 18855 }, { "epoch": 24.13568, "grad_norm": 1.1830283403396606, "learning_rate": 1.2302921168467389e-05, "loss": 0.4935, "step": 18856 }, { "epoch": 24.13696, "grad_norm": 1.1618894338607788, "learning_rate": 1.2300920368147259e-05, "loss": 0.4388, "step": 18857 }, { "epoch": 24.13824, 
"grad_norm": 1.1691558361053467, "learning_rate": 1.2298919567827132e-05, "loss": 0.459, "step": 18858 }, { "epoch": 24.13952, "grad_norm": 1.1906380653381348, "learning_rate": 1.2296918767507003e-05, "loss": 0.4695, "step": 18859 }, { "epoch": 24.1408, "grad_norm": 1.0874199867248535, "learning_rate": 1.2294917967186876e-05, "loss": 0.4549, "step": 18860 }, { "epoch": 24.14208, "grad_norm": 1.1504427194595337, "learning_rate": 1.2292917166866748e-05, "loss": 0.4616, "step": 18861 }, { "epoch": 24.14336, "grad_norm": 1.1930842399597168, "learning_rate": 1.229091636654662e-05, "loss": 0.5141, "step": 18862 }, { "epoch": 24.14464, "grad_norm": 1.1488827466964722, "learning_rate": 1.2288915566226492e-05, "loss": 0.4666, "step": 18863 }, { "epoch": 24.14592, "grad_norm": 1.1404608488082886, "learning_rate": 1.2286914765906364e-05, "loss": 0.4479, "step": 18864 }, { "epoch": 24.1472, "grad_norm": 1.1836938858032227, "learning_rate": 1.2284913965586235e-05, "loss": 0.5085, "step": 18865 }, { "epoch": 24.14848, "grad_norm": 1.1096590757369995, "learning_rate": 1.2282913165266107e-05, "loss": 0.4589, "step": 18866 }, { "epoch": 24.14976, "grad_norm": 1.1787697076797485, "learning_rate": 1.2280912364945979e-05, "loss": 0.4746, "step": 18867 }, { "epoch": 24.15104, "grad_norm": 1.0832207202911377, "learning_rate": 1.2278911564625851e-05, "loss": 0.4693, "step": 18868 }, { "epoch": 24.15232, "grad_norm": 1.1375176906585693, "learning_rate": 1.2276910764305723e-05, "loss": 0.4311, "step": 18869 }, { "epoch": 24.1536, "grad_norm": 1.1903811693191528, "learning_rate": 1.2274909963985595e-05, "loss": 0.4851, "step": 18870 }, { "epoch": 24.15488, "grad_norm": 1.169784665107727, "learning_rate": 1.2272909163665467e-05, "loss": 0.495, "step": 18871 }, { "epoch": 24.15616, "grad_norm": 1.154515027999878, "learning_rate": 1.2270908363345338e-05, "loss": 0.4999, "step": 18872 }, { "epoch": 24.15744, "grad_norm": 1.141190528869629, "learning_rate": 1.226890756302521e-05, "loss": 0.4566, 
"step": 18873 }, { "epoch": 24.15872, "grad_norm": 1.1099512577056885, "learning_rate": 1.2266906762705082e-05, "loss": 0.3997, "step": 18874 }, { "epoch": 24.16, "grad_norm": 1.129675030708313, "learning_rate": 1.2264905962384956e-05, "loss": 0.4474, "step": 18875 }, { "epoch": 24.16128, "grad_norm": 1.2040431499481201, "learning_rate": 1.2262905162064826e-05, "loss": 0.4952, "step": 18876 }, { "epoch": 24.16256, "grad_norm": 1.1369742155075073, "learning_rate": 1.22609043617447e-05, "loss": 0.4785, "step": 18877 }, { "epoch": 24.16384, "grad_norm": 1.1882468461990356, "learning_rate": 1.225890356142457e-05, "loss": 0.4668, "step": 18878 }, { "epoch": 24.16512, "grad_norm": 1.1586542129516602, "learning_rate": 1.2256902761104443e-05, "loss": 0.4616, "step": 18879 }, { "epoch": 24.1664, "grad_norm": 1.1391338109970093, "learning_rate": 1.2254901960784313e-05, "loss": 0.4558, "step": 18880 }, { "epoch": 24.16768, "grad_norm": 1.2286574840545654, "learning_rate": 1.2252901160464187e-05, "loss": 0.5128, "step": 18881 }, { "epoch": 24.16896, "grad_norm": 1.1499093770980835, "learning_rate": 1.2250900360144059e-05, "loss": 0.4686, "step": 18882 }, { "epoch": 24.17024, "grad_norm": 1.1170378923416138, "learning_rate": 1.224889955982393e-05, "loss": 0.5008, "step": 18883 }, { "epoch": 24.17152, "grad_norm": 1.1456178426742554, "learning_rate": 1.2246898759503802e-05, "loss": 0.4888, "step": 18884 }, { "epoch": 24.1728, "grad_norm": 1.1081149578094482, "learning_rate": 1.2244897959183674e-05, "loss": 0.4932, "step": 18885 }, { "epoch": 24.17408, "grad_norm": 1.250626564025879, "learning_rate": 1.2242897158863546e-05, "loss": 0.4693, "step": 18886 }, { "epoch": 24.17536, "grad_norm": 1.2053821086883545, "learning_rate": 1.2240896358543418e-05, "loss": 0.5102, "step": 18887 }, { "epoch": 24.17664, "grad_norm": 1.1828999519348145, "learning_rate": 1.223889555822329e-05, "loss": 0.4694, "step": 18888 }, { "epoch": 24.17792, "grad_norm": 1.1308258771896362, "learning_rate": 
1.2236894757903162e-05, "loss": 0.4473, "step": 18889 }, { "epoch": 24.1792, "grad_norm": 1.1865490674972534, "learning_rate": 1.2234893957583033e-05, "loss": 0.5123, "step": 18890 }, { "epoch": 24.18048, "grad_norm": 1.1763098239898682, "learning_rate": 1.2232893157262907e-05, "loss": 0.5011, "step": 18891 }, { "epoch": 24.18176, "grad_norm": 1.1184165477752686, "learning_rate": 1.2230892356942777e-05, "loss": 0.4935, "step": 18892 }, { "epoch": 24.18304, "grad_norm": 1.2041736841201782, "learning_rate": 1.222889155662265e-05, "loss": 0.5107, "step": 18893 }, { "epoch": 24.18432, "grad_norm": 1.1142528057098389, "learning_rate": 1.2226890756302521e-05, "loss": 0.4647, "step": 18894 }, { "epoch": 24.1856, "grad_norm": 1.1789337396621704, "learning_rate": 1.2224889955982394e-05, "loss": 0.4476, "step": 18895 }, { "epoch": 24.18688, "grad_norm": 1.0955803394317627, "learning_rate": 1.2222889155662265e-05, "loss": 0.4594, "step": 18896 }, { "epoch": 24.18816, "grad_norm": 1.1010346412658691, "learning_rate": 1.2220888355342138e-05, "loss": 0.4668, "step": 18897 }, { "epoch": 24.18944, "grad_norm": 1.0665899515151978, "learning_rate": 1.221888755502201e-05, "loss": 0.4563, "step": 18898 }, { "epoch": 24.19072, "grad_norm": 1.1239908933639526, "learning_rate": 1.2216886754701882e-05, "loss": 0.4785, "step": 18899 }, { "epoch": 24.192, "grad_norm": 1.1581279039382935, "learning_rate": 1.2214885954381754e-05, "loss": 0.501, "step": 18900 }, { "epoch": 24.19328, "grad_norm": 1.1129977703094482, "learning_rate": 1.2212885154061626e-05, "loss": 0.4571, "step": 18901 }, { "epoch": 24.19456, "grad_norm": 1.1460938453674316, "learning_rate": 1.2210884353741497e-05, "loss": 0.4706, "step": 18902 }, { "epoch": 24.19584, "grad_norm": 1.2374320030212402, "learning_rate": 1.220888355342137e-05, "loss": 0.5082, "step": 18903 }, { "epoch": 24.19712, "grad_norm": 1.1086463928222656, "learning_rate": 1.2206882753101241e-05, "loss": 0.4465, "step": 18904 }, { "epoch": 24.1984, 
"grad_norm": 1.147619605064392, "learning_rate": 1.2204881952781113e-05, "loss": 0.4718, "step": 18905 }, { "epoch": 24.19968, "grad_norm": 1.117663025856018, "learning_rate": 1.2202881152460985e-05, "loss": 0.5058, "step": 18906 }, { "epoch": 24.20096, "grad_norm": 1.1890056133270264, "learning_rate": 1.2200880352140857e-05, "loss": 0.4962, "step": 18907 }, { "epoch": 24.20224, "grad_norm": 1.0809297561645508, "learning_rate": 1.2198879551820729e-05, "loss": 0.4143, "step": 18908 }, { "epoch": 24.20352, "grad_norm": 1.146008014678955, "learning_rate": 1.21968787515006e-05, "loss": 0.4672, "step": 18909 }, { "epoch": 24.2048, "grad_norm": 1.1988301277160645, "learning_rate": 1.2194877951180472e-05, "loss": 0.4645, "step": 18910 }, { "epoch": 24.20608, "grad_norm": 1.2274937629699707, "learning_rate": 1.2192877150860344e-05, "loss": 0.5402, "step": 18911 }, { "epoch": 24.20736, "grad_norm": 1.1610702276229858, "learning_rate": 1.2190876350540216e-05, "loss": 0.5225, "step": 18912 }, { "epoch": 24.20864, "grad_norm": 1.2057294845581055, "learning_rate": 1.2188875550220088e-05, "loss": 0.4622, "step": 18913 }, { "epoch": 24.20992, "grad_norm": 1.1343713998794556, "learning_rate": 1.2186874749899961e-05, "loss": 0.4997, "step": 18914 }, { "epoch": 24.2112, "grad_norm": 1.0980840921401978, "learning_rate": 1.2184873949579832e-05, "loss": 0.4442, "step": 18915 }, { "epoch": 24.21248, "grad_norm": 1.099935531616211, "learning_rate": 1.2182873149259705e-05, "loss": 0.4833, "step": 18916 }, { "epoch": 24.21376, "grad_norm": 1.2026798725128174, "learning_rate": 1.2180872348939575e-05, "loss": 0.4885, "step": 18917 }, { "epoch": 24.21504, "grad_norm": 1.138269066810608, "learning_rate": 1.2178871548619449e-05, "loss": 0.4539, "step": 18918 }, { "epoch": 24.21632, "grad_norm": 1.233717441558838, "learning_rate": 1.2176870748299319e-05, "loss": 0.5163, "step": 18919 }, { "epoch": 24.2176, "grad_norm": 1.2078992128372192, "learning_rate": 1.2174869947979193e-05, "loss": 0.4496, 
"step": 18920 }, { "epoch": 24.21888, "grad_norm": 1.226100206375122, "learning_rate": 1.2172869147659064e-05, "loss": 0.511, "step": 18921 }, { "epoch": 24.22016, "grad_norm": 1.183384895324707, "learning_rate": 1.2170868347338936e-05, "loss": 0.4633, "step": 18922 }, { "epoch": 24.22144, "grad_norm": 1.2039517164230347, "learning_rate": 1.2168867547018808e-05, "loss": 0.5063, "step": 18923 }, { "epoch": 24.22272, "grad_norm": 1.2347691059112549, "learning_rate": 1.216686674669868e-05, "loss": 0.4843, "step": 18924 }, { "epoch": 24.224, "grad_norm": 1.2100071907043457, "learning_rate": 1.2164865946378552e-05, "loss": 0.4907, "step": 18925 }, { "epoch": 24.22528, "grad_norm": 1.1934117078781128, "learning_rate": 1.2162865146058424e-05, "loss": 0.4824, "step": 18926 }, { "epoch": 24.22656, "grad_norm": 1.1818937063217163, "learning_rate": 1.2160864345738296e-05, "loss": 0.4746, "step": 18927 }, { "epoch": 24.22784, "grad_norm": 1.0990982055664062, "learning_rate": 1.2158863545418169e-05, "loss": 0.4461, "step": 18928 }, { "epoch": 24.22912, "grad_norm": 1.1781893968582153, "learning_rate": 1.215686274509804e-05, "loss": 0.5018, "step": 18929 }, { "epoch": 24.2304, "grad_norm": 1.147173523902893, "learning_rate": 1.2154861944777913e-05, "loss": 0.4664, "step": 18930 }, { "epoch": 24.23168, "grad_norm": 1.0861092805862427, "learning_rate": 1.2152861144457783e-05, "loss": 0.4398, "step": 18931 }, { "epoch": 24.23296, "grad_norm": 1.103131651878357, "learning_rate": 1.2150860344137657e-05, "loss": 0.4301, "step": 18932 }, { "epoch": 24.23424, "grad_norm": 1.0709394216537476, "learning_rate": 1.2148859543817527e-05, "loss": 0.4424, "step": 18933 }, { "epoch": 24.23552, "grad_norm": 1.1708987951278687, "learning_rate": 1.21468587434974e-05, "loss": 0.4815, "step": 18934 }, { "epoch": 24.2368, "grad_norm": 1.1951818466186523, "learning_rate": 1.2144857943177272e-05, "loss": 0.4715, "step": 18935 }, { "epoch": 24.23808, "grad_norm": 1.169890284538269, "learning_rate": 
1.2142857142857144e-05, "loss": 0.4762, "step": 18936 }, { "epoch": 24.23936, "grad_norm": 1.164069652557373, "learning_rate": 1.2140856342537016e-05, "loss": 0.4739, "step": 18937 }, { "epoch": 24.24064, "grad_norm": 1.2312345504760742, "learning_rate": 1.2138855542216888e-05, "loss": 0.5088, "step": 18938 }, { "epoch": 24.24192, "grad_norm": 1.0831817388534546, "learning_rate": 1.213685474189676e-05, "loss": 0.4128, "step": 18939 }, { "epoch": 24.2432, "grad_norm": 1.1156847476959229, "learning_rate": 1.2134853941576631e-05, "loss": 0.4483, "step": 18940 }, { "epoch": 24.24448, "grad_norm": 1.2278788089752197, "learning_rate": 1.2132853141256503e-05, "loss": 0.5139, "step": 18941 }, { "epoch": 24.24576, "grad_norm": 1.1557207107543945, "learning_rate": 1.2130852340936375e-05, "loss": 0.5051, "step": 18942 }, { "epoch": 24.24704, "grad_norm": 1.1289258003234863, "learning_rate": 1.2128851540616247e-05, "loss": 0.473, "step": 18943 }, { "epoch": 24.24832, "grad_norm": 1.245375633239746, "learning_rate": 1.2126850740296119e-05, "loss": 0.4962, "step": 18944 }, { "epoch": 24.2496, "grad_norm": 1.1361275911331177, "learning_rate": 1.212484993997599e-05, "loss": 0.4961, "step": 18945 }, { "epoch": 24.25088, "grad_norm": 1.0928107500076294, "learning_rate": 1.2122849139655863e-05, "loss": 0.4831, "step": 18946 }, { "epoch": 24.25216, "grad_norm": 1.0881211757659912, "learning_rate": 1.2120848339335734e-05, "loss": 0.4388, "step": 18947 }, { "epoch": 24.25344, "grad_norm": 1.1172457933425903, "learning_rate": 1.2118847539015606e-05, "loss": 0.4584, "step": 18948 }, { "epoch": 24.25472, "grad_norm": 1.1269158124923706, "learning_rate": 1.2116846738695478e-05, "loss": 0.4362, "step": 18949 }, { "epoch": 24.256, "grad_norm": 1.0757176876068115, "learning_rate": 1.211484593837535e-05, "loss": 0.4515, "step": 18950 }, { "epoch": 24.25728, "grad_norm": 1.0964628458023071, "learning_rate": 1.2112845138055224e-05, "loss": 0.4624, "step": 18951 }, { "epoch": 24.25856, 
"grad_norm": 1.1697242259979248, "learning_rate": 1.2110844337735094e-05, "loss": 0.4753, "step": 18952 }, { "epoch": 24.25984, "grad_norm": 1.098563313484192, "learning_rate": 1.2108843537414967e-05, "loss": 0.4689, "step": 18953 }, { "epoch": 24.26112, "grad_norm": 1.081687092781067, "learning_rate": 1.2106842737094837e-05, "loss": 0.467, "step": 18954 }, { "epoch": 24.2624, "grad_norm": 1.130462884902954, "learning_rate": 1.2104841936774711e-05, "loss": 0.4772, "step": 18955 }, { "epoch": 24.26368, "grad_norm": 1.1619678735733032, "learning_rate": 1.2102841136454581e-05, "loss": 0.4805, "step": 18956 }, { "epoch": 24.26496, "grad_norm": 1.2183215618133545, "learning_rate": 1.2100840336134455e-05, "loss": 0.4988, "step": 18957 }, { "epoch": 24.26624, "grad_norm": 1.1362768411636353, "learning_rate": 1.2098839535814327e-05, "loss": 0.4678, "step": 18958 }, { "epoch": 24.26752, "grad_norm": 1.1488263607025146, "learning_rate": 1.2096838735494198e-05, "loss": 0.4232, "step": 18959 }, { "epoch": 24.2688, "grad_norm": 1.1357872486114502, "learning_rate": 1.209483793517407e-05, "loss": 0.4901, "step": 18960 }, { "epoch": 24.27008, "grad_norm": 1.1027874946594238, "learning_rate": 1.2092837134853942e-05, "loss": 0.4982, "step": 18961 }, { "epoch": 24.27136, "grad_norm": 1.157282829284668, "learning_rate": 1.2090836334533814e-05, "loss": 0.4774, "step": 18962 }, { "epoch": 24.27264, "grad_norm": 1.1591331958770752, "learning_rate": 1.2088835534213686e-05, "loss": 0.4684, "step": 18963 }, { "epoch": 24.27392, "grad_norm": 1.0676262378692627, "learning_rate": 1.2086834733893558e-05, "loss": 0.4187, "step": 18964 }, { "epoch": 24.2752, "grad_norm": 1.143548846244812, "learning_rate": 1.2084833933573431e-05, "loss": 0.4558, "step": 18965 }, { "epoch": 24.27648, "grad_norm": 1.1962380409240723, "learning_rate": 1.2082833133253301e-05, "loss": 0.5233, "step": 18966 }, { "epoch": 24.27776, "grad_norm": 1.1192182302474976, "learning_rate": 1.2080832332933175e-05, "loss": 0.4541, 
"step": 18967 }, { "epoch": 24.27904, "grad_norm": 1.1944539546966553, "learning_rate": 1.2078831532613045e-05, "loss": 0.4669, "step": 18968 }, { "epoch": 24.28032, "grad_norm": 1.0787324905395508, "learning_rate": 1.2076830732292919e-05, "loss": 0.4497, "step": 18969 }, { "epoch": 24.2816, "grad_norm": 1.12637197971344, "learning_rate": 1.2074829931972789e-05, "loss": 0.4599, "step": 18970 }, { "epoch": 24.28288, "grad_norm": 1.1529687643051147, "learning_rate": 1.2072829131652662e-05, "loss": 0.5066, "step": 18971 }, { "epoch": 24.28416, "grad_norm": 1.202721357345581, "learning_rate": 1.2070828331332533e-05, "loss": 0.494, "step": 18972 }, { "epoch": 24.28544, "grad_norm": 1.1301331520080566, "learning_rate": 1.2068827531012406e-05, "loss": 0.4652, "step": 18973 }, { "epoch": 24.28672, "grad_norm": 1.128435492515564, "learning_rate": 1.2066826730692278e-05, "loss": 0.4296, "step": 18974 }, { "epoch": 24.288, "grad_norm": 1.1200505495071411, "learning_rate": 1.206482593037215e-05, "loss": 0.474, "step": 18975 }, { "epoch": 24.28928, "grad_norm": 1.2111964225769043, "learning_rate": 1.2062825130052022e-05, "loss": 0.4765, "step": 18976 }, { "epoch": 24.29056, "grad_norm": 1.1505967378616333, "learning_rate": 1.2060824329731893e-05, "loss": 0.4658, "step": 18977 }, { "epoch": 24.29184, "grad_norm": 1.0870449542999268, "learning_rate": 1.2058823529411765e-05, "loss": 0.4378, "step": 18978 }, { "epoch": 24.29312, "grad_norm": 1.1326916217803955, "learning_rate": 1.2056822729091637e-05, "loss": 0.4744, "step": 18979 }, { "epoch": 24.2944, "grad_norm": 1.1708118915557861, "learning_rate": 1.2054821928771509e-05, "loss": 0.4676, "step": 18980 }, { "epoch": 24.29568, "grad_norm": 1.1142549514770508, "learning_rate": 1.2052821128451381e-05, "loss": 0.4682, "step": 18981 }, { "epoch": 24.29696, "grad_norm": 1.2015085220336914, "learning_rate": 1.2050820328131253e-05, "loss": 0.5163, "step": 18982 }, { "epoch": 24.29824, "grad_norm": 1.0846655368804932, "learning_rate": 
1.2048819527811125e-05, "loss": 0.411, "step": 18983 }, { "epoch": 24.29952, "grad_norm": 1.1354156732559204, "learning_rate": 1.2046818727490996e-05, "loss": 0.4529, "step": 18984 }, { "epoch": 24.3008, "grad_norm": 1.1821706295013428, "learning_rate": 1.2044817927170868e-05, "loss": 0.4717, "step": 18985 }, { "epoch": 24.30208, "grad_norm": 1.1429351568222046, "learning_rate": 1.204281712685074e-05, "loss": 0.4679, "step": 18986 }, { "epoch": 24.30336, "grad_norm": 1.1987007856369019, "learning_rate": 1.2040816326530612e-05, "loss": 0.479, "step": 18987 }, { "epoch": 24.30464, "grad_norm": 1.1858272552490234, "learning_rate": 1.2038815526210486e-05, "loss": 0.4528, "step": 18988 }, { "epoch": 24.30592, "grad_norm": 1.1735626459121704, "learning_rate": 1.2036814725890356e-05, "loss": 0.4459, "step": 18989 }, { "epoch": 24.3072, "grad_norm": 1.2804443836212158, "learning_rate": 1.203481392557023e-05, "loss": 0.5244, "step": 18990 }, { "epoch": 24.30848, "grad_norm": 1.2020955085754395, "learning_rate": 1.20328131252501e-05, "loss": 0.4598, "step": 18991 }, { "epoch": 24.30976, "grad_norm": 1.2442394495010376, "learning_rate": 1.2030812324929973e-05, "loss": 0.4843, "step": 18992 }, { "epoch": 24.31104, "grad_norm": 1.137221097946167, "learning_rate": 1.2028811524609843e-05, "loss": 0.4527, "step": 18993 }, { "epoch": 24.31232, "grad_norm": 1.1395703554153442, "learning_rate": 1.2026810724289717e-05, "loss": 0.5018, "step": 18994 }, { "epoch": 24.3136, "grad_norm": 1.1961019039154053, "learning_rate": 1.2024809923969587e-05, "loss": 0.5364, "step": 18995 }, { "epoch": 24.31488, "grad_norm": 1.1549445390701294, "learning_rate": 1.202280912364946e-05, "loss": 0.4855, "step": 18996 }, { "epoch": 24.31616, "grad_norm": 1.1084400415420532, "learning_rate": 1.2020808323329332e-05, "loss": 0.4317, "step": 18997 }, { "epoch": 24.31744, "grad_norm": 1.1791542768478394, "learning_rate": 1.2018807523009204e-05, "loss": 0.5045, "step": 18998 }, { "epoch": 24.31872, "grad_norm": 
1.1057560443878174, "learning_rate": 1.2016806722689076e-05, "loss": 0.4459, "step": 18999 }, { "epoch": 24.32, "grad_norm": 1.1355136632919312, "learning_rate": 1.2014805922368948e-05, "loss": 0.4693, "step": 19000 }, { "epoch": 24.32128, "grad_norm": 1.204648494720459, "learning_rate": 1.201280512204882e-05, "loss": 0.5096, "step": 19001 }, { "epoch": 24.32256, "grad_norm": 1.1683499813079834, "learning_rate": 1.2010804321728692e-05, "loss": 0.4805, "step": 19002 }, { "epoch": 24.32384, "grad_norm": 1.0788731575012207, "learning_rate": 1.2008803521408563e-05, "loss": 0.4542, "step": 19003 }, { "epoch": 24.32512, "grad_norm": 1.1736633777618408, "learning_rate": 1.2006802721088437e-05, "loss": 0.4771, "step": 19004 }, { "epoch": 24.3264, "grad_norm": 1.1012755632400513, "learning_rate": 1.2004801920768307e-05, "loss": 0.4554, "step": 19005 }, { "epoch": 24.32768, "grad_norm": 1.0851147174835205, "learning_rate": 1.200280112044818e-05, "loss": 0.4286, "step": 19006 }, { "epoch": 24.32896, "grad_norm": 1.1383137702941895, "learning_rate": 1.2000800320128051e-05, "loss": 0.4664, "step": 19007 }, { "epoch": 24.33024, "grad_norm": 1.1844596862792969, "learning_rate": 1.1998799519807924e-05, "loss": 0.5101, "step": 19008 }, { "epoch": 24.33152, "grad_norm": 1.212876319885254, "learning_rate": 1.1996798719487795e-05, "loss": 0.524, "step": 19009 }, { "epoch": 24.3328, "grad_norm": 1.1917188167572021, "learning_rate": 1.1994797919167668e-05, "loss": 0.5007, "step": 19010 }, { "epoch": 24.33408, "grad_norm": 1.0979135036468506, "learning_rate": 1.199279711884754e-05, "loss": 0.4251, "step": 19011 }, { "epoch": 24.33536, "grad_norm": 1.197851300239563, "learning_rate": 1.1990796318527412e-05, "loss": 0.4987, "step": 19012 }, { "epoch": 24.33664, "grad_norm": 1.1661128997802734, "learning_rate": 1.1988795518207284e-05, "loss": 0.4864, "step": 19013 }, { "epoch": 24.33792, "grad_norm": 1.1655899286270142, "learning_rate": 1.1986794717887156e-05, "loss": 0.4645, "step": 19014 
}, { "epoch": 24.3392, "grad_norm": 1.1260086297988892, "learning_rate": 1.1984793917567027e-05, "loss": 0.4571, "step": 19015 }, { "epoch": 24.34048, "grad_norm": 1.1551868915557861, "learning_rate": 1.19827931172469e-05, "loss": 0.454, "step": 19016 }, { "epoch": 24.34176, "grad_norm": 1.09392249584198, "learning_rate": 1.1980792316926771e-05, "loss": 0.4698, "step": 19017 }, { "epoch": 24.34304, "grad_norm": 1.2250076532363892, "learning_rate": 1.1978791516606645e-05, "loss": 0.4851, "step": 19018 }, { "epoch": 24.34432, "grad_norm": 1.1403043270111084, "learning_rate": 1.1976790716286515e-05, "loss": 0.4895, "step": 19019 }, { "epoch": 24.3456, "grad_norm": 1.113724946975708, "learning_rate": 1.1974789915966388e-05, "loss": 0.4718, "step": 19020 }, { "epoch": 24.34688, "grad_norm": 1.1426780223846436, "learning_rate": 1.1972789115646259e-05, "loss": 0.4889, "step": 19021 }, { "epoch": 24.34816, "grad_norm": 1.1902352571487427, "learning_rate": 1.1970788315326132e-05, "loss": 0.4702, "step": 19022 }, { "epoch": 24.34944, "grad_norm": 1.1023898124694824, "learning_rate": 1.1968787515006002e-05, "loss": 0.4402, "step": 19023 }, { "epoch": 24.35072, "grad_norm": 1.1217379570007324, "learning_rate": 1.1966786714685876e-05, "loss": 0.453, "step": 19024 }, { "epoch": 24.352, "grad_norm": 1.115411639213562, "learning_rate": 1.1964785914365746e-05, "loss": 0.4485, "step": 19025 }, { "epoch": 24.35328, "grad_norm": 1.209621787071228, "learning_rate": 1.196278511404562e-05, "loss": 0.5006, "step": 19026 }, { "epoch": 24.35456, "grad_norm": 1.1467214822769165, "learning_rate": 1.1960784313725491e-05, "loss": 0.4591, "step": 19027 }, { "epoch": 24.35584, "grad_norm": 1.1208250522613525, "learning_rate": 1.1958783513405363e-05, "loss": 0.4738, "step": 19028 }, { "epoch": 24.35712, "grad_norm": 1.1195433139801025, "learning_rate": 1.1956782713085235e-05, "loss": 0.4478, "step": 19029 }, { "epoch": 24.3584, "grad_norm": 1.1476173400878906, "learning_rate": 
1.1954781912765107e-05, "loss": 0.4901, "step": 19030 }, { "epoch": 24.35968, "grad_norm": 1.2079092264175415, "learning_rate": 1.1952781112444979e-05, "loss": 0.4439, "step": 19031 }, { "epoch": 24.36096, "grad_norm": 1.1161270141601562, "learning_rate": 1.195078031212485e-05, "loss": 0.4548, "step": 19032 }, { "epoch": 24.36224, "grad_norm": 1.1574740409851074, "learning_rate": 1.1948779511804723e-05, "loss": 0.4633, "step": 19033 }, { "epoch": 24.36352, "grad_norm": 1.1372309923171997, "learning_rate": 1.1946778711484594e-05, "loss": 0.4352, "step": 19034 }, { "epoch": 24.3648, "grad_norm": 1.1243098974227905, "learning_rate": 1.1944777911164466e-05, "loss": 0.4584, "step": 19035 }, { "epoch": 24.36608, "grad_norm": 1.1075938940048218, "learning_rate": 1.1942777110844338e-05, "loss": 0.4726, "step": 19036 }, { "epoch": 24.36736, "grad_norm": 1.1360459327697754, "learning_rate": 1.194077631052421e-05, "loss": 0.423, "step": 19037 }, { "epoch": 24.36864, "grad_norm": 1.1323789358139038, "learning_rate": 1.1938775510204082e-05, "loss": 0.4819, "step": 19038 }, { "epoch": 24.36992, "grad_norm": 1.1539483070373535, "learning_rate": 1.1936774709883954e-05, "loss": 0.4689, "step": 19039 }, { "epoch": 24.3712, "grad_norm": 1.1773465871810913, "learning_rate": 1.1934773909563826e-05, "loss": 0.5437, "step": 19040 }, { "epoch": 24.37248, "grad_norm": 1.0829496383666992, "learning_rate": 1.1932773109243699e-05, "loss": 0.4514, "step": 19041 }, { "epoch": 24.37376, "grad_norm": 1.1402915716171265, "learning_rate": 1.193077230892357e-05, "loss": 0.4494, "step": 19042 }, { "epoch": 24.37504, "grad_norm": 1.1688295602798462, "learning_rate": 1.1928771508603443e-05, "loss": 0.4596, "step": 19043 }, { "epoch": 24.37632, "grad_norm": 1.1912732124328613, "learning_rate": 1.1926770708283313e-05, "loss": 0.4838, "step": 19044 }, { "epoch": 24.3776, "grad_norm": 1.1036256551742554, "learning_rate": 1.1924769907963187e-05, "loss": 0.4478, "step": 19045 }, { "epoch": 24.37888, 
"grad_norm": 1.1427876949310303, "learning_rate": 1.1922769107643057e-05, "loss": 0.4632, "step": 19046 }, { "epoch": 24.38016, "grad_norm": 1.2275477647781372, "learning_rate": 1.192076830732293e-05, "loss": 0.5451, "step": 19047 }, { "epoch": 24.38144, "grad_norm": 1.0969351530075073, "learning_rate": 1.1918767507002802e-05, "loss": 0.4578, "step": 19048 }, { "epoch": 24.38272, "grad_norm": 1.117645502090454, "learning_rate": 1.1916766706682674e-05, "loss": 0.4327, "step": 19049 }, { "epoch": 24.384, "grad_norm": 1.157266616821289, "learning_rate": 1.1914765906362546e-05, "loss": 0.478, "step": 19050 }, { "epoch": 24.38528, "grad_norm": 1.1453224420547485, "learning_rate": 1.1912765106042418e-05, "loss": 0.4923, "step": 19051 }, { "epoch": 24.38656, "grad_norm": 1.1940408945083618, "learning_rate": 1.191076430572229e-05, "loss": 0.5081, "step": 19052 }, { "epoch": 24.38784, "grad_norm": 1.0976282358169556, "learning_rate": 1.1908763505402161e-05, "loss": 0.4342, "step": 19053 }, { "epoch": 24.38912, "grad_norm": 1.211582899093628, "learning_rate": 1.1906762705082033e-05, "loss": 0.4941, "step": 19054 }, { "epoch": 24.3904, "grad_norm": 1.1101171970367432, "learning_rate": 1.1904761904761905e-05, "loss": 0.4526, "step": 19055 }, { "epoch": 24.39168, "grad_norm": 1.213545560836792, "learning_rate": 1.1902761104441777e-05, "loss": 0.4828, "step": 19056 }, { "epoch": 24.39296, "grad_norm": 1.1441152095794678, "learning_rate": 1.190076030412165e-05, "loss": 0.4691, "step": 19057 }, { "epoch": 24.39424, "grad_norm": 1.1789923906326294, "learning_rate": 1.189875950380152e-05, "loss": 0.4947, "step": 19058 }, { "epoch": 24.39552, "grad_norm": 1.031014323234558, "learning_rate": 1.1896758703481394e-05, "loss": 0.3981, "step": 19059 }, { "epoch": 24.3968, "grad_norm": 1.2153488397598267, "learning_rate": 1.1894757903161264e-05, "loss": 0.5261, "step": 19060 }, { "epoch": 24.39808, "grad_norm": 1.2073795795440674, "learning_rate": 1.1892757102841138e-05, "loss": 0.49, 
"step": 19061 }, { "epoch": 24.39936, "grad_norm": 1.1584200859069824, "learning_rate": 1.1890756302521008e-05, "loss": 0.4965, "step": 19062 }, { "epoch": 24.40064, "grad_norm": 1.1565824747085571, "learning_rate": 1.1888755502200882e-05, "loss": 0.4811, "step": 19063 }, { "epoch": 24.40192, "grad_norm": 1.1859803199768066, "learning_rate": 1.1886754701880753e-05, "loss": 0.4927, "step": 19064 }, { "epoch": 24.4032, "grad_norm": 1.1612244844436646, "learning_rate": 1.1884753901560625e-05, "loss": 0.5152, "step": 19065 }, { "epoch": 24.40448, "grad_norm": 1.1404790878295898, "learning_rate": 1.1882753101240497e-05, "loss": 0.4933, "step": 19066 }, { "epoch": 24.40576, "grad_norm": 1.1987690925598145, "learning_rate": 1.1880752300920369e-05, "loss": 0.4743, "step": 19067 }, { "epoch": 24.40704, "grad_norm": 1.145716905593872, "learning_rate": 1.1878751500600241e-05, "loss": 0.4787, "step": 19068 }, { "epoch": 24.40832, "grad_norm": 1.1686224937438965, "learning_rate": 1.1876750700280113e-05, "loss": 0.485, "step": 19069 }, { "epoch": 24.4096, "grad_norm": 1.1903284788131714, "learning_rate": 1.1874749899959985e-05, "loss": 0.4924, "step": 19070 }, { "epoch": 24.41088, "grad_norm": 1.1217108964920044, "learning_rate": 1.1872749099639856e-05, "loss": 0.4669, "step": 19071 }, { "epoch": 24.41216, "grad_norm": 1.1774252653121948, "learning_rate": 1.1870748299319728e-05, "loss": 0.4977, "step": 19072 }, { "epoch": 24.41344, "grad_norm": 1.2087725400924683, "learning_rate": 1.18687474989996e-05, "loss": 0.5188, "step": 19073 }, { "epoch": 24.41472, "grad_norm": 1.1201173067092896, "learning_rate": 1.1866746698679472e-05, "loss": 0.4849, "step": 19074 }, { "epoch": 24.416, "grad_norm": 1.0546908378601074, "learning_rate": 1.1864745898359344e-05, "loss": 0.4321, "step": 19075 }, { "epoch": 24.41728, "grad_norm": 1.2529699802398682, "learning_rate": 1.1862745098039216e-05, "loss": 0.5136, "step": 19076 }, { "epoch": 24.41856, "grad_norm": 1.12168550491333, "learning_rate": 
1.1860744297719088e-05, "loss": 0.484, "step": 19077 }, { "epoch": 24.41984, "grad_norm": 1.2384254932403564, "learning_rate": 1.1858743497398961e-05, "loss": 0.4956, "step": 19078 }, { "epoch": 24.42112, "grad_norm": 1.1256715059280396, "learning_rate": 1.1856742697078831e-05, "loss": 0.512, "step": 19079 }, { "epoch": 24.4224, "grad_norm": 1.2130337953567505, "learning_rate": 1.1854741896758705e-05, "loss": 0.4678, "step": 19080 }, { "epoch": 24.42368, "grad_norm": 1.1287142038345337, "learning_rate": 1.1852741096438575e-05, "loss": 0.4757, "step": 19081 }, { "epoch": 24.42496, "grad_norm": 1.1295257806777954, "learning_rate": 1.1850740296118449e-05, "loss": 0.4708, "step": 19082 }, { "epoch": 24.42624, "grad_norm": 1.1850224733352661, "learning_rate": 1.1848739495798319e-05, "loss": 0.493, "step": 19083 }, { "epoch": 24.42752, "grad_norm": 1.1218152046203613, "learning_rate": 1.1846738695478192e-05, "loss": 0.4252, "step": 19084 }, { "epoch": 24.4288, "grad_norm": 1.257108449935913, "learning_rate": 1.1844737895158062e-05, "loss": 0.5102, "step": 19085 }, { "epoch": 24.43008, "grad_norm": 1.2210159301757812, "learning_rate": 1.1842737094837936e-05, "loss": 0.4865, "step": 19086 }, { "epoch": 24.43136, "grad_norm": 1.188494086265564, "learning_rate": 1.1840736294517808e-05, "loss": 0.5042, "step": 19087 }, { "epoch": 24.43264, "grad_norm": 1.1470962762832642, "learning_rate": 1.183873549419768e-05, "loss": 0.4539, "step": 19088 }, { "epoch": 24.43392, "grad_norm": 1.181655764579773, "learning_rate": 1.1836734693877552e-05, "loss": 0.4374, "step": 19089 }, { "epoch": 24.4352, "grad_norm": 1.1574394702911377, "learning_rate": 1.1834733893557423e-05, "loss": 0.4477, "step": 19090 }, { "epoch": 24.43648, "grad_norm": 1.2077171802520752, "learning_rate": 1.1832733093237295e-05, "loss": 0.4863, "step": 19091 }, { "epoch": 24.43776, "grad_norm": 1.1384170055389404, "learning_rate": 1.1830732292917167e-05, "loss": 0.4579, "step": 19092 }, { "epoch": 24.43904, 
"grad_norm": 1.1912693977355957, "learning_rate": 1.1828731492597039e-05, "loss": 0.5061, "step": 19093 }, { "epoch": 24.44032, "grad_norm": 1.2225050926208496, "learning_rate": 1.1826730692276913e-05, "loss": 0.5158, "step": 19094 }, { "epoch": 24.4416, "grad_norm": 1.105109691619873, "learning_rate": 1.1824729891956783e-05, "loss": 0.4698, "step": 19095 }, { "epoch": 24.44288, "grad_norm": 1.2286189794540405, "learning_rate": 1.1822729091636656e-05, "loss": 0.5637, "step": 19096 }, { "epoch": 24.44416, "grad_norm": 1.2093946933746338, "learning_rate": 1.1820728291316526e-05, "loss": 0.5066, "step": 19097 }, { "epoch": 24.44544, "grad_norm": 1.1525238752365112, "learning_rate": 1.18187274909964e-05, "loss": 0.4576, "step": 19098 }, { "epoch": 24.44672, "grad_norm": 1.128960371017456, "learning_rate": 1.181672669067627e-05, "loss": 0.4811, "step": 19099 }, { "epoch": 24.448, "grad_norm": 1.11859130859375, "learning_rate": 1.1814725890356144e-05, "loss": 0.4477, "step": 19100 }, { "epoch": 24.44928, "grad_norm": 1.2186490297317505, "learning_rate": 1.1812725090036016e-05, "loss": 0.492, "step": 19101 }, { "epoch": 24.45056, "grad_norm": 1.1721785068511963, "learning_rate": 1.1810724289715887e-05, "loss": 0.4775, "step": 19102 }, { "epoch": 24.45184, "grad_norm": 1.1152302026748657, "learning_rate": 1.180872348939576e-05, "loss": 0.445, "step": 19103 }, { "epoch": 24.45312, "grad_norm": 1.2016420364379883, "learning_rate": 1.1806722689075631e-05, "loss": 0.4517, "step": 19104 }, { "epoch": 24.4544, "grad_norm": 1.1429190635681152, "learning_rate": 1.1804721888755503e-05, "loss": 0.4761, "step": 19105 }, { "epoch": 24.45568, "grad_norm": 1.1792242527008057, "learning_rate": 1.1802721088435375e-05, "loss": 0.4446, "step": 19106 }, { "epoch": 24.45696, "grad_norm": 1.086751103401184, "learning_rate": 1.1800720288115247e-05, "loss": 0.4721, "step": 19107 }, { "epoch": 24.45824, "grad_norm": 1.1734704971313477, "learning_rate": 1.1798719487795119e-05, "loss": 0.4855, 
"step": 19108 }, { "epoch": 24.45952, "grad_norm": 1.1557416915893555, "learning_rate": 1.179671868747499e-05, "loss": 0.4758, "step": 19109 }, { "epoch": 24.4608, "grad_norm": 1.1353665590286255, "learning_rate": 1.1794717887154862e-05, "loss": 0.5025, "step": 19110 }, { "epoch": 24.46208, "grad_norm": 1.1943861246109009, "learning_rate": 1.1792717086834734e-05, "loss": 0.4488, "step": 19111 }, { "epoch": 24.46336, "grad_norm": 1.1474003791809082, "learning_rate": 1.1790716286514606e-05, "loss": 0.4272, "step": 19112 }, { "epoch": 24.46464, "grad_norm": 1.1481927633285522, "learning_rate": 1.1788715486194478e-05, "loss": 0.4674, "step": 19113 }, { "epoch": 24.46592, "grad_norm": 1.2008520364761353, "learning_rate": 1.178671468587435e-05, "loss": 0.47, "step": 19114 }, { "epoch": 24.4672, "grad_norm": 1.1071596145629883, "learning_rate": 1.1784713885554222e-05, "loss": 0.4489, "step": 19115 }, { "epoch": 24.46848, "grad_norm": 1.1181803941726685, "learning_rate": 1.1782713085234093e-05, "loss": 0.4641, "step": 19116 }, { "epoch": 24.46976, "grad_norm": 1.163570523262024, "learning_rate": 1.1780712284913967e-05, "loss": 0.492, "step": 19117 }, { "epoch": 24.47104, "grad_norm": 1.1192409992218018, "learning_rate": 1.1778711484593837e-05, "loss": 0.4299, "step": 19118 }, { "epoch": 24.47232, "grad_norm": 1.1589858531951904, "learning_rate": 1.177671068427371e-05, "loss": 0.4381, "step": 19119 }, { "epoch": 24.4736, "grad_norm": 1.183703064918518, "learning_rate": 1.1774709883953581e-05, "loss": 0.4732, "step": 19120 }, { "epoch": 24.47488, "grad_norm": 1.0970691442489624, "learning_rate": 1.1772709083633454e-05, "loss": 0.4542, "step": 19121 }, { "epoch": 24.47616, "grad_norm": 1.1885040998458862, "learning_rate": 1.1770708283313325e-05, "loss": 0.5307, "step": 19122 }, { "epoch": 24.47744, "grad_norm": 1.1278455257415771, "learning_rate": 1.1768707482993198e-05, "loss": 0.4681, "step": 19123 }, { "epoch": 24.47872, "grad_norm": 1.0988664627075195, "learning_rate": 
1.176670668267307e-05, "loss": 0.4867, "step": 19124 }, { "epoch": 24.48, "grad_norm": 1.0938818454742432, "learning_rate": 1.1764705882352942e-05, "loss": 0.4929, "step": 19125 }, { "epoch": 24.48128, "grad_norm": 1.1701966524124146, "learning_rate": 1.1762705082032814e-05, "loss": 0.4634, "step": 19126 }, { "epoch": 24.48256, "grad_norm": 1.2021418809890747, "learning_rate": 1.1760704281712686e-05, "loss": 0.4588, "step": 19127 }, { "epoch": 24.48384, "grad_norm": 1.1727592945098877, "learning_rate": 1.1758703481392557e-05, "loss": 0.4352, "step": 19128 }, { "epoch": 24.48512, "grad_norm": 1.174090027809143, "learning_rate": 1.175670268107243e-05, "loss": 0.4726, "step": 19129 }, { "epoch": 24.4864, "grad_norm": 1.1474714279174805, "learning_rate": 1.1754701880752301e-05, "loss": 0.4448, "step": 19130 }, { "epoch": 24.48768, "grad_norm": 1.2793716192245483, "learning_rate": 1.1752701080432175e-05, "loss": 0.5175, "step": 19131 }, { "epoch": 24.48896, "grad_norm": 1.201866626739502, "learning_rate": 1.1750700280112045e-05, "loss": 0.4494, "step": 19132 }, { "epoch": 24.49024, "grad_norm": 1.1244724988937378, "learning_rate": 1.1748699479791918e-05, "loss": 0.4488, "step": 19133 }, { "epoch": 24.49152, "grad_norm": 1.1583161354064941, "learning_rate": 1.1746698679471789e-05, "loss": 0.4516, "step": 19134 }, { "epoch": 24.4928, "grad_norm": 1.1337168216705322, "learning_rate": 1.1744697879151662e-05, "loss": 0.4695, "step": 19135 }, { "epoch": 24.49408, "grad_norm": 1.064746618270874, "learning_rate": 1.1742697078831532e-05, "loss": 0.4032, "step": 19136 }, { "epoch": 24.49536, "grad_norm": 1.1812145709991455, "learning_rate": 1.1740696278511406e-05, "loss": 0.5035, "step": 19137 }, { "epoch": 24.49664, "grad_norm": 1.156087875366211, "learning_rate": 1.1738695478191276e-05, "loss": 0.4852, "step": 19138 }, { "epoch": 24.49792, "grad_norm": 1.1552976369857788, "learning_rate": 1.173669467787115e-05, "loss": 0.4727, "step": 19139 }, { "epoch": 24.4992, "grad_norm": 
1.160605788230896, "learning_rate": 1.1734693877551021e-05, "loss": 0.4935, "step": 19140 }, { "epoch": 24.50048, "grad_norm": 1.1020570993423462, "learning_rate": 1.1732693077230893e-05, "loss": 0.453, "step": 19141 }, { "epoch": 24.50176, "grad_norm": 1.234426498413086, "learning_rate": 1.1730692276910765e-05, "loss": 0.5184, "step": 19142 }, { "epoch": 24.50304, "grad_norm": 1.2169140577316284, "learning_rate": 1.1728691476590637e-05, "loss": 0.509, "step": 19143 }, { "epoch": 24.50432, "grad_norm": 1.199015736579895, "learning_rate": 1.1726690676270509e-05, "loss": 0.4727, "step": 19144 }, { "epoch": 24.5056, "grad_norm": 1.1079201698303223, "learning_rate": 1.172468987595038e-05, "loss": 0.4845, "step": 19145 }, { "epoch": 24.50688, "grad_norm": 1.146178126335144, "learning_rate": 1.1722689075630253e-05, "loss": 0.4743, "step": 19146 }, { "epoch": 24.50816, "grad_norm": 1.0882790088653564, "learning_rate": 1.1720688275310124e-05, "loss": 0.4622, "step": 19147 }, { "epoch": 24.50944, "grad_norm": 1.2238930463790894, "learning_rate": 1.1718687474989996e-05, "loss": 0.4771, "step": 19148 }, { "epoch": 24.51072, "grad_norm": 1.1269352436065674, "learning_rate": 1.1716686674669868e-05, "loss": 0.5029, "step": 19149 }, { "epoch": 24.512, "grad_norm": 1.1483230590820312, "learning_rate": 1.171468587434974e-05, "loss": 0.4726, "step": 19150 }, { "epoch": 24.51328, "grad_norm": 1.1776012182235718, "learning_rate": 1.1712685074029612e-05, "loss": 0.497, "step": 19151 }, { "epoch": 24.51456, "grad_norm": 1.1069426536560059, "learning_rate": 1.1710684273709484e-05, "loss": 0.4583, "step": 19152 }, { "epoch": 24.51584, "grad_norm": 1.1324975490570068, "learning_rate": 1.1708683473389356e-05, "loss": 0.4128, "step": 19153 }, { "epoch": 24.51712, "grad_norm": 1.190555214881897, "learning_rate": 1.1706682673069229e-05, "loss": 0.4871, "step": 19154 }, { "epoch": 24.5184, "grad_norm": 1.1613634824752808, "learning_rate": 1.17046818727491e-05, "loss": 0.4866, "step": 19155 }, { 
"epoch": 24.51968, "grad_norm": 1.1718133687973022, "learning_rate": 1.1702681072428973e-05, "loss": 0.4818, "step": 19156 }, { "epoch": 24.52096, "grad_norm": 1.146981954574585, "learning_rate": 1.1700680272108843e-05, "loss": 0.5019, "step": 19157 }, { "epoch": 24.52224, "grad_norm": 1.192657709121704, "learning_rate": 1.1698679471788717e-05, "loss": 0.4981, "step": 19158 }, { "epoch": 24.52352, "grad_norm": 1.0763636827468872, "learning_rate": 1.1696678671468587e-05, "loss": 0.4314, "step": 19159 }, { "epoch": 24.5248, "grad_norm": 1.1718692779541016, "learning_rate": 1.169467787114846e-05, "loss": 0.4831, "step": 19160 }, { "epoch": 24.52608, "grad_norm": 1.0933730602264404, "learning_rate": 1.1692677070828332e-05, "loss": 0.4976, "step": 19161 }, { "epoch": 24.52736, "grad_norm": 1.0974316596984863, "learning_rate": 1.1690676270508204e-05, "loss": 0.4816, "step": 19162 }, { "epoch": 24.52864, "grad_norm": 1.2042176723480225, "learning_rate": 1.1688675470188076e-05, "loss": 0.4928, "step": 19163 }, { "epoch": 24.52992, "grad_norm": 1.1524473428726196, "learning_rate": 1.1686674669867948e-05, "loss": 0.4771, "step": 19164 }, { "epoch": 24.5312, "grad_norm": 1.2503443956375122, "learning_rate": 1.168467386954782e-05, "loss": 0.4702, "step": 19165 }, { "epoch": 24.53248, "grad_norm": 1.1078400611877441, "learning_rate": 1.1682673069227691e-05, "loss": 0.4361, "step": 19166 }, { "epoch": 24.53376, "grad_norm": 1.1431939601898193, "learning_rate": 1.1680672268907563e-05, "loss": 0.4693, "step": 19167 }, { "epoch": 24.53504, "grad_norm": 1.201981782913208, "learning_rate": 1.1678671468587435e-05, "loss": 0.4996, "step": 19168 }, { "epoch": 24.53632, "grad_norm": 1.2216880321502686, "learning_rate": 1.1676670668267307e-05, "loss": 0.4964, "step": 19169 }, { "epoch": 24.5376, "grad_norm": 1.199825644493103, "learning_rate": 1.167466986794718e-05, "loss": 0.484, "step": 19170 }, { "epoch": 24.53888, "grad_norm": 1.2285641431808472, "learning_rate": 
1.167266906762705e-05, "loss": 0.509, "step": 19171 }, { "epoch": 24.54016, "grad_norm": 1.1986514329910278, "learning_rate": 1.1670668267306924e-05, "loss": 0.49, "step": 19172 }, { "epoch": 24.54144, "grad_norm": 1.1682507991790771, "learning_rate": 1.1668667466986794e-05, "loss": 0.4695, "step": 19173 }, { "epoch": 24.54272, "grad_norm": 1.216668963432312, "learning_rate": 1.1666666666666668e-05, "loss": 0.508, "step": 19174 }, { "epoch": 24.544, "grad_norm": 1.08146071434021, "learning_rate": 1.1664665866346538e-05, "loss": 0.4698, "step": 19175 }, { "epoch": 24.545279999999998, "grad_norm": 1.1765329837799072, "learning_rate": 1.1662665066026412e-05, "loss": 0.4394, "step": 19176 }, { "epoch": 24.54656, "grad_norm": 1.1449719667434692, "learning_rate": 1.1660664265706283e-05, "loss": 0.4574, "step": 19177 }, { "epoch": 24.54784, "grad_norm": 1.0752500295639038, "learning_rate": 1.1658663465386155e-05, "loss": 0.4515, "step": 19178 }, { "epoch": 24.54912, "grad_norm": 1.214896559715271, "learning_rate": 1.1656662665066027e-05, "loss": 0.5097, "step": 19179 }, { "epoch": 24.5504, "grad_norm": 1.113995909690857, "learning_rate": 1.1654661864745899e-05, "loss": 0.4797, "step": 19180 }, { "epoch": 24.55168, "grad_norm": 1.1679980754852295, "learning_rate": 1.1652661064425771e-05, "loss": 0.4592, "step": 19181 }, { "epoch": 24.55296, "grad_norm": 1.154405117034912, "learning_rate": 1.1650660264105643e-05, "loss": 0.4735, "step": 19182 }, { "epoch": 24.55424, "grad_norm": 1.2269680500030518, "learning_rate": 1.1648659463785515e-05, "loss": 0.499, "step": 19183 }, { "epoch": 24.55552, "grad_norm": 1.146951675415039, "learning_rate": 1.1646658663465388e-05, "loss": 0.4796, "step": 19184 }, { "epoch": 24.5568, "grad_norm": 1.2074452638626099, "learning_rate": 1.1644657863145258e-05, "loss": 0.4796, "step": 19185 }, { "epoch": 24.55808, "grad_norm": 1.1281073093414307, "learning_rate": 1.1642657062825132e-05, "loss": 0.4737, "step": 19186 }, { "epoch": 24.55936, 
"grad_norm": 1.1057974100112915, "learning_rate": 1.1640656262505002e-05, "loss": 0.4699, "step": 19187 }, { "epoch": 24.56064, "grad_norm": 1.1083946228027344, "learning_rate": 1.1638655462184876e-05, "loss": 0.4705, "step": 19188 }, { "epoch": 24.56192, "grad_norm": 1.1685590744018555, "learning_rate": 1.1636654661864746e-05, "loss": 0.4922, "step": 19189 }, { "epoch": 24.5632, "grad_norm": 1.1086533069610596, "learning_rate": 1.163465386154462e-05, "loss": 0.4473, "step": 19190 }, { "epoch": 24.56448, "grad_norm": 1.156948447227478, "learning_rate": 1.163265306122449e-05, "loss": 0.4685, "step": 19191 }, { "epoch": 24.56576, "grad_norm": 1.1937439441680908, "learning_rate": 1.1630652260904363e-05, "loss": 0.4974, "step": 19192 }, { "epoch": 24.56704, "grad_norm": 1.1433395147323608, "learning_rate": 1.1628651460584235e-05, "loss": 0.4823, "step": 19193 }, { "epoch": 24.56832, "grad_norm": 1.1796252727508545, "learning_rate": 1.1626650660264107e-05, "loss": 0.4591, "step": 19194 }, { "epoch": 24.5696, "grad_norm": 1.1520229578018188, "learning_rate": 1.1624649859943979e-05, "loss": 0.4805, "step": 19195 }, { "epoch": 24.57088, "grad_norm": 1.0709677934646606, "learning_rate": 1.162264905962385e-05, "loss": 0.4397, "step": 19196 }, { "epoch": 24.57216, "grad_norm": 1.1727806329727173, "learning_rate": 1.1620648259303722e-05, "loss": 0.5408, "step": 19197 }, { "epoch": 24.57344, "grad_norm": 1.1703429222106934, "learning_rate": 1.1618647458983594e-05, "loss": 0.5012, "step": 19198 }, { "epoch": 24.57472, "grad_norm": 1.1146478652954102, "learning_rate": 1.1616646658663466e-05, "loss": 0.4456, "step": 19199 }, { "epoch": 24.576, "grad_norm": 1.1429121494293213, "learning_rate": 1.1614645858343338e-05, "loss": 0.4617, "step": 19200 }, { "epoch": 24.577280000000002, "grad_norm": 1.0925348997116089, "learning_rate": 1.161264505802321e-05, "loss": 0.487, "step": 19201 }, { "epoch": 24.57856, "grad_norm": 1.1159874200820923, "learning_rate": 1.1610644257703082e-05, 
"loss": 0.4982, "step": 19202 }, { "epoch": 24.57984, "grad_norm": 1.1333866119384766, "learning_rate": 1.1608643457382953e-05, "loss": 0.4537, "step": 19203 }, { "epoch": 24.58112, "grad_norm": 1.134494662284851, "learning_rate": 1.1606642657062825e-05, "loss": 0.4516, "step": 19204 }, { "epoch": 24.5824, "grad_norm": 1.1905516386032104, "learning_rate": 1.1604641856742697e-05, "loss": 0.4714, "step": 19205 }, { "epoch": 24.58368, "grad_norm": 1.208512544631958, "learning_rate": 1.1602641056422569e-05, "loss": 0.4834, "step": 19206 }, { "epoch": 24.58496, "grad_norm": 1.2601969242095947, "learning_rate": 1.1600640256102443e-05, "loss": 0.501, "step": 19207 }, { "epoch": 24.58624, "grad_norm": 1.1832962036132812, "learning_rate": 1.1598639455782313e-05, "loss": 0.459, "step": 19208 }, { "epoch": 24.58752, "grad_norm": 1.2461732625961304, "learning_rate": 1.1596638655462186e-05, "loss": 0.5219, "step": 19209 }, { "epoch": 24.5888, "grad_norm": 1.1089426279067993, "learning_rate": 1.1594637855142056e-05, "loss": 0.5035, "step": 19210 }, { "epoch": 24.59008, "grad_norm": 1.2491692304611206, "learning_rate": 1.159263705482193e-05, "loss": 0.5015, "step": 19211 }, { "epoch": 24.59136, "grad_norm": 1.1578104496002197, "learning_rate": 1.15906362545018e-05, "loss": 0.4683, "step": 19212 }, { "epoch": 24.59264, "grad_norm": 1.1508525609970093, "learning_rate": 1.1588635454181674e-05, "loss": 0.4839, "step": 19213 }, { "epoch": 24.59392, "grad_norm": 1.249266266822815, "learning_rate": 1.1586634653861546e-05, "loss": 0.4757, "step": 19214 }, { "epoch": 24.5952, "grad_norm": 1.13655686378479, "learning_rate": 1.1584633853541417e-05, "loss": 0.4996, "step": 19215 }, { "epoch": 24.59648, "grad_norm": 1.1786251068115234, "learning_rate": 1.158263305322129e-05, "loss": 0.4798, "step": 19216 }, { "epoch": 24.59776, "grad_norm": 1.1763968467712402, "learning_rate": 1.1580632252901161e-05, "loss": 0.4782, "step": 19217 }, { "epoch": 24.59904, "grad_norm": 1.097705602645874, 
"learning_rate": 1.1578631452581033e-05, "loss": 0.4706, "step": 19218 }, { "epoch": 24.60032, "grad_norm": 1.1707555055618286, "learning_rate": 1.1576630652260905e-05, "loss": 0.4738, "step": 19219 }, { "epoch": 24.6016, "grad_norm": 1.1376193761825562, "learning_rate": 1.1574629851940777e-05, "loss": 0.505, "step": 19220 }, { "epoch": 24.60288, "grad_norm": 1.26047945022583, "learning_rate": 1.1572629051620649e-05, "loss": 0.5244, "step": 19221 }, { "epoch": 24.60416, "grad_norm": 1.1913353204727173, "learning_rate": 1.157062825130052e-05, "loss": 0.4879, "step": 19222 }, { "epoch": 24.60544, "grad_norm": 1.1769354343414307, "learning_rate": 1.1568627450980394e-05, "loss": 0.5038, "step": 19223 }, { "epoch": 24.60672, "grad_norm": 1.16858971118927, "learning_rate": 1.1566626650660264e-05, "loss": 0.4796, "step": 19224 }, { "epoch": 24.608, "grad_norm": 1.1839061975479126, "learning_rate": 1.1564625850340138e-05, "loss": 0.5132, "step": 19225 }, { "epoch": 24.60928, "grad_norm": 1.2122026681900024, "learning_rate": 1.1562625050020008e-05, "loss": 0.5023, "step": 19226 }, { "epoch": 24.61056, "grad_norm": 1.1488457918167114, "learning_rate": 1.1560624249699881e-05, "loss": 0.4586, "step": 19227 }, { "epoch": 24.61184, "grad_norm": 1.1649909019470215, "learning_rate": 1.1558623449379752e-05, "loss": 0.4974, "step": 19228 }, { "epoch": 24.61312, "grad_norm": 1.180234670639038, "learning_rate": 1.1556622649059625e-05, "loss": 0.4725, "step": 19229 }, { "epoch": 24.6144, "grad_norm": 1.1081424951553345, "learning_rate": 1.1554621848739497e-05, "loss": 0.47, "step": 19230 }, { "epoch": 24.61568, "grad_norm": 1.1423239707946777, "learning_rate": 1.1552621048419369e-05, "loss": 0.4692, "step": 19231 }, { "epoch": 24.61696, "grad_norm": 1.1524152755737305, "learning_rate": 1.155062024809924e-05, "loss": 0.4718, "step": 19232 }, { "epoch": 24.61824, "grad_norm": 1.2075049877166748, "learning_rate": 1.1548619447779113e-05, "loss": 0.4985, "step": 19233 }, { "epoch": 
24.61952, "grad_norm": 1.1373603343963623, "learning_rate": 1.1546618647458984e-05, "loss": 0.5022, "step": 19234 }, { "epoch": 24.6208, "grad_norm": 1.1606000661849976, "learning_rate": 1.1544617847138856e-05, "loss": 0.4829, "step": 19235 }, { "epoch": 24.62208, "grad_norm": 1.1605327129364014, "learning_rate": 1.1542617046818728e-05, "loss": 0.4924, "step": 19236 }, { "epoch": 24.62336, "grad_norm": 1.228710651397705, "learning_rate": 1.15406162464986e-05, "loss": 0.5122, "step": 19237 }, { "epoch": 24.62464, "grad_norm": 1.0705116987228394, "learning_rate": 1.1538615446178472e-05, "loss": 0.4644, "step": 19238 }, { "epoch": 24.62592, "grad_norm": 1.2077350616455078, "learning_rate": 1.1536614645858344e-05, "loss": 0.4765, "step": 19239 }, { "epoch": 24.6272, "grad_norm": 1.1825919151306152, "learning_rate": 1.1534613845538216e-05, "loss": 0.47, "step": 19240 }, { "epoch": 24.62848, "grad_norm": 1.1296800374984741, "learning_rate": 1.1532613045218087e-05, "loss": 0.4958, "step": 19241 }, { "epoch": 24.62976, "grad_norm": 1.184819221496582, "learning_rate": 1.153061224489796e-05, "loss": 0.4932, "step": 19242 }, { "epoch": 24.63104, "grad_norm": 1.2108218669891357, "learning_rate": 1.1528611444577831e-05, "loss": 0.4847, "step": 19243 }, { "epoch": 24.63232, "grad_norm": 1.1210412979125977, "learning_rate": 1.1526610644257705e-05, "loss": 0.4636, "step": 19244 }, { "epoch": 24.6336, "grad_norm": 1.1325141191482544, "learning_rate": 1.1524609843937575e-05, "loss": 0.4885, "step": 19245 }, { "epoch": 24.63488, "grad_norm": 1.198911428451538, "learning_rate": 1.1522609043617448e-05, "loss": 0.4902, "step": 19246 }, { "epoch": 24.63616, "grad_norm": 1.1128379106521606, "learning_rate": 1.1520608243297319e-05, "loss": 0.4737, "step": 19247 }, { "epoch": 24.63744, "grad_norm": 1.147925853729248, "learning_rate": 1.1518607442977192e-05, "loss": 0.4618, "step": 19248 }, { "epoch": 24.63872, "grad_norm": 1.10917329788208, "learning_rate": 1.1516606642657062e-05, "loss": 
0.437, "step": 19249 }, { "epoch": 24.64, "grad_norm": 1.1810179948806763, "learning_rate": 1.1514605842336936e-05, "loss": 0.4697, "step": 19250 }, { "epoch": 24.64128, "grad_norm": 1.15571928024292, "learning_rate": 1.1512605042016806e-05, "loss": 0.4482, "step": 19251 }, { "epoch": 24.64256, "grad_norm": 1.2140380144119263, "learning_rate": 1.151060424169668e-05, "loss": 0.4722, "step": 19252 }, { "epoch": 24.64384, "grad_norm": 1.1459300518035889, "learning_rate": 1.1508603441376551e-05, "loss": 0.4415, "step": 19253 }, { "epoch": 24.64512, "grad_norm": 1.2092753648757935, "learning_rate": 1.1506602641056423e-05, "loss": 0.4927, "step": 19254 }, { "epoch": 24.6464, "grad_norm": 1.0979299545288086, "learning_rate": 1.1504601840736295e-05, "loss": 0.474, "step": 19255 }, { "epoch": 24.64768, "grad_norm": 1.2090137004852295, "learning_rate": 1.1502601040416167e-05, "loss": 0.4667, "step": 19256 }, { "epoch": 24.64896, "grad_norm": 1.0773359537124634, "learning_rate": 1.1500600240096039e-05, "loss": 0.4631, "step": 19257 }, { "epoch": 24.65024, "grad_norm": 1.1589806079864502, "learning_rate": 1.149859943977591e-05, "loss": 0.4615, "step": 19258 }, { "epoch": 24.65152, "grad_norm": 1.1646665334701538, "learning_rate": 1.1496598639455783e-05, "loss": 0.5144, "step": 19259 }, { "epoch": 24.6528, "grad_norm": 1.1850485801696777, "learning_rate": 1.1494597839135656e-05, "loss": 0.4873, "step": 19260 }, { "epoch": 24.65408, "grad_norm": 1.111702561378479, "learning_rate": 1.1492597038815526e-05, "loss": 0.462, "step": 19261 }, { "epoch": 24.65536, "grad_norm": 1.0776835680007935, "learning_rate": 1.14905962384954e-05, "loss": 0.4624, "step": 19262 }, { "epoch": 24.65664, "grad_norm": 1.1940804719924927, "learning_rate": 1.148859543817527e-05, "loss": 0.4731, "step": 19263 }, { "epoch": 24.65792, "grad_norm": 1.0899372100830078, "learning_rate": 1.1486594637855143e-05, "loss": 0.4729, "step": 19264 }, { "epoch": 24.6592, "grad_norm": 1.2058807611465454, "learning_rate": 
1.1484593837535014e-05, "loss": 0.4518, "step": 19265 }, { "epoch": 24.66048, "grad_norm": 1.14639413356781, "learning_rate": 1.1482593037214887e-05, "loss": 0.4715, "step": 19266 }, { "epoch": 24.66176, "grad_norm": 1.0863559246063232, "learning_rate": 1.1480592236894759e-05, "loss": 0.4479, "step": 19267 }, { "epoch": 24.66304, "grad_norm": 1.1600267887115479, "learning_rate": 1.1478591436574631e-05, "loss": 0.5148, "step": 19268 }, { "epoch": 24.66432, "grad_norm": 1.1542725563049316, "learning_rate": 1.1476590636254503e-05, "loss": 0.4798, "step": 19269 }, { "epoch": 24.6656, "grad_norm": 1.1546802520751953, "learning_rate": 1.1474589835934375e-05, "loss": 0.4433, "step": 19270 }, { "epoch": 24.66688, "grad_norm": 1.123247742652893, "learning_rate": 1.1472589035614246e-05, "loss": 0.4422, "step": 19271 }, { "epoch": 24.66816, "grad_norm": 1.2257593870162964, "learning_rate": 1.1470588235294118e-05, "loss": 0.4726, "step": 19272 }, { "epoch": 24.66944, "grad_norm": 1.196321725845337, "learning_rate": 1.146858743497399e-05, "loss": 0.5249, "step": 19273 }, { "epoch": 24.67072, "grad_norm": 1.1580753326416016, "learning_rate": 1.1466586634653862e-05, "loss": 0.4793, "step": 19274 }, { "epoch": 24.672, "grad_norm": 1.10203218460083, "learning_rate": 1.1464585834333734e-05, "loss": 0.416, "step": 19275 }, { "epoch": 24.67328, "grad_norm": 1.0155160427093506, "learning_rate": 1.1462585034013606e-05, "loss": 0.4385, "step": 19276 }, { "epoch": 24.67456, "grad_norm": 1.1510674953460693, "learning_rate": 1.1460584233693478e-05, "loss": 0.4783, "step": 19277 }, { "epoch": 24.67584, "grad_norm": 1.1276487112045288, "learning_rate": 1.145858343337335e-05, "loss": 0.4613, "step": 19278 }, { "epoch": 24.67712, "grad_norm": 1.1388295888900757, "learning_rate": 1.1456582633053221e-05, "loss": 0.4549, "step": 19279 }, { "epoch": 24.6784, "grad_norm": 1.1739778518676758, "learning_rate": 1.1454581832733093e-05, "loss": 0.4569, "step": 19280 }, { "epoch": 24.67968, "grad_norm": 
1.0705592632293701, "learning_rate": 1.1452581032412965e-05, "loss": 0.4347, "step": 19281 }, { "epoch": 24.68096, "grad_norm": 1.2039315700531006, "learning_rate": 1.1450580232092837e-05, "loss": 0.4751, "step": 19282 }, { "epoch": 24.68224, "grad_norm": 1.1473381519317627, "learning_rate": 1.144857943177271e-05, "loss": 0.4583, "step": 19283 }, { "epoch": 24.68352, "grad_norm": 1.2741856575012207, "learning_rate": 1.144657863145258e-05, "loss": 0.4695, "step": 19284 }, { "epoch": 24.6848, "grad_norm": 1.1236690282821655, "learning_rate": 1.1444577831132454e-05, "loss": 0.4626, "step": 19285 }, { "epoch": 24.68608, "grad_norm": 1.2310682535171509, "learning_rate": 1.1442577030812324e-05, "loss": 0.4727, "step": 19286 }, { "epoch": 24.687359999999998, "grad_norm": 1.1304185390472412, "learning_rate": 1.1440576230492198e-05, "loss": 0.4577, "step": 19287 }, { "epoch": 24.68864, "grad_norm": 1.2843713760375977, "learning_rate": 1.1438575430172068e-05, "loss": 0.4966, "step": 19288 }, { "epoch": 24.68992, "grad_norm": 1.1501768827438354, "learning_rate": 1.1436574629851942e-05, "loss": 0.4763, "step": 19289 }, { "epoch": 24.6912, "grad_norm": 1.2315309047698975, "learning_rate": 1.1434573829531813e-05, "loss": 0.4693, "step": 19290 }, { "epoch": 24.69248, "grad_norm": 1.1766899824142456, "learning_rate": 1.1432573029211685e-05, "loss": 0.4635, "step": 19291 }, { "epoch": 24.69376, "grad_norm": 1.2574455738067627, "learning_rate": 1.1430572228891557e-05, "loss": 0.5019, "step": 19292 }, { "epoch": 24.69504, "grad_norm": 1.1796600818634033, "learning_rate": 1.1428571428571429e-05, "loss": 0.4645, "step": 19293 }, { "epoch": 24.69632, "grad_norm": 1.1451575756072998, "learning_rate": 1.1426570628251301e-05, "loss": 0.4712, "step": 19294 }, { "epoch": 24.6976, "grad_norm": 1.1572251319885254, "learning_rate": 1.1424569827931173e-05, "loss": 0.5001, "step": 19295 }, { "epoch": 24.69888, "grad_norm": 1.1308900117874146, "learning_rate": 1.1422569027611045e-05, "loss": 
0.4832, "step": 19296 }, { "epoch": 24.70016, "grad_norm": 1.1736658811569214, "learning_rate": 1.1420568227290918e-05, "loss": 0.4795, "step": 19297 }, { "epoch": 24.70144, "grad_norm": 1.182158350944519, "learning_rate": 1.1418567426970788e-05, "loss": 0.4893, "step": 19298 }, { "epoch": 24.70272, "grad_norm": 1.1963363885879517, "learning_rate": 1.1416566626650662e-05, "loss": 0.4335, "step": 19299 }, { "epoch": 24.704, "grad_norm": 1.1854673624038696, "learning_rate": 1.1414565826330532e-05, "loss": 0.4765, "step": 19300 }, { "epoch": 24.70528, "grad_norm": 1.1632603406906128, "learning_rate": 1.1412565026010406e-05, "loss": 0.453, "step": 19301 }, { "epoch": 24.70656, "grad_norm": 1.1224347352981567, "learning_rate": 1.1410564225690276e-05, "loss": 0.4329, "step": 19302 }, { "epoch": 24.70784, "grad_norm": 1.083587408065796, "learning_rate": 1.140856342537015e-05, "loss": 0.4908, "step": 19303 }, { "epoch": 24.70912, "grad_norm": 1.1031328439712524, "learning_rate": 1.140656262505002e-05, "loss": 0.4961, "step": 19304 }, { "epoch": 24.7104, "grad_norm": 1.1741230487823486, "learning_rate": 1.1404561824729893e-05, "loss": 0.4992, "step": 19305 }, { "epoch": 24.71168, "grad_norm": 1.1455093622207642, "learning_rate": 1.1402561024409765e-05, "loss": 0.4462, "step": 19306 }, { "epoch": 24.71296, "grad_norm": 1.1792043447494507, "learning_rate": 1.1400560224089637e-05, "loss": 0.4428, "step": 19307 }, { "epoch": 24.71424, "grad_norm": 1.1586357355117798, "learning_rate": 1.1398559423769509e-05, "loss": 0.4899, "step": 19308 }, { "epoch": 24.71552, "grad_norm": 1.1558829545974731, "learning_rate": 1.139655862344938e-05, "loss": 0.4433, "step": 19309 }, { "epoch": 24.7168, "grad_norm": 1.1648732423782349, "learning_rate": 1.1394557823129252e-05, "loss": 0.4847, "step": 19310 }, { "epoch": 24.71808, "grad_norm": 1.1810020208358765, "learning_rate": 1.1392557022809124e-05, "loss": 0.524, "step": 19311 }, { "epoch": 24.71936, "grad_norm": 1.210523247718811, 
"learning_rate": 1.1390556222488996e-05, "loss": 0.4705, "step": 19312 }, { "epoch": 24.72064, "grad_norm": 1.1746503114700317, "learning_rate": 1.1388555422168868e-05, "loss": 0.4782, "step": 19313 }, { "epoch": 24.72192, "grad_norm": 1.1971492767333984, "learning_rate": 1.138655462184874e-05, "loss": 0.5247, "step": 19314 }, { "epoch": 24.7232, "grad_norm": 1.219415307044983, "learning_rate": 1.1384553821528612e-05, "loss": 0.5261, "step": 19315 }, { "epoch": 24.72448, "grad_norm": 1.157728910446167, "learning_rate": 1.1382553021208483e-05, "loss": 0.5033, "step": 19316 }, { "epoch": 24.72576, "grad_norm": 1.21958327293396, "learning_rate": 1.1380552220888355e-05, "loss": 0.5063, "step": 19317 }, { "epoch": 24.72704, "grad_norm": 1.183083176612854, "learning_rate": 1.1378551420568227e-05, "loss": 0.5095, "step": 19318 }, { "epoch": 24.72832, "grad_norm": 1.1700811386108398, "learning_rate": 1.1376550620248099e-05, "loss": 0.4605, "step": 19319 }, { "epoch": 24.7296, "grad_norm": 1.1539628505706787, "learning_rate": 1.1374549819927973e-05, "loss": 0.49, "step": 19320 }, { "epoch": 24.73088, "grad_norm": 1.0613603591918945, "learning_rate": 1.1372549019607843e-05, "loss": 0.4647, "step": 19321 }, { "epoch": 24.73216, "grad_norm": 1.124666690826416, "learning_rate": 1.1370548219287716e-05, "loss": 0.4497, "step": 19322 }, { "epoch": 24.73344, "grad_norm": 1.1756181716918945, "learning_rate": 1.1368547418967586e-05, "loss": 0.4863, "step": 19323 }, { "epoch": 24.73472, "grad_norm": 1.0413018465042114, "learning_rate": 1.136654661864746e-05, "loss": 0.4702, "step": 19324 }, { "epoch": 24.736, "grad_norm": 1.0692871809005737, "learning_rate": 1.136454581832733e-05, "loss": 0.4444, "step": 19325 }, { "epoch": 24.73728, "grad_norm": 1.1003504991531372, "learning_rate": 1.1362545018007204e-05, "loss": 0.4332, "step": 19326 }, { "epoch": 24.73856, "grad_norm": 1.126057744026184, "learning_rate": 1.1360544217687076e-05, "loss": 0.4527, "step": 19327 }, { "epoch": 24.73984, 
"grad_norm": 1.1299209594726562, "learning_rate": 1.1358543417366947e-05, "loss": 0.4338, "step": 19328 }, { "epoch": 24.74112, "grad_norm": 1.0575133562088013, "learning_rate": 1.135654261704682e-05, "loss": 0.4591, "step": 19329 }, { "epoch": 24.7424, "grad_norm": 1.1829102039337158, "learning_rate": 1.1354541816726691e-05, "loss": 0.4912, "step": 19330 }, { "epoch": 24.74368, "grad_norm": 1.154536247253418, "learning_rate": 1.1352541016406563e-05, "loss": 0.5155, "step": 19331 }, { "epoch": 24.74496, "grad_norm": 1.1477676630020142, "learning_rate": 1.1350540216086435e-05, "loss": 0.4689, "step": 19332 }, { "epoch": 24.74624, "grad_norm": 1.1423364877700806, "learning_rate": 1.1348539415766307e-05, "loss": 0.4852, "step": 19333 }, { "epoch": 24.74752, "grad_norm": 1.1712870597839355, "learning_rate": 1.1346538615446179e-05, "loss": 0.4899, "step": 19334 }, { "epoch": 24.7488, "grad_norm": 1.229701042175293, "learning_rate": 1.134453781512605e-05, "loss": 0.4972, "step": 19335 }, { "epoch": 24.75008, "grad_norm": 1.098663330078125, "learning_rate": 1.1342537014805924e-05, "loss": 0.4796, "step": 19336 }, { "epoch": 24.75136, "grad_norm": 1.0754587650299072, "learning_rate": 1.1340536214485794e-05, "loss": 0.4549, "step": 19337 }, { "epoch": 24.75264, "grad_norm": 1.1610110998153687, "learning_rate": 1.1338535414165668e-05, "loss": 0.4837, "step": 19338 }, { "epoch": 24.75392, "grad_norm": 1.2524155378341675, "learning_rate": 1.1336534613845538e-05, "loss": 0.5235, "step": 19339 }, { "epoch": 24.7552, "grad_norm": 1.139482021331787, "learning_rate": 1.1334533813525411e-05, "loss": 0.5018, "step": 19340 }, { "epoch": 24.75648, "grad_norm": 1.2066160440444946, "learning_rate": 1.1332533013205282e-05, "loss": 0.5218, "step": 19341 }, { "epoch": 24.75776, "grad_norm": 1.0574979782104492, "learning_rate": 1.1330532212885155e-05, "loss": 0.4569, "step": 19342 }, { "epoch": 24.75904, "grad_norm": 1.1374906301498413, "learning_rate": 1.1328531412565027e-05, "loss": 
0.4353, "step": 19343 }, { "epoch": 24.76032, "grad_norm": 1.135045051574707, "learning_rate": 1.1326530612244899e-05, "loss": 0.4811, "step": 19344 }, { "epoch": 24.7616, "grad_norm": 1.1101502180099487, "learning_rate": 1.132452981192477e-05, "loss": 0.4816, "step": 19345 }, { "epoch": 24.76288, "grad_norm": 1.1571624279022217, "learning_rate": 1.1322529011604643e-05, "loss": 0.4853, "step": 19346 }, { "epoch": 24.76416, "grad_norm": 1.216991662979126, "learning_rate": 1.1320528211284514e-05, "loss": 0.4746, "step": 19347 }, { "epoch": 24.76544, "grad_norm": 1.1886634826660156, "learning_rate": 1.1318527410964386e-05, "loss": 0.4852, "step": 19348 }, { "epoch": 24.76672, "grad_norm": 1.2373932600021362, "learning_rate": 1.1316526610644258e-05, "loss": 0.463, "step": 19349 }, { "epoch": 24.768, "grad_norm": 1.2170404195785522, "learning_rate": 1.1314525810324132e-05, "loss": 0.5278, "step": 19350 }, { "epoch": 24.76928, "grad_norm": 1.1271648406982422, "learning_rate": 1.1312525010004002e-05, "loss": 0.4666, "step": 19351 }, { "epoch": 24.77056, "grad_norm": 1.2704542875289917, "learning_rate": 1.1310524209683875e-05, "loss": 0.541, "step": 19352 }, { "epoch": 24.77184, "grad_norm": 1.1266765594482422, "learning_rate": 1.1308523409363746e-05, "loss": 0.4432, "step": 19353 }, { "epoch": 24.77312, "grad_norm": 1.1655040979385376, "learning_rate": 1.1306522609043619e-05, "loss": 0.4932, "step": 19354 }, { "epoch": 24.7744, "grad_norm": 1.0827305316925049, "learning_rate": 1.130452180872349e-05, "loss": 0.4499, "step": 19355 }, { "epoch": 24.77568, "grad_norm": 1.1636383533477783, "learning_rate": 1.1302521008403363e-05, "loss": 0.5046, "step": 19356 }, { "epoch": 24.77696, "grad_norm": 1.1613502502441406, "learning_rate": 1.1300520208083235e-05, "loss": 0.454, "step": 19357 }, { "epoch": 24.77824, "grad_norm": 1.1521443128585815, "learning_rate": 1.1298519407763106e-05, "loss": 0.4479, "step": 19358 }, { "epoch": 24.77952, "grad_norm": 1.129206657409668, 
"learning_rate": 1.1296518607442978e-05, "loss": 0.4722, "step": 19359 }, { "epoch": 24.7808, "grad_norm": 1.1790378093719482, "learning_rate": 1.129451780712285e-05, "loss": 0.4682, "step": 19360 }, { "epoch": 24.78208, "grad_norm": 1.1063263416290283, "learning_rate": 1.1292517006802722e-05, "loss": 0.4484, "step": 19361 }, { "epoch": 24.78336, "grad_norm": 1.1789052486419678, "learning_rate": 1.1290516206482594e-05, "loss": 0.4894, "step": 19362 }, { "epoch": 24.78464, "grad_norm": 1.210119605064392, "learning_rate": 1.1288515406162466e-05, "loss": 0.533, "step": 19363 }, { "epoch": 24.78592, "grad_norm": 1.0903270244598389, "learning_rate": 1.1286514605842338e-05, "loss": 0.4602, "step": 19364 }, { "epoch": 24.7872, "grad_norm": 1.1570169925689697, "learning_rate": 1.128451380552221e-05, "loss": 0.4611, "step": 19365 }, { "epoch": 24.78848, "grad_norm": 1.0663702487945557, "learning_rate": 1.1282513005202081e-05, "loss": 0.4379, "step": 19366 }, { "epoch": 24.78976, "grad_norm": 1.067586898803711, "learning_rate": 1.1280512204881953e-05, "loss": 0.4306, "step": 19367 }, { "epoch": 24.79104, "grad_norm": 1.1584056615829468, "learning_rate": 1.1278511404561825e-05, "loss": 0.4455, "step": 19368 }, { "epoch": 24.79232, "grad_norm": 1.188567042350769, "learning_rate": 1.1276510604241697e-05, "loss": 0.5147, "step": 19369 }, { "epoch": 24.7936, "grad_norm": 1.2147189378738403, "learning_rate": 1.1274509803921569e-05, "loss": 0.4957, "step": 19370 }, { "epoch": 24.79488, "grad_norm": 1.1530333757400513, "learning_rate": 1.127250900360144e-05, "loss": 0.4771, "step": 19371 }, { "epoch": 24.79616, "grad_norm": 1.1799629926681519, "learning_rate": 1.1270508203281312e-05, "loss": 0.5172, "step": 19372 }, { "epoch": 24.79744, "grad_norm": 1.1808135509490967, "learning_rate": 1.1268507402961186e-05, "loss": 0.4899, "step": 19373 }, { "epoch": 24.79872, "grad_norm": 1.169081687927246, "learning_rate": 1.1266506602641056e-05, "loss": 0.4794, "step": 19374 }, { "epoch": 24.8, 
"grad_norm": 1.2258483171463013, "learning_rate": 1.126450580232093e-05, "loss": 0.4697, "step": 19375 }, { "epoch": 24.80128, "grad_norm": 1.1741260290145874, "learning_rate": 1.12625050020008e-05, "loss": 0.5146, "step": 19376 }, { "epoch": 24.80256, "grad_norm": 1.1346204280853271, "learning_rate": 1.1260504201680673e-05, "loss": 0.4403, "step": 19377 }, { "epoch": 24.80384, "grad_norm": 1.1797999143600464, "learning_rate": 1.1258503401360544e-05, "loss": 0.449, "step": 19378 }, { "epoch": 24.80512, "grad_norm": 1.2282897233963013, "learning_rate": 1.1256502601040417e-05, "loss": 0.476, "step": 19379 }, { "epoch": 24.8064, "grad_norm": 1.1009891033172607, "learning_rate": 1.1254501800720289e-05, "loss": 0.498, "step": 19380 }, { "epoch": 24.80768, "grad_norm": 1.0904237031936646, "learning_rate": 1.1252501000400161e-05, "loss": 0.4462, "step": 19381 }, { "epoch": 24.80896, "grad_norm": 1.162895679473877, "learning_rate": 1.1250500200080033e-05, "loss": 0.4788, "step": 19382 }, { "epoch": 24.81024, "grad_norm": 1.1506489515304565, "learning_rate": 1.1248499399759905e-05, "loss": 0.4834, "step": 19383 }, { "epoch": 24.81152, "grad_norm": 1.1401734352111816, "learning_rate": 1.1246498599439776e-05, "loss": 0.4822, "step": 19384 }, { "epoch": 24.8128, "grad_norm": 1.0999462604522705, "learning_rate": 1.1244497799119648e-05, "loss": 0.4681, "step": 19385 }, { "epoch": 24.81408, "grad_norm": 1.060306429862976, "learning_rate": 1.124249699879952e-05, "loss": 0.4561, "step": 19386 }, { "epoch": 24.81536, "grad_norm": 1.2425814867019653, "learning_rate": 1.1240496198479392e-05, "loss": 0.5074, "step": 19387 }, { "epoch": 24.81664, "grad_norm": 1.209997534751892, "learning_rate": 1.1238495398159264e-05, "loss": 0.5151, "step": 19388 }, { "epoch": 24.81792, "grad_norm": 1.1693835258483887, "learning_rate": 1.1236494597839137e-05, "loss": 0.5006, "step": 19389 }, { "epoch": 24.8192, "grad_norm": 1.1733357906341553, "learning_rate": 1.1234493797519008e-05, "loss": 0.5059, 
"step": 19390 }, { "epoch": 24.82048, "grad_norm": 1.16240656375885, "learning_rate": 1.1232492997198881e-05, "loss": 0.458, "step": 19391 }, { "epoch": 24.82176, "grad_norm": 1.0904651880264282, "learning_rate": 1.1230492196878751e-05, "loss": 0.4414, "step": 19392 }, { "epoch": 24.82304, "grad_norm": 1.1327263116836548, "learning_rate": 1.1228491396558625e-05, "loss": 0.4849, "step": 19393 }, { "epoch": 24.82432, "grad_norm": 1.2412240505218506, "learning_rate": 1.1226490596238495e-05, "loss": 0.5405, "step": 19394 }, { "epoch": 24.8256, "grad_norm": 1.1290397644042969, "learning_rate": 1.1224489795918369e-05, "loss": 0.4745, "step": 19395 }, { "epoch": 24.82688, "grad_norm": 1.1640607118606567, "learning_rate": 1.122248899559824e-05, "loss": 0.4839, "step": 19396 }, { "epoch": 24.82816, "grad_norm": 1.1949489116668701, "learning_rate": 1.1220488195278112e-05, "loss": 0.4592, "step": 19397 }, { "epoch": 24.829439999999998, "grad_norm": 1.164475440979004, "learning_rate": 1.1218487394957984e-05, "loss": 0.4737, "step": 19398 }, { "epoch": 24.83072, "grad_norm": 1.185512661933899, "learning_rate": 1.1216486594637856e-05, "loss": 0.451, "step": 19399 }, { "epoch": 24.832, "grad_norm": 1.096878170967102, "learning_rate": 1.1214485794317728e-05, "loss": 0.5043, "step": 19400 }, { "epoch": 24.83328, "grad_norm": 1.2109886407852173, "learning_rate": 1.12124849939976e-05, "loss": 0.569, "step": 19401 }, { "epoch": 24.83456, "grad_norm": 1.1665548086166382, "learning_rate": 1.1210484193677472e-05, "loss": 0.5026, "step": 19402 }, { "epoch": 24.83584, "grad_norm": 1.1184903383255005, "learning_rate": 1.1208483393357343e-05, "loss": 0.4835, "step": 19403 }, { "epoch": 24.83712, "grad_norm": 1.2306338548660278, "learning_rate": 1.1206482593037215e-05, "loss": 0.4726, "step": 19404 }, { "epoch": 24.8384, "grad_norm": 1.2000937461853027, "learning_rate": 1.1204481792717087e-05, "loss": 0.4827, "step": 19405 }, { "epoch": 24.83968, "grad_norm": 1.1614446640014648, 
"learning_rate": 1.1202480992396959e-05, "loss": 0.4524, "step": 19406 }, { "epoch": 24.84096, "grad_norm": 1.1615883111953735, "learning_rate": 1.1200480192076831e-05, "loss": 0.5126, "step": 19407 }, { "epoch": 24.84224, "grad_norm": 1.1704049110412598, "learning_rate": 1.1198479391756703e-05, "loss": 0.4715, "step": 19408 }, { "epoch": 24.84352, "grad_norm": 1.2115988731384277, "learning_rate": 1.1196478591436575e-05, "loss": 0.4983, "step": 19409 }, { "epoch": 24.8448, "grad_norm": 1.1601941585540771, "learning_rate": 1.1194477791116448e-05, "loss": 0.5095, "step": 19410 }, { "epoch": 24.84608, "grad_norm": 1.1305478811264038, "learning_rate": 1.1192476990796318e-05, "loss": 0.4968, "step": 19411 }, { "epoch": 24.84736, "grad_norm": 1.243231177330017, "learning_rate": 1.1190476190476192e-05, "loss": 0.4912, "step": 19412 }, { "epoch": 24.84864, "grad_norm": 1.0542876720428467, "learning_rate": 1.1188475390156062e-05, "loss": 0.4489, "step": 19413 }, { "epoch": 24.84992, "grad_norm": 1.184417486190796, "learning_rate": 1.1186474589835936e-05, "loss": 0.4938, "step": 19414 }, { "epoch": 24.8512, "grad_norm": 1.2762627601623535, "learning_rate": 1.1184473789515806e-05, "loss": 0.5049, "step": 19415 }, { "epoch": 24.85248, "grad_norm": 1.0796477794647217, "learning_rate": 1.118247298919568e-05, "loss": 0.4241, "step": 19416 }, { "epoch": 24.85376, "grad_norm": 1.2161225080490112, "learning_rate": 1.118047218887555e-05, "loss": 0.5025, "step": 19417 }, { "epoch": 24.85504, "grad_norm": 1.180956482887268, "learning_rate": 1.1178471388555423e-05, "loss": 0.4798, "step": 19418 }, { "epoch": 24.85632, "grad_norm": 1.1922006607055664, "learning_rate": 1.1176470588235295e-05, "loss": 0.4928, "step": 19419 }, { "epoch": 24.8576, "grad_norm": 1.1288496255874634, "learning_rate": 1.1174469787915167e-05, "loss": 0.4416, "step": 19420 }, { "epoch": 24.85888, "grad_norm": 1.1524951457977295, "learning_rate": 1.1172468987595039e-05, "loss": 0.4827, "step": 19421 }, { "epoch": 
24.86016, "grad_norm": 1.239288330078125, "learning_rate": 1.117046818727491e-05, "loss": 0.4972, "step": 19422 }, { "epoch": 24.86144, "grad_norm": 1.1284489631652832, "learning_rate": 1.1168467386954782e-05, "loss": 0.4293, "step": 19423 }, { "epoch": 24.86272, "grad_norm": 1.178391456604004, "learning_rate": 1.1166466586634654e-05, "loss": 0.4698, "step": 19424 }, { "epoch": 24.864, "grad_norm": 1.1781879663467407, "learning_rate": 1.1164465786314526e-05, "loss": 0.4427, "step": 19425 }, { "epoch": 24.86528, "grad_norm": 1.1736154556274414, "learning_rate": 1.11624649859944e-05, "loss": 0.4899, "step": 19426 }, { "epoch": 24.86656, "grad_norm": 1.210067868232727, "learning_rate": 1.116046418567427e-05, "loss": 0.4746, "step": 19427 }, { "epoch": 24.86784, "grad_norm": 1.1524900197982788, "learning_rate": 1.1158463385354143e-05, "loss": 0.4755, "step": 19428 }, { "epoch": 24.86912, "grad_norm": 1.077675461769104, "learning_rate": 1.1156462585034013e-05, "loss": 0.4248, "step": 19429 }, { "epoch": 24.8704, "grad_norm": 1.1400989294052124, "learning_rate": 1.1154461784713887e-05, "loss": 0.4719, "step": 19430 }, { "epoch": 24.87168, "grad_norm": 1.0998234748840332, "learning_rate": 1.1152460984393757e-05, "loss": 0.4563, "step": 19431 }, { "epoch": 24.87296, "grad_norm": 1.2079157829284668, "learning_rate": 1.115046018407363e-05, "loss": 0.508, "step": 19432 }, { "epoch": 24.87424, "grad_norm": 1.118981122970581, "learning_rate": 1.1148459383753503e-05, "loss": 0.4577, "step": 19433 }, { "epoch": 24.87552, "grad_norm": 1.1195893287658691, "learning_rate": 1.1146458583433374e-05, "loss": 0.4688, "step": 19434 }, { "epoch": 24.8768, "grad_norm": 1.1069542169570923, "learning_rate": 1.1144457783113246e-05, "loss": 0.4522, "step": 19435 }, { "epoch": 24.87808, "grad_norm": 1.2050132751464844, "learning_rate": 1.1142456982793118e-05, "loss": 0.4965, "step": 19436 }, { "epoch": 24.87936, "grad_norm": 1.0464030504226685, "learning_rate": 1.114045618247299e-05, "loss": 
0.4366, "step": 19437 }, { "epoch": 24.88064, "grad_norm": 1.1153632402420044, "learning_rate": 1.1138455382152862e-05, "loss": 0.454, "step": 19438 }, { "epoch": 24.88192, "grad_norm": 1.178065299987793, "learning_rate": 1.1136454581832734e-05, "loss": 0.4914, "step": 19439 }, { "epoch": 24.8832, "grad_norm": 1.2401835918426514, "learning_rate": 1.1134453781512606e-05, "loss": 0.4877, "step": 19440 }, { "epoch": 24.88448, "grad_norm": 1.2349568605422974, "learning_rate": 1.1132452981192477e-05, "loss": 0.5027, "step": 19441 }, { "epoch": 24.88576, "grad_norm": 1.188691258430481, "learning_rate": 1.113045218087235e-05, "loss": 0.4862, "step": 19442 }, { "epoch": 24.88704, "grad_norm": 1.2274821996688843, "learning_rate": 1.1128451380552221e-05, "loss": 0.489, "step": 19443 }, { "epoch": 24.88832, "grad_norm": 1.2157435417175293, "learning_rate": 1.1126450580232093e-05, "loss": 0.518, "step": 19444 }, { "epoch": 24.8896, "grad_norm": 1.1647244691848755, "learning_rate": 1.1124449779911965e-05, "loss": 0.5051, "step": 19445 }, { "epoch": 24.89088, "grad_norm": 1.1653064489364624, "learning_rate": 1.1122448979591837e-05, "loss": 0.4646, "step": 19446 }, { "epoch": 24.89216, "grad_norm": 1.1139925718307495, "learning_rate": 1.1120448179271709e-05, "loss": 0.4621, "step": 19447 }, { "epoch": 24.89344, "grad_norm": 1.1649415493011475, "learning_rate": 1.111844737895158e-05, "loss": 0.4833, "step": 19448 }, { "epoch": 24.89472, "grad_norm": 1.2173422574996948, "learning_rate": 1.1116446578631454e-05, "loss": 0.5032, "step": 19449 }, { "epoch": 24.896, "grad_norm": 1.1215828657150269, "learning_rate": 1.1114445778311324e-05, "loss": 0.4903, "step": 19450 }, { "epoch": 24.89728, "grad_norm": 1.179179310798645, "learning_rate": 1.1112444977991198e-05, "loss": 0.4854, "step": 19451 }, { "epoch": 24.89856, "grad_norm": 1.1343804597854614, "learning_rate": 1.1110444177671068e-05, "loss": 0.4477, "step": 19452 }, { "epoch": 24.89984, "grad_norm": 1.1283519268035889, 
"learning_rate": 1.1108443377350941e-05, "loss": 0.4814, "step": 19453 }, { "epoch": 24.90112, "grad_norm": 1.117431879043579, "learning_rate": 1.1106442577030812e-05, "loss": 0.4512, "step": 19454 }, { "epoch": 24.9024, "grad_norm": 1.1315174102783203, "learning_rate": 1.1104441776710685e-05, "loss": 0.4423, "step": 19455 }, { "epoch": 24.90368, "grad_norm": 1.2112675905227661, "learning_rate": 1.1102440976390557e-05, "loss": 0.5169, "step": 19456 }, { "epoch": 24.90496, "grad_norm": 1.1385278701782227, "learning_rate": 1.1100440176070429e-05, "loss": 0.4717, "step": 19457 }, { "epoch": 24.90624, "grad_norm": 1.1794296503067017, "learning_rate": 1.10984393757503e-05, "loss": 0.503, "step": 19458 }, { "epoch": 24.90752, "grad_norm": 1.1522889137268066, "learning_rate": 1.1096438575430172e-05, "loss": 0.4582, "step": 19459 }, { "epoch": 24.9088, "grad_norm": 1.233756422996521, "learning_rate": 1.1094437775110044e-05, "loss": 0.521, "step": 19460 }, { "epoch": 24.91008, "grad_norm": 1.1709715127944946, "learning_rate": 1.1092436974789916e-05, "loss": 0.492, "step": 19461 }, { "epoch": 24.91136, "grad_norm": 1.193808913230896, "learning_rate": 1.1090436174469788e-05, "loss": 0.4568, "step": 19462 }, { "epoch": 24.91264, "grad_norm": 1.14645254611969, "learning_rate": 1.1088435374149662e-05, "loss": 0.4904, "step": 19463 }, { "epoch": 24.91392, "grad_norm": 1.100979208946228, "learning_rate": 1.1086434573829532e-05, "loss": 0.4828, "step": 19464 }, { "epoch": 24.9152, "grad_norm": 1.1360437870025635, "learning_rate": 1.1084433773509405e-05, "loss": 0.4519, "step": 19465 }, { "epoch": 24.91648, "grad_norm": 1.0801247358322144, "learning_rate": 1.1082432973189275e-05, "loss": 0.4426, "step": 19466 }, { "epoch": 24.91776, "grad_norm": 1.2544835805892944, "learning_rate": 1.1080432172869149e-05, "loss": 0.5123, "step": 19467 }, { "epoch": 24.91904, "grad_norm": 1.1919646263122559, "learning_rate": 1.107843137254902e-05, "loss": 0.4854, "step": 19468 }, { "epoch": 24.92032, 
"grad_norm": 1.1021586656570435, "learning_rate": 1.1076430572228893e-05, "loss": 0.5155, "step": 19469 }, { "epoch": 24.9216, "grad_norm": 1.230746865272522, "learning_rate": 1.1074429771908763e-05, "loss": 0.5456, "step": 19470 }, { "epoch": 24.92288, "grad_norm": 1.0849162340164185, "learning_rate": 1.1072428971588636e-05, "loss": 0.4524, "step": 19471 }, { "epoch": 24.92416, "grad_norm": 1.1267224550247192, "learning_rate": 1.1070428171268508e-05, "loss": 0.4821, "step": 19472 }, { "epoch": 24.925440000000002, "grad_norm": 1.134163737297058, "learning_rate": 1.106842737094838e-05, "loss": 0.4881, "step": 19473 }, { "epoch": 24.92672, "grad_norm": 1.142830729484558, "learning_rate": 1.1066426570628252e-05, "loss": 0.5139, "step": 19474 }, { "epoch": 24.928, "grad_norm": 1.1527884006500244, "learning_rate": 1.1064425770308124e-05, "loss": 0.4652, "step": 19475 }, { "epoch": 24.92928, "grad_norm": 1.1849658489227295, "learning_rate": 1.1062424969987996e-05, "loss": 0.5047, "step": 19476 }, { "epoch": 24.93056, "grad_norm": 1.1960779428482056, "learning_rate": 1.1060424169667868e-05, "loss": 0.4927, "step": 19477 }, { "epoch": 24.93184, "grad_norm": 1.118734359741211, "learning_rate": 1.105842336934774e-05, "loss": 0.463, "step": 19478 }, { "epoch": 24.93312, "grad_norm": 1.1411648988723755, "learning_rate": 1.1056422569027611e-05, "loss": 0.4535, "step": 19479 }, { "epoch": 24.9344, "grad_norm": 1.1188914775848389, "learning_rate": 1.1054421768707483e-05, "loss": 0.4437, "step": 19480 }, { "epoch": 24.93568, "grad_norm": 1.1471185684204102, "learning_rate": 1.1052420968387355e-05, "loss": 0.4782, "step": 19481 }, { "epoch": 24.93696, "grad_norm": 1.2301186323165894, "learning_rate": 1.1050420168067227e-05, "loss": 0.4759, "step": 19482 }, { "epoch": 24.93824, "grad_norm": 1.175561547279358, "learning_rate": 1.1048419367747099e-05, "loss": 0.4798, "step": 19483 }, { "epoch": 24.93952, "grad_norm": 1.172258734703064, "learning_rate": 1.104641856742697e-05, "loss": 
0.465, "step": 19484 }, { "epoch": 24.9408, "grad_norm": 1.1110254526138306, "learning_rate": 1.1044417767106842e-05, "loss": 0.4597, "step": 19485 }, { "epoch": 24.94208, "grad_norm": 1.0964692831039429, "learning_rate": 1.1042416966786716e-05, "loss": 0.4654, "step": 19486 }, { "epoch": 24.94336, "grad_norm": 1.187997579574585, "learning_rate": 1.1040416166466586e-05, "loss": 0.4786, "step": 19487 }, { "epoch": 24.94464, "grad_norm": 1.1386297941207886, "learning_rate": 1.103841536614646e-05, "loss": 0.4923, "step": 19488 }, { "epoch": 24.94592, "grad_norm": 1.0917891263961792, "learning_rate": 1.103641456582633e-05, "loss": 0.4856, "step": 19489 }, { "epoch": 24.9472, "grad_norm": 1.1903581619262695, "learning_rate": 1.1034413765506203e-05, "loss": 0.5102, "step": 19490 }, { "epoch": 24.94848, "grad_norm": 1.118826985359192, "learning_rate": 1.1032412965186074e-05, "loss": 0.4864, "step": 19491 }, { "epoch": 24.94976, "grad_norm": 1.0741007328033447, "learning_rate": 1.1030412164865947e-05, "loss": 0.4401, "step": 19492 }, { "epoch": 24.95104, "grad_norm": 1.154763102531433, "learning_rate": 1.1028411364545819e-05, "loss": 0.493, "step": 19493 }, { "epoch": 24.95232, "grad_norm": 1.167617678642273, "learning_rate": 1.1026410564225691e-05, "loss": 0.4935, "step": 19494 }, { "epoch": 24.9536, "grad_norm": 1.1910767555236816, "learning_rate": 1.1024409763905563e-05, "loss": 0.5077, "step": 19495 }, { "epoch": 24.95488, "grad_norm": 1.1297580003738403, "learning_rate": 1.1022408963585435e-05, "loss": 0.5074, "step": 19496 }, { "epoch": 24.95616, "grad_norm": 1.1995117664337158, "learning_rate": 1.1020408163265306e-05, "loss": 0.4817, "step": 19497 }, { "epoch": 24.95744, "grad_norm": 1.1976964473724365, "learning_rate": 1.1018407362945178e-05, "loss": 0.5004, "step": 19498 }, { "epoch": 24.95872, "grad_norm": 1.1336443424224854, "learning_rate": 1.101640656262505e-05, "loss": 0.46, "step": 19499 }, { "epoch": 24.96, "grad_norm": 1.1351187229156494, "learning_rate": 
1.1014405762304922e-05, "loss": 0.4802, "step": 19500 }, { "epoch": 24.96128, "grad_norm": 1.1822195053100586, "learning_rate": 1.1012404961984794e-05, "loss": 0.5001, "step": 19501 }, { "epoch": 24.96256, "grad_norm": 1.1518561840057373, "learning_rate": 1.1010404161664667e-05, "loss": 0.4872, "step": 19502 }, { "epoch": 24.96384, "grad_norm": 1.2118054628372192, "learning_rate": 1.1008403361344538e-05, "loss": 0.4995, "step": 19503 }, { "epoch": 24.96512, "grad_norm": 1.1399140357971191, "learning_rate": 1.1006402561024411e-05, "loss": 0.4491, "step": 19504 }, { "epoch": 24.9664, "grad_norm": 1.1353464126586914, "learning_rate": 1.1004401760704281e-05, "loss": 0.4953, "step": 19505 }, { "epoch": 24.96768, "grad_norm": 1.1443747282028198, "learning_rate": 1.1002400960384155e-05, "loss": 0.4445, "step": 19506 }, { "epoch": 24.96896, "grad_norm": 1.1765371561050415, "learning_rate": 1.1000400160064025e-05, "loss": 0.5095, "step": 19507 }, { "epoch": 24.97024, "grad_norm": 1.1671823263168335, "learning_rate": 1.0998399359743899e-05, "loss": 0.5449, "step": 19508 }, { "epoch": 24.97152, "grad_norm": 1.2038657665252686, "learning_rate": 1.099639855942377e-05, "loss": 0.4931, "step": 19509 }, { "epoch": 24.9728, "grad_norm": 1.1690441370010376, "learning_rate": 1.0994397759103642e-05, "loss": 0.4586, "step": 19510 }, { "epoch": 24.97408, "grad_norm": 1.184372901916504, "learning_rate": 1.0992396958783514e-05, "loss": 0.4714, "step": 19511 }, { "epoch": 24.97536, "grad_norm": 1.1089961528778076, "learning_rate": 1.0990396158463386e-05, "loss": 0.4356, "step": 19512 }, { "epoch": 24.97664, "grad_norm": 1.1306204795837402, "learning_rate": 1.0988395358143258e-05, "loss": 0.4918, "step": 19513 }, { "epoch": 24.97792, "grad_norm": 1.166355013847351, "learning_rate": 1.098639455782313e-05, "loss": 0.5125, "step": 19514 }, { "epoch": 24.9792, "grad_norm": 1.1938666105270386, "learning_rate": 1.0984393757503002e-05, "loss": 0.5012, "step": 19515 }, { "epoch": 24.98048, 
"grad_norm": 1.2338180541992188, "learning_rate": 1.0982392957182875e-05, "loss": 0.5217, "step": 19516 }, { "epoch": 24.98176, "grad_norm": 1.154515027999878, "learning_rate": 1.0980392156862745e-05, "loss": 0.497, "step": 19517 }, { "epoch": 24.98304, "grad_norm": 1.1719876527786255, "learning_rate": 1.0978391356542619e-05, "loss": 0.4643, "step": 19518 }, { "epoch": 24.98432, "grad_norm": 1.1815974712371826, "learning_rate": 1.0976390556222489e-05, "loss": 0.5102, "step": 19519 }, { "epoch": 24.9856, "grad_norm": 1.095108985900879, "learning_rate": 1.0974389755902363e-05, "loss": 0.4735, "step": 19520 }, { "epoch": 24.98688, "grad_norm": 1.1639820337295532, "learning_rate": 1.0972388955582233e-05, "loss": 0.4904, "step": 19521 }, { "epoch": 24.98816, "grad_norm": 1.1341108083724976, "learning_rate": 1.0970388155262106e-05, "loss": 0.4568, "step": 19522 }, { "epoch": 24.98944, "grad_norm": 1.2175263166427612, "learning_rate": 1.0968387354941978e-05, "loss": 0.4845, "step": 19523 }, { "epoch": 24.99072, "grad_norm": 1.1433970928192139, "learning_rate": 1.096638655462185e-05, "loss": 0.4664, "step": 19524 }, { "epoch": 24.992, "grad_norm": 1.1078377962112427, "learning_rate": 1.0964385754301722e-05, "loss": 0.4772, "step": 19525 }, { "epoch": 24.99328, "grad_norm": 1.181159257888794, "learning_rate": 1.0962384953981594e-05, "loss": 0.4969, "step": 19526 }, { "epoch": 24.99456, "grad_norm": 1.0896167755126953, "learning_rate": 1.0960384153661466e-05, "loss": 0.4691, "step": 19527 }, { "epoch": 24.99584, "grad_norm": 1.1512247323989868, "learning_rate": 1.0958383353341337e-05, "loss": 0.4367, "step": 19528 }, { "epoch": 24.99712, "grad_norm": 1.1622196435928345, "learning_rate": 1.095638255302121e-05, "loss": 0.4856, "step": 19529 }, { "epoch": 24.9984, "grad_norm": 1.1247361898422241, "learning_rate": 1.0954381752701081e-05, "loss": 0.4831, "step": 19530 }, { "epoch": 24.99968, "grad_norm": 1.0839389562606812, "learning_rate": 1.0952380952380953e-05, "loss": 0.4514, 
"step": 19531 }, { "epoch": 25.00096, "grad_norm": Infinity, "learning_rate": 1.0952380952380953e-05, "loss": 0.8962, "step": 19532 }, { "epoch": 25.00224, "grad_norm": 1.1649938821792603, "learning_rate": 1.0950380152060825e-05, "loss": 0.4679, "step": 19533 }, { "epoch": 25.00352, "grad_norm": 1.159871220588684, "learning_rate": 1.0948379351740697e-05, "loss": 0.4861, "step": 19534 }, { "epoch": 25.0048, "grad_norm": 1.1205157041549683, "learning_rate": 1.0946378551420569e-05, "loss": 0.4473, "step": 19535 }, { "epoch": 25.00608, "grad_norm": 1.156924843788147, "learning_rate": 1.094437775110044e-05, "loss": 0.4897, "step": 19536 }, { "epoch": 25.00736, "grad_norm": 1.0987516641616821, "learning_rate": 1.0942376950780312e-05, "loss": 0.4423, "step": 19537 }, { "epoch": 25.00864, "grad_norm": 1.0584564208984375, "learning_rate": 1.0940376150460184e-05, "loss": 0.4282, "step": 19538 }, { "epoch": 25.00992, "grad_norm": 1.0948853492736816, "learning_rate": 1.0938375350140056e-05, "loss": 0.453, "step": 19539 }, { "epoch": 25.0112, "grad_norm": 1.1634764671325684, "learning_rate": 1.093637454981993e-05, "loss": 0.4743, "step": 19540 }, { "epoch": 25.01248, "grad_norm": 1.087397575378418, "learning_rate": 1.09343737494998e-05, "loss": 0.4398, "step": 19541 }, { "epoch": 25.01376, "grad_norm": 1.1633117198944092, "learning_rate": 1.0932372949179673e-05, "loss": 0.4746, "step": 19542 }, { "epoch": 25.01504, "grad_norm": 1.2032607793807983, "learning_rate": 1.0930372148859543e-05, "loss": 0.4951, "step": 19543 }, { "epoch": 25.01632, "grad_norm": 1.1877236366271973, "learning_rate": 1.0928371348539417e-05, "loss": 0.5303, "step": 19544 }, { "epoch": 25.0176, "grad_norm": 1.0547975301742554, "learning_rate": 1.0926370548219287e-05, "loss": 0.4098, "step": 19545 }, { "epoch": 25.01888, "grad_norm": 1.1014307737350464, "learning_rate": 1.092436974789916e-05, "loss": 0.4382, "step": 19546 }, { "epoch": 25.02016, "grad_norm": 1.15842866897583, "learning_rate": 
1.0922368947579032e-05, "loss": 0.4791, "step": 19547 }, { "epoch": 25.02144, "grad_norm": 1.2171627283096313, "learning_rate": 1.0920368147258904e-05, "loss": 0.4589, "step": 19548 }, { "epoch": 25.02272, "grad_norm": 1.2597172260284424, "learning_rate": 1.0918367346938776e-05, "loss": 0.4976, "step": 19549 }, { "epoch": 25.024, "grad_norm": 1.1223315000534058, "learning_rate": 1.0916366546618648e-05, "loss": 0.4765, "step": 19550 }, { "epoch": 25.02528, "grad_norm": 1.0641728639602661, "learning_rate": 1.091436574629852e-05, "loss": 0.4378, "step": 19551 }, { "epoch": 25.02656, "grad_norm": 1.0940122604370117, "learning_rate": 1.0912364945978392e-05, "loss": 0.4407, "step": 19552 }, { "epoch": 25.02784, "grad_norm": 1.1914441585540771, "learning_rate": 1.0910364145658264e-05, "loss": 0.4541, "step": 19553 }, { "epoch": 25.02912, "grad_norm": 1.1707878112792969, "learning_rate": 1.0908363345338135e-05, "loss": 0.4925, "step": 19554 }, { "epoch": 25.0304, "grad_norm": 1.0737802982330322, "learning_rate": 1.0906362545018007e-05, "loss": 0.4441, "step": 19555 }, { "epoch": 25.03168, "grad_norm": 1.1500251293182373, "learning_rate": 1.0904361744697881e-05, "loss": 0.4752, "step": 19556 }, { "epoch": 25.03296, "grad_norm": 1.1668425798416138, "learning_rate": 1.0902360944377751e-05, "loss": 0.4919, "step": 19557 }, { "epoch": 25.03424, "grad_norm": 1.0954525470733643, "learning_rate": 1.0900360144057625e-05, "loss": 0.4363, "step": 19558 }, { "epoch": 25.03552, "grad_norm": 1.1638271808624268, "learning_rate": 1.0898359343737495e-05, "loss": 0.488, "step": 19559 }, { "epoch": 25.0368, "grad_norm": 1.1640788316726685, "learning_rate": 1.0896358543417368e-05, "loss": 0.4723, "step": 19560 }, { "epoch": 25.03808, "grad_norm": 1.1712232828140259, "learning_rate": 1.0894357743097238e-05, "loss": 0.4883, "step": 19561 }, { "epoch": 25.03936, "grad_norm": 1.1707978248596191, "learning_rate": 1.0892356942777112e-05, "loss": 0.5356, "step": 19562 }, { "epoch": 25.04064, 
"grad_norm": 1.095038890838623, "learning_rate": 1.0890356142456984e-05, "loss": 0.4394, "step": 19563 }, { "epoch": 25.04192, "grad_norm": 1.108280062675476, "learning_rate": 1.0888355342136856e-05, "loss": 0.4608, "step": 19564 }, { "epoch": 25.0432, "grad_norm": 1.1268192529678345, "learning_rate": 1.0886354541816728e-05, "loss": 0.4504, "step": 19565 }, { "epoch": 25.04448, "grad_norm": 1.1444103717803955, "learning_rate": 1.08843537414966e-05, "loss": 0.4217, "step": 19566 }, { "epoch": 25.04576, "grad_norm": 1.1157702207565308, "learning_rate": 1.0882352941176471e-05, "loss": 0.4607, "step": 19567 }, { "epoch": 25.04704, "grad_norm": 1.1346677541732788, "learning_rate": 1.0880352140856343e-05, "loss": 0.4446, "step": 19568 }, { "epoch": 25.04832, "grad_norm": 1.1954710483551025, "learning_rate": 1.0878351340536215e-05, "loss": 0.4784, "step": 19569 }, { "epoch": 25.0496, "grad_norm": 1.1586507558822632, "learning_rate": 1.0876350540216087e-05, "loss": 0.457, "step": 19570 }, { "epoch": 25.05088, "grad_norm": 1.112011432647705, "learning_rate": 1.0874349739895959e-05, "loss": 0.4386, "step": 19571 }, { "epoch": 25.05216, "grad_norm": 1.1521170139312744, "learning_rate": 1.087234893957583e-05, "loss": 0.4442, "step": 19572 }, { "epoch": 25.05344, "grad_norm": 1.1750129461288452, "learning_rate": 1.0870348139255702e-05, "loss": 0.4451, "step": 19573 }, { "epoch": 25.05472, "grad_norm": 1.1142715215682983, "learning_rate": 1.0868347338935574e-05, "loss": 0.4397, "step": 19574 }, { "epoch": 25.056, "grad_norm": 1.1580311059951782, "learning_rate": 1.0866346538615446e-05, "loss": 0.5067, "step": 19575 }, { "epoch": 25.05728, "grad_norm": 1.1484664678573608, "learning_rate": 1.0864345738295318e-05, "loss": 0.461, "step": 19576 }, { "epoch": 25.05856, "grad_norm": 1.169391393661499, "learning_rate": 1.0862344937975192e-05, "loss": 0.4694, "step": 19577 }, { "epoch": 25.05984, "grad_norm": 1.1013165712356567, "learning_rate": 1.0860344137655062e-05, "loss": 0.4279, 
"step": 19578 }, { "epoch": 25.06112, "grad_norm": 1.248039960861206, "learning_rate": 1.0858343337334935e-05, "loss": 0.4867, "step": 19579 }, { "epoch": 25.0624, "grad_norm": 1.119541883468628, "learning_rate": 1.0856342537014805e-05, "loss": 0.455, "step": 19580 }, { "epoch": 25.06368, "grad_norm": 1.1457856893539429, "learning_rate": 1.0854341736694679e-05, "loss": 0.4475, "step": 19581 }, { "epoch": 25.06496, "grad_norm": 1.1471192836761475, "learning_rate": 1.085234093637455e-05, "loss": 0.4766, "step": 19582 }, { "epoch": 25.06624, "grad_norm": 1.1262675523757935, "learning_rate": 1.0850340136054423e-05, "loss": 0.4753, "step": 19583 }, { "epoch": 25.06752, "grad_norm": 1.182495355606079, "learning_rate": 1.0848339335734293e-05, "loss": 0.4704, "step": 19584 }, { "epoch": 25.0688, "grad_norm": 1.0830098390579224, "learning_rate": 1.0846338535414166e-05, "loss": 0.4645, "step": 19585 }, { "epoch": 25.07008, "grad_norm": 1.1021302938461304, "learning_rate": 1.0844337735094038e-05, "loss": 0.4325, "step": 19586 }, { "epoch": 25.07136, "grad_norm": 1.192583441734314, "learning_rate": 1.084233693477391e-05, "loss": 0.5081, "step": 19587 }, { "epoch": 25.07264, "grad_norm": 1.1611496210098267, "learning_rate": 1.0840336134453782e-05, "loss": 0.4763, "step": 19588 }, { "epoch": 25.07392, "grad_norm": 1.1356542110443115, "learning_rate": 1.0838335334133654e-05, "loss": 0.4435, "step": 19589 }, { "epoch": 25.0752, "grad_norm": 1.1349308490753174, "learning_rate": 1.0836334533813526e-05, "loss": 0.4455, "step": 19590 }, { "epoch": 25.07648, "grad_norm": 1.1213163137435913, "learning_rate": 1.0834333733493398e-05, "loss": 0.4538, "step": 19591 }, { "epoch": 25.07776, "grad_norm": 1.151708960533142, "learning_rate": 1.083233293317327e-05, "loss": 0.4627, "step": 19592 }, { "epoch": 25.07904, "grad_norm": 1.1179898977279663, "learning_rate": 1.0830332132853143e-05, "loss": 0.4877, "step": 19593 }, { "epoch": 25.08032, "grad_norm": 1.1163991689682007, "learning_rate": 
1.0828331332533013e-05, "loss": 0.4423, "step": 19594 }, { "epoch": 25.0816, "grad_norm": 1.1171714067459106, "learning_rate": 1.0826330532212887e-05, "loss": 0.4341, "step": 19595 }, { "epoch": 25.08288, "grad_norm": 1.216645359992981, "learning_rate": 1.0824329731892757e-05, "loss": 0.4985, "step": 19596 }, { "epoch": 25.08416, "grad_norm": 1.1512889862060547, "learning_rate": 1.082232893157263e-05, "loss": 0.479, "step": 19597 }, { "epoch": 25.08544, "grad_norm": 1.210100531578064, "learning_rate": 1.08203281312525e-05, "loss": 0.476, "step": 19598 }, { "epoch": 25.08672, "grad_norm": 1.1688969135284424, "learning_rate": 1.0818327330932374e-05, "loss": 0.4753, "step": 19599 }, { "epoch": 25.088, "grad_norm": 1.1013729572296143, "learning_rate": 1.0816326530612246e-05, "loss": 0.4628, "step": 19600 }, { "epoch": 25.08928, "grad_norm": 1.096455454826355, "learning_rate": 1.0814325730292118e-05, "loss": 0.4244, "step": 19601 }, { "epoch": 25.09056, "grad_norm": 1.1417826414108276, "learning_rate": 1.081232492997199e-05, "loss": 0.4263, "step": 19602 }, { "epoch": 25.09184, "grad_norm": 1.2026113271713257, "learning_rate": 1.0810324129651862e-05, "loss": 0.4583, "step": 19603 }, { "epoch": 25.09312, "grad_norm": 1.1624215841293335, "learning_rate": 1.0808323329331733e-05, "loss": 0.464, "step": 19604 }, { "epoch": 25.0944, "grad_norm": 1.2777810096740723, "learning_rate": 1.0806322529011605e-05, "loss": 0.486, "step": 19605 }, { "epoch": 25.09568, "grad_norm": 1.1731245517730713, "learning_rate": 1.0804321728691477e-05, "loss": 0.4465, "step": 19606 }, { "epoch": 25.09696, "grad_norm": 1.1680535078048706, "learning_rate": 1.0802320928371349e-05, "loss": 0.4408, "step": 19607 }, { "epoch": 25.09824, "grad_norm": 1.1202332973480225, "learning_rate": 1.080032012805122e-05, "loss": 0.4328, "step": 19608 }, { "epoch": 25.09952, "grad_norm": 1.1760560274124146, "learning_rate": 1.0798319327731093e-05, "loss": 0.4216, "step": 19609 }, { "epoch": 25.1008, "grad_norm": 
1.2153133153915405, "learning_rate": 1.0796318527410965e-05, "loss": 0.5195, "step": 19610 }, { "epoch": 25.10208, "grad_norm": 1.2274070978164673, "learning_rate": 1.0794317727090836e-05, "loss": 0.4786, "step": 19611 }, { "epoch": 25.10336, "grad_norm": 1.141127109527588, "learning_rate": 1.0792316926770708e-05, "loss": 0.4644, "step": 19612 }, { "epoch": 25.10464, "grad_norm": 1.1838977336883545, "learning_rate": 1.079031612645058e-05, "loss": 0.494, "step": 19613 }, { "epoch": 25.10592, "grad_norm": 1.1999531984329224, "learning_rate": 1.0788315326130452e-05, "loss": 0.52, "step": 19614 }, { "epoch": 25.1072, "grad_norm": 1.1070973873138428, "learning_rate": 1.0786314525810324e-05, "loss": 0.4395, "step": 19615 }, { "epoch": 25.10848, "grad_norm": 1.1713796854019165, "learning_rate": 1.0784313725490197e-05, "loss": 0.4757, "step": 19616 }, { "epoch": 25.10976, "grad_norm": 1.1294423341751099, "learning_rate": 1.0782312925170068e-05, "loss": 0.4865, "step": 19617 }, { "epoch": 25.11104, "grad_norm": 1.1077033281326294, "learning_rate": 1.0780312124849941e-05, "loss": 0.4506, "step": 19618 }, { "epoch": 25.11232, "grad_norm": 1.2054367065429688, "learning_rate": 1.0778311324529811e-05, "loss": 0.4717, "step": 19619 }, { "epoch": 25.1136, "grad_norm": 1.1735788583755493, "learning_rate": 1.0776310524209685e-05, "loss": 0.479, "step": 19620 }, { "epoch": 25.11488, "grad_norm": 1.128080129623413, "learning_rate": 1.0774309723889555e-05, "loss": 0.4248, "step": 19621 }, { "epoch": 25.11616, "grad_norm": 1.1419153213500977, "learning_rate": 1.0772308923569429e-05, "loss": 0.4552, "step": 19622 }, { "epoch": 25.11744, "grad_norm": 1.2297865152359009, "learning_rate": 1.07703081232493e-05, "loss": 0.4826, "step": 19623 }, { "epoch": 25.11872, "grad_norm": 1.1833217144012451, "learning_rate": 1.0768307322929172e-05, "loss": 0.521, "step": 19624 }, { "epoch": 25.12, "grad_norm": 1.152848243713379, "learning_rate": 1.0766306522609044e-05, "loss": 0.5335, "step": 19625 }, { 
"epoch": 25.12128, "grad_norm": 1.1874018907546997, "learning_rate": 1.0764305722288916e-05, "loss": 0.4737, "step": 19626 }, { "epoch": 25.12256, "grad_norm": 1.2371412515640259, "learning_rate": 1.0762304921968788e-05, "loss": 0.4999, "step": 19627 }, { "epoch": 25.12384, "grad_norm": 1.167389988899231, "learning_rate": 1.076030412164866e-05, "loss": 0.4559, "step": 19628 }, { "epoch": 25.12512, "grad_norm": 1.1866403818130493, "learning_rate": 1.0758303321328532e-05, "loss": 0.4684, "step": 19629 }, { "epoch": 25.1264, "grad_norm": 1.1500517129898071, "learning_rate": 1.0756302521008405e-05, "loss": 0.4702, "step": 19630 }, { "epoch": 25.12768, "grad_norm": 1.1612963676452637, "learning_rate": 1.0754301720688275e-05, "loss": 0.4909, "step": 19631 }, { "epoch": 25.12896, "grad_norm": 1.1947745084762573, "learning_rate": 1.0752300920368149e-05, "loss": 0.4601, "step": 19632 }, { "epoch": 25.13024, "grad_norm": 1.2526357173919678, "learning_rate": 1.0750300120048019e-05, "loss": 0.4815, "step": 19633 }, { "epoch": 25.13152, "grad_norm": 1.1006672382354736, "learning_rate": 1.0748299319727893e-05, "loss": 0.4635, "step": 19634 }, { "epoch": 25.1328, "grad_norm": 1.0949301719665527, "learning_rate": 1.0746298519407763e-05, "loss": 0.4385, "step": 19635 }, { "epoch": 25.13408, "grad_norm": 1.2405381202697754, "learning_rate": 1.0744297719087636e-05, "loss": 0.5009, "step": 19636 }, { "epoch": 25.13536, "grad_norm": 1.1082509756088257, "learning_rate": 1.0742296918767508e-05, "loss": 0.4463, "step": 19637 }, { "epoch": 25.13664, "grad_norm": 1.1167351007461548, "learning_rate": 1.074029611844738e-05, "loss": 0.4892, "step": 19638 }, { "epoch": 25.13792, "grad_norm": 1.158218264579773, "learning_rate": 1.0738295318127252e-05, "loss": 0.4539, "step": 19639 }, { "epoch": 25.1392, "grad_norm": 1.2034755945205688, "learning_rate": 1.0736294517807124e-05, "loss": 0.4988, "step": 19640 }, { "epoch": 25.14048, "grad_norm": 1.1195570230484009, "learning_rate": 
1.0734293717486996e-05, "loss": 0.4505, "step": 19641 }, { "epoch": 25.14176, "grad_norm": 1.1278491020202637, "learning_rate": 1.0732292917166867e-05, "loss": 0.4631, "step": 19642 }, { "epoch": 25.14304, "grad_norm": 1.1615363359451294, "learning_rate": 1.073029211684674e-05, "loss": 0.4845, "step": 19643 }, { "epoch": 25.14432, "grad_norm": 1.1552554368972778, "learning_rate": 1.0728291316526611e-05, "loss": 0.4543, "step": 19644 }, { "epoch": 25.1456, "grad_norm": 1.1733779907226562, "learning_rate": 1.0726290516206483e-05, "loss": 0.5251, "step": 19645 }, { "epoch": 25.14688, "grad_norm": 1.2017656564712524, "learning_rate": 1.0724289715886355e-05, "loss": 0.5293, "step": 19646 }, { "epoch": 25.14816, "grad_norm": 1.1338574886322021, "learning_rate": 1.0722288915566227e-05, "loss": 0.4493, "step": 19647 }, { "epoch": 25.14944, "grad_norm": 1.1379152536392212, "learning_rate": 1.0720288115246099e-05, "loss": 0.4381, "step": 19648 }, { "epoch": 25.15072, "grad_norm": 1.1826634407043457, "learning_rate": 1.071828731492597e-05, "loss": 0.4921, "step": 19649 }, { "epoch": 25.152, "grad_norm": 1.1363234519958496, "learning_rate": 1.0716286514605842e-05, "loss": 0.4593, "step": 19650 }, { "epoch": 25.15328, "grad_norm": 1.157832384109497, "learning_rate": 1.0714285714285714e-05, "loss": 0.456, "step": 19651 }, { "epoch": 25.15456, "grad_norm": 1.1512408256530762, "learning_rate": 1.0712284913965586e-05, "loss": 0.4946, "step": 19652 }, { "epoch": 25.15584, "grad_norm": 1.2528743743896484, "learning_rate": 1.071028411364546e-05, "loss": 0.5087, "step": 19653 }, { "epoch": 25.15712, "grad_norm": 1.1764575242996216, "learning_rate": 1.070828331332533e-05, "loss": 0.4661, "step": 19654 }, { "epoch": 25.1584, "grad_norm": 1.1859891414642334, "learning_rate": 1.0706282513005203e-05, "loss": 0.4623, "step": 19655 }, { "epoch": 25.15968, "grad_norm": 1.1567497253417969, "learning_rate": 1.0704281712685073e-05, "loss": 0.4522, "step": 19656 }, { "epoch": 25.16096, 
"grad_norm": 1.191056489944458, "learning_rate": 1.0702280912364947e-05, "loss": 0.4735, "step": 19657 }, { "epoch": 25.16224, "grad_norm": 1.1542645692825317, "learning_rate": 1.0700280112044817e-05, "loss": 0.4693, "step": 19658 }, { "epoch": 25.16352, "grad_norm": 1.1220331192016602, "learning_rate": 1.069827931172469e-05, "loss": 0.4753, "step": 19659 }, { "epoch": 25.1648, "grad_norm": 1.1264070272445679, "learning_rate": 1.0696278511404562e-05, "loss": 0.4709, "step": 19660 }, { "epoch": 25.16608, "grad_norm": 1.16756272315979, "learning_rate": 1.0694277711084434e-05, "loss": 0.4573, "step": 19661 }, { "epoch": 25.16736, "grad_norm": 1.1602298021316528, "learning_rate": 1.0692276910764306e-05, "loss": 0.4363, "step": 19662 }, { "epoch": 25.16864, "grad_norm": 1.1670148372650146, "learning_rate": 1.0690276110444178e-05, "loss": 0.4925, "step": 19663 }, { "epoch": 25.16992, "grad_norm": 1.2356739044189453, "learning_rate": 1.068827531012405e-05, "loss": 0.4923, "step": 19664 }, { "epoch": 25.1712, "grad_norm": 1.2018803358078003, "learning_rate": 1.0686274509803922e-05, "loss": 0.4857, "step": 19665 }, { "epoch": 25.17248, "grad_norm": 1.1678217649459839, "learning_rate": 1.0684273709483794e-05, "loss": 0.4493, "step": 19666 }, { "epoch": 25.17376, "grad_norm": 1.1933562755584717, "learning_rate": 1.0682272909163665e-05, "loss": 0.4712, "step": 19667 }, { "epoch": 25.17504, "grad_norm": 1.1472783088684082, "learning_rate": 1.0680272108843537e-05, "loss": 0.4889, "step": 19668 }, { "epoch": 25.17632, "grad_norm": 1.113418698310852, "learning_rate": 1.0678271308523411e-05, "loss": 0.4455, "step": 19669 }, { "epoch": 25.1776, "grad_norm": 1.2086471319198608, "learning_rate": 1.0676270508203281e-05, "loss": 0.4709, "step": 19670 }, { "epoch": 25.17888, "grad_norm": 1.0935802459716797, "learning_rate": 1.0674269707883155e-05, "loss": 0.4329, "step": 19671 }, { "epoch": 25.18016, "grad_norm": 1.1935173273086548, "learning_rate": 1.0672268907563025e-05, "loss": 
0.4988, "step": 19672 }, { "epoch": 25.18144, "grad_norm": 1.189258337020874, "learning_rate": 1.0670268107242898e-05, "loss": 0.4994, "step": 19673 }, { "epoch": 25.18272, "grad_norm": 1.2858033180236816, "learning_rate": 1.0668267306922768e-05, "loss": 0.5511, "step": 19674 }, { "epoch": 25.184, "grad_norm": 1.2078543901443481, "learning_rate": 1.0666266506602642e-05, "loss": 0.4853, "step": 19675 }, { "epoch": 25.18528, "grad_norm": 1.1853865385055542, "learning_rate": 1.0664265706282514e-05, "loss": 0.475, "step": 19676 }, { "epoch": 25.18656, "grad_norm": 1.1798455715179443, "learning_rate": 1.0662264905962386e-05, "loss": 0.5103, "step": 19677 }, { "epoch": 25.18784, "grad_norm": 1.1571396589279175, "learning_rate": 1.0660264105642258e-05, "loss": 0.4582, "step": 19678 }, { "epoch": 25.18912, "grad_norm": 1.178780198097229, "learning_rate": 1.065826330532213e-05, "loss": 0.5142, "step": 19679 }, { "epoch": 25.1904, "grad_norm": 1.1873482465744019, "learning_rate": 1.0656262505002001e-05, "loss": 0.4801, "step": 19680 }, { "epoch": 25.19168, "grad_norm": 1.1041178703308105, "learning_rate": 1.0654261704681873e-05, "loss": 0.4445, "step": 19681 }, { "epoch": 25.19296, "grad_norm": 1.1332768201828003, "learning_rate": 1.0652260904361745e-05, "loss": 0.444, "step": 19682 }, { "epoch": 25.19424, "grad_norm": 1.1968101263046265, "learning_rate": 1.0650260104041619e-05, "loss": 0.5094, "step": 19683 }, { "epoch": 25.19552, "grad_norm": 1.158816933631897, "learning_rate": 1.0648259303721489e-05, "loss": 0.4494, "step": 19684 }, { "epoch": 25.1968, "grad_norm": 1.192723274230957, "learning_rate": 1.0646258503401362e-05, "loss": 0.4797, "step": 19685 }, { "epoch": 25.19808, "grad_norm": 1.202081561088562, "learning_rate": 1.0644257703081232e-05, "loss": 0.441, "step": 19686 }, { "epoch": 25.19936, "grad_norm": 1.2185006141662598, "learning_rate": 1.0642256902761106e-05, "loss": 0.4857, "step": 19687 }, { "epoch": 25.20064, "grad_norm": 1.2291009426116943, 
"learning_rate": 1.0640256102440976e-05, "loss": 0.4885, "step": 19688 }, { "epoch": 25.20192, "grad_norm": 1.1647063493728638, "learning_rate": 1.063825530212085e-05, "loss": 0.4644, "step": 19689 }, { "epoch": 25.2032, "grad_norm": 1.197346806526184, "learning_rate": 1.0636254501800722e-05, "loss": 0.5212, "step": 19690 }, { "epoch": 25.20448, "grad_norm": 1.0721224546432495, "learning_rate": 1.0634253701480593e-05, "loss": 0.4732, "step": 19691 }, { "epoch": 25.20576, "grad_norm": 1.1543662548065186, "learning_rate": 1.0632252901160465e-05, "loss": 0.4853, "step": 19692 }, { "epoch": 25.20704, "grad_norm": 1.0685230493545532, "learning_rate": 1.0630252100840337e-05, "loss": 0.4494, "step": 19693 }, { "epoch": 25.20832, "grad_norm": 1.1875240802764893, "learning_rate": 1.0628251300520209e-05, "loss": 0.4819, "step": 19694 }, { "epoch": 25.209600000000002, "grad_norm": 1.1269207000732422, "learning_rate": 1.0626250500200081e-05, "loss": 0.4352, "step": 19695 }, { "epoch": 25.21088, "grad_norm": 1.164787769317627, "learning_rate": 1.0624249699879953e-05, "loss": 0.4917, "step": 19696 }, { "epoch": 25.21216, "grad_norm": 1.168983817100525, "learning_rate": 1.0622248899559825e-05, "loss": 0.454, "step": 19697 }, { "epoch": 25.21344, "grad_norm": 1.1204594373703003, "learning_rate": 1.0620248099239696e-05, "loss": 0.428, "step": 19698 }, { "epoch": 25.21472, "grad_norm": 1.1424245834350586, "learning_rate": 1.0618247298919568e-05, "loss": 0.441, "step": 19699 }, { "epoch": 25.216, "grad_norm": 1.2119210958480835, "learning_rate": 1.061624649859944e-05, "loss": 0.4883, "step": 19700 }, { "epoch": 25.21728, "grad_norm": 1.1396766901016235, "learning_rate": 1.0614245698279312e-05, "loss": 0.4449, "step": 19701 }, { "epoch": 25.21856, "grad_norm": 1.164551019668579, "learning_rate": 1.0612244897959184e-05, "loss": 0.466, "step": 19702 }, { "epoch": 25.21984, "grad_norm": 1.1590672731399536, "learning_rate": 1.0610244097639056e-05, "loss": 0.4948, "step": 19703 }, { 
"epoch": 25.22112, "grad_norm": 1.2192983627319336, "learning_rate": 1.0608243297318928e-05, "loss": 0.463, "step": 19704 }, { "epoch": 25.2224, "grad_norm": 1.1451095342636108, "learning_rate": 1.06062424969988e-05, "loss": 0.4541, "step": 19705 }, { "epoch": 25.22368, "grad_norm": 1.203220009803772, "learning_rate": 1.0604241696678673e-05, "loss": 0.4684, "step": 19706 }, { "epoch": 25.22496, "grad_norm": 1.1325691938400269, "learning_rate": 1.0602240896358543e-05, "loss": 0.5005, "step": 19707 }, { "epoch": 25.22624, "grad_norm": 1.106677532196045, "learning_rate": 1.0600240096038417e-05, "loss": 0.5002, "step": 19708 }, { "epoch": 25.22752, "grad_norm": 1.176331639289856, "learning_rate": 1.0598239295718287e-05, "loss": 0.4872, "step": 19709 }, { "epoch": 25.2288, "grad_norm": 1.1356264352798462, "learning_rate": 1.059623849539816e-05, "loss": 0.4681, "step": 19710 }, { "epoch": 25.23008, "grad_norm": 1.0752949714660645, "learning_rate": 1.059423769507803e-05, "loss": 0.4497, "step": 19711 }, { "epoch": 25.23136, "grad_norm": 1.1183545589447021, "learning_rate": 1.0592236894757904e-05, "loss": 0.4709, "step": 19712 }, { "epoch": 25.23264, "grad_norm": 1.1348707675933838, "learning_rate": 1.0590236094437776e-05, "loss": 0.4373, "step": 19713 }, { "epoch": 25.23392, "grad_norm": 1.201171636581421, "learning_rate": 1.0588235294117648e-05, "loss": 0.4347, "step": 19714 }, { "epoch": 25.2352, "grad_norm": 1.2042893171310425, "learning_rate": 1.058623449379752e-05, "loss": 0.4988, "step": 19715 }, { "epoch": 25.23648, "grad_norm": 1.172882318496704, "learning_rate": 1.0584233693477392e-05, "loss": 0.4931, "step": 19716 }, { "epoch": 25.23776, "grad_norm": 1.1809163093566895, "learning_rate": 1.0582232893157263e-05, "loss": 0.463, "step": 19717 }, { "epoch": 25.23904, "grad_norm": 1.2052890062332153, "learning_rate": 1.0580232092837135e-05, "loss": 0.487, "step": 19718 }, { "epoch": 25.24032, "grad_norm": 1.1632746458053589, "learning_rate": 1.0578231292517007e-05, 
"loss": 0.4584, "step": 19719 }, { "epoch": 25.2416, "grad_norm": 1.1408315896987915, "learning_rate": 1.057623049219688e-05, "loss": 0.4565, "step": 19720 }, { "epoch": 25.24288, "grad_norm": 1.1668243408203125, "learning_rate": 1.057422969187675e-05, "loss": 0.4432, "step": 19721 }, { "epoch": 25.24416, "grad_norm": 1.1259456872940063, "learning_rate": 1.0572228891556624e-05, "loss": 0.456, "step": 19722 }, { "epoch": 25.24544, "grad_norm": 1.109616994857788, "learning_rate": 1.0570228091236495e-05, "loss": 0.4352, "step": 19723 }, { "epoch": 25.24672, "grad_norm": 1.1580768823623657, "learning_rate": 1.0568227290916368e-05, "loss": 0.451, "step": 19724 }, { "epoch": 25.248, "grad_norm": 1.1380645036697388, "learning_rate": 1.0566226490596238e-05, "loss": 0.4576, "step": 19725 }, { "epoch": 25.24928, "grad_norm": 1.1373761892318726, "learning_rate": 1.0564225690276112e-05, "loss": 0.4766, "step": 19726 }, { "epoch": 25.25056, "grad_norm": 1.169333577156067, "learning_rate": 1.0562224889955982e-05, "loss": 0.5047, "step": 19727 }, { "epoch": 25.25184, "grad_norm": 1.1574784517288208, "learning_rate": 1.0560224089635856e-05, "loss": 0.4608, "step": 19728 }, { "epoch": 25.25312, "grad_norm": 1.2007426023483276, "learning_rate": 1.0558223289315727e-05, "loss": 0.4717, "step": 19729 }, { "epoch": 25.2544, "grad_norm": 1.2134323120117188, "learning_rate": 1.05562224889956e-05, "loss": 0.4757, "step": 19730 }, { "epoch": 25.25568, "grad_norm": 1.1803364753723145, "learning_rate": 1.0554221688675471e-05, "loss": 0.462, "step": 19731 }, { "epoch": 25.25696, "grad_norm": 1.1004109382629395, "learning_rate": 1.0552220888355343e-05, "loss": 0.4594, "step": 19732 }, { "epoch": 25.25824, "grad_norm": 1.1959086656570435, "learning_rate": 1.0550220088035215e-05, "loss": 0.4424, "step": 19733 }, { "epoch": 25.25952, "grad_norm": 1.1749048233032227, "learning_rate": 1.0548219287715087e-05, "loss": 0.4687, "step": 19734 }, { "epoch": 25.2608, "grad_norm": 1.2056117057800293, 
"learning_rate": 1.0546218487394959e-05, "loss": 0.4574, "step": 19735 }, { "epoch": 25.26208, "grad_norm": 1.1588826179504395, "learning_rate": 1.054421768707483e-05, "loss": 0.4271, "step": 19736 }, { "epoch": 25.26336, "grad_norm": 1.1901261806488037, "learning_rate": 1.0542216886754702e-05, "loss": 0.4621, "step": 19737 }, { "epoch": 25.26464, "grad_norm": 1.1712379455566406, "learning_rate": 1.0540216086434574e-05, "loss": 0.4774, "step": 19738 }, { "epoch": 25.26592, "grad_norm": 1.1799527406692505, "learning_rate": 1.0538215286114446e-05, "loss": 0.4345, "step": 19739 }, { "epoch": 25.2672, "grad_norm": 1.1726861000061035, "learning_rate": 1.0536214485794318e-05, "loss": 0.5192, "step": 19740 }, { "epoch": 25.26848, "grad_norm": 1.1405744552612305, "learning_rate": 1.053421368547419e-05, "loss": 0.4844, "step": 19741 }, { "epoch": 25.26976, "grad_norm": 1.1705641746520996, "learning_rate": 1.0532212885154062e-05, "loss": 0.4587, "step": 19742 }, { "epoch": 25.27104, "grad_norm": 1.0993807315826416, "learning_rate": 1.0530212084833935e-05, "loss": 0.4864, "step": 19743 }, { "epoch": 25.27232, "grad_norm": 1.1683118343353271, "learning_rate": 1.0528211284513805e-05, "loss": 0.4339, "step": 19744 }, { "epoch": 25.2736, "grad_norm": 1.18573796749115, "learning_rate": 1.0526210484193679e-05, "loss": 0.5003, "step": 19745 }, { "epoch": 25.27488, "grad_norm": 1.1361162662506104, "learning_rate": 1.0524209683873549e-05, "loss": 0.4511, "step": 19746 }, { "epoch": 25.27616, "grad_norm": 1.144995927810669, "learning_rate": 1.0522208883553422e-05, "loss": 0.4936, "step": 19747 }, { "epoch": 25.27744, "grad_norm": 1.0969334840774536, "learning_rate": 1.0520208083233293e-05, "loss": 0.4298, "step": 19748 }, { "epoch": 25.27872, "grad_norm": 1.1504309177398682, "learning_rate": 1.0518207282913166e-05, "loss": 0.4984, "step": 19749 }, { "epoch": 25.28, "grad_norm": 1.1646525859832764, "learning_rate": 1.0516206482593036e-05, "loss": 0.4784, "step": 19750 }, { "epoch": 
25.28128, "grad_norm": 1.2210896015167236, "learning_rate": 1.051420568227291e-05, "loss": 0.5125, "step": 19751 }, { "epoch": 25.28256, "grad_norm": 1.1825578212738037, "learning_rate": 1.0512204881952782e-05, "loss": 0.4775, "step": 19752 }, { "epoch": 25.28384, "grad_norm": 1.098472237586975, "learning_rate": 1.0510204081632654e-05, "loss": 0.4602, "step": 19753 }, { "epoch": 25.28512, "grad_norm": 1.095916748046875, "learning_rate": 1.0508203281312525e-05, "loss": 0.4521, "step": 19754 }, { "epoch": 25.2864, "grad_norm": 1.1654385328292847, "learning_rate": 1.0506202480992397e-05, "loss": 0.4875, "step": 19755 }, { "epoch": 25.28768, "grad_norm": 1.1669584512710571, "learning_rate": 1.050420168067227e-05, "loss": 0.4939, "step": 19756 }, { "epoch": 25.28896, "grad_norm": 1.1289222240447998, "learning_rate": 1.0502200880352141e-05, "loss": 0.4583, "step": 19757 }, { "epoch": 25.29024, "grad_norm": 1.213550090789795, "learning_rate": 1.0500200080032013e-05, "loss": 0.4899, "step": 19758 }, { "epoch": 25.29152, "grad_norm": 1.2919397354125977, "learning_rate": 1.0498199279711886e-05, "loss": 0.5269, "step": 19759 }, { "epoch": 25.2928, "grad_norm": 1.1487518548965454, "learning_rate": 1.0496198479391757e-05, "loss": 0.4528, "step": 19760 }, { "epoch": 25.29408, "grad_norm": 1.0663877725601196, "learning_rate": 1.049419767907163e-05, "loss": 0.4368, "step": 19761 }, { "epoch": 25.29536, "grad_norm": 1.125251293182373, "learning_rate": 1.04921968787515e-05, "loss": 0.4331, "step": 19762 }, { "epoch": 25.29664, "grad_norm": 1.1998741626739502, "learning_rate": 1.0490196078431374e-05, "loss": 0.4779, "step": 19763 }, { "epoch": 25.29792, "grad_norm": 1.1586112976074219, "learning_rate": 1.0488195278111244e-05, "loss": 0.4594, "step": 19764 }, { "epoch": 25.2992, "grad_norm": 1.1433591842651367, "learning_rate": 1.0486194477791118e-05, "loss": 0.4665, "step": 19765 }, { "epoch": 25.30048, "grad_norm": 1.2368263006210327, "learning_rate": 1.048419367747099e-05, "loss": 
0.5092, "step": 19766 }, { "epoch": 25.30176, "grad_norm": 1.1645898818969727, "learning_rate": 1.0482192877150861e-05, "loss": 0.4237, "step": 19767 }, { "epoch": 25.30304, "grad_norm": 1.1961017847061157, "learning_rate": 1.0480192076830733e-05, "loss": 0.4801, "step": 19768 }, { "epoch": 25.30432, "grad_norm": 1.1524591445922852, "learning_rate": 1.0478191276510605e-05, "loss": 0.4472, "step": 19769 }, { "epoch": 25.3056, "grad_norm": 1.2043436765670776, "learning_rate": 1.0476190476190477e-05, "loss": 0.4915, "step": 19770 }, { "epoch": 25.30688, "grad_norm": 1.173971176147461, "learning_rate": 1.0474189675870349e-05, "loss": 0.464, "step": 19771 }, { "epoch": 25.30816, "grad_norm": 1.1863977909088135, "learning_rate": 1.047218887555022e-05, "loss": 0.4996, "step": 19772 }, { "epoch": 25.30944, "grad_norm": 1.2210062742233276, "learning_rate": 1.0470188075230092e-05, "loss": 0.4825, "step": 19773 }, { "epoch": 25.31072, "grad_norm": 1.209662914276123, "learning_rate": 1.0468187274909964e-05, "loss": 0.4904, "step": 19774 }, { "epoch": 25.312, "grad_norm": 1.1289560794830322, "learning_rate": 1.0466186474589836e-05, "loss": 0.4321, "step": 19775 }, { "epoch": 25.31328, "grad_norm": 1.1407843828201294, "learning_rate": 1.0464185674269708e-05, "loss": 0.4982, "step": 19776 }, { "epoch": 25.31456, "grad_norm": 1.0865232944488525, "learning_rate": 1.046218487394958e-05, "loss": 0.4361, "step": 19777 }, { "epoch": 25.31584, "grad_norm": 1.192724585533142, "learning_rate": 1.0460184073629452e-05, "loss": 0.5161, "step": 19778 }, { "epoch": 25.31712, "grad_norm": 1.1117703914642334, "learning_rate": 1.0458183273309324e-05, "loss": 0.4656, "step": 19779 }, { "epoch": 25.3184, "grad_norm": 1.1453944444656372, "learning_rate": 1.0456182472989195e-05, "loss": 0.4729, "step": 19780 }, { "epoch": 25.31968, "grad_norm": 1.1950432062149048, "learning_rate": 1.0454181672669067e-05, "loss": 0.4684, "step": 19781 }, { "epoch": 25.32096, "grad_norm": 1.17280912399292, 
"learning_rate": 1.0452180872348941e-05, "loss": 0.4754, "step": 19782 }, { "epoch": 25.32224, "grad_norm": 1.2976539134979248, "learning_rate": 1.0450180072028811e-05, "loss": 0.5065, "step": 19783 }, { "epoch": 25.32352, "grad_norm": 1.2335752248764038, "learning_rate": 1.0448179271708685e-05, "loss": 0.4805, "step": 19784 }, { "epoch": 25.3248, "grad_norm": 1.1491906642913818, "learning_rate": 1.0446178471388555e-05, "loss": 0.424, "step": 19785 }, { "epoch": 25.32608, "grad_norm": 1.1486718654632568, "learning_rate": 1.0444177671068428e-05, "loss": 0.4615, "step": 19786 }, { "epoch": 25.32736, "grad_norm": 1.197849988937378, "learning_rate": 1.0442176870748298e-05, "loss": 0.4458, "step": 19787 }, { "epoch": 25.32864, "grad_norm": 1.2207767963409424, "learning_rate": 1.0440176070428172e-05, "loss": 0.5007, "step": 19788 }, { "epoch": 25.32992, "grad_norm": 1.2052980661392212, "learning_rate": 1.0438175270108044e-05, "loss": 0.4814, "step": 19789 }, { "epoch": 25.3312, "grad_norm": 1.1656453609466553, "learning_rate": 1.0436174469787916e-05, "loss": 0.4519, "step": 19790 }, { "epoch": 25.33248, "grad_norm": 1.150758981704712, "learning_rate": 1.0434173669467788e-05, "loss": 0.4832, "step": 19791 }, { "epoch": 25.33376, "grad_norm": 1.1945995092391968, "learning_rate": 1.043217286914766e-05, "loss": 0.469, "step": 19792 }, { "epoch": 25.33504, "grad_norm": 1.1622135639190674, "learning_rate": 1.0430172068827531e-05, "loss": 0.4425, "step": 19793 }, { "epoch": 25.33632, "grad_norm": 1.1778172254562378, "learning_rate": 1.0428171268507403e-05, "loss": 0.4779, "step": 19794 }, { "epoch": 25.3376, "grad_norm": 1.2094917297363281, "learning_rate": 1.0426170468187275e-05, "loss": 0.4715, "step": 19795 }, { "epoch": 25.33888, "grad_norm": 1.206709861755371, "learning_rate": 1.0424169667867149e-05, "loss": 0.5091, "step": 19796 }, { "epoch": 25.34016, "grad_norm": 1.2187248468399048, "learning_rate": 1.0422168867547019e-05, "loss": 0.5013, "step": 19797 }, { "epoch": 
25.34144, "grad_norm": 1.3126270771026611, "learning_rate": 1.0420168067226892e-05, "loss": 0.4873, "step": 19798 }, { "epoch": 25.34272, "grad_norm": 1.1935737133026123, "learning_rate": 1.0418167266906762e-05, "loss": 0.4794, "step": 19799 }, { "epoch": 25.344, "grad_norm": 1.2357202768325806, "learning_rate": 1.0416166466586636e-05, "loss": 0.4987, "step": 19800 }, { "epoch": 25.34528, "grad_norm": 1.1685526371002197, "learning_rate": 1.0414165666266506e-05, "loss": 0.4617, "step": 19801 }, { "epoch": 25.34656, "grad_norm": 1.1791237592697144, "learning_rate": 1.041216486594638e-05, "loss": 0.4917, "step": 19802 }, { "epoch": 25.34784, "grad_norm": 1.1622064113616943, "learning_rate": 1.0410164065626252e-05, "loss": 0.4591, "step": 19803 }, { "epoch": 25.34912, "grad_norm": 1.1878505945205688, "learning_rate": 1.0408163265306123e-05, "loss": 0.483, "step": 19804 }, { "epoch": 25.3504, "grad_norm": 1.2232707738876343, "learning_rate": 1.0406162464985995e-05, "loss": 0.4873, "step": 19805 }, { "epoch": 25.35168, "grad_norm": 1.1818996667861938, "learning_rate": 1.0404161664665867e-05, "loss": 0.4899, "step": 19806 }, { "epoch": 25.35296, "grad_norm": 1.1730505228042603, "learning_rate": 1.0402160864345739e-05, "loss": 0.4547, "step": 19807 }, { "epoch": 25.35424, "grad_norm": 1.2024863958358765, "learning_rate": 1.040016006402561e-05, "loss": 0.4791, "step": 19808 }, { "epoch": 25.35552, "grad_norm": 1.1775726079940796, "learning_rate": 1.0398159263705483e-05, "loss": 0.4802, "step": 19809 }, { "epoch": 25.3568, "grad_norm": 1.1456516981124878, "learning_rate": 1.0396158463385355e-05, "loss": 0.4666, "step": 19810 }, { "epoch": 25.35808, "grad_norm": 1.1130499839782715, "learning_rate": 1.0394157663065226e-05, "loss": 0.4674, "step": 19811 }, { "epoch": 25.35936, "grad_norm": 1.2441742420196533, "learning_rate": 1.0392156862745098e-05, "loss": 0.5162, "step": 19812 }, { "epoch": 25.36064, "grad_norm": 1.139827847480774, "learning_rate": 1.039015606242497e-05, 
"loss": 0.4504, "step": 19813 }, { "epoch": 25.36192, "grad_norm": 1.1869721412658691, "learning_rate": 1.0388155262104842e-05, "loss": 0.5, "step": 19814 }, { "epoch": 25.3632, "grad_norm": 1.1606616973876953, "learning_rate": 1.0386154461784714e-05, "loss": 0.5037, "step": 19815 }, { "epoch": 25.36448, "grad_norm": 1.1421772241592407, "learning_rate": 1.0384153661464586e-05, "loss": 0.4588, "step": 19816 }, { "epoch": 25.36576, "grad_norm": 1.224739909172058, "learning_rate": 1.0382152861144458e-05, "loss": 0.4998, "step": 19817 }, { "epoch": 25.36704, "grad_norm": 1.2049306631088257, "learning_rate": 1.038015206082433e-05, "loss": 0.5418, "step": 19818 }, { "epoch": 25.36832, "grad_norm": 1.1043879985809326, "learning_rate": 1.0378151260504203e-05, "loss": 0.4709, "step": 19819 }, { "epoch": 25.3696, "grad_norm": 1.1660434007644653, "learning_rate": 1.0376150460184073e-05, "loss": 0.48, "step": 19820 }, { "epoch": 25.37088, "grad_norm": 1.2284889221191406, "learning_rate": 1.0374149659863947e-05, "loss": 0.5137, "step": 19821 }, { "epoch": 25.37216, "grad_norm": 1.163232684135437, "learning_rate": 1.0372148859543817e-05, "loss": 0.4841, "step": 19822 }, { "epoch": 25.37344, "grad_norm": 1.2111849784851074, "learning_rate": 1.037014805922369e-05, "loss": 0.4561, "step": 19823 }, { "epoch": 25.37472, "grad_norm": 1.1000887155532837, "learning_rate": 1.036814725890356e-05, "loss": 0.4191, "step": 19824 }, { "epoch": 25.376, "grad_norm": 1.1611400842666626, "learning_rate": 1.0366146458583434e-05, "loss": 0.5186, "step": 19825 }, { "epoch": 25.37728, "grad_norm": 1.2061467170715332, "learning_rate": 1.0364145658263306e-05, "loss": 0.4728, "step": 19826 }, { "epoch": 25.37856, "grad_norm": 1.147557020187378, "learning_rate": 1.0362144857943178e-05, "loss": 0.4578, "step": 19827 }, { "epoch": 25.37984, "grad_norm": 1.1873794794082642, "learning_rate": 1.036014405762305e-05, "loss": 0.4687, "step": 19828 }, { "epoch": 25.38112, "grad_norm": 1.1945492029190063, 
"learning_rate": 1.0358143257302922e-05, "loss": 0.4752, "step": 19829 }, { "epoch": 25.3824, "grad_norm": 1.1753865480422974, "learning_rate": 1.0356142456982793e-05, "loss": 0.4974, "step": 19830 }, { "epoch": 25.38368, "grad_norm": 1.1818007230758667, "learning_rate": 1.0354141656662665e-05, "loss": 0.4648, "step": 19831 }, { "epoch": 25.38496, "grad_norm": 1.2011675834655762, "learning_rate": 1.0352140856342537e-05, "loss": 0.4546, "step": 19832 }, { "epoch": 25.38624, "grad_norm": 1.203870415687561, "learning_rate": 1.0350140056022409e-05, "loss": 0.4703, "step": 19833 }, { "epoch": 25.38752, "grad_norm": 1.1994456052780151, "learning_rate": 1.034813925570228e-05, "loss": 0.4695, "step": 19834 }, { "epoch": 25.3888, "grad_norm": 1.1361202001571655, "learning_rate": 1.0346138455382154e-05, "loss": 0.4831, "step": 19835 }, { "epoch": 25.39008, "grad_norm": 1.1568169593811035, "learning_rate": 1.0344137655062025e-05, "loss": 0.4666, "step": 19836 }, { "epoch": 25.39136, "grad_norm": 1.1744780540466309, "learning_rate": 1.0342136854741898e-05, "loss": 0.4628, "step": 19837 }, { "epoch": 25.39264, "grad_norm": 1.1357437372207642, "learning_rate": 1.0340136054421768e-05, "loss": 0.5288, "step": 19838 }, { "epoch": 25.39392, "grad_norm": 1.2219725847244263, "learning_rate": 1.0338135254101642e-05, "loss": 0.4923, "step": 19839 }, { "epoch": 25.3952, "grad_norm": 1.1251201629638672, "learning_rate": 1.0336134453781512e-05, "loss": 0.4548, "step": 19840 }, { "epoch": 25.39648, "grad_norm": 1.1264392137527466, "learning_rate": 1.0334133653461385e-05, "loss": 0.4398, "step": 19841 }, { "epoch": 25.39776, "grad_norm": 1.1508971452713013, "learning_rate": 1.0332132853141257e-05, "loss": 0.4722, "step": 19842 }, { "epoch": 25.39904, "grad_norm": 1.1928080320358276, "learning_rate": 1.033013205282113e-05, "loss": 0.4667, "step": 19843 }, { "epoch": 25.40032, "grad_norm": 1.1580041646957397, "learning_rate": 1.0328131252501001e-05, "loss": 0.4179, "step": 19844 }, { "epoch": 
25.4016, "grad_norm": 1.135171890258789, "learning_rate": 1.0326130452180873e-05, "loss": 0.4607, "step": 19845 }, { "epoch": 25.40288, "grad_norm": 1.2124780416488647, "learning_rate": 1.0324129651860745e-05, "loss": 0.502, "step": 19846 }, { "epoch": 25.40416, "grad_norm": 1.1926053762435913, "learning_rate": 1.0322128851540617e-05, "loss": 0.4687, "step": 19847 }, { "epoch": 25.40544, "grad_norm": 1.2334791421890259, "learning_rate": 1.0320128051220488e-05, "loss": 0.5041, "step": 19848 }, { "epoch": 25.40672, "grad_norm": 1.1845077276229858, "learning_rate": 1.0318127250900362e-05, "loss": 0.4649, "step": 19849 }, { "epoch": 25.408, "grad_norm": 1.1910468339920044, "learning_rate": 1.0316126450580232e-05, "loss": 0.4352, "step": 19850 }, { "epoch": 25.40928, "grad_norm": 1.1773654222488403, "learning_rate": 1.0314125650260106e-05, "loss": 0.4306, "step": 19851 }, { "epoch": 25.41056, "grad_norm": 1.211543083190918, "learning_rate": 1.0312124849939976e-05, "loss": 0.4869, "step": 19852 }, { "epoch": 25.41184, "grad_norm": 1.2373056411743164, "learning_rate": 1.031012404961985e-05, "loss": 0.4914, "step": 19853 }, { "epoch": 25.41312, "grad_norm": 1.2094565629959106, "learning_rate": 1.030812324929972e-05, "loss": 0.4829, "step": 19854 }, { "epoch": 25.4144, "grad_norm": 1.1150031089782715, "learning_rate": 1.0306122448979593e-05, "loss": 0.4435, "step": 19855 }, { "epoch": 25.41568, "grad_norm": 1.189208984375, "learning_rate": 1.0304121648659465e-05, "loss": 0.4767, "step": 19856 }, { "epoch": 25.41696, "grad_norm": 1.1635322570800781, "learning_rate": 1.0302120848339337e-05, "loss": 0.4754, "step": 19857 }, { "epoch": 25.41824, "grad_norm": 1.2085617780685425, "learning_rate": 1.0300120048019209e-05, "loss": 0.5219, "step": 19858 }, { "epoch": 25.41952, "grad_norm": 1.150395393371582, "learning_rate": 1.029811924769908e-05, "loss": 0.4459, "step": 19859 }, { "epoch": 25.4208, "grad_norm": 1.1318018436431885, "learning_rate": 1.0296118447378952e-05, "loss": 
0.453, "step": 19860 }, { "epoch": 25.42208, "grad_norm": 1.1467453241348267, "learning_rate": 1.0294117647058824e-05, "loss": 0.4603, "step": 19861 }, { "epoch": 25.42336, "grad_norm": 1.1553596258163452, "learning_rate": 1.0292116846738696e-05, "loss": 0.4754, "step": 19862 }, { "epoch": 25.42464, "grad_norm": 1.1549872159957886, "learning_rate": 1.0290116046418568e-05, "loss": 0.4961, "step": 19863 }, { "epoch": 25.42592, "grad_norm": 1.1191171407699585, "learning_rate": 1.028811524609844e-05, "loss": 0.4749, "step": 19864 }, { "epoch": 25.4272, "grad_norm": 1.1042835712432861, "learning_rate": 1.0286114445778312e-05, "loss": 0.4217, "step": 19865 }, { "epoch": 25.42848, "grad_norm": 1.2307777404785156, "learning_rate": 1.0284113645458184e-05, "loss": 0.4629, "step": 19866 }, { "epoch": 25.42976, "grad_norm": 1.1764090061187744, "learning_rate": 1.0282112845138055e-05, "loss": 0.4898, "step": 19867 }, { "epoch": 25.43104, "grad_norm": 1.189038634300232, "learning_rate": 1.0280112044817927e-05, "loss": 0.4423, "step": 19868 }, { "epoch": 25.43232, "grad_norm": 1.1858000755310059, "learning_rate": 1.02781112444978e-05, "loss": 0.4757, "step": 19869 }, { "epoch": 25.4336, "grad_norm": 1.191748023033142, "learning_rate": 1.0276110444177671e-05, "loss": 0.4882, "step": 19870 }, { "epoch": 25.43488, "grad_norm": 1.2290452718734741, "learning_rate": 1.0274109643857543e-05, "loss": 0.4923, "step": 19871 }, { "epoch": 25.43616, "grad_norm": 1.1514322757720947, "learning_rate": 1.0272108843537416e-05, "loss": 0.4417, "step": 19872 }, { "epoch": 25.43744, "grad_norm": 1.162440299987793, "learning_rate": 1.0270108043217287e-05, "loss": 0.4663, "step": 19873 }, { "epoch": 25.43872, "grad_norm": 1.1626965999603271, "learning_rate": 1.026810724289716e-05, "loss": 0.4808, "step": 19874 }, { "epoch": 25.44, "grad_norm": 1.1720807552337646, "learning_rate": 1.026610644257703e-05, "loss": 0.4818, "step": 19875 }, { "epoch": 25.44128, "grad_norm": 1.1925668716430664, 
"learning_rate": 1.0264105642256904e-05, "loss": 0.4541, "step": 19876 }, { "epoch": 25.44256, "grad_norm": 1.2147929668426514, "learning_rate": 1.0262104841936774e-05, "loss": 0.465, "step": 19877 }, { "epoch": 25.44384, "grad_norm": 1.1247954368591309, "learning_rate": 1.0260104041616648e-05, "loss": 0.4811, "step": 19878 }, { "epoch": 25.44512, "grad_norm": 1.1577873229980469, "learning_rate": 1.025810324129652e-05, "loss": 0.4534, "step": 19879 }, { "epoch": 25.4464, "grad_norm": 1.1290161609649658, "learning_rate": 1.0256102440976391e-05, "loss": 0.4636, "step": 19880 }, { "epoch": 25.44768, "grad_norm": 1.1666961908340454, "learning_rate": 1.0254101640656263e-05, "loss": 0.4773, "step": 19881 }, { "epoch": 25.44896, "grad_norm": 1.210201382637024, "learning_rate": 1.0252100840336135e-05, "loss": 0.5152, "step": 19882 }, { "epoch": 25.45024, "grad_norm": 1.1907308101654053, "learning_rate": 1.0250100040016007e-05, "loss": 0.4904, "step": 19883 }, { "epoch": 25.45152, "grad_norm": 1.2196985483169556, "learning_rate": 1.0248099239695879e-05, "loss": 0.499, "step": 19884 }, { "epoch": 25.4528, "grad_norm": 1.1352413892745972, "learning_rate": 1.024609843937575e-05, "loss": 0.4525, "step": 19885 }, { "epoch": 25.45408, "grad_norm": 1.1208443641662598, "learning_rate": 1.0244097639055624e-05, "loss": 0.4592, "step": 19886 }, { "epoch": 25.45536, "grad_norm": 1.152306079864502, "learning_rate": 1.0242096838735494e-05, "loss": 0.4724, "step": 19887 }, { "epoch": 25.45664, "grad_norm": 1.1952404975891113, "learning_rate": 1.0240096038415368e-05, "loss": 0.507, "step": 19888 }, { "epoch": 25.45792, "grad_norm": 1.207161545753479, "learning_rate": 1.0238095238095238e-05, "loss": 0.4736, "step": 19889 }, { "epoch": 25.4592, "grad_norm": 1.2183301448822021, "learning_rate": 1.0236094437775112e-05, "loss": 0.4445, "step": 19890 }, { "epoch": 25.46048, "grad_norm": 1.1657265424728394, "learning_rate": 1.0234093637454982e-05, "loss": 0.4539, "step": 19891 }, { "epoch": 
25.46176, "grad_norm": 1.19996976852417, "learning_rate": 1.0232092837134855e-05, "loss": 0.4312, "step": 19892 }, { "epoch": 25.46304, "grad_norm": 1.202269434928894, "learning_rate": 1.0230092036814725e-05, "loss": 0.4801, "step": 19893 }, { "epoch": 25.46432, "grad_norm": 1.1666241884231567, "learning_rate": 1.0228091236494599e-05, "loss": 0.4813, "step": 19894 }, { "epoch": 25.4656, "grad_norm": 1.2247138023376465, "learning_rate": 1.022609043617447e-05, "loss": 0.4749, "step": 19895 }, { "epoch": 25.46688, "grad_norm": 1.2047659158706665, "learning_rate": 1.0224089635854343e-05, "loss": 0.4895, "step": 19896 }, { "epoch": 25.46816, "grad_norm": 1.2424787282943726, "learning_rate": 1.0222088835534215e-05, "loss": 0.4762, "step": 19897 }, { "epoch": 25.46944, "grad_norm": 1.190301775932312, "learning_rate": 1.0220088035214086e-05, "loss": 0.4786, "step": 19898 }, { "epoch": 25.47072, "grad_norm": 1.2236806154251099, "learning_rate": 1.0218087234893958e-05, "loss": 0.5193, "step": 19899 }, { "epoch": 25.472, "grad_norm": 1.1594198942184448, "learning_rate": 1.021608643457383e-05, "loss": 0.5058, "step": 19900 }, { "epoch": 25.47328, "grad_norm": 1.1569578647613525, "learning_rate": 1.0214085634253702e-05, "loss": 0.4701, "step": 19901 }, { "epoch": 25.47456, "grad_norm": 1.1034061908721924, "learning_rate": 1.0212084833933574e-05, "loss": 0.4354, "step": 19902 }, { "epoch": 25.47584, "grad_norm": 1.136178970336914, "learning_rate": 1.0210084033613446e-05, "loss": 0.4281, "step": 19903 }, { "epoch": 25.47712, "grad_norm": 1.1307159662246704, "learning_rate": 1.0208083233293318e-05, "loss": 0.4523, "step": 19904 }, { "epoch": 25.4784, "grad_norm": 1.1765360832214355, "learning_rate": 1.020608243297319e-05, "loss": 0.4607, "step": 19905 }, { "epoch": 25.47968, "grad_norm": 1.0838277339935303, "learning_rate": 1.0204081632653061e-05, "loss": 0.4631, "step": 19906 }, { "epoch": 25.48096, "grad_norm": 1.1757216453552246, "learning_rate": 1.0202080832332933e-05, "loss": 
0.4862, "step": 19907 }, { "epoch": 25.48224, "grad_norm": 1.1511602401733398, "learning_rate": 1.0200080032012805e-05, "loss": 0.456, "step": 19908 }, { "epoch": 25.48352, "grad_norm": 1.1518641710281372, "learning_rate": 1.0198079231692679e-05, "loss": 0.5169, "step": 19909 }, { "epoch": 25.4848, "grad_norm": 1.1432472467422485, "learning_rate": 1.0196078431372549e-05, "loss": 0.4253, "step": 19910 }, { "epoch": 25.48608, "grad_norm": 1.1166365146636963, "learning_rate": 1.0194077631052422e-05, "loss": 0.464, "step": 19911 }, { "epoch": 25.48736, "grad_norm": 1.0852631330490112, "learning_rate": 1.0192076830732292e-05, "loss": 0.4376, "step": 19912 }, { "epoch": 25.48864, "grad_norm": 1.1255972385406494, "learning_rate": 1.0190076030412166e-05, "loss": 0.4564, "step": 19913 }, { "epoch": 25.48992, "grad_norm": 1.1327401399612427, "learning_rate": 1.0188075230092036e-05, "loss": 0.4365, "step": 19914 }, { "epoch": 25.4912, "grad_norm": 1.1716783046722412, "learning_rate": 1.018607442977191e-05, "loss": 0.5002, "step": 19915 }, { "epoch": 25.49248, "grad_norm": 1.2162928581237793, "learning_rate": 1.018407362945178e-05, "loss": 0.5105, "step": 19916 }, { "epoch": 25.49376, "grad_norm": 1.23090660572052, "learning_rate": 1.0182072829131653e-05, "loss": 0.4842, "step": 19917 }, { "epoch": 25.49504, "grad_norm": 1.1792917251586914, "learning_rate": 1.0180072028811525e-05, "loss": 0.505, "step": 19918 }, { "epoch": 25.49632, "grad_norm": 1.1648669242858887, "learning_rate": 1.0178071228491397e-05, "loss": 0.4483, "step": 19919 }, { "epoch": 25.4976, "grad_norm": 1.1607261896133423, "learning_rate": 1.0176070428171269e-05, "loss": 0.4921, "step": 19920 }, { "epoch": 25.49888, "grad_norm": 1.116874098777771, "learning_rate": 1.017406962785114e-05, "loss": 0.4774, "step": 19921 }, { "epoch": 25.50016, "grad_norm": 1.107079267501831, "learning_rate": 1.0172068827531013e-05, "loss": 0.4342, "step": 19922 }, { "epoch": 25.50144, "grad_norm": 1.134993553161621, 
"learning_rate": 1.0170068027210885e-05, "loss": 0.4498, "step": 19923 }, { "epoch": 25.50272, "grad_norm": 1.218907356262207, "learning_rate": 1.0168067226890756e-05, "loss": 0.4602, "step": 19924 }, { "epoch": 25.504, "grad_norm": 1.1606868505477905, "learning_rate": 1.016606642657063e-05, "loss": 0.4432, "step": 19925 }, { "epoch": 25.50528, "grad_norm": 1.0475679636001587, "learning_rate": 1.01640656262505e-05, "loss": 0.4417, "step": 19926 }, { "epoch": 25.50656, "grad_norm": 1.0612995624542236, "learning_rate": 1.0162064825930374e-05, "loss": 0.4264, "step": 19927 }, { "epoch": 25.50784, "grad_norm": 1.1685255765914917, "learning_rate": 1.0160064025610244e-05, "loss": 0.4719, "step": 19928 }, { "epoch": 25.50912, "grad_norm": 1.1424115896224976, "learning_rate": 1.0158063225290117e-05, "loss": 0.4454, "step": 19929 }, { "epoch": 25.5104, "grad_norm": 1.1809263229370117, "learning_rate": 1.0156062424969988e-05, "loss": 0.4515, "step": 19930 }, { "epoch": 25.51168, "grad_norm": 1.25111985206604, "learning_rate": 1.0154061624649861e-05, "loss": 0.5329, "step": 19931 }, { "epoch": 25.51296, "grad_norm": 1.130300760269165, "learning_rate": 1.0152060824329733e-05, "loss": 0.4795, "step": 19932 }, { "epoch": 25.51424, "grad_norm": 1.1218140125274658, "learning_rate": 1.0150060024009605e-05, "loss": 0.4573, "step": 19933 }, { "epoch": 25.51552, "grad_norm": 1.183595895767212, "learning_rate": 1.0148059223689477e-05, "loss": 0.4736, "step": 19934 }, { "epoch": 25.5168, "grad_norm": 1.1084133386611938, "learning_rate": 1.0146058423369348e-05, "loss": 0.4453, "step": 19935 }, { "epoch": 25.51808, "grad_norm": 1.1661556959152222, "learning_rate": 1.014405762304922e-05, "loss": 0.4992, "step": 19936 }, { "epoch": 25.51936, "grad_norm": 1.1977107524871826, "learning_rate": 1.0142056822729092e-05, "loss": 0.4746, "step": 19937 }, { "epoch": 25.52064, "grad_norm": 1.1810569763183594, "learning_rate": 1.0140056022408964e-05, "loss": 0.4634, "step": 19938 }, { "epoch": 
25.52192, "grad_norm": 1.1430659294128418, "learning_rate": 1.0138055222088836e-05, "loss": 0.5255, "step": 19939 }, { "epoch": 25.5232, "grad_norm": 1.135312795639038, "learning_rate": 1.0136054421768708e-05, "loss": 0.4624, "step": 19940 }, { "epoch": 25.52448, "grad_norm": 1.0835295915603638, "learning_rate": 1.013405362144858e-05, "loss": 0.4393, "step": 19941 }, { "epoch": 25.52576, "grad_norm": 1.1327999830245972, "learning_rate": 1.0132052821128451e-05, "loss": 0.4165, "step": 19942 }, { "epoch": 25.52704, "grad_norm": 1.2144399881362915, "learning_rate": 1.0130052020808323e-05, "loss": 0.5114, "step": 19943 }, { "epoch": 25.52832, "grad_norm": 1.2352372407913208, "learning_rate": 1.0128051220488195e-05, "loss": 0.5182, "step": 19944 }, { "epoch": 25.5296, "grad_norm": 1.109834909439087, "learning_rate": 1.0126050420168067e-05, "loss": 0.4612, "step": 19945 }, { "epoch": 25.53088, "grad_norm": 1.163332462310791, "learning_rate": 1.0124049619847939e-05, "loss": 0.4823, "step": 19946 }, { "epoch": 25.53216, "grad_norm": 1.219772458076477, "learning_rate": 1.012204881952781e-05, "loss": 0.5057, "step": 19947 }, { "epoch": 25.53344, "grad_norm": 1.111663818359375, "learning_rate": 1.0120048019207684e-05, "loss": 0.4297, "step": 19948 }, { "epoch": 25.53472, "grad_norm": 1.1989513635635376, "learning_rate": 1.0118047218887554e-05, "loss": 0.4661, "step": 19949 }, { "epoch": 25.536, "grad_norm": 1.1225396394729614, "learning_rate": 1.0116046418567428e-05, "loss": 0.5113, "step": 19950 }, { "epoch": 25.53728, "grad_norm": 1.134542465209961, "learning_rate": 1.0114045618247298e-05, "loss": 0.4717, "step": 19951 }, { "epoch": 25.53856, "grad_norm": 1.1759467124938965, "learning_rate": 1.0112044817927172e-05, "loss": 0.4592, "step": 19952 }, { "epoch": 25.53984, "grad_norm": 1.1189574003219604, "learning_rate": 1.0110044017607042e-05, "loss": 0.4471, "step": 19953 }, { "epoch": 25.54112, "grad_norm": 1.2080943584442139, "learning_rate": 1.0108043217286915e-05, "loss": 
0.5141, "step": 19954 }, { "epoch": 25.5424, "grad_norm": 1.1960744857788086, "learning_rate": 1.0106042416966787e-05, "loss": 0.4721, "step": 19955 }, { "epoch": 25.54368, "grad_norm": 1.1887983083724976, "learning_rate": 1.010404161664666e-05, "loss": 0.4531, "step": 19956 }, { "epoch": 25.54496, "grad_norm": 1.228134036064148, "learning_rate": 1.0102040816326531e-05, "loss": 0.476, "step": 19957 }, { "epoch": 25.54624, "grad_norm": 1.1741596460342407, "learning_rate": 1.0100040016006403e-05, "loss": 0.4897, "step": 19958 }, { "epoch": 25.54752, "grad_norm": 1.1398587226867676, "learning_rate": 1.0098039215686275e-05, "loss": 0.4474, "step": 19959 }, { "epoch": 25.5488, "grad_norm": 1.1780531406402588, "learning_rate": 1.0096038415366147e-05, "loss": 0.4595, "step": 19960 }, { "epoch": 25.55008, "grad_norm": 1.1379321813583374, "learning_rate": 1.0094037615046018e-05, "loss": 0.4659, "step": 19961 }, { "epoch": 25.55136, "grad_norm": 1.0986708402633667, "learning_rate": 1.0092036814725892e-05, "loss": 0.4721, "step": 19962 }, { "epoch": 25.55264, "grad_norm": 1.1313260793685913, "learning_rate": 1.0090036014405762e-05, "loss": 0.4701, "step": 19963 }, { "epoch": 25.55392, "grad_norm": 1.0673236846923828, "learning_rate": 1.0088035214085636e-05, "loss": 0.4375, "step": 19964 }, { "epoch": 25.5552, "grad_norm": 1.1325130462646484, "learning_rate": 1.0086034413765506e-05, "loss": 0.4712, "step": 19965 }, { "epoch": 25.55648, "grad_norm": 1.095177412033081, "learning_rate": 1.008403361344538e-05, "loss": 0.4617, "step": 19966 }, { "epoch": 25.557760000000002, "grad_norm": 1.203389048576355, "learning_rate": 1.008203281312525e-05, "loss": 0.4962, "step": 19967 }, { "epoch": 25.55904, "grad_norm": 1.1400015354156494, "learning_rate": 1.0080032012805123e-05, "loss": 0.4451, "step": 19968 }, { "epoch": 25.56032, "grad_norm": 1.153382420539856, "learning_rate": 1.0078031212484995e-05, "loss": 0.4675, "step": 19969 }, { "epoch": 25.5616, "grad_norm": 1.207768440246582, 
"learning_rate": 1.0076030412164867e-05, "loss": 0.5011, "step": 19970 }, { "epoch": 25.56288, "grad_norm": 1.1720563173294067, "learning_rate": 1.0074029611844739e-05, "loss": 0.4832, "step": 19971 }, { "epoch": 25.56416, "grad_norm": 1.2000620365142822, "learning_rate": 1.007202881152461e-05, "loss": 0.4259, "step": 19972 }, { "epoch": 25.56544, "grad_norm": 1.208329200744629, "learning_rate": 1.0070028011204482e-05, "loss": 0.543, "step": 19973 }, { "epoch": 25.56672, "grad_norm": 1.165389895439148, "learning_rate": 1.0068027210884354e-05, "loss": 0.4622, "step": 19974 }, { "epoch": 25.568, "grad_norm": 1.1432007551193237, "learning_rate": 1.0066026410564226e-05, "loss": 0.4856, "step": 19975 }, { "epoch": 25.56928, "grad_norm": 1.1231637001037598, "learning_rate": 1.0064025610244098e-05, "loss": 0.4722, "step": 19976 }, { "epoch": 25.57056, "grad_norm": 1.2253926992416382, "learning_rate": 1.006202480992397e-05, "loss": 0.4966, "step": 19977 }, { "epoch": 25.57184, "grad_norm": 1.1498738527297974, "learning_rate": 1.0060024009603842e-05, "loss": 0.4766, "step": 19978 }, { "epoch": 25.57312, "grad_norm": 1.264626383781433, "learning_rate": 1.0058023209283714e-05, "loss": 0.547, "step": 19979 }, { "epoch": 25.5744, "grad_norm": 1.1682270765304565, "learning_rate": 1.0056022408963585e-05, "loss": 0.4642, "step": 19980 }, { "epoch": 25.57568, "grad_norm": 1.1158068180084229, "learning_rate": 1.0054021608643457e-05, "loss": 0.4469, "step": 19981 }, { "epoch": 25.57696, "grad_norm": 1.223067283630371, "learning_rate": 1.005202080832333e-05, "loss": 0.4862, "step": 19982 }, { "epoch": 25.57824, "grad_norm": 1.2305022478103638, "learning_rate": 1.0050020008003201e-05, "loss": 0.5224, "step": 19983 }, { "epoch": 25.57952, "grad_norm": 1.176965594291687, "learning_rate": 1.0048019207683073e-05, "loss": 0.4521, "step": 19984 }, { "epoch": 25.5808, "grad_norm": 1.1142029762268066, "learning_rate": 1.0046018407362946e-05, "loss": 0.4352, "step": 19985 }, { "epoch": 
25.58208, "grad_norm": 1.123673677444458, "learning_rate": 1.0044017607042817e-05, "loss": 0.4876, "step": 19986 }, { "epoch": 25.58336, "grad_norm": 1.1586241722106934, "learning_rate": 1.004201680672269e-05, "loss": 0.4535, "step": 19987 }, { "epoch": 25.58464, "grad_norm": 1.125301480293274, "learning_rate": 1.004001600640256e-05, "loss": 0.4405, "step": 19988 }, { "epoch": 25.58592, "grad_norm": 1.1621462106704712, "learning_rate": 1.0038015206082434e-05, "loss": 0.4536, "step": 19989 }, { "epoch": 25.5872, "grad_norm": 1.1989449262619019, "learning_rate": 1.0036014405762304e-05, "loss": 0.4923, "step": 19990 }, { "epoch": 25.58848, "grad_norm": 1.2146766185760498, "learning_rate": 1.0034013605442178e-05, "loss": 0.4667, "step": 19991 }, { "epoch": 25.58976, "grad_norm": 1.1815863847732544, "learning_rate": 1.003201280512205e-05, "loss": 0.4751, "step": 19992 }, { "epoch": 25.59104, "grad_norm": 1.1977012157440186, "learning_rate": 1.0030012004801921e-05, "loss": 0.4829, "step": 19993 }, { "epoch": 25.59232, "grad_norm": 1.1629470586776733, "learning_rate": 1.0028011204481793e-05, "loss": 0.4888, "step": 19994 }, { "epoch": 25.5936, "grad_norm": 1.2305023670196533, "learning_rate": 1.0026010404161665e-05, "loss": 0.5316, "step": 19995 }, { "epoch": 25.59488, "grad_norm": 1.1644953489303589, "learning_rate": 1.0024009603841537e-05, "loss": 0.5375, "step": 19996 }, { "epoch": 25.59616, "grad_norm": 1.1814461946487427, "learning_rate": 1.0022008803521409e-05, "loss": 0.4741, "step": 19997 }, { "epoch": 25.59744, "grad_norm": 1.1363074779510498, "learning_rate": 1.002000800320128e-05, "loss": 0.4249, "step": 19998 }, { "epoch": 25.59872, "grad_norm": 1.2371721267700195, "learning_rate": 1.0018007202881154e-05, "loss": 0.4753, "step": 19999 }, { "epoch": 25.6, "grad_norm": 1.137081265449524, "learning_rate": 1.0016006402561024e-05, "loss": 0.4507, "step": 20000 } ], "logging_steps": 1, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 33, 
"save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.571466040761385e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }