{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00026595744680851064, "grad_norm": 12.928236961364746, "learning_rate": 1.0638297872340427e-08, "loss": 1.5564, "step": 1 }, { "epoch": 0.0005319148936170213, "grad_norm": 12.116073608398438, "learning_rate": 2.1276595744680853e-08, "loss": 1.5756, "step": 2 }, { "epoch": 0.0007978723404255319, "grad_norm": 13.450613975524902, "learning_rate": 3.191489361702128e-08, "loss": 1.6078, "step": 3 }, { "epoch": 0.0010638297872340426, "grad_norm": 14.591333389282227, "learning_rate": 4.2553191489361707e-08, "loss": 1.6333, "step": 4 }, { "epoch": 0.0013297872340425532, "grad_norm": 14.167532920837402, "learning_rate": 5.319148936170213e-08, "loss": 1.4764, "step": 5 }, { "epoch": 0.0015957446808510637, "grad_norm": 11.665863037109375, "learning_rate": 6.382978723404255e-08, "loss": 1.5681, "step": 6 }, { "epoch": 0.0018617021276595746, "grad_norm": 12.705963134765625, "learning_rate": 7.446808510638299e-08, "loss": 1.5249, "step": 7 }, { "epoch": 0.002127659574468085, "grad_norm": 13.839447021484375, "learning_rate": 8.510638297872341e-08, "loss": 1.6567, "step": 8 }, { "epoch": 0.0023936170212765957, "grad_norm": 11.46570110321045, "learning_rate": 9.574468085106384e-08, "loss": 1.4166, "step": 9 }, { "epoch": 0.0026595744680851063, "grad_norm": 12.468977928161621, "learning_rate": 1.0638297872340426e-07, "loss": 1.4788, "step": 10 }, { "epoch": 0.002925531914893617, "grad_norm": 10.813947677612305, "learning_rate": 1.1702127659574468e-07, "loss": 1.3127, "step": 11 }, { "epoch": 0.0031914893617021275, "grad_norm": 12.833952903747559, "learning_rate": 1.276595744680851e-07, "loss": 1.5291, "step": 12 }, { "epoch": 0.003457446808510638, "grad_norm": 13.475564956665039, "learning_rate": 1.3829787234042553e-07, "loss": 1.4629, "step": 13 }, { "epoch": 0.003723404255319149, "grad_norm": 11.995802879333496, "learning_rate": 1.4893617021276598e-07, "loss": 1.5887, "step": 14 }, { "epoch": 0.003989361702127659, "grad_norm": 14.704851150512695, "learning_rate": 1.5957446808510638e-07, "loss": 1.4533, "step": 15 }, { "epoch": 0.00425531914893617, "grad_norm": 11.153929710388184, "learning_rate": 1.7021276595744683e-07, "loss": 1.4027, "step": 16 }, { "epoch": 0.0045212765957446804, "grad_norm": 14.091814994812012, "learning_rate": 1.8085106382978722e-07, "loss": 1.6199, "step": 17 }, { "epoch": 0.0047872340425531915, "grad_norm": 13.533143997192383, "learning_rate": 1.9148936170212767e-07, "loss": 1.4809, "step": 18 }, { "epoch": 0.0050531914893617025, "grad_norm": 13.076473236083984, "learning_rate": 2.0212765957446812e-07, "loss": 1.5374, "step": 19 }, { "epoch": 0.005319148936170213, "grad_norm": 13.062971115112305, "learning_rate": 2.1276595744680852e-07, "loss": 1.6008, "step": 20 }, { "epoch": 0.005585106382978724, "grad_norm": 13.033509254455566, "learning_rate": 2.2340425531914897e-07, "loss": 1.4679, "step": 21 }, { "epoch": 0.005851063829787234, "grad_norm": 11.98855972290039, "learning_rate": 2.3404255319148937e-07, "loss": 1.5049, "step": 22 }, { "epoch": 0.006117021276595745, "grad_norm": 13.161596298217773, "learning_rate": 2.446808510638298e-07, "loss": 1.5114, "step": 23 }, { "epoch": 0.006382978723404255, "grad_norm": 12.387269020080566, "learning_rate": 2.553191489361702e-07, "loss": 1.3019, "step": 24 }, { "epoch": 0.006648936170212766, "grad_norm": 10.667431831359863, "learning_rate": 2.6595744680851066e-07, "loss": 1.3113, "step": 25 }, { "epoch": 0.006914893617021276, "grad_norm": 11.682806015014648, "learning_rate": 2.7659574468085106e-07, "loss": 1.627, "step": 26 }, { "epoch": 0.007180851063829787, "grad_norm": 11.338486671447754, "learning_rate": 2.872340425531915e-07, "loss": 1.6309, "step": 27 }, { "epoch": 0.007446808510638298, "grad_norm": 12.796504020690918, "learning_rate": 2.9787234042553196e-07, "loss": 1.4464, "step": 28 }, { "epoch": 0.007712765957446808, "grad_norm": 12.2352876663208, "learning_rate": 3.0851063829787236e-07, "loss": 1.5748, "step": 29 }, { "epoch": 0.007978723404255319, "grad_norm": 10.04947566986084, "learning_rate": 3.1914893617021275e-07, "loss": 1.3302, "step": 30 }, { "epoch": 0.00824468085106383, "grad_norm": 11.51389217376709, "learning_rate": 3.297872340425532e-07, "loss": 1.3543, "step": 31 }, { "epoch": 0.00851063829787234, "grad_norm": 9.522992134094238, "learning_rate": 3.4042553191489365e-07, "loss": 1.4485, "step": 32 }, { "epoch": 0.008776595744680852, "grad_norm": 8.156554222106934, "learning_rate": 3.510638297872341e-07, "loss": 1.3791, "step": 33 }, { "epoch": 0.009042553191489361, "grad_norm": 10.546247482299805, "learning_rate": 3.6170212765957445e-07, "loss": 1.6197, "step": 34 }, { "epoch": 0.009308510638297872, "grad_norm": 8.094082832336426, "learning_rate": 3.723404255319149e-07, "loss": 1.2722, "step": 35 }, { "epoch": 0.009574468085106383, "grad_norm": 7.64621114730835, "learning_rate": 3.8297872340425535e-07, "loss": 1.2489, "step": 36 }, { "epoch": 0.009840425531914894, "grad_norm": 7.087127208709717, "learning_rate": 3.936170212765958e-07, "loss": 1.3383, "step": 37 }, { "epoch": 0.010106382978723405, "grad_norm": 7.989037990570068, "learning_rate": 4.0425531914893625e-07, "loss": 1.2275, "step": 38 }, { "epoch": 0.010372340425531914, "grad_norm": 9.057306289672852, "learning_rate": 4.148936170212766e-07, "loss": 1.4094, "step": 39 }, { "epoch": 0.010638297872340425, "grad_norm": 7.628477573394775, "learning_rate": 4.2553191489361704e-07, "loss": 1.3137, "step": 40 }, { "epoch": 0.010904255319148936, "grad_norm": 7.493610858917236, "learning_rate": 4.361702127659575e-07, "loss": 1.3603, "step": 41 }, { "epoch": 0.011170212765957447, "grad_norm": 6.819916725158691, "learning_rate": 4.4680851063829794e-07, "loss": 1.5013, "step": 42 }, { "epoch": 0.011436170212765957, "grad_norm": 7.222757339477539, "learning_rate": 4.574468085106383e-07, "loss": 1.4389, "step": 43 }, { "epoch": 0.011702127659574468, "grad_norm": 6.92927885055542, "learning_rate": 4.6808510638297873e-07, "loss": 1.386, "step": 44 }, { "epoch": 0.011968085106382979, "grad_norm": 6.100423336029053, "learning_rate": 4.787234042553192e-07, "loss": 1.3654, "step": 45 }, { "epoch": 0.01223404255319149, "grad_norm": 6.047520637512207, "learning_rate": 4.893617021276596e-07, "loss": 1.2467, "step": 46 }, { "epoch": 0.0125, "grad_norm": 6.429448127746582, "learning_rate": 5.000000000000001e-07, "loss": 1.2826, "step": 47 }, { "epoch": 0.01276595744680851, "grad_norm": 6.81625509262085, "learning_rate": 5.106382978723404e-07, "loss": 1.4576, "step": 48 }, { "epoch": 0.013031914893617021, "grad_norm": 5.9020609855651855, "learning_rate": 5.212765957446809e-07, "loss": 1.2929, "step": 49 }, { "epoch": 0.013297872340425532, "grad_norm": 6.343348979949951, "learning_rate": 5.319148936170213e-07, "loss": 1.4692, "step": 50 }, { "epoch": 0.013563829787234043, "grad_norm": 6.274758338928223, "learning_rate": 5.425531914893618e-07, "loss": 1.3331, "step": 51 }, { "epoch": 0.013829787234042552, "grad_norm": 6.188233852386475, "learning_rate": 5.531914893617021e-07, "loss": 1.4061, "step": 52 }, { "epoch": 0.014095744680851063, "grad_norm": 6.108701705932617, "learning_rate": 5.638297872340426e-07, "loss": 1.2786, "step": 53 }, { "epoch": 0.014361702127659574, "grad_norm": 6.032108306884766, "learning_rate": 5.74468085106383e-07, "loss": 1.3159, "step": 54 }, { "epoch": 0.014627659574468085, "grad_norm": 6.019993305206299, "learning_rate": 5.851063829787235e-07, "loss": 1.3846, "step": 55 }, { "epoch": 0.014893617021276596, "grad_norm": 6.405829906463623, "learning_rate": 5.957446808510639e-07, "loss": 1.3691, "step": 56 }, { "epoch": 0.015159574468085106, "grad_norm": 6.517266273498535, "learning_rate": 6.063829787234043e-07, "loss": 1.416, "step": 57 }, { "epoch": 0.015425531914893617, "grad_norm": 5.831709861755371, "learning_rate": 6.170212765957447e-07, "loss": 1.3022, "step": 58 }, { "epoch": 0.015691489361702126, "grad_norm": 6.413986682891846, "learning_rate": 6.276595744680851e-07, "loss": 1.2001, "step": 59 }, { "epoch": 0.015957446808510637, "grad_norm": 5.887234687805176, "learning_rate": 6.382978723404255e-07, "loss": 1.301, "step": 60 }, { "epoch": 0.016223404255319148, "grad_norm": 6.500317573547363, "learning_rate": 6.48936170212766e-07, "loss": 1.2389, "step": 61 }, { "epoch": 0.01648936170212766, "grad_norm": 5.423646450042725, "learning_rate": 6.595744680851064e-07, "loss": 1.1179, "step": 62 }, { "epoch": 0.01675531914893617, "grad_norm": 6.422118663787842, "learning_rate": 6.702127659574469e-07, "loss": 1.2685, "step": 63 }, { "epoch": 0.01702127659574468, "grad_norm": 6.100841999053955, "learning_rate": 6.808510638297873e-07, "loss": 1.3432, "step": 64 }, { "epoch": 0.017287234042553192, "grad_norm": 6.879647254943848, "learning_rate": 6.914893617021278e-07, "loss": 1.4595, "step": 65 }, { "epoch": 0.017553191489361703, "grad_norm": 5.739667892456055, "learning_rate": 7.021276595744682e-07, "loss": 1.254, "step": 66 }, { "epoch": 0.017819148936170214, "grad_norm": 5.58401346206665, "learning_rate": 7.127659574468087e-07, "loss": 1.275, "step": 67 }, { "epoch": 0.018085106382978722, "grad_norm": 5.75786018371582, "learning_rate": 7.234042553191489e-07, "loss": 1.2797, "step": 68 }, { "epoch": 0.018351063829787233, "grad_norm": 5.23975133895874, "learning_rate": 7.340425531914893e-07, "loss": 1.2314, "step": 69 }, { "epoch": 0.018617021276595744, "grad_norm": 5.783809661865234, "learning_rate": 7.446808510638298e-07, "loss": 1.2621, "step": 70 }, { "epoch": 0.018882978723404255, "grad_norm": 6.303256988525391, "learning_rate": 7.553191489361702e-07, "loss": 1.2988, "step": 71 }, { "epoch": 0.019148936170212766, "grad_norm": 6.035338401794434, "learning_rate": 7.659574468085107e-07, "loss": 1.3572, "step": 72 }, { "epoch": 0.019414893617021277, "grad_norm": 5.458433628082275, "learning_rate": 7.765957446808511e-07, "loss": 1.2515, "step": 73 }, { "epoch": 0.019680851063829788, "grad_norm": 5.706748008728027, "learning_rate": 7.872340425531916e-07, "loss": 1.2144, "step": 74 }, { "epoch": 0.0199468085106383, "grad_norm": 5.4996018409729, "learning_rate": 7.97872340425532e-07, "loss": 1.2999, "step": 75 }, { "epoch": 0.02021276595744681, "grad_norm": 5.666746139526367, "learning_rate": 8.085106382978725e-07, "loss": 1.2947, "step": 76 }, { "epoch": 0.020478723404255317, "grad_norm": 5.446689128875732, "learning_rate": 8.191489361702127e-07, "loss": 1.4081, "step": 77 }, { "epoch": 0.02074468085106383, "grad_norm": 5.886783123016357, "learning_rate": 8.297872340425532e-07, "loss": 1.5147, "step": 78 }, { "epoch": 0.02101063829787234, "grad_norm": 5.839478969573975, "learning_rate": 8.404255319148936e-07, "loss": 1.3047, "step": 79 }, { "epoch": 0.02127659574468085, "grad_norm": 5.6594767570495605, "learning_rate": 8.510638297872341e-07, "loss": 1.3499, "step": 80 }, { "epoch": 0.02154255319148936, "grad_norm": 5.712738990783691, "learning_rate": 8.617021276595745e-07, "loss": 1.2731, "step": 81 }, { "epoch": 0.021808510638297873, "grad_norm": 5.7129316329956055, "learning_rate": 8.72340425531915e-07, "loss": 1.2454, "step": 82 }, { "epoch": 0.022074468085106384, "grad_norm": 5.676748275756836, "learning_rate": 8.829787234042554e-07, "loss": 1.4916, "step": 83 }, { "epoch": 0.022340425531914895, "grad_norm": 5.481147289276123, "learning_rate": 8.936170212765959e-07, "loss": 1.3493, "step": 84 }, { "epoch": 0.022606382978723406, "grad_norm": 5.774475574493408, "learning_rate": 9.042553191489363e-07, "loss": 1.2583, "step": 85 }, { "epoch": 0.022872340425531913, "grad_norm": 6.059263229370117, "learning_rate": 9.148936170212766e-07, "loss": 1.2257, "step": 86 }, { "epoch": 0.023138297872340424, "grad_norm": 5.5594258308410645, "learning_rate": 9.25531914893617e-07, "loss": 1.3313, "step": 87 }, { "epoch": 0.023404255319148935, "grad_norm": 5.335761070251465, "learning_rate": 9.361702127659575e-07, "loss": 1.221, "step": 88 }, { "epoch": 0.023670212765957446, "grad_norm": 5.275820255279541, "learning_rate": 9.468085106382979e-07, "loss": 1.315, "step": 89 }, { "epoch": 0.023936170212765957, "grad_norm": 5.96125602722168, "learning_rate": 9.574468085106384e-07, "loss": 1.2792, "step": 90 }, { "epoch": 0.02420212765957447, "grad_norm": 5.549777984619141, "learning_rate": 9.680851063829788e-07, "loss": 1.2194, "step": 91 }, { "epoch": 0.02446808510638298, "grad_norm": 5.814997673034668, "learning_rate": 9.787234042553193e-07, "loss": 1.2917, "step": 92 }, { "epoch": 0.02473404255319149, "grad_norm": 5.332813739776611, "learning_rate": 9.893617021276597e-07, "loss": 1.2458, "step": 93 }, { "epoch": 0.025, "grad_norm": 5.473198890686035, "learning_rate": 1.0000000000000002e-06, "loss": 1.2752, "step": 94 }, { "epoch": 0.02526595744680851, "grad_norm": 5.484592914581299, "learning_rate": 1.0106382978723404e-06, "loss": 1.3052, "step": 95 }, { "epoch": 0.02553191489361702, "grad_norm": 6.4860453605651855, "learning_rate": 1.0212765957446809e-06, "loss": 1.4454, "step": 96 }, { "epoch": 0.02579787234042553, "grad_norm": 5.582982540130615, "learning_rate": 1.0319148936170213e-06, "loss": 1.2514, "step": 97 }, { "epoch": 0.026063829787234042, "grad_norm": 5.618495464324951, "learning_rate": 1.0425531914893618e-06, "loss": 1.4123, "step": 98 }, { "epoch": 0.026329787234042553, "grad_norm": 5.169803619384766, "learning_rate": 1.0531914893617022e-06, "loss": 1.3128, "step": 99 }, { "epoch": 0.026595744680851064, "grad_norm": 5.215284824371338, "learning_rate": 1.0638297872340427e-06, "loss": 1.4286, "step": 100 }, { "epoch": 0.026861702127659575, "grad_norm": 5.888491153717041, "learning_rate": 1.074468085106383e-06, "loss": 1.2953, "step": 101 }, { "epoch": 0.027127659574468086, "grad_norm": 5.597144603729248, "learning_rate": 1.0851063829787236e-06, "loss": 1.2401, "step": 102 }, { "epoch": 0.027393617021276597, "grad_norm": 5.215080261230469, "learning_rate": 1.095744680851064e-06, "loss": 1.1961, "step": 103 }, { "epoch": 0.027659574468085105, "grad_norm": 5.162172794342041, "learning_rate": 1.1063829787234042e-06, "loss": 1.2641, "step": 104 }, { "epoch": 0.027925531914893616, "grad_norm": 5.490815162658691, "learning_rate": 1.1170212765957447e-06, "loss": 1.1788, "step": 105 }, { "epoch": 0.028191489361702127, "grad_norm": 5.236513137817383, "learning_rate": 1.1276595744680851e-06, "loss": 1.3241, "step": 106 }, { "epoch": 0.028457446808510638, "grad_norm": 5.335816860198975, "learning_rate": 1.1382978723404256e-06, "loss": 1.299, "step": 107 }, { "epoch": 0.02872340425531915, "grad_norm": 5.176724910736084, "learning_rate": 1.148936170212766e-06, "loss": 1.3305, "step": 108 }, { "epoch": 0.02898936170212766, "grad_norm": 6.114458084106445, "learning_rate": 1.1595744680851065e-06, "loss": 1.3005, "step": 109 }, { "epoch": 0.02925531914893617, "grad_norm": 5.407876491546631, "learning_rate": 1.170212765957447e-06, "loss": 1.2806, "step": 110 }, { "epoch": 0.029521276595744682, "grad_norm": 4.949467658996582, "learning_rate": 1.1808510638297874e-06, "loss": 1.2961, "step": 111 }, { "epoch": 0.029787234042553193, "grad_norm": 6.091759204864502, "learning_rate": 1.1914893617021278e-06, "loss": 1.3533, "step": 112 }, { "epoch": 0.0300531914893617, "grad_norm": 6.605318069458008, "learning_rate": 1.202127659574468e-06, "loss": 1.3292, "step": 113 }, { "epoch": 0.03031914893617021, "grad_norm": 5.556684494018555, "learning_rate": 1.2127659574468085e-06, "loss": 1.2438, "step": 114 }, { "epoch": 0.030585106382978722, "grad_norm": 5.465230941772461, "learning_rate": 1.223404255319149e-06, "loss": 1.2679, "step": 115 }, { "epoch": 0.030851063829787233, "grad_norm": 5.770520210266113, "learning_rate": 1.2340425531914894e-06, "loss": 1.355, "step": 116 }, { "epoch": 0.031117021276595744, "grad_norm": 5.495830535888672, "learning_rate": 1.2446808510638299e-06, "loss": 1.2153, "step": 117 }, { "epoch": 0.03138297872340425, "grad_norm": 5.549342632293701, "learning_rate": 1.2553191489361701e-06, "loss": 1.3283, "step": 118 }, { "epoch": 0.03164893617021276, "grad_norm": 5.871270656585693, "learning_rate": 1.2659574468085106e-06, "loss": 1.2485, "step": 119 }, { "epoch": 0.031914893617021274, "grad_norm": 5.074721813201904, "learning_rate": 1.276595744680851e-06, "loss": 1.2725, "step": 120 }, { "epoch": 0.032180851063829785, "grad_norm": 5.2500715255737305, "learning_rate": 1.2872340425531915e-06, "loss": 1.1767, "step": 121 }, { "epoch": 0.032446808510638296, "grad_norm": 5.220420837402344, "learning_rate": 1.297872340425532e-06, "loss": 1.2566, "step": 122 }, { "epoch": 0.03271276595744681, "grad_norm": 5.691092014312744, "learning_rate": 1.3085106382978724e-06, "loss": 1.1828, "step": 123 }, { "epoch": 0.03297872340425532, "grad_norm": 5.540714740753174, "learning_rate": 1.3191489361702128e-06, "loss": 1.4373, "step": 124 }, { "epoch": 0.03324468085106383, "grad_norm": 5.538027286529541, "learning_rate": 1.3297872340425533e-06, "loss": 1.2955, "step": 125 }, { "epoch": 0.03351063829787234, "grad_norm": 5.601515769958496, "learning_rate": 1.3404255319148937e-06, "loss": 1.4246, "step": 126 }, { "epoch": 0.03377659574468085, "grad_norm": 5.398896217346191, "learning_rate": 1.3510638297872342e-06, "loss": 1.2479, "step": 127 }, { "epoch": 0.03404255319148936, "grad_norm": 5.281778335571289, "learning_rate": 1.3617021276595746e-06, "loss": 1.4188, "step": 128 }, { "epoch": 0.03430851063829787, "grad_norm": 5.898463249206543, "learning_rate": 1.372340425531915e-06, "loss": 1.2214, "step": 129 }, { "epoch": 0.034574468085106384, "grad_norm": 5.390676975250244, "learning_rate": 1.3829787234042555e-06, "loss": 1.2872, "step": 130 }, { "epoch": 0.034840425531914895, "grad_norm": 5.157502174377441, "learning_rate": 1.393617021276596e-06, "loss": 1.2954, "step": 131 }, { "epoch": 0.035106382978723406, "grad_norm": 5.678062438964844, "learning_rate": 1.4042553191489364e-06, "loss": 1.2732, "step": 132 }, { "epoch": 0.03537234042553192, "grad_norm": 5.359380722045898, "learning_rate": 1.4148936170212769e-06, "loss": 1.2858, "step": 133 }, { "epoch": 0.03563829787234043, "grad_norm": 6.153907775878906, "learning_rate": 1.4255319148936173e-06, "loss": 1.3225, "step": 134 }, { "epoch": 0.03590425531914894, "grad_norm": 5.03823709487915, "learning_rate": 1.4361702127659578e-06, "loss": 1.196, "step": 135 }, { "epoch": 0.036170212765957444, "grad_norm": 5.12296199798584, "learning_rate": 1.4468085106382978e-06, "loss": 1.1534, "step": 136 }, { "epoch": 0.036436170212765955, "grad_norm": 5.526867866516113, "learning_rate": 1.4574468085106382e-06, "loss": 1.3099, "step": 137 }, { "epoch": 0.036702127659574466, "grad_norm": 5.23512601852417, "learning_rate": 1.4680851063829787e-06, "loss": 1.167, "step": 138 }, { "epoch": 0.03696808510638298, "grad_norm": 5.28326940536499, "learning_rate": 1.4787234042553191e-06, "loss": 1.2882, "step": 139 }, { "epoch": 0.03723404255319149, "grad_norm": 6.0062336921691895, "learning_rate": 1.4893617021276596e-06, "loss": 1.2937, "step": 140 }, { "epoch": 0.0375, "grad_norm": 5.471292495727539, "learning_rate": 1.5e-06, "loss": 1.2783, "step": 141 }, { "epoch": 0.03776595744680851, "grad_norm": 4.784001350402832, "learning_rate": 1.5106382978723405e-06, "loss": 1.1493, "step": 142 }, { "epoch": 0.03803191489361702, "grad_norm": 5.167656898498535, "learning_rate": 1.521276595744681e-06, "loss": 1.2872, "step": 143 }, { "epoch": 0.03829787234042553, "grad_norm": 5.2528276443481445, "learning_rate": 1.5319148936170214e-06, "loss": 1.2876, "step": 144 }, { "epoch": 0.03856382978723404, "grad_norm": 5.4960784912109375, "learning_rate": 1.5425531914893618e-06, "loss": 1.2364, "step": 145 }, { "epoch": 0.038829787234042554, "grad_norm": 5.419551372528076, "learning_rate": 1.5531914893617023e-06, "loss": 1.3695, "step": 146 }, { "epoch": 0.039095744680851065, "grad_norm": 5.1890974044799805, "learning_rate": 1.5638297872340427e-06, "loss": 1.2263, "step": 147 }, { "epoch": 0.039361702127659576, "grad_norm": 5.578823566436768, "learning_rate": 1.5744680851063832e-06, "loss": 1.2531, "step": 148 }, { "epoch": 0.03962765957446809, "grad_norm": 5.37275505065918, "learning_rate": 1.5851063829787236e-06, "loss": 1.2201, "step": 149 }, { "epoch": 0.0398936170212766, "grad_norm": 5.344025135040283, "learning_rate": 1.595744680851064e-06, "loss": 1.1419, "step": 150 }, { "epoch": 0.04015957446808511, "grad_norm": 5.697562217712402, "learning_rate": 1.6063829787234045e-06, "loss": 1.3923, "step": 151 }, { "epoch": 0.04042553191489362, "grad_norm": 5.420823097229004, "learning_rate": 1.617021276595745e-06, "loss": 1.2936, "step": 152 }, { "epoch": 0.04069148936170213, "grad_norm": 5.53727912902832, "learning_rate": 1.6276595744680854e-06, "loss": 1.2047, "step": 153 }, { "epoch": 0.040957446808510635, "grad_norm": 5.577879428863525, "learning_rate": 1.6382978723404255e-06, "loss": 1.2495, "step": 154 }, { "epoch": 0.041223404255319146, "grad_norm": 5.115095138549805, "learning_rate": 1.648936170212766e-06, "loss": 1.3324, "step": 155 }, { "epoch": 0.04148936170212766, "grad_norm": 5.6801862716674805, "learning_rate": 1.6595744680851064e-06, "loss": 1.3554, "step": 156 }, { "epoch": 0.04175531914893617, "grad_norm": 5.293743133544922, "learning_rate": 1.6702127659574468e-06, "loss": 1.2226, "step": 157 }, { "epoch": 0.04202127659574468, "grad_norm": 5.129601955413818, "learning_rate": 1.6808510638297873e-06, "loss": 1.3393, "step": 158 }, { "epoch": 0.04228723404255319, "grad_norm": 5.572645664215088, "learning_rate": 1.6914893617021277e-06, "loss": 1.2734, "step": 159 }, { "epoch": 0.0425531914893617, "grad_norm": 4.944756507873535, "learning_rate": 1.7021276595744682e-06, "loss": 1.3417, "step": 160 }, { "epoch": 0.04281914893617021, "grad_norm": 4.982651710510254, "learning_rate": 1.7127659574468086e-06, "loss": 1.2622, "step": 161 }, { "epoch": 0.04308510638297872, "grad_norm": 5.134377479553223, "learning_rate": 1.723404255319149e-06, "loss": 1.1741, "step": 162 }, { "epoch": 0.043351063829787234, "grad_norm": 4.829857349395752, "learning_rate": 1.7340425531914895e-06, "loss": 1.2298, "step": 163 }, { "epoch": 0.043617021276595745, "grad_norm": 5.052809715270996, "learning_rate": 1.74468085106383e-06, "loss": 1.1607, "step": 164 }, { "epoch": 0.043882978723404256, "grad_norm": 5.3465776443481445, "learning_rate": 1.7553191489361704e-06, "loss": 1.3924, "step": 165 }, { "epoch": 0.04414893617021277, "grad_norm": 5.502316951751709, "learning_rate": 1.7659574468085109e-06, "loss": 1.1488, "step": 166 }, { "epoch": 0.04441489361702128, "grad_norm": 5.253002643585205, "learning_rate": 1.7765957446808513e-06, "loss": 1.2004, "step": 167 }, { "epoch": 0.04468085106382979, "grad_norm": 5.437882900238037, "learning_rate": 1.7872340425531918e-06, "loss": 1.3885, "step": 168 }, { "epoch": 0.0449468085106383, "grad_norm": 5.526264190673828, "learning_rate": 1.7978723404255322e-06, "loss": 1.2351, "step": 169 }, { "epoch": 0.04521276595744681, "grad_norm": 5.078868389129639, "learning_rate": 1.8085106382978727e-06, "loss": 1.1479, "step": 170 }, { "epoch": 0.04547872340425532, "grad_norm": 5.379688739776611, "learning_rate": 1.8191489361702131e-06, "loss": 1.246, "step": 171 }, { "epoch": 0.045744680851063826, "grad_norm": 4.756881237030029, "learning_rate": 1.8297872340425531e-06, "loss": 1.3602, "step": 172 }, { "epoch": 0.04601063829787234, "grad_norm": 5.651166915893555, "learning_rate": 1.8404255319148936e-06, "loss": 1.1183, "step": 173 }, { "epoch": 0.04627659574468085, "grad_norm": 5.725973129272461, "learning_rate": 1.851063829787234e-06, "loss": 1.2474, "step": 174 }, { "epoch": 0.04654255319148936, "grad_norm": 4.994713306427002, "learning_rate": 1.8617021276595745e-06, "loss": 1.1945, "step": 175 }, { "epoch": 0.04680851063829787, "grad_norm": 4.701328277587891, "learning_rate": 1.872340425531915e-06, "loss": 1.2735, "step": 176 }, { "epoch": 0.04707446808510638, "grad_norm": 5.917819023132324, "learning_rate": 1.8829787234042554e-06, "loss": 1.2192, "step": 177 }, { "epoch": 0.04734042553191489, "grad_norm": 5.055963039398193, "learning_rate": 1.8936170212765958e-06, "loss": 1.4119, "step": 178 }, { "epoch": 0.047606382978723404, "grad_norm": 5.516870021820068, "learning_rate": 1.9042553191489363e-06, "loss": 1.2739, "step": 179 }, { "epoch": 0.047872340425531915, "grad_norm": 5.217896461486816, "learning_rate": 1.9148936170212767e-06, "loss": 1.0916, "step": 180 }, { "epoch": 0.048138297872340426, "grad_norm": 5.3772807121276855, "learning_rate": 1.925531914893617e-06, "loss": 1.2636, "step": 181 }, { "epoch": 0.04840425531914894, "grad_norm": 5.261349678039551, "learning_rate": 1.9361702127659576e-06, "loss": 1.1872, "step": 182 }, { "epoch": 0.04867021276595745, "grad_norm": 5.209681510925293, "learning_rate": 1.946808510638298e-06, "loss": 1.1946, "step": 183 }, { "epoch": 0.04893617021276596, "grad_norm": 6.393560886383057, "learning_rate": 1.9574468085106385e-06, "loss": 1.4354, "step": 184 }, { "epoch": 0.04920212765957447, "grad_norm": 5.200966835021973, "learning_rate": 1.968085106382979e-06, "loss": 1.264, "step": 185 }, { "epoch": 0.04946808510638298, "grad_norm": 4.81060791015625, "learning_rate": 1.9787234042553194e-06, "loss": 1.345, "step": 186 }, { "epoch": 0.04973404255319149, "grad_norm": 5.786832332611084, "learning_rate": 1.98936170212766e-06, "loss": 1.2897, "step": 187 }, { "epoch": 0.05, "grad_norm": 5.332983493804932, "learning_rate": 2.0000000000000003e-06, "loss": 1.3621, "step": 188 }, { "epoch": 0.050265957446808514, "grad_norm": 5.093095779418945, "learning_rate": 2.0106382978723408e-06, "loss": 1.3366, "step": 189 }, { "epoch": 0.05053191489361702, "grad_norm": 5.604922771453857, "learning_rate": 2.021276595744681e-06, "loss": 1.2009, "step": 190 }, { "epoch": 0.05079787234042553, "grad_norm": 5.312707901000977, "learning_rate": 2.0319148936170213e-06, "loss": 1.1604, "step": 191 }, { "epoch": 0.05106382978723404, "grad_norm": 5.330122470855713, "learning_rate": 2.0425531914893617e-06, "loss": 1.2102, "step": 192 }, { "epoch": 0.05132978723404255, "grad_norm": 5.350152015686035, "learning_rate": 2.053191489361702e-06, "loss": 1.3483, "step": 193 }, { "epoch": 0.05159574468085106, "grad_norm": 5.540630340576172, "learning_rate": 2.0638297872340426e-06, "loss": 1.437, "step": 194 }, { "epoch": 0.05186170212765957, "grad_norm": 4.698929309844971, "learning_rate": 2.074468085106383e-06, "loss": 1.2083, "step": 195 }, { "epoch": 0.052127659574468084, "grad_norm": 5.128317356109619, "learning_rate": 2.0851063829787235e-06, "loss": 1.1502, "step": 196 }, { "epoch": 0.052393617021276595, "grad_norm": 5.425604343414307, "learning_rate": 2.095744680851064e-06, "loss": 1.2919, "step": 197 }, { "epoch": 0.052659574468085106, "grad_norm": 5.3685712814331055, "learning_rate": 2.1063829787234044e-06, "loss": 1.2305, "step": 198 }, { "epoch": 0.05292553191489362, "grad_norm": 6.010136127471924, "learning_rate": 2.117021276595745e-06, "loss": 1.0582, "step": 199 }, { "epoch": 0.05319148936170213, "grad_norm": 5.427469253540039, "learning_rate": 2.1276595744680853e-06, "loss": 1.2515, "step": 200 }, { "epoch": 0.05345744680851064, "grad_norm": 5.31635856628418, "learning_rate": 2.1382978723404258e-06, "loss": 1.2157, "step": 201 }, { "epoch": 0.05372340425531915, "grad_norm": 5.334502220153809, "learning_rate": 2.148936170212766e-06, "loss": 1.271, "step": 202 }, { "epoch": 0.05398936170212766, "grad_norm": 4.88215970993042, "learning_rate": 2.1595744680851067e-06, "loss": 1.2777, "step": 203 }, { "epoch": 0.05425531914893617, "grad_norm": 5.919299602508545, "learning_rate": 2.170212765957447e-06, "loss": 1.3336, "step": 204 }, { "epoch": 0.05452127659574468, "grad_norm": 5.037824630737305, "learning_rate": 2.1808510638297876e-06, "loss": 1.316, "step": 205 }, { "epoch": 0.054787234042553194, "grad_norm": 5.16343879699707, "learning_rate": 2.191489361702128e-06, "loss": 1.2724, "step": 206 }, { "epoch": 0.055053191489361705, "grad_norm": 5.36834192276001, "learning_rate": 2.2021276595744685e-06, "loss": 1.1693, "step": 207 }, { "epoch": 0.05531914893617021, "grad_norm": 4.99350118637085, "learning_rate": 2.2127659574468085e-06, "loss": 1.225, "step": 208 }, { "epoch": 0.05558510638297872, "grad_norm": 5.564612865447998, "learning_rate": 2.223404255319149e-06, "loss": 1.2125, "step": 209 }, { "epoch": 0.05585106382978723, "grad_norm": 5.21875, "learning_rate": 2.2340425531914894e-06, "loss": 1.3788, "step": 210 }, { "epoch": 0.05611702127659574, "grad_norm": 5.006836891174316, "learning_rate": 2.24468085106383e-06, "loss": 1.2095, "step": 211 }, { "epoch": 0.05638297872340425, "grad_norm": 5.6003546714782715, "learning_rate": 2.2553191489361703e-06, "loss": 1.3872, "step": 212 }, { "epoch": 0.056648936170212764, "grad_norm": 4.7773613929748535, "learning_rate": 2.2659574468085107e-06, "loss": 1.1979, "step": 213 }, { "epoch": 0.056914893617021275, "grad_norm": 4.554566860198975, "learning_rate": 2.276595744680851e-06, "loss": 1.1656, "step": 214 }, { "epoch": 0.057180851063829786, "grad_norm": 5.66951322555542, "learning_rate": 2.2872340425531916e-06, "loss": 1.3728, "step": 215 }, { "epoch": 0.0574468085106383, "grad_norm": 5.2931013107299805, "learning_rate": 2.297872340425532e-06, "loss": 1.2003, "step": 216 }, { "epoch": 0.05771276595744681, "grad_norm": 5.449213981628418, "learning_rate": 2.3085106382978725e-06, "loss": 1.2337, "step": 217 }, { "epoch": 0.05797872340425532, "grad_norm": 5.684970378875732, "learning_rate": 2.319148936170213e-06, "loss": 1.2196, "step": 218 }, { "epoch": 0.05824468085106383, "grad_norm": 5.038141250610352, "learning_rate": 2.3297872340425534e-06, "loss": 1.0954, "step": 219 }, { "epoch": 0.05851063829787234, "grad_norm": 5.255678176879883, "learning_rate": 2.340425531914894e-06, "loss": 1.3141, "step": 220 }, { "epoch": 0.05877659574468085, "grad_norm": 5.490760326385498, "learning_rate": 2.3510638297872343e-06, "loss": 1.1469, "step": 221 }, { "epoch": 0.059042553191489364, "grad_norm": 5.482240676879883, "learning_rate": 2.3617021276595748e-06, "loss": 1.2831, "step": 222 }, { "epoch": 0.059308510638297875, "grad_norm": 6.045271873474121, "learning_rate": 2.3723404255319152e-06, "loss": 1.1601, "step": 223 }, { "epoch": 0.059574468085106386, "grad_norm": 5.145684719085693, "learning_rate": 2.3829787234042557e-06, "loss": 1.1432, "step": 224 }, { "epoch": 0.0598404255319149, "grad_norm": 4.948934555053711, "learning_rate": 2.393617021276596e-06, "loss": 1.1199, "step": 225 }, { "epoch": 0.0601063829787234, "grad_norm": 5.273087978363037, "learning_rate": 2.404255319148936e-06, "loss": 1.3225, "step": 226 }, { "epoch": 0.06037234042553191, "grad_norm": 5.76677131652832, "learning_rate": 2.4148936170212766e-06, "loss": 1.3144, "step": 227 }, { "epoch": 0.06063829787234042, "grad_norm": 5.51316499710083, "learning_rate": 2.425531914893617e-06, "loss": 1.2931, "step": 228 }, { "epoch": 0.060904255319148934, "grad_norm": 5.077220916748047, "learning_rate": 2.4361702127659575e-06, "loss": 1.1972, "step": 229 }, { "epoch": 0.061170212765957445, "grad_norm": 5.733246803283691, "learning_rate": 2.446808510638298e-06, "loss": 1.2773, "step": 230 }, { "epoch": 0.061436170212765956, "grad_norm": 4.702721118927002, "learning_rate": 2.4574468085106384e-06, "loss": 1.2654, "step": 231 }, { "epoch": 0.06170212765957447, "grad_norm": 5.210516452789307, "learning_rate": 2.468085106382979e-06, "loss": 1.3222, "step": 232 }, { "epoch": 0.06196808510638298, "grad_norm": 5.6721720695495605, "learning_rate": 2.4787234042553193e-06, "loss": 1.1756, "step": 233 }, { "epoch": 0.06223404255319149, "grad_norm": 4.598169326782227, "learning_rate": 2.4893617021276598e-06, "loss": 1.2613, "step": 234 }, { "epoch": 0.0625, "grad_norm": 5.069137096405029, "learning_rate": 2.5e-06, "loss": 1.2629, "step": 235 }, { "epoch": 0.0627659574468085, "grad_norm": 4.875532627105713, "learning_rate": 2.5106382978723402e-06, "loss": 1.1515, "step": 236 }, { "epoch": 0.06303191489361702, "grad_norm": 5.547458171844482, "learning_rate": 2.521276595744681e-06, "loss": 1.4157, "step": 237 }, { "epoch": 0.06329787234042553, "grad_norm": 5.377124786376953, "learning_rate": 2.531914893617021e-06, "loss": 1.3036, "step": 238 }, { "epoch": 0.06356382978723404, "grad_norm": 5.135563850402832, "learning_rate": 2.542553191489362e-06, "loss": 1.1638, "step": 239 }, { "epoch": 0.06382978723404255, "grad_norm": 5.6008172035217285, "learning_rate": 2.553191489361702e-06, "loss": 1.2787, "step": 240 }, { "epoch": 0.06409574468085107, "grad_norm": 5.453914165496826, "learning_rate": 2.563829787234043e-06, "loss": 1.3239, "step": 241 }, { "epoch": 0.06436170212765957, "grad_norm": 5.219985485076904, "learning_rate": 2.574468085106383e-06, "loss": 1.0942, "step": 242 }, { "epoch": 0.06462765957446809, "grad_norm": 5.180700778961182, "learning_rate": 2.585106382978724e-06, "loss": 1.1501, "step": 243 }, { "epoch": 0.06489361702127659, "grad_norm": 5.2240071296691895, "learning_rate": 2.595744680851064e-06, "loss": 1.2269, "step": 244 }, { "epoch": 0.06515957446808511, "grad_norm": 6.328047275543213, "learning_rate": 2.6063829787234047e-06, "loss": 1.405, "step": 245 }, { "epoch": 0.06542553191489361, "grad_norm": 5.10886287689209, "learning_rate": 2.6170212765957447e-06, "loss": 1.2698, "step": 246 }, { "epoch": 0.06569148936170213, "grad_norm": 5.45538330078125, "learning_rate": 2.6276595744680856e-06, "loss": 1.33, "step": 247 }, { "epoch": 0.06595744680851064, "grad_norm": 5.294386386871338, "learning_rate": 2.6382978723404256e-06, "loss": 1.2895, "step": 248 }, { "epoch": 0.06622340425531915, "grad_norm": 4.7668776512146, "learning_rate": 2.6489361702127665e-06, "loss": 1.1176, "step": 249 }, { "epoch": 0.06648936170212766, "grad_norm": 4.915814399719238, "learning_rate": 2.6595744680851065e-06, "loss": 1.2469, "step": 250 }, { "epoch": 0.06675531914893618, "grad_norm": 5.320147514343262, "learning_rate": 2.6702127659574474e-06, "loss": 1.4904, "step": 251 }, { "epoch": 0.06702127659574468, "grad_norm": 5.417577266693115, "learning_rate": 2.6808510638297874e-06, "loss": 1.3166, "step": 252 }, { "epoch": 0.0672872340425532, "grad_norm": 4.704782485961914, "learning_rate": 2.6914893617021283e-06, "loss": 1.2362, "step": 253 }, { "epoch": 0.0675531914893617, "grad_norm": 5.100544452667236, "learning_rate": 2.7021276595744683e-06, "loss": 1.2969, "step": 254 }, { "epoch": 0.0678191489361702, "grad_norm": 6.336488723754883, "learning_rate": 2.7127659574468084e-06, "loss": 1.2708, "step": 255 }, { "epoch": 0.06808510638297872, "grad_norm": 5.281217098236084, "learning_rate": 2.7234042553191492e-06, "loss": 1.3103, "step": 256 }, { "epoch": 0.06835106382978723, "grad_norm": 5.127480983734131, "learning_rate": 2.7340425531914893e-06, "loss": 1.2957, "step": 257 }, { "epoch": 0.06861702127659575, "grad_norm": 5.289313316345215, "learning_rate": 2.74468085106383e-06, "loss": 1.2658, "step": 258 }, { "epoch": 0.06888297872340425, "grad_norm": 5.088155746459961, "learning_rate": 2.75531914893617e-06, "loss": 1.1359, "step": 259 }, { "epoch": 0.06914893617021277, "grad_norm": 5.367323875427246, "learning_rate": 2.765957446808511e-06, "loss": 1.2408, "step": 260 }, { "epoch": 0.06941489361702127, "grad_norm": 5.337047576904297, "learning_rate": 2.776595744680851e-06, "loss": 1.2908, "step": 261 }, { "epoch": 0.06968085106382979, "grad_norm": 5.167153358459473, "learning_rate": 2.787234042553192e-06, "loss": 1.3217, "step": 262 }, { "epoch": 0.0699468085106383, "grad_norm": 5.522439956665039, "learning_rate": 2.797872340425532e-06, "loss": 1.2799, "step": 263 }, { "epoch": 0.07021276595744681, "grad_norm": 4.691408157348633, "learning_rate": 2.808510638297873e-06, "loss": 1.096, "step": 264 }, { "epoch": 0.07047872340425532, "grad_norm": 5.208773612976074, "learning_rate": 2.819148936170213e-06, "loss": 1.3215, "step": 265 }, { "epoch": 0.07074468085106383, "grad_norm": 5.4790496826171875, "learning_rate": 2.8297872340425537e-06, "loss": 1.4218, "step": 266 }, { "epoch": 0.07101063829787234, "grad_norm": 5.256765842437744, "learning_rate": 2.8404255319148938e-06, "loss": 1.4242, "step": 267 }, { "epoch": 0.07127659574468086, "grad_norm": 4.874395370483398, "learning_rate": 2.8510638297872346e-06, "loss": 1.2518, "step": 268 }, { "epoch": 0.07154255319148936, "grad_norm": 5.108527183532715, "learning_rate": 2.8617021276595747e-06, "loss": 1.2919, "step": 269 }, { "epoch": 0.07180851063829788, "grad_norm": 5.333227157592773, "learning_rate": 2.8723404255319155e-06, "loss": 1.459, "step": 270 }, { "epoch": 0.07207446808510638, "grad_norm": 5.232532501220703, "learning_rate": 2.8829787234042556e-06, "loss": 1.1832, "step": 271 }, { "epoch": 0.07234042553191489, "grad_norm": 5.147657871246338, "learning_rate": 2.8936170212765956e-06, "loss": 1.3219, "step": 272 }, { "epoch": 0.0726063829787234, "grad_norm": 5.002472400665283, "learning_rate": 2.9042553191489365e-06, "loss": 1.2989, "step": 273 }, { "epoch": 0.07287234042553191, "grad_norm": 4.903095722198486, "learning_rate": 2.9148936170212765e-06, "loss": 1.1621, "step": 274 }, { "epoch": 0.07313829787234043, "grad_norm": 5.269963264465332, "learning_rate": 2.9255319148936174e-06, "loss": 1.2966, "step": 275 }, { "epoch": 0.07340425531914893, "grad_norm": 5.356837749481201, "learning_rate": 2.9361702127659574e-06, "loss": 1.2455, "step": 276 }, { "epoch": 0.07367021276595745, "grad_norm": 5.510587215423584, "learning_rate": 2.9468085106382983e-06, "loss": 1.2386, "step": 277 }, { "epoch": 0.07393617021276595, "grad_norm": 5.7554755210876465, "learning_rate": 2.9574468085106383e-06, "loss": 1.3096, "step": 278 }, { "epoch": 0.07420212765957447, "grad_norm": 5.236169815063477, "learning_rate": 2.968085106382979e-06, "loss": 1.2496, "step": 279 }, { "epoch": 0.07446808510638298, "grad_norm": 4.870725631713867, "learning_rate": 2.978723404255319e-06, "loss": 1.083, "step": 280 }, { "epoch": 0.0747340425531915, "grad_norm": 5.181726455688477, "learning_rate": 2.98936170212766e-06, "loss": 1.223, "step": 281 }, { "epoch": 0.075, "grad_norm": 4.924530506134033, "learning_rate": 3e-06, "loss": 1.2855, "step": 282 }, { "epoch": 0.07526595744680852, "grad_norm": 5.177605628967285, "learning_rate": 3.010638297872341e-06, "loss": 1.2215, "step": 283 }, { "epoch": 0.07553191489361702, "grad_norm": 4.895737648010254, "learning_rate": 3.021276595744681e-06, "loss": 1.2451, "step": 284 }, { "epoch": 0.07579787234042554, "grad_norm": 5.425995349884033, "learning_rate": 3.031914893617022e-06, "loss": 1.6053, "step": 285 }, { "epoch": 0.07606382978723404, "grad_norm": 5.228978157043457, "learning_rate": 3.042553191489362e-06, "loss": 1.1846, "step": 286 }, { "epoch": 0.07632978723404256, "grad_norm": 4.825231552124023, "learning_rate": 3.0531914893617027e-06, "loss": 1.1355, "step": 287 }, { "epoch": 0.07659574468085106, "grad_norm": 6.309840679168701, "learning_rate": 3.0638297872340428e-06, "loss": 1.1388, "step": 288 }, { "epoch": 0.07686170212765958, "grad_norm": 5.012725830078125, "learning_rate": 3.0744680851063836e-06, "loss": 0.9926, "step": 289 }, { "epoch": 0.07712765957446809, "grad_norm": 5.028249263763428, "learning_rate": 3.0851063829787237e-06, "loss": 1.2024, "step": 290 }, { "epoch": 0.07739361702127659, "grad_norm": 5.77925968170166, "learning_rate": 3.0957446808510637e-06, "loss": 1.5436, "step": 291 }, { "epoch": 0.07765957446808511, "grad_norm": 5.277095794677734, "learning_rate": 3.1063829787234046e-06, "loss": 1.2018, "step": 292 }, { "epoch": 0.07792553191489361, "grad_norm": 5.4600958824157715, "learning_rate": 3.1170212765957446e-06, "loss": 1.072, "step": 293 }, { "epoch": 0.07819148936170213, "grad_norm": 5.168891906738281, "learning_rate": 3.1276595744680855e-06, "loss": 1.3841, "step": 294 }, { "epoch": 0.07845744680851063, "grad_norm": 4.869060516357422, "learning_rate": 3.1382978723404255e-06, "loss": 1.1663, "step": 295 }, { "epoch": 0.07872340425531915, "grad_norm": 5.289313316345215, "learning_rate": 3.1489361702127664e-06, "loss": 1.0781, "step": 296 }, { "epoch": 0.07898936170212766, "grad_norm": 5.145017147064209, "learning_rate": 3.1595744680851064e-06, "loss": 1.1087, "step": 297 }, { "epoch": 0.07925531914893617, "grad_norm": 5.634250640869141, "learning_rate": 3.1702127659574473e-06, "loss": 1.3936, "step": 298 }, { "epoch": 0.07952127659574468, "grad_norm": 5.201961040496826, "learning_rate": 3.1808510638297873e-06, "loss": 1.3752, "step": 299 }, { "epoch": 0.0797872340425532, "grad_norm": 5.372065544128418, "learning_rate": 3.191489361702128e-06, "loss": 1.1715, "step": 300 }, { "epoch": 0.0800531914893617, "grad_norm": 6.010387420654297, "learning_rate": 3.202127659574468e-06, "loss": 1.2187, "step": 301 }, { "epoch": 0.08031914893617022, "grad_norm": 5.143375396728516, "learning_rate": 3.212765957446809e-06, "loss": 1.2051, "step": 302 }, { "epoch": 0.08058510638297872, "grad_norm": 5.376684665679932, "learning_rate": 3.223404255319149e-06, "loss": 1.2319, "step": 303 }, { "epoch": 0.08085106382978724, "grad_norm": 4.905093193054199, "learning_rate": 3.23404255319149e-06, "loss": 1.2187, "step": 304 }, { "epoch": 0.08111702127659574, "grad_norm": 5.650513648986816, "learning_rate": 3.24468085106383e-06, "loss": 1.1528, "step": 305 }, { "epoch": 0.08138297872340426, "grad_norm": 5.2889227867126465, "learning_rate": 3.255319148936171e-06, "loss": 1.0795, "step": 306 }, { "epoch": 0.08164893617021277, "grad_norm": 5.284914970397949, "learning_rate": 3.265957446808511e-06, "loss": 1.2885, "step": 307 }, { "epoch": 0.08191489361702127, "grad_norm": 5.4190449714660645, "learning_rate": 3.276595744680851e-06, "loss": 1.4991, "step": 308 }, { "epoch": 0.08218085106382979, "grad_norm": 4.965026378631592, "learning_rate": 3.287234042553192e-06, "loss": 1.2674, "step": 309 }, { "epoch": 0.08244680851063829, "grad_norm": 5.040426254272461, "learning_rate": 3.297872340425532e-06, "loss": 1.2347, "step": 310 }, { "epoch": 0.08271276595744681, "grad_norm": 5.759904384613037, "learning_rate": 3.3085106382978727e-06, "loss": 1.2976, "step": 311 }, { "epoch": 0.08297872340425531, "grad_norm": 4.893044471740723, "learning_rate": 3.3191489361702127e-06, "loss": 1.213, "step": 312 }, { "epoch": 0.08324468085106383, "grad_norm": 4.674813270568848, "learning_rate": 3.3297872340425536e-06, "loss": 1.2795, "step": 313 }, { "epoch": 0.08351063829787234, "grad_norm": 5.59810209274292, "learning_rate": 3.3404255319148936e-06, "loss": 1.2338, "step": 314 }, { "epoch": 0.08377659574468085, "grad_norm": 4.63198709487915, "learning_rate": 3.3510638297872345e-06, "loss": 1.2026, "step": 315 }, { "epoch": 0.08404255319148936, "grad_norm": 5.4756245613098145, "learning_rate": 3.3617021276595745e-06, "loss": 1.2838, "step": 316 }, { "epoch": 0.08430851063829788, "grad_norm": 5.258046627044678, "learning_rate": 3.3723404255319154e-06, "loss": 1.1449, "step": 317 }, { "epoch": 0.08457446808510638, "grad_norm": 5.205422878265381, "learning_rate": 3.3829787234042554e-06, "loss": 1.223, "step": 318 }, { "epoch": 0.0848404255319149, "grad_norm": 5.365026473999023, "learning_rate": 3.3936170212765963e-06, "loss": 1.191, "step": 319 }, { "epoch": 0.0851063829787234, "grad_norm": 5.367187023162842, "learning_rate": 3.4042553191489363e-06, "loss": 1.2246, "step": 320 }, { "epoch": 0.08537234042553192, "grad_norm": 5.512171745300293, "learning_rate": 3.414893617021277e-06, "loss": 1.2601, "step": 321 }, { "epoch": 0.08563829787234042, "grad_norm": 5.804540157318115, "learning_rate": 3.4255319148936172e-06, "loss": 1.1537, "step": 322 }, { "epoch": 0.08590425531914894, "grad_norm": 5.474178791046143, "learning_rate": 3.436170212765958e-06, "loss": 1.3175, "step": 323 }, { "epoch": 0.08617021276595745, "grad_norm": 5.454108715057373, "learning_rate": 3.446808510638298e-06, "loss": 1.1764, "step": 324 }, { "epoch": 0.08643617021276596, "grad_norm": 5.368601322174072, "learning_rate": 3.457446808510639e-06, "loss": 1.2001, "step": 325 }, { "epoch": 0.08670212765957447, "grad_norm": 5.19401741027832, "learning_rate": 3.468085106382979e-06, "loss": 1.2673, "step": 326 }, { "epoch": 0.08696808510638297, "grad_norm": 4.70231294631958, "learning_rate": 3.478723404255319e-06, "loss": 1.1736, "step": 327 }, { "epoch": 0.08723404255319149, "grad_norm": 5.607789039611816, "learning_rate": 3.48936170212766e-06, "loss": 1.1986, "step": 328 }, { "epoch": 0.0875, "grad_norm": 5.1046013832092285, "learning_rate": 3.5e-06, "loss": 1.2426, "step": 329 }, { "epoch": 0.08776595744680851, "grad_norm": 5.214546203613281, "learning_rate": 3.510638297872341e-06, "loss": 1.1211, "step": 330 }, { "epoch": 0.08803191489361702, "grad_norm": 4.989225387573242, "learning_rate": 3.521276595744681e-06, "loss": 1.3025, "step": 331 }, { "epoch": 0.08829787234042553, "grad_norm": 4.886022567749023, "learning_rate": 3.5319148936170217e-06, "loss": 1.2109, "step": 332 }, { "epoch": 0.08856382978723404, "grad_norm": 5.30552339553833, "learning_rate": 3.5425531914893617e-06, "loss": 1.1811, "step": 333 }, { "epoch": 0.08882978723404256, "grad_norm": 4.81152868270874, "learning_rate": 3.5531914893617026e-06, "loss": 1.1677, "step": 334 }, { "epoch": 0.08909574468085106, "grad_norm": 5.06434440612793, "learning_rate": 3.5638297872340426e-06, "loss": 1.2425, "step": 335 }, { "epoch": 0.08936170212765958, "grad_norm": 7.036694526672363, "learning_rate": 3.5744680851063835e-06, "loss": 1.2682, "step": 336 }, { "epoch": 0.08962765957446808, "grad_norm": 5.208419322967529, "learning_rate": 3.5851063829787235e-06, "loss": 1.2394, "step": 337 }, { "epoch": 0.0898936170212766, "grad_norm": 4.592006206512451, "learning_rate": 3.5957446808510644e-06, "loss": 1.2083, "step": 338 }, { "epoch": 0.0901595744680851, "grad_norm": 5.002110481262207, "learning_rate": 3.6063829787234044e-06, "loss": 1.2284, "step": 339 }, { "epoch": 0.09042553191489362, "grad_norm": 4.708452224731445, "learning_rate": 3.6170212765957453e-06, "loss": 1.1616, "step": 340 }, { "epoch": 0.09069148936170213, "grad_norm": 4.872410297393799, "learning_rate": 3.6276595744680853e-06, "loss": 1.181, "step": 341 }, { "epoch": 0.09095744680851064, "grad_norm": 5.24644136428833, "learning_rate": 3.6382978723404262e-06, "loss": 1.285, "step": 342 }, { "epoch": 0.09122340425531915, "grad_norm": 5.019744396209717, "learning_rate": 3.6489361702127662e-06, "loss": 1.2677, "step": 343 }, { "epoch": 0.09148936170212765, "grad_norm": 6.380999565124512, "learning_rate": 3.6595744680851063e-06, "loss": 1.1268, "step": 344 }, { "epoch": 0.09175531914893617, "grad_norm": 5.100999355316162, "learning_rate": 3.670212765957447e-06, "loss": 1.2023, "step": 345 }, { "epoch": 0.09202127659574467, "grad_norm": 5.221463203430176, "learning_rate": 3.680851063829787e-06, "loss": 1.2482, "step": 346 }, { "epoch": 0.09228723404255319, "grad_norm": 4.895312309265137, "learning_rate": 3.691489361702128e-06, "loss": 1.2515, "step": 347 }, { "epoch": 0.0925531914893617, "grad_norm": 4.988393306732178, "learning_rate": 3.702127659574468e-06, "loss": 1.1969, "step": 348 }, { "epoch": 0.09281914893617021, "grad_norm": 5.19982385635376, "learning_rate": 3.712765957446809e-06, "loss": 1.2488, "step": 349 }, { "epoch": 0.09308510638297872, "grad_norm": 5.010618686676025, "learning_rate": 3.723404255319149e-06, "loss": 1.2475, "step": 350 }, { "epoch": 0.09335106382978724, "grad_norm": 4.905212879180908, "learning_rate": 3.73404255319149e-06, "loss": 1.3921, "step": 351 }, { "epoch": 0.09361702127659574, "grad_norm": 5.373055458068848, "learning_rate": 3.74468085106383e-06, "loss": 1.4741, "step": 352 }, { "epoch": 0.09388297872340426, "grad_norm": 4.804662704467773, "learning_rate": 3.7553191489361707e-06, "loss": 1.2208, "step": 353 }, { "epoch": 0.09414893617021276, "grad_norm": 5.451242923736572, "learning_rate": 3.7659574468085108e-06, "loss": 1.3764, "step": 354 }, { "epoch": 0.09441489361702128, "grad_norm": 5.5642409324646, "learning_rate": 3.7765957446808516e-06, "loss": 1.4001, "step": 355 }, { "epoch": 0.09468085106382979, "grad_norm": 4.492448806762695, "learning_rate": 3.7872340425531917e-06, "loss": 1.1094, "step": 356 }, { "epoch": 0.0949468085106383, "grad_norm": 5.439316749572754, "learning_rate": 3.7978723404255325e-06, "loss": 1.3348, "step": 357 }, { "epoch": 0.09521276595744681, "grad_norm": 4.795385837554932, "learning_rate": 3.8085106382978726e-06, "loss": 1.23, "step": 358 }, { "epoch": 0.09547872340425533, "grad_norm": 5.010631084442139, "learning_rate": 3.819148936170213e-06, "loss": 1.1724, "step": 359 }, { "epoch": 0.09574468085106383, "grad_norm": 5.740480422973633, "learning_rate": 3.8297872340425535e-06, "loss": 1.3756, "step": 360 }, { "epoch": 0.09601063829787235, "grad_norm": 4.986555099487305, "learning_rate": 3.840425531914894e-06, "loss": 1.2722, "step": 361 }, { "epoch": 0.09627659574468085, "grad_norm": 5.041133880615234, "learning_rate": 3.851063829787234e-06, "loss": 1.0448, "step": 362 }, { "epoch": 0.09654255319148936, "grad_norm": 5.378165245056152, "learning_rate": 3.861702127659575e-06, "loss": 1.2111, "step": 363 }, { "epoch": 0.09680851063829787, "grad_norm": 4.8053059577941895, "learning_rate": 3.872340425531915e-06, "loss": 1.1344, "step": 364 }, { "epoch": 0.09707446808510638, "grad_norm": 5.25260066986084, "learning_rate": 3.882978723404256e-06, "loss": 1.1288, "step": 365 }, { "epoch": 0.0973404255319149, "grad_norm": 4.839104175567627, "learning_rate": 3.893617021276596e-06, "loss": 1.2131, "step": 366 }, { "epoch": 0.0976063829787234, "grad_norm": 5.487301826477051, "learning_rate": 3.904255319148937e-06, "loss": 1.1969, "step": 367 }, { "epoch": 0.09787234042553192, "grad_norm": 4.733921051025391, "learning_rate": 3.914893617021277e-06, "loss": 1.097, "step": 368 }, { "epoch": 0.09813829787234042, "grad_norm": 5.042628765106201, "learning_rate": 3.9255319148936175e-06, "loss": 1.3554, "step": 369 }, { "epoch": 0.09840425531914894, "grad_norm": 6.3879876136779785, "learning_rate": 3.936170212765958e-06, "loss": 1.1231, "step": 370 }, { "epoch": 0.09867021276595744, "grad_norm": 4.907758712768555, "learning_rate": 3.946808510638298e-06, "loss": 1.4223, "step": 371 }, { "epoch": 0.09893617021276596, "grad_norm": 4.765664577484131, "learning_rate": 3.957446808510639e-06, "loss": 1.2346, "step": 372 }, { "epoch": 0.09920212765957447, "grad_norm": 4.949317932128906, "learning_rate": 3.968085106382979e-06, "loss": 1.1447, "step": 373 }, { "epoch": 0.09946808510638298, "grad_norm": 5.256651878356934, "learning_rate": 3.97872340425532e-06, "loss": 1.25, "step": 374 }, { "epoch": 0.09973404255319149, "grad_norm": 5.307461261749268, "learning_rate": 3.98936170212766e-06, "loss": 1.3373, "step": 375 }, { "epoch": 0.1, "grad_norm": 5.324861526489258, "learning_rate": 4.000000000000001e-06, "loss": 1.1654, "step": 376 }, { "epoch": 0.10026595744680851, "grad_norm": 5.055593013763428, "learning_rate": 4.010638297872341e-06, "loss": 1.1508, "step": 377 }, { "epoch": 0.10053191489361703, "grad_norm": 4.892101287841797, "learning_rate": 4.0212765957446816e-06, "loss": 1.2529, "step": 378 }, { "epoch": 0.10079787234042553, "grad_norm": 4.846734523773193, "learning_rate": 4.031914893617022e-06, "loss": 1.1536, "step": 379 }, { "epoch": 0.10106382978723404, "grad_norm": 5.4368462562561035, "learning_rate": 4.042553191489362e-06, "loss": 1.1512, "step": 380 }, { "epoch": 0.10132978723404255, "grad_norm": 5.102158546447754, "learning_rate": 4.053191489361702e-06, "loss": 1.2382, "step": 381 }, { "epoch": 0.10159574468085106, "grad_norm": 5.7933030128479, "learning_rate": 4.0638297872340425e-06, "loss": 1.4996, "step": 382 }, { "epoch": 0.10186170212765958, "grad_norm": 4.7221221923828125, "learning_rate": 4.074468085106383e-06, "loss": 1.3471, "step": 383 }, { "epoch": 0.10212765957446808, "grad_norm": 4.660311222076416, "learning_rate": 4.085106382978723e-06, "loss": 1.103, "step": 384 }, { "epoch": 0.1023936170212766, "grad_norm": 5.399576663970947, "learning_rate": 4.095744680851064e-06, "loss": 1.3684, "step": 385 }, { "epoch": 0.1026595744680851, "grad_norm": 4.925390720367432, "learning_rate": 4.106382978723404e-06, "loss": 1.2596, "step": 386 }, { "epoch": 0.10292553191489362, "grad_norm": 5.198457717895508, "learning_rate": 4.117021276595745e-06, "loss": 1.2224, "step": 387 }, { "epoch": 0.10319148936170212, "grad_norm": 5.053544044494629, "learning_rate": 4.127659574468085e-06, "loss": 1.0447, "step": 388 }, { "epoch": 0.10345744680851064, "grad_norm": 5.769658088684082, "learning_rate": 4.138297872340426e-06, "loss": 1.4491, "step": 389 }, { "epoch": 0.10372340425531915, "grad_norm": 4.969061851501465, "learning_rate": 4.148936170212766e-06, "loss": 1.2964, "step": 390 }, { "epoch": 0.10398936170212766, "grad_norm": 4.825634479522705, "learning_rate": 4.1595744680851066e-06, "loss": 1.1521, "step": 391 }, { "epoch": 0.10425531914893617, "grad_norm": 5.240276336669922, "learning_rate": 4.170212765957447e-06, "loss": 1.27, "step": 392 }, { "epoch": 0.10452127659574469, "grad_norm": 4.926823139190674, "learning_rate": 4.1808510638297875e-06, "loss": 1.1428, "step": 393 }, { "epoch": 0.10478723404255319, "grad_norm": 5.143110275268555, "learning_rate": 4.191489361702128e-06, "loss": 1.2502, "step": 394 }, { "epoch": 0.10505319148936171, "grad_norm": 5.7517876625061035, "learning_rate": 4.202127659574468e-06, "loss": 1.3353, "step": 395 }, { "epoch": 0.10531914893617021, "grad_norm": 5.096099853515625, "learning_rate": 4.212765957446809e-06, "loss": 1.2383, "step": 396 }, { "epoch": 0.10558510638297873, "grad_norm": 5.0476484298706055, "learning_rate": 4.223404255319149e-06, "loss": 1.1639, "step": 397 }, { "epoch": 0.10585106382978723, "grad_norm": 5.166505813598633, "learning_rate": 4.23404255319149e-06, "loss": 1.327, "step": 398 }, { "epoch": 0.10611702127659574, "grad_norm": 5.315145969390869, "learning_rate": 4.24468085106383e-06, "loss": 1.2239, "step": 399 }, { "epoch": 0.10638297872340426, "grad_norm": 5.185245990753174, "learning_rate": 4.255319148936171e-06, "loss": 1.3102, "step": 400 }, { "epoch": 0.10664893617021276, "grad_norm": 5.77607536315918, "learning_rate": 4.265957446808511e-06, "loss": 1.3943, "step": 401 }, { "epoch": 0.10691489361702128, "grad_norm": 5.244495391845703, "learning_rate": 4.2765957446808515e-06, "loss": 1.2495, "step": 402 }, { "epoch": 0.10718085106382978, "grad_norm": 4.943081378936768, "learning_rate": 4.287234042553192e-06, "loss": 1.1773, "step": 403 }, { "epoch": 0.1074468085106383, "grad_norm": 4.948064804077148, "learning_rate": 4.297872340425532e-06, "loss": 1.2758, "step": 404 }, { "epoch": 0.1077127659574468, "grad_norm": 5.133402347564697, "learning_rate": 4.308510638297873e-06, "loss": 1.28, "step": 405 }, { "epoch": 0.10797872340425532, "grad_norm": 5.113506317138672, "learning_rate": 4.319148936170213e-06, "loss": 1.3164, "step": 406 }, { "epoch": 0.10824468085106383, "grad_norm": 5.551205635070801, "learning_rate": 4.329787234042554e-06, "loss": 1.3766, "step": 407 }, { "epoch": 0.10851063829787234, "grad_norm": 5.358046531677246, "learning_rate": 4.340425531914894e-06, "loss": 1.3146, "step": 408 }, { "epoch": 0.10877659574468085, "grad_norm": 4.947327136993408, "learning_rate": 4.351063829787235e-06, "loss": 1.2566, "step": 409 }, { "epoch": 0.10904255319148937, "grad_norm": 5.421116828918457, "learning_rate": 4.361702127659575e-06, "loss": 1.3041, "step": 410 }, { "epoch": 0.10930851063829787, "grad_norm": 5.073742866516113, "learning_rate": 4.3723404255319156e-06, "loss": 1.2297, "step": 411 }, { "epoch": 0.10957446808510639, "grad_norm": 4.688051700592041, "learning_rate": 4.382978723404256e-06, "loss": 1.281, "step": 412 }, { "epoch": 0.10984042553191489, "grad_norm": 4.957024097442627, "learning_rate": 4.3936170212765965e-06, "loss": 1.2235, "step": 413 }, { "epoch": 0.11010638297872341, "grad_norm": 4.920490741729736, "learning_rate": 4.404255319148937e-06, "loss": 1.3369, "step": 414 }, { "epoch": 0.11037234042553191, "grad_norm": 4.797316551208496, "learning_rate": 4.414893617021277e-06, "loss": 1.2144, "step": 415 }, { "epoch": 0.11063829787234042, "grad_norm": 5.424980640411377, "learning_rate": 4.425531914893617e-06, "loss": 1.3891, "step": 416 }, { "epoch": 0.11090425531914894, "grad_norm": 6.654335021972656, "learning_rate": 4.436170212765957e-06, "loss": 1.2438, "step": 417 }, { "epoch": 0.11117021276595744, "grad_norm": 4.950499057769775, "learning_rate": 4.446808510638298e-06, "loss": 1.1873, "step": 418 }, { "epoch": 0.11143617021276596, "grad_norm": 4.553642272949219, "learning_rate": 4.457446808510638e-06, "loss": 1.1059, "step": 419 }, { "epoch": 0.11170212765957446, "grad_norm": 5.221842288970947, "learning_rate": 4.468085106382979e-06, "loss": 1.2645, "step": 420 }, { "epoch": 0.11196808510638298, "grad_norm": 5.45412015914917, "learning_rate": 4.478723404255319e-06, "loss": 1.234, "step": 421 }, { "epoch": 0.11223404255319148, "grad_norm": 5.6037750244140625, "learning_rate": 4.48936170212766e-06, "loss": 1.2393, "step": 422 }, { "epoch": 0.1125, "grad_norm": 6.701963901519775, "learning_rate": 4.5e-06, "loss": 1.2275, "step": 423 }, { "epoch": 0.1127659574468085, "grad_norm": 5.183774471282959, "learning_rate": 4.5106382978723406e-06, "loss": 1.345, "step": 424 }, { "epoch": 0.11303191489361702, "grad_norm": 5.005707263946533, "learning_rate": 4.521276595744681e-06, "loss": 1.2778, "step": 425 }, { "epoch": 0.11329787234042553, "grad_norm": 4.887904644012451, "learning_rate": 4.5319148936170215e-06, "loss": 1.2156, "step": 426 }, { "epoch": 0.11356382978723405, "grad_norm": 5.077915191650391, "learning_rate": 4.542553191489362e-06, "loss": 1.3213, "step": 427 }, { "epoch": 0.11382978723404255, "grad_norm": 5.669859409332275, "learning_rate": 4.553191489361702e-06, "loss": 1.2028, "step": 428 }, { "epoch": 0.11409574468085107, "grad_norm": 4.871664047241211, "learning_rate": 4.563829787234043e-06, "loss": 1.2471, "step": 429 }, { "epoch": 0.11436170212765957, "grad_norm": 6.208220958709717, "learning_rate": 4.574468085106383e-06, "loss": 1.3042, "step": 430 }, { "epoch": 0.11462765957446809, "grad_norm": 5.47734260559082, "learning_rate": 4.585106382978724e-06, "loss": 1.1327, "step": 431 }, { "epoch": 0.1148936170212766, "grad_norm": 4.876042366027832, "learning_rate": 4.595744680851064e-06, "loss": 1.2484, "step": 432 }, { "epoch": 0.11515957446808511, "grad_norm": 4.497283458709717, "learning_rate": 4.606382978723405e-06, "loss": 1.0734, "step": 433 }, { "epoch": 0.11542553191489362, "grad_norm": 5.2405314445495605, "learning_rate": 4.617021276595745e-06, "loss": 1.3122, "step": 434 }, { "epoch": 0.11569148936170212, "grad_norm": 5.948802947998047, "learning_rate": 4.6276595744680855e-06, "loss": 1.2006, "step": 435 }, { "epoch": 0.11595744680851064, "grad_norm": 5.318106174468994, "learning_rate": 4.638297872340426e-06, "loss": 1.2712, "step": 436 }, { "epoch": 0.11622340425531914, "grad_norm": 5.686134338378906, "learning_rate": 4.648936170212766e-06, "loss": 1.3471, "step": 437 }, { "epoch": 0.11648936170212766, "grad_norm": 5.246779441833496, "learning_rate": 4.659574468085107e-06, "loss": 1.2967, "step": 438 }, { "epoch": 0.11675531914893617, "grad_norm": 4.675699710845947, "learning_rate": 4.670212765957447e-06, "loss": 1.2304, "step": 439 }, { "epoch": 0.11702127659574468, "grad_norm": 5.018355846405029, "learning_rate": 4.680851063829788e-06, "loss": 1.3061, "step": 440 }, { "epoch": 0.11728723404255319, "grad_norm": 5.387866497039795, "learning_rate": 4.691489361702128e-06, "loss": 1.3658, "step": 441 }, { "epoch": 0.1175531914893617, "grad_norm": 4.927948951721191, "learning_rate": 4.702127659574469e-06, "loss": 1.3331, "step": 442 }, { "epoch": 0.11781914893617021, "grad_norm": 5.1225738525390625, "learning_rate": 4.712765957446809e-06, "loss": 1.1334, "step": 443 }, { "epoch": 0.11808510638297873, "grad_norm": 4.9314751625061035, "learning_rate": 4.7234042553191496e-06, "loss": 1.2384, "step": 444 }, { "epoch": 0.11835106382978723, "grad_norm": 5.148207664489746, "learning_rate": 4.73404255319149e-06, "loss": 1.2677, "step": 445 }, { "epoch": 0.11861702127659575, "grad_norm": 4.629826068878174, "learning_rate": 4.7446808510638305e-06, "loss": 1.2096, "step": 446 }, { "epoch": 0.11888297872340425, "grad_norm": 4.850092887878418, "learning_rate": 4.755319148936171e-06, "loss": 1.2004, "step": 447 }, { "epoch": 0.11914893617021277, "grad_norm": 5.228341102600098, "learning_rate": 4.765957446808511e-06, "loss": 1.1828, "step": 448 }, { "epoch": 0.11941489361702128, "grad_norm": 4.738990306854248, "learning_rate": 4.776595744680852e-06, "loss": 1.2557, "step": 449 }, { "epoch": 0.1196808510638298, "grad_norm": 4.737931251525879, "learning_rate": 4.787234042553192e-06, "loss": 1.1705, "step": 450 }, { "epoch": 0.1199468085106383, "grad_norm": 4.852109432220459, "learning_rate": 4.797872340425533e-06, "loss": 1.175, "step": 451 }, { "epoch": 0.1202127659574468, "grad_norm": 4.808513641357422, "learning_rate": 4.808510638297872e-06, "loss": 1.3285, "step": 452 }, { "epoch": 0.12047872340425532, "grad_norm": 5.352870464324951, "learning_rate": 4.819148936170213e-06, "loss": 1.2471, "step": 453 }, { "epoch": 0.12074468085106382, "grad_norm": 4.533960819244385, "learning_rate": 4.829787234042553e-06, "loss": 1.2059, "step": 454 }, { "epoch": 0.12101063829787234, "grad_norm": 4.770225524902344, "learning_rate": 4.840425531914894e-06, "loss": 1.2049, "step": 455 }, { "epoch": 0.12127659574468085, "grad_norm": 5.0733418464660645, "learning_rate": 4.851063829787234e-06, "loss": 1.2758, "step": 456 }, { "epoch": 0.12154255319148936, "grad_norm": 4.347215175628662, "learning_rate": 4.8617021276595746e-06, "loss": 1.1401, "step": 457 }, { "epoch": 0.12180851063829787, "grad_norm": 5.329954147338867, "learning_rate": 4.872340425531915e-06, "loss": 1.276, "step": 458 }, { "epoch": 0.12207446808510639, "grad_norm": 5.255573272705078, "learning_rate": 4.8829787234042555e-06, "loss": 1.234, "step": 459 }, { "epoch": 0.12234042553191489, "grad_norm": 5.189822196960449, "learning_rate": 4.893617021276596e-06, "loss": 1.3676, "step": 460 }, { "epoch": 0.12260638297872341, "grad_norm": 5.039921283721924, "learning_rate": 4.904255319148936e-06, "loss": 1.3342, "step": 461 }, { "epoch": 0.12287234042553191, "grad_norm": 4.65778923034668, "learning_rate": 4.914893617021277e-06, "loss": 1.1117, "step": 462 }, { "epoch": 0.12313829787234043, "grad_norm": 5.006718635559082, "learning_rate": 4.925531914893617e-06, "loss": 1.2543, "step": 463 }, { "epoch": 0.12340425531914893, "grad_norm": 5.547107219696045, "learning_rate": 4.936170212765958e-06, "loss": 1.2113, "step": 464 }, { "epoch": 0.12367021276595745, "grad_norm": 6.148080348968506, "learning_rate": 4.946808510638298e-06, "loss": 1.1889, "step": 465 }, { "epoch": 0.12393617021276596, "grad_norm": 5.120206832885742, "learning_rate": 4.957446808510639e-06, "loss": 1.2198, "step": 466 }, { "epoch": 0.12420212765957447, "grad_norm": 5.487342834472656, "learning_rate": 4.968085106382979e-06, "loss": 1.2786, "step": 467 }, { "epoch": 0.12446808510638298, "grad_norm": 8.382891654968262, "learning_rate": 4.9787234042553195e-06, "loss": 1.3757, "step": 468 }, { "epoch": 0.1247340425531915, "grad_norm": 5.241554260253906, "learning_rate": 4.98936170212766e-06, "loss": 1.3302, "step": 469 }, { "epoch": 0.125, "grad_norm": 5.201963901519775, "learning_rate": 5e-06, "loss": 1.2948, "step": 470 }, { "epoch": 0.12526595744680852, "grad_norm": 5.143476486206055, "learning_rate": 5.010638297872341e-06, "loss": 1.2364, "step": 471 }, { "epoch": 0.125531914893617, "grad_norm": 4.847978115081787, "learning_rate": 5.0212765957446805e-06, "loss": 1.1692, "step": 472 }, { "epoch": 0.12579787234042553, "grad_norm": 7.869311809539795, "learning_rate": 5.031914893617022e-06, "loss": 1.3719, "step": 473 }, { "epoch": 0.12606382978723404, "grad_norm": 5.498979091644287, "learning_rate": 5.042553191489362e-06, "loss": 1.3422, "step": 474 }, { "epoch": 0.12632978723404256, "grad_norm": 6.362303256988525, "learning_rate": 5.053191489361703e-06, "loss": 1.4323, "step": 475 }, { "epoch": 0.12659574468085105, "grad_norm": 5.051971435546875, "learning_rate": 5.063829787234042e-06, "loss": 1.1821, "step": 476 }, { "epoch": 0.12686170212765957, "grad_norm": 4.8123250007629395, "learning_rate": 5.0744680851063836e-06, "loss": 1.2988, "step": 477 }, { "epoch": 0.1271276595744681, "grad_norm": 5.487412452697754, "learning_rate": 5.085106382978724e-06, "loss": 1.3167, "step": 478 }, { "epoch": 0.1273936170212766, "grad_norm": 8.315117835998535, "learning_rate": 5.0957446808510645e-06, "loss": 1.192, "step": 479 }, { "epoch": 0.1276595744680851, "grad_norm": 5.151649475097656, "learning_rate": 5.106382978723404e-06, "loss": 1.2499, "step": 480 }, { "epoch": 0.12792553191489361, "grad_norm": 5.335565567016602, "learning_rate": 5.117021276595745e-06, "loss": 1.2643, "step": 481 }, { "epoch": 0.12819148936170213, "grad_norm": 4.590991020202637, "learning_rate": 5.127659574468086e-06, "loss": 1.218, "step": 482 }, { "epoch": 0.12845744680851065, "grad_norm": 4.4650750160217285, "learning_rate": 5.138297872340426e-06, "loss": 1.1962, "step": 483 }, { "epoch": 0.12872340425531914, "grad_norm": 4.609473705291748, "learning_rate": 5.148936170212766e-06, "loss": 1.476, "step": 484 }, { "epoch": 0.12898936170212766, "grad_norm": 4.7010087966918945, "learning_rate": 5.159574468085107e-06, "loss": 1.1609, "step": 485 }, { "epoch": 0.12925531914893618, "grad_norm": 4.8034257888793945, "learning_rate": 5.170212765957448e-06, "loss": 1.3393, "step": 486 }, { "epoch": 0.1295212765957447, "grad_norm": 5.149427890777588, "learning_rate": 5.180851063829788e-06, "loss": 1.2883, "step": 487 }, { "epoch": 0.12978723404255318, "grad_norm": 5.017268657684326, "learning_rate": 5.191489361702128e-06, "loss": 1.1178, "step": 488 }, { "epoch": 0.1300531914893617, "grad_norm": 4.924554347991943, "learning_rate": 5.202127659574468e-06, "loss": 1.3381, "step": 489 }, { "epoch": 0.13031914893617022, "grad_norm": 4.674248218536377, "learning_rate": 5.212765957446809e-06, "loss": 1.0916, "step": 490 }, { "epoch": 0.1305851063829787, "grad_norm": 4.853366851806641, "learning_rate": 5.223404255319149e-06, "loss": 1.2784, "step": 491 }, { "epoch": 0.13085106382978723, "grad_norm": 5.032970428466797, "learning_rate": 5.2340425531914895e-06, "loss": 1.2575, "step": 492 }, { "epoch": 0.13111702127659575, "grad_norm": 4.911726474761963, "learning_rate": 5.24468085106383e-06, "loss": 1.2049, "step": 493 }, { "epoch": 0.13138297872340426, "grad_norm": 5.197798252105713, "learning_rate": 5.255319148936171e-06, "loss": 1.3461, "step": 494 }, { "epoch": 0.13164893617021275, "grad_norm": 4.873477458953857, "learning_rate": 5.265957446808511e-06, "loss": 1.2681, "step": 495 }, { "epoch": 0.13191489361702127, "grad_norm": 4.855223178863525, "learning_rate": 5.276595744680851e-06, "loss": 1.1849, "step": 496 }, { "epoch": 0.1321808510638298, "grad_norm": 5.735394477844238, "learning_rate": 5.287234042553192e-06, "loss": 1.2821, "step": 497 }, { "epoch": 0.1324468085106383, "grad_norm": 4.7265305519104, "learning_rate": 5.297872340425533e-06, "loss": 1.1253, "step": 498 }, { "epoch": 0.1327127659574468, "grad_norm": 5.138075351715088, "learning_rate": 5.308510638297873e-06, "loss": 1.1951, "step": 499 }, { "epoch": 0.13297872340425532, "grad_norm": 4.761940002441406, "learning_rate": 5.319148936170213e-06, "loss": 1.4573, "step": 500 }, { "epoch": 0.13297872340425532, "eval_loss": 1.276181697845459, "eval_runtime": 12.4372, "eval_samples_per_second": 32.162, "eval_steps_per_second": 4.02, "step": 500 }, { "epoch": 0.13324468085106383, "grad_norm": 5.0954132080078125, "learning_rate": 5.3297872340425535e-06, "loss": 1.43, "step": 501 }, { "epoch": 0.13351063829787235, "grad_norm": 5.592034816741943, "learning_rate": 5.340425531914895e-06, "loss": 1.3052, "step": 502 }, { "epoch": 0.13377659574468084, "grad_norm": 5.18677282333374, "learning_rate": 5.351063829787234e-06, "loss": 1.3141, "step": 503 }, { "epoch": 0.13404255319148936, "grad_norm": 5.0918707847595215, "learning_rate": 5.361702127659575e-06, "loss": 1.3649, "step": 504 }, { "epoch": 0.13430851063829788, "grad_norm": 4.749475002288818, "learning_rate": 5.372340425531915e-06, "loss": 1.1692, "step": 505 }, { "epoch": 0.1345744680851064, "grad_norm": 4.383024215698242, "learning_rate": 5.382978723404257e-06, "loss": 1.3438, "step": 506 }, { "epoch": 0.1348404255319149, "grad_norm": 4.863028049468994, "learning_rate": 5.393617021276596e-06, "loss": 1.3332, "step": 507 }, { "epoch": 0.1351063829787234, "grad_norm": 4.633965492248535, "learning_rate": 5.404255319148937e-06, "loss": 1.2012, "step": 508 }, { "epoch": 0.13537234042553192, "grad_norm": 5.257637023925781, "learning_rate": 5.414893617021277e-06, "loss": 1.3595, "step": 509 }, { "epoch": 0.1356382978723404, "grad_norm": 4.795042037963867, "learning_rate": 5.425531914893617e-06, "loss": 1.3843, "step": 510 }, { "epoch": 0.13590425531914893, "grad_norm": 5.261885643005371, "learning_rate": 5.436170212765958e-06, "loss": 1.2708, "step": 511 }, { "epoch": 0.13617021276595745, "grad_norm": 4.95104455947876, "learning_rate": 5.4468085106382985e-06, "loss": 1.2268, "step": 512 }, { "epoch": 0.13643617021276597, "grad_norm": 5.171029567718506, "learning_rate": 5.457446808510639e-06, "loss": 1.38, "step": 513 }, { "epoch": 0.13670212765957446, "grad_norm": 4.671914577484131, "learning_rate": 5.4680851063829785e-06, "loss": 1.1485, "step": 514 }, { "epoch": 0.13696808510638298, "grad_norm": 4.562173843383789, "learning_rate": 5.47872340425532e-06, "loss": 1.3282, "step": 515 }, { "epoch": 0.1372340425531915, "grad_norm": 4.870545387268066, "learning_rate": 5.48936170212766e-06, "loss": 1.1943, "step": 516 }, { "epoch": 0.1375, "grad_norm": 5.231775760650635, "learning_rate": 5.500000000000001e-06, "loss": 1.2763, "step": 517 }, { "epoch": 0.1377659574468085, "grad_norm": 5.05985689163208, "learning_rate": 5.51063829787234e-06, "loss": 1.2018, "step": 518 }, { "epoch": 0.13803191489361702, "grad_norm": 4.818659782409668, "learning_rate": 5.521276595744682e-06, "loss": 1.2307, "step": 519 }, { "epoch": 0.13829787234042554, "grad_norm": 4.803600311279297, "learning_rate": 5.531914893617022e-06, "loss": 1.3586, "step": 520 }, { "epoch": 0.13856382978723406, "grad_norm": 4.65132999420166, "learning_rate": 5.5425531914893625e-06, "loss": 1.2147, "step": 521 }, { "epoch": 0.13882978723404255, "grad_norm": 4.503746032714844, "learning_rate": 5.553191489361702e-06, "loss": 1.2307, "step": 522 }, { "epoch": 0.13909574468085106, "grad_norm": 4.557102203369141, "learning_rate": 5.563829787234043e-06, "loss": 1.1906, "step": 523 }, { "epoch": 0.13936170212765958, "grad_norm": 4.347774028778076, "learning_rate": 5.574468085106384e-06, "loss": 1.1632, "step": 524 }, { "epoch": 0.13962765957446807, "grad_norm": 4.431983947753906, "learning_rate": 5.5851063829787235e-06, "loss": 1.2617, "step": 525 }, { "epoch": 0.1398936170212766, "grad_norm": 4.971803665161133, "learning_rate": 5.595744680851064e-06, "loss": 1.2581, "step": 526 }, { "epoch": 0.1401595744680851, "grad_norm": 4.5451979637146, "learning_rate": 5.606382978723404e-06, "loss": 1.3048, "step": 527 }, { "epoch": 0.14042553191489363, "grad_norm": 4.687234878540039, "learning_rate": 5.617021276595746e-06, "loss": 1.2556, "step": 528 }, { "epoch": 0.14069148936170212, "grad_norm": 4.7519378662109375, "learning_rate": 5.627659574468085e-06, "loss": 1.2017, "step": 529 }, { "epoch": 0.14095744680851063, "grad_norm": 5.454826354980469, "learning_rate": 5.638297872340426e-06, "loss": 1.137, "step": 530 }, { "epoch": 0.14122340425531915, "grad_norm": 5.442596435546875, "learning_rate": 5.648936170212766e-06, "loss": 1.3776, "step": 531 }, { "epoch": 0.14148936170212767, "grad_norm": 5.057155132293701, "learning_rate": 5.6595744680851075e-06, "loss": 1.4229, "step": 532 }, { "epoch": 0.14175531914893616, "grad_norm": 4.806349277496338, "learning_rate": 5.670212765957447e-06, "loss": 1.2874, "step": 533 }, { "epoch": 0.14202127659574468, "grad_norm": 4.934086322784424, "learning_rate": 5.6808510638297875e-06, "loss": 1.3149, "step": 534 }, { "epoch": 0.1422872340425532, "grad_norm": 4.371129035949707, "learning_rate": 5.691489361702128e-06, "loss": 1.2567, "step": 535 }, { "epoch": 0.1425531914893617, "grad_norm": 5.498307228088379, "learning_rate": 5.702127659574469e-06, "loss": 1.166, "step": 536 }, { "epoch": 0.1428191489361702, "grad_norm": 4.467796802520752, "learning_rate": 5.712765957446809e-06, "loss": 1.1359, "step": 537 }, { "epoch": 0.14308510638297872, "grad_norm": 4.92448091506958, "learning_rate": 5.723404255319149e-06, "loss": 1.2873, "step": 538 }, { "epoch": 0.14335106382978724, "grad_norm": 4.561826705932617, "learning_rate": 5.73404255319149e-06, "loss": 1.0615, "step": 539 }, { "epoch": 0.14361702127659576, "grad_norm": 4.773728370666504, "learning_rate": 5.744680851063831e-06, "loss": 1.1718, "step": 540 }, { "epoch": 0.14388297872340425, "grad_norm": 4.3747639656066895, "learning_rate": 5.755319148936171e-06, "loss": 1.165, "step": 541 }, { "epoch": 0.14414893617021277, "grad_norm": 5.261002063751221, "learning_rate": 5.765957446808511e-06, "loss": 1.3091, "step": 542 }, { "epoch": 0.14441489361702128, "grad_norm": 5.58752965927124, "learning_rate": 5.7765957446808516e-06, "loss": 1.2045, "step": 543 }, { "epoch": 0.14468085106382977, "grad_norm": 4.371783256530762, "learning_rate": 5.787234042553191e-06, "loss": 1.1548, "step": 544 }, { "epoch": 0.1449468085106383, "grad_norm": 4.958721160888672, "learning_rate": 5.7978723404255325e-06, "loss": 1.4517, "step": 545 }, { "epoch": 0.1452127659574468, "grad_norm": 4.846461296081543, "learning_rate": 5.808510638297873e-06, "loss": 1.3224, "step": 546 }, { "epoch": 0.14547872340425533, "grad_norm": 5.132719039916992, "learning_rate": 5.819148936170213e-06, "loss": 1.1865, "step": 547 }, { "epoch": 0.14574468085106382, "grad_norm": 4.791563987731934, "learning_rate": 5.829787234042553e-06, "loss": 1.2571, "step": 548 }, { "epoch": 0.14601063829787234, "grad_norm": 5.137845039367676, "learning_rate": 5.840425531914894e-06, "loss": 1.3008, "step": 549 }, { "epoch": 0.14627659574468085, "grad_norm": 4.80680513381958, "learning_rate": 5.851063829787235e-06, "loss": 1.243, "step": 550 }, { "epoch": 0.14654255319148937, "grad_norm": 4.938924312591553, "learning_rate": 5.861702127659575e-06, "loss": 1.3482, "step": 551 }, { "epoch": 0.14680851063829786, "grad_norm": 5.239283561706543, "learning_rate": 5.872340425531915e-06, "loss": 1.1938, "step": 552 }, { "epoch": 0.14707446808510638, "grad_norm": 4.885773658752441, "learning_rate": 5.882978723404256e-06, "loss": 1.1257, "step": 553 }, { "epoch": 0.1473404255319149, "grad_norm": 5.183603763580322, "learning_rate": 5.8936170212765965e-06, "loss": 1.3353, "step": 554 }, { "epoch": 0.14760638297872342, "grad_norm": 4.765013694763184, "learning_rate": 5.904255319148937e-06, "loss": 1.2058, "step": 555 }, { "epoch": 0.1478723404255319, "grad_norm": 5.2760419845581055, "learning_rate": 5.9148936170212766e-06, "loss": 1.2109, "step": 556 }, { "epoch": 0.14813829787234042, "grad_norm": 5.04670524597168, "learning_rate": 5.925531914893618e-06, "loss": 1.3347, "step": 557 }, { "epoch": 0.14840425531914894, "grad_norm": 4.968268394470215, "learning_rate": 5.936170212765958e-06, "loss": 1.3295, "step": 558 }, { "epoch": 0.14867021276595746, "grad_norm": 4.791049480438232, "learning_rate": 5.946808510638299e-06, "loss": 1.2116, "step": 559 }, { "epoch": 0.14893617021276595, "grad_norm": 4.980474948883057, "learning_rate": 5.957446808510638e-06, "loss": 1.4063, "step": 560 }, { "epoch": 0.14920212765957447, "grad_norm": 4.56986141204834, "learning_rate": 5.968085106382979e-06, "loss": 1.2442, "step": 561 }, { "epoch": 0.149468085106383, "grad_norm": 4.691464424133301, "learning_rate": 5.97872340425532e-06, "loss": 1.2784, "step": 562 }, { "epoch": 0.14973404255319148, "grad_norm": 5.040019512176514, "learning_rate": 5.98936170212766e-06, "loss": 1.2195, "step": 563 }, { "epoch": 0.15, "grad_norm": 5.160355091094971, "learning_rate": 6e-06, "loss": 1.4814, "step": 564 }, { "epoch": 0.1502659574468085, "grad_norm": 4.696538925170898, "learning_rate": 6.010638297872341e-06, "loss": 1.2542, "step": 565 }, { "epoch": 0.15053191489361703, "grad_norm": 4.901849269866943, "learning_rate": 6.021276595744682e-06, "loss": 1.2633, "step": 566 }, { "epoch": 0.15079787234042552, "grad_norm": 4.936095237731934, "learning_rate": 6.0319148936170215e-06, "loss": 1.2812, "step": 567 }, { "epoch": 0.15106382978723404, "grad_norm": 4.6663055419921875, "learning_rate": 6.042553191489362e-06, "loss": 1.3449, "step": 568 }, { "epoch": 0.15132978723404256, "grad_norm": 4.95345401763916, "learning_rate": 6.053191489361702e-06, "loss": 1.1968, "step": 569 }, { "epoch": 0.15159574468085107, "grad_norm": 4.66139030456543, "learning_rate": 6.063829787234044e-06, "loss": 1.1773, "step": 570 }, { "epoch": 0.15186170212765956, "grad_norm": 5.310500144958496, "learning_rate": 6.074468085106383e-06, "loss": 1.2606, "step": 571 }, { "epoch": 0.15212765957446808, "grad_norm": 5.423430442810059, "learning_rate": 6.085106382978724e-06, "loss": 1.4334, "step": 572 }, { "epoch": 0.1523936170212766, "grad_norm": 5.189186096191406, "learning_rate": 6.095744680851064e-06, "loss": 1.2955, "step": 573 }, { "epoch": 0.15265957446808512, "grad_norm": 5.515524864196777, "learning_rate": 6.1063829787234055e-06, "loss": 1.2777, "step": 574 }, { "epoch": 0.1529255319148936, "grad_norm": 4.615379810333252, "learning_rate": 6.117021276595745e-06, "loss": 1.2492, "step": 575 }, { "epoch": 0.15319148936170213, "grad_norm": 4.674113750457764, "learning_rate": 6.1276595744680855e-06, "loss": 1.2807, "step": 576 }, { "epoch": 0.15345744680851064, "grad_norm": 4.907557487487793, "learning_rate": 6.138297872340426e-06, "loss": 1.4288, "step": 577 }, { "epoch": 0.15372340425531916, "grad_norm": 4.517690658569336, "learning_rate": 6.148936170212767e-06, "loss": 1.2274, "step": 578 }, { "epoch": 0.15398936170212765, "grad_norm": 4.350996971130371, "learning_rate": 6.159574468085107e-06, "loss": 1.284, "step": 579 }, { "epoch": 0.15425531914893617, "grad_norm": 4.552090644836426, "learning_rate": 6.170212765957447e-06, "loss": 1.193, "step": 580 }, { "epoch": 0.1545212765957447, "grad_norm": 5.3864827156066895, "learning_rate": 6.180851063829788e-06, "loss": 1.2869, "step": 581 }, { "epoch": 0.15478723404255318, "grad_norm": 4.946741104125977, "learning_rate": 6.191489361702127e-06, "loss": 1.1894, "step": 582 }, { "epoch": 0.1550531914893617, "grad_norm": 4.652212619781494, "learning_rate": 6.202127659574469e-06, "loss": 1.3841, "step": 583 }, { "epoch": 0.15531914893617021, "grad_norm": 4.876087188720703, "learning_rate": 6.212765957446809e-06, "loss": 1.4244, "step": 584 }, { "epoch": 0.15558510638297873, "grad_norm": 4.947083473205566, "learning_rate": 6.22340425531915e-06, "loss": 1.3616, "step": 585 }, { "epoch": 0.15585106382978722, "grad_norm": 4.663647174835205, "learning_rate": 6.234042553191489e-06, "loss": 1.2258, "step": 586 }, { "epoch": 0.15611702127659574, "grad_norm": 4.758052825927734, "learning_rate": 6.2446808510638305e-06, "loss": 1.1514, "step": 587 }, { "epoch": 0.15638297872340426, "grad_norm": 4.887540340423584, "learning_rate": 6.255319148936171e-06, "loss": 1.1887, "step": 588 }, { "epoch": 0.15664893617021278, "grad_norm": 4.9997477531433105, "learning_rate": 6.265957446808511e-06, "loss": 1.2235, "step": 589 }, { "epoch": 0.15691489361702127, "grad_norm": 5.29210090637207, "learning_rate": 6.276595744680851e-06, "loss": 1.3761, "step": 590 }, { "epoch": 0.15718085106382979, "grad_norm": 4.92548942565918, "learning_rate": 6.287234042553192e-06, "loss": 1.3848, "step": 591 }, { "epoch": 0.1574468085106383, "grad_norm": 5.194962978363037, "learning_rate": 6.297872340425533e-06, "loss": 1.4225, "step": 592 }, { "epoch": 0.15771276595744682, "grad_norm": 4.7201080322265625, "learning_rate": 6.308510638297873e-06, "loss": 1.142, "step": 593 }, { "epoch": 0.1579787234042553, "grad_norm": 4.397183895111084, "learning_rate": 6.319148936170213e-06, "loss": 1.0353, "step": 594 }, { "epoch": 0.15824468085106383, "grad_norm": 4.910755157470703, "learning_rate": 6.329787234042554e-06, "loss": 1.3927, "step": 595 }, { "epoch": 0.15851063829787235, "grad_norm": 4.846840858459473, "learning_rate": 6.3404255319148945e-06, "loss": 1.3298, "step": 596 }, { "epoch": 0.15877659574468084, "grad_norm": 4.725717067718506, "learning_rate": 6.351063829787234e-06, "loss": 1.319, "step": 597 }, { "epoch": 0.15904255319148936, "grad_norm": 4.561202049255371, "learning_rate": 6.361702127659575e-06, "loss": 1.3586, "step": 598 }, { "epoch": 0.15930851063829787, "grad_norm": 5.391122817993164, "learning_rate": 6.372340425531915e-06, "loss": 1.2876, "step": 599 }, { "epoch": 0.1595744680851064, "grad_norm": 4.996328830718994, "learning_rate": 6.382978723404256e-06, "loss": 1.5125, "step": 600 }, { "epoch": 0.15984042553191488, "grad_norm": 5.271803855895996, "learning_rate": 6.393617021276596e-06, "loss": 1.3858, "step": 601 }, { "epoch": 0.1601063829787234, "grad_norm": 4.3907318115234375, "learning_rate": 6.404255319148936e-06, "loss": 1.1134, "step": 602 }, { "epoch": 0.16037234042553192, "grad_norm": 5.224330902099609, "learning_rate": 6.414893617021277e-06, "loss": 1.572, "step": 603 }, { "epoch": 0.16063829787234044, "grad_norm": 5.044121742248535, "learning_rate": 6.425531914893618e-06, "loss": 1.4531, "step": 604 }, { "epoch": 0.16090425531914893, "grad_norm": 4.903571128845215, "learning_rate": 6.436170212765958e-06, "loss": 1.2779, "step": 605 }, { "epoch": 0.16117021276595744, "grad_norm": 4.621399402618408, "learning_rate": 6.446808510638298e-06, "loss": 1.1709, "step": 606 }, { "epoch": 0.16143617021276596, "grad_norm": 4.697232723236084, "learning_rate": 6.457446808510639e-06, "loss": 1.1601, "step": 607 }, { "epoch": 0.16170212765957448, "grad_norm": 5.482996940612793, "learning_rate": 6.46808510638298e-06, "loss": 1.401, "step": 608 }, { "epoch": 0.16196808510638297, "grad_norm": 4.974328994750977, "learning_rate": 6.4787234042553195e-06, "loss": 1.288, "step": 609 }, { "epoch": 0.1622340425531915, "grad_norm": 4.7073140144348145, "learning_rate": 6.48936170212766e-06, "loss": 1.331, "step": 610 }, { "epoch": 0.1625, "grad_norm": 4.540210247039795, "learning_rate": 6.5000000000000004e-06, "loss": 1.217, "step": 611 }, { "epoch": 0.16276595744680852, "grad_norm": 4.792731285095215, "learning_rate": 6.510638297872342e-06, "loss": 1.2696, "step": 612 }, { "epoch": 0.163031914893617, "grad_norm": 4.365908622741699, "learning_rate": 6.521276595744681e-06, "loss": 1.1104, "step": 613 }, { "epoch": 0.16329787234042553, "grad_norm": 4.6623101234436035, "learning_rate": 6.531914893617022e-06, "loss": 1.0165, "step": 614 }, { "epoch": 0.16356382978723405, "grad_norm": 4.874281883239746, "learning_rate": 6.542553191489362e-06, "loss": 1.3418, "step": 615 }, { "epoch": 0.16382978723404254, "grad_norm": 5.30225133895874, "learning_rate": 6.553191489361702e-06, "loss": 1.2965, "step": 616 }, { "epoch": 0.16409574468085106, "grad_norm": 5.1621880531311035, "learning_rate": 6.563829787234043e-06, "loss": 1.411, "step": 617 }, { "epoch": 0.16436170212765958, "grad_norm": 5.011656761169434, "learning_rate": 6.574468085106384e-06, "loss": 1.2324, "step": 618 }, { "epoch": 0.1646276595744681, "grad_norm": 4.633167743682861, "learning_rate": 6.585106382978724e-06, "loss": 1.2498, "step": 619 }, { "epoch": 0.16489361702127658, "grad_norm": 4.762227535247803, "learning_rate": 6.595744680851064e-06, "loss": 1.3774, "step": 620 }, { "epoch": 0.1651595744680851, "grad_norm": 4.581019401550293, "learning_rate": 6.606382978723405e-06, "loss": 1.2745, "step": 621 }, { "epoch": 0.16542553191489362, "grad_norm": 4.845024585723877, "learning_rate": 6.617021276595745e-06, "loss": 1.2003, "step": 622 }, { "epoch": 0.16569148936170214, "grad_norm": 4.555243015289307, "learning_rate": 6.627659574468086e-06, "loss": 1.265, "step": 623 }, { "epoch": 0.16595744680851063, "grad_norm": 4.3719987869262695, "learning_rate": 6.6382978723404254e-06, "loss": 1.2131, "step": 624 }, { "epoch": 0.16622340425531915, "grad_norm": 4.629434108734131, "learning_rate": 6.648936170212767e-06, "loss": 1.3491, "step": 625 }, { "epoch": 0.16648936170212766, "grad_norm": 5.0472540855407715, "learning_rate": 6.659574468085107e-06, "loss": 1.4119, "step": 626 }, { "epoch": 0.16675531914893618, "grad_norm": 4.784181594848633, "learning_rate": 6.670212765957448e-06, "loss": 1.3079, "step": 627 }, { "epoch": 0.16702127659574467, "grad_norm": 5.000133514404297, "learning_rate": 6.680851063829787e-06, "loss": 1.2378, "step": 628 }, { "epoch": 0.1672872340425532, "grad_norm": 4.911679267883301, "learning_rate": 6.6914893617021285e-06, "loss": 1.1824, "step": 629 }, { "epoch": 0.1675531914893617, "grad_norm": 4.674395561218262, "learning_rate": 6.702127659574469e-06, "loss": 1.1836, "step": 630 }, { "epoch": 0.16781914893617023, "grad_norm": 4.964152812957764, "learning_rate": 6.7127659574468094e-06, "loss": 1.2419, "step": 631 }, { "epoch": 0.16808510638297872, "grad_norm": 4.766603946685791, "learning_rate": 6.723404255319149e-06, "loss": 1.2885, "step": 632 }, { "epoch": 0.16835106382978723, "grad_norm": 4.679075241088867, "learning_rate": 6.7340425531914895e-06, "loss": 1.279, "step": 633 }, { "epoch": 0.16861702127659575, "grad_norm": 4.590879440307617, "learning_rate": 6.744680851063831e-06, "loss": 1.2808, "step": 634 }, { "epoch": 0.16888297872340424, "grad_norm": 4.539956092834473, "learning_rate": 6.75531914893617e-06, "loss": 1.3353, "step": 635 }, { "epoch": 0.16914893617021276, "grad_norm": 4.546907424926758, "learning_rate": 6.765957446808511e-06, "loss": 1.2691, "step": 636 }, { "epoch": 0.16941489361702128, "grad_norm": 4.260477066040039, "learning_rate": 6.776595744680851e-06, "loss": 1.313, "step": 637 }, { "epoch": 0.1696808510638298, "grad_norm": 4.697219371795654, "learning_rate": 6.787234042553193e-06, "loss": 1.131, "step": 638 }, { "epoch": 0.1699468085106383, "grad_norm": 4.471210479736328, "learning_rate": 6.797872340425532e-06, "loss": 1.1466, "step": 639 }, { "epoch": 0.1702127659574468, "grad_norm": 5.731024742126465, "learning_rate": 6.808510638297873e-06, "loss": 1.1923, "step": 640 }, { "epoch": 0.17047872340425532, "grad_norm": 4.853487491607666, "learning_rate": 6.819148936170213e-06, "loss": 1.3019, "step": 641 }, { "epoch": 0.17074468085106384, "grad_norm": 4.857687950134277, "learning_rate": 6.829787234042554e-06, "loss": 1.382, "step": 642 }, { "epoch": 0.17101063829787233, "grad_norm": 5.497145652770996, "learning_rate": 6.840425531914894e-06, "loss": 1.2611, "step": 643 }, { "epoch": 0.17127659574468085, "grad_norm": 4.852382659912109, "learning_rate": 6.8510638297872344e-06, "loss": 1.3002, "step": 644 }, { "epoch": 0.17154255319148937, "grad_norm": 4.891834259033203, "learning_rate": 6.861702127659575e-06, "loss": 1.3009, "step": 645 }, { "epoch": 0.17180851063829788, "grad_norm": 5.264189720153809, "learning_rate": 6.872340425531916e-06, "loss": 1.2047, "step": 646 }, { "epoch": 0.17207446808510637, "grad_norm": 4.408929347991943, "learning_rate": 6.882978723404256e-06, "loss": 1.4105, "step": 647 }, { "epoch": 0.1723404255319149, "grad_norm": 4.550996780395508, "learning_rate": 6.893617021276596e-06, "loss": 1.4495, "step": 648 }, { "epoch": 0.1726063829787234, "grad_norm": 4.704092025756836, "learning_rate": 6.904255319148937e-06, "loss": 1.2031, "step": 649 }, { "epoch": 0.17287234042553193, "grad_norm": 4.802618026733398, "learning_rate": 6.914893617021278e-06, "loss": 1.2879, "step": 650 }, { "epoch": 0.17313829787234042, "grad_norm": 4.637843608856201, "learning_rate": 6.925531914893618e-06, "loss": 1.2621, "step": 651 }, { "epoch": 0.17340425531914894, "grad_norm": 4.558661937713623, "learning_rate": 6.936170212765958e-06, "loss": 1.1671, "step": 652 }, { "epoch": 0.17367021276595745, "grad_norm": 4.981627464294434, "learning_rate": 6.9468085106382985e-06, "loss": 1.2137, "step": 653 }, { "epoch": 0.17393617021276594, "grad_norm": 4.708109378814697, "learning_rate": 6.957446808510638e-06, "loss": 1.1408, "step": 654 }, { "epoch": 0.17420212765957446, "grad_norm": 5.328996658325195, "learning_rate": 6.968085106382979e-06, "loss": 1.1697, "step": 655 }, { "epoch": 0.17446808510638298, "grad_norm": 4.988645553588867, "learning_rate": 6.97872340425532e-06, "loss": 1.2962, "step": 656 }, { "epoch": 0.1747340425531915, "grad_norm": 5.570682048797607, "learning_rate": 6.98936170212766e-06, "loss": 1.4083, "step": 657 }, { "epoch": 0.175, "grad_norm": 5.141003608703613, "learning_rate": 7e-06, "loss": 1.2558, "step": 658 }, { "epoch": 0.1752659574468085, "grad_norm": 4.548361778259277, "learning_rate": 7.010638297872341e-06, "loss": 1.2556, "step": 659 }, { "epoch": 0.17553191489361702, "grad_norm": 4.381852149963379, "learning_rate": 7.021276595744682e-06, "loss": 1.3609, "step": 660 }, { "epoch": 0.17579787234042554, "grad_norm": 4.388241767883301, "learning_rate": 7.031914893617022e-06, "loss": 1.2165, "step": 661 }, { "epoch": 0.17606382978723403, "grad_norm": 4.472124099731445, "learning_rate": 7.042553191489362e-06, "loss": 1.3372, "step": 662 }, { "epoch": 0.17632978723404255, "grad_norm": 4.284490585327148, "learning_rate": 7.053191489361703e-06, "loss": 1.1206, "step": 663 }, { "epoch": 0.17659574468085107, "grad_norm": 4.448127269744873, "learning_rate": 7.0638297872340434e-06, "loss": 1.3206, "step": 664 }, { "epoch": 0.1768617021276596, "grad_norm": 4.701923847198486, "learning_rate": 7.074468085106384e-06, "loss": 1.1289, "step": 665 }, { "epoch": 0.17712765957446808, "grad_norm": 4.249335289001465, "learning_rate": 7.0851063829787235e-06, "loss": 1.136, "step": 666 }, { "epoch": 0.1773936170212766, "grad_norm": 4.292792320251465, "learning_rate": 7.095744680851065e-06, "loss": 1.1827, "step": 667 }, { "epoch": 0.1776595744680851, "grad_norm": 4.595381736755371, "learning_rate": 7.106382978723405e-06, "loss": 1.1449, "step": 668 }, { "epoch": 0.1779255319148936, "grad_norm": 4.856510162353516, "learning_rate": 7.117021276595745e-06, "loss": 1.2378, "step": 669 }, { "epoch": 0.17819148936170212, "grad_norm": 4.735593318939209, "learning_rate": 7.127659574468085e-06, "loss": 1.1641, "step": 670 }, { "epoch": 0.17845744680851064, "grad_norm": 4.771074295043945, "learning_rate": 7.138297872340426e-06, "loss": 1.33, "step": 671 }, { "epoch": 0.17872340425531916, "grad_norm": 4.873645782470703, "learning_rate": 7.148936170212767e-06, "loss": 1.3388, "step": 672 }, { "epoch": 0.17898936170212765, "grad_norm": 4.672497749328613, "learning_rate": 7.159574468085107e-06, "loss": 1.3479, "step": 673 }, { "epoch": 0.17925531914893617, "grad_norm": 4.454950332641602, "learning_rate": 7.170212765957447e-06, "loss": 1.3631, "step": 674 }, { "epoch": 0.17952127659574468, "grad_norm": 5.085921764373779, "learning_rate": 7.1808510638297875e-06, "loss": 1.4711, "step": 675 }, { "epoch": 0.1797872340425532, "grad_norm": 4.528400421142578, "learning_rate": 7.191489361702129e-06, "loss": 1.1868, "step": 676 }, { "epoch": 0.1800531914893617, "grad_norm": 4.722430229187012, "learning_rate": 7.2021276595744684e-06, "loss": 1.3842, "step": 677 }, { "epoch": 0.1803191489361702, "grad_norm": 4.894054889678955, "learning_rate": 7.212765957446809e-06, "loss": 1.4365, "step": 678 }, { "epoch": 0.18058510638297873, "grad_norm": 4.8365559577941895, "learning_rate": 7.223404255319149e-06, "loss": 1.4409, "step": 679 }, { "epoch": 0.18085106382978725, "grad_norm": 5.0071916580200195, "learning_rate": 7.234042553191491e-06, "loss": 1.214, "step": 680 }, { "epoch": 0.18111702127659574, "grad_norm": 4.514876365661621, "learning_rate": 7.24468085106383e-06, "loss": 1.1646, "step": 681 }, { "epoch": 0.18138297872340425, "grad_norm": 4.465925693511963, "learning_rate": 7.255319148936171e-06, "loss": 1.2662, "step": 682 }, { "epoch": 0.18164893617021277, "grad_norm": 4.698017120361328, "learning_rate": 7.265957446808511e-06, "loss": 1.3683, "step": 683 }, { "epoch": 0.1819148936170213, "grad_norm": 4.704659461975098, "learning_rate": 7.2765957446808524e-06, "loss": 1.2236, "step": 684 }, { "epoch": 0.18218085106382978, "grad_norm": 4.9184675216674805, "learning_rate": 7.287234042553192e-06, "loss": 1.1904, "step": 685 }, { "epoch": 0.1824468085106383, "grad_norm": 4.5409088134765625, "learning_rate": 7.2978723404255325e-06, "loss": 1.2257, "step": 686 }, { "epoch": 0.18271276595744682, "grad_norm": 4.9037556648254395, "learning_rate": 7.308510638297873e-06, "loss": 1.31, "step": 687 }, { "epoch": 0.1829787234042553, "grad_norm": 4.719064235687256, "learning_rate": 7.3191489361702125e-06, "loss": 1.2651, "step": 688 }, { "epoch": 0.18324468085106382, "grad_norm": 4.5164971351623535, "learning_rate": 7.329787234042554e-06, "loss": 1.306, "step": 689 }, { "epoch": 0.18351063829787234, "grad_norm": 4.281124591827393, "learning_rate": 7.340425531914894e-06, "loss": 1.1963, "step": 690 }, { "epoch": 0.18377659574468086, "grad_norm": 4.6168951988220215, "learning_rate": 7.351063829787235e-06, "loss": 1.2118, "step": 691 }, { "epoch": 0.18404255319148935, "grad_norm": 4.85908842086792, "learning_rate": 7.361702127659574e-06, "loss": 1.2587, "step": 692 }, { "epoch": 0.18430851063829787, "grad_norm": 4.3025336265563965, "learning_rate": 7.372340425531916e-06, "loss": 1.1239, "step": 693 }, { "epoch": 0.18457446808510639, "grad_norm": 4.3702311515808105, "learning_rate": 7.382978723404256e-06, "loss": 1.0654, "step": 694 }, { "epoch": 0.1848404255319149, "grad_norm": 4.243852615356445, "learning_rate": 7.3936170212765965e-06, "loss": 1.2725, "step": 695 }, { "epoch": 0.1851063829787234, "grad_norm": 4.241601467132568, "learning_rate": 7.404255319148936e-06, "loss": 1.1379, "step": 696 }, { "epoch": 0.1853723404255319, "grad_norm": 4.863661766052246, "learning_rate": 7.4148936170212774e-06, "loss": 1.2644, "step": 697 }, { "epoch": 0.18563829787234043, "grad_norm": 4.637073040008545, "learning_rate": 7.425531914893618e-06, "loss": 1.3296, "step": 698 }, { "epoch": 0.18590425531914895, "grad_norm": 4.703394889831543, "learning_rate": 7.436170212765958e-06, "loss": 1.3016, "step": 699 }, { "epoch": 0.18617021276595744, "grad_norm": 4.478874206542969, "learning_rate": 7.446808510638298e-06, "loss": 1.3163, "step": 700 }, { "epoch": 0.18643617021276596, "grad_norm": 4.600717067718506, "learning_rate": 7.457446808510639e-06, "loss": 1.3648, "step": 701 }, { "epoch": 0.18670212765957447, "grad_norm": 4.729065418243408, "learning_rate": 7.46808510638298e-06, "loss": 1.3604, "step": 702 }, { "epoch": 0.186968085106383, "grad_norm": 4.127298831939697, "learning_rate": 7.47872340425532e-06, "loss": 1.153, "step": 703 }, { "epoch": 0.18723404255319148, "grad_norm": 4.612214088439941, "learning_rate": 7.48936170212766e-06, "loss": 1.2951, "step": 704 }, { "epoch": 0.1875, "grad_norm": 5.011428356170654, "learning_rate": 7.500000000000001e-06, "loss": 1.4121, "step": 705 }, { "epoch": 0.18776595744680852, "grad_norm": 4.605989933013916, "learning_rate": 7.5106382978723415e-06, "loss": 1.262, "step": 706 }, { "epoch": 0.188031914893617, "grad_norm": 5.028648853302002, "learning_rate": 7.521276595744681e-06, "loss": 1.4181, "step": 707 }, { "epoch": 0.18829787234042553, "grad_norm": 4.571159839630127, "learning_rate": 7.5319148936170215e-06, "loss": 1.2364, "step": 708 }, { "epoch": 0.18856382978723404, "grad_norm": 4.608417510986328, "learning_rate": 7.542553191489362e-06, "loss": 1.3094, "step": 709 }, { "epoch": 0.18882978723404256, "grad_norm": 4.881725311279297, "learning_rate": 7.553191489361703e-06, "loss": 1.313, "step": 710 }, { "epoch": 0.18909574468085105, "grad_norm": 4.912058353424072, "learning_rate": 7.563829787234043e-06, "loss": 1.392, "step": 711 }, { "epoch": 0.18936170212765957, "grad_norm": 4.419525623321533, "learning_rate": 7.574468085106383e-06, "loss": 1.2366, "step": 712 }, { "epoch": 0.1896276595744681, "grad_norm": 4.507438659667969, "learning_rate": 7.585106382978724e-06, "loss": 1.2404, "step": 713 }, { "epoch": 0.1898936170212766, "grad_norm": 4.561898708343506, "learning_rate": 7.595744680851065e-06, "loss": 1.3596, "step": 714 }, { "epoch": 0.1901595744680851, "grad_norm": 4.635844707489014, "learning_rate": 7.606382978723405e-06, "loss": 1.2898, "step": 715 }, { "epoch": 0.19042553191489361, "grad_norm": 5.374488353729248, "learning_rate": 7.617021276595745e-06, "loss": 1.3445, "step": 716 }, { "epoch": 0.19069148936170213, "grad_norm": 4.574670314788818, "learning_rate": 7.627659574468086e-06, "loss": 1.2414, "step": 717 }, { "epoch": 0.19095744680851065, "grad_norm": 4.509703159332275, "learning_rate": 7.638297872340426e-06, "loss": 1.1649, "step": 718 }, { "epoch": 0.19122340425531914, "grad_norm": 4.2057929039001465, "learning_rate": 7.648936170212766e-06, "loss": 1.3734, "step": 719 }, { "epoch": 0.19148936170212766, "grad_norm": 4.571545124053955, "learning_rate": 7.659574468085107e-06, "loss": 1.2722, "step": 720 }, { "epoch": 0.19175531914893618, "grad_norm": 4.561543941497803, "learning_rate": 7.670212765957448e-06, "loss": 1.4057, "step": 721 }, { "epoch": 0.1920212765957447, "grad_norm": 4.365459442138672, "learning_rate": 7.680851063829788e-06, "loss": 1.2348, "step": 722 }, { "epoch": 0.19228723404255318, "grad_norm": 4.416993141174316, "learning_rate": 7.691489361702127e-06, "loss": 1.3065, "step": 723 }, { "epoch": 0.1925531914893617, "grad_norm": 4.762002944946289, "learning_rate": 7.702127659574469e-06, "loss": 1.3231, "step": 724 }, { "epoch": 0.19281914893617022, "grad_norm": 5.0312604904174805, "learning_rate": 7.71276595744681e-06, "loss": 1.3851, "step": 725 }, { "epoch": 0.1930851063829787, "grad_norm": 4.8303046226501465, "learning_rate": 7.72340425531915e-06, "loss": 1.3391, "step": 726 }, { "epoch": 0.19335106382978723, "grad_norm": 5.312425136566162, "learning_rate": 7.73404255319149e-06, "loss": 1.3422, "step": 727 }, { "epoch": 0.19361702127659575, "grad_norm": 4.574582576751709, "learning_rate": 7.74468085106383e-06, "loss": 1.2543, "step": 728 }, { "epoch": 0.19388297872340426, "grad_norm": 4.735869884490967, "learning_rate": 7.755319148936172e-06, "loss": 1.427, "step": 729 }, { "epoch": 0.19414893617021275, "grad_norm": 4.317601203918457, "learning_rate": 7.765957446808511e-06, "loss": 1.221, "step": 730 }, { "epoch": 0.19441489361702127, "grad_norm": 4.69275426864624, "learning_rate": 7.776595744680851e-06, "loss": 1.2186, "step": 731 }, { "epoch": 0.1946808510638298, "grad_norm": 4.865464210510254, "learning_rate": 7.787234042553192e-06, "loss": 1.3243, "step": 732 }, { "epoch": 0.1949468085106383, "grad_norm": 4.288273811340332, "learning_rate": 7.797872340425534e-06, "loss": 1.2224, "step": 733 }, { "epoch": 0.1952127659574468, "grad_norm": 4.230968475341797, "learning_rate": 7.808510638297873e-06, "loss": 1.1869, "step": 734 }, { "epoch": 0.19547872340425532, "grad_norm": 5.056215286254883, "learning_rate": 7.819148936170213e-06, "loss": 1.2755, "step": 735 }, { "epoch": 0.19574468085106383, "grad_norm": 4.373525142669678, "learning_rate": 7.829787234042554e-06, "loss": 1.2649, "step": 736 }, { "epoch": 0.19601063829787235, "grad_norm": 4.4216179847717285, "learning_rate": 7.840425531914895e-06, "loss": 1.2578, "step": 737 }, { "epoch": 0.19627659574468084, "grad_norm": 4.517039775848389, "learning_rate": 7.851063829787235e-06, "loss": 1.1759, "step": 738 }, { "epoch": 0.19654255319148936, "grad_norm": 4.973018169403076, "learning_rate": 7.861702127659575e-06, "loss": 1.2073, "step": 739 }, { "epoch": 0.19680851063829788, "grad_norm": 4.714282035827637, "learning_rate": 7.872340425531916e-06, "loss": 1.3551, "step": 740 }, { "epoch": 0.1970744680851064, "grad_norm": 4.824267387390137, "learning_rate": 7.882978723404257e-06, "loss": 1.287, "step": 741 }, { "epoch": 0.1973404255319149, "grad_norm": 4.343824863433838, "learning_rate": 7.893617021276597e-06, "loss": 1.1736, "step": 742 }, { "epoch": 0.1976063829787234, "grad_norm": 5.130711555480957, "learning_rate": 7.904255319148936e-06, "loss": 1.3622, "step": 743 }, { "epoch": 0.19787234042553192, "grad_norm": 4.943610191345215, "learning_rate": 7.914893617021278e-06, "loss": 1.2538, "step": 744 }, { "epoch": 0.1981382978723404, "grad_norm": 4.978169918060303, "learning_rate": 7.925531914893617e-06, "loss": 1.2547, "step": 745 }, { "epoch": 0.19840425531914893, "grad_norm": 4.933815956115723, "learning_rate": 7.936170212765959e-06, "loss": 1.3827, "step": 746 }, { "epoch": 0.19867021276595745, "grad_norm": 4.288017272949219, "learning_rate": 7.946808510638298e-06, "loss": 1.2695, "step": 747 }, { "epoch": 0.19893617021276597, "grad_norm": 4.4305267333984375, "learning_rate": 7.95744680851064e-06, "loss": 1.1459, "step": 748 }, { "epoch": 0.19920212765957446, "grad_norm": 4.959934711456299, "learning_rate": 7.968085106382979e-06, "loss": 1.1793, "step": 749 }, { "epoch": 0.19946808510638298, "grad_norm": 4.623016834259033, "learning_rate": 7.97872340425532e-06, "loss": 1.2508, "step": 750 }, { "epoch": 0.1997340425531915, "grad_norm": 4.426565170288086, "learning_rate": 7.98936170212766e-06, "loss": 1.2464, "step": 751 }, { "epoch": 0.2, "grad_norm": 4.914389610290527, "learning_rate": 8.000000000000001e-06, "loss": 1.2941, "step": 752 }, { "epoch": 0.2002659574468085, "grad_norm": 4.474592685699463, "learning_rate": 8.010638297872341e-06, "loss": 1.2285, "step": 753 }, { "epoch": 0.20053191489361702, "grad_norm": 4.237037181854248, "learning_rate": 8.021276595744682e-06, "loss": 1.3422, "step": 754 }, { "epoch": 0.20079787234042554, "grad_norm": 4.545922756195068, "learning_rate": 8.031914893617022e-06, "loss": 1.2456, "step": 755 }, { "epoch": 0.20106382978723406, "grad_norm": 4.951487064361572, "learning_rate": 8.042553191489363e-06, "loss": 1.3001, "step": 756 }, { "epoch": 0.20132978723404255, "grad_norm": 5.056552886962891, "learning_rate": 8.053191489361703e-06, "loss": 1.3875, "step": 757 }, { "epoch": 0.20159574468085106, "grad_norm": 4.5373101234436035, "learning_rate": 8.063829787234044e-06, "loss": 1.2855, "step": 758 }, { "epoch": 0.20186170212765958, "grad_norm": 4.698331832885742, "learning_rate": 8.074468085106384e-06, "loss": 1.1841, "step": 759 }, { "epoch": 0.20212765957446807, "grad_norm": 4.885603904724121, "learning_rate": 8.085106382978723e-06, "loss": 1.2843, "step": 760 }, { "epoch": 0.2023936170212766, "grad_norm": 4.819825172424316, "learning_rate": 8.095744680851065e-06, "loss": 1.2908, "step": 761 }, { "epoch": 0.2026595744680851, "grad_norm": 4.332822799682617, "learning_rate": 8.106382978723404e-06, "loss": 1.1986, "step": 762 }, { "epoch": 0.20292553191489363, "grad_norm": 4.102404594421387, "learning_rate": 8.117021276595745e-06, "loss": 1.3478, "step": 763 }, { "epoch": 0.20319148936170212, "grad_norm": 4.496637344360352, "learning_rate": 8.127659574468085e-06, "loss": 1.265, "step": 764 }, { "epoch": 0.20345744680851063, "grad_norm": 4.544750690460205, "learning_rate": 8.138297872340426e-06, "loss": 1.2299, "step": 765 }, { "epoch": 0.20372340425531915, "grad_norm": 4.774095058441162, "learning_rate": 8.148936170212766e-06, "loss": 1.3596, "step": 766 }, { "epoch": 0.20398936170212767, "grad_norm": 4.508190155029297, "learning_rate": 8.159574468085107e-06, "loss": 1.3143, "step": 767 }, { "epoch": 0.20425531914893616, "grad_norm": 4.832380771636963, "learning_rate": 8.170212765957447e-06, "loss": 1.2449, "step": 768 }, { "epoch": 0.20452127659574468, "grad_norm": 4.282026290893555, "learning_rate": 8.180851063829788e-06, "loss": 1.199, "step": 769 }, { "epoch": 0.2047872340425532, "grad_norm": 4.594806671142578, "learning_rate": 8.191489361702128e-06, "loss": 1.2466, "step": 770 }, { "epoch": 0.2050531914893617, "grad_norm": 4.925674915313721, "learning_rate": 8.202127659574469e-06, "loss": 1.2771, "step": 771 }, { "epoch": 0.2053191489361702, "grad_norm": 4.634965419769287, "learning_rate": 8.212765957446809e-06, "loss": 1.2511, "step": 772 }, { "epoch": 0.20558510638297872, "grad_norm": 4.774378776550293, "learning_rate": 8.22340425531915e-06, "loss": 1.1902, "step": 773 }, { "epoch": 0.20585106382978724, "grad_norm": 4.943484783172607, "learning_rate": 8.23404255319149e-06, "loss": 1.454, "step": 774 }, { "epoch": 0.20611702127659576, "grad_norm": 4.800187587738037, "learning_rate": 8.24468085106383e-06, "loss": 1.3709, "step": 775 }, { "epoch": 0.20638297872340425, "grad_norm": 5.566744327545166, "learning_rate": 8.25531914893617e-06, "loss": 1.3158, "step": 776 }, { "epoch": 0.20664893617021277, "grad_norm": 4.241647720336914, "learning_rate": 8.265957446808512e-06, "loss": 1.3173, "step": 777 }, { "epoch": 0.20691489361702128, "grad_norm": 4.561349868774414, "learning_rate": 8.276595744680851e-06, "loss": 1.1971, "step": 778 }, { "epoch": 0.20718085106382977, "grad_norm": 4.4153828620910645, "learning_rate": 8.287234042553191e-06, "loss": 1.2479, "step": 779 }, { "epoch": 0.2074468085106383, "grad_norm": 4.6610107421875, "learning_rate": 8.297872340425532e-06, "loss": 1.5759, "step": 780 }, { "epoch": 0.2077127659574468, "grad_norm": 5.142064094543457, "learning_rate": 8.308510638297874e-06, "loss": 1.3802, "step": 781 }, { "epoch": 0.20797872340425533, "grad_norm": 4.54619026184082, "learning_rate": 8.319148936170213e-06, "loss": 1.3185, "step": 782 }, { "epoch": 0.20824468085106382, "grad_norm": 4.640912055969238, "learning_rate": 8.329787234042553e-06, "loss": 1.2491, "step": 783 }, { "epoch": 0.20851063829787234, "grad_norm": 4.866705894470215, "learning_rate": 8.340425531914894e-06, "loss": 1.28, "step": 784 }, { "epoch": 0.20877659574468085, "grad_norm": 4.362489700317383, "learning_rate": 8.351063829787235e-06, "loss": 1.3603, "step": 785 }, { "epoch": 0.20904255319148937, "grad_norm": 4.756308078765869, "learning_rate": 8.361702127659575e-06, "loss": 1.4108, "step": 786 }, { "epoch": 0.20930851063829786, "grad_norm": 4.564047813415527, "learning_rate": 8.372340425531915e-06, "loss": 1.3404, "step": 787 }, { "epoch": 0.20957446808510638, "grad_norm": 4.4327921867370605, "learning_rate": 8.382978723404256e-06, "loss": 1.2675, "step": 788 }, { "epoch": 0.2098404255319149, "grad_norm": 4.656761646270752, "learning_rate": 8.393617021276597e-06, "loss": 1.2601, "step": 789 }, { "epoch": 0.21010638297872342, "grad_norm": 4.353705883026123, "learning_rate": 8.404255319148937e-06, "loss": 1.2144, "step": 790 }, { "epoch": 0.2103723404255319, "grad_norm": 4.420286655426025, "learning_rate": 8.414893617021276e-06, "loss": 1.249, "step": 791 }, { "epoch": 0.21063829787234042, "grad_norm": 4.781008243560791, "learning_rate": 8.425531914893618e-06, "loss": 1.3132, "step": 792 }, { "epoch": 0.21090425531914894, "grad_norm": 5.137455463409424, "learning_rate": 8.436170212765959e-06, "loss": 1.2915, "step": 793 }, { "epoch": 0.21117021276595746, "grad_norm": 4.893155097961426, "learning_rate": 8.446808510638299e-06, "loss": 1.3679, "step": 794 }, { "epoch": 0.21143617021276595, "grad_norm": 4.635669708251953, "learning_rate": 8.457446808510638e-06, "loss": 1.3222, "step": 795 }, { "epoch": 0.21170212765957447, "grad_norm": 4.853140354156494, "learning_rate": 8.46808510638298e-06, "loss": 1.2849, "step": 796 }, { "epoch": 0.211968085106383, "grad_norm": 4.836693286895752, "learning_rate": 8.47872340425532e-06, "loss": 1.395, "step": 797 }, { "epoch": 0.21223404255319148, "grad_norm": 4.493725299835205, "learning_rate": 8.48936170212766e-06, "loss": 1.3197, "step": 798 }, { "epoch": 0.2125, "grad_norm": 5.088167190551758, "learning_rate": 8.5e-06, "loss": 1.4093, "step": 799 }, { "epoch": 0.2127659574468085, "grad_norm": 4.372249603271484, "learning_rate": 8.510638297872341e-06, "loss": 1.3612, "step": 800 }, { "epoch": 0.21303191489361703, "grad_norm": 4.2862420082092285, "learning_rate": 8.521276595744683e-06, "loss": 1.2227, "step": 801 }, { "epoch": 0.21329787234042552, "grad_norm": 4.741192817687988, "learning_rate": 8.531914893617022e-06, "loss": 1.2799, "step": 802 }, { "epoch": 0.21356382978723404, "grad_norm": 5.022809982299805, "learning_rate": 8.542553191489362e-06, "loss": 1.407, "step": 803 }, { "epoch": 0.21382978723404256, "grad_norm": 4.443842887878418, "learning_rate": 8.553191489361703e-06, "loss": 1.3346, "step": 804 }, { "epoch": 0.21409574468085107, "grad_norm": 4.133638858795166, "learning_rate": 8.563829787234044e-06, "loss": 1.2443, "step": 805 }, { "epoch": 0.21436170212765956, "grad_norm": 4.916075706481934, "learning_rate": 8.574468085106384e-06, "loss": 1.3503, "step": 806 }, { "epoch": 0.21462765957446808, "grad_norm": 4.634794235229492, "learning_rate": 8.585106382978724e-06, "loss": 1.4072, "step": 807 }, { "epoch": 0.2148936170212766, "grad_norm": 4.912757396697998, "learning_rate": 8.595744680851065e-06, "loss": 1.3311, "step": 808 }, { "epoch": 0.21515957446808512, "grad_norm": 5.202310085296631, "learning_rate": 8.606382978723406e-06, "loss": 1.3224, "step": 809 }, { "epoch": 0.2154255319148936, "grad_norm": 4.477729320526123, "learning_rate": 8.617021276595746e-06, "loss": 1.2806, "step": 810 }, { "epoch": 0.21569148936170213, "grad_norm": 4.493345260620117, "learning_rate": 8.627659574468085e-06, "loss": 1.0227, "step": 811 }, { "epoch": 0.21595744680851064, "grad_norm": 5.053197383880615, "learning_rate": 8.638297872340427e-06, "loss": 1.2941, "step": 812 }, { "epoch": 0.21622340425531916, "grad_norm": 4.492358684539795, "learning_rate": 8.648936170212768e-06, "loss": 1.2651, "step": 813 }, { "epoch": 0.21648936170212765, "grad_norm": 4.270611763000488, "learning_rate": 8.659574468085108e-06, "loss": 1.2417, "step": 814 }, { "epoch": 0.21675531914893617, "grad_norm": 4.236185073852539, "learning_rate": 8.670212765957447e-06, "loss": 1.1717, "step": 815 }, { "epoch": 0.2170212765957447, "grad_norm": 4.765509128570557, "learning_rate": 8.680851063829788e-06, "loss": 1.3134, "step": 816 }, { "epoch": 0.21728723404255318, "grad_norm": 5.146259784698486, "learning_rate": 8.691489361702128e-06, "loss": 1.4561, "step": 817 }, { "epoch": 0.2175531914893617, "grad_norm": 4.461063385009766, "learning_rate": 8.70212765957447e-06, "loss": 1.2138, "step": 818 }, { "epoch": 0.21781914893617021, "grad_norm": 4.676782608032227, "learning_rate": 8.712765957446809e-06, "loss": 1.2614, "step": 819 }, { "epoch": 0.21808510638297873, "grad_norm": 4.411204814910889, "learning_rate": 8.72340425531915e-06, "loss": 1.3142, "step": 820 }, { "epoch": 0.21835106382978722, "grad_norm": 4.208769798278809, "learning_rate": 8.73404255319149e-06, "loss": 1.4278, "step": 821 }, { "epoch": 0.21861702127659574, "grad_norm": 4.132145404815674, "learning_rate": 8.744680851063831e-06, "loss": 1.214, "step": 822 }, { "epoch": 0.21888297872340426, "grad_norm": 4.246182441711426, "learning_rate": 8.75531914893617e-06, "loss": 1.4079, "step": 823 }, { "epoch": 0.21914893617021278, "grad_norm": 4.859819412231445, "learning_rate": 8.765957446808512e-06, "loss": 1.2343, "step": 824 }, { "epoch": 0.21941489361702127, "grad_norm": 4.722071170806885, "learning_rate": 8.776595744680852e-06, "loss": 1.276, "step": 825 }, { "epoch": 0.21968085106382979, "grad_norm": 4.489323139190674, "learning_rate": 8.787234042553193e-06, "loss": 1.2388, "step": 826 }, { "epoch": 0.2199468085106383, "grad_norm": 4.459937572479248, "learning_rate": 8.797872340425533e-06, "loss": 1.1911, "step": 827 }, { "epoch": 0.22021276595744682, "grad_norm": 4.6483988761901855, "learning_rate": 8.808510638297874e-06, "loss": 1.5344, "step": 828 }, { "epoch": 0.2204787234042553, "grad_norm": 4.822110176086426, "learning_rate": 8.819148936170213e-06, "loss": 1.2885, "step": 829 }, { "epoch": 0.22074468085106383, "grad_norm": 4.722024917602539, "learning_rate": 8.829787234042555e-06, "loss": 1.2496, "step": 830 }, { "epoch": 0.22101063829787235, "grad_norm": 5.146275520324707, "learning_rate": 8.840425531914894e-06, "loss": 1.3017, "step": 831 }, { "epoch": 0.22127659574468084, "grad_norm": 4.489665508270264, "learning_rate": 8.851063829787234e-06, "loss": 1.1933, "step": 832 }, { "epoch": 0.22154255319148936, "grad_norm": 4.318885803222656, "learning_rate": 8.861702127659575e-06, "loss": 1.1849, "step": 833 }, { "epoch": 0.22180851063829787, "grad_norm": 4.603454113006592, "learning_rate": 8.872340425531915e-06, "loss": 1.3538, "step": 834 }, { "epoch": 0.2220744680851064, "grad_norm": 4.531906604766846, "learning_rate": 8.882978723404256e-06, "loss": 1.3913, "step": 835 }, { "epoch": 0.22234042553191488, "grad_norm": 4.391329288482666, "learning_rate": 8.893617021276596e-06, "loss": 1.289, "step": 836 }, { "epoch": 0.2226063829787234, "grad_norm": 5.546546459197998, "learning_rate": 8.904255319148937e-06, "loss": 1.2507, "step": 837 }, { "epoch": 0.22287234042553192, "grad_norm": 4.61740779876709, "learning_rate": 8.914893617021277e-06, "loss": 1.3726, "step": 838 }, { "epoch": 0.22313829787234044, "grad_norm": 4.953794479370117, "learning_rate": 8.925531914893618e-06, "loss": 1.2434, "step": 839 }, { "epoch": 0.22340425531914893, "grad_norm": 4.278190612792969, "learning_rate": 8.936170212765958e-06, "loss": 1.2559, "step": 840 }, { "epoch": 0.22367021276595744, "grad_norm": 4.941532135009766, "learning_rate": 8.946808510638299e-06, "loss": 1.3278, "step": 841 }, { "epoch": 0.22393617021276596, "grad_norm": 4.883002758026123, "learning_rate": 8.957446808510638e-06, "loss": 1.2537, "step": 842 }, { "epoch": 0.22420212765957448, "grad_norm": 4.7191619873046875, "learning_rate": 8.96808510638298e-06, "loss": 1.2726, "step": 843 }, { "epoch": 0.22446808510638297, "grad_norm": 4.509050369262695, "learning_rate": 8.97872340425532e-06, "loss": 1.2025, "step": 844 }, { "epoch": 0.2247340425531915, "grad_norm": 3.9332523345947266, "learning_rate": 8.98936170212766e-06, "loss": 1.1207, "step": 845 }, { "epoch": 0.225, "grad_norm": 4.3128204345703125, "learning_rate": 9e-06, "loss": 1.2433, "step": 846 }, { "epoch": 0.22526595744680852, "grad_norm": 4.253404140472412, "learning_rate": 9.010638297872342e-06, "loss": 1.2193, "step": 847 }, { "epoch": 0.225531914893617, "grad_norm": 4.779951572418213, "learning_rate": 9.021276595744681e-06, "loss": 1.2158, "step": 848 }, { "epoch": 0.22579787234042553, "grad_norm": 4.481555461883545, "learning_rate": 9.031914893617022e-06, "loss": 1.4551, "step": 849 }, { "epoch": 0.22606382978723405, "grad_norm": 4.955724239349365, "learning_rate": 9.042553191489362e-06, "loss": 1.4291, "step": 850 }, { "epoch": 0.22632978723404254, "grad_norm": 4.106208801269531, "learning_rate": 9.053191489361702e-06, "loss": 1.3655, "step": 851 }, { "epoch": 0.22659574468085106, "grad_norm": 4.6892499923706055, "learning_rate": 9.063829787234043e-06, "loss": 1.2516, "step": 852 }, { "epoch": 0.22686170212765958, "grad_norm": 4.553836822509766, "learning_rate": 9.074468085106384e-06, "loss": 1.2107, "step": 853 }, { "epoch": 0.2271276595744681, "grad_norm": 5.072434902191162, "learning_rate": 9.085106382978724e-06, "loss": 1.3445, "step": 854 }, { "epoch": 0.22739361702127658, "grad_norm": 4.725018501281738, "learning_rate": 9.095744680851063e-06, "loss": 1.2701, "step": 855 }, { "epoch": 0.2276595744680851, "grad_norm": 4.630471706390381, "learning_rate": 9.106382978723405e-06, "loss": 1.3229, "step": 856 }, { "epoch": 0.22792553191489362, "grad_norm": 4.0610880851745605, "learning_rate": 9.117021276595746e-06, "loss": 1.0857, "step": 857 }, { "epoch": 0.22819148936170214, "grad_norm": 4.523334503173828, "learning_rate": 9.127659574468086e-06, "loss": 1.446, "step": 858 }, { "epoch": 0.22845744680851063, "grad_norm": 5.042343616485596, "learning_rate": 9.138297872340425e-06, "loss": 1.3728, "step": 859 }, { "epoch": 0.22872340425531915, "grad_norm": 4.5774664878845215, "learning_rate": 9.148936170212767e-06, "loss": 1.3178, "step": 860 }, { "epoch": 0.22898936170212766, "grad_norm": 4.425473213195801, "learning_rate": 9.159574468085108e-06, "loss": 1.3412, "step": 861 }, { "epoch": 0.22925531914893618, "grad_norm": 4.738778114318848, "learning_rate": 9.170212765957447e-06, "loss": 1.3676, "step": 862 }, { "epoch": 0.22952127659574467, "grad_norm": 4.462982654571533, "learning_rate": 9.180851063829787e-06, "loss": 1.2755, "step": 863 }, { "epoch": 0.2297872340425532, "grad_norm": 4.682027816772461, "learning_rate": 9.191489361702128e-06, "loss": 1.2625, "step": 864 }, { "epoch": 0.2300531914893617, "grad_norm": 4.37489652633667, "learning_rate": 9.20212765957447e-06, "loss": 1.291, "step": 865 }, { "epoch": 0.23031914893617023, "grad_norm": 4.652685642242432, "learning_rate": 9.21276595744681e-06, "loss": 1.1782, "step": 866 }, { "epoch": 0.23058510638297872, "grad_norm": 4.401131629943848, "learning_rate": 9.223404255319149e-06, "loss": 1.2626, "step": 867 }, { "epoch": 0.23085106382978723, "grad_norm": 4.712587356567383, "learning_rate": 9.23404255319149e-06, "loss": 1.2888, "step": 868 }, { "epoch": 0.23111702127659575, "grad_norm": 4.425190448760986, "learning_rate": 9.244680851063831e-06, "loss": 1.2566, "step": 869 }, { "epoch": 0.23138297872340424, "grad_norm": 5.040404319763184, "learning_rate": 9.255319148936171e-06, "loss": 1.1856, "step": 870 }, { "epoch": 0.23164893617021276, "grad_norm": 4.372191905975342, "learning_rate": 9.26595744680851e-06, "loss": 1.3153, "step": 871 }, { "epoch": 0.23191489361702128, "grad_norm": 4.518852233886719, "learning_rate": 9.276595744680852e-06, "loss": 1.2652, "step": 872 }, { "epoch": 0.2321808510638298, "grad_norm": 5.675739288330078, "learning_rate": 9.287234042553193e-06, "loss": 1.2654, "step": 873 }, { "epoch": 0.2324468085106383, "grad_norm": 4.503605842590332, "learning_rate": 9.297872340425533e-06, "loss": 1.2693, "step": 874 }, { "epoch": 0.2327127659574468, "grad_norm": 4.573145866394043, "learning_rate": 9.308510638297872e-06, "loss": 1.3126, "step": 875 }, { "epoch": 0.23297872340425532, "grad_norm": 4.833911418914795, "learning_rate": 9.319148936170214e-06, "loss": 1.3583, "step": 876 }, { "epoch": 0.23324468085106384, "grad_norm": 4.768589496612549, "learning_rate": 9.329787234042555e-06, "loss": 1.273, "step": 877 }, { "epoch": 0.23351063829787233, "grad_norm": 4.1959638595581055, "learning_rate": 9.340425531914895e-06, "loss": 1.1774, "step": 878 }, { "epoch": 0.23377659574468085, "grad_norm": 4.231587886810303, "learning_rate": 9.351063829787234e-06, "loss": 1.3215, "step": 879 }, { "epoch": 0.23404255319148937, "grad_norm": 4.725379943847656, "learning_rate": 9.361702127659576e-06, "loss": 1.3458, "step": 880 }, { "epoch": 0.23430851063829788, "grad_norm": 4.831368446350098, "learning_rate": 9.372340425531917e-06, "loss": 1.3499, "step": 881 }, { "epoch": 0.23457446808510637, "grad_norm": 4.571084499359131, "learning_rate": 9.382978723404256e-06, "loss": 1.2071, "step": 882 }, { "epoch": 0.2348404255319149, "grad_norm": 4.676523208618164, "learning_rate": 9.393617021276596e-06, "loss": 1.3009, "step": 883 }, { "epoch": 0.2351063829787234, "grad_norm": 4.406195640563965, "learning_rate": 9.404255319148937e-06, "loss": 1.3127, "step": 884 }, { "epoch": 0.23537234042553193, "grad_norm": 4.958892822265625, "learning_rate": 9.414893617021279e-06, "loss": 1.3724, "step": 885 }, { "epoch": 0.23563829787234042, "grad_norm": 4.296865463256836, "learning_rate": 9.425531914893618e-06, "loss": 1.2535, "step": 886 }, { "epoch": 0.23590425531914894, "grad_norm": 4.650951862335205, "learning_rate": 9.436170212765958e-06, "loss": 1.2432, "step": 887 }, { "epoch": 0.23617021276595745, "grad_norm": 4.3874831199646, "learning_rate": 9.446808510638299e-06, "loss": 1.4075, "step": 888 }, { "epoch": 0.23643617021276594, "grad_norm": 4.246219158172607, "learning_rate": 9.457446808510639e-06, "loss": 1.2787, "step": 889 }, { "epoch": 0.23670212765957446, "grad_norm": 4.379426956176758, "learning_rate": 9.46808510638298e-06, "loss": 1.2586, "step": 890 }, { "epoch": 0.23696808510638298, "grad_norm": 4.164050102233887, "learning_rate": 9.47872340425532e-06, "loss": 1.3071, "step": 891 }, { "epoch": 0.2372340425531915, "grad_norm": 4.572608947753906, "learning_rate": 9.489361702127661e-06, "loss": 1.3735, "step": 892 }, { "epoch": 0.2375, "grad_norm": 4.812750339508057, "learning_rate": 9.5e-06, "loss": 1.3627, "step": 893 }, { "epoch": 0.2377659574468085, "grad_norm": 4.5463056564331055, "learning_rate": 9.510638297872342e-06, "loss": 1.2688, "step": 894 }, { "epoch": 0.23803191489361702, "grad_norm": 4.700718402862549, "learning_rate": 9.521276595744681e-06, "loss": 1.3242, "step": 895 }, { "epoch": 0.23829787234042554, "grad_norm": 4.626996040344238, "learning_rate": 9.531914893617023e-06, "loss": 1.3346, "step": 896 }, { "epoch": 0.23856382978723403, "grad_norm": 4.4340643882751465, "learning_rate": 9.542553191489362e-06, "loss": 1.266, "step": 897 }, { "epoch": 0.23882978723404255, "grad_norm": 4.288296222686768, "learning_rate": 9.553191489361704e-06, "loss": 1.3097, "step": 898 }, { "epoch": 0.23909574468085107, "grad_norm": 4.531320571899414, "learning_rate": 9.563829787234043e-06, "loss": 1.2607, "step": 899 }, { "epoch": 0.2393617021276596, "grad_norm": 4.4416985511779785, "learning_rate": 9.574468085106385e-06, "loss": 1.2443, "step": 900 }, { "epoch": 0.23962765957446808, "grad_norm": 4.752575397491455, "learning_rate": 9.585106382978724e-06, "loss": 1.263, "step": 901 }, { "epoch": 0.2398936170212766, "grad_norm": 4.418696403503418, "learning_rate": 9.595744680851065e-06, "loss": 1.4263, "step": 902 }, { "epoch": 0.2401595744680851, "grad_norm": 4.149245262145996, "learning_rate": 9.606382978723405e-06, "loss": 1.2097, "step": 903 }, { "epoch": 0.2404255319148936, "grad_norm": 4.261038303375244, "learning_rate": 9.617021276595745e-06, "loss": 1.284, "step": 904 }, { "epoch": 0.24069148936170212, "grad_norm": 4.526815414428711, "learning_rate": 9.627659574468086e-06, "loss": 1.2036, "step": 905 }, { "epoch": 0.24095744680851064, "grad_norm": 4.194947719573975, "learning_rate": 9.638297872340426e-06, "loss": 1.3215, "step": 906 }, { "epoch": 0.24122340425531916, "grad_norm": 4.903501987457275, "learning_rate": 9.648936170212767e-06, "loss": 1.2824, "step": 907 }, { "epoch": 0.24148936170212765, "grad_norm": 4.600060939788818, "learning_rate": 9.659574468085106e-06, "loss": 1.3283, "step": 908 }, { "epoch": 0.24175531914893617, "grad_norm": 4.43640661239624, "learning_rate": 9.670212765957448e-06, "loss": 1.2952, "step": 909 }, { "epoch": 0.24202127659574468, "grad_norm": 4.518085479736328, "learning_rate": 9.680851063829787e-06, "loss": 1.2436, "step": 910 }, { "epoch": 0.2422872340425532, "grad_norm": 4.508195877075195, "learning_rate": 9.691489361702129e-06, "loss": 1.448, "step": 911 }, { "epoch": 0.2425531914893617, "grad_norm": 4.132392406463623, "learning_rate": 9.702127659574468e-06, "loss": 1.2467, "step": 912 }, { "epoch": 0.2428191489361702, "grad_norm": 4.272422790527344, "learning_rate": 9.71276595744681e-06, "loss": 1.1718, "step": 913 }, { "epoch": 0.24308510638297873, "grad_norm": 3.7474145889282227, "learning_rate": 9.723404255319149e-06, "loss": 1.2312, "step": 914 }, { "epoch": 0.24335106382978725, "grad_norm": 4.318002700805664, "learning_rate": 9.73404255319149e-06, "loss": 1.2954, "step": 915 }, { "epoch": 0.24361702127659574, "grad_norm": 4.300724506378174, "learning_rate": 9.74468085106383e-06, "loss": 1.324, "step": 916 }, { "epoch": 0.24388297872340425, "grad_norm": 4.362585067749023, "learning_rate": 9.755319148936171e-06, "loss": 1.2939, "step": 917 }, { "epoch": 0.24414893617021277, "grad_norm": 4.705591678619385, "learning_rate": 9.765957446808511e-06, "loss": 1.3472, "step": 918 }, { "epoch": 0.2444148936170213, "grad_norm": 4.612809658050537, "learning_rate": 9.776595744680852e-06, "loss": 1.323, "step": 919 }, { "epoch": 0.24468085106382978, "grad_norm": 4.289991855621338, "learning_rate": 9.787234042553192e-06, "loss": 1.3352, "step": 920 }, { "epoch": 0.2449468085106383, "grad_norm": 4.43556022644043, "learning_rate": 9.797872340425533e-06, "loss": 1.2358, "step": 921 }, { "epoch": 0.24521276595744682, "grad_norm": 4.365429878234863, "learning_rate": 9.808510638297873e-06, "loss": 1.3711, "step": 922 }, { "epoch": 0.2454787234042553, "grad_norm": 4.680497646331787, "learning_rate": 9.819148936170212e-06, "loss": 1.3057, "step": 923 }, { "epoch": 0.24574468085106382, "grad_norm": 4.54257869720459, "learning_rate": 9.829787234042554e-06, "loss": 1.4173, "step": 924 }, { "epoch": 0.24601063829787234, "grad_norm": 4.676888465881348, "learning_rate": 9.840425531914895e-06, "loss": 1.386, "step": 925 }, { "epoch": 0.24627659574468086, "grad_norm": 4.417918682098389, "learning_rate": 9.851063829787235e-06, "loss": 1.4044, "step": 926 }, { "epoch": 0.24654255319148935, "grad_norm": 4.195037841796875, "learning_rate": 9.861702127659574e-06, "loss": 1.2735, "step": 927 }, { "epoch": 0.24680851063829787, "grad_norm": 4.587873935699463, "learning_rate": 9.872340425531915e-06, "loss": 1.2647, "step": 928 }, { "epoch": 0.24707446808510639, "grad_norm": 4.467301845550537, "learning_rate": 9.882978723404257e-06, "loss": 1.387, "step": 929 }, { "epoch": 0.2473404255319149, "grad_norm": 4.606912136077881, "learning_rate": 9.893617021276596e-06, "loss": 1.3188, "step": 930 }, { "epoch": 0.2476063829787234, "grad_norm": 4.470932483673096, "learning_rate": 9.904255319148936e-06, "loss": 1.3166, "step": 931 }, { "epoch": 0.2478723404255319, "grad_norm": 4.317614555358887, "learning_rate": 9.914893617021277e-06, "loss": 1.3514, "step": 932 }, { "epoch": 0.24813829787234043, "grad_norm": 4.443989276885986, "learning_rate": 9.925531914893619e-06, "loss": 1.2636, "step": 933 }, { "epoch": 0.24840425531914895, "grad_norm": 4.796088218688965, "learning_rate": 9.936170212765958e-06, "loss": 1.2652, "step": 934 }, { "epoch": 0.24867021276595744, "grad_norm": 4.967231750488281, "learning_rate": 9.946808510638298e-06, "loss": 1.4264, "step": 935 }, { "epoch": 0.24893617021276596, "grad_norm": 4.075037002563477, "learning_rate": 9.957446808510639e-06, "loss": 1.1912, "step": 936 }, { "epoch": 0.24920212765957447, "grad_norm": 4.505919933319092, "learning_rate": 9.96808510638298e-06, "loss": 1.3069, "step": 937 }, { "epoch": 0.249468085106383, "grad_norm": 4.194151878356934, "learning_rate": 9.97872340425532e-06, "loss": 1.3177, "step": 938 }, { "epoch": 0.24973404255319148, "grad_norm": 4.591639518737793, "learning_rate": 9.98936170212766e-06, "loss": 1.3742, "step": 939 }, { "epoch": 0.25, "grad_norm": 4.259275913238525, "learning_rate": 1e-05, "loss": 1.2802, "step": 940 }, { "epoch": 0.2502659574468085, "grad_norm": 5.042564392089844, "learning_rate": 9.999999922647056e-06, "loss": 1.3329, "step": 941 }, { "epoch": 0.25053191489361704, "grad_norm": 4.728914737701416, "learning_rate": 9.999999690588228e-06, "loss": 1.2498, "step": 942 }, { "epoch": 0.25079787234042555, "grad_norm": 4.191166877746582, "learning_rate": 9.999999303823525e-06, "loss": 1.3322, "step": 943 }, { "epoch": 0.251063829787234, "grad_norm": 4.627315044403076, "learning_rate": 9.999998762352953e-06, "loss": 1.4223, "step": 944 }, { "epoch": 0.25132978723404253, "grad_norm": 4.210728168487549, "learning_rate": 9.999998066176536e-06, "loss": 1.2534, "step": 945 }, { "epoch": 0.25159574468085105, "grad_norm": 4.210343837738037, "learning_rate": 9.99999721529429e-06, "loss": 1.2587, "step": 946 }, { "epoch": 0.25186170212765957, "grad_norm": 4.43513298034668, "learning_rate": 9.999996209706243e-06, "loss": 1.2222, "step": 947 }, { "epoch": 0.2521276595744681, "grad_norm": 4.577609539031982, "learning_rate": 9.999995049412428e-06, "loss": 1.3063, "step": 948 }, { "epoch": 0.2523936170212766, "grad_norm": 4.520708084106445, "learning_rate": 9.99999373441288e-06, "loss": 1.2357, "step": 949 }, { "epoch": 0.2526595744680851, "grad_norm": 4.051931858062744, "learning_rate": 9.999992264707636e-06, "loss": 1.265, "step": 950 }, { "epoch": 0.25292553191489364, "grad_norm": 4.30267333984375, "learning_rate": 9.999990640296747e-06, "loss": 1.1791, "step": 951 }, { "epoch": 0.2531914893617021, "grad_norm": 4.397022724151611, "learning_rate": 9.99998886118026e-06, "loss": 1.2239, "step": 952 }, { "epoch": 0.2534574468085106, "grad_norm": 4.552164077758789, "learning_rate": 9.999986927358231e-06, "loss": 1.3983, "step": 953 }, { "epoch": 0.25372340425531914, "grad_norm": 4.569587707519531, "learning_rate": 9.999984838830721e-06, "loss": 1.3307, "step": 954 }, { "epoch": 0.25398936170212766, "grad_norm": 4.352025985717773, "learning_rate": 9.999982595597793e-06, "loss": 1.3996, "step": 955 }, { "epoch": 0.2542553191489362, "grad_norm": 4.358248710632324, "learning_rate": 9.999980197659515e-06, "loss": 1.4166, "step": 956 }, { "epoch": 0.2545212765957447, "grad_norm": 4.449854373931885, "learning_rate": 9.999977645015963e-06, "loss": 1.2414, "step": 957 }, { "epoch": 0.2547872340425532, "grad_norm": 4.66248083114624, "learning_rate": 9.999974937667217e-06, "loss": 1.2852, "step": 958 }, { "epoch": 0.2550531914893617, "grad_norm": 4.217624187469482, "learning_rate": 9.99997207561336e-06, "loss": 1.2624, "step": 959 }, { "epoch": 0.2553191489361702, "grad_norm": 4.449913501739502, "learning_rate": 9.99996905885448e-06, "loss": 1.2733, "step": 960 }, { "epoch": 0.2555851063829787, "grad_norm": 3.9325287342071533, "learning_rate": 9.99996588739067e-06, "loss": 1.2253, "step": 961 }, { "epoch": 0.25585106382978723, "grad_norm": 4.425497531890869, "learning_rate": 9.99996256122203e-06, "loss": 1.1233, "step": 962 }, { "epoch": 0.25611702127659575, "grad_norm": 3.946796178817749, "learning_rate": 9.99995908034866e-06, "loss": 1.2961, "step": 963 }, { "epoch": 0.25638297872340426, "grad_norm": 4.145402431488037, "learning_rate": 9.999955444770671e-06, "loss": 1.3856, "step": 964 }, { "epoch": 0.2566489361702128, "grad_norm": 4.4032206535339355, "learning_rate": 9.99995165448817e-06, "loss": 1.3649, "step": 965 }, { "epoch": 0.2569148936170213, "grad_norm": 4.492345333099365, "learning_rate": 9.999947709501282e-06, "loss": 1.2992, "step": 966 }, { "epoch": 0.25718085106382976, "grad_norm": 4.298032760620117, "learning_rate": 9.999943609810125e-06, "loss": 1.3756, "step": 967 }, { "epoch": 0.2574468085106383, "grad_norm": 3.9896862506866455, "learning_rate": 9.999939355414825e-06, "loss": 1.2034, "step": 968 }, { "epoch": 0.2577127659574468, "grad_norm": 4.537227630615234, "learning_rate": 9.999934946315516e-06, "loss": 1.2959, "step": 969 }, { "epoch": 0.2579787234042553, "grad_norm": 4.087522029876709, "learning_rate": 9.999930382512331e-06, "loss": 1.2928, "step": 970 }, { "epoch": 0.25824468085106383, "grad_norm": 4.388976573944092, "learning_rate": 9.999925664005415e-06, "loss": 1.2452, "step": 971 }, { "epoch": 0.25851063829787235, "grad_norm": 4.264836311340332, "learning_rate": 9.99992079079491e-06, "loss": 1.3477, "step": 972 }, { "epoch": 0.25877659574468087, "grad_norm": 4.548455715179443, "learning_rate": 9.999915762880971e-06, "loss": 1.2818, "step": 973 }, { "epoch": 0.2590425531914894, "grad_norm": 4.096053600311279, "learning_rate": 9.99991058026375e-06, "loss": 1.1407, "step": 974 }, { "epoch": 0.25930851063829785, "grad_norm": 4.8142571449279785, "learning_rate": 9.99990524294341e-06, "loss": 1.5322, "step": 975 }, { "epoch": 0.25957446808510637, "grad_norm": 4.194404602050781, "learning_rate": 9.999899750920115e-06, "loss": 1.2874, "step": 976 }, { "epoch": 0.2598404255319149, "grad_norm": 3.905287504196167, "learning_rate": 9.999894104194037e-06, "loss": 1.1986, "step": 977 }, { "epoch": 0.2601063829787234, "grad_norm": 4.401111602783203, "learning_rate": 9.999888302765347e-06, "loss": 1.2148, "step": 978 }, { "epoch": 0.2603723404255319, "grad_norm": 4.558286666870117, "learning_rate": 9.999882346634225e-06, "loss": 1.247, "step": 979 }, { "epoch": 0.26063829787234044, "grad_norm": 3.902086019515991, "learning_rate": 9.999876235800859e-06, "loss": 1.3395, "step": 980 }, { "epoch": 0.26090425531914896, "grad_norm": 4.327469825744629, "learning_rate": 9.999869970265434e-06, "loss": 1.301, "step": 981 }, { "epoch": 0.2611702127659574, "grad_norm": 4.4269609451293945, "learning_rate": 9.999863550028147e-06, "loss": 1.3436, "step": 982 }, { "epoch": 0.26143617021276594, "grad_norm": 4.277595520019531, "learning_rate": 9.999856975089193e-06, "loss": 1.3487, "step": 983 }, { "epoch": 0.26170212765957446, "grad_norm": 5.5637311935424805, "learning_rate": 9.99985024544878e-06, "loss": 1.3848, "step": 984 }, { "epoch": 0.261968085106383, "grad_norm": 4.938830852508545, "learning_rate": 9.999843361107111e-06, "loss": 1.2637, "step": 985 }, { "epoch": 0.2622340425531915, "grad_norm": 4.1854376792907715, "learning_rate": 9.999836322064404e-06, "loss": 1.2802, "step": 986 }, { "epoch": 0.2625, "grad_norm": 4.120711803436279, "learning_rate": 9.999829128320873e-06, "loss": 1.2468, "step": 987 }, { "epoch": 0.26276595744680853, "grad_norm": 4.207146167755127, "learning_rate": 9.999821779876744e-06, "loss": 1.2662, "step": 988 }, { "epoch": 0.26303191489361705, "grad_norm": 4.666594505310059, "learning_rate": 9.999814276732242e-06, "loss": 1.3755, "step": 989 }, { "epoch": 0.2632978723404255, "grad_norm": 4.344621181488037, "learning_rate": 9.9998066188876e-06, "loss": 1.3096, "step": 990 }, { "epoch": 0.263563829787234, "grad_norm": 4.433095455169678, "learning_rate": 9.999798806343055e-06, "loss": 1.3499, "step": 991 }, { "epoch": 0.26382978723404255, "grad_norm": 4.92564058303833, "learning_rate": 9.999790839098847e-06, "loss": 1.281, "step": 992 }, { "epoch": 0.26409574468085106, "grad_norm": 4.6375603675842285, "learning_rate": 9.999782717155225e-06, "loss": 1.3261, "step": 993 }, { "epoch": 0.2643617021276596, "grad_norm": 4.372560024261475, "learning_rate": 9.999774440512438e-06, "loss": 1.186, "step": 994 }, { "epoch": 0.2646276595744681, "grad_norm": 4.910377502441406, "learning_rate": 9.999766009170743e-06, "loss": 1.4187, "step": 995 }, { "epoch": 0.2648936170212766, "grad_norm": 4.599401473999023, "learning_rate": 9.999757423130402e-06, "loss": 1.4278, "step": 996 }, { "epoch": 0.2651595744680851, "grad_norm": 4.204658508300781, "learning_rate": 9.999748682391682e-06, "loss": 1.3376, "step": 997 }, { "epoch": 0.2654255319148936, "grad_norm": 4.476613998413086, "learning_rate": 9.999739786954849e-06, "loss": 1.1909, "step": 998 }, { "epoch": 0.2656914893617021, "grad_norm": 4.173623561859131, "learning_rate": 9.999730736820182e-06, "loss": 1.2678, "step": 999 }, { "epoch": 0.26595744680851063, "grad_norm": 4.294970989227295, "learning_rate": 9.999721531987958e-06, "loss": 1.224, "step": 1000 }, { "epoch": 0.26595744680851063, "eval_loss": 1.3182601928710938, "eval_runtime": 12.5838, "eval_samples_per_second": 31.787, "eval_steps_per_second": 3.973, "step": 1000 }, { "epoch": 0.26622340425531915, "grad_norm": 4.1402411460876465, "learning_rate": 9.999712172458462e-06, "loss": 1.1836, "step": 1001 }, { "epoch": 0.26648936170212767, "grad_norm": 5.045607566833496, "learning_rate": 9.999702658231987e-06, "loss": 1.2545, "step": 1002 }, { "epoch": 0.2667553191489362, "grad_norm": 4.2975921630859375, "learning_rate": 9.999692989308827e-06, "loss": 1.4903, "step": 1003 }, { "epoch": 0.2670212765957447, "grad_norm": 4.366122245788574, "learning_rate": 9.999683165689277e-06, "loss": 1.3197, "step": 1004 }, { "epoch": 0.26728723404255317, "grad_norm": 4.20319938659668, "learning_rate": 9.999673187373644e-06, "loss": 1.5023, "step": 1005 }, { "epoch": 0.2675531914893617, "grad_norm": 4.779364109039307, "learning_rate": 9.999663054362236e-06, "loss": 1.4043, "step": 1006 }, { "epoch": 0.2678191489361702, "grad_norm": 4.18774938583374, "learning_rate": 9.999652766655367e-06, "loss": 1.2043, "step": 1007 }, { "epoch": 0.2680851063829787, "grad_norm": 4.277698040008545, "learning_rate": 9.999642324253357e-06, "loss": 1.3012, "step": 1008 }, { "epoch": 0.26835106382978724, "grad_norm": 4.673196315765381, "learning_rate": 9.999631727156523e-06, "loss": 1.4028, "step": 1009 }, { "epoch": 0.26861702127659576, "grad_norm": 3.9610633850097656, "learning_rate": 9.9996209753652e-06, "loss": 1.2564, "step": 1010 }, { "epoch": 0.2688829787234043, "grad_norm": 4.724634170532227, "learning_rate": 9.999610068879717e-06, "loss": 1.2371, "step": 1011 }, { "epoch": 0.2691489361702128, "grad_norm": 4.770898342132568, "learning_rate": 9.999599007700411e-06, "loss": 1.3291, "step": 1012 }, { "epoch": 0.26941489361702126, "grad_norm": 4.2460551261901855, "learning_rate": 9.999587791827627e-06, "loss": 1.321, "step": 1013 }, { "epoch": 0.2696808510638298, "grad_norm": 4.29102897644043, "learning_rate": 9.99957642126171e-06, "loss": 1.2469, "step": 1014 }, { "epoch": 0.2699468085106383, "grad_norm": 4.516227722167969, "learning_rate": 9.999564896003013e-06, "loss": 1.2158, "step": 1015 }, { "epoch": 0.2702127659574468, "grad_norm": 4.530557632446289, "learning_rate": 9.999553216051892e-06, "loss": 1.3454, "step": 1016 }, { "epoch": 0.27047872340425533, "grad_norm": 4.2970290184021, "learning_rate": 9.999541381408706e-06, "loss": 1.3784, "step": 1017 }, { "epoch": 0.27074468085106385, "grad_norm": 4.136434078216553, "learning_rate": 9.999529392073825e-06, "loss": 1.2268, "step": 1018 }, { "epoch": 0.27101063829787236, "grad_norm": 4.108096122741699, "learning_rate": 9.999517248047618e-06, "loss": 1.2798, "step": 1019 }, { "epoch": 0.2712765957446808, "grad_norm": 4.367121696472168, "learning_rate": 9.99950494933046e-06, "loss": 1.2629, "step": 1020 }, { "epoch": 0.27154255319148934, "grad_norm": 4.400355815887451, "learning_rate": 9.999492495922735e-06, "loss": 1.3386, "step": 1021 }, { "epoch": 0.27180851063829786, "grad_norm": 4.384739875793457, "learning_rate": 9.999479887824826e-06, "loss": 1.2904, "step": 1022 }, { "epoch": 0.2720744680851064, "grad_norm": 4.273925304412842, "learning_rate": 9.999467125037121e-06, "loss": 1.268, "step": 1023 }, { "epoch": 0.2723404255319149, "grad_norm": 4.222406387329102, "learning_rate": 9.999454207560019e-06, "loss": 1.2875, "step": 1024 }, { "epoch": 0.2726063829787234, "grad_norm": 4.79681396484375, "learning_rate": 9.999441135393917e-06, "loss": 1.3315, "step": 1025 }, { "epoch": 0.27287234042553193, "grad_norm": 4.473938941955566, "learning_rate": 9.99942790853922e-06, "loss": 1.4033, "step": 1026 }, { "epoch": 0.27313829787234045, "grad_norm": 4.128412246704102, "learning_rate": 9.999414526996337e-06, "loss": 1.1818, "step": 1027 }, { "epoch": 0.2734042553191489, "grad_norm": 4.2525739669799805, "learning_rate": 9.999400990765683e-06, "loss": 1.2004, "step": 1028 }, { "epoch": 0.27367021276595743, "grad_norm": 4.565985202789307, "learning_rate": 9.999387299847677e-06, "loss": 1.3035, "step": 1029 }, { "epoch": 0.27393617021276595, "grad_norm": 4.308706283569336, "learning_rate": 9.99937345424274e-06, "loss": 1.2976, "step": 1030 }, { "epoch": 0.27420212765957447, "grad_norm": 4.31046724319458, "learning_rate": 9.999359453951303e-06, "loss": 1.3213, "step": 1031 }, { "epoch": 0.274468085106383, "grad_norm": 4.618355751037598, "learning_rate": 9.9993452989738e-06, "loss": 1.3231, "step": 1032 }, { "epoch": 0.2747340425531915, "grad_norm": 4.580687999725342, "learning_rate": 9.999330989310665e-06, "loss": 1.3654, "step": 1033 }, { "epoch": 0.275, "grad_norm": 4.229262351989746, "learning_rate": 9.999316524962347e-06, "loss": 1.2944, "step": 1034 }, { "epoch": 0.2752659574468085, "grad_norm": 3.708747148513794, "learning_rate": 9.999301905929286e-06, "loss": 1.154, "step": 1035 }, { "epoch": 0.275531914893617, "grad_norm": 4.275104999542236, "learning_rate": 9.999287132211938e-06, "loss": 1.2148, "step": 1036 }, { "epoch": 0.2757978723404255, "grad_norm": 4.225863456726074, "learning_rate": 9.999272203810763e-06, "loss": 1.4705, "step": 1037 }, { "epoch": 0.27606382978723404, "grad_norm": 4.132633209228516, "learning_rate": 9.999257120726219e-06, "loss": 1.2538, "step": 1038 }, { "epoch": 0.27632978723404256, "grad_norm": 5.643379211425781, "learning_rate": 9.999241882958772e-06, "loss": 1.2564, "step": 1039 }, { "epoch": 0.2765957446808511, "grad_norm": 4.306319713592529, "learning_rate": 9.999226490508897e-06, "loss": 1.4085, "step": 1040 }, { "epoch": 0.2768617021276596, "grad_norm": 4.2022247314453125, "learning_rate": 9.99921094337707e-06, "loss": 1.3632, "step": 1041 }, { "epoch": 0.2771276595744681, "grad_norm": 4.866800785064697, "learning_rate": 9.999195241563768e-06, "loss": 1.3262, "step": 1042 }, { "epoch": 0.2773936170212766, "grad_norm": 4.111828327178955, "learning_rate": 9.99917938506948e-06, "loss": 1.3087, "step": 1043 }, { "epoch": 0.2776595744680851, "grad_norm": 4.37149715423584, "learning_rate": 9.999163373894696e-06, "loss": 1.2089, "step": 1044 }, { "epoch": 0.2779255319148936, "grad_norm": 4.524958610534668, "learning_rate": 9.999147208039912e-06, "loss": 1.1935, "step": 1045 }, { "epoch": 0.2781914893617021, "grad_norm": 4.5271406173706055, "learning_rate": 9.999130887505627e-06, "loss": 1.3111, "step": 1046 }, { "epoch": 0.27845744680851064, "grad_norm": 4.4966301918029785, "learning_rate": 9.999114412292347e-06, "loss": 1.3695, "step": 1047 }, { "epoch": 0.27872340425531916, "grad_norm": 4.8100714683532715, "learning_rate": 9.999097782400582e-06, "loss": 1.3152, "step": 1048 }, { "epoch": 0.2789893617021277, "grad_norm": 4.238595962524414, "learning_rate": 9.999080997830845e-06, "loss": 1.2533, "step": 1049 }, { "epoch": 0.27925531914893614, "grad_norm": 4.036017417907715, "learning_rate": 9.999064058583657e-06, "loss": 1.1984, "step": 1050 }, { "epoch": 0.27952127659574466, "grad_norm": 4.587932586669922, "learning_rate": 9.99904696465954e-06, "loss": 1.2216, "step": 1051 }, { "epoch": 0.2797872340425532, "grad_norm": 5.027749538421631, "learning_rate": 9.999029716059026e-06, "loss": 1.4618, "step": 1052 }, { "epoch": 0.2800531914893617, "grad_norm": 4.331791400909424, "learning_rate": 9.999012312782645e-06, "loss": 1.2566, "step": 1053 }, { "epoch": 0.2803191489361702, "grad_norm": 4.737422943115234, "learning_rate": 9.99899475483094e-06, "loss": 1.2935, "step": 1054 }, { "epoch": 0.28058510638297873, "grad_norm": 4.8805832862854, "learning_rate": 9.998977042204449e-06, "loss": 1.3277, "step": 1055 }, { "epoch": 0.28085106382978725, "grad_norm": 4.296173095703125, "learning_rate": 9.998959174903725e-06, "loss": 1.341, "step": 1056 }, { "epoch": 0.28111702127659577, "grad_norm": 4.3713788986206055, "learning_rate": 9.998941152929316e-06, "loss": 1.308, "step": 1057 }, { "epoch": 0.28138297872340423, "grad_norm": 4.576108932495117, "learning_rate": 9.998922976281785e-06, "loss": 1.2585, "step": 1058 }, { "epoch": 0.28164893617021275, "grad_norm": 4.187806129455566, "learning_rate": 9.998904644961689e-06, "loss": 1.393, "step": 1059 }, { "epoch": 0.28191489361702127, "grad_norm": 4.360199928283691, "learning_rate": 9.9988861589696e-06, "loss": 1.4, "step": 1060 }, { "epoch": 0.2821808510638298, "grad_norm": 4.283745288848877, "learning_rate": 9.998867518306087e-06, "loss": 1.2823, "step": 1061 }, { "epoch": 0.2824468085106383, "grad_norm": 3.8223369121551514, "learning_rate": 9.998848722971727e-06, "loss": 1.3144, "step": 1062 }, { "epoch": 0.2827127659574468, "grad_norm": 4.405114650726318, "learning_rate": 9.998829772967103e-06, "loss": 1.4051, "step": 1063 }, { "epoch": 0.28297872340425534, "grad_norm": 4.547544479370117, "learning_rate": 9.9988106682928e-06, "loss": 1.2622, "step": 1064 }, { "epoch": 0.28324468085106386, "grad_norm": 3.850954055786133, "learning_rate": 9.998791408949408e-06, "loss": 1.197, "step": 1065 }, { "epoch": 0.2835106382978723, "grad_norm": 3.994758367538452, "learning_rate": 9.998771994937528e-06, "loss": 1.1907, "step": 1066 }, { "epoch": 0.28377659574468084, "grad_norm": 4.24208927154541, "learning_rate": 9.998752426257754e-06, "loss": 1.4078, "step": 1067 }, { "epoch": 0.28404255319148936, "grad_norm": 4.435787200927734, "learning_rate": 9.998732702910697e-06, "loss": 1.2044, "step": 1068 }, { "epoch": 0.2843085106382979, "grad_norm": 4.169311046600342, "learning_rate": 9.998712824896963e-06, "loss": 1.2126, "step": 1069 }, { "epoch": 0.2845744680851064, "grad_norm": 4.478437900543213, "learning_rate": 9.99869279221717e-06, "loss": 1.3164, "step": 1070 }, { "epoch": 0.2848404255319149, "grad_norm": 4.775943756103516, "learning_rate": 9.998672604871936e-06, "loss": 1.3169, "step": 1071 }, { "epoch": 0.2851063829787234, "grad_norm": 4.637179374694824, "learning_rate": 9.998652262861888e-06, "loss": 1.2441, "step": 1072 }, { "epoch": 0.2853723404255319, "grad_norm": 4.511475086212158, "learning_rate": 9.998631766187651e-06, "loss": 1.3766, "step": 1073 }, { "epoch": 0.2856382978723404, "grad_norm": 4.503199100494385, "learning_rate": 9.998611114849866e-06, "loss": 1.1787, "step": 1074 }, { "epoch": 0.2859042553191489, "grad_norm": 4.549198627471924, "learning_rate": 9.998590308849164e-06, "loss": 1.3229, "step": 1075 }, { "epoch": 0.28617021276595744, "grad_norm": 4.182891368865967, "learning_rate": 9.998569348186194e-06, "loss": 1.2659, "step": 1076 }, { "epoch": 0.28643617021276596, "grad_norm": 4.964444160461426, "learning_rate": 9.998548232861604e-06, "loss": 1.4196, "step": 1077 }, { "epoch": 0.2867021276595745, "grad_norm": 4.905456066131592, "learning_rate": 9.998526962876047e-06, "loss": 1.3089, "step": 1078 }, { "epoch": 0.286968085106383, "grad_norm": 4.207391738891602, "learning_rate": 9.998505538230179e-06, "loss": 1.3231, "step": 1079 }, { "epoch": 0.2872340425531915, "grad_norm": 4.414906024932861, "learning_rate": 9.998483958924666e-06, "loss": 1.229, "step": 1080 }, { "epoch": 0.2875, "grad_norm": 4.2714667320251465, "learning_rate": 9.998462224960176e-06, "loss": 1.4204, "step": 1081 }, { "epoch": 0.2877659574468085, "grad_norm": 4.423734188079834, "learning_rate": 9.998440336337376e-06, "loss": 1.3774, "step": 1082 }, { "epoch": 0.288031914893617, "grad_norm": 4.450468063354492, "learning_rate": 9.998418293056949e-06, "loss": 1.2639, "step": 1083 }, { "epoch": 0.28829787234042553, "grad_norm": 4.328600883483887, "learning_rate": 9.998396095119575e-06, "loss": 1.3594, "step": 1084 }, { "epoch": 0.28856382978723405, "grad_norm": 4.951174259185791, "learning_rate": 9.998373742525941e-06, "loss": 1.4862, "step": 1085 }, { "epoch": 0.28882978723404257, "grad_norm": 4.484705924987793, "learning_rate": 9.998351235276738e-06, "loss": 1.3577, "step": 1086 }, { "epoch": 0.2890957446808511, "grad_norm": 4.428178310394287, "learning_rate": 9.998328573372664e-06, "loss": 1.2438, "step": 1087 }, { "epoch": 0.28936170212765955, "grad_norm": 4.682640552520752, "learning_rate": 9.998305756814419e-06, "loss": 1.3493, "step": 1088 }, { "epoch": 0.28962765957446807, "grad_norm": 4.30879020690918, "learning_rate": 9.998282785602709e-06, "loss": 1.253, "step": 1089 }, { "epoch": 0.2898936170212766, "grad_norm": 4.327608108520508, "learning_rate": 9.998259659738243e-06, "loss": 1.3574, "step": 1090 }, { "epoch": 0.2901595744680851, "grad_norm": 3.996189594268799, "learning_rate": 9.998236379221742e-06, "loss": 1.1811, "step": 1091 }, { "epoch": 0.2904255319148936, "grad_norm": 4.262546062469482, "learning_rate": 9.99821294405392e-06, "loss": 1.1899, "step": 1092 }, { "epoch": 0.29069148936170214, "grad_norm": 3.7779383659362793, "learning_rate": 9.998189354235506e-06, "loss": 1.3034, "step": 1093 }, { "epoch": 0.29095744680851066, "grad_norm": 4.748449325561523, "learning_rate": 9.998165609767228e-06, "loss": 1.1943, "step": 1094 }, { "epoch": 0.2912234042553192, "grad_norm": 4.325401782989502, "learning_rate": 9.998141710649822e-06, "loss": 1.2955, "step": 1095 }, { "epoch": 0.29148936170212764, "grad_norm": 4.276817321777344, "learning_rate": 9.998117656884025e-06, "loss": 1.2853, "step": 1096 }, { "epoch": 0.29175531914893615, "grad_norm": 4.66014289855957, "learning_rate": 9.998093448470585e-06, "loss": 1.2643, "step": 1097 }, { "epoch": 0.29202127659574467, "grad_norm": 3.963014602661133, "learning_rate": 9.998069085410249e-06, "loss": 1.2145, "step": 1098 }, { "epoch": 0.2922872340425532, "grad_norm": 4.040323734283447, "learning_rate": 9.99804456770377e-06, "loss": 1.3845, "step": 1099 }, { "epoch": 0.2925531914893617, "grad_norm": 3.8575801849365234, "learning_rate": 9.99801989535191e-06, "loss": 1.131, "step": 1100 }, { "epoch": 0.2928191489361702, "grad_norm": 4.067200183868408, "learning_rate": 9.997995068355428e-06, "loss": 1.352, "step": 1101 }, { "epoch": 0.29308510638297874, "grad_norm": 4.207942962646484, "learning_rate": 9.997970086715096e-06, "loss": 1.2372, "step": 1102 }, { "epoch": 0.2933510638297872, "grad_norm": 4.058019638061523, "learning_rate": 9.997944950431684e-06, "loss": 1.203, "step": 1103 }, { "epoch": 0.2936170212765957, "grad_norm": 4.622230052947998, "learning_rate": 9.99791965950597e-06, "loss": 1.3916, "step": 1104 }, { "epoch": 0.29388297872340424, "grad_norm": 4.3508076667785645, "learning_rate": 9.997894213938738e-06, "loss": 1.3344, "step": 1105 }, { "epoch": 0.29414893617021276, "grad_norm": 3.9889092445373535, "learning_rate": 9.997868613730775e-06, "loss": 1.1658, "step": 1106 }, { "epoch": 0.2944148936170213, "grad_norm": 4.091287136077881, "learning_rate": 9.997842858882873e-06, "loss": 1.3258, "step": 1107 }, { "epoch": 0.2946808510638298, "grad_norm": 4.280172824859619, "learning_rate": 9.997816949395828e-06, "loss": 1.3231, "step": 1108 }, { "epoch": 0.2949468085106383, "grad_norm": 4.268125057220459, "learning_rate": 9.997790885270444e-06, "loss": 1.1984, "step": 1109 }, { "epoch": 0.29521276595744683, "grad_norm": 4.030393600463867, "learning_rate": 9.997764666507523e-06, "loss": 1.3441, "step": 1110 }, { "epoch": 0.2954787234042553, "grad_norm": 4.591287136077881, "learning_rate": 9.997738293107882e-06, "loss": 1.3059, "step": 1111 }, { "epoch": 0.2957446808510638, "grad_norm": 5.225955486297607, "learning_rate": 9.997711765072333e-06, "loss": 1.3236, "step": 1112 }, { "epoch": 0.29601063829787233, "grad_norm": 4.161701679229736, "learning_rate": 9.997685082401698e-06, "loss": 1.2, "step": 1113 }, { "epoch": 0.29627659574468085, "grad_norm": 4.316693305969238, "learning_rate": 9.997658245096802e-06, "loss": 1.2758, "step": 1114 }, { "epoch": 0.29654255319148937, "grad_norm": 4.311786651611328, "learning_rate": 9.997631253158477e-06, "loss": 1.1873, "step": 1115 }, { "epoch": 0.2968085106382979, "grad_norm": 4.271190643310547, "learning_rate": 9.997604106587555e-06, "loss": 1.1661, "step": 1116 }, { "epoch": 0.2970744680851064, "grad_norm": 4.620399475097656, "learning_rate": 9.99757680538488e-06, "loss": 1.3542, "step": 1117 }, { "epoch": 0.2973404255319149, "grad_norm": 4.287705421447754, "learning_rate": 9.997549349551295e-06, "loss": 1.3467, "step": 1118 }, { "epoch": 0.2976063829787234, "grad_norm": 4.158224105834961, "learning_rate": 9.997521739087647e-06, "loss": 1.229, "step": 1119 }, { "epoch": 0.2978723404255319, "grad_norm": 4.308200836181641, "learning_rate": 9.997493973994793e-06, "loss": 1.3478, "step": 1120 }, { "epoch": 0.2981382978723404, "grad_norm": 4.467398643493652, "learning_rate": 9.997466054273593e-06, "loss": 1.2729, "step": 1121 }, { "epoch": 0.29840425531914894, "grad_norm": 4.264455318450928, "learning_rate": 9.997437979924908e-06, "loss": 1.234, "step": 1122 }, { "epoch": 0.29867021276595745, "grad_norm": 4.258848190307617, "learning_rate": 9.99740975094961e-06, "loss": 1.1682, "step": 1123 }, { "epoch": 0.298936170212766, "grad_norm": 4.3061089515686035, "learning_rate": 9.99738136734857e-06, "loss": 1.3241, "step": 1124 }, { "epoch": 0.2992021276595745, "grad_norm": 4.324080467224121, "learning_rate": 9.997352829122667e-06, "loss": 1.254, "step": 1125 }, { "epoch": 0.29946808510638295, "grad_norm": 4.312755584716797, "learning_rate": 9.997324136272784e-06, "loss": 1.309, "step": 1126 }, { "epoch": 0.29973404255319147, "grad_norm": 4.023726463317871, "learning_rate": 9.997295288799806e-06, "loss": 1.238, "step": 1127 }, { "epoch": 0.3, "grad_norm": 4.355762004852295, "learning_rate": 9.99726628670463e-06, "loss": 1.2271, "step": 1128 }, { "epoch": 0.3002659574468085, "grad_norm": 4.85224723815918, "learning_rate": 9.997237129988154e-06, "loss": 1.2849, "step": 1129 }, { "epoch": 0.300531914893617, "grad_norm": 4.464909553527832, "learning_rate": 9.997207818651273e-06, "loss": 1.2992, "step": 1130 }, { "epoch": 0.30079787234042554, "grad_norm": 3.7525863647460938, "learning_rate": 9.997178352694902e-06, "loss": 1.1764, "step": 1131 }, { "epoch": 0.30106382978723406, "grad_norm": 4.892136096954346, "learning_rate": 9.997148732119947e-06, "loss": 1.4041, "step": 1132 }, { "epoch": 0.3013297872340426, "grad_norm": 3.8774726390838623, "learning_rate": 9.99711895692733e-06, "loss": 1.1936, "step": 1133 }, { "epoch": 0.30159574468085104, "grad_norm": 4.585043907165527, "learning_rate": 9.997089027117966e-06, "loss": 1.2402, "step": 1134 }, { "epoch": 0.30186170212765956, "grad_norm": 4.731383800506592, "learning_rate": 9.997058942692786e-06, "loss": 1.3886, "step": 1135 }, { "epoch": 0.3021276595744681, "grad_norm": 4.4259033203125, "learning_rate": 9.997028703652718e-06, "loss": 1.4784, "step": 1136 }, { "epoch": 0.3023936170212766, "grad_norm": 4.584959030151367, "learning_rate": 9.996998309998699e-06, "loss": 1.1575, "step": 1137 }, { "epoch": 0.3026595744680851, "grad_norm": 4.300727844238281, "learning_rate": 9.996967761731668e-06, "loss": 1.3999, "step": 1138 }, { "epoch": 0.30292553191489363, "grad_norm": 4.30328893661499, "learning_rate": 9.996937058852575e-06, "loss": 1.3061, "step": 1139 }, { "epoch": 0.30319148936170215, "grad_norm": 4.1981964111328125, "learning_rate": 9.996906201362361e-06, "loss": 1.3078, "step": 1140 }, { "epoch": 0.3034574468085106, "grad_norm": 4.507598876953125, "learning_rate": 9.99687518926199e-06, "loss": 1.3732, "step": 1141 }, { "epoch": 0.30372340425531913, "grad_norm": 4.559037685394287, "learning_rate": 9.996844022552416e-06, "loss": 1.3447, "step": 1142 }, { "epoch": 0.30398936170212765, "grad_norm": 4.10542106628418, "learning_rate": 9.996812701234604e-06, "loss": 1.2118, "step": 1143 }, { "epoch": 0.30425531914893617, "grad_norm": 4.441193103790283, "learning_rate": 9.996781225309526e-06, "loss": 1.3549, "step": 1144 }, { "epoch": 0.3045212765957447, "grad_norm": 4.166191577911377, "learning_rate": 9.996749594778153e-06, "loss": 1.3067, "step": 1145 }, { "epoch": 0.3047872340425532, "grad_norm": 4.284362316131592, "learning_rate": 9.996717809641464e-06, "loss": 1.31, "step": 1146 }, { "epoch": 0.3050531914893617, "grad_norm": 4.457339286804199, "learning_rate": 9.996685869900444e-06, "loss": 1.2858, "step": 1147 }, { "epoch": 0.30531914893617024, "grad_norm": 5.572897434234619, "learning_rate": 9.99665377555608e-06, "loss": 1.3094, "step": 1148 }, { "epoch": 0.3055851063829787, "grad_norm": 3.9291319847106934, "learning_rate": 9.996621526609364e-06, "loss": 1.1499, "step": 1149 }, { "epoch": 0.3058510638297872, "grad_norm": 4.23716926574707, "learning_rate": 9.996589123061297e-06, "loss": 1.1395, "step": 1150 }, { "epoch": 0.30611702127659574, "grad_norm": 4.1819047927856445, "learning_rate": 9.99655656491288e-06, "loss": 1.2152, "step": 1151 }, { "epoch": 0.30638297872340425, "grad_norm": 4.467685222625732, "learning_rate": 9.99652385216512e-06, "loss": 1.38, "step": 1152 }, { "epoch": 0.30664893617021277, "grad_norm": 3.723454236984253, "learning_rate": 9.996490984819027e-06, "loss": 1.1745, "step": 1153 }, { "epoch": 0.3069148936170213, "grad_norm": 4.097151756286621, "learning_rate": 9.996457962875623e-06, "loss": 1.3743, "step": 1154 }, { "epoch": 0.3071808510638298, "grad_norm": 4.7414326667785645, "learning_rate": 9.996424786335925e-06, "loss": 1.4252, "step": 1155 }, { "epoch": 0.3074468085106383, "grad_norm": 3.7857699394226074, "learning_rate": 9.996391455200963e-06, "loss": 1.2984, "step": 1156 }, { "epoch": 0.3077127659574468, "grad_norm": 4.953484535217285, "learning_rate": 9.996357969471767e-06, "loss": 1.3539, "step": 1157 }, { "epoch": 0.3079787234042553, "grad_norm": 4.564802646636963, "learning_rate": 9.996324329149372e-06, "loss": 1.2833, "step": 1158 }, { "epoch": 0.3082446808510638, "grad_norm": 4.2867045402526855, "learning_rate": 9.99629053423482e-06, "loss": 1.2933, "step": 1159 }, { "epoch": 0.30851063829787234, "grad_norm": 4.2070817947387695, "learning_rate": 9.996256584729157e-06, "loss": 1.163, "step": 1160 }, { "epoch": 0.30877659574468086, "grad_norm": 4.603311061859131, "learning_rate": 9.996222480633433e-06, "loss": 1.2404, "step": 1161 }, { "epoch": 0.3090425531914894, "grad_norm": 4.443660736083984, "learning_rate": 9.996188221948702e-06, "loss": 1.3518, "step": 1162 }, { "epoch": 0.3093085106382979, "grad_norm": 4.2897443771362305, "learning_rate": 9.996153808676025e-06, "loss": 1.2786, "step": 1163 }, { "epoch": 0.30957446808510636, "grad_norm": 4.69590425491333, "learning_rate": 9.996119240816469e-06, "loss": 1.3259, "step": 1164 }, { "epoch": 0.3098404255319149, "grad_norm": 4.064958095550537, "learning_rate": 9.996084518371101e-06, "loss": 1.2768, "step": 1165 }, { "epoch": 0.3101063829787234, "grad_norm": 4.3534626960754395, "learning_rate": 9.996049641340994e-06, "loss": 1.3245, "step": 1166 }, { "epoch": 0.3103723404255319, "grad_norm": 4.278623580932617, "learning_rate": 9.996014609727232e-06, "loss": 1.405, "step": 1167 }, { "epoch": 0.31063829787234043, "grad_norm": 4.835923671722412, "learning_rate": 9.995979423530893e-06, "loss": 1.2416, "step": 1168 }, { "epoch": 0.31090425531914895, "grad_norm": 4.191746711730957, "learning_rate": 9.99594408275307e-06, "loss": 1.154, "step": 1169 }, { "epoch": 0.31117021276595747, "grad_norm": 3.9082558155059814, "learning_rate": 9.995908587394854e-06, "loss": 1.2412, "step": 1170 }, { "epoch": 0.311436170212766, "grad_norm": 4.342267036437988, "learning_rate": 9.995872937457345e-06, "loss": 1.2312, "step": 1171 }, { "epoch": 0.31170212765957445, "grad_norm": 4.569537162780762, "learning_rate": 9.995837132941646e-06, "loss": 1.3551, "step": 1172 }, { "epoch": 0.31196808510638296, "grad_norm": 4.246980667114258, "learning_rate": 9.995801173848863e-06, "loss": 1.2517, "step": 1173 }, { "epoch": 0.3122340425531915, "grad_norm": 4.276669025421143, "learning_rate": 9.995765060180111e-06, "loss": 1.2417, "step": 1174 }, { "epoch": 0.3125, "grad_norm": 4.076509952545166, "learning_rate": 9.995728791936505e-06, "loss": 1.2837, "step": 1175 }, { "epoch": 0.3127659574468085, "grad_norm": 4.078117370605469, "learning_rate": 9.99569236911917e-06, "loss": 1.1589, "step": 1176 }, { "epoch": 0.31303191489361704, "grad_norm": 4.253208637237549, "learning_rate": 9.995655791729231e-06, "loss": 1.4023, "step": 1177 }, { "epoch": 0.31329787234042555, "grad_norm": 4.0782790184021, "learning_rate": 9.99561905976782e-06, "loss": 1.2094, "step": 1178 }, { "epoch": 0.313563829787234, "grad_norm": 4.714814186096191, "learning_rate": 9.995582173236073e-06, "loss": 1.2883, "step": 1179 }, { "epoch": 0.31382978723404253, "grad_norm": 4.640500068664551, "learning_rate": 9.995545132135133e-06, "loss": 1.3784, "step": 1180 }, { "epoch": 0.31409574468085105, "grad_norm": 4.722717761993408, "learning_rate": 9.995507936466144e-06, "loss": 1.2644, "step": 1181 }, { "epoch": 0.31436170212765957, "grad_norm": 4.296687602996826, "learning_rate": 9.99547058623026e-06, "loss": 1.2238, "step": 1182 }, { "epoch": 0.3146276595744681, "grad_norm": 4.157870769500732, "learning_rate": 9.995433081428631e-06, "loss": 1.2275, "step": 1183 }, { "epoch": 0.3148936170212766, "grad_norm": 4.162895202636719, "learning_rate": 9.995395422062424e-06, "loss": 1.2697, "step": 1184 }, { "epoch": 0.3151595744680851, "grad_norm": 4.142743110656738, "learning_rate": 9.9953576081328e-06, "loss": 1.2514, "step": 1185 }, { "epoch": 0.31542553191489364, "grad_norm": 4.504545211791992, "learning_rate": 9.995319639640932e-06, "loss": 1.1996, "step": 1186 }, { "epoch": 0.3156914893617021, "grad_norm": 4.5642523765563965, "learning_rate": 9.995281516587992e-06, "loss": 1.4783, "step": 1187 }, { "epoch": 0.3159574468085106, "grad_norm": 4.14572286605835, "learning_rate": 9.99524323897516e-06, "loss": 1.3261, "step": 1188 }, { "epoch": 0.31622340425531914, "grad_norm": 4.159525394439697, "learning_rate": 9.995204806803622e-06, "loss": 1.3492, "step": 1189 }, { "epoch": 0.31648936170212766, "grad_norm": 3.9404852390289307, "learning_rate": 9.995166220074566e-06, "loss": 1.2726, "step": 1190 }, { "epoch": 0.3167553191489362, "grad_norm": 4.158994197845459, "learning_rate": 9.995127478789186e-06, "loss": 1.2472, "step": 1191 }, { "epoch": 0.3170212765957447, "grad_norm": 4.277184009552002, "learning_rate": 9.995088582948682e-06, "loss": 1.3549, "step": 1192 }, { "epoch": 0.3172872340425532, "grad_norm": 4.210202217102051, "learning_rate": 9.995049532554253e-06, "loss": 1.313, "step": 1193 }, { "epoch": 0.3175531914893617, "grad_norm": 4.146048545837402, "learning_rate": 9.995010327607113e-06, "loss": 1.3272, "step": 1194 }, { "epoch": 0.3178191489361702, "grad_norm": 4.287917137145996, "learning_rate": 9.994970968108473e-06, "loss": 1.4158, "step": 1195 }, { "epoch": 0.3180851063829787, "grad_norm": 3.8834691047668457, "learning_rate": 9.99493145405955e-06, "loss": 1.1957, "step": 1196 }, { "epoch": 0.31835106382978723, "grad_norm": 4.134634494781494, "learning_rate": 9.994891785461565e-06, "loss": 1.3806, "step": 1197 }, { "epoch": 0.31861702127659575, "grad_norm": 4.137069225311279, "learning_rate": 9.99485196231575e-06, "loss": 1.2337, "step": 1198 }, { "epoch": 0.31888297872340426, "grad_norm": 3.9084503650665283, "learning_rate": 9.994811984623332e-06, "loss": 1.1263, "step": 1199 }, { "epoch": 0.3191489361702128, "grad_norm": 4.515985012054443, "learning_rate": 9.994771852385552e-06, "loss": 1.3851, "step": 1200 }, { "epoch": 0.3194148936170213, "grad_norm": 4.150672912597656, "learning_rate": 9.994731565603651e-06, "loss": 1.2034, "step": 1201 }, { "epoch": 0.31968085106382976, "grad_norm": 4.727832317352295, "learning_rate": 9.994691124278874e-06, "loss": 1.3987, "step": 1202 }, { "epoch": 0.3199468085106383, "grad_norm": 4.292087554931641, "learning_rate": 9.994650528412472e-06, "loss": 1.3757, "step": 1203 }, { "epoch": 0.3202127659574468, "grad_norm": 4.135016918182373, "learning_rate": 9.994609778005704e-06, "loss": 1.3413, "step": 1204 }, { "epoch": 0.3204787234042553, "grad_norm": 4.273712635040283, "learning_rate": 9.994568873059829e-06, "loss": 1.2102, "step": 1205 }, { "epoch": 0.32074468085106383, "grad_norm": 4.216573715209961, "learning_rate": 9.994527813576111e-06, "loss": 1.3998, "step": 1206 }, { "epoch": 0.32101063829787235, "grad_norm": 3.847257375717163, "learning_rate": 9.994486599555823e-06, "loss": 1.1265, "step": 1207 }, { "epoch": 0.32127659574468087, "grad_norm": 4.784033298492432, "learning_rate": 9.99444523100024e-06, "loss": 1.3363, "step": 1208 }, { "epoch": 0.3215425531914894, "grad_norm": 4.474783897399902, "learning_rate": 9.994403707910642e-06, "loss": 1.2317, "step": 1209 }, { "epoch": 0.32180851063829785, "grad_norm": 4.004277229309082, "learning_rate": 9.994362030288312e-06, "loss": 1.2477, "step": 1210 }, { "epoch": 0.32207446808510637, "grad_norm": 3.9819071292877197, "learning_rate": 9.99432019813454e-06, "loss": 1.1898, "step": 1211 }, { "epoch": 0.3223404255319149, "grad_norm": 3.8308217525482178, "learning_rate": 9.994278211450622e-06, "loss": 1.287, "step": 1212 }, { "epoch": 0.3226063829787234, "grad_norm": 4.272090435028076, "learning_rate": 9.994236070237854e-06, "loss": 1.3905, "step": 1213 }, { "epoch": 0.3228723404255319, "grad_norm": 4.1817169189453125, "learning_rate": 9.994193774497544e-06, "loss": 1.2512, "step": 1214 }, { "epoch": 0.32313829787234044, "grad_norm": 3.9769554138183594, "learning_rate": 9.994151324231e-06, "loss": 1.2287, "step": 1215 }, { "epoch": 0.32340425531914896, "grad_norm": 4.290254592895508, "learning_rate": 9.994108719439533e-06, "loss": 1.2741, "step": 1216 }, { "epoch": 0.3236702127659574, "grad_norm": 4.185919284820557, "learning_rate": 9.994065960124462e-06, "loss": 1.3203, "step": 1217 }, { "epoch": 0.32393617021276594, "grad_norm": 4.25853967666626, "learning_rate": 9.994023046287109e-06, "loss": 1.3062, "step": 1218 }, { "epoch": 0.32420212765957446, "grad_norm": 3.9912209510803223, "learning_rate": 9.993979977928805e-06, "loss": 1.1988, "step": 1219 }, { "epoch": 0.324468085106383, "grad_norm": 3.865492343902588, "learning_rate": 9.993936755050881e-06, "loss": 1.1626, "step": 1220 }, { "epoch": 0.3247340425531915, "grad_norm": 4.017344951629639, "learning_rate": 9.993893377654673e-06, "loss": 1.3626, "step": 1221 }, { "epoch": 0.325, "grad_norm": 3.9618587493896484, "learning_rate": 9.993849845741525e-06, "loss": 1.361, "step": 1222 }, { "epoch": 0.32526595744680853, "grad_norm": 4.2321648597717285, "learning_rate": 9.993806159312783e-06, "loss": 1.3773, "step": 1223 }, { "epoch": 0.32553191489361705, "grad_norm": 4.570196151733398, "learning_rate": 9.9937623183698e-06, "loss": 1.3895, "step": 1224 }, { "epoch": 0.3257978723404255, "grad_norm": 3.9867353439331055, "learning_rate": 9.99371832291393e-06, "loss": 1.1623, "step": 1225 }, { "epoch": 0.326063829787234, "grad_norm": 5.1412200927734375, "learning_rate": 9.993674172946536e-06, "loss": 1.3987, "step": 1226 }, { "epoch": 0.32632978723404255, "grad_norm": 4.0850605964660645, "learning_rate": 9.993629868468984e-06, "loss": 1.2399, "step": 1227 }, { "epoch": 0.32659574468085106, "grad_norm": 5.263411521911621, "learning_rate": 9.993585409482645e-06, "loss": 1.311, "step": 1228 }, { "epoch": 0.3268617021276596, "grad_norm": 3.8653786182403564, "learning_rate": 9.993540795988895e-06, "loss": 1.1391, "step": 1229 }, { "epoch": 0.3271276595744681, "grad_norm": 4.475793838500977, "learning_rate": 9.993496027989112e-06, "loss": 1.2644, "step": 1230 }, { "epoch": 0.3273936170212766, "grad_norm": 4.395388603210449, "learning_rate": 9.993451105484682e-06, "loss": 1.342, "step": 1231 }, { "epoch": 0.3276595744680851, "grad_norm": 4.290927410125732, "learning_rate": 9.993406028476997e-06, "loss": 1.3893, "step": 1232 }, { "epoch": 0.3279255319148936, "grad_norm": 4.348012924194336, "learning_rate": 9.993360796967451e-06, "loss": 1.2903, "step": 1233 }, { "epoch": 0.3281914893617021, "grad_norm": 4.174604415893555, "learning_rate": 9.993315410957442e-06, "loss": 1.2951, "step": 1234 }, { "epoch": 0.32845744680851063, "grad_norm": 4.359421253204346, "learning_rate": 9.993269870448375e-06, "loss": 1.4433, "step": 1235 }, { "epoch": 0.32872340425531915, "grad_norm": 4.25851583480835, "learning_rate": 9.99322417544166e-06, "loss": 1.2445, "step": 1236 }, { "epoch": 0.32898936170212767, "grad_norm": 4.110776901245117, "learning_rate": 9.993178325938711e-06, "loss": 1.3569, "step": 1237 }, { "epoch": 0.3292553191489362, "grad_norm": 4.008944988250732, "learning_rate": 9.993132321940947e-06, "loss": 1.2227, "step": 1238 }, { "epoch": 0.3295212765957447, "grad_norm": 4.228448390960693, "learning_rate": 9.993086163449787e-06, "loss": 1.2388, "step": 1239 }, { "epoch": 0.32978723404255317, "grad_norm": 4.701793193817139, "learning_rate": 9.993039850466664e-06, "loss": 1.5212, "step": 1240 }, { "epoch": 0.3300531914893617, "grad_norm": 4.4202094078063965, "learning_rate": 9.99299338299301e-06, "loss": 1.2413, "step": 1241 }, { "epoch": 0.3303191489361702, "grad_norm": 4.218541622161865, "learning_rate": 9.992946761030261e-06, "loss": 1.2663, "step": 1242 }, { "epoch": 0.3305851063829787, "grad_norm": 4.355581283569336, "learning_rate": 9.99289998457986e-06, "loss": 1.3233, "step": 1243 }, { "epoch": 0.33085106382978724, "grad_norm": 4.184298992156982, "learning_rate": 9.992853053643257e-06, "loss": 1.3291, "step": 1244 }, { "epoch": 0.33111702127659576, "grad_norm": 4.030219078063965, "learning_rate": 9.992805968221902e-06, "loss": 1.3502, "step": 1245 }, { "epoch": 0.3313829787234043, "grad_norm": 4.068756103515625, "learning_rate": 9.992758728317252e-06, "loss": 1.1977, "step": 1246 }, { "epoch": 0.3316489361702128, "grad_norm": 4.332919120788574, "learning_rate": 9.99271133393077e-06, "loss": 1.2899, "step": 1247 }, { "epoch": 0.33191489361702126, "grad_norm": 3.9694416522979736, "learning_rate": 9.992663785063919e-06, "loss": 1.3366, "step": 1248 }, { "epoch": 0.3321808510638298, "grad_norm": 3.924436569213867, "learning_rate": 9.992616081718171e-06, "loss": 1.2552, "step": 1249 }, { "epoch": 0.3324468085106383, "grad_norm": 4.128008842468262, "learning_rate": 9.992568223895007e-06, "loss": 1.2872, "step": 1250 }, { "epoch": 0.3327127659574468, "grad_norm": 4.744760036468506, "learning_rate": 9.992520211595902e-06, "loss": 1.2885, "step": 1251 }, { "epoch": 0.33297872340425533, "grad_norm": 3.722013235092163, "learning_rate": 9.992472044822344e-06, "loss": 1.1684, "step": 1252 }, { "epoch": 0.33324468085106385, "grad_norm": 4.375733852386475, "learning_rate": 9.992423723575822e-06, "loss": 1.4177, "step": 1253 }, { "epoch": 0.33351063829787236, "grad_norm": 4.03129243850708, "learning_rate": 9.992375247857833e-06, "loss": 1.3669, "step": 1254 }, { "epoch": 0.3337765957446808, "grad_norm": 3.828651189804077, "learning_rate": 9.992326617669876e-06, "loss": 1.3573, "step": 1255 }, { "epoch": 0.33404255319148934, "grad_norm": 4.016900062561035, "learning_rate": 9.992277833013457e-06, "loss": 1.2265, "step": 1256 }, { "epoch": 0.33430851063829786, "grad_norm": 4.38175630569458, "learning_rate": 9.992228893890084e-06, "loss": 1.3774, "step": 1257 }, { "epoch": 0.3345744680851064, "grad_norm": 4.081117153167725, "learning_rate": 9.992179800301269e-06, "loss": 1.2978, "step": 1258 }, { "epoch": 0.3348404255319149, "grad_norm": 4.280460834503174, "learning_rate": 9.992130552248535e-06, "loss": 1.1316, "step": 1259 }, { "epoch": 0.3351063829787234, "grad_norm": 4.5057268142700195, "learning_rate": 9.992081149733404e-06, "loss": 1.3776, "step": 1260 }, { "epoch": 0.33537234042553193, "grad_norm": 3.8671257495880127, "learning_rate": 9.992031592757405e-06, "loss": 1.3541, "step": 1261 }, { "epoch": 0.33563829787234045, "grad_norm": 4.478667736053467, "learning_rate": 9.991981881322072e-06, "loss": 1.3155, "step": 1262 }, { "epoch": 0.3359042553191489, "grad_norm": 5.32509183883667, "learning_rate": 9.991932015428941e-06, "loss": 1.3662, "step": 1263 }, { "epoch": 0.33617021276595743, "grad_norm": 4.138638973236084, "learning_rate": 9.991881995079558e-06, "loss": 1.3641, "step": 1264 }, { "epoch": 0.33643617021276595, "grad_norm": 4.780951499938965, "learning_rate": 9.991831820275466e-06, "loss": 1.4626, "step": 1265 }, { "epoch": 0.33670212765957447, "grad_norm": 3.6165192127227783, "learning_rate": 9.991781491018223e-06, "loss": 1.2914, "step": 1266 }, { "epoch": 0.336968085106383, "grad_norm": 4.3747992515563965, "learning_rate": 9.991731007309382e-06, "loss": 1.2756, "step": 1267 }, { "epoch": 0.3372340425531915, "grad_norm": 5.0972580909729, "learning_rate": 9.991680369150507e-06, "loss": 1.4694, "step": 1268 }, { "epoch": 0.3375, "grad_norm": 3.841791868209839, "learning_rate": 9.991629576543164e-06, "loss": 1.1905, "step": 1269 }, { "epoch": 0.3377659574468085, "grad_norm": 4.1475324630737305, "learning_rate": 9.991578629488926e-06, "loss": 1.3379, "step": 1270 }, { "epoch": 0.338031914893617, "grad_norm": 4.152446269989014, "learning_rate": 9.991527527989366e-06, "loss": 1.1402, "step": 1271 }, { "epoch": 0.3382978723404255, "grad_norm": 4.5577006340026855, "learning_rate": 9.99147627204607e-06, "loss": 1.3844, "step": 1272 }, { "epoch": 0.33856382978723404, "grad_norm": 4.605076313018799, "learning_rate": 9.991424861660621e-06, "loss": 1.4557, "step": 1273 }, { "epoch": 0.33882978723404256, "grad_norm": 4.045496940612793, "learning_rate": 9.99137329683461e-06, "loss": 1.2976, "step": 1274 }, { "epoch": 0.3390957446808511, "grad_norm": 4.148492336273193, "learning_rate": 9.991321577569632e-06, "loss": 1.4065, "step": 1275 }, { "epoch": 0.3393617021276596, "grad_norm": 4.128026485443115, "learning_rate": 9.991269703867288e-06, "loss": 1.3056, "step": 1276 }, { "epoch": 0.3396276595744681, "grad_norm": 4.140103340148926, "learning_rate": 9.991217675729184e-06, "loss": 1.3136, "step": 1277 }, { "epoch": 0.3398936170212766, "grad_norm": 4.122238636016846, "learning_rate": 9.991165493156927e-06, "loss": 1.2575, "step": 1278 }, { "epoch": 0.3401595744680851, "grad_norm": 4.590948104858398, "learning_rate": 9.991113156152134e-06, "loss": 1.2896, "step": 1279 }, { "epoch": 0.3404255319148936, "grad_norm": 4.469196796417236, "learning_rate": 9.991060664716423e-06, "loss": 1.4088, "step": 1280 }, { "epoch": 0.3406914893617021, "grad_norm": 4.643316268920898, "learning_rate": 9.99100801885142e-06, "loss": 1.4124, "step": 1281 }, { "epoch": 0.34095744680851064, "grad_norm": 4.106162071228027, "learning_rate": 9.990955218558751e-06, "loss": 1.3555, "step": 1282 }, { "epoch": 0.34122340425531916, "grad_norm": 4.337850093841553, "learning_rate": 9.990902263840053e-06, "loss": 1.1865, "step": 1283 }, { "epoch": 0.3414893617021277, "grad_norm": 3.8557538986206055, "learning_rate": 9.990849154696963e-06, "loss": 1.2002, "step": 1284 }, { "epoch": 0.34175531914893614, "grad_norm": 4.412120342254639, "learning_rate": 9.990795891131125e-06, "loss": 1.3584, "step": 1285 }, { "epoch": 0.34202127659574466, "grad_norm": 5.199094772338867, "learning_rate": 9.990742473144184e-06, "loss": 1.3745, "step": 1286 }, { "epoch": 0.3422872340425532, "grad_norm": 3.8888189792633057, "learning_rate": 9.990688900737795e-06, "loss": 1.2443, "step": 1287 }, { "epoch": 0.3425531914893617, "grad_norm": 3.81540846824646, "learning_rate": 9.990635173913616e-06, "loss": 1.347, "step": 1288 }, { "epoch": 0.3428191489361702, "grad_norm": 4.090488910675049, "learning_rate": 9.990581292673309e-06, "loss": 1.283, "step": 1289 }, { "epoch": 0.34308510638297873, "grad_norm": 4.115976333618164, "learning_rate": 9.990527257018544e-06, "loss": 1.2893, "step": 1290 }, { "epoch": 0.34335106382978725, "grad_norm": 3.9170165061950684, "learning_rate": 9.990473066950987e-06, "loss": 1.2133, "step": 1291 }, { "epoch": 0.34361702127659577, "grad_norm": 3.8994202613830566, "learning_rate": 9.990418722472317e-06, "loss": 1.1986, "step": 1292 }, { "epoch": 0.34388297872340423, "grad_norm": 3.8675310611724854, "learning_rate": 9.990364223584218e-06, "loss": 1.16, "step": 1293 }, { "epoch": 0.34414893617021275, "grad_norm": 4.010871410369873, "learning_rate": 9.990309570288374e-06, "loss": 1.2748, "step": 1294 }, { "epoch": 0.34441489361702127, "grad_norm": 4.264376163482666, "learning_rate": 9.990254762586477e-06, "loss": 1.167, "step": 1295 }, { "epoch": 0.3446808510638298, "grad_norm": 4.201075553894043, "learning_rate": 9.990199800480222e-06, "loss": 1.2061, "step": 1296 }, { "epoch": 0.3449468085106383, "grad_norm": 4.1181535720825195, "learning_rate": 9.99014468397131e-06, "loss": 1.188, "step": 1297 }, { "epoch": 0.3452127659574468, "grad_norm": 3.747342824935913, "learning_rate": 9.990089413061445e-06, "loss": 1.1944, "step": 1298 }, { "epoch": 0.34547872340425534, "grad_norm": 4.067655086517334, "learning_rate": 9.990033987752341e-06, "loss": 1.1876, "step": 1299 }, { "epoch": 0.34574468085106386, "grad_norm": 4.090482234954834, "learning_rate": 9.989978408045709e-06, "loss": 1.2122, "step": 1300 }, { "epoch": 0.3460106382978723, "grad_norm": 3.879619598388672, "learning_rate": 9.989922673943271e-06, "loss": 1.2099, "step": 1301 }, { "epoch": 0.34627659574468084, "grad_norm": 4.814892768859863, "learning_rate": 9.98986678544675e-06, "loss": 1.3879, "step": 1302 }, { "epoch": 0.34654255319148936, "grad_norm": 4.234111309051514, "learning_rate": 9.989810742557875e-06, "loss": 1.5134, "step": 1303 }, { "epoch": 0.3468085106382979, "grad_norm": 4.2561469078063965, "learning_rate": 9.989754545278381e-06, "loss": 1.3591, "step": 1304 }, { "epoch": 0.3470744680851064, "grad_norm": 4.519184112548828, "learning_rate": 9.989698193610007e-06, "loss": 1.1676, "step": 1305 }, { "epoch": 0.3473404255319149, "grad_norm": 4.09921407699585, "learning_rate": 9.989641687554496e-06, "loss": 1.238, "step": 1306 }, { "epoch": 0.3476063829787234, "grad_norm": 3.9749245643615723, "learning_rate": 9.989585027113598e-06, "loss": 1.2444, "step": 1307 }, { "epoch": 0.3478723404255319, "grad_norm": 4.225282192230225, "learning_rate": 9.989528212289064e-06, "loss": 1.1724, "step": 1308 }, { "epoch": 0.3481382978723404, "grad_norm": 4.391535758972168, "learning_rate": 9.98947124308265e-06, "loss": 1.4058, "step": 1309 }, { "epoch": 0.3484042553191489, "grad_norm": 3.8815417289733887, "learning_rate": 9.989414119496126e-06, "loss": 1.2464, "step": 1310 }, { "epoch": 0.34867021276595744, "grad_norm": 4.186168193817139, "learning_rate": 9.989356841531252e-06, "loss": 1.2393, "step": 1311 }, { "epoch": 0.34893617021276596, "grad_norm": 3.9777474403381348, "learning_rate": 9.989299409189802e-06, "loss": 1.1674, "step": 1312 }, { "epoch": 0.3492021276595745, "grad_norm": 4.088747978210449, "learning_rate": 9.989241822473557e-06, "loss": 1.2024, "step": 1313 }, { "epoch": 0.349468085106383, "grad_norm": 4.297309398651123, "learning_rate": 9.989184081384295e-06, "loss": 1.384, "step": 1314 }, { "epoch": 0.3497340425531915, "grad_norm": 3.6362228393554688, "learning_rate": 9.989126185923803e-06, "loss": 1.266, "step": 1315 }, { "epoch": 0.35, "grad_norm": 4.015252113342285, "learning_rate": 9.989068136093873e-06, "loss": 1.2447, "step": 1316 }, { "epoch": 0.3502659574468085, "grad_norm": 3.9256210327148438, "learning_rate": 9.989009931896302e-06, "loss": 1.2674, "step": 1317 }, { "epoch": 0.350531914893617, "grad_norm": 4.108496189117432, "learning_rate": 9.988951573332888e-06, "loss": 1.232, "step": 1318 }, { "epoch": 0.35079787234042553, "grad_norm": 4.183421611785889, "learning_rate": 9.98889306040544e-06, "loss": 1.2652, "step": 1319 }, { "epoch": 0.35106382978723405, "grad_norm": 4.556921482086182, "learning_rate": 9.988834393115768e-06, "loss": 1.3536, "step": 1320 }, { "epoch": 0.35132978723404257, "grad_norm": 4.081547737121582, "learning_rate": 9.988775571465684e-06, "loss": 1.3168, "step": 1321 }, { "epoch": 0.3515957446808511, "grad_norm": 4.136814594268799, "learning_rate": 9.988716595457011e-06, "loss": 1.3124, "step": 1322 }, { "epoch": 0.35186170212765955, "grad_norm": 4.485897064208984, "learning_rate": 9.988657465091572e-06, "loss": 1.3164, "step": 1323 }, { "epoch": 0.35212765957446807, "grad_norm": 4.273427963256836, "learning_rate": 9.988598180371198e-06, "loss": 1.2051, "step": 1324 }, { "epoch": 0.3523936170212766, "grad_norm": 3.715895175933838, "learning_rate": 9.988538741297724e-06, "loss": 1.0755, "step": 1325 }, { "epoch": 0.3526595744680851, "grad_norm": 3.932218551635742, "learning_rate": 9.98847914787299e-06, "loss": 1.4028, "step": 1326 }, { "epoch": 0.3529255319148936, "grad_norm": 4.555146217346191, "learning_rate": 9.988419400098834e-06, "loss": 1.2805, "step": 1327 }, { "epoch": 0.35319148936170214, "grad_norm": 4.291238784790039, "learning_rate": 9.98835949797711e-06, "loss": 1.3683, "step": 1328 }, { "epoch": 0.35345744680851066, "grad_norm": 4.525993824005127, "learning_rate": 9.98829944150967e-06, "loss": 1.2788, "step": 1329 }, { "epoch": 0.3537234042553192, "grad_norm": 3.771448850631714, "learning_rate": 9.988239230698373e-06, "loss": 1.3256, "step": 1330 }, { "epoch": 0.35398936170212764, "grad_norm": 4.0126633644104, "learning_rate": 9.988178865545081e-06, "loss": 1.2984, "step": 1331 }, { "epoch": 0.35425531914893615, "grad_norm": 3.521714210510254, "learning_rate": 9.988118346051663e-06, "loss": 1.192, "step": 1332 }, { "epoch": 0.35452127659574467, "grad_norm": 4.065241813659668, "learning_rate": 9.98805767221999e-06, "loss": 1.383, "step": 1333 }, { "epoch": 0.3547872340425532, "grad_norm": 4.3708720207214355, "learning_rate": 9.987996844051939e-06, "loss": 1.3586, "step": 1334 }, { "epoch": 0.3550531914893617, "grad_norm": 4.104064464569092, "learning_rate": 9.987935861549393e-06, "loss": 1.2583, "step": 1335 }, { "epoch": 0.3553191489361702, "grad_norm": 4.293087959289551, "learning_rate": 9.98787472471424e-06, "loss": 1.3606, "step": 1336 }, { "epoch": 0.35558510638297874, "grad_norm": 3.906818151473999, "learning_rate": 9.98781343354837e-06, "loss": 1.2305, "step": 1337 }, { "epoch": 0.3558510638297872, "grad_norm": 4.049057960510254, "learning_rate": 9.98775198805368e-06, "loss": 1.1915, "step": 1338 }, { "epoch": 0.3561170212765957, "grad_norm": 4.160476207733154, "learning_rate": 9.987690388232071e-06, "loss": 1.3273, "step": 1339 }, { "epoch": 0.35638297872340424, "grad_norm": 4.2301344871521, "learning_rate": 9.98762863408545e-06, "loss": 1.242, "step": 1340 }, { "epoch": 0.35664893617021276, "grad_norm": 4.272438049316406, "learning_rate": 9.987566725615725e-06, "loss": 1.3378, "step": 1341 }, { "epoch": 0.3569148936170213, "grad_norm": 4.048627853393555, "learning_rate": 9.987504662824814e-06, "loss": 1.2938, "step": 1342 }, { "epoch": 0.3571808510638298, "grad_norm": 4.272396087646484, "learning_rate": 9.987442445714637e-06, "loss": 1.363, "step": 1343 }, { "epoch": 0.3574468085106383, "grad_norm": 4.04710578918457, "learning_rate": 9.98738007428712e-06, "loss": 1.3823, "step": 1344 }, { "epoch": 0.35771276595744683, "grad_norm": 4.724300384521484, "learning_rate": 9.98731754854419e-06, "loss": 1.4429, "step": 1345 }, { "epoch": 0.3579787234042553, "grad_norm": 4.071347713470459, "learning_rate": 9.987254868487783e-06, "loss": 1.2203, "step": 1346 }, { "epoch": 0.3582446808510638, "grad_norm": 3.8509132862091064, "learning_rate": 9.987192034119839e-06, "loss": 1.2774, "step": 1347 }, { "epoch": 0.35851063829787233, "grad_norm": 3.7690467834472656, "learning_rate": 9.987129045442304e-06, "loss": 1.1786, "step": 1348 }, { "epoch": 0.35877659574468085, "grad_norm": 4.102452754974365, "learning_rate": 9.987065902457122e-06, "loss": 1.232, "step": 1349 }, { "epoch": 0.35904255319148937, "grad_norm": 4.353301048278809, "learning_rate": 9.98700260516625e-06, "loss": 1.204, "step": 1350 }, { "epoch": 0.3593085106382979, "grad_norm": 4.020050048828125, "learning_rate": 9.986939153571647e-06, "loss": 1.2681, "step": 1351 }, { "epoch": 0.3595744680851064, "grad_norm": 4.041562080383301, "learning_rate": 9.986875547675274e-06, "loss": 1.2093, "step": 1352 }, { "epoch": 0.3598404255319149, "grad_norm": 3.9428937435150146, "learning_rate": 9.9868117874791e-06, "loss": 1.4088, "step": 1353 }, { "epoch": 0.3601063829787234, "grad_norm": 3.8776018619537354, "learning_rate": 9.986747872985099e-06, "loss": 1.2944, "step": 1354 }, { "epoch": 0.3603723404255319, "grad_norm": 4.4396796226501465, "learning_rate": 9.986683804195248e-06, "loss": 1.2328, "step": 1355 }, { "epoch": 0.3606382978723404, "grad_norm": 6.8338093757629395, "learning_rate": 9.986619581111528e-06, "loss": 1.2865, "step": 1356 }, { "epoch": 0.36090425531914894, "grad_norm": 3.8783535957336426, "learning_rate": 9.986555203735926e-06, "loss": 1.2004, "step": 1357 }, { "epoch": 0.36117021276595745, "grad_norm": 4.063074111938477, "learning_rate": 9.986490672070438e-06, "loss": 1.2033, "step": 1358 }, { "epoch": 0.361436170212766, "grad_norm": 5.602739334106445, "learning_rate": 9.986425986117055e-06, "loss": 1.2993, "step": 1359 }, { "epoch": 0.3617021276595745, "grad_norm": 3.687655448913574, "learning_rate": 9.986361145877783e-06, "loss": 1.1984, "step": 1360 }, { "epoch": 0.36196808510638295, "grad_norm": 4.312001705169678, "learning_rate": 9.986296151354625e-06, "loss": 1.2943, "step": 1361 }, { "epoch": 0.36223404255319147, "grad_norm": 4.478762149810791, "learning_rate": 9.986231002549594e-06, "loss": 1.294, "step": 1362 }, { "epoch": 0.3625, "grad_norm": 4.86306095123291, "learning_rate": 9.986165699464706e-06, "loss": 1.5325, "step": 1363 }, { "epoch": 0.3627659574468085, "grad_norm": 4.426929950714111, "learning_rate": 9.986100242101982e-06, "loss": 1.3561, "step": 1364 }, { "epoch": 0.363031914893617, "grad_norm": 4.546680450439453, "learning_rate": 9.986034630463443e-06, "loss": 1.3143, "step": 1365 }, { "epoch": 0.36329787234042554, "grad_norm": 4.5038957595825195, "learning_rate": 9.985968864551123e-06, "loss": 1.2948, "step": 1366 }, { "epoch": 0.36356382978723406, "grad_norm": 4.967344284057617, "learning_rate": 9.985902944367058e-06, "loss": 1.2844, "step": 1367 }, { "epoch": 0.3638297872340426, "grad_norm": 3.8887312412261963, "learning_rate": 9.985836869913283e-06, "loss": 1.2737, "step": 1368 }, { "epoch": 0.36409574468085104, "grad_norm": 4.1144795417785645, "learning_rate": 9.985770641191847e-06, "loss": 1.3379, "step": 1369 }, { "epoch": 0.36436170212765956, "grad_norm": 4.12211275100708, "learning_rate": 9.985704258204798e-06, "loss": 1.3465, "step": 1370 }, { "epoch": 0.3646276595744681, "grad_norm": 4.424558162689209, "learning_rate": 9.985637720954188e-06, "loss": 1.0785, "step": 1371 }, { "epoch": 0.3648936170212766, "grad_norm": 4.308188438415527, "learning_rate": 9.985571029442078e-06, "loss": 1.4829, "step": 1372 }, { "epoch": 0.3651595744680851, "grad_norm": 3.587887763977051, "learning_rate": 9.98550418367053e-06, "loss": 1.2684, "step": 1373 }, { "epoch": 0.36542553191489363, "grad_norm": 4.300267696380615, "learning_rate": 9.985437183641612e-06, "loss": 1.305, "step": 1374 }, { "epoch": 0.36569148936170215, "grad_norm": 4.035099506378174, "learning_rate": 9.985370029357399e-06, "loss": 1.2249, "step": 1375 }, { "epoch": 0.3659574468085106, "grad_norm": 3.958627939224243, "learning_rate": 9.985302720819967e-06, "loss": 1.2176, "step": 1376 }, { "epoch": 0.36622340425531913, "grad_norm": 4.257254600524902, "learning_rate": 9.9852352580314e-06, "loss": 1.2714, "step": 1377 }, { "epoch": 0.36648936170212765, "grad_norm": 4.782037258148193, "learning_rate": 9.985167640993784e-06, "loss": 1.4979, "step": 1378 }, { "epoch": 0.36675531914893617, "grad_norm": 4.400300025939941, "learning_rate": 9.985099869709213e-06, "loss": 1.3505, "step": 1379 }, { "epoch": 0.3670212765957447, "grad_norm": 4.289068698883057, "learning_rate": 9.985031944179781e-06, "loss": 1.2113, "step": 1380 }, { "epoch": 0.3672872340425532, "grad_norm": 4.770625591278076, "learning_rate": 9.984963864407593e-06, "loss": 1.4373, "step": 1381 }, { "epoch": 0.3675531914893617, "grad_norm": 4.392122268676758, "learning_rate": 9.984895630394755e-06, "loss": 1.3069, "step": 1382 }, { "epoch": 0.36781914893617024, "grad_norm": 3.9814369678497314, "learning_rate": 9.984827242143376e-06, "loss": 1.281, "step": 1383 }, { "epoch": 0.3680851063829787, "grad_norm": 3.9791054725646973, "learning_rate": 9.984758699655572e-06, "loss": 1.1758, "step": 1384 }, { "epoch": 0.3683510638297872, "grad_norm": 4.434001922607422, "learning_rate": 9.984690002933465e-06, "loss": 1.3586, "step": 1385 }, { "epoch": 0.36861702127659574, "grad_norm": 4.445183753967285, "learning_rate": 9.984621151979183e-06, "loss": 1.367, "step": 1386 }, { "epoch": 0.36888297872340425, "grad_norm": 3.8560211658477783, "learning_rate": 9.984552146794853e-06, "loss": 1.2933, "step": 1387 }, { "epoch": 0.36914893617021277, "grad_norm": 4.20532751083374, "learning_rate": 9.984482987382612e-06, "loss": 1.3036, "step": 1388 }, { "epoch": 0.3694148936170213, "grad_norm": 4.1775898933410645, "learning_rate": 9.984413673744597e-06, "loss": 1.1862, "step": 1389 }, { "epoch": 0.3696808510638298, "grad_norm": 4.668176651000977, "learning_rate": 9.984344205882954e-06, "loss": 1.3125, "step": 1390 }, { "epoch": 0.3699468085106383, "grad_norm": 4.170348644256592, "learning_rate": 9.984274583799833e-06, "loss": 1.1855, "step": 1391 }, { "epoch": 0.3702127659574468, "grad_norm": 3.893609046936035, "learning_rate": 9.98420480749739e-06, "loss": 1.3567, "step": 1392 }, { "epoch": 0.3704787234042553, "grad_norm": 3.791059970855713, "learning_rate": 9.98413487697778e-06, "loss": 1.2596, "step": 1393 }, { "epoch": 0.3707446808510638, "grad_norm": 3.89493465423584, "learning_rate": 9.984064792243171e-06, "loss": 1.1468, "step": 1394 }, { "epoch": 0.37101063829787234, "grad_norm": 3.932354211807251, "learning_rate": 9.983994553295728e-06, "loss": 1.2274, "step": 1395 }, { "epoch": 0.37127659574468086, "grad_norm": 3.772759199142456, "learning_rate": 9.983924160137627e-06, "loss": 1.1687, "step": 1396 }, { "epoch": 0.3715425531914894, "grad_norm": 4.090175628662109, "learning_rate": 9.983853612771043e-06, "loss": 1.1627, "step": 1397 }, { "epoch": 0.3718085106382979, "grad_norm": 5.041259288787842, "learning_rate": 9.983782911198161e-06, "loss": 1.2878, "step": 1398 }, { "epoch": 0.37207446808510636, "grad_norm": 4.565484523773193, "learning_rate": 9.98371205542117e-06, "loss": 1.2838, "step": 1399 }, { "epoch": 0.3723404255319149, "grad_norm": 3.94577956199646, "learning_rate": 9.983641045442256e-06, "loss": 1.3253, "step": 1400 }, { "epoch": 0.3726063829787234, "grad_norm": 3.559597969055176, "learning_rate": 9.983569881263625e-06, "loss": 1.0896, "step": 1401 }, { "epoch": 0.3728723404255319, "grad_norm": 4.101516246795654, "learning_rate": 9.983498562887471e-06, "loss": 1.4844, "step": 1402 }, { "epoch": 0.37313829787234043, "grad_norm": 4.680913925170898, "learning_rate": 9.983427090316005e-06, "loss": 1.3343, "step": 1403 }, { "epoch": 0.37340425531914895, "grad_norm": 5.2188286781311035, "learning_rate": 9.983355463551439e-06, "loss": 1.3206, "step": 1404 }, { "epoch": 0.37367021276595747, "grad_norm": 4.363986968994141, "learning_rate": 9.983283682595986e-06, "loss": 1.5722, "step": 1405 }, { "epoch": 0.373936170212766, "grad_norm": 4.405764579772949, "learning_rate": 9.98321174745187e-06, "loss": 1.3106, "step": 1406 }, { "epoch": 0.37420212765957445, "grad_norm": 3.671576738357544, "learning_rate": 9.983139658121316e-06, "loss": 1.1663, "step": 1407 }, { "epoch": 0.37446808510638296, "grad_norm": 4.068467140197754, "learning_rate": 9.983067414606553e-06, "loss": 1.3443, "step": 1408 }, { "epoch": 0.3747340425531915, "grad_norm": 4.050812244415283, "learning_rate": 9.982995016909817e-06, "loss": 1.2671, "step": 1409 }, { "epoch": 0.375, "grad_norm": 4.016097545623779, "learning_rate": 9.98292246503335e-06, "loss": 1.2389, "step": 1410 }, { "epoch": 0.3752659574468085, "grad_norm": 4.278280258178711, "learning_rate": 9.982849758979394e-06, "loss": 1.3095, "step": 1411 }, { "epoch": 0.37553191489361704, "grad_norm": 3.826686143875122, "learning_rate": 9.9827768987502e-06, "loss": 1.0923, "step": 1412 }, { "epoch": 0.37579787234042555, "grad_norm": 3.954808473587036, "learning_rate": 9.982703884348023e-06, "loss": 1.3359, "step": 1413 }, { "epoch": 0.376063829787234, "grad_norm": 3.8342320919036865, "learning_rate": 9.982630715775121e-06, "loss": 1.287, "step": 1414 }, { "epoch": 0.37632978723404253, "grad_norm": 4.190742492675781, "learning_rate": 9.982557393033758e-06, "loss": 1.2957, "step": 1415 }, { "epoch": 0.37659574468085105, "grad_norm": 4.030623435974121, "learning_rate": 9.982483916126204e-06, "loss": 1.2992, "step": 1416 }, { "epoch": 0.37686170212765957, "grad_norm": 4.164768695831299, "learning_rate": 9.98241028505473e-06, "loss": 1.5608, "step": 1417 }, { "epoch": 0.3771276595744681, "grad_norm": 4.243110656738281, "learning_rate": 9.982336499821617e-06, "loss": 1.3214, "step": 1418 }, { "epoch": 0.3773936170212766, "grad_norm": 3.969595193862915, "learning_rate": 9.982262560429147e-06, "loss": 1.3743, "step": 1419 }, { "epoch": 0.3776595744680851, "grad_norm": 4.253571033477783, "learning_rate": 9.982188466879607e-06, "loss": 1.329, "step": 1420 }, { "epoch": 0.37792553191489364, "grad_norm": 4.254541397094727, "learning_rate": 9.98211421917529e-06, "loss": 1.3093, "step": 1421 }, { "epoch": 0.3781914893617021, "grad_norm": 4.365729808807373, "learning_rate": 9.982039817318491e-06, "loss": 1.3744, "step": 1422 }, { "epoch": 0.3784574468085106, "grad_norm": 4.0368499755859375, "learning_rate": 9.981965261311519e-06, "loss": 1.1517, "step": 1423 }, { "epoch": 0.37872340425531914, "grad_norm": 4.165602207183838, "learning_rate": 9.981890551156673e-06, "loss": 1.2983, "step": 1424 }, { "epoch": 0.37898936170212766, "grad_norm": 4.241005897521973, "learning_rate": 9.981815686856268e-06, "loss": 1.2491, "step": 1425 }, { "epoch": 0.3792553191489362, "grad_norm": 3.9506289958953857, "learning_rate": 9.981740668412622e-06, "loss": 1.175, "step": 1426 }, { "epoch": 0.3795212765957447, "grad_norm": 4.209918022155762, "learning_rate": 9.981665495828053e-06, "loss": 1.379, "step": 1427 }, { "epoch": 0.3797872340425532, "grad_norm": 4.048032283782959, "learning_rate": 9.981590169104889e-06, "loss": 1.4339, "step": 1428 }, { "epoch": 0.3800531914893617, "grad_norm": 3.9107158184051514, "learning_rate": 9.98151468824546e-06, "loss": 1.4468, "step": 1429 }, { "epoch": 0.3803191489361702, "grad_norm": 3.8230321407318115, "learning_rate": 9.981439053252102e-06, "loss": 1.2942, "step": 1430 }, { "epoch": 0.3805851063829787, "grad_norm": 3.772338390350342, "learning_rate": 9.981363264127154e-06, "loss": 1.3236, "step": 1431 }, { "epoch": 0.38085106382978723, "grad_norm": 4.234860897064209, "learning_rate": 9.981287320872962e-06, "loss": 1.3763, "step": 1432 }, { "epoch": 0.38111702127659575, "grad_norm": 3.8890817165374756, "learning_rate": 9.981211223491876e-06, "loss": 1.3667, "step": 1433 }, { "epoch": 0.38138297872340426, "grad_norm": 3.8217055797576904, "learning_rate": 9.98113497198625e-06, "loss": 1.1392, "step": 1434 }, { "epoch": 0.3816489361702128, "grad_norm": 3.9971745014190674, "learning_rate": 9.981058566358443e-06, "loss": 1.1892, "step": 1435 }, { "epoch": 0.3819148936170213, "grad_norm": 4.417277812957764, "learning_rate": 9.98098200661082e-06, "loss": 1.3306, "step": 1436 }, { "epoch": 0.38218085106382976, "grad_norm": 4.433936595916748, "learning_rate": 9.980905292745749e-06, "loss": 1.2253, "step": 1437 }, { "epoch": 0.3824468085106383, "grad_norm": 3.668414831161499, "learning_rate": 9.980828424765603e-06, "loss": 1.3243, "step": 1438 }, { "epoch": 0.3827127659574468, "grad_norm": 4.062864303588867, "learning_rate": 9.980751402672762e-06, "loss": 1.2416, "step": 1439 }, { "epoch": 0.3829787234042553, "grad_norm": 4.28949499130249, "learning_rate": 9.980674226469608e-06, "loss": 1.3018, "step": 1440 }, { "epoch": 0.38324468085106383, "grad_norm": 3.598482847213745, "learning_rate": 9.980596896158532e-06, "loss": 1.1174, "step": 1441 }, { "epoch": 0.38351063829787235, "grad_norm": 4.300634384155273, "learning_rate": 9.980519411741922e-06, "loss": 1.3079, "step": 1442 }, { "epoch": 0.38377659574468087, "grad_norm": 4.2363128662109375, "learning_rate": 9.980441773222178e-06, "loss": 1.3546, "step": 1443 }, { "epoch": 0.3840425531914894, "grad_norm": 4.521866321563721, "learning_rate": 9.980363980601702e-06, "loss": 1.2007, "step": 1444 }, { "epoch": 0.38430851063829785, "grad_norm": 3.9129135608673096, "learning_rate": 9.9802860338829e-06, "loss": 1.3101, "step": 1445 }, { "epoch": 0.38457446808510637, "grad_norm": 4.559953689575195, "learning_rate": 9.980207933068185e-06, "loss": 1.3183, "step": 1446 }, { "epoch": 0.3848404255319149, "grad_norm": 4.102110385894775, "learning_rate": 9.980129678159974e-06, "loss": 1.2549, "step": 1447 }, { "epoch": 0.3851063829787234, "grad_norm": 4.215007781982422, "learning_rate": 9.980051269160686e-06, "loss": 1.3281, "step": 1448 }, { "epoch": 0.3853723404255319, "grad_norm": 4.188117980957031, "learning_rate": 9.97997270607275e-06, "loss": 1.267, "step": 1449 }, { "epoch": 0.38563829787234044, "grad_norm": 3.9828150272369385, "learning_rate": 9.979893988898592e-06, "loss": 1.2967, "step": 1450 }, { "epoch": 0.38590425531914896, "grad_norm": 3.9680116176605225, "learning_rate": 9.979815117640654e-06, "loss": 1.2711, "step": 1451 }, { "epoch": 0.3861702127659574, "grad_norm": 3.9651451110839844, "learning_rate": 9.979736092301374e-06, "loss": 1.2298, "step": 1452 }, { "epoch": 0.38643617021276594, "grad_norm": 3.7032337188720703, "learning_rate": 9.979656912883193e-06, "loss": 1.1644, "step": 1453 }, { "epoch": 0.38670212765957446, "grad_norm": 4.174644470214844, "learning_rate": 9.979577579388566e-06, "loss": 1.1941, "step": 1454 }, { "epoch": 0.386968085106383, "grad_norm": 3.9499082565307617, "learning_rate": 9.979498091819946e-06, "loss": 1.2205, "step": 1455 }, { "epoch": 0.3872340425531915, "grad_norm": 4.005082130432129, "learning_rate": 9.979418450179792e-06, "loss": 1.2983, "step": 1456 }, { "epoch": 0.3875, "grad_norm": 4.425258159637451, "learning_rate": 9.97933865447057e-06, "loss": 1.3444, "step": 1457 }, { "epoch": 0.38776595744680853, "grad_norm": 4.169209003448486, "learning_rate": 9.979258704694747e-06, "loss": 1.3914, "step": 1458 }, { "epoch": 0.38803191489361705, "grad_norm": 3.7960317134857178, "learning_rate": 9.979178600854797e-06, "loss": 1.2186, "step": 1459 }, { "epoch": 0.3882978723404255, "grad_norm": 3.9216535091400146, "learning_rate": 9.979098342953198e-06, "loss": 1.0839, "step": 1460 }, { "epoch": 0.388563829787234, "grad_norm": 4.077401638031006, "learning_rate": 9.979017930992436e-06, "loss": 1.225, "step": 1461 }, { "epoch": 0.38882978723404255, "grad_norm": 3.871135950088501, "learning_rate": 9.978937364974996e-06, "loss": 1.2545, "step": 1462 }, { "epoch": 0.38909574468085106, "grad_norm": 4.12876558303833, "learning_rate": 9.978856644903373e-06, "loss": 1.3806, "step": 1463 }, { "epoch": 0.3893617021276596, "grad_norm": 4.172638416290283, "learning_rate": 9.978775770780061e-06, "loss": 1.3444, "step": 1464 }, { "epoch": 0.3896276595744681, "grad_norm": 4.253303050994873, "learning_rate": 9.978694742607566e-06, "loss": 1.3015, "step": 1465 }, { "epoch": 0.3898936170212766, "grad_norm": 3.937948226928711, "learning_rate": 9.978613560388396e-06, "loss": 1.4014, "step": 1466 }, { "epoch": 0.3901595744680851, "grad_norm": 3.959920644760132, "learning_rate": 9.978532224125059e-06, "loss": 1.2797, "step": 1467 }, { "epoch": 0.3904255319148936, "grad_norm": 4.240394592285156, "learning_rate": 9.978450733820073e-06, "loss": 1.3541, "step": 1468 }, { "epoch": 0.3906914893617021, "grad_norm": 4.060705661773682, "learning_rate": 9.97836908947596e-06, "loss": 1.2997, "step": 1469 }, { "epoch": 0.39095744680851063, "grad_norm": 4.276419162750244, "learning_rate": 9.978287291095248e-06, "loss": 1.4451, "step": 1470 }, { "epoch": 0.39122340425531915, "grad_norm": 3.961526393890381, "learning_rate": 9.978205338680465e-06, "loss": 1.3248, "step": 1471 }, { "epoch": 0.39148936170212767, "grad_norm": 4.002696514129639, "learning_rate": 9.978123232234147e-06, "loss": 1.3274, "step": 1472 }, { "epoch": 0.3917553191489362, "grad_norm": 3.857750654220581, "learning_rate": 9.978040971758836e-06, "loss": 1.2552, "step": 1473 }, { "epoch": 0.3920212765957447, "grad_norm": 3.973501682281494, "learning_rate": 9.977958557257077e-06, "loss": 1.3911, "step": 1474 }, { "epoch": 0.39228723404255317, "grad_norm": 4.301419258117676, "learning_rate": 9.977875988731418e-06, "loss": 1.2423, "step": 1475 }, { "epoch": 0.3925531914893617, "grad_norm": 3.7840960025787354, "learning_rate": 9.977793266184416e-06, "loss": 1.1739, "step": 1476 }, { "epoch": 0.3928191489361702, "grad_norm": 3.6807820796966553, "learning_rate": 9.977710389618628e-06, "loss": 1.1685, "step": 1477 }, { "epoch": 0.3930851063829787, "grad_norm": 3.942674398422241, "learning_rate": 9.977627359036624e-06, "loss": 1.2033, "step": 1478 }, { "epoch": 0.39335106382978724, "grad_norm": 4.07774543762207, "learning_rate": 9.977544174440965e-06, "loss": 1.2707, "step": 1479 }, { "epoch": 0.39361702127659576, "grad_norm": 4.302217483520508, "learning_rate": 9.977460835834231e-06, "loss": 1.3944, "step": 1480 }, { "epoch": 0.3938829787234043, "grad_norm": 4.006019592285156, "learning_rate": 9.977377343218998e-06, "loss": 1.3301, "step": 1481 }, { "epoch": 0.3941489361702128, "grad_norm": 4.067336082458496, "learning_rate": 9.977293696597849e-06, "loss": 1.3282, "step": 1482 }, { "epoch": 0.39441489361702126, "grad_norm": 4.4912004470825195, "learning_rate": 9.977209895973374e-06, "loss": 1.374, "step": 1483 }, { "epoch": 0.3946808510638298, "grad_norm": 3.933626651763916, "learning_rate": 9.977125941348165e-06, "loss": 1.1584, "step": 1484 }, { "epoch": 0.3949468085106383, "grad_norm": 4.08411169052124, "learning_rate": 9.97704183272482e-06, "loss": 1.3587, "step": 1485 }, { "epoch": 0.3952127659574468, "grad_norm": 4.316272735595703, "learning_rate": 9.976957570105939e-06, "loss": 1.2544, "step": 1486 }, { "epoch": 0.39547872340425533, "grad_norm": 4.05543851852417, "learning_rate": 9.976873153494132e-06, "loss": 1.1699, "step": 1487 }, { "epoch": 0.39574468085106385, "grad_norm": 4.137149810791016, "learning_rate": 9.976788582892012e-06, "loss": 1.3501, "step": 1488 }, { "epoch": 0.39601063829787236, "grad_norm": 3.830085515975952, "learning_rate": 9.976703858302192e-06, "loss": 1.2818, "step": 1489 }, { "epoch": 0.3962765957446808, "grad_norm": 4.138214588165283, "learning_rate": 9.976618979727295e-06, "loss": 1.2769, "step": 1490 }, { "epoch": 0.39654255319148934, "grad_norm": 4.205438137054443, "learning_rate": 9.976533947169948e-06, "loss": 1.4103, "step": 1491 }, { "epoch": 0.39680851063829786, "grad_norm": 4.104953289031982, "learning_rate": 9.976448760632782e-06, "loss": 1.3701, "step": 1492 }, { "epoch": 0.3970744680851064, "grad_norm": 3.725175619125366, "learning_rate": 9.976363420118432e-06, "loss": 1.2986, "step": 1493 }, { "epoch": 0.3973404255319149, "grad_norm": 4.973143577575684, "learning_rate": 9.97627792562954e-06, "loss": 1.3123, "step": 1494 }, { "epoch": 0.3976063829787234, "grad_norm": 3.5973260402679443, "learning_rate": 9.976192277168748e-06, "loss": 1.1878, "step": 1495 }, { "epoch": 0.39787234042553193, "grad_norm": 3.9308860301971436, "learning_rate": 9.97610647473871e-06, "loss": 1.3139, "step": 1496 }, { "epoch": 0.39813829787234045, "grad_norm": 3.831552028656006, "learning_rate": 9.976020518342078e-06, "loss": 1.249, "step": 1497 }, { "epoch": 0.3984042553191489, "grad_norm": 3.8937809467315674, "learning_rate": 9.975934407981512e-06, "loss": 1.2361, "step": 1498 }, { "epoch": 0.39867021276595743, "grad_norm": 4.4092512130737305, "learning_rate": 9.97584814365968e-06, "loss": 1.424, "step": 1499 }, { "epoch": 0.39893617021276595, "grad_norm": 4.096745491027832, "learning_rate": 9.975761725379243e-06, "loss": 1.3488, "step": 1500 }, { "epoch": 0.39893617021276595, "eval_loss": 1.3084138631820679, "eval_runtime": 12.5754, "eval_samples_per_second": 31.808, "eval_steps_per_second": 3.976, "step": 1500 }, { "epoch": 0.39920212765957447, "grad_norm": 5.023965835571289, "learning_rate": 9.975675153142884e-06, "loss": 1.3409, "step": 1501 }, { "epoch": 0.399468085106383, "grad_norm": 4.182278156280518, "learning_rate": 9.975588426953276e-06, "loss": 1.2497, "step": 1502 }, { "epoch": 0.3997340425531915, "grad_norm": 3.872786283493042, "learning_rate": 9.975501546813104e-06, "loss": 1.29, "step": 1503 }, { "epoch": 0.4, "grad_norm": 3.9527881145477295, "learning_rate": 9.975414512725058e-06, "loss": 1.3427, "step": 1504 }, { "epoch": 0.4002659574468085, "grad_norm": 3.563168525695801, "learning_rate": 9.975327324691828e-06, "loss": 1.2509, "step": 1505 }, { "epoch": 0.400531914893617, "grad_norm": 3.8460729122161865, "learning_rate": 9.975239982716113e-06, "loss": 1.214, "step": 1506 }, { "epoch": 0.4007978723404255, "grad_norm": 4.321569442749023, "learning_rate": 9.975152486800615e-06, "loss": 1.1959, "step": 1507 }, { "epoch": 0.40106382978723404, "grad_norm": 4.102901935577393, "learning_rate": 9.975064836948041e-06, "loss": 1.2786, "step": 1508 }, { "epoch": 0.40132978723404256, "grad_norm": 3.8385143280029297, "learning_rate": 9.974977033161103e-06, "loss": 1.3574, "step": 1509 }, { "epoch": 0.4015957446808511, "grad_norm": 3.912363290786743, "learning_rate": 9.97488907544252e-06, "loss": 1.388, "step": 1510 }, { "epoch": 0.4018617021276596, "grad_norm": 4.346206188201904, "learning_rate": 9.974800963795012e-06, "loss": 1.4532, "step": 1511 }, { "epoch": 0.4021276595744681, "grad_norm": 4.346587657928467, "learning_rate": 9.974712698221306e-06, "loss": 1.2098, "step": 1512 }, { "epoch": 0.4023936170212766, "grad_norm": 3.9622318744659424, "learning_rate": 9.97462427872413e-06, "loss": 1.1556, "step": 1513 }, { "epoch": 0.4026595744680851, "grad_norm": 3.903508186340332, "learning_rate": 9.974535705306222e-06, "loss": 1.1644, "step": 1514 }, { "epoch": 0.4029255319148936, "grad_norm": 4.4463605880737305, "learning_rate": 9.974446977970322e-06, "loss": 1.4892, "step": 1515 }, { "epoch": 0.4031914893617021, "grad_norm": 3.8401832580566406, "learning_rate": 9.974358096719178e-06, "loss": 1.3681, "step": 1516 }, { "epoch": 0.40345744680851064, "grad_norm": 4.009060382843018, "learning_rate": 9.974269061555537e-06, "loss": 1.2134, "step": 1517 }, { "epoch": 0.40372340425531916, "grad_norm": 3.609969139099121, "learning_rate": 9.974179872482153e-06, "loss": 1.34, "step": 1518 }, { "epoch": 0.4039893617021277, "grad_norm": 4.289672374725342, "learning_rate": 9.97409052950179e-06, "loss": 1.4246, "step": 1519 }, { "epoch": 0.40425531914893614, "grad_norm": 3.6479434967041016, "learning_rate": 9.974001032617208e-06, "loss": 1.2366, "step": 1520 }, { "epoch": 0.40452127659574466, "grad_norm": 4.251558780670166, "learning_rate": 9.973911381831178e-06, "loss": 1.3208, "step": 1521 }, { "epoch": 0.4047872340425532, "grad_norm": 3.7560923099517822, "learning_rate": 9.973821577146475e-06, "loss": 1.2298, "step": 1522 }, { "epoch": 0.4050531914893617, "grad_norm": 3.9338622093200684, "learning_rate": 9.973731618565876e-06, "loss": 1.34, "step": 1523 }, { "epoch": 0.4053191489361702, "grad_norm": 3.8561365604400635, "learning_rate": 9.973641506092165e-06, "loss": 1.4198, "step": 1524 }, { "epoch": 0.40558510638297873, "grad_norm": 3.7590527534484863, "learning_rate": 9.973551239728129e-06, "loss": 1.3644, "step": 1525 }, { "epoch": 0.40585106382978725, "grad_norm": 4.470832824707031, "learning_rate": 9.973460819476562e-06, "loss": 1.3641, "step": 1526 }, { "epoch": 0.40611702127659577, "grad_norm": 3.5494723320007324, "learning_rate": 9.973370245340264e-06, "loss": 1.2552, "step": 1527 }, { "epoch": 0.40638297872340423, "grad_norm": 4.204685211181641, "learning_rate": 9.973279517322033e-06, "loss": 1.3577, "step": 1528 }, { "epoch": 0.40664893617021275, "grad_norm": 4.775966167449951, "learning_rate": 9.97318863542468e-06, "loss": 1.4342, "step": 1529 }, { "epoch": 0.40691489361702127, "grad_norm": 4.2795729637146, "learning_rate": 9.973097599651013e-06, "loss": 1.3033, "step": 1530 }, { "epoch": 0.4071808510638298, "grad_norm": 4.110699653625488, "learning_rate": 9.973006410003853e-06, "loss": 1.3463, "step": 1531 }, { "epoch": 0.4074468085106383, "grad_norm": 3.8819406032562256, "learning_rate": 9.97291506648602e-06, "loss": 1.1908, "step": 1532 }, { "epoch": 0.4077127659574468, "grad_norm": 4.164956092834473, "learning_rate": 9.972823569100338e-06, "loss": 1.2573, "step": 1533 }, { "epoch": 0.40797872340425534, "grad_norm": 3.9775986671447754, "learning_rate": 9.97273191784964e-06, "loss": 1.2141, "step": 1534 }, { "epoch": 0.40824468085106386, "grad_norm": 4.500059604644775, "learning_rate": 9.972640112736764e-06, "loss": 1.3342, "step": 1535 }, { "epoch": 0.4085106382978723, "grad_norm": 4.081606864929199, "learning_rate": 9.972548153764547e-06, "loss": 1.2027, "step": 1536 }, { "epoch": 0.40877659574468084, "grad_norm": 4.272010803222656, "learning_rate": 9.972456040935838e-06, "loss": 1.2332, "step": 1537 }, { "epoch": 0.40904255319148936, "grad_norm": 4.042487144470215, "learning_rate": 9.972363774253481e-06, "loss": 1.1932, "step": 1538 }, { "epoch": 0.4093085106382979, "grad_norm": 3.9628350734710693, "learning_rate": 9.972271353720337e-06, "loss": 1.2636, "step": 1539 }, { "epoch": 0.4095744680851064, "grad_norm": 4.018553256988525, "learning_rate": 9.972178779339264e-06, "loss": 1.2822, "step": 1540 }, { "epoch": 0.4098404255319149, "grad_norm": 4.054775714874268, "learning_rate": 9.972086051113123e-06, "loss": 1.3419, "step": 1541 }, { "epoch": 0.4101063829787234, "grad_norm": 4.035485744476318, "learning_rate": 9.971993169044787e-06, "loss": 1.2586, "step": 1542 }, { "epoch": 0.4103723404255319, "grad_norm": 4.139084815979004, "learning_rate": 9.971900133137128e-06, "loss": 1.3533, "step": 1543 }, { "epoch": 0.4106382978723404, "grad_norm": 3.9709324836730957, "learning_rate": 9.971806943393026e-06, "loss": 1.1807, "step": 1544 }, { "epoch": 0.4109042553191489, "grad_norm": 3.836603879928589, "learning_rate": 9.971713599815364e-06, "loss": 1.2364, "step": 1545 }, { "epoch": 0.41117021276595744, "grad_norm": 3.484250068664551, "learning_rate": 9.97162010240703e-06, "loss": 1.2536, "step": 1546 }, { "epoch": 0.41143617021276596, "grad_norm": 4.203670978546143, "learning_rate": 9.971526451170914e-06, "loss": 1.2339, "step": 1547 }, { "epoch": 0.4117021276595745, "grad_norm": 3.7969377040863037, "learning_rate": 9.971432646109919e-06, "loss": 1.4205, "step": 1548 }, { "epoch": 0.411968085106383, "grad_norm": 3.9421546459198, "learning_rate": 9.971338687226944e-06, "loss": 1.2441, "step": 1549 }, { "epoch": 0.4122340425531915, "grad_norm": 3.8566412925720215, "learning_rate": 9.971244574524897e-06, "loss": 1.3148, "step": 1550 }, { "epoch": 0.4125, "grad_norm": 3.6699059009552, "learning_rate": 9.971150308006689e-06, "loss": 1.1396, "step": 1551 }, { "epoch": 0.4127659574468085, "grad_norm": 4.328299522399902, "learning_rate": 9.971055887675238e-06, "loss": 1.4105, "step": 1552 }, { "epoch": 0.413031914893617, "grad_norm": 3.6258397102355957, "learning_rate": 9.970961313533465e-06, "loss": 1.2399, "step": 1553 }, { "epoch": 0.41329787234042553, "grad_norm": 4.217952251434326, "learning_rate": 9.970866585584298e-06, "loss": 1.2643, "step": 1554 }, { "epoch": 0.41356382978723405, "grad_norm": 3.8410286903381348, "learning_rate": 9.970771703830666e-06, "loss": 1.3982, "step": 1555 }, { "epoch": 0.41382978723404257, "grad_norm": 4.1184234619140625, "learning_rate": 9.970676668275504e-06, "loss": 1.3206, "step": 1556 }, { "epoch": 0.4140957446808511, "grad_norm": 3.805264472961426, "learning_rate": 9.970581478921755e-06, "loss": 1.3301, "step": 1557 }, { "epoch": 0.41436170212765955, "grad_norm": 3.7191929817199707, "learning_rate": 9.970486135772362e-06, "loss": 1.3443, "step": 1558 }, { "epoch": 0.41462765957446807, "grad_norm": 3.7962100505828857, "learning_rate": 9.970390638830275e-06, "loss": 1.1145, "step": 1559 }, { "epoch": 0.4148936170212766, "grad_norm": 3.8480000495910645, "learning_rate": 9.970294988098452e-06, "loss": 1.303, "step": 1560 }, { "epoch": 0.4151595744680851, "grad_norm": 4.154008388519287, "learning_rate": 9.970199183579847e-06, "loss": 1.2505, "step": 1561 }, { "epoch": 0.4154255319148936, "grad_norm": 3.6945624351501465, "learning_rate": 9.97010322527743e-06, "loss": 1.2318, "step": 1562 }, { "epoch": 0.41569148936170214, "grad_norm": 4.145558834075928, "learning_rate": 9.970007113194168e-06, "loss": 1.2855, "step": 1563 }, { "epoch": 0.41595744680851066, "grad_norm": 4.037220001220703, "learning_rate": 9.969910847333032e-06, "loss": 1.2599, "step": 1564 }, { "epoch": 0.4162234042553192, "grad_norm": 4.070208549499512, "learning_rate": 9.969814427697007e-06, "loss": 1.3002, "step": 1565 }, { "epoch": 0.41648936170212764, "grad_norm": 4.0794548988342285, "learning_rate": 9.969717854289069e-06, "loss": 1.3807, "step": 1566 }, { "epoch": 0.41675531914893615, "grad_norm": 3.9017162322998047, "learning_rate": 9.969621127112211e-06, "loss": 1.1982, "step": 1567 }, { "epoch": 0.41702127659574467, "grad_norm": 4.089752674102783, "learning_rate": 9.969524246169424e-06, "loss": 1.2734, "step": 1568 }, { "epoch": 0.4172872340425532, "grad_norm": 3.7550644874572754, "learning_rate": 9.969427211463705e-06, "loss": 1.2207, "step": 1569 }, { "epoch": 0.4175531914893617, "grad_norm": 3.9977076053619385, "learning_rate": 9.969330022998057e-06, "loss": 1.3695, "step": 1570 }, { "epoch": 0.4178191489361702, "grad_norm": 4.422798156738281, "learning_rate": 9.969232680775491e-06, "loss": 1.3292, "step": 1571 }, { "epoch": 0.41808510638297874, "grad_norm": 4.122771263122559, "learning_rate": 9.969135184799013e-06, "loss": 1.3753, "step": 1572 }, { "epoch": 0.4183510638297872, "grad_norm": 3.827120542526245, "learning_rate": 9.969037535071641e-06, "loss": 1.2738, "step": 1573 }, { "epoch": 0.4186170212765957, "grad_norm": 3.823761463165283, "learning_rate": 9.968939731596399e-06, "loss": 1.2201, "step": 1574 }, { "epoch": 0.41888297872340424, "grad_norm": 4.0475616455078125, "learning_rate": 9.96884177437631e-06, "loss": 1.3511, "step": 1575 }, { "epoch": 0.41914893617021276, "grad_norm": 4.167337894439697, "learning_rate": 9.968743663414408e-06, "loss": 1.3725, "step": 1576 }, { "epoch": 0.4194148936170213, "grad_norm": 4.683474063873291, "learning_rate": 9.968645398713726e-06, "loss": 1.3719, "step": 1577 }, { "epoch": 0.4196808510638298, "grad_norm": 4.450965881347656, "learning_rate": 9.968546980277305e-06, "loss": 1.2847, "step": 1578 }, { "epoch": 0.4199468085106383, "grad_norm": 4.25331449508667, "learning_rate": 9.968448408108191e-06, "loss": 1.4151, "step": 1579 }, { "epoch": 0.42021276595744683, "grad_norm": 4.090495586395264, "learning_rate": 9.968349682209434e-06, "loss": 1.2518, "step": 1580 }, { "epoch": 0.4204787234042553, "grad_norm": 4.116806507110596, "learning_rate": 9.96825080258409e-06, "loss": 1.3986, "step": 1581 }, { "epoch": 0.4207446808510638, "grad_norm": 4.016780376434326, "learning_rate": 9.968151769235216e-06, "loss": 1.2488, "step": 1582 }, { "epoch": 0.42101063829787233, "grad_norm": 4.153627872467041, "learning_rate": 9.968052582165874e-06, "loss": 1.3459, "step": 1583 }, { "epoch": 0.42127659574468085, "grad_norm": 4.0243048667907715, "learning_rate": 9.96795324137914e-06, "loss": 1.2554, "step": 1584 }, { "epoch": 0.42154255319148937, "grad_norm": 4.162500381469727, "learning_rate": 9.96785374687808e-06, "loss": 1.3597, "step": 1585 }, { "epoch": 0.4218085106382979, "grad_norm": 3.8271100521087646, "learning_rate": 9.967754098665778e-06, "loss": 1.2375, "step": 1586 }, { "epoch": 0.4220744680851064, "grad_norm": 3.73313045501709, "learning_rate": 9.967654296745317e-06, "loss": 1.1394, "step": 1587 }, { "epoch": 0.4223404255319149, "grad_norm": 4.17546272277832, "learning_rate": 9.96755434111978e-06, "loss": 1.3004, "step": 1588 }, { "epoch": 0.4226063829787234, "grad_norm": 3.7987289428710938, "learning_rate": 9.967454231792267e-06, "loss": 1.2551, "step": 1589 }, { "epoch": 0.4228723404255319, "grad_norm": 4.171220779418945, "learning_rate": 9.967353968765868e-06, "loss": 1.2722, "step": 1590 }, { "epoch": 0.4231382978723404, "grad_norm": 4.090373516082764, "learning_rate": 9.96725355204369e-06, "loss": 1.2963, "step": 1591 }, { "epoch": 0.42340425531914894, "grad_norm": 4.222188949584961, "learning_rate": 9.967152981628841e-06, "loss": 1.1075, "step": 1592 }, { "epoch": 0.42367021276595745, "grad_norm": 3.9014172554016113, "learning_rate": 9.967052257524428e-06, "loss": 1.251, "step": 1593 }, { "epoch": 0.423936170212766, "grad_norm": 4.0223870277404785, "learning_rate": 9.966951379733572e-06, "loss": 1.1924, "step": 1594 }, { "epoch": 0.4242021276595745, "grad_norm": 3.724557876586914, "learning_rate": 9.96685034825939e-06, "loss": 1.206, "step": 1595 }, { "epoch": 0.42446808510638295, "grad_norm": 4.103020191192627, "learning_rate": 9.966749163105011e-06, "loss": 1.374, "step": 1596 }, { "epoch": 0.42473404255319147, "grad_norm": 3.997119188308716, "learning_rate": 9.966647824273567e-06, "loss": 1.2097, "step": 1597 }, { "epoch": 0.425, "grad_norm": 4.226285934448242, "learning_rate": 9.966546331768192e-06, "loss": 1.3387, "step": 1598 }, { "epoch": 0.4252659574468085, "grad_norm": 4.060708999633789, "learning_rate": 9.966444685592025e-06, "loss": 1.2762, "step": 1599 }, { "epoch": 0.425531914893617, "grad_norm": 4.005706787109375, "learning_rate": 9.966342885748212e-06, "loss": 1.2845, "step": 1600 }, { "epoch": 0.42579787234042554, "grad_norm": 4.201882839202881, "learning_rate": 9.966240932239904e-06, "loss": 1.2953, "step": 1601 }, { "epoch": 0.42606382978723406, "grad_norm": 3.7558727264404297, "learning_rate": 9.966138825070254e-06, "loss": 1.2806, "step": 1602 }, { "epoch": 0.4263297872340426, "grad_norm": 3.9751381874084473, "learning_rate": 9.96603656424242e-06, "loss": 1.2354, "step": 1603 }, { "epoch": 0.42659574468085104, "grad_norm": 3.775033712387085, "learning_rate": 9.96593414975957e-06, "loss": 1.2592, "step": 1604 }, { "epoch": 0.42686170212765956, "grad_norm": 4.114045143127441, "learning_rate": 9.965831581624872e-06, "loss": 1.1019, "step": 1605 }, { "epoch": 0.4271276595744681, "grad_norm": 3.6853203773498535, "learning_rate": 9.965728859841497e-06, "loss": 1.356, "step": 1606 }, { "epoch": 0.4273936170212766, "grad_norm": 3.8778109550476074, "learning_rate": 9.965625984412623e-06, "loss": 1.2266, "step": 1607 }, { "epoch": 0.4276595744680851, "grad_norm": 3.860879421234131, "learning_rate": 9.965522955341437e-06, "loss": 1.2998, "step": 1608 }, { "epoch": 0.42792553191489363, "grad_norm": 3.7324464321136475, "learning_rate": 9.965419772631125e-06, "loss": 1.3103, "step": 1609 }, { "epoch": 0.42819148936170215, "grad_norm": 3.8030385971069336, "learning_rate": 9.965316436284877e-06, "loss": 1.2967, "step": 1610 }, { "epoch": 0.4284574468085106, "grad_norm": 4.376537322998047, "learning_rate": 9.965212946305893e-06, "loss": 1.4258, "step": 1611 }, { "epoch": 0.42872340425531913, "grad_norm": 4.365556716918945, "learning_rate": 9.965109302697376e-06, "loss": 1.3794, "step": 1612 }, { "epoch": 0.42898936170212765, "grad_norm": 4.431367874145508, "learning_rate": 9.96500550546253e-06, "loss": 1.2973, "step": 1613 }, { "epoch": 0.42925531914893617, "grad_norm": 4.084920406341553, "learning_rate": 9.96490155460457e-06, "loss": 1.2417, "step": 1614 }, { "epoch": 0.4295212765957447, "grad_norm": 3.6877284049987793, "learning_rate": 9.964797450126708e-06, "loss": 1.2577, "step": 1615 }, { "epoch": 0.4297872340425532, "grad_norm": 4.147090911865234, "learning_rate": 9.964693192032168e-06, "loss": 1.3127, "step": 1616 }, { "epoch": 0.4300531914893617, "grad_norm": 3.9144530296325684, "learning_rate": 9.964588780324176e-06, "loss": 1.2333, "step": 1617 }, { "epoch": 0.43031914893617024, "grad_norm": 3.9510538578033447, "learning_rate": 9.964484215005963e-06, "loss": 1.2541, "step": 1618 }, { "epoch": 0.4305851063829787, "grad_norm": 4.1784892082214355, "learning_rate": 9.964379496080763e-06, "loss": 1.3247, "step": 1619 }, { "epoch": 0.4308510638297872, "grad_norm": 3.9380571842193604, "learning_rate": 9.964274623551814e-06, "loss": 1.3042, "step": 1620 }, { "epoch": 0.43111702127659574, "grad_norm": 3.6729469299316406, "learning_rate": 9.964169597422367e-06, "loss": 1.2064, "step": 1621 }, { "epoch": 0.43138297872340425, "grad_norm": 4.168332576751709, "learning_rate": 9.964064417695666e-06, "loss": 1.2936, "step": 1622 }, { "epoch": 0.43164893617021277, "grad_norm": 3.7848429679870605, "learning_rate": 9.963959084374969e-06, "loss": 1.3055, "step": 1623 }, { "epoch": 0.4319148936170213, "grad_norm": 3.760188579559326, "learning_rate": 9.963853597463533e-06, "loss": 1.2085, "step": 1624 }, { "epoch": 0.4321808510638298, "grad_norm": 3.734712839126587, "learning_rate": 9.963747956964623e-06, "loss": 1.1788, "step": 1625 }, { "epoch": 0.4324468085106383, "grad_norm": 4.398496627807617, "learning_rate": 9.963642162881506e-06, "loss": 1.1853, "step": 1626 }, { "epoch": 0.4327127659574468, "grad_norm": 4.267323970794678, "learning_rate": 9.963536215217457e-06, "loss": 1.2317, "step": 1627 }, { "epoch": 0.4329787234042553, "grad_norm": 4.306065082550049, "learning_rate": 9.963430113975753e-06, "loss": 1.5309, "step": 1628 }, { "epoch": 0.4332446808510638, "grad_norm": 3.862356424331665, "learning_rate": 9.963323859159679e-06, "loss": 1.2449, "step": 1629 }, { "epoch": 0.43351063829787234, "grad_norm": 3.6479053497314453, "learning_rate": 9.96321745077252e-06, "loss": 1.1502, "step": 1630 }, { "epoch": 0.43377659574468086, "grad_norm": 3.702998399734497, "learning_rate": 9.963110888817569e-06, "loss": 1.1776, "step": 1631 }, { "epoch": 0.4340425531914894, "grad_norm": 4.183767795562744, "learning_rate": 9.963004173298125e-06, "loss": 1.2266, "step": 1632 }, { "epoch": 0.4343085106382979, "grad_norm": 3.9834625720977783, "learning_rate": 9.96289730421749e-06, "loss": 1.222, "step": 1633 }, { "epoch": 0.43457446808510636, "grad_norm": 3.971428871154785, "learning_rate": 9.962790281578966e-06, "loss": 1.3843, "step": 1634 }, { "epoch": 0.4348404255319149, "grad_norm": 3.833468437194824, "learning_rate": 9.96268310538587e-06, "loss": 1.3268, "step": 1635 }, { "epoch": 0.4351063829787234, "grad_norm": 3.7899720668792725, "learning_rate": 9.962575775641516e-06, "loss": 1.2939, "step": 1636 }, { "epoch": 0.4353723404255319, "grad_norm": 3.8362271785736084, "learning_rate": 9.962468292349223e-06, "loss": 1.2681, "step": 1637 }, { "epoch": 0.43563829787234043, "grad_norm": 3.884549140930176, "learning_rate": 9.96236065551232e-06, "loss": 1.267, "step": 1638 }, { "epoch": 0.43590425531914895, "grad_norm": 3.975801944732666, "learning_rate": 9.962252865134136e-06, "loss": 1.3039, "step": 1639 }, { "epoch": 0.43617021276595747, "grad_norm": 4.278522491455078, "learning_rate": 9.962144921218005e-06, "loss": 1.3885, "step": 1640 }, { "epoch": 0.436436170212766, "grad_norm": 3.9850552082061768, "learning_rate": 9.962036823767269e-06, "loss": 1.2586, "step": 1641 }, { "epoch": 0.43670212765957445, "grad_norm": 4.315723419189453, "learning_rate": 9.961928572785272e-06, "loss": 1.3281, "step": 1642 }, { "epoch": 0.43696808510638296, "grad_norm": 3.7114546298980713, "learning_rate": 9.96182016827536e-06, "loss": 1.1813, "step": 1643 }, { "epoch": 0.4372340425531915, "grad_norm": 4.079943656921387, "learning_rate": 9.961711610240892e-06, "loss": 1.2878, "step": 1644 }, { "epoch": 0.4375, "grad_norm": 3.7427685260772705, "learning_rate": 9.961602898685225e-06, "loss": 1.3068, "step": 1645 }, { "epoch": 0.4377659574468085, "grad_norm": 4.234682083129883, "learning_rate": 9.961494033611726e-06, "loss": 1.4143, "step": 1646 }, { "epoch": 0.43803191489361704, "grad_norm": 3.7043113708496094, "learning_rate": 9.961385015023755e-06, "loss": 1.356, "step": 1647 }, { "epoch": 0.43829787234042555, "grad_norm": 3.9575397968292236, "learning_rate": 9.961275842924694e-06, "loss": 1.3257, "step": 1648 }, { "epoch": 0.438563829787234, "grad_norm": 4.285686016082764, "learning_rate": 9.961166517317914e-06, "loss": 1.2934, "step": 1649 }, { "epoch": 0.43882978723404253, "grad_norm": 4.141624927520752, "learning_rate": 9.961057038206804e-06, "loss": 1.1941, "step": 1650 }, { "epoch": 0.43909574468085105, "grad_norm": 3.7219042778015137, "learning_rate": 9.960947405594747e-06, "loss": 1.309, "step": 1651 }, { "epoch": 0.43936170212765957, "grad_norm": 4.113218307495117, "learning_rate": 9.960837619485136e-06, "loss": 1.2331, "step": 1652 }, { "epoch": 0.4396276595744681, "grad_norm": 4.069479465484619, "learning_rate": 9.96072767988137e-06, "loss": 1.1383, "step": 1653 }, { "epoch": 0.4398936170212766, "grad_norm": 3.974097967147827, "learning_rate": 9.960617586786847e-06, "loss": 1.2015, "step": 1654 }, { "epoch": 0.4401595744680851, "grad_norm": 3.991530656814575, "learning_rate": 9.960507340204977e-06, "loss": 1.254, "step": 1655 }, { "epoch": 0.44042553191489364, "grad_norm": 4.121614933013916, "learning_rate": 9.960396940139169e-06, "loss": 1.4372, "step": 1656 }, { "epoch": 0.4406914893617021, "grad_norm": 4.809171676635742, "learning_rate": 9.960286386592839e-06, "loss": 1.1771, "step": 1657 }, { "epoch": 0.4409574468085106, "grad_norm": 3.7910423278808594, "learning_rate": 9.960175679569409e-06, "loss": 1.4103, "step": 1658 }, { "epoch": 0.44122340425531914, "grad_norm": 3.5597236156463623, "learning_rate": 9.960064819072305e-06, "loss": 1.2461, "step": 1659 }, { "epoch": 0.44148936170212766, "grad_norm": 4.393692493438721, "learning_rate": 9.959953805104953e-06, "loss": 1.3746, "step": 1660 }, { "epoch": 0.4417553191489362, "grad_norm": 4.309146881103516, "learning_rate": 9.959842637670791e-06, "loss": 1.2619, "step": 1661 }, { "epoch": 0.4420212765957447, "grad_norm": 4.537207126617432, "learning_rate": 9.95973131677326e-06, "loss": 1.2895, "step": 1662 }, { "epoch": 0.4422872340425532, "grad_norm": 4.204534530639648, "learning_rate": 9.959619842415802e-06, "loss": 1.2458, "step": 1663 }, { "epoch": 0.4425531914893617, "grad_norm": 3.859935998916626, "learning_rate": 9.959508214601866e-06, "loss": 1.2334, "step": 1664 }, { "epoch": 0.4428191489361702, "grad_norm": 4.042413711547852, "learning_rate": 9.959396433334907e-06, "loss": 1.451, "step": 1665 }, { "epoch": 0.4430851063829787, "grad_norm": 4.226952075958252, "learning_rate": 9.959284498618385e-06, "loss": 1.3204, "step": 1666 }, { "epoch": 0.44335106382978723, "grad_norm": 4.049594402313232, "learning_rate": 9.95917241045576e-06, "loss": 1.3671, "step": 1667 }, { "epoch": 0.44361702127659575, "grad_norm": 3.731627941131592, "learning_rate": 9.959060168850504e-06, "loss": 1.289, "step": 1668 }, { "epoch": 0.44388297872340426, "grad_norm": 4.097120761871338, "learning_rate": 9.958947773806084e-06, "loss": 1.2126, "step": 1669 }, { "epoch": 0.4441489361702128, "grad_norm": 4.148438930511475, "learning_rate": 9.958835225325984e-06, "loss": 1.1967, "step": 1670 }, { "epoch": 0.4444148936170213, "grad_norm": 3.9843711853027344, "learning_rate": 9.958722523413685e-06, "loss": 1.3463, "step": 1671 }, { "epoch": 0.44468085106382976, "grad_norm": 4.3066630363464355, "learning_rate": 9.958609668072673e-06, "loss": 1.4344, "step": 1672 }, { "epoch": 0.4449468085106383, "grad_norm": 3.673088550567627, "learning_rate": 9.958496659306436e-06, "loss": 1.3849, "step": 1673 }, { "epoch": 0.4452127659574468, "grad_norm": 4.2683210372924805, "learning_rate": 9.958383497118478e-06, "loss": 1.3148, "step": 1674 }, { "epoch": 0.4454787234042553, "grad_norm": 3.677374839782715, "learning_rate": 9.958270181512295e-06, "loss": 1.1148, "step": 1675 }, { "epoch": 0.44574468085106383, "grad_norm": 4.075168132781982, "learning_rate": 9.958156712491396e-06, "loss": 1.4016, "step": 1676 }, { "epoch": 0.44601063829787235, "grad_norm": 4.137705326080322, "learning_rate": 9.95804309005929e-06, "loss": 1.3865, "step": 1677 }, { "epoch": 0.44627659574468087, "grad_norm": 3.7367939949035645, "learning_rate": 9.957929314219494e-06, "loss": 1.3304, "step": 1678 }, { "epoch": 0.4465425531914894, "grad_norm": 3.8000895977020264, "learning_rate": 9.957815384975528e-06, "loss": 1.4171, "step": 1679 }, { "epoch": 0.44680851063829785, "grad_norm": 3.774846315383911, "learning_rate": 9.957701302330915e-06, "loss": 1.0019, "step": 1680 }, { "epoch": 0.44707446808510637, "grad_norm": 3.7514147758483887, "learning_rate": 9.957587066289189e-06, "loss": 1.0711, "step": 1681 }, { "epoch": 0.4473404255319149, "grad_norm": 4.298345565795898, "learning_rate": 9.957472676853882e-06, "loss": 1.2902, "step": 1682 }, { "epoch": 0.4476063829787234, "grad_norm": 3.632465362548828, "learning_rate": 9.957358134028535e-06, "loss": 1.1969, "step": 1683 }, { "epoch": 0.4478723404255319, "grad_norm": 3.680661201477051, "learning_rate": 9.957243437816688e-06, "loss": 1.2266, "step": 1684 }, { "epoch": 0.44813829787234044, "grad_norm": 3.757211208343506, "learning_rate": 9.957128588221895e-06, "loss": 1.2374, "step": 1685 }, { "epoch": 0.44840425531914896, "grad_norm": 3.93074107170105, "learning_rate": 9.957013585247703e-06, "loss": 1.2285, "step": 1686 }, { "epoch": 0.4486702127659574, "grad_norm": 4.218538284301758, "learning_rate": 9.95689842889768e-06, "loss": 1.1887, "step": 1687 }, { "epoch": 0.44893617021276594, "grad_norm": 4.04231595993042, "learning_rate": 9.95678311917538e-06, "loss": 1.3696, "step": 1688 }, { "epoch": 0.44920212765957446, "grad_norm": 3.7490601539611816, "learning_rate": 9.956667656084376e-06, "loss": 1.2857, "step": 1689 }, { "epoch": 0.449468085106383, "grad_norm": 3.642409324645996, "learning_rate": 9.956552039628237e-06, "loss": 1.1536, "step": 1690 }, { "epoch": 0.4497340425531915, "grad_norm": 4.070724964141846, "learning_rate": 9.956436269810543e-06, "loss": 1.3129, "step": 1691 }, { "epoch": 0.45, "grad_norm": 3.6677682399749756, "learning_rate": 9.956320346634877e-06, "loss": 1.2578, "step": 1692 }, { "epoch": 0.45026595744680853, "grad_norm": 3.783087730407715, "learning_rate": 9.956204270104823e-06, "loss": 1.2943, "step": 1693 }, { "epoch": 0.45053191489361705, "grad_norm": 4.206989765167236, "learning_rate": 9.956088040223975e-06, "loss": 1.4913, "step": 1694 }, { "epoch": 0.4507978723404255, "grad_norm": 4.3370819091796875, "learning_rate": 9.955971656995927e-06, "loss": 1.1996, "step": 1695 }, { "epoch": 0.451063829787234, "grad_norm": 3.9697062969207764, "learning_rate": 9.95585512042428e-06, "loss": 1.253, "step": 1696 }, { "epoch": 0.45132978723404255, "grad_norm": 3.6939969062805176, "learning_rate": 9.95573843051264e-06, "loss": 1.1627, "step": 1697 }, { "epoch": 0.45159574468085106, "grad_norm": 4.0041351318359375, "learning_rate": 9.955621587264621e-06, "loss": 1.2185, "step": 1698 }, { "epoch": 0.4518617021276596, "grad_norm": 4.0276079177856445, "learning_rate": 9.955504590683834e-06, "loss": 1.2071, "step": 1699 }, { "epoch": 0.4521276595744681, "grad_norm": 4.058544158935547, "learning_rate": 9.955387440773902e-06, "loss": 1.2284, "step": 1700 }, { "epoch": 0.4523936170212766, "grad_norm": 3.8239941596984863, "learning_rate": 9.955270137538446e-06, "loss": 1.3371, "step": 1701 }, { "epoch": 0.4526595744680851, "grad_norm": 4.147292613983154, "learning_rate": 9.955152680981099e-06, "loss": 1.3542, "step": 1702 }, { "epoch": 0.4529255319148936, "grad_norm": 3.7271342277526855, "learning_rate": 9.955035071105495e-06, "loss": 1.0038, "step": 1703 }, { "epoch": 0.4531914893617021, "grad_norm": 4.002806663513184, "learning_rate": 9.954917307915272e-06, "loss": 1.3361, "step": 1704 }, { "epoch": 0.45345744680851063, "grad_norm": 3.8606765270233154, "learning_rate": 9.954799391414073e-06, "loss": 1.2703, "step": 1705 }, { "epoch": 0.45372340425531915, "grad_norm": 4.117914199829102, "learning_rate": 9.954681321605546e-06, "loss": 1.4262, "step": 1706 }, { "epoch": 0.45398936170212767, "grad_norm": 3.956178903579712, "learning_rate": 9.954563098493349e-06, "loss": 1.2889, "step": 1707 }, { "epoch": 0.4542553191489362, "grad_norm": 3.8659157752990723, "learning_rate": 9.954444722081133e-06, "loss": 1.2892, "step": 1708 }, { "epoch": 0.4545212765957447, "grad_norm": 3.936624765396118, "learning_rate": 9.954326192372565e-06, "loss": 1.5031, "step": 1709 }, { "epoch": 0.45478723404255317, "grad_norm": 3.8671083450317383, "learning_rate": 9.954207509371313e-06, "loss": 1.3221, "step": 1710 }, { "epoch": 0.4550531914893617, "grad_norm": 4.292788505554199, "learning_rate": 9.954088673081048e-06, "loss": 1.3216, "step": 1711 }, { "epoch": 0.4553191489361702, "grad_norm": 3.8020899295806885, "learning_rate": 9.953969683505444e-06, "loss": 1.2248, "step": 1712 }, { "epoch": 0.4555851063829787, "grad_norm": 4.227027893066406, "learning_rate": 9.953850540648189e-06, "loss": 1.2624, "step": 1713 }, { "epoch": 0.45585106382978724, "grad_norm": 4.067933559417725, "learning_rate": 9.953731244512963e-06, "loss": 1.2756, "step": 1714 }, { "epoch": 0.45611702127659576, "grad_norm": 3.9916749000549316, "learning_rate": 9.953611795103462e-06, "loss": 1.2651, "step": 1715 }, { "epoch": 0.4563829787234043, "grad_norm": 4.110116004943848, "learning_rate": 9.953492192423379e-06, "loss": 1.3669, "step": 1716 }, { "epoch": 0.4566489361702128, "grad_norm": 4.194306373596191, "learning_rate": 9.953372436476414e-06, "loss": 1.534, "step": 1717 }, { "epoch": 0.45691489361702126, "grad_norm": 3.9467716217041016, "learning_rate": 9.953252527266275e-06, "loss": 1.2748, "step": 1718 }, { "epoch": 0.4571808510638298, "grad_norm": 4.1253886222839355, "learning_rate": 9.953132464796674e-06, "loss": 1.2625, "step": 1719 }, { "epoch": 0.4574468085106383, "grad_norm": 4.45941162109375, "learning_rate": 9.95301224907132e-06, "loss": 1.3565, "step": 1720 }, { "epoch": 0.4577127659574468, "grad_norm": 4.033083915710449, "learning_rate": 9.952891880093935e-06, "loss": 1.2789, "step": 1721 }, { "epoch": 0.45797872340425533, "grad_norm": 4.035634517669678, "learning_rate": 9.952771357868245e-06, "loss": 1.2641, "step": 1722 }, { "epoch": 0.45824468085106385, "grad_norm": 3.722550630569458, "learning_rate": 9.952650682397978e-06, "loss": 1.3316, "step": 1723 }, { "epoch": 0.45851063829787236, "grad_norm": 3.8771049976348877, "learning_rate": 9.952529853686868e-06, "loss": 1.3889, "step": 1724 }, { "epoch": 0.4587765957446808, "grad_norm": 4.175072193145752, "learning_rate": 9.952408871738652e-06, "loss": 1.3766, "step": 1725 }, { "epoch": 0.45904255319148934, "grad_norm": 3.859618902206421, "learning_rate": 9.952287736557078e-06, "loss": 1.1251, "step": 1726 }, { "epoch": 0.45930851063829786, "grad_norm": 4.060375213623047, "learning_rate": 9.952166448145887e-06, "loss": 1.2308, "step": 1727 }, { "epoch": 0.4595744680851064, "grad_norm": 3.9827208518981934, "learning_rate": 9.952045006508839e-06, "loss": 1.2434, "step": 1728 }, { "epoch": 0.4598404255319149, "grad_norm": 3.8347811698913574, "learning_rate": 9.951923411649686e-06, "loss": 1.1165, "step": 1729 }, { "epoch": 0.4601063829787234, "grad_norm": 3.8551104068756104, "learning_rate": 9.951801663572194e-06, "loss": 1.2536, "step": 1730 }, { "epoch": 0.46037234042553193, "grad_norm": 4.300414562225342, "learning_rate": 9.951679762280127e-06, "loss": 1.3653, "step": 1731 }, { "epoch": 0.46063829787234045, "grad_norm": 3.9349825382232666, "learning_rate": 9.95155770777726e-06, "loss": 1.1563, "step": 1732 }, { "epoch": 0.4609042553191489, "grad_norm": 4.161105632781982, "learning_rate": 9.951435500067366e-06, "loss": 1.3807, "step": 1733 }, { "epoch": 0.46117021276595743, "grad_norm": 4.0084686279296875, "learning_rate": 9.95131313915423e-06, "loss": 1.2486, "step": 1734 }, { "epoch": 0.46143617021276595, "grad_norm": 3.6559159755706787, "learning_rate": 9.951190625041634e-06, "loss": 1.2063, "step": 1735 }, { "epoch": 0.46170212765957447, "grad_norm": 3.99893856048584, "learning_rate": 9.95106795773337e-06, "loss": 1.2945, "step": 1736 }, { "epoch": 0.461968085106383, "grad_norm": 4.061460018157959, "learning_rate": 9.950945137233237e-06, "loss": 1.3383, "step": 1737 }, { "epoch": 0.4622340425531915, "grad_norm": 4.054213047027588, "learning_rate": 9.950822163545032e-06, "loss": 1.2836, "step": 1738 }, { "epoch": 0.4625, "grad_norm": 3.9057390689849854, "learning_rate": 9.95069903667256e-06, "loss": 1.2157, "step": 1739 }, { "epoch": 0.4627659574468085, "grad_norm": 3.977504014968872, "learning_rate": 9.95057575661963e-06, "loss": 1.322, "step": 1740 }, { "epoch": 0.463031914893617, "grad_norm": 3.478853702545166, "learning_rate": 9.950452323390058e-06, "loss": 1.1772, "step": 1741 }, { "epoch": 0.4632978723404255, "grad_norm": 3.8592848777770996, "learning_rate": 9.950328736987664e-06, "loss": 1.3234, "step": 1742 }, { "epoch": 0.46356382978723404, "grad_norm": 3.858339309692383, "learning_rate": 9.95020499741627e-06, "loss": 1.3079, "step": 1743 }, { "epoch": 0.46382978723404256, "grad_norm": 3.797468900680542, "learning_rate": 9.950081104679704e-06, "loss": 1.1611, "step": 1744 }, { "epoch": 0.4640957446808511, "grad_norm": 3.9753012657165527, "learning_rate": 9.949957058781802e-06, "loss": 1.3449, "step": 1745 }, { "epoch": 0.4643617021276596, "grad_norm": 4.22615385055542, "learning_rate": 9.9498328597264e-06, "loss": 1.1605, "step": 1746 }, { "epoch": 0.4646276595744681, "grad_norm": 4.091019153594971, "learning_rate": 9.949708507517342e-06, "loss": 1.2877, "step": 1747 }, { "epoch": 0.4648936170212766, "grad_norm": 4.121149063110352, "learning_rate": 9.949584002158474e-06, "loss": 1.2463, "step": 1748 }, { "epoch": 0.4651595744680851, "grad_norm": 4.406885147094727, "learning_rate": 9.949459343653652e-06, "loss": 1.3303, "step": 1749 }, { "epoch": 0.4654255319148936, "grad_norm": 4.5540666580200195, "learning_rate": 9.94933453200673e-06, "loss": 1.3149, "step": 1750 }, { "epoch": 0.4656914893617021, "grad_norm": 3.9736440181732178, "learning_rate": 9.949209567221569e-06, "loss": 1.4947, "step": 1751 }, { "epoch": 0.46595744680851064, "grad_norm": 4.265797138214111, "learning_rate": 9.949084449302038e-06, "loss": 1.2727, "step": 1752 }, { "epoch": 0.46622340425531916, "grad_norm": 3.906663656234741, "learning_rate": 9.948959178252007e-06, "loss": 1.2346, "step": 1753 }, { "epoch": 0.4664893617021277, "grad_norm": 3.8884990215301514, "learning_rate": 9.948833754075351e-06, "loss": 1.2997, "step": 1754 }, { "epoch": 0.46675531914893614, "grad_norm": 3.943458080291748, "learning_rate": 9.948708176775954e-06, "loss": 1.2945, "step": 1755 }, { "epoch": 0.46702127659574466, "grad_norm": 3.9176204204559326, "learning_rate": 9.9485824463577e-06, "loss": 1.2714, "step": 1756 }, { "epoch": 0.4672872340425532, "grad_norm": 3.834636926651001, "learning_rate": 9.948456562824478e-06, "loss": 1.1341, "step": 1757 }, { "epoch": 0.4675531914893617, "grad_norm": 3.8121955394744873, "learning_rate": 9.948330526180183e-06, "loss": 1.3064, "step": 1758 }, { "epoch": 0.4678191489361702, "grad_norm": 4.121542930603027, "learning_rate": 9.948204336428717e-06, "loss": 1.2775, "step": 1759 }, { "epoch": 0.46808510638297873, "grad_norm": 4.043048858642578, "learning_rate": 9.948077993573983e-06, "loss": 1.2601, "step": 1760 }, { "epoch": 0.46835106382978725, "grad_norm": 3.7144079208374023, "learning_rate": 9.94795149761989e-06, "loss": 1.1136, "step": 1761 }, { "epoch": 0.46861702127659577, "grad_norm": 4.818117141723633, "learning_rate": 9.947824848570352e-06, "loss": 1.4366, "step": 1762 }, { "epoch": 0.46888297872340423, "grad_norm": 4.190409183502197, "learning_rate": 9.947698046429287e-06, "loss": 1.2308, "step": 1763 }, { "epoch": 0.46914893617021275, "grad_norm": 4.0341267585754395, "learning_rate": 9.94757109120062e-06, "loss": 1.2466, "step": 1764 }, { "epoch": 0.46941489361702127, "grad_norm": 3.9223225116729736, "learning_rate": 9.947443982888279e-06, "loss": 1.212, "step": 1765 }, { "epoch": 0.4696808510638298, "grad_norm": 4.121956825256348, "learning_rate": 9.947316721496196e-06, "loss": 1.2635, "step": 1766 }, { "epoch": 0.4699468085106383, "grad_norm": 3.9485208988189697, "learning_rate": 9.947189307028308e-06, "loss": 1.3579, "step": 1767 }, { "epoch": 0.4702127659574468, "grad_norm": 4.009948253631592, "learning_rate": 9.947061739488559e-06, "loss": 1.4448, "step": 1768 }, { "epoch": 0.47047872340425534, "grad_norm": 4.2954912185668945, "learning_rate": 9.946934018880896e-06, "loss": 1.1665, "step": 1769 }, { "epoch": 0.47074468085106386, "grad_norm": 3.6225626468658447, "learning_rate": 9.94680614520927e-06, "loss": 1.2863, "step": 1770 }, { "epoch": 0.4710106382978723, "grad_norm": 3.9409780502319336, "learning_rate": 9.946678118477635e-06, "loss": 1.1042, "step": 1771 }, { "epoch": 0.47127659574468084, "grad_norm": 3.5868918895721436, "learning_rate": 9.946549938689958e-06, "loss": 1.1924, "step": 1772 }, { "epoch": 0.47154255319148936, "grad_norm": 3.5596354007720947, "learning_rate": 9.946421605850201e-06, "loss": 1.1459, "step": 1773 }, { "epoch": 0.4718085106382979, "grad_norm": 3.595719337463379, "learning_rate": 9.946293119962336e-06, "loss": 1.2274, "step": 1774 }, { "epoch": 0.4720744680851064, "grad_norm": 4.341657638549805, "learning_rate": 9.946164481030339e-06, "loss": 1.433, "step": 1775 }, { "epoch": 0.4723404255319149, "grad_norm": 4.137777328491211, "learning_rate": 9.946035689058189e-06, "loss": 1.3307, "step": 1776 }, { "epoch": 0.4726063829787234, "grad_norm": 4.115199565887451, "learning_rate": 9.94590674404987e-06, "loss": 1.3575, "step": 1777 }, { "epoch": 0.4728723404255319, "grad_norm": 3.9467270374298096, "learning_rate": 9.945777646009375e-06, "loss": 1.1772, "step": 1778 }, { "epoch": 0.4731382978723404, "grad_norm": 3.986268997192383, "learning_rate": 9.945648394940697e-06, "loss": 1.3949, "step": 1779 }, { "epoch": 0.4734042553191489, "grad_norm": 4.070546627044678, "learning_rate": 9.945518990847835e-06, "loss": 1.3664, "step": 1780 }, { "epoch": 0.47367021276595744, "grad_norm": 4.0783233642578125, "learning_rate": 9.94538943373479e-06, "loss": 1.3199, "step": 1781 }, { "epoch": 0.47393617021276596, "grad_norm": 4.331148147583008, "learning_rate": 9.945259723605579e-06, "loss": 1.3809, "step": 1782 }, { "epoch": 0.4742021276595745, "grad_norm": 4.163266658782959, "learning_rate": 9.945129860464205e-06, "loss": 1.3325, "step": 1783 }, { "epoch": 0.474468085106383, "grad_norm": 4.23274564743042, "learning_rate": 9.944999844314693e-06, "loss": 1.3793, "step": 1784 }, { "epoch": 0.4747340425531915, "grad_norm": 4.219319820404053, "learning_rate": 9.944869675161062e-06, "loss": 1.3631, "step": 1785 }, { "epoch": 0.475, "grad_norm": 4.5794830322265625, "learning_rate": 9.944739353007344e-06, "loss": 1.3941, "step": 1786 }, { "epoch": 0.4752659574468085, "grad_norm": 3.806102752685547, "learning_rate": 9.944608877857567e-06, "loss": 1.2896, "step": 1787 }, { "epoch": 0.475531914893617, "grad_norm": 3.927706241607666, "learning_rate": 9.94447824971577e-06, "loss": 1.4121, "step": 1788 }, { "epoch": 0.47579787234042553, "grad_norm": 3.8713526725769043, "learning_rate": 9.944347468585995e-06, "loss": 1.3029, "step": 1789 }, { "epoch": 0.47606382978723405, "grad_norm": 3.6732828617095947, "learning_rate": 9.944216534472287e-06, "loss": 1.2379, "step": 1790 }, { "epoch": 0.47632978723404257, "grad_norm": 4.1793084144592285, "learning_rate": 9.9440854473787e-06, "loss": 1.391, "step": 1791 }, { "epoch": 0.4765957446808511, "grad_norm": 4.131939888000488, "learning_rate": 9.943954207309287e-06, "loss": 1.2346, "step": 1792 }, { "epoch": 0.47686170212765955, "grad_norm": 4.083577632904053, "learning_rate": 9.94382281426811e-06, "loss": 1.4478, "step": 1793 }, { "epoch": 0.47712765957446807, "grad_norm": 3.640902280807495, "learning_rate": 9.943691268259234e-06, "loss": 1.2515, "step": 1794 }, { "epoch": 0.4773936170212766, "grad_norm": 4.226308345794678, "learning_rate": 9.943559569286731e-06, "loss": 1.3599, "step": 1795 }, { "epoch": 0.4776595744680851, "grad_norm": 4.301510810852051, "learning_rate": 9.943427717354674e-06, "loss": 1.2623, "step": 1796 }, { "epoch": 0.4779255319148936, "grad_norm": 3.6332836151123047, "learning_rate": 9.943295712467145e-06, "loss": 1.2776, "step": 1797 }, { "epoch": 0.47819148936170214, "grad_norm": 3.6086063385009766, "learning_rate": 9.943163554628223e-06, "loss": 1.2306, "step": 1798 }, { "epoch": 0.47845744680851066, "grad_norm": 3.787510395050049, "learning_rate": 9.943031243842004e-06, "loss": 1.3904, "step": 1799 }, { "epoch": 0.4787234042553192, "grad_norm": 4.257116317749023, "learning_rate": 9.942898780112578e-06, "loss": 1.2504, "step": 1800 }, { "epoch": 0.47898936170212764, "grad_norm": 4.033913612365723, "learning_rate": 9.942766163444044e-06, "loss": 1.1252, "step": 1801 }, { "epoch": 0.47925531914893615, "grad_norm": 3.9039859771728516, "learning_rate": 9.942633393840504e-06, "loss": 1.2183, "step": 1802 }, { "epoch": 0.47952127659574467, "grad_norm": 4.116021156311035, "learning_rate": 9.94250047130607e-06, "loss": 1.3872, "step": 1803 }, { "epoch": 0.4797872340425532, "grad_norm": 4.146193504333496, "learning_rate": 9.94236739584485e-06, "loss": 1.2302, "step": 1804 }, { "epoch": 0.4800531914893617, "grad_norm": 4.098079681396484, "learning_rate": 9.942234167460966e-06, "loss": 1.3785, "step": 1805 }, { "epoch": 0.4803191489361702, "grad_norm": 3.643486976623535, "learning_rate": 9.942100786158537e-06, "loss": 1.1499, "step": 1806 }, { "epoch": 0.48058510638297874, "grad_norm": 4.246469974517822, "learning_rate": 9.94196725194169e-06, "loss": 1.3295, "step": 1807 }, { "epoch": 0.4808510638297872, "grad_norm": 3.857382297515869, "learning_rate": 9.94183356481456e-06, "loss": 1.325, "step": 1808 }, { "epoch": 0.4811170212765957, "grad_norm": 3.5324032306671143, "learning_rate": 9.94169972478128e-06, "loss": 1.1482, "step": 1809 }, { "epoch": 0.48138297872340424, "grad_norm": 3.7972612380981445, "learning_rate": 9.941565731845993e-06, "loss": 1.4476, "step": 1810 }, { "epoch": 0.48164893617021276, "grad_norm": 3.770042896270752, "learning_rate": 9.941431586012844e-06, "loss": 1.3034, "step": 1811 }, { "epoch": 0.4819148936170213, "grad_norm": 3.675645351409912, "learning_rate": 9.941297287285984e-06, "loss": 1.2526, "step": 1812 }, { "epoch": 0.4821808510638298, "grad_norm": 3.526350975036621, "learning_rate": 9.941162835669568e-06, "loss": 1.1573, "step": 1813 }, { "epoch": 0.4824468085106383, "grad_norm": 3.4532649517059326, "learning_rate": 9.941028231167756e-06, "loss": 1.1735, "step": 1814 }, { "epoch": 0.48271276595744683, "grad_norm": 3.9783992767333984, "learning_rate": 9.940893473784714e-06, "loss": 1.3828, "step": 1815 }, { "epoch": 0.4829787234042553, "grad_norm": 4.059201717376709, "learning_rate": 9.940758563524611e-06, "loss": 1.2649, "step": 1816 }, { "epoch": 0.4832446808510638, "grad_norm": 4.069849491119385, "learning_rate": 9.94062350039162e-06, "loss": 1.2833, "step": 1817 }, { "epoch": 0.48351063829787233, "grad_norm": 3.488699197769165, "learning_rate": 9.940488284389923e-06, "loss": 1.0884, "step": 1818 }, { "epoch": 0.48377659574468085, "grad_norm": 3.721902370452881, "learning_rate": 9.940352915523699e-06, "loss": 1.2442, "step": 1819 }, { "epoch": 0.48404255319148937, "grad_norm": 4.082354545593262, "learning_rate": 9.94021739379714e-06, "loss": 1.3406, "step": 1820 }, { "epoch": 0.4843085106382979, "grad_norm": 3.9286141395568848, "learning_rate": 9.94008171921444e-06, "loss": 1.2856, "step": 1821 }, { "epoch": 0.4845744680851064, "grad_norm": 3.968208074569702, "learning_rate": 9.939945891779795e-06, "loss": 1.3172, "step": 1822 }, { "epoch": 0.4848404255319149, "grad_norm": 4.114230155944824, "learning_rate": 9.939809911497407e-06, "loss": 1.2936, "step": 1823 }, { "epoch": 0.4851063829787234, "grad_norm": 3.840162754058838, "learning_rate": 9.939673778371484e-06, "loss": 1.3923, "step": 1824 }, { "epoch": 0.4853723404255319, "grad_norm": 4.272914886474609, "learning_rate": 9.939537492406239e-06, "loss": 1.2932, "step": 1825 }, { "epoch": 0.4856382978723404, "grad_norm": 3.7386868000030518, "learning_rate": 9.939401053605889e-06, "loss": 1.3849, "step": 1826 }, { "epoch": 0.48590425531914894, "grad_norm": 4.278271675109863, "learning_rate": 9.939264461974654e-06, "loss": 1.2878, "step": 1827 }, { "epoch": 0.48617021276595745, "grad_norm": 3.827216386795044, "learning_rate": 9.939127717516763e-06, "loss": 1.2833, "step": 1828 }, { "epoch": 0.486436170212766, "grad_norm": 3.888113498687744, "learning_rate": 9.938990820236445e-06, "loss": 1.2384, "step": 1829 }, { "epoch": 0.4867021276595745, "grad_norm": 3.886965036392212, "learning_rate": 9.938853770137935e-06, "loss": 1.3365, "step": 1830 }, { "epoch": 0.48696808510638295, "grad_norm": 3.9059507846832275, "learning_rate": 9.938716567225475e-06, "loss": 1.3569, "step": 1831 }, { "epoch": 0.48723404255319147, "grad_norm": 3.922834634780884, "learning_rate": 9.93857921150331e-06, "loss": 1.2035, "step": 1832 }, { "epoch": 0.4875, "grad_norm": 3.949385643005371, "learning_rate": 9.938441702975689e-06, "loss": 1.3485, "step": 1833 }, { "epoch": 0.4877659574468085, "grad_norm": 4.1959333419799805, "learning_rate": 9.938304041646869e-06, "loss": 1.3079, "step": 1834 }, { "epoch": 0.488031914893617, "grad_norm": 3.98871111869812, "learning_rate": 9.938166227521106e-06, "loss": 1.3067, "step": 1835 }, { "epoch": 0.48829787234042554, "grad_norm": 4.129928112030029, "learning_rate": 9.938028260602668e-06, "loss": 1.3053, "step": 1836 }, { "epoch": 0.48856382978723406, "grad_norm": 4.131626129150391, "learning_rate": 9.937890140895819e-06, "loss": 1.3332, "step": 1837 }, { "epoch": 0.4888297872340426, "grad_norm": 3.8896591663360596, "learning_rate": 9.937751868404838e-06, "loss": 1.2105, "step": 1838 }, { "epoch": 0.48909574468085104, "grad_norm": 3.6959292888641357, "learning_rate": 9.937613443134e-06, "loss": 1.1607, "step": 1839 }, { "epoch": 0.48936170212765956, "grad_norm": 4.914716720581055, "learning_rate": 9.937474865087588e-06, "loss": 1.1406, "step": 1840 }, { "epoch": 0.4896276595744681, "grad_norm": 3.811239004135132, "learning_rate": 9.93733613426989e-06, "loss": 1.2047, "step": 1841 }, { "epoch": 0.4898936170212766, "grad_norm": 3.8995115756988525, "learning_rate": 9.937197250685202e-06, "loss": 1.1582, "step": 1842 }, { "epoch": 0.4901595744680851, "grad_norm": 3.6087286472320557, "learning_rate": 9.937058214337817e-06, "loss": 1.1866, "step": 1843 }, { "epoch": 0.49042553191489363, "grad_norm": 3.854526996612549, "learning_rate": 9.936919025232036e-06, "loss": 1.2744, "step": 1844 }, { "epoch": 0.49069148936170215, "grad_norm": 3.870508909225464, "learning_rate": 9.936779683372169e-06, "loss": 1.1989, "step": 1845 }, { "epoch": 0.4909574468085106, "grad_norm": 4.0505194664001465, "learning_rate": 9.936640188762527e-06, "loss": 1.206, "step": 1846 }, { "epoch": 0.49122340425531913, "grad_norm": 3.8995118141174316, "learning_rate": 9.936500541407424e-06, "loss": 1.1642, "step": 1847 }, { "epoch": 0.49148936170212765, "grad_norm": 4.045437812805176, "learning_rate": 9.936360741311185e-06, "loss": 1.2949, "step": 1848 }, { "epoch": 0.49175531914893617, "grad_norm": 3.954519271850586, "learning_rate": 9.93622078847813e-06, "loss": 1.3334, "step": 1849 }, { "epoch": 0.4920212765957447, "grad_norm": 3.9482545852661133, "learning_rate": 9.936080682912594e-06, "loss": 1.2859, "step": 1850 }, { "epoch": 0.4922872340425532, "grad_norm": 3.7565512657165527, "learning_rate": 9.935940424618908e-06, "loss": 1.1294, "step": 1851 }, { "epoch": 0.4925531914893617, "grad_norm": 4.012822151184082, "learning_rate": 9.935800013601415e-06, "loss": 1.4283, "step": 1852 }, { "epoch": 0.49281914893617024, "grad_norm": 3.7840845584869385, "learning_rate": 9.935659449864458e-06, "loss": 1.332, "step": 1853 }, { "epoch": 0.4930851063829787, "grad_norm": 4.097705364227295, "learning_rate": 9.935518733412387e-06, "loss": 1.1062, "step": 1854 }, { "epoch": 0.4933510638297872, "grad_norm": 4.073275089263916, "learning_rate": 9.935377864249558e-06, "loss": 1.4567, "step": 1855 }, { "epoch": 0.49361702127659574, "grad_norm": 4.020910263061523, "learning_rate": 9.935236842380325e-06, "loss": 1.247, "step": 1856 }, { "epoch": 0.49388297872340425, "grad_norm": 4.380120277404785, "learning_rate": 9.935095667809053e-06, "loss": 1.2439, "step": 1857 }, { "epoch": 0.49414893617021277, "grad_norm": 3.8681838512420654, "learning_rate": 9.934954340540111e-06, "loss": 1.3522, "step": 1858 }, { "epoch": 0.4944148936170213, "grad_norm": 3.7794203758239746, "learning_rate": 9.934812860577871e-06, "loss": 1.1068, "step": 1859 }, { "epoch": 0.4946808510638298, "grad_norm": 3.9970266819000244, "learning_rate": 9.934671227926714e-06, "loss": 1.228, "step": 1860 }, { "epoch": 0.4949468085106383, "grad_norm": 4.03349494934082, "learning_rate": 9.934529442591016e-06, "loss": 1.5158, "step": 1861 }, { "epoch": 0.4952127659574468, "grad_norm": 3.6862449645996094, "learning_rate": 9.934387504575169e-06, "loss": 1.3988, "step": 1862 }, { "epoch": 0.4954787234042553, "grad_norm": 3.7959797382354736, "learning_rate": 9.934245413883561e-06, "loss": 1.2412, "step": 1863 }, { "epoch": 0.4957446808510638, "grad_norm": 3.952791929244995, "learning_rate": 9.934103170520592e-06, "loss": 1.3866, "step": 1864 }, { "epoch": 0.49601063829787234, "grad_norm": 3.7724785804748535, "learning_rate": 9.933960774490663e-06, "loss": 1.1724, "step": 1865 }, { "epoch": 0.49627659574468086, "grad_norm": 3.9937689304351807, "learning_rate": 9.933818225798178e-06, "loss": 1.3353, "step": 1866 }, { "epoch": 0.4965425531914894, "grad_norm": 3.818441152572632, "learning_rate": 9.933675524447549e-06, "loss": 1.205, "step": 1867 }, { "epoch": 0.4968085106382979, "grad_norm": 3.97725772857666, "learning_rate": 9.933532670443188e-06, "loss": 1.289, "step": 1868 }, { "epoch": 0.49707446808510636, "grad_norm": 3.930464744567871, "learning_rate": 9.93338966378952e-06, "loss": 1.5099, "step": 1869 }, { "epoch": 0.4973404255319149, "grad_norm": 4.353559494018555, "learning_rate": 9.933246504490966e-06, "loss": 1.4003, "step": 1870 }, { "epoch": 0.4976063829787234, "grad_norm": 3.9544339179992676, "learning_rate": 9.933103192551958e-06, "loss": 1.1387, "step": 1871 }, { "epoch": 0.4978723404255319, "grad_norm": 3.9833321571350098, "learning_rate": 9.932959727976928e-06, "loss": 1.2584, "step": 1872 }, { "epoch": 0.49813829787234043, "grad_norm": 3.862346887588501, "learning_rate": 9.932816110770317e-06, "loss": 1.4073, "step": 1873 }, { "epoch": 0.49840425531914895, "grad_norm": 3.7747912406921387, "learning_rate": 9.932672340936568e-06, "loss": 1.2541, "step": 1874 }, { "epoch": 0.49867021276595747, "grad_norm": 4.324585437774658, "learning_rate": 9.93252841848013e-06, "loss": 1.4344, "step": 1875 }, { "epoch": 0.498936170212766, "grad_norm": 4.572371006011963, "learning_rate": 9.932384343405452e-06, "loss": 1.246, "step": 1876 }, { "epoch": 0.49920212765957445, "grad_norm": 4.566850662231445, "learning_rate": 9.932240115716998e-06, "loss": 1.2813, "step": 1877 }, { "epoch": 0.49946808510638296, "grad_norm": 3.940889358520508, "learning_rate": 9.932095735419228e-06, "loss": 1.1925, "step": 1878 }, { "epoch": 0.4997340425531915, "grad_norm": 3.6935203075408936, "learning_rate": 9.93195120251661e-06, "loss": 1.2649, "step": 1879 }, { "epoch": 0.5, "grad_norm": 4.11472749710083, "learning_rate": 9.931806517013612e-06, "loss": 1.3672, "step": 1880 }, { "epoch": 0.5002659574468085, "grad_norm": 4.156626224517822, "learning_rate": 9.931661678914717e-06, "loss": 1.4258, "step": 1881 }, { "epoch": 0.500531914893617, "grad_norm": 4.2577805519104, "learning_rate": 9.9315166882244e-06, "loss": 1.3524, "step": 1882 }, { "epoch": 0.5007978723404255, "grad_norm": 3.9902119636535645, "learning_rate": 9.931371544947154e-06, "loss": 1.2988, "step": 1883 }, { "epoch": 0.5010638297872341, "grad_norm": 4.20100736618042, "learning_rate": 9.931226249087465e-06, "loss": 1.3102, "step": 1884 }, { "epoch": 0.5013297872340425, "grad_norm": 4.172153949737549, "learning_rate": 9.93108080064983e-06, "loss": 1.2019, "step": 1885 }, { "epoch": 0.5015957446808511, "grad_norm": 4.27764892578125, "learning_rate": 9.93093519963875e-06, "loss": 1.2075, "step": 1886 }, { "epoch": 0.5018617021276596, "grad_norm": 4.327826023101807, "learning_rate": 9.930789446058729e-06, "loss": 1.2459, "step": 1887 }, { "epoch": 0.502127659574468, "grad_norm": 4.269448757171631, "learning_rate": 9.930643539914276e-06, "loss": 1.4385, "step": 1888 }, { "epoch": 0.5023936170212766, "grad_norm": 3.7377564907073975, "learning_rate": 9.930497481209908e-06, "loss": 1.2267, "step": 1889 }, { "epoch": 0.5026595744680851, "grad_norm": 3.958397388458252, "learning_rate": 9.930351269950144e-06, "loss": 1.3289, "step": 1890 }, { "epoch": 0.5029255319148936, "grad_norm": 3.992171049118042, "learning_rate": 9.930204906139506e-06, "loss": 1.2989, "step": 1891 }, { "epoch": 0.5031914893617021, "grad_norm": 3.8019278049468994, "learning_rate": 9.930058389782523e-06, "loss": 1.3542, "step": 1892 }, { "epoch": 0.5034574468085107, "grad_norm": 3.7610788345336914, "learning_rate": 9.929911720883729e-06, "loss": 1.247, "step": 1893 }, { "epoch": 0.5037234042553191, "grad_norm": 3.765941619873047, "learning_rate": 9.929764899447662e-06, "loss": 1.3651, "step": 1894 }, { "epoch": 0.5039893617021277, "grad_norm": 4.16331672668457, "learning_rate": 9.929617925478868e-06, "loss": 1.28, "step": 1895 }, { "epoch": 0.5042553191489362, "grad_norm": 4.166515827178955, "learning_rate": 9.929470798981888e-06, "loss": 1.2401, "step": 1896 }, { "epoch": 0.5045212765957446, "grad_norm": 4.0264177322387695, "learning_rate": 9.929323519961278e-06, "loss": 1.3036, "step": 1897 }, { "epoch": 0.5047872340425532, "grad_norm": 3.85672926902771, "learning_rate": 9.929176088421596e-06, "loss": 1.1619, "step": 1898 }, { "epoch": 0.5050531914893617, "grad_norm": 4.00507926940918, "learning_rate": 9.929028504367402e-06, "loss": 1.2787, "step": 1899 }, { "epoch": 0.5053191489361702, "grad_norm": 3.6691126823425293, "learning_rate": 9.928880767803264e-06, "loss": 1.3256, "step": 1900 }, { "epoch": 0.5055851063829787, "grad_norm": 4.093438625335693, "learning_rate": 9.92873287873375e-06, "loss": 1.2623, "step": 1901 }, { "epoch": 0.5058510638297873, "grad_norm": 3.689911127090454, "learning_rate": 9.92858483716344e-06, "loss": 1.4022, "step": 1902 }, { "epoch": 0.5061170212765957, "grad_norm": 4.178584575653076, "learning_rate": 9.928436643096909e-06, "loss": 1.3588, "step": 1903 }, { "epoch": 0.5063829787234042, "grad_norm": 4.098899841308594, "learning_rate": 9.928288296538749e-06, "loss": 1.2687, "step": 1904 }, { "epoch": 0.5066489361702128, "grad_norm": 4.034060001373291, "learning_rate": 9.928139797493545e-06, "loss": 1.2859, "step": 1905 }, { "epoch": 0.5069148936170212, "grad_norm": 4.75716495513916, "learning_rate": 9.927991145965894e-06, "loss": 1.445, "step": 1906 }, { "epoch": 0.5071808510638298, "grad_norm": 3.466297149658203, "learning_rate": 9.927842341960396e-06, "loss": 1.0634, "step": 1907 }, { "epoch": 0.5074468085106383, "grad_norm": 3.9337103366851807, "learning_rate": 9.927693385481652e-06, "loss": 1.4115, "step": 1908 }, { "epoch": 0.5077127659574469, "grad_norm": 3.6876132488250732, "learning_rate": 9.927544276534275e-06, "loss": 1.2333, "step": 1909 }, { "epoch": 0.5079787234042553, "grad_norm": 4.154485702514648, "learning_rate": 9.927395015122876e-06, "loss": 1.2432, "step": 1910 }, { "epoch": 0.5082446808510638, "grad_norm": 4.0430073738098145, "learning_rate": 9.927245601252074e-06, "loss": 1.3562, "step": 1911 }, { "epoch": 0.5085106382978724, "grad_norm": 3.6701016426086426, "learning_rate": 9.927096034926491e-06, "loss": 1.2138, "step": 1912 }, { "epoch": 0.5087765957446808, "grad_norm": 3.7969815731048584, "learning_rate": 9.926946316150757e-06, "loss": 1.3166, "step": 1913 }, { "epoch": 0.5090425531914894, "grad_norm": 3.662705183029175, "learning_rate": 9.926796444929502e-06, "loss": 1.1107, "step": 1914 }, { "epoch": 0.5093085106382979, "grad_norm": 3.8880231380462646, "learning_rate": 9.926646421267366e-06, "loss": 1.2989, "step": 1915 }, { "epoch": 0.5095744680851064, "grad_norm": 3.6114046573638916, "learning_rate": 9.926496245168989e-06, "loss": 1.1822, "step": 1916 }, { "epoch": 0.5098404255319149, "grad_norm": 3.799083948135376, "learning_rate": 9.926345916639018e-06, "loss": 1.1918, "step": 1917 }, { "epoch": 0.5101063829787233, "grad_norm": 3.4708175659179688, "learning_rate": 9.926195435682102e-06, "loss": 1.1244, "step": 1918 }, { "epoch": 0.5103723404255319, "grad_norm": 4.323407173156738, "learning_rate": 9.926044802302904e-06, "loss": 1.275, "step": 1919 }, { "epoch": 0.5106382978723404, "grad_norm": 3.8659491539001465, "learning_rate": 9.925894016506076e-06, "loss": 1.2904, "step": 1920 }, { "epoch": 0.510904255319149, "grad_norm": 3.7898192405700684, "learning_rate": 9.925743078296288e-06, "loss": 1.2569, "step": 1921 }, { "epoch": 0.5111702127659574, "grad_norm": 3.559047222137451, "learning_rate": 9.925591987678212e-06, "loss": 1.3267, "step": 1922 }, { "epoch": 0.511436170212766, "grad_norm": 3.8164639472961426, "learning_rate": 9.925440744656518e-06, "loss": 1.2059, "step": 1923 }, { "epoch": 0.5117021276595745, "grad_norm": 4.318164825439453, "learning_rate": 9.925289349235892e-06, "loss": 1.3528, "step": 1924 }, { "epoch": 0.511968085106383, "grad_norm": 3.8021814823150635, "learning_rate": 9.925137801421011e-06, "loss": 1.2096, "step": 1925 }, { "epoch": 0.5122340425531915, "grad_norm": 3.7836246490478516, "learning_rate": 9.924986101216569e-06, "loss": 1.2719, "step": 1926 }, { "epoch": 0.5125, "grad_norm": 4.108916282653809, "learning_rate": 9.92483424862726e-06, "loss": 1.4018, "step": 1927 }, { "epoch": 0.5127659574468085, "grad_norm": 3.7151575088500977, "learning_rate": 9.92468224365778e-06, "loss": 1.3966, "step": 1928 }, { "epoch": 0.513031914893617, "grad_norm": 3.5576205253601074, "learning_rate": 9.924530086312834e-06, "loss": 1.2066, "step": 1929 }, { "epoch": 0.5132978723404256, "grad_norm": 3.6642985343933105, "learning_rate": 9.924377776597128e-06, "loss": 1.3887, "step": 1930 }, { "epoch": 0.513563829787234, "grad_norm": 4.360495567321777, "learning_rate": 9.924225314515375e-06, "loss": 1.6151, "step": 1931 }, { "epoch": 0.5138297872340426, "grad_norm": 3.934380292892456, "learning_rate": 9.924072700072296e-06, "loss": 1.2027, "step": 1932 }, { "epoch": 0.5140957446808511, "grad_norm": 3.95251727104187, "learning_rate": 9.923919933272608e-06, "loss": 1.4496, "step": 1933 }, { "epoch": 0.5143617021276595, "grad_norm": 3.660336494445801, "learning_rate": 9.923767014121042e-06, "loss": 1.2549, "step": 1934 }, { "epoch": 0.5146276595744681, "grad_norm": 3.936469316482544, "learning_rate": 9.923613942622326e-06, "loss": 1.3851, "step": 1935 }, { "epoch": 0.5148936170212766, "grad_norm": 3.912565231323242, "learning_rate": 9.923460718781198e-06, "loss": 1.303, "step": 1936 }, { "epoch": 0.5151595744680851, "grad_norm": 3.9063549041748047, "learning_rate": 9.923307342602399e-06, "loss": 1.315, "step": 1937 }, { "epoch": 0.5154255319148936, "grad_norm": 3.749720335006714, "learning_rate": 9.923153814090675e-06, "loss": 1.2961, "step": 1938 }, { "epoch": 0.5156914893617022, "grad_norm": 3.978954315185547, "learning_rate": 9.923000133250776e-06, "loss": 1.4325, "step": 1939 }, { "epoch": 0.5159574468085106, "grad_norm": 4.081971645355225, "learning_rate": 9.922846300087454e-06, "loss": 1.2811, "step": 1940 }, { "epoch": 0.5162234042553191, "grad_norm": 3.9421591758728027, "learning_rate": 9.922692314605472e-06, "loss": 1.3513, "step": 1941 }, { "epoch": 0.5164893617021277, "grad_norm": 3.6500041484832764, "learning_rate": 9.922538176809597e-06, "loss": 1.2927, "step": 1942 }, { "epoch": 0.5167553191489361, "grad_norm": 3.858421564102173, "learning_rate": 9.922383886704594e-06, "loss": 1.1699, "step": 1943 }, { "epoch": 0.5170212765957447, "grad_norm": 4.286783695220947, "learning_rate": 9.922229444295238e-06, "loss": 1.4037, "step": 1944 }, { "epoch": 0.5172872340425532, "grad_norm": 4.163476943969727, "learning_rate": 9.922074849586308e-06, "loss": 1.1268, "step": 1945 }, { "epoch": 0.5175531914893617, "grad_norm": 3.8577239513397217, "learning_rate": 9.921920102582587e-06, "loss": 1.2154, "step": 1946 }, { "epoch": 0.5178191489361702, "grad_norm": 4.213263988494873, "learning_rate": 9.921765203288862e-06, "loss": 1.3188, "step": 1947 }, { "epoch": 0.5180851063829788, "grad_norm": 3.817172050476074, "learning_rate": 9.921610151709929e-06, "loss": 1.2897, "step": 1948 }, { "epoch": 0.5183510638297872, "grad_norm": 3.954479694366455, "learning_rate": 9.921454947850582e-06, "loss": 1.1568, "step": 1949 }, { "epoch": 0.5186170212765957, "grad_norm": 4.054901123046875, "learning_rate": 9.921299591715624e-06, "loss": 1.1991, "step": 1950 }, { "epoch": 0.5188829787234043, "grad_norm": 3.9514553546905518, "learning_rate": 9.921144083309864e-06, "loss": 1.2588, "step": 1951 }, { "epoch": 0.5191489361702127, "grad_norm": 4.228671550750732, "learning_rate": 9.920988422638112e-06, "loss": 1.3348, "step": 1952 }, { "epoch": 0.5194148936170213, "grad_norm": 3.997422695159912, "learning_rate": 9.920832609705184e-06, "loss": 1.2402, "step": 1953 }, { "epoch": 0.5196808510638298, "grad_norm": 3.8394384384155273, "learning_rate": 9.920676644515902e-06, "loss": 1.222, "step": 1954 }, { "epoch": 0.5199468085106383, "grad_norm": 3.654381036758423, "learning_rate": 9.92052052707509e-06, "loss": 1.4059, "step": 1955 }, { "epoch": 0.5202127659574468, "grad_norm": 3.881578207015991, "learning_rate": 9.92036425738758e-06, "loss": 1.3507, "step": 1956 }, { "epoch": 0.5204787234042553, "grad_norm": 3.819066286087036, "learning_rate": 9.920207835458208e-06, "loss": 1.3433, "step": 1957 }, { "epoch": 0.5207446808510638, "grad_norm": 3.2657382488250732, "learning_rate": 9.920051261291812e-06, "loss": 1.0601, "step": 1958 }, { "epoch": 0.5210106382978723, "grad_norm": 3.789560556411743, "learning_rate": 9.919894534893237e-06, "loss": 1.2395, "step": 1959 }, { "epoch": 0.5212765957446809, "grad_norm": 3.620661973953247, "learning_rate": 9.919737656267335e-06, "loss": 1.1793, "step": 1960 }, { "epoch": 0.5215425531914893, "grad_norm": 4.208719253540039, "learning_rate": 9.919580625418955e-06, "loss": 1.5431, "step": 1961 }, { "epoch": 0.5218085106382979, "grad_norm": 4.2255024909973145, "learning_rate": 9.919423442352958e-06, "loss": 1.3665, "step": 1962 }, { "epoch": 0.5220744680851064, "grad_norm": 4.246603965759277, "learning_rate": 9.91926610707421e-06, "loss": 1.2552, "step": 1963 }, { "epoch": 0.5223404255319148, "grad_norm": 4.042827606201172, "learning_rate": 9.919108619587575e-06, "loss": 1.2171, "step": 1964 }, { "epoch": 0.5226063829787234, "grad_norm": 4.006556510925293, "learning_rate": 9.918950979897928e-06, "loss": 1.2559, "step": 1965 }, { "epoch": 0.5228723404255319, "grad_norm": 3.7249419689178467, "learning_rate": 9.918793188010147e-06, "loss": 1.0816, "step": 1966 }, { "epoch": 0.5231382978723405, "grad_norm": 4.087320804595947, "learning_rate": 9.918635243929115e-06, "loss": 1.2607, "step": 1967 }, { "epoch": 0.5234042553191489, "grad_norm": 4.031649589538574, "learning_rate": 9.918477147659715e-06, "loss": 1.2983, "step": 1968 }, { "epoch": 0.5236702127659575, "grad_norm": 4.055499076843262, "learning_rate": 9.918318899206842e-06, "loss": 1.2686, "step": 1969 }, { "epoch": 0.523936170212766, "grad_norm": 4.922122955322266, "learning_rate": 9.918160498575394e-06, "loss": 1.2761, "step": 1970 }, { "epoch": 0.5242021276595744, "grad_norm": 4.155685901641846, "learning_rate": 9.918001945770267e-06, "loss": 1.3004, "step": 1971 }, { "epoch": 0.524468085106383, "grad_norm": 4.165022373199463, "learning_rate": 9.91784324079637e-06, "loss": 1.4643, "step": 1972 }, { "epoch": 0.5247340425531914, "grad_norm": 3.9013566970825195, "learning_rate": 9.917684383658614e-06, "loss": 1.2264, "step": 1973 }, { "epoch": 0.525, "grad_norm": 4.016994953155518, "learning_rate": 9.917525374361913e-06, "loss": 1.2748, "step": 1974 }, { "epoch": 0.5252659574468085, "grad_norm": 4.0600996017456055, "learning_rate": 9.917366212911187e-06, "loss": 1.2, "step": 1975 }, { "epoch": 0.5255319148936171, "grad_norm": 4.1870903968811035, "learning_rate": 9.91720689931136e-06, "loss": 1.2307, "step": 1976 }, { "epoch": 0.5257978723404255, "grad_norm": 3.7501108646392822, "learning_rate": 9.917047433567364e-06, "loss": 1.2853, "step": 1977 }, { "epoch": 0.5260638297872341, "grad_norm": 3.8789479732513428, "learning_rate": 9.91688781568413e-06, "loss": 1.3571, "step": 1978 }, { "epoch": 0.5263297872340426, "grad_norm": 3.641453981399536, "learning_rate": 9.9167280456666e-06, "loss": 1.1975, "step": 1979 }, { "epoch": 0.526595744680851, "grad_norm": 4.097661972045898, "learning_rate": 9.916568123519713e-06, "loss": 1.2415, "step": 1980 }, { "epoch": 0.5268617021276596, "grad_norm": 3.447585105895996, "learning_rate": 9.91640804924842e-06, "loss": 1.1599, "step": 1981 }, { "epoch": 0.527127659574468, "grad_norm": 3.906158208847046, "learning_rate": 9.916247822857675e-06, "loss": 1.2141, "step": 1982 }, { "epoch": 0.5273936170212766, "grad_norm": 4.226005554199219, "learning_rate": 9.916087444352433e-06, "loss": 1.3575, "step": 1983 }, { "epoch": 0.5276595744680851, "grad_norm": 3.955073118209839, "learning_rate": 9.91592691373766e-06, "loss": 1.159, "step": 1984 }, { "epoch": 0.5279255319148937, "grad_norm": 3.770538568496704, "learning_rate": 9.915766231018317e-06, "loss": 1.2722, "step": 1985 }, { "epoch": 0.5281914893617021, "grad_norm": 4.1326422691345215, "learning_rate": 9.91560539619938e-06, "loss": 1.4044, "step": 1986 }, { "epoch": 0.5284574468085106, "grad_norm": 3.933978319168091, "learning_rate": 9.915444409285827e-06, "loss": 1.1495, "step": 1987 }, { "epoch": 0.5287234042553192, "grad_norm": 3.8940069675445557, "learning_rate": 9.915283270282637e-06, "loss": 1.2658, "step": 1988 }, { "epoch": 0.5289893617021276, "grad_norm": 3.8015975952148438, "learning_rate": 9.915121979194793e-06, "loss": 1.2155, "step": 1989 }, { "epoch": 0.5292553191489362, "grad_norm": 4.204024791717529, "learning_rate": 9.914960536027289e-06, "loss": 1.3081, "step": 1990 }, { "epoch": 0.5295212765957447, "grad_norm": 3.80530047416687, "learning_rate": 9.91479894078512e-06, "loss": 1.2827, "step": 1991 }, { "epoch": 0.5297872340425532, "grad_norm": 4.011538505554199, "learning_rate": 9.914637193473284e-06, "loss": 1.2801, "step": 1992 }, { "epoch": 0.5300531914893617, "grad_norm": 3.848898410797119, "learning_rate": 9.914475294096788e-06, "loss": 1.2904, "step": 1993 }, { "epoch": 0.5303191489361702, "grad_norm": 3.7076499462127686, "learning_rate": 9.91431324266064e-06, "loss": 1.3455, "step": 1994 }, { "epoch": 0.5305851063829787, "grad_norm": 4.372555255889893, "learning_rate": 9.914151039169855e-06, "loss": 1.3233, "step": 1995 }, { "epoch": 0.5308510638297872, "grad_norm": 4.168186664581299, "learning_rate": 9.913988683629449e-06, "loss": 1.3303, "step": 1996 }, { "epoch": 0.5311170212765958, "grad_norm": 3.4844412803649902, "learning_rate": 9.91382617604445e-06, "loss": 1.28, "step": 1997 }, { "epoch": 0.5313829787234042, "grad_norm": 3.981612205505371, "learning_rate": 9.913663516419883e-06, "loss": 1.4133, "step": 1998 }, { "epoch": 0.5316489361702128, "grad_norm": 3.6310243606567383, "learning_rate": 9.913500704760781e-06, "loss": 1.2546, "step": 1999 }, { "epoch": 0.5319148936170213, "grad_norm": 3.6045448780059814, "learning_rate": 9.913337741072183e-06, "loss": 1.1445, "step": 2000 }, { "epoch": 0.5319148936170213, "eval_loss": 1.2938566207885742, "eval_runtime": 12.2817, "eval_samples_per_second": 32.569, "eval_steps_per_second": 4.071, "step": 2000 }, { "epoch": 0.5321808510638298, "grad_norm": 4.040936470031738, "learning_rate": 9.913174625359132e-06, "loss": 1.2325, "step": 2001 }, { "epoch": 0.5324468085106383, "grad_norm": 3.7908430099487305, "learning_rate": 9.913011357626672e-06, "loss": 1.3091, "step": 2002 }, { "epoch": 0.5327127659574468, "grad_norm": 3.7691242694854736, "learning_rate": 9.912847937879855e-06, "loss": 1.2236, "step": 2003 }, { "epoch": 0.5329787234042553, "grad_norm": 4.643370628356934, "learning_rate": 9.91268436612374e-06, "loss": 1.3033, "step": 2004 }, { "epoch": 0.5332446808510638, "grad_norm": 3.5233020782470703, "learning_rate": 9.912520642363387e-06, "loss": 1.1542, "step": 2005 }, { "epoch": 0.5335106382978724, "grad_norm": 4.1154022216796875, "learning_rate": 9.912356766603862e-06, "loss": 1.4088, "step": 2006 }, { "epoch": 0.5337765957446808, "grad_norm": 5.4873247146606445, "learning_rate": 9.912192738850234e-06, "loss": 1.3057, "step": 2007 }, { "epoch": 0.5340425531914894, "grad_norm": 3.9308226108551025, "learning_rate": 9.912028559107577e-06, "loss": 1.2788, "step": 2008 }, { "epoch": 0.5343085106382979, "grad_norm": 3.6488893032073975, "learning_rate": 9.91186422738098e-06, "loss": 1.1555, "step": 2009 }, { "epoch": 0.5345744680851063, "grad_norm": 3.553065061569214, "learning_rate": 9.911699743675513e-06, "loss": 1.2228, "step": 2010 }, { "epoch": 0.5348404255319149, "grad_norm": 3.8336079120635986, "learning_rate": 9.911535107996278e-06, "loss": 1.2563, "step": 2011 }, { "epoch": 0.5351063829787234, "grad_norm": 4.1601715087890625, "learning_rate": 9.911370320348363e-06, "loss": 1.2525, "step": 2012 }, { "epoch": 0.535372340425532, "grad_norm": 3.4441726207733154, "learning_rate": 9.911205380736868e-06, "loss": 1.2293, "step": 2013 }, { "epoch": 0.5356382978723404, "grad_norm": 4.281271457672119, "learning_rate": 9.911040289166896e-06, "loss": 1.5168, "step": 2014 }, { "epoch": 0.535904255319149, "grad_norm": 3.982959508895874, "learning_rate": 9.910875045643555e-06, "loss": 1.2864, "step": 2015 }, { "epoch": 0.5361702127659574, "grad_norm": 3.9199705123901367, "learning_rate": 9.91070965017196e-06, "loss": 1.2906, "step": 2016 }, { "epoch": 0.5364361702127659, "grad_norm": 4.073878288269043, "learning_rate": 9.910544102757224e-06, "loss": 1.2435, "step": 2017 }, { "epoch": 0.5367021276595745, "grad_norm": 4.169588088989258, "learning_rate": 9.910378403404473e-06, "loss": 1.3231, "step": 2018 }, { "epoch": 0.5369680851063829, "grad_norm": 3.7797560691833496, "learning_rate": 9.910212552118835e-06, "loss": 1.2632, "step": 2019 }, { "epoch": 0.5372340425531915, "grad_norm": 4.002804756164551, "learning_rate": 9.910046548905437e-06, "loss": 1.3988, "step": 2020 }, { "epoch": 0.5375, "grad_norm": 3.8956003189086914, "learning_rate": 9.90988039376942e-06, "loss": 1.2534, "step": 2021 }, { "epoch": 0.5377659574468086, "grad_norm": 3.6937549114227295, "learning_rate": 9.90971408671592e-06, "loss": 1.2312, "step": 2022 }, { "epoch": 0.538031914893617, "grad_norm": 3.7216007709503174, "learning_rate": 9.909547627750089e-06, "loss": 1.2408, "step": 2023 }, { "epoch": 0.5382978723404256, "grad_norm": 3.827702760696411, "learning_rate": 9.909381016877074e-06, "loss": 1.2551, "step": 2024 }, { "epoch": 0.538563829787234, "grad_norm": 3.5307586193084717, "learning_rate": 9.909214254102027e-06, "loss": 1.2352, "step": 2025 }, { "epoch": 0.5388297872340425, "grad_norm": 3.7490625381469727, "learning_rate": 9.909047339430113e-06, "loss": 1.2867, "step": 2026 }, { "epoch": 0.5390957446808511, "grad_norm": 4.107030391693115, "learning_rate": 9.908880272866495e-06, "loss": 1.3459, "step": 2027 }, { "epoch": 0.5393617021276595, "grad_norm": 3.855973482131958, "learning_rate": 9.908713054416342e-06, "loss": 1.224, "step": 2028 }, { "epoch": 0.5396276595744681, "grad_norm": 4.167142391204834, "learning_rate": 9.908545684084826e-06, "loss": 1.4258, "step": 2029 }, { "epoch": 0.5398936170212766, "grad_norm": 3.899373769760132, "learning_rate": 9.90837816187713e-06, "loss": 1.2853, "step": 2030 }, { "epoch": 0.5401595744680852, "grad_norm": 3.8360328674316406, "learning_rate": 9.908210487798433e-06, "loss": 1.3503, "step": 2031 }, { "epoch": 0.5404255319148936, "grad_norm": 3.633971929550171, "learning_rate": 9.908042661853926e-06, "loss": 1.0622, "step": 2032 }, { "epoch": 0.5406914893617021, "grad_norm": 4.1685991287231445, "learning_rate": 9.9078746840488e-06, "loss": 1.3733, "step": 2033 }, { "epoch": 0.5409574468085107, "grad_norm": 3.9930756092071533, "learning_rate": 9.907706554388253e-06, "loss": 1.4306, "step": 2034 }, { "epoch": 0.5412234042553191, "grad_norm": 3.9129087924957275, "learning_rate": 9.907538272877487e-06, "loss": 1.1834, "step": 2035 }, { "epoch": 0.5414893617021277, "grad_norm": 3.658611536026001, "learning_rate": 9.90736983952171e-06, "loss": 1.1908, "step": 2036 }, { "epoch": 0.5417553191489362, "grad_norm": 3.9367542266845703, "learning_rate": 9.907201254326132e-06, "loss": 1.2853, "step": 2037 }, { "epoch": 0.5420212765957447, "grad_norm": 3.9035940170288086, "learning_rate": 9.907032517295966e-06, "loss": 1.2867, "step": 2038 }, { "epoch": 0.5422872340425532, "grad_norm": 3.702096939086914, "learning_rate": 9.906863628436441e-06, "loss": 1.2614, "step": 2039 }, { "epoch": 0.5425531914893617, "grad_norm": 4.073267459869385, "learning_rate": 9.906694587752777e-06, "loss": 1.3793, "step": 2040 }, { "epoch": 0.5428191489361702, "grad_norm": 3.864699363708496, "learning_rate": 9.906525395250206e-06, "loss": 1.1233, "step": 2041 }, { "epoch": 0.5430851063829787, "grad_norm": 3.8738772869110107, "learning_rate": 9.906356050933962e-06, "loss": 1.1704, "step": 2042 }, { "epoch": 0.5433510638297873, "grad_norm": 3.837299108505249, "learning_rate": 9.906186554809284e-06, "loss": 1.1802, "step": 2043 }, { "epoch": 0.5436170212765957, "grad_norm": 4.00624942779541, "learning_rate": 9.906016906881419e-06, "loss": 1.2934, "step": 2044 }, { "epoch": 0.5438829787234043, "grad_norm": 3.6519479751586914, "learning_rate": 9.905847107155615e-06, "loss": 1.2313, "step": 2045 }, { "epoch": 0.5441489361702128, "grad_norm": 4.127234935760498, "learning_rate": 9.905677155637126e-06, "loss": 1.476, "step": 2046 }, { "epoch": 0.5444148936170212, "grad_norm": 3.580862283706665, "learning_rate": 9.90550705233121e-06, "loss": 1.1991, "step": 2047 }, { "epoch": 0.5446808510638298, "grad_norm": 4.004328727722168, "learning_rate": 9.90533679724313e-06, "loss": 1.2811, "step": 2048 }, { "epoch": 0.5449468085106383, "grad_norm": 3.6748900413513184, "learning_rate": 9.905166390378154e-06, "loss": 1.3381, "step": 2049 }, { "epoch": 0.5452127659574468, "grad_norm": 3.5765295028686523, "learning_rate": 9.904995831741553e-06, "loss": 1.2265, "step": 2050 }, { "epoch": 0.5454787234042553, "grad_norm": 3.910905361175537, "learning_rate": 9.904825121338609e-06, "loss": 1.2516, "step": 2051 }, { "epoch": 0.5457446808510639, "grad_norm": 3.8337693214416504, "learning_rate": 9.9046542591746e-06, "loss": 1.2997, "step": 2052 }, { "epoch": 0.5460106382978723, "grad_norm": 3.837082862854004, "learning_rate": 9.904483245254812e-06, "loss": 1.3341, "step": 2053 }, { "epoch": 0.5462765957446809, "grad_norm": 4.098066806793213, "learning_rate": 9.90431207958454e-06, "loss": 1.2182, "step": 2054 }, { "epoch": 0.5465425531914894, "grad_norm": 4.022514343261719, "learning_rate": 9.904140762169079e-06, "loss": 1.4144, "step": 2055 }, { "epoch": 0.5468085106382978, "grad_norm": 3.779283046722412, "learning_rate": 9.903969293013727e-06, "loss": 1.2291, "step": 2056 }, { "epoch": 0.5470744680851064, "grad_norm": 4.28890323638916, "learning_rate": 9.903797672123791e-06, "loss": 1.3899, "step": 2057 }, { "epoch": 0.5473404255319149, "grad_norm": 3.720780372619629, "learning_rate": 9.903625899504583e-06, "loss": 1.1992, "step": 2058 }, { "epoch": 0.5476063829787234, "grad_norm": 3.80373215675354, "learning_rate": 9.903453975161416e-06, "loss": 1.322, "step": 2059 }, { "epoch": 0.5478723404255319, "grad_norm": 4.012282371520996, "learning_rate": 9.90328189909961e-06, "loss": 1.1998, "step": 2060 }, { "epoch": 0.5481382978723405, "grad_norm": 4.059588432312012, "learning_rate": 9.903109671324488e-06, "loss": 1.286, "step": 2061 }, { "epoch": 0.5484042553191489, "grad_norm": 3.9015207290649414, "learning_rate": 9.902937291841383e-06, "loss": 1.3525, "step": 2062 }, { "epoch": 0.5486702127659574, "grad_norm": 4.0359954833984375, "learning_rate": 9.902764760655623e-06, "loss": 1.3094, "step": 2063 }, { "epoch": 0.548936170212766, "grad_norm": 3.487372875213623, "learning_rate": 9.90259207777255e-06, "loss": 1.2127, "step": 2064 }, { "epoch": 0.5492021276595744, "grad_norm": 3.607064723968506, "learning_rate": 9.902419243197505e-06, "loss": 1.2091, "step": 2065 }, { "epoch": 0.549468085106383, "grad_norm": 3.9896395206451416, "learning_rate": 9.902246256935837e-06, "loss": 1.3059, "step": 2066 }, { "epoch": 0.5497340425531915, "grad_norm": 4.376030445098877, "learning_rate": 9.9020731189929e-06, "loss": 1.3092, "step": 2067 }, { "epoch": 0.55, "grad_norm": 3.3590362071990967, "learning_rate": 9.901899829374048e-06, "loss": 1.201, "step": 2068 }, { "epoch": 0.5502659574468085, "grad_norm": 3.7063753604888916, "learning_rate": 9.901726388084643e-06, "loss": 1.182, "step": 2069 }, { "epoch": 0.550531914893617, "grad_norm": 3.709569215774536, "learning_rate": 9.901552795130054e-06, "loss": 1.1766, "step": 2070 }, { "epoch": 0.5507978723404255, "grad_norm": 4.3449249267578125, "learning_rate": 9.90137905051565e-06, "loss": 1.3167, "step": 2071 }, { "epoch": 0.551063829787234, "grad_norm": 3.8162055015563965, "learning_rate": 9.901205154246807e-06, "loss": 1.2192, "step": 2072 }, { "epoch": 0.5513297872340426, "grad_norm": 3.792880058288574, "learning_rate": 9.901031106328907e-06, "loss": 1.2957, "step": 2073 }, { "epoch": 0.551595744680851, "grad_norm": 3.6657822132110596, "learning_rate": 9.900856906767334e-06, "loss": 1.3045, "step": 2074 }, { "epoch": 0.5518617021276596, "grad_norm": 3.327601194381714, "learning_rate": 9.900682555567478e-06, "loss": 1.1348, "step": 2075 }, { "epoch": 0.5521276595744681, "grad_norm": 3.9993128776550293, "learning_rate": 9.900508052734734e-06, "loss": 1.2678, "step": 2076 }, { "epoch": 0.5523936170212767, "grad_norm": 3.922495126724243, "learning_rate": 9.900333398274501e-06, "loss": 1.1644, "step": 2077 }, { "epoch": 0.5526595744680851, "grad_norm": 3.6909377574920654, "learning_rate": 9.900158592192184e-06, "loss": 1.208, "step": 2078 }, { "epoch": 0.5529255319148936, "grad_norm": 4.378490924835205, "learning_rate": 9.89998363449319e-06, "loss": 1.2866, "step": 2079 }, { "epoch": 0.5531914893617021, "grad_norm": 3.6202850341796875, "learning_rate": 9.899808525182935e-06, "loss": 1.238, "step": 2080 }, { "epoch": 0.5534574468085106, "grad_norm": 3.9422550201416016, "learning_rate": 9.899633264266835e-06, "loss": 1.2932, "step": 2081 }, { "epoch": 0.5537234042553192, "grad_norm": 4.002807140350342, "learning_rate": 9.899457851750312e-06, "loss": 1.301, "step": 2082 }, { "epoch": 0.5539893617021276, "grad_norm": 4.242476940155029, "learning_rate": 9.899282287638795e-06, "loss": 1.2967, "step": 2083 }, { "epoch": 0.5542553191489362, "grad_norm": 4.148952007293701, "learning_rate": 9.899106571937716e-06, "loss": 1.2863, "step": 2084 }, { "epoch": 0.5545212765957447, "grad_norm": 3.8258893489837646, "learning_rate": 9.898930704652512e-06, "loss": 1.2253, "step": 2085 }, { "epoch": 0.5547872340425531, "grad_norm": 4.117706298828125, "learning_rate": 9.898754685788623e-06, "loss": 1.3706, "step": 2086 }, { "epoch": 0.5550531914893617, "grad_norm": 3.989381790161133, "learning_rate": 9.898578515351498e-06, "loss": 1.2585, "step": 2087 }, { "epoch": 0.5553191489361702, "grad_norm": 3.8721275329589844, "learning_rate": 9.898402193346585e-06, "loss": 1.1284, "step": 2088 }, { "epoch": 0.5555851063829788, "grad_norm": 4.169785499572754, "learning_rate": 9.898225719779342e-06, "loss": 1.2176, "step": 2089 }, { "epoch": 0.5558510638297872, "grad_norm": 3.8007307052612305, "learning_rate": 9.898049094655229e-06, "loss": 1.1421, "step": 2090 }, { "epoch": 0.5561170212765958, "grad_norm": 3.48579740524292, "learning_rate": 9.897872317979708e-06, "loss": 1.1123, "step": 2091 }, { "epoch": 0.5563829787234043, "grad_norm": 3.6224656105041504, "learning_rate": 9.897695389758253e-06, "loss": 1.2452, "step": 2092 }, { "epoch": 0.5566489361702127, "grad_norm": 4.0066752433776855, "learning_rate": 9.897518309996336e-06, "loss": 1.3127, "step": 2093 }, { "epoch": 0.5569148936170213, "grad_norm": 3.5834217071533203, "learning_rate": 9.897341078699437e-06, "loss": 1.1945, "step": 2094 }, { "epoch": 0.5571808510638298, "grad_norm": 3.616166830062866, "learning_rate": 9.897163695873036e-06, "loss": 1.2113, "step": 2095 }, { "epoch": 0.5574468085106383, "grad_norm": 4.5236945152282715, "learning_rate": 9.896986161522627e-06, "loss": 1.556, "step": 2096 }, { "epoch": 0.5577127659574468, "grad_norm": 4.006591320037842, "learning_rate": 9.896808475653701e-06, "loss": 1.3505, "step": 2097 }, { "epoch": 0.5579787234042554, "grad_norm": 4.137003421783447, "learning_rate": 9.896630638271755e-06, "loss": 1.2105, "step": 2098 }, { "epoch": 0.5582446808510638, "grad_norm": 4.136394500732422, "learning_rate": 9.896452649382291e-06, "loss": 1.4277, "step": 2099 }, { "epoch": 0.5585106382978723, "grad_norm": 3.8342485427856445, "learning_rate": 9.896274508990818e-06, "loss": 1.2839, "step": 2100 }, { "epoch": 0.5587765957446809, "grad_norm": 3.687845230102539, "learning_rate": 9.896096217102848e-06, "loss": 1.1659, "step": 2101 }, { "epoch": 0.5590425531914893, "grad_norm": 3.971306562423706, "learning_rate": 9.895917773723895e-06, "loss": 1.4681, "step": 2102 }, { "epoch": 0.5593085106382979, "grad_norm": 3.5636236667633057, "learning_rate": 9.895739178859483e-06, "loss": 1.2463, "step": 2103 }, { "epoch": 0.5595744680851064, "grad_norm": 4.580478191375732, "learning_rate": 9.895560432515136e-06, "loss": 1.488, "step": 2104 }, { "epoch": 0.5598404255319149, "grad_norm": 3.5549540519714355, "learning_rate": 9.895381534696385e-06, "loss": 1.1869, "step": 2105 }, { "epoch": 0.5601063829787234, "grad_norm": 3.6891443729400635, "learning_rate": 9.895202485408766e-06, "loss": 1.2356, "step": 2106 }, { "epoch": 0.560372340425532, "grad_norm": 4.139247894287109, "learning_rate": 9.895023284657821e-06, "loss": 1.2941, "step": 2107 }, { "epoch": 0.5606382978723404, "grad_norm": 3.616758346557617, "learning_rate": 9.89484393244909e-06, "loss": 1.2292, "step": 2108 }, { "epoch": 0.5609042553191489, "grad_norm": 3.634755849838257, "learning_rate": 9.894664428788126e-06, "loss": 1.2215, "step": 2109 }, { "epoch": 0.5611702127659575, "grad_norm": 3.9066550731658936, "learning_rate": 9.89448477368048e-06, "loss": 1.3777, "step": 2110 }, { "epoch": 0.5614361702127659, "grad_norm": 3.8861474990844727, "learning_rate": 9.894304967131713e-06, "loss": 1.2666, "step": 2111 }, { "epoch": 0.5617021276595745, "grad_norm": 3.3856041431427, "learning_rate": 9.894125009147389e-06, "loss": 1.3001, "step": 2112 }, { "epoch": 0.561968085106383, "grad_norm": 3.5979838371276855, "learning_rate": 9.893944899733076e-06, "loss": 1.2005, "step": 2113 }, { "epoch": 0.5622340425531915, "grad_norm": 3.851020336151123, "learning_rate": 9.893764638894345e-06, "loss": 1.3479, "step": 2114 }, { "epoch": 0.5625, "grad_norm": 4.208298206329346, "learning_rate": 9.893584226636773e-06, "loss": 1.3329, "step": 2115 }, { "epoch": 0.5627659574468085, "grad_norm": 3.6734988689422607, "learning_rate": 9.893403662965944e-06, "loss": 1.3678, "step": 2116 }, { "epoch": 0.563031914893617, "grad_norm": 3.708069324493408, "learning_rate": 9.893222947887446e-06, "loss": 1.3176, "step": 2117 }, { "epoch": 0.5632978723404255, "grad_norm": 4.194994926452637, "learning_rate": 9.893042081406868e-06, "loss": 1.381, "step": 2118 }, { "epoch": 0.5635638297872341, "grad_norm": 3.740922689437866, "learning_rate": 9.892861063529807e-06, "loss": 1.1555, "step": 2119 }, { "epoch": 0.5638297872340425, "grad_norm": 3.744663715362549, "learning_rate": 9.892679894261865e-06, "loss": 1.132, "step": 2120 }, { "epoch": 0.5640957446808511, "grad_norm": 4.050332546234131, "learning_rate": 9.892498573608645e-06, "loss": 1.3709, "step": 2121 }, { "epoch": 0.5643617021276596, "grad_norm": 3.9612951278686523, "learning_rate": 9.89231710157576e-06, "loss": 1.2954, "step": 2122 }, { "epoch": 0.564627659574468, "grad_norm": 3.165841817855835, "learning_rate": 9.892135478168824e-06, "loss": 1.1757, "step": 2123 }, { "epoch": 0.5648936170212766, "grad_norm": 3.6281683444976807, "learning_rate": 9.891953703393455e-06, "loss": 1.0733, "step": 2124 }, { "epoch": 0.5651595744680851, "grad_norm": 3.7431442737579346, "learning_rate": 9.89177177725528e-06, "loss": 1.3628, "step": 2125 }, { "epoch": 0.5654255319148936, "grad_norm": 3.704817295074463, "learning_rate": 9.891589699759929e-06, "loss": 1.284, "step": 2126 }, { "epoch": 0.5656914893617021, "grad_norm": 3.5511844158172607, "learning_rate": 9.89140747091303e-06, "loss": 1.1152, "step": 2127 }, { "epoch": 0.5659574468085107, "grad_norm": 3.450695753097534, "learning_rate": 9.891225090720227e-06, "loss": 1.2245, "step": 2128 }, { "epoch": 0.5662234042553191, "grad_norm": 3.8009350299835205, "learning_rate": 9.891042559187161e-06, "loss": 1.319, "step": 2129 }, { "epoch": 0.5664893617021277, "grad_norm": 4.276994228363037, "learning_rate": 9.890859876319479e-06, "loss": 1.3191, "step": 2130 }, { "epoch": 0.5667553191489362, "grad_norm": 4.0986738204956055, "learning_rate": 9.890677042122834e-06, "loss": 1.2553, "step": 2131 }, { "epoch": 0.5670212765957446, "grad_norm": 3.861093044281006, "learning_rate": 9.890494056602883e-06, "loss": 1.1618, "step": 2132 }, { "epoch": 0.5672872340425532, "grad_norm": 3.8807971477508545, "learning_rate": 9.89031091976529e-06, "loss": 1.3676, "step": 2133 }, { "epoch": 0.5675531914893617, "grad_norm": 3.5750906467437744, "learning_rate": 9.890127631615719e-06, "loss": 1.3009, "step": 2134 }, { "epoch": 0.5678191489361702, "grad_norm": 3.740861654281616, "learning_rate": 9.88994419215984e-06, "loss": 1.3059, "step": 2135 }, { "epoch": 0.5680851063829787, "grad_norm": 3.945333480834961, "learning_rate": 9.88976060140333e-06, "loss": 1.3027, "step": 2136 }, { "epoch": 0.5683510638297873, "grad_norm": 3.9484307765960693, "learning_rate": 9.889576859351873e-06, "loss": 1.4177, "step": 2137 }, { "epoch": 0.5686170212765957, "grad_norm": 3.9661643505096436, "learning_rate": 9.88939296601115e-06, "loss": 1.3607, "step": 2138 }, { "epoch": 0.5688829787234042, "grad_norm": 3.4872074127197266, "learning_rate": 9.88920892138685e-06, "loss": 1.1658, "step": 2139 }, { "epoch": 0.5691489361702128, "grad_norm": 3.545102119445801, "learning_rate": 9.889024725484672e-06, "loss": 1.1813, "step": 2140 }, { "epoch": 0.5694148936170212, "grad_norm": 3.738452434539795, "learning_rate": 9.888840378310312e-06, "loss": 1.2977, "step": 2141 }, { "epoch": 0.5696808510638298, "grad_norm": 3.6037521362304688, "learning_rate": 9.888655879869475e-06, "loss": 1.2053, "step": 2142 }, { "epoch": 0.5699468085106383, "grad_norm": 4.002810955047607, "learning_rate": 9.888471230167869e-06, "loss": 1.1678, "step": 2143 }, { "epoch": 0.5702127659574469, "grad_norm": 3.659442186355591, "learning_rate": 9.88828642921121e-06, "loss": 1.3656, "step": 2144 }, { "epoch": 0.5704787234042553, "grad_norm": 3.817089557647705, "learning_rate": 9.88810147700521e-06, "loss": 1.3597, "step": 2145 }, { "epoch": 0.5707446808510638, "grad_norm": 3.5655431747436523, "learning_rate": 9.887916373555597e-06, "loss": 1.2276, "step": 2146 }, { "epoch": 0.5710106382978724, "grad_norm": 3.873889923095703, "learning_rate": 9.887731118868098e-06, "loss": 1.3873, "step": 2147 }, { "epoch": 0.5712765957446808, "grad_norm": 4.273273468017578, "learning_rate": 9.887545712948441e-06, "loss": 1.366, "step": 2148 }, { "epoch": 0.5715425531914894, "grad_norm": 3.5899455547332764, "learning_rate": 9.887360155802366e-06, "loss": 1.1787, "step": 2149 }, { "epoch": 0.5718085106382979, "grad_norm": 3.615471124649048, "learning_rate": 9.887174447435615e-06, "loss": 1.1561, "step": 2150 }, { "epoch": 0.5720744680851064, "grad_norm": 3.8445990085601807, "learning_rate": 9.886988587853933e-06, "loss": 1.315, "step": 2151 }, { "epoch": 0.5723404255319149, "grad_norm": 3.989668846130371, "learning_rate": 9.886802577063068e-06, "loss": 1.3116, "step": 2152 }, { "epoch": 0.5726063829787233, "grad_norm": 4.619128227233887, "learning_rate": 9.886616415068779e-06, "loss": 1.3862, "step": 2153 }, { "epoch": 0.5728723404255319, "grad_norm": 3.6989963054656982, "learning_rate": 9.886430101876825e-06, "loss": 1.2221, "step": 2154 }, { "epoch": 0.5731382978723404, "grad_norm": 4.153132915496826, "learning_rate": 9.886243637492969e-06, "loss": 1.2128, "step": 2155 }, { "epoch": 0.573404255319149, "grad_norm": 3.970520257949829, "learning_rate": 9.886057021922984e-06, "loss": 1.2802, "step": 2156 }, { "epoch": 0.5736702127659574, "grad_norm": 3.751838207244873, "learning_rate": 9.885870255172642e-06, "loss": 1.1967, "step": 2157 }, { "epoch": 0.573936170212766, "grad_norm": 3.6611552238464355, "learning_rate": 9.88568333724772e-06, "loss": 1.2956, "step": 2158 }, { "epoch": 0.5742021276595745, "grad_norm": 4.170332908630371, "learning_rate": 9.885496268154005e-06, "loss": 1.2867, "step": 2159 }, { "epoch": 0.574468085106383, "grad_norm": 3.5777552127838135, "learning_rate": 9.885309047897285e-06, "loss": 1.1703, "step": 2160 }, { "epoch": 0.5747340425531915, "grad_norm": 3.9369912147521973, "learning_rate": 9.88512167648335e-06, "loss": 1.3682, "step": 2161 }, { "epoch": 0.575, "grad_norm": 4.30880069732666, "learning_rate": 9.884934153917998e-06, "loss": 1.2892, "step": 2162 }, { "epoch": 0.5752659574468085, "grad_norm": 4.251465797424316, "learning_rate": 9.884746480207031e-06, "loss": 1.3043, "step": 2163 }, { "epoch": 0.575531914893617, "grad_norm": 3.4858951568603516, "learning_rate": 9.88455865535626e-06, "loss": 1.3418, "step": 2164 }, { "epoch": 0.5757978723404256, "grad_norm": 3.715372085571289, "learning_rate": 9.88437067937149e-06, "loss": 1.274, "step": 2165 }, { "epoch": 0.576063829787234, "grad_norm": 3.5083811283111572, "learning_rate": 9.884182552258543e-06, "loss": 1.1127, "step": 2166 }, { "epoch": 0.5763297872340426, "grad_norm": 4.5049004554748535, "learning_rate": 9.883994274023237e-06, "loss": 1.3182, "step": 2167 }, { "epoch": 0.5765957446808511, "grad_norm": 4.002771377563477, "learning_rate": 9.883805844671396e-06, "loss": 1.4289, "step": 2168 }, { "epoch": 0.5768617021276595, "grad_norm": 3.691743850708008, "learning_rate": 9.883617264208854e-06, "loss": 1.3677, "step": 2169 }, { "epoch": 0.5771276595744681, "grad_norm": 4.031147003173828, "learning_rate": 9.883428532641445e-06, "loss": 1.1805, "step": 2170 }, { "epoch": 0.5773936170212766, "grad_norm": 4.453026294708252, "learning_rate": 9.883239649975007e-06, "loss": 1.4034, "step": 2171 }, { "epoch": 0.5776595744680851, "grad_norm": 3.6685361862182617, "learning_rate": 9.883050616215383e-06, "loss": 1.3169, "step": 2172 }, { "epoch": 0.5779255319148936, "grad_norm": 3.6789016723632812, "learning_rate": 9.882861431368425e-06, "loss": 1.3912, "step": 2173 }, { "epoch": 0.5781914893617022, "grad_norm": 3.6971778869628906, "learning_rate": 9.882672095439987e-06, "loss": 1.1346, "step": 2174 }, { "epoch": 0.5784574468085106, "grad_norm": 3.8128819465637207, "learning_rate": 9.882482608435924e-06, "loss": 1.3105, "step": 2175 }, { "epoch": 0.5787234042553191, "grad_norm": 4.369806289672852, "learning_rate": 9.882292970362101e-06, "loss": 1.3673, "step": 2176 }, { "epoch": 0.5789893617021277, "grad_norm": 3.403639316558838, "learning_rate": 9.882103181224386e-06, "loss": 1.2435, "step": 2177 }, { "epoch": 0.5792553191489361, "grad_norm": 3.7755768299102783, "learning_rate": 9.88191324102865e-06, "loss": 1.3237, "step": 2178 }, { "epoch": 0.5795212765957447, "grad_norm": 3.4330899715423584, "learning_rate": 9.88172314978077e-06, "loss": 1.249, "step": 2179 }, { "epoch": 0.5797872340425532, "grad_norm": 3.9291467666625977, "learning_rate": 9.88153290748663e-06, "loss": 1.4475, "step": 2180 }, { "epoch": 0.5800531914893617, "grad_norm": 3.731370210647583, "learning_rate": 9.881342514152114e-06, "loss": 1.2166, "step": 2181 }, { "epoch": 0.5803191489361702, "grad_norm": 3.7620556354522705, "learning_rate": 9.881151969783113e-06, "loss": 1.2329, "step": 2182 }, { "epoch": 0.5805851063829788, "grad_norm": 3.822985887527466, "learning_rate": 9.880961274385523e-06, "loss": 1.2219, "step": 2183 }, { "epoch": 0.5808510638297872, "grad_norm": 3.2141547203063965, "learning_rate": 9.880770427965245e-06, "loss": 1.0712, "step": 2184 }, { "epoch": 0.5811170212765957, "grad_norm": 3.733004331588745, "learning_rate": 9.880579430528183e-06, "loss": 1.203, "step": 2185 }, { "epoch": 0.5813829787234043, "grad_norm": 3.6706783771514893, "learning_rate": 9.880388282080247e-06, "loss": 1.1757, "step": 2186 }, { "epoch": 0.5816489361702127, "grad_norm": 3.7189342975616455, "learning_rate": 9.880196982627352e-06, "loss": 1.2265, "step": 2187 }, { "epoch": 0.5819148936170213, "grad_norm": 3.8598103523254395, "learning_rate": 9.88000553217542e-06, "loss": 1.2892, "step": 2188 }, { "epoch": 0.5821808510638298, "grad_norm": 3.854811191558838, "learning_rate": 9.879813930730367e-06, "loss": 1.1292, "step": 2189 }, { "epoch": 0.5824468085106383, "grad_norm": 4.142318248748779, "learning_rate": 9.879622178298128e-06, "loss": 1.1795, "step": 2190 }, { "epoch": 0.5827127659574468, "grad_norm": 3.688462257385254, "learning_rate": 9.879430274884632e-06, "loss": 1.2044, "step": 2191 }, { "epoch": 0.5829787234042553, "grad_norm": 3.4742586612701416, "learning_rate": 9.879238220495818e-06, "loss": 1.1547, "step": 2192 }, { "epoch": 0.5832446808510638, "grad_norm": 3.9008736610412598, "learning_rate": 9.87904601513763e-06, "loss": 1.2293, "step": 2193 }, { "epoch": 0.5835106382978723, "grad_norm": 3.70694899559021, "learning_rate": 9.878853658816015e-06, "loss": 1.2758, "step": 2194 }, { "epoch": 0.5837765957446809, "grad_norm": 4.015002727508545, "learning_rate": 9.878661151536923e-06, "loss": 1.3352, "step": 2195 }, { "epoch": 0.5840425531914893, "grad_norm": 3.423016309738159, "learning_rate": 9.87846849330631e-06, "loss": 1.1313, "step": 2196 }, { "epoch": 0.5843085106382979, "grad_norm": 3.549492120742798, "learning_rate": 9.87827568413014e-06, "loss": 1.3162, "step": 2197 }, { "epoch": 0.5845744680851064, "grad_norm": 4.05422306060791, "learning_rate": 9.878082724014375e-06, "loss": 1.2593, "step": 2198 }, { "epoch": 0.5848404255319148, "grad_norm": 3.875730514526367, "learning_rate": 9.877889612964988e-06, "loss": 1.1837, "step": 2199 }, { "epoch": 0.5851063829787234, "grad_norm": 3.4176459312438965, "learning_rate": 9.877696350987954e-06, "loss": 1.1748, "step": 2200 }, { "epoch": 0.5853723404255319, "grad_norm": 4.281347751617432, "learning_rate": 9.87750293808925e-06, "loss": 1.272, "step": 2201 }, { "epoch": 0.5856382978723405, "grad_norm": 4.0162577629089355, "learning_rate": 9.877309374274865e-06, "loss": 1.2567, "step": 2202 }, { "epoch": 0.5859042553191489, "grad_norm": 4.051181793212891, "learning_rate": 9.877115659550785e-06, "loss": 1.2305, "step": 2203 }, { "epoch": 0.5861702127659575, "grad_norm": 3.711719512939453, "learning_rate": 9.876921793923005e-06, "loss": 1.1956, "step": 2204 }, { "epoch": 0.586436170212766, "grad_norm": 3.402353048324585, "learning_rate": 9.876727777397522e-06, "loss": 1.1938, "step": 2205 }, { "epoch": 0.5867021276595744, "grad_norm": 3.7966136932373047, "learning_rate": 9.87653360998034e-06, "loss": 1.2964, "step": 2206 }, { "epoch": 0.586968085106383, "grad_norm": 3.816732406616211, "learning_rate": 9.876339291677466e-06, "loss": 1.2739, "step": 2207 }, { "epoch": 0.5872340425531914, "grad_norm": 3.801443576812744, "learning_rate": 9.876144822494913e-06, "loss": 1.2832, "step": 2208 }, { "epoch": 0.5875, "grad_norm": 3.7559401988983154, "learning_rate": 9.8759502024387e-06, "loss": 1.2176, "step": 2209 }, { "epoch": 0.5877659574468085, "grad_norm": 3.9138758182525635, "learning_rate": 9.875755431514846e-06, "loss": 1.3423, "step": 2210 }, { "epoch": 0.5880319148936171, "grad_norm": 4.0434041023254395, "learning_rate": 9.875560509729379e-06, "loss": 1.3064, "step": 2211 }, { "epoch": 0.5882978723404255, "grad_norm": 3.7799887657165527, "learning_rate": 9.87536543708833e-06, "loss": 1.2518, "step": 2212 }, { "epoch": 0.5885638297872341, "grad_norm": 3.8034684658050537, "learning_rate": 9.875170213597731e-06, "loss": 1.2485, "step": 2213 }, { "epoch": 0.5888297872340426, "grad_norm": 4.390495300292969, "learning_rate": 9.874974839263629e-06, "loss": 1.263, "step": 2214 }, { "epoch": 0.589095744680851, "grad_norm": 4.027488708496094, "learning_rate": 9.874779314092065e-06, "loss": 1.2718, "step": 2215 }, { "epoch": 0.5893617021276596, "grad_norm": 3.8035428524017334, "learning_rate": 9.87458363808909e-06, "loss": 1.2636, "step": 2216 }, { "epoch": 0.589627659574468, "grad_norm": 3.5652413368225098, "learning_rate": 9.874387811260756e-06, "loss": 1.241, "step": 2217 }, { "epoch": 0.5898936170212766, "grad_norm": 4.2285614013671875, "learning_rate": 9.874191833613128e-06, "loss": 1.1943, "step": 2218 }, { "epoch": 0.5901595744680851, "grad_norm": 4.229702472686768, "learning_rate": 9.873995705152264e-06, "loss": 1.382, "step": 2219 }, { "epoch": 0.5904255319148937, "grad_norm": 4.092412948608398, "learning_rate": 9.873799425884235e-06, "loss": 1.132, "step": 2220 }, { "epoch": 0.5906914893617021, "grad_norm": 3.6512703895568848, "learning_rate": 9.873602995815113e-06, "loss": 1.2022, "step": 2221 }, { "epoch": 0.5909574468085106, "grad_norm": 3.634768009185791, "learning_rate": 9.873406414950977e-06, "loss": 1.2932, "step": 2222 }, { "epoch": 0.5912234042553192, "grad_norm": 3.6227974891662598, "learning_rate": 9.873209683297908e-06, "loss": 1.2947, "step": 2223 }, { "epoch": 0.5914893617021276, "grad_norm": 3.5124943256378174, "learning_rate": 9.873012800861996e-06, "loss": 1.1896, "step": 2224 }, { "epoch": 0.5917553191489362, "grad_norm": 3.759474992752075, "learning_rate": 9.872815767649329e-06, "loss": 1.2116, "step": 2225 }, { "epoch": 0.5920212765957447, "grad_norm": 3.7036375999450684, "learning_rate": 9.872618583666005e-06, "loss": 1.2293, "step": 2226 }, { "epoch": 0.5922872340425532, "grad_norm": 3.61789608001709, "learning_rate": 9.872421248918124e-06, "loss": 1.2121, "step": 2227 }, { "epoch": 0.5925531914893617, "grad_norm": 4.019472122192383, "learning_rate": 9.872223763411794e-06, "loss": 1.1467, "step": 2228 }, { "epoch": 0.5928191489361702, "grad_norm": 3.774531364440918, "learning_rate": 9.872026127153126e-06, "loss": 1.3685, "step": 2229 }, { "epoch": 0.5930851063829787, "grad_norm": 3.9165661334991455, "learning_rate": 9.871828340148232e-06, "loss": 1.1668, "step": 2230 }, { "epoch": 0.5933510638297872, "grad_norm": 3.762282133102417, "learning_rate": 9.871630402403235e-06, "loss": 1.2315, "step": 2231 }, { "epoch": 0.5936170212765958, "grad_norm": 3.96540904045105, "learning_rate": 9.871432313924255e-06, "loss": 1.3042, "step": 2232 }, { "epoch": 0.5938829787234042, "grad_norm": 4.1440229415893555, "learning_rate": 9.871234074717424e-06, "loss": 1.3715, "step": 2233 }, { "epoch": 0.5941489361702128, "grad_norm": 3.7638661861419678, "learning_rate": 9.871035684788878e-06, "loss": 1.2619, "step": 2234 }, { "epoch": 0.5944148936170213, "grad_norm": 3.5591323375701904, "learning_rate": 9.870837144144752e-06, "loss": 1.1941, "step": 2235 }, { "epoch": 0.5946808510638298, "grad_norm": 4.143522262573242, "learning_rate": 9.87063845279119e-06, "loss": 1.1687, "step": 2236 }, { "epoch": 0.5949468085106383, "grad_norm": 4.148569583892822, "learning_rate": 9.87043961073434e-06, "loss": 1.4218, "step": 2237 }, { "epoch": 0.5952127659574468, "grad_norm": 3.687147378921509, "learning_rate": 9.870240617980353e-06, "loss": 1.1311, "step": 2238 }, { "epoch": 0.5954787234042553, "grad_norm": 3.5179238319396973, "learning_rate": 9.870041474535388e-06, "loss": 1.1823, "step": 2239 }, { "epoch": 0.5957446808510638, "grad_norm": 3.844238519668579, "learning_rate": 9.869842180405607e-06, "loss": 1.3256, "step": 2240 }, { "epoch": 0.5960106382978724, "grad_norm": 3.9333431720733643, "learning_rate": 9.869642735597174e-06, "loss": 1.3545, "step": 2241 }, { "epoch": 0.5962765957446808, "grad_norm": 3.531179666519165, "learning_rate": 9.869443140116261e-06, "loss": 1.3254, "step": 2242 }, { "epoch": 0.5965425531914894, "grad_norm": 3.795381546020508, "learning_rate": 9.869243393969045e-06, "loss": 1.2744, "step": 2243 }, { "epoch": 0.5968085106382979, "grad_norm": 4.001238822937012, "learning_rate": 9.869043497161707e-06, "loss": 1.3585, "step": 2244 }, { "epoch": 0.5970744680851063, "grad_norm": 4.289900302886963, "learning_rate": 9.868843449700429e-06, "loss": 1.3628, "step": 2245 }, { "epoch": 0.5973404255319149, "grad_norm": 3.581144332885742, "learning_rate": 9.868643251591403e-06, "loss": 1.3021, "step": 2246 }, { "epoch": 0.5976063829787234, "grad_norm": 3.504152536392212, "learning_rate": 9.868442902840823e-06, "loss": 1.2073, "step": 2247 }, { "epoch": 0.597872340425532, "grad_norm": 3.648141622543335, "learning_rate": 9.868242403454886e-06, "loss": 1.3169, "step": 2248 }, { "epoch": 0.5981382978723404, "grad_norm": 3.544408082962036, "learning_rate": 9.8680417534398e-06, "loss": 1.1334, "step": 2249 }, { "epoch": 0.598404255319149, "grad_norm": 3.6868479251861572, "learning_rate": 9.867840952801768e-06, "loss": 1.209, "step": 2250 }, { "epoch": 0.5986702127659574, "grad_norm": 3.6805198192596436, "learning_rate": 9.867640001547007e-06, "loss": 1.3011, "step": 2251 }, { "epoch": 0.5989361702127659, "grad_norm": 3.646977186203003, "learning_rate": 9.867438899681734e-06, "loss": 1.2178, "step": 2252 }, { "epoch": 0.5992021276595745, "grad_norm": 3.4612386226654053, "learning_rate": 9.867237647212168e-06, "loss": 1.1646, "step": 2253 }, { "epoch": 0.5994680851063829, "grad_norm": 3.663968324661255, "learning_rate": 9.867036244144544e-06, "loss": 1.2337, "step": 2254 }, { "epoch": 0.5997340425531915, "grad_norm": 3.724919080734253, "learning_rate": 9.866834690485083e-06, "loss": 1.3467, "step": 2255 }, { "epoch": 0.6, "grad_norm": 3.6140668392181396, "learning_rate": 9.86663298624003e-06, "loss": 1.2684, "step": 2256 }, { "epoch": 0.6002659574468086, "grad_norm": 3.805572271347046, "learning_rate": 9.866431131415621e-06, "loss": 1.3172, "step": 2257 }, { "epoch": 0.600531914893617, "grad_norm": 3.921037435531616, "learning_rate": 9.866229126018104e-06, "loss": 1.1632, "step": 2258 }, { "epoch": 0.6007978723404256, "grad_norm": 4.814824104309082, "learning_rate": 9.866026970053728e-06, "loss": 1.371, "step": 2259 }, { "epoch": 0.601063829787234, "grad_norm": 3.8934485912323, "learning_rate": 9.86582466352875e-06, "loss": 1.2192, "step": 2260 }, { "epoch": 0.6013297872340425, "grad_norm": 4.167794704437256, "learning_rate": 9.865622206449428e-06, "loss": 1.3167, "step": 2261 }, { "epoch": 0.6015957446808511, "grad_norm": 3.916013479232788, "learning_rate": 9.865419598822025e-06, "loss": 1.2492, "step": 2262 }, { "epoch": 0.6018617021276595, "grad_norm": 3.5649423599243164, "learning_rate": 9.865216840652811e-06, "loss": 1.1833, "step": 2263 }, { "epoch": 0.6021276595744681, "grad_norm": 3.508890151977539, "learning_rate": 9.865013931948061e-06, "loss": 1.2527, "step": 2264 }, { "epoch": 0.6023936170212766, "grad_norm": 3.513054132461548, "learning_rate": 9.864810872714053e-06, "loss": 1.2032, "step": 2265 }, { "epoch": 0.6026595744680852, "grad_norm": 3.777679443359375, "learning_rate": 9.864607662957066e-06, "loss": 1.3355, "step": 2266 }, { "epoch": 0.6029255319148936, "grad_norm": 3.778639316558838, "learning_rate": 9.864404302683393e-06, "loss": 1.3697, "step": 2267 }, { "epoch": 0.6031914893617021, "grad_norm": 3.5880136489868164, "learning_rate": 9.864200791899323e-06, "loss": 1.2124, "step": 2268 }, { "epoch": 0.6034574468085107, "grad_norm": 3.5101895332336426, "learning_rate": 9.863997130611153e-06, "loss": 1.1641, "step": 2269 }, { "epoch": 0.6037234042553191, "grad_norm": 3.5391786098480225, "learning_rate": 9.863793318825186e-06, "loss": 1.2167, "step": 2270 }, { "epoch": 0.6039893617021277, "grad_norm": 3.74766206741333, "learning_rate": 9.863589356547728e-06, "loss": 1.3565, "step": 2271 }, { "epoch": 0.6042553191489362, "grad_norm": 3.966728925704956, "learning_rate": 9.863385243785088e-06, "loss": 1.3416, "step": 2272 }, { "epoch": 0.6045212765957447, "grad_norm": 3.2839200496673584, "learning_rate": 9.863180980543582e-06, "loss": 1.1073, "step": 2273 }, { "epoch": 0.6047872340425532, "grad_norm": 3.958099603652954, "learning_rate": 9.862976566829532e-06, "loss": 1.356, "step": 2274 }, { "epoch": 0.6050531914893617, "grad_norm": 3.6041507720947266, "learning_rate": 9.862772002649261e-06, "loss": 1.4091, "step": 2275 }, { "epoch": 0.6053191489361702, "grad_norm": 3.320826530456543, "learning_rate": 9.862567288009099e-06, "loss": 1.196, "step": 2276 }, { "epoch": 0.6055851063829787, "grad_norm": 3.375542163848877, "learning_rate": 9.862362422915382e-06, "loss": 1.161, "step": 2277 }, { "epoch": 0.6058510638297873, "grad_norm": 3.680457353591919, "learning_rate": 9.862157407374446e-06, "loss": 1.129, "step": 2278 }, { "epoch": 0.6061170212765957, "grad_norm": 3.8363595008850098, "learning_rate": 9.861952241392633e-06, "loss": 1.309, "step": 2279 }, { "epoch": 0.6063829787234043, "grad_norm": 3.7582051753997803, "learning_rate": 9.861746924976297e-06, "loss": 1.2328, "step": 2280 }, { "epoch": 0.6066489361702128, "grad_norm": 3.5171892642974854, "learning_rate": 9.861541458131785e-06, "loss": 1.2098, "step": 2281 }, { "epoch": 0.6069148936170212, "grad_norm": 3.905834197998047, "learning_rate": 9.861335840865455e-06, "loss": 1.2909, "step": 2282 }, { "epoch": 0.6071808510638298, "grad_norm": 3.9347522258758545, "learning_rate": 9.861130073183674e-06, "loss": 1.265, "step": 2283 }, { "epoch": 0.6074468085106383, "grad_norm": 3.6212542057037354, "learning_rate": 9.860924155092803e-06, "loss": 1.3044, "step": 2284 }, { "epoch": 0.6077127659574468, "grad_norm": 3.9703807830810547, "learning_rate": 9.860718086599217e-06, "loss": 1.3497, "step": 2285 }, { "epoch": 0.6079787234042553, "grad_norm": 3.94783091545105, "learning_rate": 9.860511867709289e-06, "loss": 1.248, "step": 2286 }, { "epoch": 0.6082446808510639, "grad_norm": 4.237410545349121, "learning_rate": 9.860305498429404e-06, "loss": 1.3791, "step": 2287 }, { "epoch": 0.6085106382978723, "grad_norm": 3.7259433269500732, "learning_rate": 9.860098978765942e-06, "loss": 1.3233, "step": 2288 }, { "epoch": 0.6087765957446809, "grad_norm": 3.8508055210113525, "learning_rate": 9.859892308725296e-06, "loss": 1.2324, "step": 2289 }, { "epoch": 0.6090425531914894, "grad_norm": 3.8663196563720703, "learning_rate": 9.859685488313861e-06, "loss": 1.2425, "step": 2290 }, { "epoch": 0.6093085106382978, "grad_norm": 4.03026008605957, "learning_rate": 9.859478517538035e-06, "loss": 1.2932, "step": 2291 }, { "epoch": 0.6095744680851064, "grad_norm": 3.517122745513916, "learning_rate": 9.859271396404223e-06, "loss": 1.1597, "step": 2292 }, { "epoch": 0.6098404255319149, "grad_norm": 3.6704776287078857, "learning_rate": 9.85906412491883e-06, "loss": 1.1834, "step": 2293 }, { "epoch": 0.6101063829787234, "grad_norm": 4.267923831939697, "learning_rate": 9.858856703088276e-06, "loss": 1.1888, "step": 2294 }, { "epoch": 0.6103723404255319, "grad_norm": 4.178102493286133, "learning_rate": 9.85864913091897e-06, "loss": 1.3685, "step": 2295 }, { "epoch": 0.6106382978723405, "grad_norm": 4.176131725311279, "learning_rate": 9.858441408417345e-06, "loss": 1.231, "step": 2296 }, { "epoch": 0.6109042553191489, "grad_norm": 3.4884450435638428, "learning_rate": 9.85823353558982e-06, "loss": 1.2206, "step": 2297 }, { "epoch": 0.6111702127659574, "grad_norm": 3.8766729831695557, "learning_rate": 9.85802551244283e-06, "loss": 1.3035, "step": 2298 }, { "epoch": 0.611436170212766, "grad_norm": 3.5301473140716553, "learning_rate": 9.857817338982811e-06, "loss": 1.1712, "step": 2299 }, { "epoch": 0.6117021276595744, "grad_norm": 3.7902379035949707, "learning_rate": 9.857609015216205e-06, "loss": 1.1324, "step": 2300 }, { "epoch": 0.611968085106383, "grad_norm": 4.028817176818848, "learning_rate": 9.857400541149455e-06, "loss": 1.3142, "step": 2301 }, { "epoch": 0.6122340425531915, "grad_norm": 3.6242549419403076, "learning_rate": 9.857191916789016e-06, "loss": 1.2368, "step": 2302 }, { "epoch": 0.6125, "grad_norm": 3.6776719093322754, "learning_rate": 9.856983142141338e-06, "loss": 1.3289, "step": 2303 }, { "epoch": 0.6127659574468085, "grad_norm": 3.8104121685028076, "learning_rate": 9.856774217212886e-06, "loss": 1.3076, "step": 2304 }, { "epoch": 0.613031914893617, "grad_norm": 3.668893337249756, "learning_rate": 9.85656514201012e-06, "loss": 1.2935, "step": 2305 }, { "epoch": 0.6132978723404255, "grad_norm": 3.5787241458892822, "learning_rate": 9.85635591653951e-06, "loss": 1.1477, "step": 2306 }, { "epoch": 0.613563829787234, "grad_norm": 3.9113807678222656, "learning_rate": 9.856146540807531e-06, "loss": 1.3338, "step": 2307 }, { "epoch": 0.6138297872340426, "grad_norm": 3.6910572052001953, "learning_rate": 9.85593701482066e-06, "loss": 1.1302, "step": 2308 }, { "epoch": 0.614095744680851, "grad_norm": 4.1038689613342285, "learning_rate": 9.855727338585381e-06, "loss": 1.4519, "step": 2309 }, { "epoch": 0.6143617021276596, "grad_norm": 3.5061099529266357, "learning_rate": 9.855517512108182e-06, "loss": 1.2243, "step": 2310 }, { "epoch": 0.6146276595744681, "grad_norm": 3.5231192111968994, "learning_rate": 9.855307535395553e-06, "loss": 1.2158, "step": 2311 }, { "epoch": 0.6148936170212767, "grad_norm": 3.8572421073913574, "learning_rate": 9.855097408453993e-06, "loss": 1.2392, "step": 2312 }, { "epoch": 0.6151595744680851, "grad_norm": 3.7707557678222656, "learning_rate": 9.854887131290002e-06, "loss": 1.2316, "step": 2313 }, { "epoch": 0.6154255319148936, "grad_norm": 3.860130548477173, "learning_rate": 9.854676703910092e-06, "loss": 1.2118, "step": 2314 }, { "epoch": 0.6156914893617021, "grad_norm": 3.404811382293701, "learning_rate": 9.854466126320763e-06, "loss": 1.1942, "step": 2315 }, { "epoch": 0.6159574468085106, "grad_norm": 3.659116268157959, "learning_rate": 9.854255398528541e-06, "loss": 1.2822, "step": 2316 }, { "epoch": 0.6162234042553192, "grad_norm": 3.97190260887146, "learning_rate": 9.85404452053994e-06, "loss": 1.3892, "step": 2317 }, { "epoch": 0.6164893617021276, "grad_norm": 3.99293851852417, "learning_rate": 9.853833492361486e-06, "loss": 1.2248, "step": 2318 }, { "epoch": 0.6167553191489362, "grad_norm": 3.846611499786377, "learning_rate": 9.85362231399971e-06, "loss": 1.3553, "step": 2319 }, { "epoch": 0.6170212765957447, "grad_norm": 3.922665596008301, "learning_rate": 9.853410985461145e-06, "loss": 1.2831, "step": 2320 }, { "epoch": 0.6172872340425531, "grad_norm": 3.788879871368408, "learning_rate": 9.85319950675233e-06, "loss": 1.3213, "step": 2321 }, { "epoch": 0.6175531914893617, "grad_norm": 3.7415027618408203, "learning_rate": 9.852987877879807e-06, "loss": 1.1951, "step": 2322 }, { "epoch": 0.6178191489361702, "grad_norm": 4.016115665435791, "learning_rate": 9.852776098850128e-06, "loss": 1.2595, "step": 2323 }, { "epoch": 0.6180851063829788, "grad_norm": 3.5927200317382812, "learning_rate": 9.85256416966984e-06, "loss": 1.2103, "step": 2324 }, { "epoch": 0.6183510638297872, "grad_norm": 3.9768147468566895, "learning_rate": 9.852352090345504e-06, "loss": 1.3389, "step": 2325 }, { "epoch": 0.6186170212765958, "grad_norm": 3.378852605819702, "learning_rate": 9.852139860883684e-06, "loss": 1.1266, "step": 2326 }, { "epoch": 0.6188829787234043, "grad_norm": 4.071725368499756, "learning_rate": 9.851927481290943e-06, "loss": 1.4006, "step": 2327 }, { "epoch": 0.6191489361702127, "grad_norm": 3.721118688583374, "learning_rate": 9.851714951573853e-06, "loss": 1.2344, "step": 2328 }, { "epoch": 0.6194148936170213, "grad_norm": 3.551180839538574, "learning_rate": 9.851502271738989e-06, "loss": 1.3175, "step": 2329 }, { "epoch": 0.6196808510638298, "grad_norm": 3.6764516830444336, "learning_rate": 9.851289441792934e-06, "loss": 1.2169, "step": 2330 }, { "epoch": 0.6199468085106383, "grad_norm": 3.8505606651306152, "learning_rate": 9.851076461742272e-06, "loss": 1.3586, "step": 2331 }, { "epoch": 0.6202127659574468, "grad_norm": 3.9605445861816406, "learning_rate": 9.850863331593591e-06, "loss": 1.2454, "step": 2332 }, { "epoch": 0.6204787234042554, "grad_norm": 4.140010833740234, "learning_rate": 9.85065005135349e-06, "loss": 1.4014, "step": 2333 }, { "epoch": 0.6207446808510638, "grad_norm": 4.118074417114258, "learning_rate": 9.850436621028565e-06, "loss": 1.2367, "step": 2334 }, { "epoch": 0.6210106382978723, "grad_norm": 3.6424777507781982, "learning_rate": 9.85022304062542e-06, "loss": 1.129, "step": 2335 }, { "epoch": 0.6212765957446809, "grad_norm": 3.643145799636841, "learning_rate": 9.850009310150662e-06, "loss": 1.3767, "step": 2336 }, { "epoch": 0.6215425531914893, "grad_norm": 3.913959503173828, "learning_rate": 9.849795429610908e-06, "loss": 1.1977, "step": 2337 }, { "epoch": 0.6218085106382979, "grad_norm": 3.91186261177063, "learning_rate": 9.849581399012772e-06, "loss": 1.2842, "step": 2338 }, { "epoch": 0.6220744680851064, "grad_norm": 3.7167961597442627, "learning_rate": 9.849367218362879e-06, "loss": 1.2802, "step": 2339 }, { "epoch": 0.6223404255319149, "grad_norm": 3.5471532344818115, "learning_rate": 9.849152887667855e-06, "loss": 1.2785, "step": 2340 }, { "epoch": 0.6226063829787234, "grad_norm": 4.358826637268066, "learning_rate": 9.84893840693433e-06, "loss": 1.1696, "step": 2341 }, { "epoch": 0.622872340425532, "grad_norm": 3.869590997695923, "learning_rate": 9.848723776168942e-06, "loss": 1.3316, "step": 2342 }, { "epoch": 0.6231382978723404, "grad_norm": 4.493122577667236, "learning_rate": 9.848508995378333e-06, "loss": 1.2928, "step": 2343 }, { "epoch": 0.6234042553191489, "grad_norm": 3.808885335922241, "learning_rate": 9.848294064569146e-06, "loss": 1.331, "step": 2344 }, { "epoch": 0.6236702127659575, "grad_norm": 3.6614105701446533, "learning_rate": 9.848078983748032e-06, "loss": 1.3549, "step": 2345 }, { "epoch": 0.6239361702127659, "grad_norm": 3.5685722827911377, "learning_rate": 9.847863752921649e-06, "loss": 1.1914, "step": 2346 }, { "epoch": 0.6242021276595745, "grad_norm": 4.203314781188965, "learning_rate": 9.847648372096652e-06, "loss": 1.3369, "step": 2347 }, { "epoch": 0.624468085106383, "grad_norm": 3.762103796005249, "learning_rate": 9.847432841279707e-06, "loss": 1.261, "step": 2348 }, { "epoch": 0.6247340425531915, "grad_norm": 4.371121883392334, "learning_rate": 9.847217160477483e-06, "loss": 1.3071, "step": 2349 }, { "epoch": 0.625, "grad_norm": 3.928662061691284, "learning_rate": 9.847001329696653e-06, "loss": 1.2321, "step": 2350 }, { "epoch": 0.6252659574468085, "grad_norm": 3.7375707626342773, "learning_rate": 9.846785348943896e-06, "loss": 1.3022, "step": 2351 }, { "epoch": 0.625531914893617, "grad_norm": 3.684936046600342, "learning_rate": 9.846569218225892e-06, "loss": 1.2365, "step": 2352 }, { "epoch": 0.6257978723404255, "grad_norm": 3.5079708099365234, "learning_rate": 9.846352937549332e-06, "loss": 1.2328, "step": 2353 }, { "epoch": 0.6260638297872341, "grad_norm": 3.814976692199707, "learning_rate": 9.846136506920907e-06, "loss": 1.1824, "step": 2354 }, { "epoch": 0.6263297872340425, "grad_norm": 3.3843934535980225, "learning_rate": 9.84591992634731e-06, "loss": 1.0477, "step": 2355 }, { "epoch": 0.6265957446808511, "grad_norm": 3.712428569793701, "learning_rate": 9.845703195835248e-06, "loss": 1.2826, "step": 2356 }, { "epoch": 0.6268617021276596, "grad_norm": 3.617882251739502, "learning_rate": 9.845486315391421e-06, "loss": 1.2472, "step": 2357 }, { "epoch": 0.627127659574468, "grad_norm": 4.057145595550537, "learning_rate": 9.845269285022545e-06, "loss": 1.4144, "step": 2358 }, { "epoch": 0.6273936170212766, "grad_norm": 4.23139762878418, "learning_rate": 9.845052104735331e-06, "loss": 1.4445, "step": 2359 }, { "epoch": 0.6276595744680851, "grad_norm": 3.8976731300354004, "learning_rate": 9.844834774536503e-06, "loss": 1.2646, "step": 2360 }, { "epoch": 0.6279255319148936, "grad_norm": 3.6036627292633057, "learning_rate": 9.844617294432781e-06, "loss": 1.251, "step": 2361 }, { "epoch": 0.6281914893617021, "grad_norm": 3.4059393405914307, "learning_rate": 9.844399664430896e-06, "loss": 1.1432, "step": 2362 }, { "epoch": 0.6284574468085107, "grad_norm": 3.6594855785369873, "learning_rate": 9.844181884537583e-06, "loss": 1.3047, "step": 2363 }, { "epoch": 0.6287234042553191, "grad_norm": 4.183903217315674, "learning_rate": 9.843963954759578e-06, "loss": 1.2951, "step": 2364 }, { "epoch": 0.6289893617021277, "grad_norm": 3.496905565261841, "learning_rate": 9.843745875103628e-06, "loss": 1.3087, "step": 2365 }, { "epoch": 0.6292553191489362, "grad_norm": 3.5995302200317383, "learning_rate": 9.843527645576475e-06, "loss": 1.2998, "step": 2366 }, { "epoch": 0.6295212765957446, "grad_norm": 3.597393035888672, "learning_rate": 9.843309266184875e-06, "loss": 1.2151, "step": 2367 }, { "epoch": 0.6297872340425532, "grad_norm": 3.922405481338501, "learning_rate": 9.843090736935583e-06, "loss": 1.4409, "step": 2368 }, { "epoch": 0.6300531914893617, "grad_norm": 3.7593741416931152, "learning_rate": 9.842872057835363e-06, "loss": 1.0905, "step": 2369 }, { "epoch": 0.6303191489361702, "grad_norm": 3.570892572402954, "learning_rate": 9.842653228890979e-06, "loss": 1.2337, "step": 2370 }, { "epoch": 0.6305851063829787, "grad_norm": 3.2270023822784424, "learning_rate": 9.842434250109202e-06, "loss": 0.9824, "step": 2371 }, { "epoch": 0.6308510638297873, "grad_norm": 3.9054601192474365, "learning_rate": 9.84221512149681e-06, "loss": 1.3091, "step": 2372 }, { "epoch": 0.6311170212765957, "grad_norm": 3.7820627689361572, "learning_rate": 9.84199584306058e-06, "loss": 1.2331, "step": 2373 }, { "epoch": 0.6313829787234042, "grad_norm": 3.407257080078125, "learning_rate": 9.841776414807297e-06, "loss": 1.1868, "step": 2374 }, { "epoch": 0.6316489361702128, "grad_norm": 3.471640110015869, "learning_rate": 9.841556836743752e-06, "loss": 1.2025, "step": 2375 }, { "epoch": 0.6319148936170212, "grad_norm": 3.824422597885132, "learning_rate": 9.841337108876739e-06, "loss": 1.1932, "step": 2376 }, { "epoch": 0.6321808510638298, "grad_norm": 3.6980538368225098, "learning_rate": 9.841117231213055e-06, "loss": 1.2374, "step": 2377 }, { "epoch": 0.6324468085106383, "grad_norm": 3.9002277851104736, "learning_rate": 9.840897203759502e-06, "loss": 1.3205, "step": 2378 }, { "epoch": 0.6327127659574469, "grad_norm": 3.993248462677002, "learning_rate": 9.840677026522893e-06, "loss": 1.1262, "step": 2379 }, { "epoch": 0.6329787234042553, "grad_norm": 3.8742499351501465, "learning_rate": 9.840456699510038e-06, "loss": 1.1456, "step": 2380 }, { "epoch": 0.6332446808510638, "grad_norm": 3.772584915161133, "learning_rate": 9.840236222727752e-06, "loss": 1.1367, "step": 2381 }, { "epoch": 0.6335106382978724, "grad_norm": 3.7653708457946777, "learning_rate": 9.840015596182861e-06, "loss": 1.24, "step": 2382 }, { "epoch": 0.6337765957446808, "grad_norm": 3.4554617404937744, "learning_rate": 9.839794819882188e-06, "loss": 1.2708, "step": 2383 }, { "epoch": 0.6340425531914894, "grad_norm": 3.808807611465454, "learning_rate": 9.839573893832564e-06, "loss": 1.3985, "step": 2384 }, { "epoch": 0.6343085106382979, "grad_norm": 3.6254007816314697, "learning_rate": 9.839352818040825e-06, "loss": 1.3145, "step": 2385 }, { "epoch": 0.6345744680851064, "grad_norm": 3.83559513092041, "learning_rate": 9.839131592513814e-06, "loss": 1.2868, "step": 2386 }, { "epoch": 0.6348404255319149, "grad_norm": 3.465432643890381, "learning_rate": 9.838910217258375e-06, "loss": 1.213, "step": 2387 }, { "epoch": 0.6351063829787233, "grad_norm": 3.762899160385132, "learning_rate": 9.838688692281356e-06, "loss": 1.3678, "step": 2388 }, { "epoch": 0.6353723404255319, "grad_norm": 3.573856830596924, "learning_rate": 9.83846701758961e-06, "loss": 1.3181, "step": 2389 }, { "epoch": 0.6356382978723404, "grad_norm": 3.873749256134033, "learning_rate": 9.838245193189999e-06, "loss": 1.252, "step": 2390 }, { "epoch": 0.635904255319149, "grad_norm": 3.5495100021362305, "learning_rate": 9.838023219089386e-06, "loss": 1.352, "step": 2391 }, { "epoch": 0.6361702127659574, "grad_norm": 3.6257059574127197, "learning_rate": 9.837801095294639e-06, "loss": 1.2099, "step": 2392 }, { "epoch": 0.636436170212766, "grad_norm": 3.658745288848877, "learning_rate": 9.83757882181263e-06, "loss": 1.2089, "step": 2393 }, { "epoch": 0.6367021276595745, "grad_norm": 3.6948094367980957, "learning_rate": 9.837356398650235e-06, "loss": 1.3032, "step": 2394 }, { "epoch": 0.636968085106383, "grad_norm": 3.677865743637085, "learning_rate": 9.83713382581434e-06, "loss": 1.2295, "step": 2395 }, { "epoch": 0.6372340425531915, "grad_norm": 3.758213758468628, "learning_rate": 9.836911103311828e-06, "loss": 1.2542, "step": 2396 }, { "epoch": 0.6375, "grad_norm": 3.710860252380371, "learning_rate": 9.836688231149593e-06, "loss": 1.3331, "step": 2397 }, { "epoch": 0.6377659574468085, "grad_norm": 3.436738967895508, "learning_rate": 9.836465209334529e-06, "loss": 1.1318, "step": 2398 }, { "epoch": 0.638031914893617, "grad_norm": 4.398902416229248, "learning_rate": 9.836242037873536e-06, "loss": 1.3268, "step": 2399 }, { "epoch": 0.6382978723404256, "grad_norm": 3.483926773071289, "learning_rate": 9.836018716773522e-06, "loss": 1.1744, "step": 2400 }, { "epoch": 0.638563829787234, "grad_norm": 3.766038417816162, "learning_rate": 9.835795246041395e-06, "loss": 1.1829, "step": 2401 }, { "epoch": 0.6388297872340426, "grad_norm": 3.7989938259124756, "learning_rate": 9.835571625684068e-06, "loss": 1.2691, "step": 2402 }, { "epoch": 0.6390957446808511, "grad_norm": 3.6767778396606445, "learning_rate": 9.835347855708464e-06, "loss": 1.1456, "step": 2403 }, { "epoch": 0.6393617021276595, "grad_norm": 3.689368963241577, "learning_rate": 9.835123936121504e-06, "loss": 1.2714, "step": 2404 }, { "epoch": 0.6396276595744681, "grad_norm": 3.6774284839630127, "learning_rate": 9.834899866930116e-06, "loss": 1.1968, "step": 2405 }, { "epoch": 0.6398936170212766, "grad_norm": 3.734713077545166, "learning_rate": 9.834675648141235e-06, "loss": 1.4036, "step": 2406 }, { "epoch": 0.6401595744680851, "grad_norm": 3.4915902614593506, "learning_rate": 9.834451279761796e-06, "loss": 1.0733, "step": 2407 }, { "epoch": 0.6404255319148936, "grad_norm": 3.5466091632843018, "learning_rate": 9.834226761798742e-06, "loss": 1.2197, "step": 2408 }, { "epoch": 0.6406914893617022, "grad_norm": 3.5611202716827393, "learning_rate": 9.83400209425902e-06, "loss": 1.092, "step": 2409 }, { "epoch": 0.6409574468085106, "grad_norm": 3.35369610786438, "learning_rate": 9.833777277149585e-06, "loss": 1.2385, "step": 2410 }, { "epoch": 0.6412234042553191, "grad_norm": 3.7679550647735596, "learning_rate": 9.833552310477388e-06, "loss": 1.0647, "step": 2411 }, { "epoch": 0.6414893617021277, "grad_norm": 3.6990325450897217, "learning_rate": 9.833327194249392e-06, "loss": 1.1853, "step": 2412 }, { "epoch": 0.6417553191489361, "grad_norm": 3.6745262145996094, "learning_rate": 9.833101928472562e-06, "loss": 1.2038, "step": 2413 }, { "epoch": 0.6420212765957447, "grad_norm": 3.357508897781372, "learning_rate": 9.832876513153867e-06, "loss": 1.0274, "step": 2414 }, { "epoch": 0.6422872340425532, "grad_norm": 3.786376953125, "learning_rate": 9.832650948300284e-06, "loss": 1.288, "step": 2415 }, { "epoch": 0.6425531914893617, "grad_norm": 3.253251314163208, "learning_rate": 9.83242523391879e-06, "loss": 1.0876, "step": 2416 }, { "epoch": 0.6428191489361702, "grad_norm": 3.3168015480041504, "learning_rate": 9.832199370016371e-06, "loss": 1.1551, "step": 2417 }, { "epoch": 0.6430851063829788, "grad_norm": 3.8747761249542236, "learning_rate": 9.831973356600013e-06, "loss": 1.2343, "step": 2418 }, { "epoch": 0.6433510638297872, "grad_norm": 3.9137704372406006, "learning_rate": 9.83174719367671e-06, "loss": 1.1782, "step": 2419 }, { "epoch": 0.6436170212765957, "grad_norm": 3.64943528175354, "learning_rate": 9.831520881253462e-06, "loss": 1.0506, "step": 2420 }, { "epoch": 0.6438829787234043, "grad_norm": 3.5648887157440186, "learning_rate": 9.83129441933727e-06, "loss": 1.0195, "step": 2421 }, { "epoch": 0.6441489361702127, "grad_norm": 3.6668763160705566, "learning_rate": 9.83106780793514e-06, "loss": 1.349, "step": 2422 }, { "epoch": 0.6444148936170213, "grad_norm": 3.6365723609924316, "learning_rate": 9.830841047054083e-06, "loss": 1.2105, "step": 2423 }, { "epoch": 0.6446808510638298, "grad_norm": 3.657466411590576, "learning_rate": 9.830614136701116e-06, "loss": 1.2453, "step": 2424 }, { "epoch": 0.6449468085106383, "grad_norm": 3.7750251293182373, "learning_rate": 9.83038707688326e-06, "loss": 1.2753, "step": 2425 }, { "epoch": 0.6452127659574468, "grad_norm": 3.4032111167907715, "learning_rate": 9.830159867607543e-06, "loss": 1.2054, "step": 2426 }, { "epoch": 0.6454787234042553, "grad_norm": 3.546877861022949, "learning_rate": 9.82993250888099e-06, "loss": 1.35, "step": 2427 }, { "epoch": 0.6457446808510638, "grad_norm": 3.5076162815093994, "learning_rate": 9.829705000710642e-06, "loss": 1.1382, "step": 2428 }, { "epoch": 0.6460106382978723, "grad_norm": 3.955322742462158, "learning_rate": 9.829477343103533e-06, "loss": 1.3948, "step": 2429 }, { "epoch": 0.6462765957446809, "grad_norm": 3.5918376445770264, "learning_rate": 9.82924953606671e-06, "loss": 1.2271, "step": 2430 }, { "epoch": 0.6465425531914893, "grad_norm": 3.8371551036834717, "learning_rate": 9.82902157960722e-06, "loss": 1.2004, "step": 2431 }, { "epoch": 0.6468085106382979, "grad_norm": 3.573141098022461, "learning_rate": 9.828793473732116e-06, "loss": 1.2059, "step": 2432 }, { "epoch": 0.6470744680851064, "grad_norm": 3.8021459579467773, "learning_rate": 9.828565218448457e-06, "loss": 1.1852, "step": 2433 }, { "epoch": 0.6473404255319148, "grad_norm": 4.022589206695557, "learning_rate": 9.828336813763308e-06, "loss": 1.2385, "step": 2434 }, { "epoch": 0.6476063829787234, "grad_norm": 3.364841938018799, "learning_rate": 9.82810825968373e-06, "loss": 1.1976, "step": 2435 }, { "epoch": 0.6478723404255319, "grad_norm": 4.046548843383789, "learning_rate": 9.8278795562168e-06, "loss": 1.3522, "step": 2436 }, { "epoch": 0.6481382978723405, "grad_norm": 3.795485019683838, "learning_rate": 9.82765070336959e-06, "loss": 1.2166, "step": 2437 }, { "epoch": 0.6484042553191489, "grad_norm": 3.8107662200927734, "learning_rate": 9.827421701149187e-06, "loss": 1.3138, "step": 2438 }, { "epoch": 0.6486702127659575, "grad_norm": 3.618577241897583, "learning_rate": 9.82719254956267e-06, "loss": 1.1677, "step": 2439 }, { "epoch": 0.648936170212766, "grad_norm": 3.680255651473999, "learning_rate": 9.826963248617133e-06, "loss": 1.2319, "step": 2440 }, { "epoch": 0.6492021276595744, "grad_norm": 3.6145694255828857, "learning_rate": 9.82673379831967e-06, "loss": 1.2276, "step": 2441 }, { "epoch": 0.649468085106383, "grad_norm": 3.643686532974243, "learning_rate": 9.82650419867738e-06, "loss": 1.2989, "step": 2442 }, { "epoch": 0.6497340425531914, "grad_norm": 3.774909019470215, "learning_rate": 9.82627444969737e-06, "loss": 1.2749, "step": 2443 }, { "epoch": 0.65, "grad_norm": 3.7553470134735107, "learning_rate": 9.826044551386743e-06, "loss": 1.0902, "step": 2444 }, { "epoch": 0.6502659574468085, "grad_norm": 3.453191041946411, "learning_rate": 9.825814503752618e-06, "loss": 1.2609, "step": 2445 }, { "epoch": 0.6505319148936171, "grad_norm": 3.889417886734009, "learning_rate": 9.825584306802109e-06, "loss": 1.2514, "step": 2446 }, { "epoch": 0.6507978723404255, "grad_norm": 3.5073375701904297, "learning_rate": 9.825353960542342e-06, "loss": 1.2466, "step": 2447 }, { "epoch": 0.6510638297872341, "grad_norm": 3.4606523513793945, "learning_rate": 9.825123464980442e-06, "loss": 1.1156, "step": 2448 }, { "epoch": 0.6513297872340426, "grad_norm": 3.831897497177124, "learning_rate": 9.82489282012354e-06, "loss": 1.1323, "step": 2449 }, { "epoch": 0.651595744680851, "grad_norm": 4.391724109649658, "learning_rate": 9.824662025978774e-06, "loss": 1.2543, "step": 2450 }, { "epoch": 0.6518617021276596, "grad_norm": 3.8090097904205322, "learning_rate": 9.824431082553285e-06, "loss": 1.3592, "step": 2451 }, { "epoch": 0.652127659574468, "grad_norm": 3.706662893295288, "learning_rate": 9.824199989854217e-06, "loss": 1.2753, "step": 2452 }, { "epoch": 0.6523936170212766, "grad_norm": 4.826519966125488, "learning_rate": 9.823968747888722e-06, "loss": 1.501, "step": 2453 }, { "epoch": 0.6526595744680851, "grad_norm": 3.7181127071380615, "learning_rate": 9.823737356663956e-06, "loss": 1.283, "step": 2454 }, { "epoch": 0.6529255319148937, "grad_norm": 3.6020474433898926, "learning_rate": 9.823505816187076e-06, "loss": 1.195, "step": 2455 }, { "epoch": 0.6531914893617021, "grad_norm": 3.7805116176605225, "learning_rate": 9.823274126465245e-06, "loss": 1.3032, "step": 2456 }, { "epoch": 0.6534574468085106, "grad_norm": 3.6897008419036865, "learning_rate": 9.823042287505636e-06, "loss": 1.33, "step": 2457 }, { "epoch": 0.6537234042553192, "grad_norm": 3.6036691665649414, "learning_rate": 9.82281029931542e-06, "loss": 1.2454, "step": 2458 }, { "epoch": 0.6539893617021276, "grad_norm": 3.8645083904266357, "learning_rate": 9.822578161901774e-06, "loss": 1.4082, "step": 2459 }, { "epoch": 0.6542553191489362, "grad_norm": 3.982588052749634, "learning_rate": 9.822345875271884e-06, "loss": 1.2635, "step": 2460 }, { "epoch": 0.6545212765957447, "grad_norm": 3.576320171356201, "learning_rate": 9.822113439432933e-06, "loss": 1.3524, "step": 2461 }, { "epoch": 0.6547872340425532, "grad_norm": 3.387544870376587, "learning_rate": 9.821880854392115e-06, "loss": 1.2344, "step": 2462 }, { "epoch": 0.6550531914893617, "grad_norm": 3.385258436203003, "learning_rate": 9.821648120156628e-06, "loss": 1.2054, "step": 2463 }, { "epoch": 0.6553191489361702, "grad_norm": 3.952305316925049, "learning_rate": 9.82141523673367e-06, "loss": 1.153, "step": 2464 }, { "epoch": 0.6555851063829787, "grad_norm": 3.8070571422576904, "learning_rate": 9.821182204130448e-06, "loss": 1.3405, "step": 2465 }, { "epoch": 0.6558510638297872, "grad_norm": 3.9651296138763428, "learning_rate": 9.820949022354174e-06, "loss": 1.3205, "step": 2466 }, { "epoch": 0.6561170212765958, "grad_norm": 3.980510950088501, "learning_rate": 9.82071569141206e-06, "loss": 1.401, "step": 2467 }, { "epoch": 0.6563829787234042, "grad_norm": 4.441346168518066, "learning_rate": 9.820482211311326e-06, "loss": 1.3839, "step": 2468 }, { "epoch": 0.6566489361702128, "grad_norm": 3.4150032997131348, "learning_rate": 9.820248582059197e-06, "loss": 1.0058, "step": 2469 }, { "epoch": 0.6569148936170213, "grad_norm": 3.4013893604278564, "learning_rate": 9.820014803662905e-06, "loss": 1.1612, "step": 2470 }, { "epoch": 0.6571808510638298, "grad_norm": 4.017107009887695, "learning_rate": 9.819780876129677e-06, "loss": 1.2295, "step": 2471 }, { "epoch": 0.6574468085106383, "grad_norm": 3.500370979309082, "learning_rate": 9.819546799466756e-06, "loss": 1.2573, "step": 2472 }, { "epoch": 0.6577127659574468, "grad_norm": 3.7119557857513428, "learning_rate": 9.81931257368138e-06, "loss": 1.1827, "step": 2473 }, { "epoch": 0.6579787234042553, "grad_norm": 4.006588935852051, "learning_rate": 9.8190781987808e-06, "loss": 1.3236, "step": 2474 }, { "epoch": 0.6582446808510638, "grad_norm": 3.6574013233184814, "learning_rate": 9.818843674772268e-06, "loss": 1.2783, "step": 2475 }, { "epoch": 0.6585106382978724, "grad_norm": 3.4724280834198, "learning_rate": 9.818609001663038e-06, "loss": 1.3469, "step": 2476 }, { "epoch": 0.6587765957446808, "grad_norm": 3.3943772315979004, "learning_rate": 9.818374179460372e-06, "loss": 1.1934, "step": 2477 }, { "epoch": 0.6590425531914894, "grad_norm": 3.6822094917297363, "learning_rate": 9.818139208171537e-06, "loss": 1.3505, "step": 2478 }, { "epoch": 0.6593085106382979, "grad_norm": 3.474010467529297, "learning_rate": 9.817904087803802e-06, "loss": 1.1487, "step": 2479 }, { "epoch": 0.6595744680851063, "grad_norm": 3.4429280757904053, "learning_rate": 9.817668818364441e-06, "loss": 1.1786, "step": 2480 }, { "epoch": 0.6598404255319149, "grad_norm": 4.096560955047607, "learning_rate": 9.817433399860736e-06, "loss": 1.3167, "step": 2481 }, { "epoch": 0.6601063829787234, "grad_norm": 3.4501636028289795, "learning_rate": 9.817197832299971e-06, "loss": 1.0416, "step": 2482 }, { "epoch": 0.660372340425532, "grad_norm": 3.7687666416168213, "learning_rate": 9.816962115689432e-06, "loss": 1.1121, "step": 2483 }, { "epoch": 0.6606382978723404, "grad_norm": 3.6816604137420654, "learning_rate": 9.816726250036413e-06, "loss": 1.2019, "step": 2484 }, { "epoch": 0.660904255319149, "grad_norm": 4.033024787902832, "learning_rate": 9.816490235348215e-06, "loss": 1.3078, "step": 2485 }, { "epoch": 0.6611702127659574, "grad_norm": 3.7372167110443115, "learning_rate": 9.816254071632137e-06, "loss": 1.4434, "step": 2486 }, { "epoch": 0.6614361702127659, "grad_norm": 3.694561004638672, "learning_rate": 9.816017758895488e-06, "loss": 1.2969, "step": 2487 }, { "epoch": 0.6617021276595745, "grad_norm": 4.178577423095703, "learning_rate": 9.815781297145578e-06, "loss": 1.3661, "step": 2488 }, { "epoch": 0.6619680851063829, "grad_norm": 3.647728681564331, "learning_rate": 9.815544686389727e-06, "loss": 1.1693, "step": 2489 }, { "epoch": 0.6622340425531915, "grad_norm": 3.6795883178710938, "learning_rate": 9.815307926635252e-06, "loss": 1.2308, "step": 2490 }, { "epoch": 0.6625, "grad_norm": 3.8441531658172607, "learning_rate": 9.81507101788948e-06, "loss": 1.2011, "step": 2491 }, { "epoch": 0.6627659574468086, "grad_norm": 3.512495994567871, "learning_rate": 9.814833960159744e-06, "loss": 1.1509, "step": 2492 }, { "epoch": 0.663031914893617, "grad_norm": 3.631899356842041, "learning_rate": 9.814596753453376e-06, "loss": 1.0989, "step": 2493 }, { "epoch": 0.6632978723404256, "grad_norm": 3.5272533893585205, "learning_rate": 9.814359397777716e-06, "loss": 1.3053, "step": 2494 }, { "epoch": 0.663563829787234, "grad_norm": 3.492922306060791, "learning_rate": 9.814121893140105e-06, "loss": 1.2977, "step": 2495 }, { "epoch": 0.6638297872340425, "grad_norm": 3.5858078002929688, "learning_rate": 9.813884239547898e-06, "loss": 1.1963, "step": 2496 }, { "epoch": 0.6640957446808511, "grad_norm": 3.4466118812561035, "learning_rate": 9.813646437008444e-06, "loss": 1.266, "step": 2497 }, { "epoch": 0.6643617021276595, "grad_norm": 3.682159900665283, "learning_rate": 9.813408485529103e-06, "loss": 1.1549, "step": 2498 }, { "epoch": 0.6646276595744681, "grad_norm": 4.358649253845215, "learning_rate": 9.813170385117235e-06, "loss": 1.3577, "step": 2499 }, { "epoch": 0.6648936170212766, "grad_norm": 4.059812068939209, "learning_rate": 9.81293213578021e-06, "loss": 1.3728, "step": 2500 }, { "epoch": 0.6648936170212766, "eval_loss": 1.2857128381729126, "eval_runtime": 12.6822, "eval_samples_per_second": 31.54, "eval_steps_per_second": 3.943, "step": 2500 }, { "epoch": 0.6651595744680852, "grad_norm": 3.519260883331299, "learning_rate": 9.812693737525396e-06, "loss": 1.1743, "step": 2501 }, { "epoch": 0.6654255319148936, "grad_norm": 4.004322052001953, "learning_rate": 9.812455190360172e-06, "loss": 1.2847, "step": 2502 }, { "epoch": 0.6656914893617021, "grad_norm": 3.699012517929077, "learning_rate": 9.81221649429192e-06, "loss": 1.3645, "step": 2503 }, { "epoch": 0.6659574468085107, "grad_norm": 3.5919108390808105, "learning_rate": 9.811977649328021e-06, "loss": 1.1794, "step": 2504 }, { "epoch": 0.6662234042553191, "grad_norm": 3.382624626159668, "learning_rate": 9.81173865547587e-06, "loss": 1.2909, "step": 2505 }, { "epoch": 0.6664893617021277, "grad_norm": 3.7188732624053955, "learning_rate": 9.811499512742861e-06, "loss": 1.2731, "step": 2506 }, { "epoch": 0.6667553191489362, "grad_norm": 3.5745997428894043, "learning_rate": 9.811260221136392e-06, "loss": 1.1994, "step": 2507 }, { "epoch": 0.6670212765957447, "grad_norm": 3.6393473148345947, "learning_rate": 9.811020780663865e-06, "loss": 1.2335, "step": 2508 }, { "epoch": 0.6672872340425532, "grad_norm": 3.4967026710510254, "learning_rate": 9.810781191332692e-06, "loss": 1.2272, "step": 2509 }, { "epoch": 0.6675531914893617, "grad_norm": 3.826430559158325, "learning_rate": 9.810541453150286e-06, "loss": 1.3689, "step": 2510 }, { "epoch": 0.6678191489361702, "grad_norm": 4.058473110198975, "learning_rate": 9.810301566124063e-06, "loss": 1.1942, "step": 2511 }, { "epoch": 0.6680851063829787, "grad_norm": 3.5520458221435547, "learning_rate": 9.810061530261446e-06, "loss": 1.1599, "step": 2512 }, { "epoch": 0.6683510638297873, "grad_norm": 3.7619452476501465, "learning_rate": 9.80982134556986e-06, "loss": 1.2391, "step": 2513 }, { "epoch": 0.6686170212765957, "grad_norm": 3.9400548934936523, "learning_rate": 9.809581012056743e-06, "loss": 1.2792, "step": 2514 }, { "epoch": 0.6688829787234043, "grad_norm": 3.3986830711364746, "learning_rate": 9.809340529729523e-06, "loss": 1.2333, "step": 2515 }, { "epoch": 0.6691489361702128, "grad_norm": 3.8278701305389404, "learning_rate": 9.809099898595647e-06, "loss": 1.2988, "step": 2516 }, { "epoch": 0.6694148936170212, "grad_norm": 3.8813681602478027, "learning_rate": 9.808859118662558e-06, "loss": 1.1505, "step": 2517 }, { "epoch": 0.6696808510638298, "grad_norm": 3.5952844619750977, "learning_rate": 9.808618189937706e-06, "loss": 1.3804, "step": 2518 }, { "epoch": 0.6699468085106383, "grad_norm": 3.642479181289673, "learning_rate": 9.808377112428546e-06, "loss": 1.2918, "step": 2519 }, { "epoch": 0.6702127659574468, "grad_norm": 3.810826301574707, "learning_rate": 9.808135886142536e-06, "loss": 1.3684, "step": 2520 }, { "epoch": 0.6704787234042553, "grad_norm": 3.843879222869873, "learning_rate": 9.807894511087141e-06, "loss": 1.2815, "step": 2521 }, { "epoch": 0.6707446808510639, "grad_norm": 3.68229341506958, "learning_rate": 9.807652987269829e-06, "loss": 1.1894, "step": 2522 }, { "epoch": 0.6710106382978723, "grad_norm": 3.585465669631958, "learning_rate": 9.807411314698075e-06, "loss": 1.3078, "step": 2523 }, { "epoch": 0.6712765957446809, "grad_norm": 3.825195074081421, "learning_rate": 9.807169493379353e-06, "loss": 1.2117, "step": 2524 }, { "epoch": 0.6715425531914894, "grad_norm": 3.376753091812134, "learning_rate": 9.806927523321148e-06, "loss": 1.1575, "step": 2525 }, { "epoch": 0.6718085106382978, "grad_norm": 3.877986431121826, "learning_rate": 9.806685404530946e-06, "loss": 1.3773, "step": 2526 }, { "epoch": 0.6720744680851064, "grad_norm": 3.9964683055877686, "learning_rate": 9.806443137016237e-06, "loss": 1.2466, "step": 2527 }, { "epoch": 0.6723404255319149, "grad_norm": 3.6897804737091064, "learning_rate": 9.80620072078452e-06, "loss": 1.2107, "step": 2528 }, { "epoch": 0.6726063829787234, "grad_norm": 3.921840190887451, "learning_rate": 9.805958155843294e-06, "loss": 1.226, "step": 2529 }, { "epoch": 0.6728723404255319, "grad_norm": 3.4277050495147705, "learning_rate": 9.805715442200065e-06, "loss": 1.2126, "step": 2530 }, { "epoch": 0.6731382978723405, "grad_norm": 3.841946601867676, "learning_rate": 9.805472579862342e-06, "loss": 1.323, "step": 2531 }, { "epoch": 0.6734042553191489, "grad_norm": 3.7039599418640137, "learning_rate": 9.805229568837637e-06, "loss": 1.2843, "step": 2532 }, { "epoch": 0.6736702127659574, "grad_norm": 3.5301520824432373, "learning_rate": 9.804986409133475e-06, "loss": 1.0612, "step": 2533 }, { "epoch": 0.673936170212766, "grad_norm": 4.042654037475586, "learning_rate": 9.804743100757375e-06, "loss": 1.215, "step": 2534 }, { "epoch": 0.6742021276595744, "grad_norm": 3.895273447036743, "learning_rate": 9.804499643716866e-06, "loss": 1.4006, "step": 2535 }, { "epoch": 0.674468085106383, "grad_norm": 3.5299017429351807, "learning_rate": 9.804256038019482e-06, "loss": 1.3813, "step": 2536 }, { "epoch": 0.6747340425531915, "grad_norm": 3.8434762954711914, "learning_rate": 9.80401228367276e-06, "loss": 1.4165, "step": 2537 }, { "epoch": 0.675, "grad_norm": 4.0280256271362305, "learning_rate": 9.803768380684242e-06, "loss": 1.3851, "step": 2538 }, { "epoch": 0.6752659574468085, "grad_norm": 3.663043260574341, "learning_rate": 9.803524329061474e-06, "loss": 1.3044, "step": 2539 }, { "epoch": 0.675531914893617, "grad_norm": 3.575730562210083, "learning_rate": 9.803280128812009e-06, "loss": 1.2849, "step": 2540 }, { "epoch": 0.6757978723404255, "grad_norm": 3.7937097549438477, "learning_rate": 9.8030357799434e-06, "loss": 1.2569, "step": 2541 }, { "epoch": 0.676063829787234, "grad_norm": 3.982719898223877, "learning_rate": 9.80279128246321e-06, "loss": 1.411, "step": 2542 }, { "epoch": 0.6763297872340426, "grad_norm": 3.825068950653076, "learning_rate": 9.802546636379001e-06, "loss": 1.295, "step": 2543 }, { "epoch": 0.676595744680851, "grad_norm": 3.8499345779418945, "learning_rate": 9.80230184169835e-06, "loss": 1.282, "step": 2544 }, { "epoch": 0.6768617021276596, "grad_norm": 3.4873030185699463, "learning_rate": 9.802056898428823e-06, "loss": 1.2803, "step": 2545 }, { "epoch": 0.6771276595744681, "grad_norm": 3.9438254833221436, "learning_rate": 9.801811806578001e-06, "loss": 1.2881, "step": 2546 }, { "epoch": 0.6773936170212767, "grad_norm": 3.392169237136841, "learning_rate": 9.80156656615347e-06, "loss": 1.2485, "step": 2547 }, { "epoch": 0.6776595744680851, "grad_norm": 3.8698456287384033, "learning_rate": 9.801321177162814e-06, "loss": 1.281, "step": 2548 }, { "epoch": 0.6779255319148936, "grad_norm": 3.8232076168060303, "learning_rate": 9.801075639613628e-06, "loss": 1.3045, "step": 2549 }, { "epoch": 0.6781914893617021, "grad_norm": 3.8453428745269775, "learning_rate": 9.80082995351351e-06, "loss": 1.2239, "step": 2550 }, { "epoch": 0.6784574468085106, "grad_norm": 3.7375547885894775, "learning_rate": 9.800584118870063e-06, "loss": 1.195, "step": 2551 }, { "epoch": 0.6787234042553192, "grad_norm": 3.84708571434021, "learning_rate": 9.800338135690889e-06, "loss": 1.1614, "step": 2552 }, { "epoch": 0.6789893617021276, "grad_norm": 3.612217664718628, "learning_rate": 9.800092003983602e-06, "loss": 1.2499, "step": 2553 }, { "epoch": 0.6792553191489362, "grad_norm": 3.217289447784424, "learning_rate": 9.799845723755818e-06, "loss": 1.1648, "step": 2554 }, { "epoch": 0.6795212765957447, "grad_norm": 4.510238170623779, "learning_rate": 9.799599295015154e-06, "loss": 1.2728, "step": 2555 }, { "epoch": 0.6797872340425531, "grad_norm": 4.0085129737854, "learning_rate": 9.79935271776924e-06, "loss": 1.3524, "step": 2556 }, { "epoch": 0.6800531914893617, "grad_norm": 3.8481833934783936, "learning_rate": 9.799105992025699e-06, "loss": 1.2783, "step": 2557 }, { "epoch": 0.6803191489361702, "grad_norm": 3.901775598526001, "learning_rate": 9.79885911779217e-06, "loss": 1.1736, "step": 2558 }, { "epoch": 0.6805851063829788, "grad_norm": 3.864826202392578, "learning_rate": 9.798612095076291e-06, "loss": 1.3108, "step": 2559 }, { "epoch": 0.6808510638297872, "grad_norm": 3.7867627143859863, "learning_rate": 9.798364923885703e-06, "loss": 1.1626, "step": 2560 }, { "epoch": 0.6811170212765958, "grad_norm": 3.8203864097595215, "learning_rate": 9.798117604228054e-06, "loss": 1.2232, "step": 2561 }, { "epoch": 0.6813829787234043, "grad_norm": 3.5479917526245117, "learning_rate": 9.797870136110998e-06, "loss": 1.1571, "step": 2562 }, { "epoch": 0.6816489361702127, "grad_norm": 3.782655715942383, "learning_rate": 9.797622519542193e-06, "loss": 1.3004, "step": 2563 }, { "epoch": 0.6819148936170213, "grad_norm": 3.477875232696533, "learning_rate": 9.797374754529297e-06, "loss": 1.0335, "step": 2564 }, { "epoch": 0.6821808510638298, "grad_norm": 3.8241772651672363, "learning_rate": 9.797126841079979e-06, "loss": 1.4163, "step": 2565 }, { "epoch": 0.6824468085106383, "grad_norm": 3.764817476272583, "learning_rate": 9.796878779201906e-06, "loss": 1.2243, "step": 2566 }, { "epoch": 0.6827127659574468, "grad_norm": 3.784823417663574, "learning_rate": 9.796630568902758e-06, "loss": 1.4082, "step": 2567 }, { "epoch": 0.6829787234042554, "grad_norm": 3.3941454887390137, "learning_rate": 9.796382210190212e-06, "loss": 1.0939, "step": 2568 }, { "epoch": 0.6832446808510638, "grad_norm": 3.484823226928711, "learning_rate": 9.796133703071956e-06, "loss": 1.2322, "step": 2569 }, { "epoch": 0.6835106382978723, "grad_norm": 3.6055960655212402, "learning_rate": 9.795885047555673e-06, "loss": 1.3383, "step": 2570 }, { "epoch": 0.6837765957446809, "grad_norm": 3.7031943798065186, "learning_rate": 9.795636243649061e-06, "loss": 1.2987, "step": 2571 }, { "epoch": 0.6840425531914893, "grad_norm": 3.5490245819091797, "learning_rate": 9.795387291359819e-06, "loss": 1.291, "step": 2572 }, { "epoch": 0.6843085106382979, "grad_norm": 3.611907958984375, "learning_rate": 9.795138190695647e-06, "loss": 1.2693, "step": 2573 }, { "epoch": 0.6845744680851064, "grad_norm": 3.580634832382202, "learning_rate": 9.794888941664253e-06, "loss": 1.3336, "step": 2574 }, { "epoch": 0.6848404255319149, "grad_norm": 3.957103967666626, "learning_rate": 9.794639544273352e-06, "loss": 1.2077, "step": 2575 }, { "epoch": 0.6851063829787234, "grad_norm": 3.5140933990478516, "learning_rate": 9.794389998530659e-06, "loss": 1.2885, "step": 2576 }, { "epoch": 0.685372340425532, "grad_norm": 3.6171066761016846, "learning_rate": 9.794140304443891e-06, "loss": 1.2211, "step": 2577 }, { "epoch": 0.6856382978723404, "grad_norm": 3.641486167907715, "learning_rate": 9.793890462020781e-06, "loss": 1.0571, "step": 2578 }, { "epoch": 0.6859042553191489, "grad_norm": 3.605208396911621, "learning_rate": 9.793640471269055e-06, "loss": 1.1932, "step": 2579 }, { "epoch": 0.6861702127659575, "grad_norm": 3.67253041267395, "learning_rate": 9.793390332196448e-06, "loss": 1.1474, "step": 2580 }, { "epoch": 0.6864361702127659, "grad_norm": 4.190906524658203, "learning_rate": 9.793140044810701e-06, "loss": 1.2488, "step": 2581 }, { "epoch": 0.6867021276595745, "grad_norm": 4.1439104080200195, "learning_rate": 9.792889609119558e-06, "loss": 1.2747, "step": 2582 }, { "epoch": 0.686968085106383, "grad_norm": 3.9002907276153564, "learning_rate": 9.79263902513077e-06, "loss": 1.2291, "step": 2583 }, { "epoch": 0.6872340425531915, "grad_norm": 3.6862435340881348, "learning_rate": 9.792388292852084e-06, "loss": 1.1637, "step": 2584 }, { "epoch": 0.6875, "grad_norm": 3.789638042449951, "learning_rate": 9.792137412291265e-06, "loss": 1.1779, "step": 2585 }, { "epoch": 0.6877659574468085, "grad_norm": 3.5384011268615723, "learning_rate": 9.791886383456071e-06, "loss": 1.2701, "step": 2586 }, { "epoch": 0.688031914893617, "grad_norm": 3.6008050441741943, "learning_rate": 9.79163520635427e-06, "loss": 1.2479, "step": 2587 }, { "epoch": 0.6882978723404255, "grad_norm": 3.71974515914917, "learning_rate": 9.791383880993635e-06, "loss": 1.267, "step": 2588 }, { "epoch": 0.6885638297872341, "grad_norm": 3.5324504375457764, "learning_rate": 9.791132407381942e-06, "loss": 1.2725, "step": 2589 }, { "epoch": 0.6888297872340425, "grad_norm": 3.602149724960327, "learning_rate": 9.790880785526971e-06, "loss": 1.1551, "step": 2590 }, { "epoch": 0.6890957446808511, "grad_norm": 3.761108160018921, "learning_rate": 9.790629015436508e-06, "loss": 1.2654, "step": 2591 }, { "epoch": 0.6893617021276596, "grad_norm": 3.6845576763153076, "learning_rate": 9.790377097118342e-06, "loss": 1.1352, "step": 2592 }, { "epoch": 0.689627659574468, "grad_norm": 3.4206063747406006, "learning_rate": 9.79012503058027e-06, "loss": 1.1649, "step": 2593 }, { "epoch": 0.6898936170212766, "grad_norm": 3.91064190864563, "learning_rate": 9.789872815830089e-06, "loss": 1.2736, "step": 2594 }, { "epoch": 0.6901595744680851, "grad_norm": 3.3683114051818848, "learning_rate": 9.789620452875605e-06, "loss": 1.1734, "step": 2595 }, { "epoch": 0.6904255319148936, "grad_norm": 3.797476053237915, "learning_rate": 9.789367941724623e-06, "loss": 1.239, "step": 2596 }, { "epoch": 0.6906914893617021, "grad_norm": 3.623358964920044, "learning_rate": 9.78911528238496e-06, "loss": 1.2941, "step": 2597 }, { "epoch": 0.6909574468085107, "grad_norm": 4.187454700469971, "learning_rate": 9.78886247486443e-06, "loss": 1.3176, "step": 2598 }, { "epoch": 0.6912234042553191, "grad_norm": 4.131342887878418, "learning_rate": 9.78860951917086e-06, "loss": 1.3183, "step": 2599 }, { "epoch": 0.6914893617021277, "grad_norm": 3.6273796558380127, "learning_rate": 9.78835641531207e-06, "loss": 1.1836, "step": 2600 }, { "epoch": 0.6917553191489362, "grad_norm": 3.8663980960845947, "learning_rate": 9.788103163295897e-06, "loss": 1.4566, "step": 2601 }, { "epoch": 0.6920212765957446, "grad_norm": 3.8288991451263428, "learning_rate": 9.787849763130174e-06, "loss": 1.2238, "step": 2602 }, { "epoch": 0.6922872340425532, "grad_norm": 4.178062438964844, "learning_rate": 9.787596214822743e-06, "loss": 1.399, "step": 2603 }, { "epoch": 0.6925531914893617, "grad_norm": 3.824878215789795, "learning_rate": 9.787342518381447e-06, "loss": 1.2654, "step": 2604 }, { "epoch": 0.6928191489361702, "grad_norm": 3.742422103881836, "learning_rate": 9.787088673814137e-06, "loss": 1.3921, "step": 2605 }, { "epoch": 0.6930851063829787, "grad_norm": 4.080827713012695, "learning_rate": 9.78683468112867e-06, "loss": 1.2525, "step": 2606 }, { "epoch": 0.6933510638297873, "grad_norm": 3.393066883087158, "learning_rate": 9.7865805403329e-06, "loss": 1.0471, "step": 2607 }, { "epoch": 0.6936170212765957, "grad_norm": 3.3034181594848633, "learning_rate": 9.786326251434694e-06, "loss": 1.1627, "step": 2608 }, { "epoch": 0.6938829787234042, "grad_norm": 3.8288989067077637, "learning_rate": 9.786071814441918e-06, "loss": 1.2483, "step": 2609 }, { "epoch": 0.6941489361702128, "grad_norm": 3.4944722652435303, "learning_rate": 9.785817229362445e-06, "loss": 1.2921, "step": 2610 }, { "epoch": 0.6944148936170212, "grad_norm": 3.653322219848633, "learning_rate": 9.785562496204151e-06, "loss": 1.2367, "step": 2611 }, { "epoch": 0.6946808510638298, "grad_norm": 3.3792853355407715, "learning_rate": 9.785307614974922e-06, "loss": 1.1746, "step": 2612 }, { "epoch": 0.6949468085106383, "grad_norm": 3.608031988143921, "learning_rate": 9.78505258568264e-06, "loss": 1.2059, "step": 2613 }, { "epoch": 0.6952127659574469, "grad_norm": 4.2280402183532715, "learning_rate": 9.784797408335195e-06, "loss": 1.294, "step": 2614 }, { "epoch": 0.6954787234042553, "grad_norm": 3.8257791996002197, "learning_rate": 9.784542082940488e-06, "loss": 1.3261, "step": 2615 }, { "epoch": 0.6957446808510638, "grad_norm": 3.9494855403900146, "learning_rate": 9.784286609506415e-06, "loss": 1.3776, "step": 2616 }, { "epoch": 0.6960106382978724, "grad_norm": 3.8635013103485107, "learning_rate": 9.78403098804088e-06, "loss": 1.3371, "step": 2617 }, { "epoch": 0.6962765957446808, "grad_norm": 3.8114707469940186, "learning_rate": 9.783775218551796e-06, "loss": 1.3064, "step": 2618 }, { "epoch": 0.6965425531914894, "grad_norm": 3.8006489276885986, "learning_rate": 9.783519301047072e-06, "loss": 1.3864, "step": 2619 }, { "epoch": 0.6968085106382979, "grad_norm": 3.504070997238159, "learning_rate": 9.783263235534632e-06, "loss": 1.2172, "step": 2620 }, { "epoch": 0.6970744680851064, "grad_norm": 3.741771936416626, "learning_rate": 9.783007022022394e-06, "loss": 1.2375, "step": 2621 }, { "epoch": 0.6973404255319149, "grad_norm": 3.5260889530181885, "learning_rate": 9.782750660518288e-06, "loss": 1.4035, "step": 2622 }, { "epoch": 0.6976063829787233, "grad_norm": 3.832963466644287, "learning_rate": 9.782494151030245e-06, "loss": 1.2979, "step": 2623 }, { "epoch": 0.6978723404255319, "grad_norm": 3.5783939361572266, "learning_rate": 9.782237493566202e-06, "loss": 1.1859, "step": 2624 }, { "epoch": 0.6981382978723404, "grad_norm": 3.677419900894165, "learning_rate": 9.781980688134102e-06, "loss": 1.2306, "step": 2625 }, { "epoch": 0.698404255319149, "grad_norm": 3.812321901321411, "learning_rate": 9.781723734741889e-06, "loss": 1.3585, "step": 2626 }, { "epoch": 0.6986702127659574, "grad_norm": 3.3270645141601562, "learning_rate": 9.781466633397512e-06, "loss": 1.0776, "step": 2627 }, { "epoch": 0.698936170212766, "grad_norm": 3.6559667587280273, "learning_rate": 9.78120938410893e-06, "loss": 1.3296, "step": 2628 }, { "epoch": 0.6992021276595745, "grad_norm": 3.707422971725464, "learning_rate": 9.7809519868841e-06, "loss": 1.2396, "step": 2629 }, { "epoch": 0.699468085106383, "grad_norm": 3.875147581100464, "learning_rate": 9.780694441730987e-06, "loss": 1.4079, "step": 2630 }, { "epoch": 0.6997340425531915, "grad_norm": 4.308002471923828, "learning_rate": 9.780436748657559e-06, "loss": 1.3675, "step": 2631 }, { "epoch": 0.7, "grad_norm": 3.6063718795776367, "learning_rate": 9.780178907671788e-06, "loss": 1.1953, "step": 2632 }, { "epoch": 0.7002659574468085, "grad_norm": 3.582390308380127, "learning_rate": 9.779920918781656e-06, "loss": 1.2841, "step": 2633 }, { "epoch": 0.700531914893617, "grad_norm": 3.8668954372406006, "learning_rate": 9.779662781995144e-06, "loss": 1.3806, "step": 2634 }, { "epoch": 0.7007978723404256, "grad_norm": 3.4479143619537354, "learning_rate": 9.779404497320236e-06, "loss": 1.3201, "step": 2635 }, { "epoch": 0.701063829787234, "grad_norm": 4.041039943695068, "learning_rate": 9.779146064764925e-06, "loss": 1.1912, "step": 2636 }, { "epoch": 0.7013297872340426, "grad_norm": 3.944117307662964, "learning_rate": 9.77888748433721e-06, "loss": 1.1603, "step": 2637 }, { "epoch": 0.7015957446808511, "grad_norm": 4.008464336395264, "learning_rate": 9.77862875604509e-06, "loss": 1.3612, "step": 2638 }, { "epoch": 0.7018617021276595, "grad_norm": 3.5746493339538574, "learning_rate": 9.778369879896568e-06, "loss": 1.3117, "step": 2639 }, { "epoch": 0.7021276595744681, "grad_norm": 4.120686054229736, "learning_rate": 9.778110855899659e-06, "loss": 1.2801, "step": 2640 }, { "epoch": 0.7023936170212766, "grad_norm": 3.7582547664642334, "learning_rate": 9.777851684062371e-06, "loss": 1.291, "step": 2641 }, { "epoch": 0.7026595744680851, "grad_norm": 3.8033053874969482, "learning_rate": 9.77759236439273e-06, "loss": 1.3342, "step": 2642 }, { "epoch": 0.7029255319148936, "grad_norm": 3.712113618850708, "learning_rate": 9.777332896898754e-06, "loss": 1.1921, "step": 2643 }, { "epoch": 0.7031914893617022, "grad_norm": 3.1552655696868896, "learning_rate": 9.777073281588476e-06, "loss": 1.1407, "step": 2644 }, { "epoch": 0.7034574468085106, "grad_norm": 4.050416946411133, "learning_rate": 9.776813518469924e-06, "loss": 1.3787, "step": 2645 }, { "epoch": 0.7037234042553191, "grad_norm": 3.63802170753479, "learning_rate": 9.77655360755114e-06, "loss": 1.3203, "step": 2646 }, { "epoch": 0.7039893617021277, "grad_norm": 4.1890482902526855, "learning_rate": 9.77629354884016e-06, "loss": 1.3532, "step": 2647 }, { "epoch": 0.7042553191489361, "grad_norm": 4.1286444664001465, "learning_rate": 9.776033342345038e-06, "loss": 1.2704, "step": 2648 }, { "epoch": 0.7045212765957447, "grad_norm": 3.4052047729492188, "learning_rate": 9.77577298807382e-06, "loss": 1.2537, "step": 2649 }, { "epoch": 0.7047872340425532, "grad_norm": 4.194342136383057, "learning_rate": 9.775512486034564e-06, "loss": 1.449, "step": 2650 }, { "epoch": 0.7050531914893617, "grad_norm": 3.945206880569458, "learning_rate": 9.775251836235327e-06, "loss": 1.357, "step": 2651 }, { "epoch": 0.7053191489361702, "grad_norm": 3.5744996070861816, "learning_rate": 9.774991038684177e-06, "loss": 1.2701, "step": 2652 }, { "epoch": 0.7055851063829788, "grad_norm": 3.9091970920562744, "learning_rate": 9.774730093389182e-06, "loss": 1.3401, "step": 2653 }, { "epoch": 0.7058510638297872, "grad_norm": 3.7527072429656982, "learning_rate": 9.774469000358418e-06, "loss": 1.2886, "step": 2654 }, { "epoch": 0.7061170212765957, "grad_norm": 3.5021281242370605, "learning_rate": 9.774207759599961e-06, "loss": 1.2253, "step": 2655 }, { "epoch": 0.7063829787234043, "grad_norm": 3.725334405899048, "learning_rate": 9.773946371121894e-06, "loss": 1.3451, "step": 2656 }, { "epoch": 0.7066489361702127, "grad_norm": 3.3787760734558105, "learning_rate": 9.773684834932306e-06, "loss": 1.183, "step": 2657 }, { "epoch": 0.7069148936170213, "grad_norm": 3.956935167312622, "learning_rate": 9.77342315103929e-06, "loss": 1.3828, "step": 2658 }, { "epoch": 0.7071808510638298, "grad_norm": 3.7493388652801514, "learning_rate": 9.77316131945094e-06, "loss": 1.2192, "step": 2659 }, { "epoch": 0.7074468085106383, "grad_norm": 4.022577285766602, "learning_rate": 9.772899340175362e-06, "loss": 1.2509, "step": 2660 }, { "epoch": 0.7077127659574468, "grad_norm": 3.9888761043548584, "learning_rate": 9.772637213220658e-06, "loss": 1.3076, "step": 2661 }, { "epoch": 0.7079787234042553, "grad_norm": 3.502845048904419, "learning_rate": 9.772374938594937e-06, "loss": 1.4205, "step": 2662 }, { "epoch": 0.7082446808510638, "grad_norm": 3.611692190170288, "learning_rate": 9.772112516306318e-06, "loss": 1.2036, "step": 2663 }, { "epoch": 0.7085106382978723, "grad_norm": 3.3075003623962402, "learning_rate": 9.77184994636292e-06, "loss": 1.1399, "step": 2664 }, { "epoch": 0.7087765957446809, "grad_norm": 3.6357240676879883, "learning_rate": 9.771587228772866e-06, "loss": 1.2438, "step": 2665 }, { "epoch": 0.7090425531914893, "grad_norm": 3.798506259918213, "learning_rate": 9.771324363544286e-06, "loss": 1.2793, "step": 2666 }, { "epoch": 0.7093085106382979, "grad_norm": 3.3980555534362793, "learning_rate": 9.771061350685312e-06, "loss": 1.2446, "step": 2667 }, { "epoch": 0.7095744680851064, "grad_norm": 3.5380852222442627, "learning_rate": 9.770798190204083e-06, "loss": 1.1996, "step": 2668 }, { "epoch": 0.7098404255319148, "grad_norm": 3.93696665763855, "learning_rate": 9.77053488210874e-06, "loss": 1.2549, "step": 2669 }, { "epoch": 0.7101063829787234, "grad_norm": 4.042500019073486, "learning_rate": 9.770271426407432e-06, "loss": 1.455, "step": 2670 }, { "epoch": 0.7103723404255319, "grad_norm": 3.6526906490325928, "learning_rate": 9.770007823108309e-06, "loss": 1.3447, "step": 2671 }, { "epoch": 0.7106382978723405, "grad_norm": 3.8958542346954346, "learning_rate": 9.76974407221953e-06, "loss": 1.2542, "step": 2672 }, { "epoch": 0.7109042553191489, "grad_norm": 3.5408430099487305, "learning_rate": 9.769480173749252e-06, "loss": 1.3333, "step": 2673 }, { "epoch": 0.7111702127659575, "grad_norm": 3.586918592453003, "learning_rate": 9.769216127705643e-06, "loss": 1.2469, "step": 2674 }, { "epoch": 0.711436170212766, "grad_norm": 3.6321678161621094, "learning_rate": 9.76895193409687e-06, "loss": 1.3352, "step": 2675 }, { "epoch": 0.7117021276595744, "grad_norm": 3.4352383613586426, "learning_rate": 9.768687592931111e-06, "loss": 1.228, "step": 2676 }, { "epoch": 0.711968085106383, "grad_norm": 3.756770610809326, "learning_rate": 9.768423104216544e-06, "loss": 1.1776, "step": 2677 }, { "epoch": 0.7122340425531914, "grad_norm": 4.270863056182861, "learning_rate": 9.76815846796135e-06, "loss": 1.2372, "step": 2678 }, { "epoch": 0.7125, "grad_norm": 4.0467848777771, "learning_rate": 9.767893684173722e-06, "loss": 1.33, "step": 2679 }, { "epoch": 0.7127659574468085, "grad_norm": 3.9330484867095947, "learning_rate": 9.767628752861848e-06, "loss": 1.2019, "step": 2680 }, { "epoch": 0.7130319148936171, "grad_norm": 4.011680603027344, "learning_rate": 9.767363674033928e-06, "loss": 1.1982, "step": 2681 }, { "epoch": 0.7132978723404255, "grad_norm": 3.5905420780181885, "learning_rate": 9.767098447698163e-06, "loss": 1.2441, "step": 2682 }, { "epoch": 0.7135638297872341, "grad_norm": 3.8876521587371826, "learning_rate": 9.766833073862758e-06, "loss": 1.3112, "step": 2683 }, { "epoch": 0.7138297872340426, "grad_norm": 3.6759207248687744, "learning_rate": 9.766567552535928e-06, "loss": 1.2974, "step": 2684 }, { "epoch": 0.714095744680851, "grad_norm": 3.6160476207733154, "learning_rate": 9.766301883725884e-06, "loss": 1.3107, "step": 2685 }, { "epoch": 0.7143617021276596, "grad_norm": 3.9795331954956055, "learning_rate": 9.766036067440849e-06, "loss": 1.4063, "step": 2686 }, { "epoch": 0.714627659574468, "grad_norm": 3.899998188018799, "learning_rate": 9.765770103689045e-06, "loss": 1.3517, "step": 2687 }, { "epoch": 0.7148936170212766, "grad_norm": 3.501302719116211, "learning_rate": 9.765503992478704e-06, "loss": 1.078, "step": 2688 }, { "epoch": 0.7151595744680851, "grad_norm": 3.4490084648132324, "learning_rate": 9.76523773381806e-06, "loss": 1.2363, "step": 2689 }, { "epoch": 0.7154255319148937, "grad_norm": 3.773393154144287, "learning_rate": 9.76497132771535e-06, "loss": 1.2677, "step": 2690 }, { "epoch": 0.7156914893617021, "grad_norm": 3.2833402156829834, "learning_rate": 9.764704774178816e-06, "loss": 1.2409, "step": 2691 }, { "epoch": 0.7159574468085106, "grad_norm": 3.798407793045044, "learning_rate": 9.764438073216706e-06, "loss": 1.2375, "step": 2692 }, { "epoch": 0.7162234042553192, "grad_norm": 3.383553981781006, "learning_rate": 9.764171224837274e-06, "loss": 1.223, "step": 2693 }, { "epoch": 0.7164893617021276, "grad_norm": 3.781569242477417, "learning_rate": 9.763904229048775e-06, "loss": 1.1822, "step": 2694 }, { "epoch": 0.7167553191489362, "grad_norm": 3.862577438354492, "learning_rate": 9.76363708585947e-06, "loss": 1.2266, "step": 2695 }, { "epoch": 0.7170212765957447, "grad_norm": 3.4044363498687744, "learning_rate": 9.763369795277627e-06, "loss": 1.1887, "step": 2696 }, { "epoch": 0.7172872340425532, "grad_norm": 3.930368185043335, "learning_rate": 9.763102357311511e-06, "loss": 1.2911, "step": 2697 }, { "epoch": 0.7175531914893617, "grad_norm": 3.72084379196167, "learning_rate": 9.762834771969403e-06, "loss": 1.2693, "step": 2698 }, { "epoch": 0.7178191489361702, "grad_norm": 3.3735997676849365, "learning_rate": 9.762567039259577e-06, "loss": 1.2202, "step": 2699 }, { "epoch": 0.7180851063829787, "grad_norm": 3.3215930461883545, "learning_rate": 9.762299159190322e-06, "loss": 1.311, "step": 2700 }, { "epoch": 0.7183510638297872, "grad_norm": 3.2667737007141113, "learning_rate": 9.762031131769923e-06, "loss": 1.1621, "step": 2701 }, { "epoch": 0.7186170212765958, "grad_norm": 3.8327572345733643, "learning_rate": 9.761762957006673e-06, "loss": 1.2764, "step": 2702 }, { "epoch": 0.7188829787234042, "grad_norm": 3.693328857421875, "learning_rate": 9.761494634908872e-06, "loss": 1.168, "step": 2703 }, { "epoch": 0.7191489361702128, "grad_norm": 3.7882509231567383, "learning_rate": 9.761226165484822e-06, "loss": 1.3076, "step": 2704 }, { "epoch": 0.7194148936170213, "grad_norm": 3.366978645324707, "learning_rate": 9.760957548742828e-06, "loss": 1.3628, "step": 2705 }, { "epoch": 0.7196808510638298, "grad_norm": 3.4671497344970703, "learning_rate": 9.7606887846912e-06, "loss": 1.2197, "step": 2706 }, { "epoch": 0.7199468085106383, "grad_norm": 4.486639022827148, "learning_rate": 9.760419873338261e-06, "loss": 1.1786, "step": 2707 }, { "epoch": 0.7202127659574468, "grad_norm": 3.5285980701446533, "learning_rate": 9.760150814692321e-06, "loss": 1.0701, "step": 2708 }, { "epoch": 0.7204787234042553, "grad_norm": 3.4500350952148438, "learning_rate": 9.759881608761714e-06, "loss": 1.1768, "step": 2709 }, { "epoch": 0.7207446808510638, "grad_norm": 3.219653606414795, "learning_rate": 9.759612255554765e-06, "loss": 1.1413, "step": 2710 }, { "epoch": 0.7210106382978724, "grad_norm": 3.7905290126800537, "learning_rate": 9.75934275507981e-06, "loss": 1.3632, "step": 2711 }, { "epoch": 0.7212765957446808, "grad_norm": 3.765892744064331, "learning_rate": 9.759073107345186e-06, "loss": 1.3237, "step": 2712 }, { "epoch": 0.7215425531914894, "grad_norm": 3.8589115142822266, "learning_rate": 9.758803312359236e-06, "loss": 1.3028, "step": 2713 }, { "epoch": 0.7218085106382979, "grad_norm": 3.688624143600464, "learning_rate": 9.758533370130308e-06, "loss": 1.2325, "step": 2714 }, { "epoch": 0.7220744680851063, "grad_norm": 3.397474765777588, "learning_rate": 9.758263280666757e-06, "loss": 1.3173, "step": 2715 }, { "epoch": 0.7223404255319149, "grad_norm": 3.9396157264709473, "learning_rate": 9.757993043976937e-06, "loss": 1.4517, "step": 2716 }, { "epoch": 0.7226063829787234, "grad_norm": 3.5887930393218994, "learning_rate": 9.757722660069211e-06, "loss": 1.1431, "step": 2717 }, { "epoch": 0.722872340425532, "grad_norm": 3.520183563232422, "learning_rate": 9.757452128951945e-06, "loss": 1.3442, "step": 2718 }, { "epoch": 0.7231382978723404, "grad_norm": 3.704939365386963, "learning_rate": 9.757181450633507e-06, "loss": 1.2257, "step": 2719 }, { "epoch": 0.723404255319149, "grad_norm": 4.201409816741943, "learning_rate": 9.756910625122276e-06, "loss": 1.234, "step": 2720 }, { "epoch": 0.7236702127659574, "grad_norm": 3.571162700653076, "learning_rate": 9.756639652426627e-06, "loss": 1.195, "step": 2721 }, { "epoch": 0.7239361702127659, "grad_norm": 3.463414192199707, "learning_rate": 9.75636853255495e-06, "loss": 1.2494, "step": 2722 }, { "epoch": 0.7242021276595745, "grad_norm": 3.4496824741363525, "learning_rate": 9.75609726551563e-06, "loss": 1.1707, "step": 2723 }, { "epoch": 0.7244680851063829, "grad_norm": 3.9885363578796387, "learning_rate": 9.75582585131706e-06, "loss": 1.2613, "step": 2724 }, { "epoch": 0.7247340425531915, "grad_norm": 4.085259437561035, "learning_rate": 9.755554289967638e-06, "loss": 1.2527, "step": 2725 }, { "epoch": 0.725, "grad_norm": 4.417264938354492, "learning_rate": 9.755282581475769e-06, "loss": 1.466, "step": 2726 }, { "epoch": 0.7252659574468086, "grad_norm": 3.954056739807129, "learning_rate": 9.755010725849857e-06, "loss": 1.2379, "step": 2727 }, { "epoch": 0.725531914893617, "grad_norm": 3.838103771209717, "learning_rate": 9.754738723098316e-06, "loss": 1.1999, "step": 2728 }, { "epoch": 0.7257978723404256, "grad_norm": 4.1355695724487305, "learning_rate": 9.75446657322956e-06, "loss": 1.2805, "step": 2729 }, { "epoch": 0.726063829787234, "grad_norm": 4.266016483306885, "learning_rate": 9.75419427625201e-06, "loss": 1.274, "step": 2730 }, { "epoch": 0.7263297872340425, "grad_norm": 3.8930816650390625, "learning_rate": 9.753921832174094e-06, "loss": 1.3094, "step": 2731 }, { "epoch": 0.7265957446808511, "grad_norm": 3.7425036430358887, "learning_rate": 9.753649241004238e-06, "loss": 1.2826, "step": 2732 }, { "epoch": 0.7268617021276595, "grad_norm": 4.708345890045166, "learning_rate": 9.753376502750878e-06, "loss": 1.4243, "step": 2733 }, { "epoch": 0.7271276595744681, "grad_norm": 3.6511597633361816, "learning_rate": 9.753103617422452e-06, "loss": 1.1892, "step": 2734 }, { "epoch": 0.7273936170212766, "grad_norm": 3.807124376296997, "learning_rate": 9.752830585027406e-06, "loss": 1.2767, "step": 2735 }, { "epoch": 0.7276595744680852, "grad_norm": 3.596545457839966, "learning_rate": 9.752557405574184e-06, "loss": 1.1901, "step": 2736 }, { "epoch": 0.7279255319148936, "grad_norm": 3.6757147312164307, "learning_rate": 9.752284079071242e-06, "loss": 1.4032, "step": 2737 }, { "epoch": 0.7281914893617021, "grad_norm": 3.862985372543335, "learning_rate": 9.752010605527033e-06, "loss": 1.1524, "step": 2738 }, { "epoch": 0.7284574468085107, "grad_norm": 3.685128927230835, "learning_rate": 9.751736984950023e-06, "loss": 1.1703, "step": 2739 }, { "epoch": 0.7287234042553191, "grad_norm": 3.4319050312042236, "learning_rate": 9.751463217348675e-06, "loss": 1.1965, "step": 2740 }, { "epoch": 0.7289893617021277, "grad_norm": 3.4726648330688477, "learning_rate": 9.751189302731463e-06, "loss": 1.24, "step": 2741 }, { "epoch": 0.7292553191489362, "grad_norm": 3.4759905338287354, "learning_rate": 9.750915241106857e-06, "loss": 1.1663, "step": 2742 }, { "epoch": 0.7295212765957447, "grad_norm": 3.5179250240325928, "learning_rate": 9.750641032483344e-06, "loss": 1.1964, "step": 2743 }, { "epoch": 0.7297872340425532, "grad_norm": 3.397850751876831, "learning_rate": 9.750366676869401e-06, "loss": 1.159, "step": 2744 }, { "epoch": 0.7300531914893617, "grad_norm": 3.505492687225342, "learning_rate": 9.75009217427352e-06, "loss": 1.4271, "step": 2745 }, { "epoch": 0.7303191489361702, "grad_norm": 3.516559362411499, "learning_rate": 9.749817524704198e-06, "loss": 1.2119, "step": 2746 }, { "epoch": 0.7305851063829787, "grad_norm": 3.5949020385742188, "learning_rate": 9.749542728169925e-06, "loss": 1.1291, "step": 2747 }, { "epoch": 0.7308510638297873, "grad_norm": 3.3480985164642334, "learning_rate": 9.749267784679211e-06, "loss": 1.1421, "step": 2748 }, { "epoch": 0.7311170212765957, "grad_norm": 3.4003922939300537, "learning_rate": 9.74899269424056e-06, "loss": 1.3106, "step": 2749 }, { "epoch": 0.7313829787234043, "grad_norm": 3.5191762447357178, "learning_rate": 9.748717456862484e-06, "loss": 1.1878, "step": 2750 }, { "epoch": 0.7316489361702128, "grad_norm": 3.5664145946502686, "learning_rate": 9.748442072553496e-06, "loss": 1.2272, "step": 2751 }, { "epoch": 0.7319148936170212, "grad_norm": 3.928241491317749, "learning_rate": 9.748166541322124e-06, "loss": 1.2986, "step": 2752 }, { "epoch": 0.7321808510638298, "grad_norm": 3.8403828144073486, "learning_rate": 9.747890863176887e-06, "loss": 1.3132, "step": 2753 }, { "epoch": 0.7324468085106383, "grad_norm": 3.4996137619018555, "learning_rate": 9.747615038126317e-06, "loss": 1.3824, "step": 2754 }, { "epoch": 0.7327127659574468, "grad_norm": 3.5281126499176025, "learning_rate": 9.747339066178947e-06, "loss": 1.3015, "step": 2755 }, { "epoch": 0.7329787234042553, "grad_norm": 3.466567277908325, "learning_rate": 9.747062947343318e-06, "loss": 1.2638, "step": 2756 }, { "epoch": 0.7332446808510639, "grad_norm": 3.8412346839904785, "learning_rate": 9.746786681627971e-06, "loss": 1.1944, "step": 2757 }, { "epoch": 0.7335106382978723, "grad_norm": 3.3403968811035156, "learning_rate": 9.746510269041459e-06, "loss": 1.215, "step": 2758 }, { "epoch": 0.7337765957446809, "grad_norm": 3.735173225402832, "learning_rate": 9.746233709592328e-06, "loss": 1.393, "step": 2759 }, { "epoch": 0.7340425531914894, "grad_norm": 4.095008373260498, "learning_rate": 9.745957003289138e-06, "loss": 1.2848, "step": 2760 }, { "epoch": 0.7343085106382978, "grad_norm": 3.8568758964538574, "learning_rate": 9.745680150140452e-06, "loss": 1.3195, "step": 2761 }, { "epoch": 0.7345744680851064, "grad_norm": 3.512941360473633, "learning_rate": 9.745403150154833e-06, "loss": 1.0682, "step": 2762 }, { "epoch": 0.7348404255319149, "grad_norm": 4.007373332977295, "learning_rate": 9.745126003340854e-06, "loss": 1.2665, "step": 2763 }, { "epoch": 0.7351063829787234, "grad_norm": 3.8637166023254395, "learning_rate": 9.74484870970709e-06, "loss": 1.4367, "step": 2764 }, { "epoch": 0.7353723404255319, "grad_norm": 3.6544454097747803, "learning_rate": 9.744571269262122e-06, "loss": 1.157, "step": 2765 }, { "epoch": 0.7356382978723405, "grad_norm": 3.5814568996429443, "learning_rate": 9.744293682014532e-06, "loss": 1.2989, "step": 2766 }, { "epoch": 0.7359042553191489, "grad_norm": 3.59860897064209, "learning_rate": 9.74401594797291e-06, "loss": 1.1852, "step": 2767 }, { "epoch": 0.7361702127659574, "grad_norm": 3.694519519805908, "learning_rate": 9.743738067145849e-06, "loss": 1.3947, "step": 2768 }, { "epoch": 0.736436170212766, "grad_norm": 3.570734977722168, "learning_rate": 9.743460039541947e-06, "loss": 1.3176, "step": 2769 }, { "epoch": 0.7367021276595744, "grad_norm": 3.448857545852661, "learning_rate": 9.743181865169806e-06, "loss": 1.2162, "step": 2770 }, { "epoch": 0.736968085106383, "grad_norm": 3.7955188751220703, "learning_rate": 9.742903544038033e-06, "loss": 1.2489, "step": 2771 }, { "epoch": 0.7372340425531915, "grad_norm": 3.520260810852051, "learning_rate": 9.742625076155244e-06, "loss": 1.2545, "step": 2772 }, { "epoch": 0.7375, "grad_norm": 3.3301799297332764, "learning_rate": 9.742346461530048e-06, "loss": 1.0909, "step": 2773 }, { "epoch": 0.7377659574468085, "grad_norm": 3.57509708404541, "learning_rate": 9.742067700171069e-06, "loss": 1.2049, "step": 2774 }, { "epoch": 0.738031914893617, "grad_norm": 3.4712679386138916, "learning_rate": 9.741788792086934e-06, "loss": 1.1797, "step": 2775 }, { "epoch": 0.7382978723404255, "grad_norm": 3.4553110599517822, "learning_rate": 9.74150973728627e-06, "loss": 1.1082, "step": 2776 }, { "epoch": 0.738563829787234, "grad_norm": 3.6550087928771973, "learning_rate": 9.741230535777712e-06, "loss": 1.281, "step": 2777 }, { "epoch": 0.7388297872340426, "grad_norm": 3.3699588775634766, "learning_rate": 9.7409511875699e-06, "loss": 1.2331, "step": 2778 }, { "epoch": 0.739095744680851, "grad_norm": 3.393129825592041, "learning_rate": 9.740671692671478e-06, "loss": 1.1614, "step": 2779 }, { "epoch": 0.7393617021276596, "grad_norm": 3.888546943664551, "learning_rate": 9.74039205109109e-06, "loss": 1.3773, "step": 2780 }, { "epoch": 0.7396276595744681, "grad_norm": 3.5572216510772705, "learning_rate": 9.740112262837391e-06, "loss": 1.2269, "step": 2781 }, { "epoch": 0.7398936170212767, "grad_norm": 3.7788665294647217, "learning_rate": 9.73983232791904e-06, "loss": 1.2385, "step": 2782 }, { "epoch": 0.7401595744680851, "grad_norm": 4.092897891998291, "learning_rate": 9.739552246344692e-06, "loss": 1.3396, "step": 2783 }, { "epoch": 0.7404255319148936, "grad_norm": 3.679199457168579, "learning_rate": 9.73927201812302e-06, "loss": 1.2957, "step": 2784 }, { "epoch": 0.7406914893617021, "grad_norm": 3.590893030166626, "learning_rate": 9.738991643262693e-06, "loss": 1.3364, "step": 2785 }, { "epoch": 0.7409574468085106, "grad_norm": 3.5082991123199463, "learning_rate": 9.738711121772384e-06, "loss": 1.1921, "step": 2786 }, { "epoch": 0.7412234042553192, "grad_norm": 3.556530475616455, "learning_rate": 9.738430453660774e-06, "loss": 1.2388, "step": 2787 }, { "epoch": 0.7414893617021276, "grad_norm": 4.152648448944092, "learning_rate": 9.738149638936547e-06, "loss": 1.3962, "step": 2788 }, { "epoch": 0.7417553191489362, "grad_norm": 3.8726470470428467, "learning_rate": 9.73786867760839e-06, "loss": 1.368, "step": 2789 }, { "epoch": 0.7420212765957447, "grad_norm": 3.4200189113616943, "learning_rate": 9.737587569685e-06, "loss": 1.3165, "step": 2790 }, { "epoch": 0.7422872340425531, "grad_norm": 3.8217222690582275, "learning_rate": 9.737306315175072e-06, "loss": 1.07, "step": 2791 }, { "epoch": 0.7425531914893617, "grad_norm": 4.083987236022949, "learning_rate": 9.73702491408731e-06, "loss": 1.2129, "step": 2792 }, { "epoch": 0.7428191489361702, "grad_norm": 3.396623373031616, "learning_rate": 9.73674336643042e-06, "loss": 1.1692, "step": 2793 }, { "epoch": 0.7430851063829788, "grad_norm": 3.545069456100464, "learning_rate": 9.736461672213112e-06, "loss": 1.2257, "step": 2794 }, { "epoch": 0.7433510638297872, "grad_norm": 3.856208324432373, "learning_rate": 9.736179831444103e-06, "loss": 1.4061, "step": 2795 }, { "epoch": 0.7436170212765958, "grad_norm": 3.6652262210845947, "learning_rate": 9.735897844132116e-06, "loss": 1.1792, "step": 2796 }, { "epoch": 0.7438829787234043, "grad_norm": 3.402409791946411, "learning_rate": 9.735615710285873e-06, "loss": 1.1954, "step": 2797 }, { "epoch": 0.7441489361702127, "grad_norm": 4.120236396789551, "learning_rate": 9.735333429914103e-06, "loss": 1.3625, "step": 2798 }, { "epoch": 0.7444148936170213, "grad_norm": 3.873011350631714, "learning_rate": 9.735051003025543e-06, "loss": 1.1915, "step": 2799 }, { "epoch": 0.7446808510638298, "grad_norm": 3.4933876991271973, "learning_rate": 9.73476842962893e-06, "loss": 1.1695, "step": 2800 }, { "epoch": 0.7449468085106383, "grad_norm": 3.8242671489715576, "learning_rate": 9.734485709733007e-06, "loss": 1.2618, "step": 2801 }, { "epoch": 0.7452127659574468, "grad_norm": 3.512907028198242, "learning_rate": 9.734202843346522e-06, "loss": 1.1924, "step": 2802 }, { "epoch": 0.7454787234042554, "grad_norm": 4.221972465515137, "learning_rate": 9.733919830478227e-06, "loss": 1.2335, "step": 2803 }, { "epoch": 0.7457446808510638, "grad_norm": 3.864529609680176, "learning_rate": 9.73363667113688e-06, "loss": 1.3128, "step": 2804 }, { "epoch": 0.7460106382978723, "grad_norm": 4.328346252441406, "learning_rate": 9.73335336533124e-06, "loss": 1.3956, "step": 2805 }, { "epoch": 0.7462765957446809, "grad_norm": 3.605314254760742, "learning_rate": 9.733069913070074e-06, "loss": 1.1795, "step": 2806 }, { "epoch": 0.7465425531914893, "grad_norm": 4.531727313995361, "learning_rate": 9.732786314362154e-06, "loss": 1.3895, "step": 2807 }, { "epoch": 0.7468085106382979, "grad_norm": 3.587550163269043, "learning_rate": 9.732502569216252e-06, "loss": 1.289, "step": 2808 }, { "epoch": 0.7470744680851064, "grad_norm": 3.99782133102417, "learning_rate": 9.73221867764115e-06, "loss": 1.3014, "step": 2809 }, { "epoch": 0.7473404255319149, "grad_norm": 3.9140994548797607, "learning_rate": 9.731934639645628e-06, "loss": 1.2428, "step": 2810 }, { "epoch": 0.7476063829787234, "grad_norm": 3.7804577350616455, "learning_rate": 9.73165045523848e-06, "loss": 1.2315, "step": 2811 }, { "epoch": 0.747872340425532, "grad_norm": 4.103899002075195, "learning_rate": 9.731366124428495e-06, "loss": 1.4515, "step": 2812 }, { "epoch": 0.7481382978723404, "grad_norm": 4.170511245727539, "learning_rate": 9.73108164722447e-06, "loss": 1.3773, "step": 2813 }, { "epoch": 0.7484042553191489, "grad_norm": 3.4937591552734375, "learning_rate": 9.73079702363521e-06, "loss": 1.1113, "step": 2814 }, { "epoch": 0.7486702127659575, "grad_norm": 3.6979286670684814, "learning_rate": 9.730512253669523e-06, "loss": 1.2525, "step": 2815 }, { "epoch": 0.7489361702127659, "grad_norm": 3.6911709308624268, "learning_rate": 9.730227337336214e-06, "loss": 1.2443, "step": 2816 }, { "epoch": 0.7492021276595745, "grad_norm": 3.462308883666992, "learning_rate": 9.729942274644102e-06, "loss": 1.1075, "step": 2817 }, { "epoch": 0.749468085106383, "grad_norm": 4.0079240798950195, "learning_rate": 9.729657065602007e-06, "loss": 1.2715, "step": 2818 }, { "epoch": 0.7497340425531915, "grad_norm": 3.6619253158569336, "learning_rate": 9.729371710218755e-06, "loss": 1.135, "step": 2819 }, { "epoch": 0.75, "grad_norm": 3.3799519538879395, "learning_rate": 9.729086208503174e-06, "loss": 1.2331, "step": 2820 }, { "epoch": 0.7502659574468085, "grad_norm": 3.828418493270874, "learning_rate": 9.728800560464097e-06, "loss": 1.3006, "step": 2821 }, { "epoch": 0.750531914893617, "grad_norm": 4.1295928955078125, "learning_rate": 9.728514766110366e-06, "loss": 1.2404, "step": 2822 }, { "epoch": 0.7507978723404255, "grad_norm": 3.73343825340271, "learning_rate": 9.728228825450818e-06, "loss": 1.3261, "step": 2823 }, { "epoch": 0.7510638297872341, "grad_norm": 3.336246967315674, "learning_rate": 9.727942738494305e-06, "loss": 1.0928, "step": 2824 }, { "epoch": 0.7513297872340425, "grad_norm": 3.4438130855560303, "learning_rate": 9.727656505249676e-06, "loss": 1.2058, "step": 2825 }, { "epoch": 0.7515957446808511, "grad_norm": 3.7546231746673584, "learning_rate": 9.72737012572579e-06, "loss": 1.1447, "step": 2826 }, { "epoch": 0.7518617021276596, "grad_norm": 4.008635520935059, "learning_rate": 9.727083599931506e-06, "loss": 1.3526, "step": 2827 }, { "epoch": 0.752127659574468, "grad_norm": 4.192075729370117, "learning_rate": 9.726796927875688e-06, "loss": 1.3889, "step": 2828 }, { "epoch": 0.7523936170212766, "grad_norm": 3.805386543273926, "learning_rate": 9.726510109567211e-06, "loss": 1.3894, "step": 2829 }, { "epoch": 0.7526595744680851, "grad_norm": 3.9009950160980225, "learning_rate": 9.726223145014946e-06, "loss": 1.2844, "step": 2830 }, { "epoch": 0.7529255319148936, "grad_norm": 3.870450735092163, "learning_rate": 9.725936034227771e-06, "loss": 1.2328, "step": 2831 }, { "epoch": 0.7531914893617021, "grad_norm": 3.5746779441833496, "learning_rate": 9.725648777214571e-06, "loss": 1.2661, "step": 2832 }, { "epoch": 0.7534574468085107, "grad_norm": 4.304332733154297, "learning_rate": 9.725361373984235e-06, "loss": 1.2722, "step": 2833 }, { "epoch": 0.7537234042553191, "grad_norm": 3.693098783493042, "learning_rate": 9.725073824545655e-06, "loss": 1.3476, "step": 2834 }, { "epoch": 0.7539893617021277, "grad_norm": 3.3664565086364746, "learning_rate": 9.724786128907726e-06, "loss": 1.2575, "step": 2835 }, { "epoch": 0.7542553191489362, "grad_norm": 3.585892915725708, "learning_rate": 9.724498287079353e-06, "loss": 1.3478, "step": 2836 }, { "epoch": 0.7545212765957446, "grad_norm": 3.768718957901001, "learning_rate": 9.72421029906944e-06, "loss": 1.2749, "step": 2837 }, { "epoch": 0.7547872340425532, "grad_norm": 3.891233205795288, "learning_rate": 9.723922164886898e-06, "loss": 1.3033, "step": 2838 }, { "epoch": 0.7550531914893617, "grad_norm": 3.5751054286956787, "learning_rate": 9.723633884540643e-06, "loss": 1.1453, "step": 2839 }, { "epoch": 0.7553191489361702, "grad_norm": 3.516754150390625, "learning_rate": 9.723345458039595e-06, "loss": 1.2553, "step": 2840 }, { "epoch": 0.7555851063829787, "grad_norm": 3.76668643951416, "learning_rate": 9.723056885392677e-06, "loss": 1.3444, "step": 2841 }, { "epoch": 0.7558510638297873, "grad_norm": 3.9877772331237793, "learning_rate": 9.722768166608818e-06, "loss": 1.2582, "step": 2842 }, { "epoch": 0.7561170212765957, "grad_norm": 3.631065607070923, "learning_rate": 9.72247930169695e-06, "loss": 1.3652, "step": 2843 }, { "epoch": 0.7563829787234042, "grad_norm": 3.124361515045166, "learning_rate": 9.722190290666014e-06, "loss": 0.9727, "step": 2844 }, { "epoch": 0.7566489361702128, "grad_norm": 3.7869699001312256, "learning_rate": 9.721901133524951e-06, "loss": 1.3348, "step": 2845 }, { "epoch": 0.7569148936170212, "grad_norm": 3.49450421333313, "learning_rate": 9.721611830282707e-06, "loss": 1.2607, "step": 2846 }, { "epoch": 0.7571808510638298, "grad_norm": 4.137457370758057, "learning_rate": 9.721322380948235e-06, "loss": 1.2993, "step": 2847 }, { "epoch": 0.7574468085106383, "grad_norm": 3.492685317993164, "learning_rate": 9.721032785530488e-06, "loss": 1.3636, "step": 2848 }, { "epoch": 0.7577127659574469, "grad_norm": 3.78635835647583, "learning_rate": 9.72074304403843e-06, "loss": 1.3039, "step": 2849 }, { "epoch": 0.7579787234042553, "grad_norm": 3.5052456855773926, "learning_rate": 9.720453156481023e-06, "loss": 1.1737, "step": 2850 }, { "epoch": 0.7582446808510638, "grad_norm": 3.5687224864959717, "learning_rate": 9.72016312286724e-06, "loss": 1.3378, "step": 2851 }, { "epoch": 0.7585106382978724, "grad_norm": 3.2821710109710693, "learning_rate": 9.71987294320605e-06, "loss": 1.0614, "step": 2852 }, { "epoch": 0.7587765957446808, "grad_norm": 3.9896838665008545, "learning_rate": 9.719582617506434e-06, "loss": 1.4842, "step": 2853 }, { "epoch": 0.7590425531914894, "grad_norm": 3.674095392227173, "learning_rate": 9.719292145777377e-06, "loss": 1.2268, "step": 2854 }, { "epoch": 0.7593085106382979, "grad_norm": 3.586404800415039, "learning_rate": 9.719001528027863e-06, "loss": 1.3219, "step": 2855 }, { "epoch": 0.7595744680851064, "grad_norm": 3.734853744506836, "learning_rate": 9.718710764266888e-06, "loss": 1.2469, "step": 2856 }, { "epoch": 0.7598404255319149, "grad_norm": 3.4392611980438232, "learning_rate": 9.718419854503444e-06, "loss": 1.1928, "step": 2857 }, { "epoch": 0.7601063829787233, "grad_norm": 3.7639527320861816, "learning_rate": 9.718128798746537e-06, "loss": 1.2995, "step": 2858 }, { "epoch": 0.7603723404255319, "grad_norm": 3.564790964126587, "learning_rate": 9.717837597005169e-06, "loss": 1.2086, "step": 2859 }, { "epoch": 0.7606382978723404, "grad_norm": 3.9883244037628174, "learning_rate": 9.71754624928835e-06, "loss": 1.2138, "step": 2860 }, { "epoch": 0.760904255319149, "grad_norm": 3.823289632797241, "learning_rate": 9.717254755605097e-06, "loss": 1.2225, "step": 2861 }, { "epoch": 0.7611702127659574, "grad_norm": 3.4945852756500244, "learning_rate": 9.716963115964427e-06, "loss": 1.26, "step": 2862 }, { "epoch": 0.761436170212766, "grad_norm": 3.7626545429229736, "learning_rate": 9.716671330375366e-06, "loss": 1.2424, "step": 2863 }, { "epoch": 0.7617021276595745, "grad_norm": 3.789428949356079, "learning_rate": 9.71637939884694e-06, "loss": 1.3538, "step": 2864 }, { "epoch": 0.761968085106383, "grad_norm": 3.781531810760498, "learning_rate": 9.716087321388184e-06, "loss": 1.2693, "step": 2865 }, { "epoch": 0.7622340425531915, "grad_norm": 3.184601306915283, "learning_rate": 9.715795098008132e-06, "loss": 1.0477, "step": 2866 }, { "epoch": 0.7625, "grad_norm": 3.636810302734375, "learning_rate": 9.715502728715827e-06, "loss": 1.2691, "step": 2867 }, { "epoch": 0.7627659574468085, "grad_norm": 4.0694122314453125, "learning_rate": 9.715210213520317e-06, "loss": 1.3419, "step": 2868 }, { "epoch": 0.763031914893617, "grad_norm": 3.9551241397857666, "learning_rate": 9.714917552430652e-06, "loss": 1.2398, "step": 2869 }, { "epoch": 0.7632978723404256, "grad_norm": 3.7696473598480225, "learning_rate": 9.714624745455885e-06, "loss": 1.2691, "step": 2870 }, { "epoch": 0.763563829787234, "grad_norm": 3.726793050765991, "learning_rate": 9.71433179260508e-06, "loss": 1.2308, "step": 2871 }, { "epoch": 0.7638297872340426, "grad_norm": 3.6226067543029785, "learning_rate": 9.714038693887298e-06, "loss": 1.3653, "step": 2872 }, { "epoch": 0.7640957446808511, "grad_norm": 3.4948949813842773, "learning_rate": 9.713745449311606e-06, "loss": 1.2048, "step": 2873 }, { "epoch": 0.7643617021276595, "grad_norm": 3.3849282264709473, "learning_rate": 9.713452058887084e-06, "loss": 1.1664, "step": 2874 }, { "epoch": 0.7646276595744681, "grad_norm": 3.9506824016571045, "learning_rate": 9.713158522622804e-06, "loss": 1.4175, "step": 2875 }, { "epoch": 0.7648936170212766, "grad_norm": 3.5069642066955566, "learning_rate": 9.71286484052785e-06, "loss": 1.2298, "step": 2876 }, { "epoch": 0.7651595744680851, "grad_norm": 3.5655500888824463, "learning_rate": 9.71257101261131e-06, "loss": 1.1717, "step": 2877 }, { "epoch": 0.7654255319148936, "grad_norm": 3.450375556945801, "learning_rate": 9.712277038882274e-06, "loss": 1.1573, "step": 2878 }, { "epoch": 0.7656914893617022, "grad_norm": 3.849936008453369, "learning_rate": 9.711982919349839e-06, "loss": 1.1671, "step": 2879 }, { "epoch": 0.7659574468085106, "grad_norm": 3.557499647140503, "learning_rate": 9.711688654023105e-06, "loss": 1.2369, "step": 2880 }, { "epoch": 0.7662234042553191, "grad_norm": 4.1276326179504395, "learning_rate": 9.711394242911177e-06, "loss": 1.2304, "step": 2881 }, { "epoch": 0.7664893617021277, "grad_norm": 3.553694725036621, "learning_rate": 9.711099686023161e-06, "loss": 1.285, "step": 2882 }, { "epoch": 0.7667553191489361, "grad_norm": 3.484138250350952, "learning_rate": 9.710804983368177e-06, "loss": 1.2578, "step": 2883 }, { "epoch": 0.7670212765957447, "grad_norm": 3.855220317840576, "learning_rate": 9.71051013495534e-06, "loss": 1.2213, "step": 2884 }, { "epoch": 0.7672872340425532, "grad_norm": 3.9998855590820312, "learning_rate": 9.710215140793774e-06, "loss": 1.231, "step": 2885 }, { "epoch": 0.7675531914893617, "grad_norm": 3.568758487701416, "learning_rate": 9.709920000892605e-06, "loss": 1.1779, "step": 2886 }, { "epoch": 0.7678191489361702, "grad_norm": 3.5209362506866455, "learning_rate": 9.709624715260965e-06, "loss": 1.0908, "step": 2887 }, { "epoch": 0.7680851063829788, "grad_norm": 3.783108949661255, "learning_rate": 9.709329283907993e-06, "loss": 1.3374, "step": 2888 }, { "epoch": 0.7683510638297872, "grad_norm": 3.672305107116699, "learning_rate": 9.70903370684283e-06, "loss": 1.2719, "step": 2889 }, { "epoch": 0.7686170212765957, "grad_norm": 3.9783568382263184, "learning_rate": 9.708737984074616e-06, "loss": 1.2343, "step": 2890 }, { "epoch": 0.7688829787234043, "grad_norm": 3.6471900939941406, "learning_rate": 9.708442115612508e-06, "loss": 1.1384, "step": 2891 }, { "epoch": 0.7691489361702127, "grad_norm": 3.8330166339874268, "learning_rate": 9.708146101465657e-06, "loss": 1.3178, "step": 2892 }, { "epoch": 0.7694148936170213, "grad_norm": 3.224055290222168, "learning_rate": 9.707849941643222e-06, "loss": 1.087, "step": 2893 }, { "epoch": 0.7696808510638298, "grad_norm": 4.061996936798096, "learning_rate": 9.707553636154366e-06, "loss": 1.4389, "step": 2894 }, { "epoch": 0.7699468085106383, "grad_norm": 3.7000250816345215, "learning_rate": 9.707257185008259e-06, "loss": 1.2383, "step": 2895 }, { "epoch": 0.7702127659574468, "grad_norm": 3.3188624382019043, "learning_rate": 9.706960588214072e-06, "loss": 1.1835, "step": 2896 }, { "epoch": 0.7704787234042553, "grad_norm": 3.68198299407959, "learning_rate": 9.706663845780984e-06, "loss": 1.2511, "step": 2897 }, { "epoch": 0.7707446808510638, "grad_norm": 3.831139326095581, "learning_rate": 9.706366957718174e-06, "loss": 1.3409, "step": 2898 }, { "epoch": 0.7710106382978723, "grad_norm": 3.3753414154052734, "learning_rate": 9.70606992403483e-06, "loss": 1.1988, "step": 2899 }, { "epoch": 0.7712765957446809, "grad_norm": 3.3466532230377197, "learning_rate": 9.705772744740142e-06, "loss": 1.1079, "step": 2900 }, { "epoch": 0.7715425531914893, "grad_norm": 3.39589524269104, "learning_rate": 9.705475419843304e-06, "loss": 1.2094, "step": 2901 }, { "epoch": 0.7718085106382979, "grad_norm": 3.5272488594055176, "learning_rate": 9.705177949353516e-06, "loss": 1.2466, "step": 2902 }, { "epoch": 0.7720744680851064, "grad_norm": 3.9202656745910645, "learning_rate": 9.704880333279985e-06, "loss": 1.2347, "step": 2903 }, { "epoch": 0.7723404255319148, "grad_norm": 3.421706199645996, "learning_rate": 9.704582571631915e-06, "loss": 1.1643, "step": 2904 }, { "epoch": 0.7726063829787234, "grad_norm": 3.8939504623413086, "learning_rate": 9.704284664418521e-06, "loss": 1.4996, "step": 2905 }, { "epoch": 0.7728723404255319, "grad_norm": 3.362236976623535, "learning_rate": 9.703986611649024e-06, "loss": 1.2661, "step": 2906 }, { "epoch": 0.7731382978723405, "grad_norm": 3.2896718978881836, "learning_rate": 9.70368841333264e-06, "loss": 1.0865, "step": 2907 }, { "epoch": 0.7734042553191489, "grad_norm": 3.662534475326538, "learning_rate": 9.7033900694786e-06, "loss": 1.223, "step": 2908 }, { "epoch": 0.7736702127659575, "grad_norm": 3.7135627269744873, "learning_rate": 9.703091580096132e-06, "loss": 1.4123, "step": 2909 }, { "epoch": 0.773936170212766, "grad_norm": 3.431130886077881, "learning_rate": 9.702792945194475e-06, "loss": 1.139, "step": 2910 }, { "epoch": 0.7742021276595744, "grad_norm": 4.038398742675781, "learning_rate": 9.702494164782866e-06, "loss": 1.3352, "step": 2911 }, { "epoch": 0.774468085106383, "grad_norm": 3.5457537174224854, "learning_rate": 9.702195238870552e-06, "loss": 1.2472, "step": 2912 }, { "epoch": 0.7747340425531914, "grad_norm": 3.9684653282165527, "learning_rate": 9.70189616746678e-06, "loss": 1.2834, "step": 2913 }, { "epoch": 0.775, "grad_norm": 3.520798683166504, "learning_rate": 9.701596950580807e-06, "loss": 1.1989, "step": 2914 }, { "epoch": 0.7752659574468085, "grad_norm": 3.4203343391418457, "learning_rate": 9.701297588221888e-06, "loss": 1.2368, "step": 2915 }, { "epoch": 0.7755319148936171, "grad_norm": 3.5501503944396973, "learning_rate": 9.700998080399287e-06, "loss": 1.2317, "step": 2916 }, { "epoch": 0.7757978723404255, "grad_norm": 3.5603249073028564, "learning_rate": 9.700698427122269e-06, "loss": 1.2071, "step": 2917 }, { "epoch": 0.7760638297872341, "grad_norm": 3.5951790809631348, "learning_rate": 9.700398628400109e-06, "loss": 1.1681, "step": 2918 }, { "epoch": 0.7763297872340426, "grad_norm": 3.6561312675476074, "learning_rate": 9.700098684242082e-06, "loss": 1.3097, "step": 2919 }, { "epoch": 0.776595744680851, "grad_norm": 3.628885269165039, "learning_rate": 9.699798594657464e-06, "loss": 1.2199, "step": 2920 }, { "epoch": 0.7768617021276596, "grad_norm": 3.6864166259765625, "learning_rate": 9.699498359655548e-06, "loss": 1.2123, "step": 2921 }, { "epoch": 0.777127659574468, "grad_norm": 4.034405708312988, "learning_rate": 9.699197979245617e-06, "loss": 1.3019, "step": 2922 }, { "epoch": 0.7773936170212766, "grad_norm": 3.9352498054504395, "learning_rate": 9.69889745343697e-06, "loss": 1.4196, "step": 2923 }, { "epoch": 0.7776595744680851, "grad_norm": 3.983980894088745, "learning_rate": 9.698596782238904e-06, "loss": 1.1829, "step": 2924 }, { "epoch": 0.7779255319148937, "grad_norm": 3.4715261459350586, "learning_rate": 9.698295965660721e-06, "loss": 1.144, "step": 2925 }, { "epoch": 0.7781914893617021, "grad_norm": 3.7768967151641846, "learning_rate": 9.69799500371173e-06, "loss": 1.2891, "step": 2926 }, { "epoch": 0.7784574468085106, "grad_norm": 3.628307580947876, "learning_rate": 9.697693896401239e-06, "loss": 1.2956, "step": 2927 }, { "epoch": 0.7787234042553192, "grad_norm": 3.601635456085205, "learning_rate": 9.697392643738571e-06, "loss": 1.2924, "step": 2928 }, { "epoch": 0.7789893617021276, "grad_norm": 3.6882519721984863, "learning_rate": 9.697091245733043e-06, "loss": 1.2887, "step": 2929 }, { "epoch": 0.7792553191489362, "grad_norm": 3.7858314514160156, "learning_rate": 9.696789702393982e-06, "loss": 1.3439, "step": 2930 }, { "epoch": 0.7795212765957447, "grad_norm": 3.6974260807037354, "learning_rate": 9.696488013730717e-06, "loss": 1.2487, "step": 2931 }, { "epoch": 0.7797872340425532, "grad_norm": 3.5106611251831055, "learning_rate": 9.696186179752587e-06, "loss": 1.1533, "step": 2932 }, { "epoch": 0.7800531914893617, "grad_norm": 3.440690279006958, "learning_rate": 9.695884200468923e-06, "loss": 1.1004, "step": 2933 }, { "epoch": 0.7803191489361702, "grad_norm": 3.43935227394104, "learning_rate": 9.695582075889077e-06, "loss": 1.192, "step": 2934 }, { "epoch": 0.7805851063829787, "grad_norm": 3.6551554203033447, "learning_rate": 9.695279806022391e-06, "loss": 1.2693, "step": 2935 }, { "epoch": 0.7808510638297872, "grad_norm": 3.6879799365997314, "learning_rate": 9.694977390878219e-06, "loss": 1.3101, "step": 2936 }, { "epoch": 0.7811170212765958, "grad_norm": 3.6642568111419678, "learning_rate": 9.69467483046592e-06, "loss": 1.3313, "step": 2937 }, { "epoch": 0.7813829787234042, "grad_norm": 3.6739001274108887, "learning_rate": 9.694372124794855e-06, "loss": 1.175, "step": 2938 }, { "epoch": 0.7816489361702128, "grad_norm": 3.346895933151245, "learning_rate": 9.69406927387439e-06, "loss": 1.135, "step": 2939 }, { "epoch": 0.7819148936170213, "grad_norm": 3.605050563812256, "learning_rate": 9.693766277713893e-06, "loss": 1.2365, "step": 2940 }, { "epoch": 0.7821808510638298, "grad_norm": 3.56868839263916, "learning_rate": 9.693463136322743e-06, "loss": 1.2756, "step": 2941 }, { "epoch": 0.7824468085106383, "grad_norm": 3.4643678665161133, "learning_rate": 9.693159849710317e-06, "loss": 1.1344, "step": 2942 }, { "epoch": 0.7827127659574468, "grad_norm": 3.7843425273895264, "learning_rate": 9.692856417885998e-06, "loss": 1.2301, "step": 2943 }, { "epoch": 0.7829787234042553, "grad_norm": 3.7226831912994385, "learning_rate": 9.69255284085918e-06, "loss": 1.2124, "step": 2944 }, { "epoch": 0.7832446808510638, "grad_norm": 3.5860259532928467, "learning_rate": 9.69224911863925e-06, "loss": 1.2237, "step": 2945 }, { "epoch": 0.7835106382978724, "grad_norm": 3.68369722366333, "learning_rate": 9.691945251235608e-06, "loss": 1.3566, "step": 2946 }, { "epoch": 0.7837765957446808, "grad_norm": 3.778324842453003, "learning_rate": 9.691641238657655e-06, "loss": 1.2369, "step": 2947 }, { "epoch": 0.7840425531914894, "grad_norm": 3.4326350688934326, "learning_rate": 9.6913370809148e-06, "loss": 1.0766, "step": 2948 }, { "epoch": 0.7843085106382979, "grad_norm": 3.609269380569458, "learning_rate": 9.691032778016452e-06, "loss": 1.228, "step": 2949 }, { "epoch": 0.7845744680851063, "grad_norm": 3.3350110054016113, "learning_rate": 9.690728329972025e-06, "loss": 1.1658, "step": 2950 }, { "epoch": 0.7848404255319149, "grad_norm": 3.53971004486084, "learning_rate": 9.690423736790944e-06, "loss": 1.2674, "step": 2951 }, { "epoch": 0.7851063829787234, "grad_norm": 3.3145904541015625, "learning_rate": 9.690118998482628e-06, "loss": 1.2601, "step": 2952 }, { "epoch": 0.785372340425532, "grad_norm": 3.7415387630462646, "learning_rate": 9.689814115056509e-06, "loss": 1.3693, "step": 2953 }, { "epoch": 0.7856382978723404, "grad_norm": 3.2443130016326904, "learning_rate": 9.689509086522019e-06, "loss": 1.1516, "step": 2954 }, { "epoch": 0.785904255319149, "grad_norm": 3.4239816665649414, "learning_rate": 9.689203912888597e-06, "loss": 1.2722, "step": 2955 }, { "epoch": 0.7861702127659574, "grad_norm": 3.5822324752807617, "learning_rate": 9.688898594165685e-06, "loss": 1.2253, "step": 2956 }, { "epoch": 0.7864361702127659, "grad_norm": 3.2302675247192383, "learning_rate": 9.688593130362731e-06, "loss": 1.1031, "step": 2957 }, { "epoch": 0.7867021276595745, "grad_norm": 3.6517271995544434, "learning_rate": 9.688287521489184e-06, "loss": 1.2459, "step": 2958 }, { "epoch": 0.7869680851063829, "grad_norm": 3.772766351699829, "learning_rate": 9.687981767554502e-06, "loss": 1.2623, "step": 2959 }, { "epoch": 0.7872340425531915, "grad_norm": 3.646852731704712, "learning_rate": 9.687675868568145e-06, "loss": 1.2951, "step": 2960 }, { "epoch": 0.7875, "grad_norm": 3.738582134246826, "learning_rate": 9.687369824539577e-06, "loss": 1.3321, "step": 2961 }, { "epoch": 0.7877659574468086, "grad_norm": 3.6618778705596924, "learning_rate": 9.687063635478269e-06, "loss": 1.3527, "step": 2962 }, { "epoch": 0.788031914893617, "grad_norm": 3.6133735179901123, "learning_rate": 9.686757301393693e-06, "loss": 1.2852, "step": 2963 }, { "epoch": 0.7882978723404256, "grad_norm": 3.7590041160583496, "learning_rate": 9.686450822295327e-06, "loss": 1.2057, "step": 2964 }, { "epoch": 0.788563829787234, "grad_norm": 3.4455080032348633, "learning_rate": 9.686144198192658e-06, "loss": 1.2478, "step": 2965 }, { "epoch": 0.7888297872340425, "grad_norm": 3.4166572093963623, "learning_rate": 9.685837429095169e-06, "loss": 1.2585, "step": 2966 }, { "epoch": 0.7890957446808511, "grad_norm": 3.322124719619751, "learning_rate": 9.685530515012352e-06, "loss": 1.2452, "step": 2967 }, { "epoch": 0.7893617021276595, "grad_norm": 3.493075132369995, "learning_rate": 9.685223455953703e-06, "loss": 1.1951, "step": 2968 }, { "epoch": 0.7896276595744681, "grad_norm": 3.7366654872894287, "learning_rate": 9.684916251928727e-06, "loss": 1.4098, "step": 2969 }, { "epoch": 0.7898936170212766, "grad_norm": 3.846484899520874, "learning_rate": 9.684608902946926e-06, "loss": 1.2726, "step": 2970 }, { "epoch": 0.7901595744680852, "grad_norm": 3.382856607437134, "learning_rate": 9.684301409017808e-06, "loss": 1.2072, "step": 2971 }, { "epoch": 0.7904255319148936, "grad_norm": 3.600064277648926, "learning_rate": 9.68399377015089e-06, "loss": 1.2991, "step": 2972 }, { "epoch": 0.7906914893617021, "grad_norm": 3.4890823364257812, "learning_rate": 9.683685986355692e-06, "loss": 1.303, "step": 2973 }, { "epoch": 0.7909574468085107, "grad_norm": 3.2720248699188232, "learning_rate": 9.683378057641735e-06, "loss": 1.305, "step": 2974 }, { "epoch": 0.7912234042553191, "grad_norm": 3.3121964931488037, "learning_rate": 9.683069984018545e-06, "loss": 1.228, "step": 2975 }, { "epoch": 0.7914893617021277, "grad_norm": 3.5907375812530518, "learning_rate": 9.682761765495657e-06, "loss": 1.3374, "step": 2976 }, { "epoch": 0.7917553191489362, "grad_norm": 3.518444538116455, "learning_rate": 9.682453402082607e-06, "loss": 1.0759, "step": 2977 }, { "epoch": 0.7920212765957447, "grad_norm": 3.7533528804779053, "learning_rate": 9.682144893788934e-06, "loss": 1.2666, "step": 2978 }, { "epoch": 0.7922872340425532, "grad_norm": 3.877476453781128, "learning_rate": 9.681836240624187e-06, "loss": 1.2371, "step": 2979 }, { "epoch": 0.7925531914893617, "grad_norm": 3.945760488510132, "learning_rate": 9.681527442597916e-06, "loss": 1.282, "step": 2980 }, { "epoch": 0.7928191489361702, "grad_norm": 3.585514783859253, "learning_rate": 9.681218499719673e-06, "loss": 1.3038, "step": 2981 }, { "epoch": 0.7930851063829787, "grad_norm": 4.198021411895752, "learning_rate": 9.680909411999018e-06, "loss": 1.4758, "step": 2982 }, { "epoch": 0.7933510638297873, "grad_norm": 3.670048713684082, "learning_rate": 9.680600179445514e-06, "loss": 1.2579, "step": 2983 }, { "epoch": 0.7936170212765957, "grad_norm": 3.6147031784057617, "learning_rate": 9.68029080206873e-06, "loss": 1.2565, "step": 2984 }, { "epoch": 0.7938829787234043, "grad_norm": 3.589110851287842, "learning_rate": 9.67998127987824e-06, "loss": 1.2516, "step": 2985 }, { "epoch": 0.7941489361702128, "grad_norm": 3.5315637588500977, "learning_rate": 9.679671612883615e-06, "loss": 1.2206, "step": 2986 }, { "epoch": 0.7944148936170212, "grad_norm": 3.6465420722961426, "learning_rate": 9.679361801094445e-06, "loss": 1.2784, "step": 2987 }, { "epoch": 0.7946808510638298, "grad_norm": 3.6671435832977295, "learning_rate": 9.679051844520308e-06, "loss": 1.4118, "step": 2988 }, { "epoch": 0.7949468085106383, "grad_norm": 3.479151725769043, "learning_rate": 9.6787417431708e-06, "loss": 1.303, "step": 2989 }, { "epoch": 0.7952127659574468, "grad_norm": 3.694517135620117, "learning_rate": 9.678431497055515e-06, "loss": 1.1658, "step": 2990 }, { "epoch": 0.7954787234042553, "grad_norm": 3.453770637512207, "learning_rate": 9.67812110618405e-06, "loss": 1.2784, "step": 2991 }, { "epoch": 0.7957446808510639, "grad_norm": 3.926161527633667, "learning_rate": 9.677810570566011e-06, "loss": 1.2926, "step": 2992 }, { "epoch": 0.7960106382978723, "grad_norm": 3.6100566387176514, "learning_rate": 9.677499890211005e-06, "loss": 1.2504, "step": 2993 }, { "epoch": 0.7962765957446809, "grad_norm": 3.496819019317627, "learning_rate": 9.677189065128646e-06, "loss": 1.1922, "step": 2994 }, { "epoch": 0.7965425531914894, "grad_norm": 3.4073357582092285, "learning_rate": 9.676878095328547e-06, "loss": 1.1934, "step": 2995 }, { "epoch": 0.7968085106382978, "grad_norm": 3.5559115409851074, "learning_rate": 9.676566980820338e-06, "loss": 1.3128, "step": 2996 }, { "epoch": 0.7970744680851064, "grad_norm": 3.844743013381958, "learning_rate": 9.676255721613639e-06, "loss": 1.2881, "step": 2997 }, { "epoch": 0.7973404255319149, "grad_norm": 3.2858474254608154, "learning_rate": 9.675944317718083e-06, "loss": 1.2103, "step": 2998 }, { "epoch": 0.7976063829787234, "grad_norm": 3.7412915229797363, "learning_rate": 9.675632769143303e-06, "loss": 1.2254, "step": 2999 }, { "epoch": 0.7978723404255319, "grad_norm": 4.140746116638184, "learning_rate": 9.67532107589894e-06, "loss": 1.2933, "step": 3000 }, { "epoch": 0.7978723404255319, "eval_loss": 1.2683638334274292, "eval_runtime": 12.6307, "eval_samples_per_second": 31.669, "eval_steps_per_second": 3.959, "step": 3000 }, { "epoch": 0.7981382978723405, "grad_norm": 3.8456828594207764, "learning_rate": 9.67500923799464e-06, "loss": 1.3237, "step": 3001 }, { "epoch": 0.7984042553191489, "grad_norm": 3.4592676162719727, "learning_rate": 9.67469725544005e-06, "loss": 1.0598, "step": 3002 }, { "epoch": 0.7986702127659574, "grad_norm": 3.729926586151123, "learning_rate": 9.674385128244823e-06, "loss": 1.2681, "step": 3003 }, { "epoch": 0.798936170212766, "grad_norm": 3.4208433628082275, "learning_rate": 9.674072856418616e-06, "loss": 1.3245, "step": 3004 }, { "epoch": 0.7992021276595744, "grad_norm": 3.511957883834839, "learning_rate": 9.673760439971091e-06, "loss": 1.1623, "step": 3005 }, { "epoch": 0.799468085106383, "grad_norm": 3.794137477874756, "learning_rate": 9.673447878911916e-06, "loss": 1.1303, "step": 3006 }, { "epoch": 0.7997340425531915, "grad_norm": 3.826404571533203, "learning_rate": 9.673135173250763e-06, "loss": 1.3698, "step": 3007 }, { "epoch": 0.8, "grad_norm": 3.5505003929138184, "learning_rate": 9.672822322997305e-06, "loss": 1.257, "step": 3008 }, { "epoch": 0.8002659574468085, "grad_norm": 3.616678237915039, "learning_rate": 9.672509328161222e-06, "loss": 1.263, "step": 3009 }, { "epoch": 0.800531914893617, "grad_norm": 3.5338237285614014, "learning_rate": 9.672196188752201e-06, "loss": 1.2328, "step": 3010 }, { "epoch": 0.8007978723404255, "grad_norm": 3.4037692546844482, "learning_rate": 9.671882904779927e-06, "loss": 1.1843, "step": 3011 }, { "epoch": 0.801063829787234, "grad_norm": 3.918245315551758, "learning_rate": 9.671569476254096e-06, "loss": 1.3486, "step": 3012 }, { "epoch": 0.8013297872340426, "grad_norm": 3.5351336002349854, "learning_rate": 9.671255903184405e-06, "loss": 1.3272, "step": 3013 }, { "epoch": 0.801595744680851, "grad_norm": 3.9071462154388428, "learning_rate": 9.670942185580557e-06, "loss": 1.1649, "step": 3014 }, { "epoch": 0.8018617021276596, "grad_norm": 3.493410110473633, "learning_rate": 9.670628323452259e-06, "loss": 1.1651, "step": 3015 }, { "epoch": 0.8021276595744681, "grad_norm": 3.2986040115356445, "learning_rate": 9.670314316809222e-06, "loss": 1.2718, "step": 3016 }, { "epoch": 0.8023936170212767, "grad_norm": 3.4360411167144775, "learning_rate": 9.67000016566116e-06, "loss": 1.1393, "step": 3017 }, { "epoch": 0.8026595744680851, "grad_norm": 3.690444231033325, "learning_rate": 9.669685870017795e-06, "loss": 1.1887, "step": 3018 }, { "epoch": 0.8029255319148936, "grad_norm": 3.58248233795166, "learning_rate": 9.669371429888852e-06, "loss": 1.3714, "step": 3019 }, { "epoch": 0.8031914893617021, "grad_norm": 3.723407745361328, "learning_rate": 9.66905684528406e-06, "loss": 1.2999, "step": 3020 }, { "epoch": 0.8034574468085106, "grad_norm": 3.7996089458465576, "learning_rate": 9.66874211621315e-06, "loss": 1.3091, "step": 3021 }, { "epoch": 0.8037234042553192, "grad_norm": 3.741523265838623, "learning_rate": 9.668427242685864e-06, "loss": 1.261, "step": 3022 }, { "epoch": 0.8039893617021276, "grad_norm": 3.6952426433563232, "learning_rate": 9.668112224711941e-06, "loss": 1.3148, "step": 3023 }, { "epoch": 0.8042553191489362, "grad_norm": 3.728320837020874, "learning_rate": 9.667797062301133e-06, "loss": 1.2188, "step": 3024 }, { "epoch": 0.8045212765957447, "grad_norm": 3.7836687564849854, "learning_rate": 9.667481755463183e-06, "loss": 1.3981, "step": 3025 }, { "epoch": 0.8047872340425531, "grad_norm": 3.308515787124634, "learning_rate": 9.667166304207856e-06, "loss": 1.2107, "step": 3026 }, { "epoch": 0.8050531914893617, "grad_norm": 3.5682644844055176, "learning_rate": 9.666850708544907e-06, "loss": 1.2288, "step": 3027 }, { "epoch": 0.8053191489361702, "grad_norm": 3.817530632019043, "learning_rate": 9.666534968484105e-06, "loss": 1.2821, "step": 3028 }, { "epoch": 0.8055851063829788, "grad_norm": 3.1704676151275635, "learning_rate": 9.666219084035215e-06, "loss": 1.1683, "step": 3029 }, { "epoch": 0.8058510638297872, "grad_norm": 3.884427547454834, "learning_rate": 9.665903055208013e-06, "loss": 1.3448, "step": 3030 }, { "epoch": 0.8061170212765958, "grad_norm": 3.8523178100585938, "learning_rate": 9.665586882012278e-06, "loss": 1.1827, "step": 3031 }, { "epoch": 0.8063829787234043, "grad_norm": 3.217390298843384, "learning_rate": 9.66527056445779e-06, "loss": 1.1782, "step": 3032 }, { "epoch": 0.8066489361702127, "grad_norm": 3.484069585800171, "learning_rate": 9.66495410255434e-06, "loss": 1.2279, "step": 3033 }, { "epoch": 0.8069148936170213, "grad_norm": 3.62542724609375, "learning_rate": 9.664637496311717e-06, "loss": 1.232, "step": 3034 }, { "epoch": 0.8071808510638298, "grad_norm": 3.6373066902160645, "learning_rate": 9.664320745739717e-06, "loss": 1.2463, "step": 3035 }, { "epoch": 0.8074468085106383, "grad_norm": 3.3646364212036133, "learning_rate": 9.664003850848142e-06, "loss": 1.1543, "step": 3036 }, { "epoch": 0.8077127659574468, "grad_norm": 3.772383689880371, "learning_rate": 9.663686811646798e-06, "loss": 1.3646, "step": 3037 }, { "epoch": 0.8079787234042554, "grad_norm": 3.8896496295928955, "learning_rate": 9.663369628145493e-06, "loss": 1.2321, "step": 3038 }, { "epoch": 0.8082446808510638, "grad_norm": 4.038544654846191, "learning_rate": 9.66305230035404e-06, "loss": 1.2345, "step": 3039 }, { "epoch": 0.8085106382978723, "grad_norm": 3.7592129707336426, "learning_rate": 9.662734828282258e-06, "loss": 1.2879, "step": 3040 }, { "epoch": 0.8087765957446809, "grad_norm": 3.3927769660949707, "learning_rate": 9.662417211939974e-06, "loss": 1.2495, "step": 3041 }, { "epoch": 0.8090425531914893, "grad_norm": 3.7398223876953125, "learning_rate": 9.662099451337009e-06, "loss": 1.2328, "step": 3042 }, { "epoch": 0.8093085106382979, "grad_norm": 3.697510004043579, "learning_rate": 9.6617815464832e-06, "loss": 1.2306, "step": 3043 }, { "epoch": 0.8095744680851064, "grad_norm": 3.362252712249756, "learning_rate": 9.66146349738838e-06, "loss": 1.2598, "step": 3044 }, { "epoch": 0.8098404255319149, "grad_norm": 3.629018783569336, "learning_rate": 9.661145304062391e-06, "loss": 1.2364, "step": 3045 }, { "epoch": 0.8101063829787234, "grad_norm": 3.6889262199401855, "learning_rate": 9.66082696651508e-06, "loss": 1.2122, "step": 3046 }, { "epoch": 0.810372340425532, "grad_norm": 3.6210176944732666, "learning_rate": 9.660508484756295e-06, "loss": 1.2425, "step": 3047 }, { "epoch": 0.8106382978723404, "grad_norm": 3.52443528175354, "learning_rate": 9.66018985879589e-06, "loss": 1.1755, "step": 3048 }, { "epoch": 0.8109042553191489, "grad_norm": 3.6943182945251465, "learning_rate": 9.659871088643724e-06, "loss": 1.2033, "step": 3049 }, { "epoch": 0.8111702127659575, "grad_norm": 3.6708784103393555, "learning_rate": 9.65955217430966e-06, "loss": 1.2418, "step": 3050 }, { "epoch": 0.8114361702127659, "grad_norm": 3.3263115882873535, "learning_rate": 9.659233115803565e-06, "loss": 1.133, "step": 3051 }, { "epoch": 0.8117021276595745, "grad_norm": 3.9797048568725586, "learning_rate": 9.658913913135314e-06, "loss": 1.2549, "step": 3052 }, { "epoch": 0.811968085106383, "grad_norm": 3.505920648574829, "learning_rate": 9.658594566314781e-06, "loss": 1.3769, "step": 3053 }, { "epoch": 0.8122340425531915, "grad_norm": 3.466444492340088, "learning_rate": 9.658275075351846e-06, "loss": 1.2394, "step": 3054 }, { "epoch": 0.8125, "grad_norm": 3.4919936656951904, "learning_rate": 9.657955440256396e-06, "loss": 1.1807, "step": 3055 }, { "epoch": 0.8127659574468085, "grad_norm": 3.8641278743743896, "learning_rate": 9.65763566103832e-06, "loss": 1.2532, "step": 3056 }, { "epoch": 0.813031914893617, "grad_norm": 3.5937435626983643, "learning_rate": 9.657315737707514e-06, "loss": 1.2234, "step": 3057 }, { "epoch": 0.8132978723404255, "grad_norm": 3.8876571655273438, "learning_rate": 9.656995670273877e-06, "loss": 1.2057, "step": 3058 }, { "epoch": 0.8135638297872341, "grad_norm": 3.532804012298584, "learning_rate": 9.656675458747308e-06, "loss": 1.2109, "step": 3059 }, { "epoch": 0.8138297872340425, "grad_norm": 3.421060800552368, "learning_rate": 9.65635510313772e-06, "loss": 1.2677, "step": 3060 }, { "epoch": 0.8140957446808511, "grad_norm": 3.599653720855713, "learning_rate": 9.656034603455022e-06, "loss": 1.2561, "step": 3061 }, { "epoch": 0.8143617021276596, "grad_norm": 3.297154664993286, "learning_rate": 9.655713959709133e-06, "loss": 1.1693, "step": 3062 }, { "epoch": 0.814627659574468, "grad_norm": 3.678478240966797, "learning_rate": 9.65539317190997e-06, "loss": 1.2403, "step": 3063 }, { "epoch": 0.8148936170212766, "grad_norm": 3.6876394748687744, "learning_rate": 9.655072240067464e-06, "loss": 1.2774, "step": 3064 }, { "epoch": 0.8151595744680851, "grad_norm": 3.6876394748687744, "learning_rate": 9.65475116419154e-06, "loss": 1.1866, "step": 3065 }, { "epoch": 0.8154255319148936, "grad_norm": 4.459439277648926, "learning_rate": 9.654429944292136e-06, "loss": 1.255, "step": 3066 }, { "epoch": 0.8156914893617021, "grad_norm": 3.636715888977051, "learning_rate": 9.65410858037919e-06, "loss": 1.4368, "step": 3067 }, { "epoch": 0.8159574468085107, "grad_norm": 3.7368946075439453, "learning_rate": 9.653787072462644e-06, "loss": 1.3039, "step": 3068 }, { "epoch": 0.8162234042553191, "grad_norm": 3.32794451713562, "learning_rate": 9.653465420552445e-06, "loss": 1.1366, "step": 3069 }, { "epoch": 0.8164893617021277, "grad_norm": 3.3161087036132812, "learning_rate": 9.65314362465855e-06, "loss": 1.0602, "step": 3070 }, { "epoch": 0.8167553191489362, "grad_norm": 3.6150729656219482, "learning_rate": 9.652821684790912e-06, "loss": 1.3939, "step": 3071 }, { "epoch": 0.8170212765957446, "grad_norm": 3.7740049362182617, "learning_rate": 9.652499600959493e-06, "loss": 1.3626, "step": 3072 }, { "epoch": 0.8172872340425532, "grad_norm": 3.8331871032714844, "learning_rate": 9.65217737317426e-06, "loss": 1.3151, "step": 3073 }, { "epoch": 0.8175531914893617, "grad_norm": 3.3269927501678467, "learning_rate": 9.65185500144518e-06, "loss": 1.1879, "step": 3074 }, { "epoch": 0.8178191489361702, "grad_norm": 3.318422555923462, "learning_rate": 9.651532485782231e-06, "loss": 1.2128, "step": 3075 }, { "epoch": 0.8180851063829787, "grad_norm": 3.8798575401306152, "learning_rate": 9.65120982619539e-06, "loss": 1.2097, "step": 3076 }, { "epoch": 0.8183510638297873, "grad_norm": 3.538886785507202, "learning_rate": 9.650887022694639e-06, "loss": 1.2558, "step": 3077 }, { "epoch": 0.8186170212765957, "grad_norm": 3.8403117656707764, "learning_rate": 9.65056407528997e-06, "loss": 1.4618, "step": 3078 }, { "epoch": 0.8188829787234042, "grad_norm": 3.731025218963623, "learning_rate": 9.650240983991372e-06, "loss": 1.2627, "step": 3079 }, { "epoch": 0.8191489361702128, "grad_norm": 3.7986326217651367, "learning_rate": 9.649917748808844e-06, "loss": 1.2213, "step": 3080 }, { "epoch": 0.8194148936170212, "grad_norm": 3.556394577026367, "learning_rate": 9.649594369752384e-06, "loss": 1.2093, "step": 3081 }, { "epoch": 0.8196808510638298, "grad_norm": 3.989525318145752, "learning_rate": 9.649270846832001e-06, "loss": 1.4164, "step": 3082 }, { "epoch": 0.8199468085106383, "grad_norm": 3.6029410362243652, "learning_rate": 9.648947180057705e-06, "loss": 1.315, "step": 3083 }, { "epoch": 0.8202127659574469, "grad_norm": 3.677532196044922, "learning_rate": 9.648623369439509e-06, "loss": 1.3006, "step": 3084 }, { "epoch": 0.8204787234042553, "grad_norm": 3.241009473800659, "learning_rate": 9.648299414987434e-06, "loss": 1.1637, "step": 3085 }, { "epoch": 0.8207446808510638, "grad_norm": 3.470125198364258, "learning_rate": 9.647975316711502e-06, "loss": 1.1894, "step": 3086 }, { "epoch": 0.8210106382978724, "grad_norm": 3.6613218784332275, "learning_rate": 9.647651074621741e-06, "loss": 1.2222, "step": 3087 }, { "epoch": 0.8212765957446808, "grad_norm": 3.4483370780944824, "learning_rate": 9.647326688728184e-06, "loss": 1.1142, "step": 3088 }, { "epoch": 0.8215425531914894, "grad_norm": 3.830843687057495, "learning_rate": 9.647002159040868e-06, "loss": 1.2923, "step": 3089 }, { "epoch": 0.8218085106382979, "grad_norm": 3.445209264755249, "learning_rate": 9.646677485569834e-06, "loss": 1.2042, "step": 3090 }, { "epoch": 0.8220744680851064, "grad_norm": 3.818505048751831, "learning_rate": 9.646352668325128e-06, "loss": 1.3102, "step": 3091 }, { "epoch": 0.8223404255319149, "grad_norm": 3.4437718391418457, "learning_rate": 9.646027707316798e-06, "loss": 1.1836, "step": 3092 }, { "epoch": 0.8226063829787233, "grad_norm": 3.690908670425415, "learning_rate": 9.645702602554902e-06, "loss": 1.1375, "step": 3093 }, { "epoch": 0.8228723404255319, "grad_norm": 4.1998209953308105, "learning_rate": 9.645377354049499e-06, "loss": 1.3336, "step": 3094 }, { "epoch": 0.8231382978723404, "grad_norm": 3.559067487716675, "learning_rate": 9.64505196181065e-06, "loss": 1.1967, "step": 3095 }, { "epoch": 0.823404255319149, "grad_norm": 3.657874584197998, "learning_rate": 9.644726425848425e-06, "loss": 1.2603, "step": 3096 }, { "epoch": 0.8236702127659574, "grad_norm": 3.2679355144500732, "learning_rate": 9.644400746172896e-06, "loss": 1.177, "step": 3097 }, { "epoch": 0.823936170212766, "grad_norm": 3.9587206840515137, "learning_rate": 9.644074922794139e-06, "loss": 1.2768, "step": 3098 }, { "epoch": 0.8242021276595745, "grad_norm": 3.2773869037628174, "learning_rate": 9.643748955722238e-06, "loss": 1.2397, "step": 3099 }, { "epoch": 0.824468085106383, "grad_norm": 3.796388864517212, "learning_rate": 9.643422844967274e-06, "loss": 1.3281, "step": 3100 }, { "epoch": 0.8247340425531915, "grad_norm": 3.6081080436706543, "learning_rate": 9.643096590539343e-06, "loss": 1.1514, "step": 3101 }, { "epoch": 0.825, "grad_norm": 3.6461782455444336, "learning_rate": 9.642770192448537e-06, "loss": 1.3713, "step": 3102 }, { "epoch": 0.8252659574468085, "grad_norm": 3.731442451477051, "learning_rate": 9.642443650704954e-06, "loss": 1.3621, "step": 3103 }, { "epoch": 0.825531914893617, "grad_norm": 3.8544721603393555, "learning_rate": 9.642116965318697e-06, "loss": 1.2699, "step": 3104 }, { "epoch": 0.8257978723404256, "grad_norm": 3.6057963371276855, "learning_rate": 9.641790136299877e-06, "loss": 1.1425, "step": 3105 }, { "epoch": 0.826063829787234, "grad_norm": 3.618706226348877, "learning_rate": 9.641463163658606e-06, "loss": 1.309, "step": 3106 }, { "epoch": 0.8263297872340426, "grad_norm": 3.2677018642425537, "learning_rate": 9.641136047405e-06, "loss": 1.221, "step": 3107 }, { "epoch": 0.8265957446808511, "grad_norm": 3.311882734298706, "learning_rate": 9.64080878754918e-06, "loss": 1.2231, "step": 3108 }, { "epoch": 0.8268617021276595, "grad_norm": 3.435105562210083, "learning_rate": 9.640481384101273e-06, "loss": 1.3697, "step": 3109 }, { "epoch": 0.8271276595744681, "grad_norm": 3.77473783493042, "learning_rate": 9.640153837071407e-06, "loss": 1.4063, "step": 3110 }, { "epoch": 0.8273936170212766, "grad_norm": 3.6035094261169434, "learning_rate": 9.63982614646972e-06, "loss": 1.3273, "step": 3111 }, { "epoch": 0.8276595744680851, "grad_norm": 3.4138381481170654, "learning_rate": 9.639498312306348e-06, "loss": 1.1646, "step": 3112 }, { "epoch": 0.8279255319148936, "grad_norm": 3.638125419616699, "learning_rate": 9.639170334591437e-06, "loss": 1.3288, "step": 3113 }, { "epoch": 0.8281914893617022, "grad_norm": 3.917206287384033, "learning_rate": 9.638842213335132e-06, "loss": 1.3541, "step": 3114 }, { "epoch": 0.8284574468085106, "grad_norm": 4.120351314544678, "learning_rate": 9.63851394854759e-06, "loss": 1.3473, "step": 3115 }, { "epoch": 0.8287234042553191, "grad_norm": 3.6400179862976074, "learning_rate": 9.638185540238963e-06, "loss": 1.3199, "step": 3116 }, { "epoch": 0.8289893617021277, "grad_norm": 3.4678385257720947, "learning_rate": 9.637856988419413e-06, "loss": 1.3348, "step": 3117 }, { "epoch": 0.8292553191489361, "grad_norm": 3.490227460861206, "learning_rate": 9.637528293099111e-06, "loss": 1.2041, "step": 3118 }, { "epoch": 0.8295212765957447, "grad_norm": 3.3085920810699463, "learning_rate": 9.637199454288222e-06, "loss": 1.2509, "step": 3119 }, { "epoch": 0.8297872340425532, "grad_norm": 3.5364296436309814, "learning_rate": 9.636870471996923e-06, "loss": 1.3302, "step": 3120 }, { "epoch": 0.8300531914893617, "grad_norm": 3.952470302581787, "learning_rate": 9.636541346235392e-06, "loss": 1.3387, "step": 3121 }, { "epoch": 0.8303191489361702, "grad_norm": 3.678920269012451, "learning_rate": 9.636212077013812e-06, "loss": 1.2225, "step": 3122 }, { "epoch": 0.8305851063829788, "grad_norm": 3.4960269927978516, "learning_rate": 9.635882664342373e-06, "loss": 1.1883, "step": 3123 }, { "epoch": 0.8308510638297872, "grad_norm": 3.1453335285186768, "learning_rate": 9.635553108231266e-06, "loss": 1.0471, "step": 3124 }, { "epoch": 0.8311170212765957, "grad_norm": 3.6323747634887695, "learning_rate": 9.635223408690688e-06, "loss": 1.1595, "step": 3125 }, { "epoch": 0.8313829787234043, "grad_norm": 3.2408368587493896, "learning_rate": 9.634893565730841e-06, "loss": 1.2454, "step": 3126 }, { "epoch": 0.8316489361702127, "grad_norm": 3.628117322921753, "learning_rate": 9.63456357936193e-06, "loss": 1.3161, "step": 3127 }, { "epoch": 0.8319148936170213, "grad_norm": 3.896415948867798, "learning_rate": 9.634233449594165e-06, "loss": 1.29, "step": 3128 }, { "epoch": 0.8321808510638298, "grad_norm": 3.3425135612487793, "learning_rate": 9.63390317643776e-06, "loss": 1.0845, "step": 3129 }, { "epoch": 0.8324468085106383, "grad_norm": 3.593471050262451, "learning_rate": 9.633572759902936e-06, "loss": 1.1751, "step": 3130 }, { "epoch": 0.8327127659574468, "grad_norm": 3.8105530738830566, "learning_rate": 9.633242199999916e-06, "loss": 1.2935, "step": 3131 }, { "epoch": 0.8329787234042553, "grad_norm": 3.5633177757263184, "learning_rate": 9.632911496738927e-06, "loss": 1.2376, "step": 3132 }, { "epoch": 0.8332446808510638, "grad_norm": 3.5305428504943848, "learning_rate": 9.632580650130201e-06, "loss": 1.2905, "step": 3133 }, { "epoch": 0.8335106382978723, "grad_norm": 3.328059196472168, "learning_rate": 9.632249660183977e-06, "loss": 1.2773, "step": 3134 }, { "epoch": 0.8337765957446809, "grad_norm": 3.8208043575286865, "learning_rate": 9.631918526910493e-06, "loss": 1.2472, "step": 3135 }, { "epoch": 0.8340425531914893, "grad_norm": 3.6366043090820312, "learning_rate": 9.631587250319998e-06, "loss": 1.1361, "step": 3136 }, { "epoch": 0.8343085106382979, "grad_norm": 3.3834152221679688, "learning_rate": 9.631255830422739e-06, "loss": 1.2766, "step": 3137 }, { "epoch": 0.8345744680851064, "grad_norm": 3.6326873302459717, "learning_rate": 9.630924267228973e-06, "loss": 1.2792, "step": 3138 }, { "epoch": 0.8348404255319148, "grad_norm": 3.720566749572754, "learning_rate": 9.630592560748957e-06, "loss": 1.113, "step": 3139 }, { "epoch": 0.8351063829787234, "grad_norm": 3.732006549835205, "learning_rate": 9.630260710992956e-06, "loss": 1.1235, "step": 3140 }, { "epoch": 0.8353723404255319, "grad_norm": 3.3565263748168945, "learning_rate": 9.629928717971237e-06, "loss": 1.1881, "step": 3141 }, { "epoch": 0.8356382978723405, "grad_norm": 3.7368946075439453, "learning_rate": 9.629596581694072e-06, "loss": 1.2955, "step": 3142 }, { "epoch": 0.8359042553191489, "grad_norm": 3.77895188331604, "learning_rate": 9.629264302171739e-06, "loss": 1.2691, "step": 3143 }, { "epoch": 0.8361702127659575, "grad_norm": 3.6195473670959473, "learning_rate": 9.628931879414519e-06, "loss": 1.125, "step": 3144 }, { "epoch": 0.836436170212766, "grad_norm": 3.4380621910095215, "learning_rate": 9.628599313432694e-06, "loss": 1.2379, "step": 3145 }, { "epoch": 0.8367021276595744, "grad_norm": 3.972651958465576, "learning_rate": 9.628266604236558e-06, "loss": 1.2316, "step": 3146 }, { "epoch": 0.836968085106383, "grad_norm": 3.770378351211548, "learning_rate": 9.627933751836405e-06, "loss": 1.4091, "step": 3147 }, { "epoch": 0.8372340425531914, "grad_norm": 3.359567165374756, "learning_rate": 9.627600756242532e-06, "loss": 1.076, "step": 3148 }, { "epoch": 0.8375, "grad_norm": 3.5449929237365723, "learning_rate": 9.627267617465243e-06, "loss": 1.1785, "step": 3149 }, { "epoch": 0.8377659574468085, "grad_norm": 3.8262412548065186, "learning_rate": 9.626934335514847e-06, "loss": 1.1613, "step": 3150 }, { "epoch": 0.8380319148936171, "grad_norm": 3.5842607021331787, "learning_rate": 9.626600910401656e-06, "loss": 1.4153, "step": 3151 }, { "epoch": 0.8382978723404255, "grad_norm": 3.2474827766418457, "learning_rate": 9.626267342135983e-06, "loss": 1.1652, "step": 3152 }, { "epoch": 0.8385638297872341, "grad_norm": 3.3414809703826904, "learning_rate": 9.625933630728153e-06, "loss": 1.062, "step": 3153 }, { "epoch": 0.8388297872340426, "grad_norm": 3.496842384338379, "learning_rate": 9.62559977618849e-06, "loss": 1.255, "step": 3154 }, { "epoch": 0.839095744680851, "grad_norm": 3.2567241191864014, "learning_rate": 9.625265778527325e-06, "loss": 1.1378, "step": 3155 }, { "epoch": 0.8393617021276596, "grad_norm": 3.720892906188965, "learning_rate": 9.62493163775499e-06, "loss": 1.4717, "step": 3156 }, { "epoch": 0.839627659574468, "grad_norm": 3.342963695526123, "learning_rate": 9.624597353881827e-06, "loss": 1.2974, "step": 3157 }, { "epoch": 0.8398936170212766, "grad_norm": 3.3030459880828857, "learning_rate": 9.624262926918174e-06, "loss": 1.1823, "step": 3158 }, { "epoch": 0.8401595744680851, "grad_norm": 3.4827306270599365, "learning_rate": 9.623928356874384e-06, "loss": 1.2282, "step": 3159 }, { "epoch": 0.8404255319148937, "grad_norm": 3.247631311416626, "learning_rate": 9.623593643760805e-06, "loss": 1.2173, "step": 3160 }, { "epoch": 0.8406914893617021, "grad_norm": 3.571974515914917, "learning_rate": 9.623258787587795e-06, "loss": 1.2277, "step": 3161 }, { "epoch": 0.8409574468085106, "grad_norm": 3.5363829135894775, "learning_rate": 9.622923788365716e-06, "loss": 1.2212, "step": 3162 }, { "epoch": 0.8412234042553192, "grad_norm": 3.816324234008789, "learning_rate": 9.622588646104934e-06, "loss": 1.3759, "step": 3163 }, { "epoch": 0.8414893617021276, "grad_norm": 3.8033061027526855, "learning_rate": 9.622253360815814e-06, "loss": 1.1493, "step": 3164 }, { "epoch": 0.8417553191489362, "grad_norm": 3.7425754070281982, "learning_rate": 9.621917932508733e-06, "loss": 1.1964, "step": 3165 }, { "epoch": 0.8420212765957447, "grad_norm": 3.4991588592529297, "learning_rate": 9.62158236119407e-06, "loss": 1.2337, "step": 3166 }, { "epoch": 0.8422872340425532, "grad_norm": 3.450436592102051, "learning_rate": 9.621246646882209e-06, "loss": 1.1413, "step": 3167 }, { "epoch": 0.8425531914893617, "grad_norm": 3.449032783508301, "learning_rate": 9.620910789583534e-06, "loss": 1.269, "step": 3168 }, { "epoch": 0.8428191489361702, "grad_norm": 3.609985589981079, "learning_rate": 9.62057478930844e-06, "loss": 1.2008, "step": 3169 }, { "epoch": 0.8430851063829787, "grad_norm": 3.5072379112243652, "learning_rate": 9.620238646067322e-06, "loss": 1.2176, "step": 3170 }, { "epoch": 0.8433510638297872, "grad_norm": 3.481480836868286, "learning_rate": 9.619902359870579e-06, "loss": 1.2152, "step": 3171 }, { "epoch": 0.8436170212765958, "grad_norm": 3.640972852706909, "learning_rate": 9.619565930728618e-06, "loss": 1.4143, "step": 3172 }, { "epoch": 0.8438829787234042, "grad_norm": 3.5323524475097656, "learning_rate": 9.61922935865185e-06, "loss": 1.1856, "step": 3173 }, { "epoch": 0.8441489361702128, "grad_norm": 3.837163209915161, "learning_rate": 9.618892643650686e-06, "loss": 1.243, "step": 3174 }, { "epoch": 0.8444148936170213, "grad_norm": 3.702387809753418, "learning_rate": 9.618555785735546e-06, "loss": 1.1177, "step": 3175 }, { "epoch": 0.8446808510638298, "grad_norm": 3.696453094482422, "learning_rate": 9.618218784916851e-06, "loss": 1.2794, "step": 3176 }, { "epoch": 0.8449468085106383, "grad_norm": 3.467315435409546, "learning_rate": 9.617881641205032e-06, "loss": 1.1261, "step": 3177 }, { "epoch": 0.8452127659574468, "grad_norm": 3.392866849899292, "learning_rate": 9.617544354610516e-06, "loss": 1.3169, "step": 3178 }, { "epoch": 0.8454787234042553, "grad_norm": 3.4695167541503906, "learning_rate": 9.617206925143742e-06, "loss": 1.3706, "step": 3179 }, { "epoch": 0.8457446808510638, "grad_norm": 3.658966064453125, "learning_rate": 9.61686935281515e-06, "loss": 1.289, "step": 3180 }, { "epoch": 0.8460106382978724, "grad_norm": 3.779771327972412, "learning_rate": 9.616531637635183e-06, "loss": 1.2999, "step": 3181 }, { "epoch": 0.8462765957446808, "grad_norm": 3.8787152767181396, "learning_rate": 9.616193779614294e-06, "loss": 1.2876, "step": 3182 }, { "epoch": 0.8465425531914894, "grad_norm": 3.5529751777648926, "learning_rate": 9.615855778762933e-06, "loss": 1.2511, "step": 3183 }, { "epoch": 0.8468085106382979, "grad_norm": 4.681981563568115, "learning_rate": 9.61551763509156e-06, "loss": 1.3139, "step": 3184 }, { "epoch": 0.8470744680851063, "grad_norm": 3.130150556564331, "learning_rate": 9.615179348610638e-06, "loss": 1.1744, "step": 3185 }, { "epoch": 0.8473404255319149, "grad_norm": 3.374901056289673, "learning_rate": 9.614840919330632e-06, "loss": 1.0669, "step": 3186 }, { "epoch": 0.8476063829787234, "grad_norm": 3.805163621902466, "learning_rate": 9.614502347262015e-06, "loss": 1.3958, "step": 3187 }, { "epoch": 0.847872340425532, "grad_norm": 3.173311948776245, "learning_rate": 9.614163632415265e-06, "loss": 1.2402, "step": 3188 }, { "epoch": 0.8481382978723404, "grad_norm": 3.7105321884155273, "learning_rate": 9.613824774800857e-06, "loss": 1.2364, "step": 3189 }, { "epoch": 0.848404255319149, "grad_norm": 3.5191519260406494, "learning_rate": 9.613485774429279e-06, "loss": 1.3238, "step": 3190 }, { "epoch": 0.8486702127659574, "grad_norm": 3.2969210147857666, "learning_rate": 9.613146631311018e-06, "loss": 1.2284, "step": 3191 }, { "epoch": 0.8489361702127659, "grad_norm": 3.6637449264526367, "learning_rate": 9.612807345456571e-06, "loss": 1.1128, "step": 3192 }, { "epoch": 0.8492021276595745, "grad_norm": 3.9408974647521973, "learning_rate": 9.612467916876434e-06, "loss": 1.171, "step": 3193 }, { "epoch": 0.8494680851063829, "grad_norm": 3.3598899841308594, "learning_rate": 9.612128345581108e-06, "loss": 1.1941, "step": 3194 }, { "epoch": 0.8497340425531915, "grad_norm": 3.5474600791931152, "learning_rate": 9.6117886315811e-06, "loss": 1.1679, "step": 3195 }, { "epoch": 0.85, "grad_norm": 3.9404945373535156, "learning_rate": 9.611448774886925e-06, "loss": 1.3117, "step": 3196 }, { "epoch": 0.8502659574468086, "grad_norm": 3.389488935470581, "learning_rate": 9.611108775509093e-06, "loss": 1.1708, "step": 3197 }, { "epoch": 0.850531914893617, "grad_norm": 3.5706136226654053, "learning_rate": 9.610768633458127e-06, "loss": 1.249, "step": 3198 }, { "epoch": 0.8507978723404256, "grad_norm": 3.899035930633545, "learning_rate": 9.610428348744552e-06, "loss": 1.2828, "step": 3199 }, { "epoch": 0.851063829787234, "grad_norm": 3.648972511291504, "learning_rate": 9.610087921378895e-06, "loss": 1.2152, "step": 3200 }, { "epoch": 0.8513297872340425, "grad_norm": 3.762350559234619, "learning_rate": 9.60974735137169e-06, "loss": 1.3663, "step": 3201 }, { "epoch": 0.8515957446808511, "grad_norm": 3.8155291080474854, "learning_rate": 9.609406638733474e-06, "loss": 1.1777, "step": 3202 }, { "epoch": 0.8518617021276595, "grad_norm": 3.5268514156341553, "learning_rate": 9.609065783474792e-06, "loss": 1.2634, "step": 3203 }, { "epoch": 0.8521276595744681, "grad_norm": 3.3057730197906494, "learning_rate": 9.608724785606186e-06, "loss": 1.2208, "step": 3204 }, { "epoch": 0.8523936170212766, "grad_norm": 3.9648935794830322, "learning_rate": 9.60838364513821e-06, "loss": 1.2936, "step": 3205 }, { "epoch": 0.8526595744680852, "grad_norm": 3.8742856979370117, "learning_rate": 9.608042362081418e-06, "loss": 1.298, "step": 3206 }, { "epoch": 0.8529255319148936, "grad_norm": 3.845383644104004, "learning_rate": 9.60770093644637e-06, "loss": 1.2274, "step": 3207 }, { "epoch": 0.8531914893617021, "grad_norm": 3.532756805419922, "learning_rate": 9.60735936824363e-06, "loss": 1.339, "step": 3208 }, { "epoch": 0.8534574468085107, "grad_norm": 3.7821319103240967, "learning_rate": 9.607017657483768e-06, "loss": 1.3414, "step": 3209 }, { "epoch": 0.8537234042553191, "grad_norm": 3.5962960720062256, "learning_rate": 9.606675804177355e-06, "loss": 1.1815, "step": 3210 }, { "epoch": 0.8539893617021277, "grad_norm": 3.8669700622558594, "learning_rate": 9.606333808334966e-06, "loss": 1.2821, "step": 3211 }, { "epoch": 0.8542553191489362, "grad_norm": 3.288717269897461, "learning_rate": 9.605991669967189e-06, "loss": 1.1532, "step": 3212 }, { "epoch": 0.8545212765957447, "grad_norm": 3.445049285888672, "learning_rate": 9.605649389084605e-06, "loss": 1.2534, "step": 3213 }, { "epoch": 0.8547872340425532, "grad_norm": 3.075615644454956, "learning_rate": 9.605306965697809e-06, "loss": 1.0243, "step": 3214 }, { "epoch": 0.8550531914893617, "grad_norm": 3.6676225662231445, "learning_rate": 9.604964399817392e-06, "loss": 1.2927, "step": 3215 }, { "epoch": 0.8553191489361702, "grad_norm": 3.4644627571105957, "learning_rate": 9.604621691453954e-06, "loss": 1.2167, "step": 3216 }, { "epoch": 0.8555851063829787, "grad_norm": 3.3108158111572266, "learning_rate": 9.6042788406181e-06, "loss": 1.2437, "step": 3217 }, { "epoch": 0.8558510638297873, "grad_norm": 3.634568929672241, "learning_rate": 9.603935847320437e-06, "loss": 1.2587, "step": 3218 }, { "epoch": 0.8561170212765957, "grad_norm": 3.472355365753174, "learning_rate": 9.603592711571581e-06, "loss": 1.1544, "step": 3219 }, { "epoch": 0.8563829787234043, "grad_norm": 3.7467241287231445, "learning_rate": 9.603249433382145e-06, "loss": 1.1884, "step": 3220 }, { "epoch": 0.8566489361702128, "grad_norm": 4.016312599182129, "learning_rate": 9.60290601276275e-06, "loss": 1.2884, "step": 3221 }, { "epoch": 0.8569148936170212, "grad_norm": 3.432687282562256, "learning_rate": 9.602562449724027e-06, "loss": 1.2495, "step": 3222 }, { "epoch": 0.8571808510638298, "grad_norm": 3.466148614883423, "learning_rate": 9.6022187442766e-06, "loss": 1.0967, "step": 3223 }, { "epoch": 0.8574468085106383, "grad_norm": 3.7120723724365234, "learning_rate": 9.60187489643111e-06, "loss": 1.1666, "step": 3224 }, { "epoch": 0.8577127659574468, "grad_norm": 3.6994261741638184, "learning_rate": 9.60153090619819e-06, "loss": 1.3106, "step": 3225 }, { "epoch": 0.8579787234042553, "grad_norm": 3.481760025024414, "learning_rate": 9.601186773588486e-06, "loss": 1.2581, "step": 3226 }, { "epoch": 0.8582446808510639, "grad_norm": 3.5702121257781982, "learning_rate": 9.600842498612647e-06, "loss": 1.3228, "step": 3227 }, { "epoch": 0.8585106382978723, "grad_norm": 4.04725980758667, "learning_rate": 9.600498081281324e-06, "loss": 1.2431, "step": 3228 }, { "epoch": 0.8587765957446809, "grad_norm": 3.632622480392456, "learning_rate": 9.600153521605176e-06, "loss": 1.1693, "step": 3229 }, { "epoch": 0.8590425531914894, "grad_norm": 3.6271767616271973, "learning_rate": 9.59980881959486e-06, "loss": 1.2398, "step": 3230 }, { "epoch": 0.8593085106382978, "grad_norm": 3.3347911834716797, "learning_rate": 9.599463975261042e-06, "loss": 1.1603, "step": 3231 }, { "epoch": 0.8595744680851064, "grad_norm": 3.6934587955474854, "learning_rate": 9.599118988614396e-06, "loss": 1.305, "step": 3232 }, { "epoch": 0.8598404255319149, "grad_norm": 3.461353063583374, "learning_rate": 9.598773859665593e-06, "loss": 1.2013, "step": 3233 }, { "epoch": 0.8601063829787234, "grad_norm": 3.2839810848236084, "learning_rate": 9.598428588425312e-06, "loss": 1.1208, "step": 3234 }, { "epoch": 0.8603723404255319, "grad_norm": 3.599320650100708, "learning_rate": 9.598083174904235e-06, "loss": 1.4372, "step": 3235 }, { "epoch": 0.8606382978723405, "grad_norm": 3.540738105773926, "learning_rate": 9.597737619113055e-06, "loss": 1.0961, "step": 3236 }, { "epoch": 0.8609042553191489, "grad_norm": 3.327744722366333, "learning_rate": 9.597391921062457e-06, "loss": 1.2087, "step": 3237 }, { "epoch": 0.8611702127659574, "grad_norm": 3.619152545928955, "learning_rate": 9.59704608076314e-06, "loss": 1.3197, "step": 3238 }, { "epoch": 0.861436170212766, "grad_norm": 3.381136178970337, "learning_rate": 9.596700098225806e-06, "loss": 1.258, "step": 3239 }, { "epoch": 0.8617021276595744, "grad_norm": 3.6447596549987793, "learning_rate": 9.59635397346116e-06, "loss": 1.1877, "step": 3240 }, { "epoch": 0.861968085106383, "grad_norm": 4.12053918838501, "learning_rate": 9.596007706479908e-06, "loss": 1.3712, "step": 3241 }, { "epoch": 0.8622340425531915, "grad_norm": 3.1644914150238037, "learning_rate": 9.595661297292768e-06, "loss": 1.079, "step": 3242 }, { "epoch": 0.8625, "grad_norm": 4.086709022521973, "learning_rate": 9.595314745910455e-06, "loss": 1.2766, "step": 3243 }, { "epoch": 0.8627659574468085, "grad_norm": 4.086410999298096, "learning_rate": 9.594968052343697e-06, "loss": 1.2103, "step": 3244 }, { "epoch": 0.863031914893617, "grad_norm": 3.550549030303955, "learning_rate": 9.594621216603215e-06, "loss": 1.3625, "step": 3245 }, { "epoch": 0.8632978723404255, "grad_norm": 3.555739402770996, "learning_rate": 9.594274238699744e-06, "loss": 1.2163, "step": 3246 }, { "epoch": 0.863563829787234, "grad_norm": 3.2902424335479736, "learning_rate": 9.593927118644017e-06, "loss": 0.9849, "step": 3247 }, { "epoch": 0.8638297872340426, "grad_norm": 3.554675579071045, "learning_rate": 9.593579856446778e-06, "loss": 1.1437, "step": 3248 }, { "epoch": 0.864095744680851, "grad_norm": 3.3788020610809326, "learning_rate": 9.59323245211877e-06, "loss": 1.2336, "step": 3249 }, { "epoch": 0.8643617021276596, "grad_norm": 3.4318618774414062, "learning_rate": 9.592884905670742e-06, "loss": 1.2021, "step": 3250 }, { "epoch": 0.8646276595744681, "grad_norm": 3.5366907119750977, "learning_rate": 9.592537217113446e-06, "loss": 1.3365, "step": 3251 }, { "epoch": 0.8648936170212767, "grad_norm": 3.7782368659973145, "learning_rate": 9.592189386457645e-06, "loss": 1.3855, "step": 3252 }, { "epoch": 0.8651595744680851, "grad_norm": 3.480111837387085, "learning_rate": 9.591841413714094e-06, "loss": 1.2029, "step": 3253 }, { "epoch": 0.8654255319148936, "grad_norm": 3.305756092071533, "learning_rate": 9.591493298893567e-06, "loss": 1.1172, "step": 3254 }, { "epoch": 0.8656914893617021, "grad_norm": 3.342085361480713, "learning_rate": 9.591145042006829e-06, "loss": 1.0662, "step": 3255 }, { "epoch": 0.8659574468085106, "grad_norm": 3.6532325744628906, "learning_rate": 9.590796643064658e-06, "loss": 1.2083, "step": 3256 }, { "epoch": 0.8662234042553192, "grad_norm": 3.8469889163970947, "learning_rate": 9.590448102077835e-06, "loss": 1.1185, "step": 3257 }, { "epoch": 0.8664893617021276, "grad_norm": 3.6516644954681396, "learning_rate": 9.590099419057142e-06, "loss": 1.314, "step": 3258 }, { "epoch": 0.8667553191489362, "grad_norm": 3.6090152263641357, "learning_rate": 9.58975059401337e-06, "loss": 1.2411, "step": 3259 }, { "epoch": 0.8670212765957447, "grad_norm": 3.436042308807373, "learning_rate": 9.589401626957309e-06, "loss": 1.3095, "step": 3260 }, { "epoch": 0.8672872340425531, "grad_norm": 3.2654285430908203, "learning_rate": 9.589052517899759e-06, "loss": 1.1265, "step": 3261 }, { "epoch": 0.8675531914893617, "grad_norm": 3.6885263919830322, "learning_rate": 9.588703266851523e-06, "loss": 1.2568, "step": 3262 }, { "epoch": 0.8678191489361702, "grad_norm": 3.9233293533325195, "learning_rate": 9.588353873823404e-06, "loss": 1.2273, "step": 3263 }, { "epoch": 0.8680851063829788, "grad_norm": 3.254892349243164, "learning_rate": 9.588004338826213e-06, "loss": 1.0894, "step": 3264 }, { "epoch": 0.8683510638297872, "grad_norm": 3.3320047855377197, "learning_rate": 9.58765466187077e-06, "loss": 1.3296, "step": 3265 }, { "epoch": 0.8686170212765958, "grad_norm": 3.730386972427368, "learning_rate": 9.587304842967887e-06, "loss": 1.3909, "step": 3266 }, { "epoch": 0.8688829787234043, "grad_norm": 3.557739734649658, "learning_rate": 9.586954882128391e-06, "loss": 1.2858, "step": 3267 }, { "epoch": 0.8691489361702127, "grad_norm": 3.292858362197876, "learning_rate": 9.58660477936311e-06, "loss": 1.2351, "step": 3268 }, { "epoch": 0.8694148936170213, "grad_norm": 3.87530255317688, "learning_rate": 9.58625453468288e-06, "loss": 1.1993, "step": 3269 }, { "epoch": 0.8696808510638298, "grad_norm": 3.5502493381500244, "learning_rate": 9.585904148098532e-06, "loss": 1.2225, "step": 3270 }, { "epoch": 0.8699468085106383, "grad_norm": 3.9256691932678223, "learning_rate": 9.585553619620913e-06, "loss": 1.4114, "step": 3271 }, { "epoch": 0.8702127659574468, "grad_norm": 3.4120373725891113, "learning_rate": 9.585202949260866e-06, "loss": 1.1049, "step": 3272 }, { "epoch": 0.8704787234042554, "grad_norm": 3.6664795875549316, "learning_rate": 9.58485213702924e-06, "loss": 1.1906, "step": 3273 }, { "epoch": 0.8707446808510638, "grad_norm": 3.315964460372925, "learning_rate": 9.584501182936891e-06, "loss": 1.1104, "step": 3274 }, { "epoch": 0.8710106382978723, "grad_norm": 3.3911890983581543, "learning_rate": 9.584150086994678e-06, "loss": 1.1979, "step": 3275 }, { "epoch": 0.8712765957446809, "grad_norm": 3.3415443897247314, "learning_rate": 9.583798849213467e-06, "loss": 1.2044, "step": 3276 }, { "epoch": 0.8715425531914893, "grad_norm": 3.4745638370513916, "learning_rate": 9.58344746960412e-06, "loss": 1.2126, "step": 3277 }, { "epoch": 0.8718085106382979, "grad_norm": 3.358224868774414, "learning_rate": 9.58309594817751e-06, "loss": 1.2591, "step": 3278 }, { "epoch": 0.8720744680851064, "grad_norm": 3.607102155685425, "learning_rate": 9.582744284944519e-06, "loss": 1.2529, "step": 3279 }, { "epoch": 0.8723404255319149, "grad_norm": 3.4642441272735596, "learning_rate": 9.582392479916023e-06, "loss": 1.1749, "step": 3280 }, { "epoch": 0.8726063829787234, "grad_norm": 3.5729122161865234, "learning_rate": 9.582040533102908e-06, "loss": 1.3488, "step": 3281 }, { "epoch": 0.872872340425532, "grad_norm": 3.499811887741089, "learning_rate": 9.581688444516064e-06, "loss": 1.1714, "step": 3282 }, { "epoch": 0.8731382978723404, "grad_norm": 3.7235212326049805, "learning_rate": 9.581336214166386e-06, "loss": 1.2336, "step": 3283 }, { "epoch": 0.8734042553191489, "grad_norm": 3.3966002464294434, "learning_rate": 9.580983842064772e-06, "loss": 1.2197, "step": 3284 }, { "epoch": 0.8736702127659575, "grad_norm": 3.7711052894592285, "learning_rate": 9.580631328222124e-06, "loss": 1.3275, "step": 3285 }, { "epoch": 0.8739361702127659, "grad_norm": 3.6308035850524902, "learning_rate": 9.58027867264935e-06, "loss": 1.1036, "step": 3286 }, { "epoch": 0.8742021276595745, "grad_norm": 3.5871105194091797, "learning_rate": 9.579925875357361e-06, "loss": 1.2099, "step": 3287 }, { "epoch": 0.874468085106383, "grad_norm": 3.3607616424560547, "learning_rate": 9.579572936357073e-06, "loss": 1.3576, "step": 3288 }, { "epoch": 0.8747340425531915, "grad_norm": 3.5098683834075928, "learning_rate": 9.579219855659407e-06, "loss": 1.1218, "step": 3289 }, { "epoch": 0.875, "grad_norm": 3.2693376541137695, "learning_rate": 9.578866633275289e-06, "loss": 1.2022, "step": 3290 }, { "epoch": 0.8752659574468085, "grad_norm": 3.9929087162017822, "learning_rate": 9.578513269215643e-06, "loss": 1.2267, "step": 3291 }, { "epoch": 0.875531914893617, "grad_norm": 3.7925865650177, "learning_rate": 9.578159763491408e-06, "loss": 1.3087, "step": 3292 }, { "epoch": 0.8757978723404255, "grad_norm": 3.5196733474731445, "learning_rate": 9.577806116113519e-06, "loss": 1.2655, "step": 3293 }, { "epoch": 0.8760638297872341, "grad_norm": 3.529148578643799, "learning_rate": 9.57745232709292e-06, "loss": 1.1591, "step": 3294 }, { "epoch": 0.8763297872340425, "grad_norm": 3.423691987991333, "learning_rate": 9.577098396440557e-06, "loss": 1.2312, "step": 3295 }, { "epoch": 0.8765957446808511, "grad_norm": 3.6896872520446777, "learning_rate": 9.57674432416738e-06, "loss": 1.3319, "step": 3296 }, { "epoch": 0.8768617021276596, "grad_norm": 3.2412073612213135, "learning_rate": 9.576390110284343e-06, "loss": 1.1944, "step": 3297 }, { "epoch": 0.877127659574468, "grad_norm": 3.716688871383667, "learning_rate": 9.576035754802411e-06, "loss": 1.1713, "step": 3298 }, { "epoch": 0.8773936170212766, "grad_norm": 3.721823215484619, "learning_rate": 9.575681257732546e-06, "loss": 1.2639, "step": 3299 }, { "epoch": 0.8776595744680851, "grad_norm": 3.4668095111846924, "learning_rate": 9.575326619085713e-06, "loss": 1.2198, "step": 3300 }, { "epoch": 0.8779255319148936, "grad_norm": 3.647254467010498, "learning_rate": 9.574971838872889e-06, "loss": 1.2587, "step": 3301 }, { "epoch": 0.8781914893617021, "grad_norm": 3.563108205795288, "learning_rate": 9.574616917105049e-06, "loss": 1.2173, "step": 3302 }, { "epoch": 0.8784574468085107, "grad_norm": 5.121861457824707, "learning_rate": 9.574261853793176e-06, "loss": 1.2889, "step": 3303 }, { "epoch": 0.8787234042553191, "grad_norm": 3.9446914196014404, "learning_rate": 9.573906648948256e-06, "loss": 1.4498, "step": 3304 }, { "epoch": 0.8789893617021277, "grad_norm": 3.368877649307251, "learning_rate": 9.573551302581279e-06, "loss": 1.1592, "step": 3305 }, { "epoch": 0.8792553191489362, "grad_norm": 3.4360673427581787, "learning_rate": 9.57319581470324e-06, "loss": 1.2784, "step": 3306 }, { "epoch": 0.8795212765957446, "grad_norm": 3.9499571323394775, "learning_rate": 9.572840185325139e-06, "loss": 1.2127, "step": 3307 }, { "epoch": 0.8797872340425532, "grad_norm": 3.3917598724365234, "learning_rate": 9.572484414457976e-06, "loss": 1.1193, "step": 3308 }, { "epoch": 0.8800531914893617, "grad_norm": 3.3946712017059326, "learning_rate": 9.572128502112765e-06, "loss": 1.2026, "step": 3309 }, { "epoch": 0.8803191489361702, "grad_norm": 3.7101964950561523, "learning_rate": 9.571772448300514e-06, "loss": 1.2095, "step": 3310 }, { "epoch": 0.8805851063829787, "grad_norm": 3.727922201156616, "learning_rate": 9.571416253032241e-06, "loss": 1.4194, "step": 3311 }, { "epoch": 0.8808510638297873, "grad_norm": 3.457578182220459, "learning_rate": 9.571059916318967e-06, "loss": 1.26, "step": 3312 }, { "epoch": 0.8811170212765957, "grad_norm": 3.6214683055877686, "learning_rate": 9.570703438171717e-06, "loss": 1.3319, "step": 3313 }, { "epoch": 0.8813829787234042, "grad_norm": 3.4604907035827637, "learning_rate": 9.570346818601522e-06, "loss": 1.1988, "step": 3314 }, { "epoch": 0.8816489361702128, "grad_norm": 3.6304855346679688, "learning_rate": 9.569990057619414e-06, "loss": 1.3127, "step": 3315 }, { "epoch": 0.8819148936170212, "grad_norm": 3.6774277687072754, "learning_rate": 9.569633155236436e-06, "loss": 1.1874, "step": 3316 }, { "epoch": 0.8821808510638298, "grad_norm": 3.3065695762634277, "learning_rate": 9.569276111463626e-06, "loss": 1.2098, "step": 3317 }, { "epoch": 0.8824468085106383, "grad_norm": 3.712066650390625, "learning_rate": 9.568918926312033e-06, "loss": 1.2148, "step": 3318 }, { "epoch": 0.8827127659574469, "grad_norm": 3.215933084487915, "learning_rate": 9.568561599792709e-06, "loss": 1.2424, "step": 3319 }, { "epoch": 0.8829787234042553, "grad_norm": 3.317523717880249, "learning_rate": 9.568204131916712e-06, "loss": 1.1701, "step": 3320 }, { "epoch": 0.8832446808510638, "grad_norm": 4.0422749519348145, "learning_rate": 9.5678465226951e-06, "loss": 1.3527, "step": 3321 }, { "epoch": 0.8835106382978724, "grad_norm": 3.700969934463501, "learning_rate": 9.56748877213894e-06, "loss": 1.243, "step": 3322 }, { "epoch": 0.8837765957446808, "grad_norm": 3.6172409057617188, "learning_rate": 9.567130880259296e-06, "loss": 1.3409, "step": 3323 }, { "epoch": 0.8840425531914894, "grad_norm": 3.587956190109253, "learning_rate": 9.56677284706725e-06, "loss": 1.327, "step": 3324 }, { "epoch": 0.8843085106382979, "grad_norm": 3.8839058876037598, "learning_rate": 9.566414672573873e-06, "loss": 1.2556, "step": 3325 }, { "epoch": 0.8845744680851064, "grad_norm": 3.610464572906494, "learning_rate": 9.56605635679025e-06, "loss": 1.2233, "step": 3326 }, { "epoch": 0.8848404255319149, "grad_norm": 3.350374221801758, "learning_rate": 9.565697899727466e-06, "loss": 1.1454, "step": 3327 }, { "epoch": 0.8851063829787233, "grad_norm": 3.175729513168335, "learning_rate": 9.565339301396616e-06, "loss": 1.1474, "step": 3328 }, { "epoch": 0.8853723404255319, "grad_norm": 3.39150333404541, "learning_rate": 9.564980561808793e-06, "loss": 1.1578, "step": 3329 }, { "epoch": 0.8856382978723404, "grad_norm": 4.003450393676758, "learning_rate": 9.564621680975095e-06, "loss": 1.3537, "step": 3330 }, { "epoch": 0.885904255319149, "grad_norm": 3.366062879562378, "learning_rate": 9.564262658906628e-06, "loss": 1.2119, "step": 3331 }, { "epoch": 0.8861702127659574, "grad_norm": 4.014388084411621, "learning_rate": 9.563903495614503e-06, "loss": 1.3046, "step": 3332 }, { "epoch": 0.886436170212766, "grad_norm": 3.3641979694366455, "learning_rate": 9.563544191109828e-06, "loss": 1.1204, "step": 3333 }, { "epoch": 0.8867021276595745, "grad_norm": 3.584113836288452, "learning_rate": 9.563184745403725e-06, "loss": 1.1223, "step": 3334 }, { "epoch": 0.886968085106383, "grad_norm": 3.905111312866211, "learning_rate": 9.562825158507311e-06, "loss": 1.2031, "step": 3335 }, { "epoch": 0.8872340425531915, "grad_norm": 3.787869453430176, "learning_rate": 9.562465430431716e-06, "loss": 1.1798, "step": 3336 }, { "epoch": 0.8875, "grad_norm": 3.336646795272827, "learning_rate": 9.562105561188069e-06, "loss": 1.0405, "step": 3337 }, { "epoch": 0.8877659574468085, "grad_norm": 3.7780652046203613, "learning_rate": 9.561745550787504e-06, "loss": 1.1147, "step": 3338 }, { "epoch": 0.888031914893617, "grad_norm": 3.8940999507904053, "learning_rate": 9.561385399241164e-06, "loss": 1.371, "step": 3339 }, { "epoch": 0.8882978723404256, "grad_norm": 3.7703256607055664, "learning_rate": 9.561025106560184e-06, "loss": 1.2073, "step": 3340 }, { "epoch": 0.888563829787234, "grad_norm": 3.8208539485931396, "learning_rate": 9.560664672755721e-06, "loss": 1.3914, "step": 3341 }, { "epoch": 0.8888297872340426, "grad_norm": 3.8787341117858887, "learning_rate": 9.560304097838922e-06, "loss": 1.2999, "step": 3342 }, { "epoch": 0.8890957446808511, "grad_norm": 3.4178457260131836, "learning_rate": 9.559943381820947e-06, "loss": 1.2978, "step": 3343 }, { "epoch": 0.8893617021276595, "grad_norm": 3.7168829441070557, "learning_rate": 9.559582524712953e-06, "loss": 1.2428, "step": 3344 }, { "epoch": 0.8896276595744681, "grad_norm": 3.8447728157043457, "learning_rate": 9.55922152652611e-06, "loss": 1.3121, "step": 3345 }, { "epoch": 0.8898936170212766, "grad_norm": 3.5572218894958496, "learning_rate": 9.558860387271583e-06, "loss": 1.3853, "step": 3346 }, { "epoch": 0.8901595744680851, "grad_norm": 3.461214780807495, "learning_rate": 9.558499106960548e-06, "loss": 1.2634, "step": 3347 }, { "epoch": 0.8904255319148936, "grad_norm": 3.4366822242736816, "learning_rate": 9.558137685604184e-06, "loss": 1.322, "step": 3348 }, { "epoch": 0.8906914893617022, "grad_norm": 3.7072808742523193, "learning_rate": 9.557776123213673e-06, "loss": 1.2393, "step": 3349 }, { "epoch": 0.8909574468085106, "grad_norm": 3.6192643642425537, "learning_rate": 9.557414419800204e-06, "loss": 1.2106, "step": 3350 }, { "epoch": 0.8912234042553191, "grad_norm": 3.3502161502838135, "learning_rate": 9.557052575374967e-06, "loss": 1.1333, "step": 3351 }, { "epoch": 0.8914893617021277, "grad_norm": 3.4909167289733887, "learning_rate": 9.556690589949158e-06, "loss": 1.2107, "step": 3352 }, { "epoch": 0.8917553191489361, "grad_norm": 3.3816614151000977, "learning_rate": 9.556328463533976e-06, "loss": 1.217, "step": 3353 }, { "epoch": 0.8920212765957447, "grad_norm": 3.6492433547973633, "learning_rate": 9.55596619614063e-06, "loss": 1.1954, "step": 3354 }, { "epoch": 0.8922872340425532, "grad_norm": 3.4829185009002686, "learning_rate": 9.555603787780321e-06, "loss": 1.1374, "step": 3355 }, { "epoch": 0.8925531914893617, "grad_norm": 3.2989566326141357, "learning_rate": 9.555241238464271e-06, "loss": 1.2678, "step": 3356 }, { "epoch": 0.8928191489361702, "grad_norm": 3.325765609741211, "learning_rate": 9.554878548203695e-06, "loss": 1.1352, "step": 3357 }, { "epoch": 0.8930851063829788, "grad_norm": 3.680143356323242, "learning_rate": 9.55451571700981e-06, "loss": 1.1376, "step": 3358 }, { "epoch": 0.8933510638297872, "grad_norm": 3.4539363384246826, "learning_rate": 9.554152744893848e-06, "loss": 1.2099, "step": 3359 }, { "epoch": 0.8936170212765957, "grad_norm": 3.541053295135498, "learning_rate": 9.553789631867039e-06, "loss": 1.2115, "step": 3360 }, { "epoch": 0.8938829787234043, "grad_norm": 3.2321863174438477, "learning_rate": 9.553426377940618e-06, "loss": 1.2008, "step": 3361 }, { "epoch": 0.8941489361702127, "grad_norm": 4.26365852355957, "learning_rate": 9.553062983125822e-06, "loss": 1.3757, "step": 3362 }, { "epoch": 0.8944148936170213, "grad_norm": 3.7996468544006348, "learning_rate": 9.552699447433899e-06, "loss": 1.3071, "step": 3363 }, { "epoch": 0.8946808510638298, "grad_norm": 3.2904140949249268, "learning_rate": 9.552335770876094e-06, "loss": 1.0914, "step": 3364 }, { "epoch": 0.8949468085106383, "grad_norm": 3.48201584815979, "learning_rate": 9.551971953463659e-06, "loss": 1.1438, "step": 3365 }, { "epoch": 0.8952127659574468, "grad_norm": 3.721348285675049, "learning_rate": 9.551607995207854e-06, "loss": 1.1116, "step": 3366 }, { "epoch": 0.8954787234042553, "grad_norm": 3.6480965614318848, "learning_rate": 9.551243896119938e-06, "loss": 1.1571, "step": 3367 }, { "epoch": 0.8957446808510638, "grad_norm": 3.7615323066711426, "learning_rate": 9.550879656211179e-06, "loss": 1.4653, "step": 3368 }, { "epoch": 0.8960106382978723, "grad_norm": 3.1234636306762695, "learning_rate": 9.550515275492843e-06, "loss": 1.1518, "step": 3369 }, { "epoch": 0.8962765957446809, "grad_norm": 3.5595285892486572, "learning_rate": 9.550150753976209e-06, "loss": 1.213, "step": 3370 }, { "epoch": 0.8965425531914893, "grad_norm": 3.4824399948120117, "learning_rate": 9.549786091672553e-06, "loss": 1.1228, "step": 3371 }, { "epoch": 0.8968085106382979, "grad_norm": 3.6110517978668213, "learning_rate": 9.549421288593157e-06, "loss": 1.3169, "step": 3372 }, { "epoch": 0.8970744680851064, "grad_norm": 4.197827339172363, "learning_rate": 9.549056344749312e-06, "loss": 1.4542, "step": 3373 }, { "epoch": 0.8973404255319148, "grad_norm": 3.3921542167663574, "learning_rate": 9.548691260152308e-06, "loss": 1.236, "step": 3374 }, { "epoch": 0.8976063829787234, "grad_norm": 3.5142951011657715, "learning_rate": 9.54832603481344e-06, "loss": 1.2546, "step": 3375 }, { "epoch": 0.8978723404255319, "grad_norm": 3.390557050704956, "learning_rate": 9.547960668744009e-06, "loss": 1.2041, "step": 3376 }, { "epoch": 0.8981382978723405, "grad_norm": 3.5497653484344482, "learning_rate": 9.547595161955321e-06, "loss": 1.2139, "step": 3377 }, { "epoch": 0.8984042553191489, "grad_norm": 3.379268169403076, "learning_rate": 9.547229514458684e-06, "loss": 1.1503, "step": 3378 }, { "epoch": 0.8986702127659575, "grad_norm": 3.826500177383423, "learning_rate": 9.546863726265414e-06, "loss": 1.2808, "step": 3379 }, { "epoch": 0.898936170212766, "grad_norm": 3.121777296066284, "learning_rate": 9.546497797386824e-06, "loss": 1.1966, "step": 3380 }, { "epoch": 0.8992021276595744, "grad_norm": 3.6707565784454346, "learning_rate": 9.546131727834242e-06, "loss": 1.33, "step": 3381 }, { "epoch": 0.899468085106383, "grad_norm": 3.555612325668335, "learning_rate": 9.545765517618992e-06, "loss": 1.1858, "step": 3382 }, { "epoch": 0.8997340425531914, "grad_norm": 3.481360912322998, "learning_rate": 9.545399166752402e-06, "loss": 1.4109, "step": 3383 }, { "epoch": 0.9, "grad_norm": 3.1930184364318848, "learning_rate": 9.545032675245814e-06, "loss": 1.1161, "step": 3384 }, { "epoch": 0.9002659574468085, "grad_norm": 3.5262556076049805, "learning_rate": 9.544666043110562e-06, "loss": 1.2255, "step": 3385 }, { "epoch": 0.9005319148936171, "grad_norm": 3.4826877117156982, "learning_rate": 9.544299270357992e-06, "loss": 1.2001, "step": 3386 }, { "epoch": 0.9007978723404255, "grad_norm": 3.602201223373413, "learning_rate": 9.543932356999452e-06, "loss": 1.2133, "step": 3387 }, { "epoch": 0.9010638297872341, "grad_norm": 3.6607158184051514, "learning_rate": 9.543565303046297e-06, "loss": 1.1962, "step": 3388 }, { "epoch": 0.9013297872340426, "grad_norm": 3.664412260055542, "learning_rate": 9.543198108509879e-06, "loss": 1.2857, "step": 3389 }, { "epoch": 0.901595744680851, "grad_norm": 3.5442616939544678, "learning_rate": 9.542830773401564e-06, "loss": 1.2096, "step": 3390 }, { "epoch": 0.9018617021276596, "grad_norm": 4.058464527130127, "learning_rate": 9.542463297732716e-06, "loss": 1.4371, "step": 3391 }, { "epoch": 0.902127659574468, "grad_norm": 3.6064326763153076, "learning_rate": 9.542095681514708e-06, "loss": 1.2809, "step": 3392 }, { "epoch": 0.9023936170212766, "grad_norm": 3.585545301437378, "learning_rate": 9.541727924758907e-06, "loss": 1.3174, "step": 3393 }, { "epoch": 0.9026595744680851, "grad_norm": 3.465228319168091, "learning_rate": 9.5413600274767e-06, "loss": 1.2042, "step": 3394 }, { "epoch": 0.9029255319148937, "grad_norm": 3.581475019454956, "learning_rate": 9.540991989679468e-06, "loss": 1.3837, "step": 3395 }, { "epoch": 0.9031914893617021, "grad_norm": 3.4275171756744385, "learning_rate": 9.540623811378597e-06, "loss": 1.209, "step": 3396 }, { "epoch": 0.9034574468085106, "grad_norm": 3.159125328063965, "learning_rate": 9.540255492585478e-06, "loss": 1.2519, "step": 3397 }, { "epoch": 0.9037234042553192, "grad_norm": 3.7644615173339844, "learning_rate": 9.53988703331151e-06, "loss": 1.2965, "step": 3398 }, { "epoch": 0.9039893617021276, "grad_norm": 3.519270896911621, "learning_rate": 9.53951843356809e-06, "loss": 1.2125, "step": 3399 }, { "epoch": 0.9042553191489362, "grad_norm": 3.7408711910247803, "learning_rate": 9.539149693366628e-06, "loss": 1.3432, "step": 3400 }, { "epoch": 0.9045212765957447, "grad_norm": 3.343994617462158, "learning_rate": 9.538780812718527e-06, "loss": 1.2149, "step": 3401 }, { "epoch": 0.9047872340425532, "grad_norm": 3.3215134143829346, "learning_rate": 9.538411791635205e-06, "loss": 1.2844, "step": 3402 }, { "epoch": 0.9050531914893617, "grad_norm": 3.9590845108032227, "learning_rate": 9.53804263012808e-06, "loss": 1.289, "step": 3403 }, { "epoch": 0.9053191489361702, "grad_norm": 3.299415349960327, "learning_rate": 9.537673328208572e-06, "loss": 1.0875, "step": 3404 }, { "epoch": 0.9055851063829787, "grad_norm": 3.5640780925750732, "learning_rate": 9.53730388588811e-06, "loss": 1.2735, "step": 3405 }, { "epoch": 0.9058510638297872, "grad_norm": 3.2300360202789307, "learning_rate": 9.536934303178123e-06, "loss": 1.3574, "step": 3406 }, { "epoch": 0.9061170212765958, "grad_norm": 3.6983630657196045, "learning_rate": 9.536564580090046e-06, "loss": 1.2751, "step": 3407 }, { "epoch": 0.9063829787234042, "grad_norm": 3.740288257598877, "learning_rate": 9.536194716635322e-06, "loss": 1.25, "step": 3408 }, { "epoch": 0.9066489361702128, "grad_norm": 3.6063649654388428, "learning_rate": 9.535824712825393e-06, "loss": 1.1656, "step": 3409 }, { "epoch": 0.9069148936170213, "grad_norm": 3.738442897796631, "learning_rate": 9.535454568671705e-06, "loss": 1.3204, "step": 3410 }, { "epoch": 0.9071808510638298, "grad_norm": 3.7406976222991943, "learning_rate": 9.535084284185714e-06, "loss": 1.2681, "step": 3411 }, { "epoch": 0.9074468085106383, "grad_norm": 3.7773613929748535, "learning_rate": 9.534713859378875e-06, "loss": 1.2303, "step": 3412 }, { "epoch": 0.9077127659574468, "grad_norm": 3.531691312789917, "learning_rate": 9.53434329426265e-06, "loss": 1.1495, "step": 3413 }, { "epoch": 0.9079787234042553, "grad_norm": 3.730365514755249, "learning_rate": 9.533972588848507e-06, "loss": 1.1998, "step": 3414 }, { "epoch": 0.9082446808510638, "grad_norm": 4.04153299331665, "learning_rate": 9.533601743147911e-06, "loss": 1.2527, "step": 3415 }, { "epoch": 0.9085106382978724, "grad_norm": 3.547910451889038, "learning_rate": 9.53323075717234e-06, "loss": 1.3033, "step": 3416 }, { "epoch": 0.9087765957446808, "grad_norm": 3.444802761077881, "learning_rate": 9.532859630933276e-06, "loss": 1.2513, "step": 3417 }, { "epoch": 0.9090425531914894, "grad_norm": 3.7553112506866455, "learning_rate": 9.532488364442195e-06, "loss": 1.1689, "step": 3418 }, { "epoch": 0.9093085106382979, "grad_norm": 3.748389959335327, "learning_rate": 9.532116957710587e-06, "loss": 1.2341, "step": 3419 }, { "epoch": 0.9095744680851063, "grad_norm": 3.5497937202453613, "learning_rate": 9.531745410749946e-06, "loss": 1.198, "step": 3420 }, { "epoch": 0.9098404255319149, "grad_norm": 3.540468692779541, "learning_rate": 9.531373723571765e-06, "loss": 1.3774, "step": 3421 }, { "epoch": 0.9101063829787234, "grad_norm": 3.332838535308838, "learning_rate": 9.531001896187548e-06, "loss": 1.3205, "step": 3422 }, { "epoch": 0.910372340425532, "grad_norm": 3.7700576782226562, "learning_rate": 9.530629928608797e-06, "loss": 1.0956, "step": 3423 }, { "epoch": 0.9106382978723404, "grad_norm": 3.387652635574341, "learning_rate": 9.530257820847022e-06, "loss": 1.1835, "step": 3424 }, { "epoch": 0.910904255319149, "grad_norm": 3.9318602085113525, "learning_rate": 9.529885572913735e-06, "loss": 1.3197, "step": 3425 }, { "epoch": 0.9111702127659574, "grad_norm": 3.158997058868408, "learning_rate": 9.529513184820458e-06, "loss": 1.2074, "step": 3426 }, { "epoch": 0.9114361702127659, "grad_norm": 3.5039327144622803, "learning_rate": 9.529140656578707e-06, "loss": 1.3652, "step": 3427 }, { "epoch": 0.9117021276595745, "grad_norm": 3.682145118713379, "learning_rate": 9.528767988200015e-06, "loss": 1.1703, "step": 3428 }, { "epoch": 0.9119680851063829, "grad_norm": 3.6255364418029785, "learning_rate": 9.528395179695907e-06, "loss": 1.269, "step": 3429 }, { "epoch": 0.9122340425531915, "grad_norm": 3.666750907897949, "learning_rate": 9.528022231077921e-06, "loss": 1.4003, "step": 3430 }, { "epoch": 0.9125, "grad_norm": 3.167771816253662, "learning_rate": 9.527649142357596e-06, "loss": 1.1409, "step": 3431 }, { "epoch": 0.9127659574468086, "grad_norm": 3.6556570529937744, "learning_rate": 9.527275913546475e-06, "loss": 1.3847, "step": 3432 }, { "epoch": 0.913031914893617, "grad_norm": 3.794574737548828, "learning_rate": 9.526902544656108e-06, "loss": 1.3673, "step": 3433 }, { "epoch": 0.9132978723404256, "grad_norm": 3.597594976425171, "learning_rate": 9.526529035698046e-06, "loss": 1.068, "step": 3434 }, { "epoch": 0.913563829787234, "grad_norm": 3.1316208839416504, "learning_rate": 9.526155386683848e-06, "loss": 1.1379, "step": 3435 }, { "epoch": 0.9138297872340425, "grad_norm": 3.3742425441741943, "learning_rate": 9.525781597625073e-06, "loss": 1.2233, "step": 3436 }, { "epoch": 0.9140957446808511, "grad_norm": 3.6747100353240967, "learning_rate": 9.525407668533286e-06, "loss": 1.3035, "step": 3437 }, { "epoch": 0.9143617021276595, "grad_norm": 3.4809205532073975, "learning_rate": 9.525033599420058e-06, "loss": 1.1033, "step": 3438 }, { "epoch": 0.9146276595744681, "grad_norm": 3.575571298599243, "learning_rate": 9.524659390296961e-06, "loss": 1.222, "step": 3439 }, { "epoch": 0.9148936170212766, "grad_norm": 3.502336263656616, "learning_rate": 9.524285041175578e-06, "loss": 1.1575, "step": 3440 }, { "epoch": 0.9151595744680852, "grad_norm": 3.6172244548797607, "learning_rate": 9.523910552067489e-06, "loss": 1.1852, "step": 3441 }, { "epoch": 0.9154255319148936, "grad_norm": 3.6247096061706543, "learning_rate": 9.523535922984281e-06, "loss": 1.4405, "step": 3442 }, { "epoch": 0.9156914893617021, "grad_norm": 3.5026776790618896, "learning_rate": 9.523161153937546e-06, "loss": 1.2206, "step": 3443 }, { "epoch": 0.9159574468085107, "grad_norm": 3.7139501571655273, "learning_rate": 9.522786244938877e-06, "loss": 1.3555, "step": 3444 }, { "epoch": 0.9162234042553191, "grad_norm": 3.3043665885925293, "learning_rate": 9.522411195999879e-06, "loss": 1.0747, "step": 3445 }, { "epoch": 0.9164893617021277, "grad_norm": 3.3844451904296875, "learning_rate": 9.522036007132154e-06, "loss": 1.2419, "step": 3446 }, { "epoch": 0.9167553191489362, "grad_norm": 3.499330520629883, "learning_rate": 9.521660678347311e-06, "loss": 1.2287, "step": 3447 }, { "epoch": 0.9170212765957447, "grad_norm": 3.4153192043304443, "learning_rate": 9.521285209656964e-06, "loss": 1.2425, "step": 3448 }, { "epoch": 0.9172872340425532, "grad_norm": 3.838230848312378, "learning_rate": 9.520909601072726e-06, "loss": 1.2476, "step": 3449 }, { "epoch": 0.9175531914893617, "grad_norm": 3.879303455352783, "learning_rate": 9.520533852606226e-06, "loss": 1.2743, "step": 3450 }, { "epoch": 0.9178191489361702, "grad_norm": 3.2687835693359375, "learning_rate": 9.520157964269083e-06, "loss": 1.0722, "step": 3451 }, { "epoch": 0.9180851063829787, "grad_norm": 3.6070616245269775, "learning_rate": 9.519781936072933e-06, "loss": 1.2863, "step": 3452 }, { "epoch": 0.9183510638297873, "grad_norm": 3.410642623901367, "learning_rate": 9.519405768029408e-06, "loss": 1.2184, "step": 3453 }, { "epoch": 0.9186170212765957, "grad_norm": 3.642425775527954, "learning_rate": 9.519029460150148e-06, "loss": 1.2836, "step": 3454 }, { "epoch": 0.9188829787234043, "grad_norm": 3.6479597091674805, "learning_rate": 9.518653012446794e-06, "loss": 1.3349, "step": 3455 }, { "epoch": 0.9191489361702128, "grad_norm": 3.2941248416900635, "learning_rate": 9.518276424931e-06, "loss": 1.1445, "step": 3456 }, { "epoch": 0.9194148936170212, "grad_norm": 3.3414933681488037, "learning_rate": 9.51789969761441e-06, "loss": 1.3321, "step": 3457 }, { "epoch": 0.9196808510638298, "grad_norm": 3.39167857170105, "learning_rate": 9.517522830508685e-06, "loss": 1.222, "step": 3458 }, { "epoch": 0.9199468085106383, "grad_norm": 3.520202875137329, "learning_rate": 9.517145823625485e-06, "loss": 1.2299, "step": 3459 }, { "epoch": 0.9202127659574468, "grad_norm": 3.953166961669922, "learning_rate": 9.516768676976476e-06, "loss": 1.3692, "step": 3460 }, { "epoch": 0.9204787234042553, "grad_norm": 3.654834032058716, "learning_rate": 9.516391390573326e-06, "loss": 1.1788, "step": 3461 }, { "epoch": 0.9207446808510639, "grad_norm": 4.268529415130615, "learning_rate": 9.516013964427708e-06, "loss": 1.3661, "step": 3462 }, { "epoch": 0.9210106382978723, "grad_norm": 3.7426726818084717, "learning_rate": 9.515636398551302e-06, "loss": 1.3322, "step": 3463 }, { "epoch": 0.9212765957446809, "grad_norm": 3.7757678031921387, "learning_rate": 9.515258692955788e-06, "loss": 1.2663, "step": 3464 }, { "epoch": 0.9215425531914894, "grad_norm": 3.2425293922424316, "learning_rate": 9.514880847652855e-06, "loss": 1.1537, "step": 3465 }, { "epoch": 0.9218085106382978, "grad_norm": 3.891484498977661, "learning_rate": 9.514502862654192e-06, "loss": 1.3394, "step": 3466 }, { "epoch": 0.9220744680851064, "grad_norm": 3.499422788619995, "learning_rate": 9.514124737971495e-06, "loss": 1.3386, "step": 3467 }, { "epoch": 0.9223404255319149, "grad_norm": 3.8201444149017334, "learning_rate": 9.513746473616466e-06, "loss": 1.2374, "step": 3468 }, { "epoch": 0.9226063829787234, "grad_norm": 3.488330841064453, "learning_rate": 9.513368069600806e-06, "loss": 1.1239, "step": 3469 }, { "epoch": 0.9228723404255319, "grad_norm": 3.2124156951904297, "learning_rate": 9.512989525936223e-06, "loss": 1.2058, "step": 3470 }, { "epoch": 0.9231382978723405, "grad_norm": 3.4447717666625977, "learning_rate": 9.512610842634432e-06, "loss": 1.1785, "step": 3471 }, { "epoch": 0.9234042553191489, "grad_norm": 3.3703794479370117, "learning_rate": 9.512232019707148e-06, "loss": 1.3696, "step": 3472 }, { "epoch": 0.9236702127659574, "grad_norm": 3.2821013927459717, "learning_rate": 9.511853057166094e-06, "loss": 1.181, "step": 3473 }, { "epoch": 0.923936170212766, "grad_norm": 3.2314436435699463, "learning_rate": 9.511473955022992e-06, "loss": 1.2571, "step": 3474 }, { "epoch": 0.9242021276595744, "grad_norm": 3.635651111602783, "learning_rate": 9.511094713289575e-06, "loss": 1.2779, "step": 3475 }, { "epoch": 0.924468085106383, "grad_norm": 3.7356226444244385, "learning_rate": 9.510715331977579e-06, "loss": 1.3406, "step": 3476 }, { "epoch": 0.9247340425531915, "grad_norm": 3.5567257404327393, "learning_rate": 9.510335811098737e-06, "loss": 1.2792, "step": 3477 }, { "epoch": 0.925, "grad_norm": 3.603287696838379, "learning_rate": 9.509956150664796e-06, "loss": 1.1966, "step": 3478 }, { "epoch": 0.9252659574468085, "grad_norm": 3.915576219558716, "learning_rate": 9.509576350687502e-06, "loss": 1.2955, "step": 3479 }, { "epoch": 0.925531914893617, "grad_norm": 3.7345378398895264, "learning_rate": 9.509196411178605e-06, "loss": 1.1994, "step": 3480 }, { "epoch": 0.9257978723404255, "grad_norm": 3.4640583992004395, "learning_rate": 9.508816332149862e-06, "loss": 1.1937, "step": 3481 }, { "epoch": 0.926063829787234, "grad_norm": 3.5885074138641357, "learning_rate": 9.508436113613036e-06, "loss": 1.2895, "step": 3482 }, { "epoch": 0.9263297872340426, "grad_norm": 3.241925001144409, "learning_rate": 9.508055755579886e-06, "loss": 1.1693, "step": 3483 }, { "epoch": 0.926595744680851, "grad_norm": 3.664020538330078, "learning_rate": 9.507675258062183e-06, "loss": 1.2333, "step": 3484 }, { "epoch": 0.9268617021276596, "grad_norm": 3.365907669067383, "learning_rate": 9.507294621071702e-06, "loss": 1.1572, "step": 3485 }, { "epoch": 0.9271276595744681, "grad_norm": 3.634084939956665, "learning_rate": 9.506913844620217e-06, "loss": 1.1676, "step": 3486 }, { "epoch": 0.9273936170212767, "grad_norm": 3.2822062969207764, "learning_rate": 9.506532928719514e-06, "loss": 1.2271, "step": 3487 }, { "epoch": 0.9276595744680851, "grad_norm": 3.920335292816162, "learning_rate": 9.506151873381376e-06, "loss": 1.3218, "step": 3488 }, { "epoch": 0.9279255319148936, "grad_norm": 3.8373231887817383, "learning_rate": 9.505770678617592e-06, "loss": 1.2391, "step": 3489 }, { "epoch": 0.9281914893617021, "grad_norm": 3.5426108837127686, "learning_rate": 9.50538934443996e-06, "loss": 1.2676, "step": 3490 }, { "epoch": 0.9284574468085106, "grad_norm": 3.550251007080078, "learning_rate": 9.505007870860276e-06, "loss": 1.2651, "step": 3491 }, { "epoch": 0.9287234042553192, "grad_norm": 3.3801169395446777, "learning_rate": 9.504626257890345e-06, "loss": 1.1764, "step": 3492 }, { "epoch": 0.9289893617021276, "grad_norm": 4.002630233764648, "learning_rate": 9.504244505541974e-06, "loss": 1.2602, "step": 3493 }, { "epoch": 0.9292553191489362, "grad_norm": 3.6300952434539795, "learning_rate": 9.503862613826976e-06, "loss": 1.1864, "step": 3494 }, { "epoch": 0.9295212765957447, "grad_norm": 3.574536085128784, "learning_rate": 9.503480582757163e-06, "loss": 1.3364, "step": 3495 }, { "epoch": 0.9297872340425531, "grad_norm": 3.6244354248046875, "learning_rate": 9.50309841234436e-06, "loss": 1.1998, "step": 3496 }, { "epoch": 0.9300531914893617, "grad_norm": 3.826706886291504, "learning_rate": 9.502716102600393e-06, "loss": 1.1791, "step": 3497 }, { "epoch": 0.9303191489361702, "grad_norm": 3.3346476554870605, "learning_rate": 9.502333653537085e-06, "loss": 1.1943, "step": 3498 }, { "epoch": 0.9305851063829788, "grad_norm": 3.4599905014038086, "learning_rate": 9.501951065166276e-06, "loss": 1.2966, "step": 3499 }, { "epoch": 0.9308510638297872, "grad_norm": 3.6470425128936768, "learning_rate": 9.501568337499798e-06, "loss": 1.2633, "step": 3500 }, { "epoch": 0.9308510638297872, "eval_loss": 1.2690000534057617, "eval_runtime": 12.8787, "eval_samples_per_second": 31.059, "eval_steps_per_second": 3.882, "step": 3500 }, { "epoch": 0.9311170212765958, "grad_norm": 3.7849044799804688, "learning_rate": 9.501185470549496e-06, "loss": 1.2158, "step": 3501 }, { "epoch": 0.9313829787234043, "grad_norm": 3.3262534141540527, "learning_rate": 9.500802464327217e-06, "loss": 1.2429, "step": 3502 }, { "epoch": 0.9316489361702127, "grad_norm": 3.458172559738159, "learning_rate": 9.500419318844811e-06, "loss": 1.2177, "step": 3503 }, { "epoch": 0.9319148936170213, "grad_norm": 3.7243428230285645, "learning_rate": 9.500036034114132e-06, "loss": 1.2877, "step": 3504 }, { "epoch": 0.9321808510638298, "grad_norm": 3.6194655895233154, "learning_rate": 9.49965261014704e-06, "loss": 1.3507, "step": 3505 }, { "epoch": 0.9324468085106383, "grad_norm": 3.4799468517303467, "learning_rate": 9.499269046955398e-06, "loss": 1.2658, "step": 3506 }, { "epoch": 0.9327127659574468, "grad_norm": 3.6711440086364746, "learning_rate": 9.498885344551077e-06, "loss": 1.1922, "step": 3507 }, { "epoch": 0.9329787234042554, "grad_norm": 3.7202506065368652, "learning_rate": 9.498501502945943e-06, "loss": 1.1922, "step": 3508 }, { "epoch": 0.9332446808510638, "grad_norm": 3.440639019012451, "learning_rate": 9.498117522151878e-06, "loss": 1.1795, "step": 3509 }, { "epoch": 0.9335106382978723, "grad_norm": 3.513429880142212, "learning_rate": 9.497733402180761e-06, "loss": 1.2098, "step": 3510 }, { "epoch": 0.9337765957446809, "grad_norm": 3.599651575088501, "learning_rate": 9.497349143044478e-06, "loss": 1.2052, "step": 3511 }, { "epoch": 0.9340425531914893, "grad_norm": 4.015235900878906, "learning_rate": 9.496964744754915e-06, "loss": 1.233, "step": 3512 }, { "epoch": 0.9343085106382979, "grad_norm": 3.3815979957580566, "learning_rate": 9.49658020732397e-06, "loss": 1.1291, "step": 3513 }, { "epoch": 0.9345744680851064, "grad_norm": 3.3032724857330322, "learning_rate": 9.49619553076354e-06, "loss": 1.2174, "step": 3514 }, { "epoch": 0.9348404255319149, "grad_norm": 3.571817398071289, "learning_rate": 9.495810715085526e-06, "loss": 1.3212, "step": 3515 }, { "epoch": 0.9351063829787234, "grad_norm": 3.5486996173858643, "learning_rate": 9.495425760301836e-06, "loss": 1.1428, "step": 3516 }, { "epoch": 0.935372340425532, "grad_norm": 3.3801069259643555, "learning_rate": 9.495040666424378e-06, "loss": 1.1673, "step": 3517 }, { "epoch": 0.9356382978723404, "grad_norm": 3.6057615280151367, "learning_rate": 9.494655433465071e-06, "loss": 1.1342, "step": 3518 }, { "epoch": 0.9359042553191489, "grad_norm": 3.6146769523620605, "learning_rate": 9.494270061435834e-06, "loss": 1.4436, "step": 3519 }, { "epoch": 0.9361702127659575, "grad_norm": 3.200052499771118, "learning_rate": 9.493884550348589e-06, "loss": 1.1598, "step": 3520 }, { "epoch": 0.9364361702127659, "grad_norm": 3.6785783767700195, "learning_rate": 9.493498900215265e-06, "loss": 1.2838, "step": 3521 }, { "epoch": 0.9367021276595745, "grad_norm": 3.905540943145752, "learning_rate": 9.493113111047794e-06, "loss": 1.2665, "step": 3522 }, { "epoch": 0.936968085106383, "grad_norm": 3.300579786300659, "learning_rate": 9.492727182858115e-06, "loss": 1.2111, "step": 3523 }, { "epoch": 0.9372340425531915, "grad_norm": 3.8752784729003906, "learning_rate": 9.492341115658167e-06, "loss": 1.2444, "step": 3524 }, { "epoch": 0.9375, "grad_norm": 3.561800241470337, "learning_rate": 9.491954909459895e-06, "loss": 1.2224, "step": 3525 }, { "epoch": 0.9377659574468085, "grad_norm": 3.434983730316162, "learning_rate": 9.491568564275252e-06, "loss": 1.2249, "step": 3526 }, { "epoch": 0.938031914893617, "grad_norm": 3.5711958408355713, "learning_rate": 9.491182080116185e-06, "loss": 1.3134, "step": 3527 }, { "epoch": 0.9382978723404255, "grad_norm": 3.2614593505859375, "learning_rate": 9.490795456994658e-06, "loss": 1.1418, "step": 3528 }, { "epoch": 0.9385638297872341, "grad_norm": 3.7001163959503174, "learning_rate": 9.490408694922635e-06, "loss": 1.2611, "step": 3529 }, { "epoch": 0.9388297872340425, "grad_norm": 3.287165880203247, "learning_rate": 9.490021793912079e-06, "loss": 1.1458, "step": 3530 }, { "epoch": 0.9390957446808511, "grad_norm": 3.9669268131256104, "learning_rate": 9.489634753974961e-06, "loss": 1.1978, "step": 3531 }, { "epoch": 0.9393617021276596, "grad_norm": 3.8696441650390625, "learning_rate": 9.48924757512326e-06, "loss": 1.3488, "step": 3532 }, { "epoch": 0.939627659574468, "grad_norm": 3.8109893798828125, "learning_rate": 9.48886025736895e-06, "loss": 1.2341, "step": 3533 }, { "epoch": 0.9398936170212766, "grad_norm": 3.3541629314422607, "learning_rate": 9.488472800724022e-06, "loss": 1.1629, "step": 3534 }, { "epoch": 0.9401595744680851, "grad_norm": 3.4784152507781982, "learning_rate": 9.48808520520046e-06, "loss": 1.3021, "step": 3535 }, { "epoch": 0.9404255319148936, "grad_norm": 3.4299418926239014, "learning_rate": 9.487697470810257e-06, "loss": 1.1674, "step": 3536 }, { "epoch": 0.9406914893617021, "grad_norm": 3.467414617538452, "learning_rate": 9.487309597565413e-06, "loss": 1.1953, "step": 3537 }, { "epoch": 0.9409574468085107, "grad_norm": 3.263312816619873, "learning_rate": 9.486921585477924e-06, "loss": 1.1662, "step": 3538 }, { "epoch": 0.9412234042553191, "grad_norm": 3.3032853603363037, "learning_rate": 9.486533434559801e-06, "loss": 1.2386, "step": 3539 }, { "epoch": 0.9414893617021277, "grad_norm": 3.641338348388672, "learning_rate": 9.48614514482305e-06, "loss": 1.25, "step": 3540 }, { "epoch": 0.9417553191489362, "grad_norm": 3.5189712047576904, "learning_rate": 9.485756716279686e-06, "loss": 1.2763, "step": 3541 }, { "epoch": 0.9420212765957446, "grad_norm": 3.464155912399292, "learning_rate": 9.485368148941728e-06, "loss": 1.278, "step": 3542 }, { "epoch": 0.9422872340425532, "grad_norm": 3.5938682556152344, "learning_rate": 9.484979442821199e-06, "loss": 1.1817, "step": 3543 }, { "epoch": 0.9425531914893617, "grad_norm": 3.399099588394165, "learning_rate": 9.484590597930125e-06, "loss": 1.3007, "step": 3544 }, { "epoch": 0.9428191489361702, "grad_norm": 3.681652545928955, "learning_rate": 9.484201614280539e-06, "loss": 1.1233, "step": 3545 }, { "epoch": 0.9430851063829787, "grad_norm": 3.4110119342803955, "learning_rate": 9.483812491884475e-06, "loss": 1.3159, "step": 3546 }, { "epoch": 0.9433510638297873, "grad_norm": 3.347201347351074, "learning_rate": 9.483423230753975e-06, "loss": 1.2668, "step": 3547 }, { "epoch": 0.9436170212765957, "grad_norm": 3.551835775375366, "learning_rate": 9.48303383090108e-06, "loss": 1.2695, "step": 3548 }, { "epoch": 0.9438829787234042, "grad_norm": 7.742011547088623, "learning_rate": 9.48264429233784e-06, "loss": 1.3468, "step": 3549 }, { "epoch": 0.9441489361702128, "grad_norm": 3.5810296535491943, "learning_rate": 9.482254615076307e-06, "loss": 1.2088, "step": 3550 }, { "epoch": 0.9444148936170212, "grad_norm": 3.6081788539886475, "learning_rate": 9.481864799128541e-06, "loss": 1.199, "step": 3551 }, { "epoch": 0.9446808510638298, "grad_norm": 3.4480881690979004, "learning_rate": 9.481474844506602e-06, "loss": 1.2016, "step": 3552 }, { "epoch": 0.9449468085106383, "grad_norm": 3.4126522541046143, "learning_rate": 9.481084751222553e-06, "loss": 1.0633, "step": 3553 }, { "epoch": 0.9452127659574469, "grad_norm": 3.731552839279175, "learning_rate": 9.480694519288467e-06, "loss": 1.3171, "step": 3554 }, { "epoch": 0.9454787234042553, "grad_norm": 3.7800607681274414, "learning_rate": 9.480304148716418e-06, "loss": 1.4008, "step": 3555 }, { "epoch": 0.9457446808510638, "grad_norm": 3.509230375289917, "learning_rate": 9.47991363951848e-06, "loss": 1.2949, "step": 3556 }, { "epoch": 0.9460106382978724, "grad_norm": 3.7124991416931152, "learning_rate": 9.479522991706744e-06, "loss": 1.1951, "step": 3557 }, { "epoch": 0.9462765957446808, "grad_norm": 3.6707465648651123, "learning_rate": 9.479132205293291e-06, "loss": 1.1625, "step": 3558 }, { "epoch": 0.9465425531914894, "grad_norm": 3.456841468811035, "learning_rate": 9.478741280290214e-06, "loss": 1.1969, "step": 3559 }, { "epoch": 0.9468085106382979, "grad_norm": 4.189627170562744, "learning_rate": 9.478350216709609e-06, "loss": 1.4571, "step": 3560 }, { "epoch": 0.9470744680851064, "grad_norm": 3.5188887119293213, "learning_rate": 9.477959014563575e-06, "loss": 1.2589, "step": 3561 }, { "epoch": 0.9473404255319149, "grad_norm": 3.594780206680298, "learning_rate": 9.477567673864217e-06, "loss": 1.2652, "step": 3562 }, { "epoch": 0.9476063829787233, "grad_norm": 3.3485286235809326, "learning_rate": 9.477176194623644e-06, "loss": 1.2256, "step": 3563 }, { "epoch": 0.9478723404255319, "grad_norm": 3.549306631088257, "learning_rate": 9.476784576853967e-06, "loss": 1.2868, "step": 3564 }, { "epoch": 0.9481382978723404, "grad_norm": 3.50877046585083, "learning_rate": 9.476392820567306e-06, "loss": 1.0912, "step": 3565 }, { "epoch": 0.948404255319149, "grad_norm": 3.3570492267608643, "learning_rate": 9.476000925775782e-06, "loss": 1.2827, "step": 3566 }, { "epoch": 0.9486702127659574, "grad_norm": 3.3039703369140625, "learning_rate": 9.475608892491516e-06, "loss": 1.1552, "step": 3567 }, { "epoch": 0.948936170212766, "grad_norm": 3.559574604034424, "learning_rate": 9.475216720726644e-06, "loss": 1.1988, "step": 3568 }, { "epoch": 0.9492021276595745, "grad_norm": 3.8060848712921143, "learning_rate": 9.474824410493298e-06, "loss": 1.3264, "step": 3569 }, { "epoch": 0.949468085106383, "grad_norm": 3.3232123851776123, "learning_rate": 9.474431961803615e-06, "loss": 1.1884, "step": 3570 }, { "epoch": 0.9497340425531915, "grad_norm": 3.821077346801758, "learning_rate": 9.47403937466974e-06, "loss": 1.3414, "step": 3571 }, { "epoch": 0.95, "grad_norm": 3.464698076248169, "learning_rate": 9.473646649103819e-06, "loss": 1.1284, "step": 3572 }, { "epoch": 0.9502659574468085, "grad_norm": 3.464268922805786, "learning_rate": 9.473253785118003e-06, "loss": 1.3262, "step": 3573 }, { "epoch": 0.950531914893617, "grad_norm": 3.7841787338256836, "learning_rate": 9.472860782724448e-06, "loss": 1.1169, "step": 3574 }, { "epoch": 0.9507978723404256, "grad_norm": 3.278888463973999, "learning_rate": 9.472467641935314e-06, "loss": 1.1413, "step": 3575 }, { "epoch": 0.951063829787234, "grad_norm": 3.321603536605835, "learning_rate": 9.472074362762767e-06, "loss": 1.0513, "step": 3576 }, { "epoch": 0.9513297872340426, "grad_norm": 3.8839926719665527, "learning_rate": 9.471680945218973e-06, "loss": 1.2412, "step": 3577 }, { "epoch": 0.9515957446808511, "grad_norm": 3.5885181427001953, "learning_rate": 9.471287389316107e-06, "loss": 1.1092, "step": 3578 }, { "epoch": 0.9518617021276595, "grad_norm": 3.592010498046875, "learning_rate": 9.470893695066345e-06, "loss": 1.275, "step": 3579 }, { "epoch": 0.9521276595744681, "grad_norm": 3.785581111907959, "learning_rate": 9.470499862481867e-06, "loss": 1.3256, "step": 3580 }, { "epoch": 0.9523936170212766, "grad_norm": 3.41489315032959, "learning_rate": 9.47010589157486e-06, "loss": 1.2419, "step": 3581 }, { "epoch": 0.9526595744680851, "grad_norm": 3.4412648677825928, "learning_rate": 9.469711782357513e-06, "loss": 1.3029, "step": 3582 }, { "epoch": 0.9529255319148936, "grad_norm": 3.6879758834838867, "learning_rate": 9.469317534842025e-06, "loss": 1.217, "step": 3583 }, { "epoch": 0.9531914893617022, "grad_norm": 3.8642208576202393, "learning_rate": 9.468923149040587e-06, "loss": 1.3035, "step": 3584 }, { "epoch": 0.9534574468085106, "grad_norm": 3.9491965770721436, "learning_rate": 9.468528624965406e-06, "loss": 1.3494, "step": 3585 }, { "epoch": 0.9537234042553191, "grad_norm": 3.6963748931884766, "learning_rate": 9.468133962628688e-06, "loss": 1.1793, "step": 3586 }, { "epoch": 0.9539893617021277, "grad_norm": 3.4110567569732666, "learning_rate": 9.467739162042643e-06, "loss": 1.1798, "step": 3587 }, { "epoch": 0.9542553191489361, "grad_norm": 3.718494176864624, "learning_rate": 9.46734422321949e-06, "loss": 1.3528, "step": 3588 }, { "epoch": 0.9545212765957447, "grad_norm": 3.9455974102020264, "learning_rate": 9.466949146171449e-06, "loss": 1.341, "step": 3589 }, { "epoch": 0.9547872340425532, "grad_norm": 3.668195962905884, "learning_rate": 9.46655393091074e-06, "loss": 1.1503, "step": 3590 }, { "epoch": 0.9550531914893617, "grad_norm": 3.662208080291748, "learning_rate": 9.466158577449593e-06, "loss": 1.3243, "step": 3591 }, { "epoch": 0.9553191489361702, "grad_norm": 3.463543176651001, "learning_rate": 9.465763085800244e-06, "loss": 1.187, "step": 3592 }, { "epoch": 0.9555851063829788, "grad_norm": 3.6207196712493896, "learning_rate": 9.465367455974926e-06, "loss": 1.2523, "step": 3593 }, { "epoch": 0.9558510638297872, "grad_norm": 3.3348443508148193, "learning_rate": 9.46497168798588e-06, "loss": 1.2145, "step": 3594 }, { "epoch": 0.9561170212765957, "grad_norm": 4.174299240112305, "learning_rate": 9.464575781845355e-06, "loss": 1.4818, "step": 3595 }, { "epoch": 0.9563829787234043, "grad_norm": 3.3657476902008057, "learning_rate": 9.464179737565598e-06, "loss": 1.2587, "step": 3596 }, { "epoch": 0.9566489361702127, "grad_norm": 3.697920560836792, "learning_rate": 9.463783555158866e-06, "loss": 1.36, "step": 3597 }, { "epoch": 0.9569148936170213, "grad_norm": 3.825244903564453, "learning_rate": 9.463387234637413e-06, "loss": 1.2879, "step": 3598 }, { "epoch": 0.9571808510638298, "grad_norm": 3.5759551525115967, "learning_rate": 9.462990776013504e-06, "loss": 1.4189, "step": 3599 }, { "epoch": 0.9574468085106383, "grad_norm": 3.6317455768585205, "learning_rate": 9.462594179299408e-06, "loss": 1.3723, "step": 3600 }, { "epoch": 0.9577127659574468, "grad_norm": 3.254585027694702, "learning_rate": 9.46219744450739e-06, "loss": 1.1231, "step": 3601 }, { "epoch": 0.9579787234042553, "grad_norm": 3.0535624027252197, "learning_rate": 9.461800571649734e-06, "loss": 1.0536, "step": 3602 }, { "epoch": 0.9582446808510638, "grad_norm": 3.603959798812866, "learning_rate": 9.461403560738713e-06, "loss": 1.254, "step": 3603 }, { "epoch": 0.9585106382978723, "grad_norm": 3.4408342838287354, "learning_rate": 9.461006411786613e-06, "loss": 1.2253, "step": 3604 }, { "epoch": 0.9587765957446809, "grad_norm": 3.6801369190216064, "learning_rate": 9.460609124805724e-06, "loss": 1.2253, "step": 3605 }, { "epoch": 0.9590425531914893, "grad_norm": 3.968122959136963, "learning_rate": 9.460211699808334e-06, "loss": 1.2456, "step": 3606 }, { "epoch": 0.9593085106382979, "grad_norm": 3.602989912033081, "learning_rate": 9.459814136806746e-06, "loss": 1.2261, "step": 3607 }, { "epoch": 0.9595744680851064, "grad_norm": 3.5720174312591553, "learning_rate": 9.459416435813258e-06, "loss": 1.1869, "step": 3608 }, { "epoch": 0.9598404255319148, "grad_norm": 3.626312732696533, "learning_rate": 9.459018596840173e-06, "loss": 1.3385, "step": 3609 }, { "epoch": 0.9601063829787234, "grad_norm": 3.5388100147247314, "learning_rate": 9.458620619899803e-06, "loss": 1.2523, "step": 3610 }, { "epoch": 0.9603723404255319, "grad_norm": 3.8266894817352295, "learning_rate": 9.458222505004462e-06, "loss": 1.4002, "step": 3611 }, { "epoch": 0.9606382978723405, "grad_norm": 3.576223373413086, "learning_rate": 9.457824252166467e-06, "loss": 1.2669, "step": 3612 }, { "epoch": 0.9609042553191489, "grad_norm": 3.5163745880126953, "learning_rate": 9.457425861398144e-06, "loss": 1.1806, "step": 3613 }, { "epoch": 0.9611702127659575, "grad_norm": 3.586691379547119, "learning_rate": 9.457027332711814e-06, "loss": 1.3403, "step": 3614 }, { "epoch": 0.961436170212766, "grad_norm": 3.5483405590057373, "learning_rate": 9.456628666119812e-06, "loss": 1.2426, "step": 3615 }, { "epoch": 0.9617021276595744, "grad_norm": 3.600684881210327, "learning_rate": 9.456229861634471e-06, "loss": 1.2333, "step": 3616 }, { "epoch": 0.961968085106383, "grad_norm": 3.446035385131836, "learning_rate": 9.455830919268134e-06, "loss": 1.161, "step": 3617 }, { "epoch": 0.9622340425531914, "grad_norm": 3.329267978668213, "learning_rate": 9.45543183903314e-06, "loss": 1.1162, "step": 3618 }, { "epoch": 0.9625, "grad_norm": 3.4342401027679443, "learning_rate": 9.45503262094184e-06, "loss": 1.3068, "step": 3619 }, { "epoch": 0.9627659574468085, "grad_norm": 3.230329751968384, "learning_rate": 9.454633265006585e-06, "loss": 1.1398, "step": 3620 }, { "epoch": 0.9630319148936171, "grad_norm": 3.3767967224121094, "learning_rate": 9.454233771239733e-06, "loss": 1.3104, "step": 3621 }, { "epoch": 0.9632978723404255, "grad_norm": 3.2001163959503174, "learning_rate": 9.453834139653643e-06, "loss": 1.1632, "step": 3622 }, { "epoch": 0.9635638297872341, "grad_norm": 3.9331612586975098, "learning_rate": 9.453434370260683e-06, "loss": 1.3891, "step": 3623 }, { "epoch": 0.9638297872340426, "grad_norm": 4.0084052085876465, "learning_rate": 9.453034463073218e-06, "loss": 1.4323, "step": 3624 }, { "epoch": 0.964095744680851, "grad_norm": 3.2673776149749756, "learning_rate": 9.452634418103626e-06, "loss": 1.0984, "step": 3625 }, { "epoch": 0.9643617021276596, "grad_norm": 3.2544898986816406, "learning_rate": 9.45223423536428e-06, "loss": 1.2681, "step": 3626 }, { "epoch": 0.964627659574468, "grad_norm": 3.625535488128662, "learning_rate": 9.451833914867567e-06, "loss": 1.258, "step": 3627 }, { "epoch": 0.9648936170212766, "grad_norm": 3.048551082611084, "learning_rate": 9.451433456625871e-06, "loss": 1.207, "step": 3628 }, { "epoch": 0.9651595744680851, "grad_norm": 3.567139148712158, "learning_rate": 9.451032860651583e-06, "loss": 1.2771, "step": 3629 }, { "epoch": 0.9654255319148937, "grad_norm": 3.618807077407837, "learning_rate": 9.450632126957098e-06, "loss": 1.2666, "step": 3630 }, { "epoch": 0.9656914893617021, "grad_norm": 3.4883675575256348, "learning_rate": 9.450231255554814e-06, "loss": 1.1142, "step": 3631 }, { "epoch": 0.9659574468085106, "grad_norm": 3.687424898147583, "learning_rate": 9.449830246457136e-06, "loss": 1.1745, "step": 3632 }, { "epoch": 0.9662234042553192, "grad_norm": 3.457051992416382, "learning_rate": 9.44942909967647e-06, "loss": 1.1846, "step": 3633 }, { "epoch": 0.9664893617021276, "grad_norm": 3.5090994834899902, "learning_rate": 9.449027815225231e-06, "loss": 1.3255, "step": 3634 }, { "epoch": 0.9667553191489362, "grad_norm": 3.2658236026763916, "learning_rate": 9.448626393115833e-06, "loss": 1.0964, "step": 3635 }, { "epoch": 0.9670212765957447, "grad_norm": 3.7192766666412354, "learning_rate": 9.448224833360695e-06, "loss": 1.3171, "step": 3636 }, { "epoch": 0.9672872340425532, "grad_norm": 3.891343355178833, "learning_rate": 9.447823135972247e-06, "loss": 1.206, "step": 3637 }, { "epoch": 0.9675531914893617, "grad_norm": 3.7228803634643555, "learning_rate": 9.447421300962911e-06, "loss": 1.2032, "step": 3638 }, { "epoch": 0.9678191489361702, "grad_norm": 3.348090171813965, "learning_rate": 9.447019328345125e-06, "loss": 1.2437, "step": 3639 }, { "epoch": 0.9680851063829787, "grad_norm": 3.3824315071105957, "learning_rate": 9.446617218131326e-06, "loss": 1.1005, "step": 3640 }, { "epoch": 0.9683510638297872, "grad_norm": 4.107891082763672, "learning_rate": 9.446214970333954e-06, "loss": 1.3365, "step": 3641 }, { "epoch": 0.9686170212765958, "grad_norm": 3.609551191329956, "learning_rate": 9.445812584965458e-06, "loss": 1.2756, "step": 3642 }, { "epoch": 0.9688829787234042, "grad_norm": 3.625800371170044, "learning_rate": 9.445410062038284e-06, "loss": 1.2114, "step": 3643 }, { "epoch": 0.9691489361702128, "grad_norm": 3.605753183364868, "learning_rate": 9.445007401564889e-06, "loss": 1.3025, "step": 3644 }, { "epoch": 0.9694148936170213, "grad_norm": 3.2446835041046143, "learning_rate": 9.444604603557733e-06, "loss": 1.2037, "step": 3645 }, { "epoch": 0.9696808510638298, "grad_norm": 3.478797674179077, "learning_rate": 9.444201668029278e-06, "loss": 1.2862, "step": 3646 }, { "epoch": 0.9699468085106383, "grad_norm": 3.33634352684021, "learning_rate": 9.443798594991989e-06, "loss": 1.1298, "step": 3647 }, { "epoch": 0.9702127659574468, "grad_norm": 3.82041597366333, "learning_rate": 9.44339538445834e-06, "loss": 1.2301, "step": 3648 }, { "epoch": 0.9704787234042553, "grad_norm": 3.5176687240600586, "learning_rate": 9.442992036440808e-06, "loss": 1.1489, "step": 3649 }, { "epoch": 0.9707446808510638, "grad_norm": 3.265772819519043, "learning_rate": 9.44258855095187e-06, "loss": 1.1147, "step": 3650 }, { "epoch": 0.9710106382978724, "grad_norm": 3.5735883712768555, "learning_rate": 9.442184928004012e-06, "loss": 1.2768, "step": 3651 }, { "epoch": 0.9712765957446808, "grad_norm": 3.6002047061920166, "learning_rate": 9.441781167609722e-06, "loss": 1.3395, "step": 3652 }, { "epoch": 0.9715425531914894, "grad_norm": 3.8888189792633057, "learning_rate": 9.441377269781496e-06, "loss": 1.2223, "step": 3653 }, { "epoch": 0.9718085106382979, "grad_norm": 3.6971378326416016, "learning_rate": 9.440973234531825e-06, "loss": 1.1813, "step": 3654 }, { "epoch": 0.9720744680851063, "grad_norm": 3.6079912185668945, "learning_rate": 9.440569061873213e-06, "loss": 1.1156, "step": 3655 }, { "epoch": 0.9723404255319149, "grad_norm": 3.839540481567383, "learning_rate": 9.440164751818168e-06, "loss": 1.4711, "step": 3656 }, { "epoch": 0.9726063829787234, "grad_norm": 3.7191896438598633, "learning_rate": 9.439760304379197e-06, "loss": 1.2351, "step": 3657 }, { "epoch": 0.972872340425532, "grad_norm": 3.902529001235962, "learning_rate": 9.439355719568817e-06, "loss": 1.3487, "step": 3658 }, { "epoch": 0.9731382978723404, "grad_norm": 3.389925241470337, "learning_rate": 9.438950997399543e-06, "loss": 1.1905, "step": 3659 }, { "epoch": 0.973404255319149, "grad_norm": 3.6134610176086426, "learning_rate": 9.438546137883898e-06, "loss": 1.2323, "step": 3660 }, { "epoch": 0.9736702127659574, "grad_norm": 4.062784671783447, "learning_rate": 9.438141141034409e-06, "loss": 1.2437, "step": 3661 }, { "epoch": 0.9739361702127659, "grad_norm": 3.6207644939422607, "learning_rate": 9.437736006863611e-06, "loss": 1.2922, "step": 3662 }, { "epoch": 0.9742021276595745, "grad_norm": 3.2939248085021973, "learning_rate": 9.437330735384034e-06, "loss": 1.2348, "step": 3663 }, { "epoch": 0.9744680851063829, "grad_norm": 3.6209723949432373, "learning_rate": 9.43692532660822e-06, "loss": 1.2698, "step": 3664 }, { "epoch": 0.9747340425531915, "grad_norm": 3.766961097717285, "learning_rate": 9.436519780548712e-06, "loss": 1.3306, "step": 3665 }, { "epoch": 0.975, "grad_norm": 3.1702146530151367, "learning_rate": 9.43611409721806e-06, "loss": 1.2877, "step": 3666 }, { "epoch": 0.9752659574468086, "grad_norm": 3.411604642868042, "learning_rate": 9.435708276628814e-06, "loss": 1.1874, "step": 3667 }, { "epoch": 0.975531914893617, "grad_norm": 3.3507773876190186, "learning_rate": 9.435302318793533e-06, "loss": 1.1614, "step": 3668 }, { "epoch": 0.9757978723404256, "grad_norm": 3.42853045463562, "learning_rate": 9.434896223724774e-06, "loss": 1.128, "step": 3669 }, { "epoch": 0.976063829787234, "grad_norm": 3.5911173820495605, "learning_rate": 9.434489991435106e-06, "loss": 1.2216, "step": 3670 }, { "epoch": 0.9763297872340425, "grad_norm": 3.4679529666900635, "learning_rate": 9.434083621937096e-06, "loss": 1.1932, "step": 3671 }, { "epoch": 0.9765957446808511, "grad_norm": 3.4107143878936768, "learning_rate": 9.433677115243318e-06, "loss": 1.1279, "step": 3672 }, { "epoch": 0.9768617021276595, "grad_norm": 3.5593109130859375, "learning_rate": 9.433270471366352e-06, "loss": 1.1996, "step": 3673 }, { "epoch": 0.9771276595744681, "grad_norm": 3.193164110183716, "learning_rate": 9.432863690318777e-06, "loss": 1.103, "step": 3674 }, { "epoch": 0.9773936170212766, "grad_norm": 3.5351223945617676, "learning_rate": 9.432456772113179e-06, "loss": 1.2212, "step": 3675 }, { "epoch": 0.9776595744680852, "grad_norm": 3.4629955291748047, "learning_rate": 9.432049716762151e-06, "loss": 1.2055, "step": 3676 }, { "epoch": 0.9779255319148936, "grad_norm": 3.661907196044922, "learning_rate": 9.431642524278286e-06, "loss": 1.3389, "step": 3677 }, { "epoch": 0.9781914893617021, "grad_norm": 3.140364408493042, "learning_rate": 9.431235194674185e-06, "loss": 1.2099, "step": 3678 }, { "epoch": 0.9784574468085107, "grad_norm": 3.7145817279815674, "learning_rate": 9.43082772796245e-06, "loss": 1.49, "step": 3679 }, { "epoch": 0.9787234042553191, "grad_norm": 3.3982760906219482, "learning_rate": 9.430420124155687e-06, "loss": 1.2001, "step": 3680 }, { "epoch": 0.9789893617021277, "grad_norm": 3.7518324851989746, "learning_rate": 9.43001238326651e-06, "loss": 1.4143, "step": 3681 }, { "epoch": 0.9792553191489362, "grad_norm": 3.708822250366211, "learning_rate": 9.429604505307535e-06, "loss": 1.2038, "step": 3682 }, { "epoch": 0.9795212765957447, "grad_norm": 3.5261037349700928, "learning_rate": 9.42919649029138e-06, "loss": 1.2233, "step": 3683 }, { "epoch": 0.9797872340425532, "grad_norm": 3.842564582824707, "learning_rate": 9.428788338230672e-06, "loss": 1.3385, "step": 3684 }, { "epoch": 0.9800531914893617, "grad_norm": 3.688267230987549, "learning_rate": 9.428380049138038e-06, "loss": 1.2034, "step": 3685 }, { "epoch": 0.9803191489361702, "grad_norm": 3.877396583557129, "learning_rate": 9.42797162302611e-06, "loss": 1.2775, "step": 3686 }, { "epoch": 0.9805851063829787, "grad_norm": 3.4748518466949463, "learning_rate": 9.427563059907528e-06, "loss": 1.4141, "step": 3687 }, { "epoch": 0.9808510638297873, "grad_norm": 3.0281589031219482, "learning_rate": 9.427154359794931e-06, "loss": 1.2591, "step": 3688 }, { "epoch": 0.9811170212765957, "grad_norm": 3.5246212482452393, "learning_rate": 9.42674552270097e-06, "loss": 1.1775, "step": 3689 }, { "epoch": 0.9813829787234043, "grad_norm": 3.599862813949585, "learning_rate": 9.426336548638287e-06, "loss": 1.187, "step": 3690 }, { "epoch": 0.9816489361702128, "grad_norm": 3.8031740188598633, "learning_rate": 9.42592743761954e-06, "loss": 1.3704, "step": 3691 }, { "epoch": 0.9819148936170212, "grad_norm": 3.708652973175049, "learning_rate": 9.425518189657388e-06, "loss": 1.2567, "step": 3692 }, { "epoch": 0.9821808510638298, "grad_norm": 3.341240882873535, "learning_rate": 9.425108804764493e-06, "loss": 1.4062, "step": 3693 }, { "epoch": 0.9824468085106383, "grad_norm": 3.5106687545776367, "learning_rate": 9.42469928295352e-06, "loss": 1.1759, "step": 3694 }, { "epoch": 0.9827127659574468, "grad_norm": 3.153082847595215, "learning_rate": 9.424289624237143e-06, "loss": 1.1955, "step": 3695 }, { "epoch": 0.9829787234042553, "grad_norm": 3.4173176288604736, "learning_rate": 9.423879828628038e-06, "loss": 1.3188, "step": 3696 }, { "epoch": 0.9832446808510639, "grad_norm": 3.5854523181915283, "learning_rate": 9.42346989613888e-06, "loss": 1.2425, "step": 3697 }, { "epoch": 0.9835106382978723, "grad_norm": 3.536123752593994, "learning_rate": 9.423059826782355e-06, "loss": 1.2088, "step": 3698 }, { "epoch": 0.9837765957446809, "grad_norm": 3.5280613899230957, "learning_rate": 9.422649620571155e-06, "loss": 1.4956, "step": 3699 }, { "epoch": 0.9840425531914894, "grad_norm": 3.896684169769287, "learning_rate": 9.422239277517964e-06, "loss": 1.3236, "step": 3700 }, { "epoch": 0.9843085106382978, "grad_norm": 3.417961597442627, "learning_rate": 9.421828797635487e-06, "loss": 1.2044, "step": 3701 }, { "epoch": 0.9845744680851064, "grad_norm": 3.4376044273376465, "learning_rate": 9.421418180936419e-06, "loss": 1.2657, "step": 3702 }, { "epoch": 0.9848404255319149, "grad_norm": 3.8742475509643555, "learning_rate": 9.421007427433467e-06, "loss": 1.2526, "step": 3703 }, { "epoch": 0.9851063829787234, "grad_norm": 4.002706527709961, "learning_rate": 9.42059653713934e-06, "loss": 1.446, "step": 3704 }, { "epoch": 0.9853723404255319, "grad_norm": 3.462308883666992, "learning_rate": 9.420185510066753e-06, "loss": 1.2338, "step": 3705 }, { "epoch": 0.9856382978723405, "grad_norm": 3.684730291366577, "learning_rate": 9.41977434622842e-06, "loss": 1.2417, "step": 3706 }, { "epoch": 0.9859042553191489, "grad_norm": 3.5235018730163574, "learning_rate": 9.419363045637067e-06, "loss": 1.3775, "step": 3707 }, { "epoch": 0.9861702127659574, "grad_norm": 3.2986860275268555, "learning_rate": 9.418951608305417e-06, "loss": 1.1967, "step": 3708 }, { "epoch": 0.986436170212766, "grad_norm": 3.2341742515563965, "learning_rate": 9.418540034246202e-06, "loss": 1.1223, "step": 3709 }, { "epoch": 0.9867021276595744, "grad_norm": 3.5601837635040283, "learning_rate": 9.418128323472157e-06, "loss": 1.2934, "step": 3710 }, { "epoch": 0.986968085106383, "grad_norm": 4.002072811126709, "learning_rate": 9.41771647599602e-06, "loss": 1.2226, "step": 3711 }, { "epoch": 0.9872340425531915, "grad_norm": 3.6095480918884277, "learning_rate": 9.417304491830533e-06, "loss": 1.2332, "step": 3712 }, { "epoch": 0.9875, "grad_norm": 3.7682595252990723, "learning_rate": 9.416892370988445e-06, "loss": 1.1929, "step": 3713 }, { "epoch": 0.9877659574468085, "grad_norm": 3.4983551502227783, "learning_rate": 9.416480113482505e-06, "loss": 1.2426, "step": 3714 }, { "epoch": 0.988031914893617, "grad_norm": 3.490725040435791, "learning_rate": 9.416067719325472e-06, "loss": 1.2009, "step": 3715 }, { "epoch": 0.9882978723404255, "grad_norm": 3.564605474472046, "learning_rate": 9.415655188530104e-06, "loss": 1.2105, "step": 3716 }, { "epoch": 0.988563829787234, "grad_norm": 3.5361475944519043, "learning_rate": 9.415242521109166e-06, "loss": 1.3389, "step": 3717 }, { "epoch": 0.9888297872340426, "grad_norm": 3.3671114444732666, "learning_rate": 9.414829717075426e-06, "loss": 1.3157, "step": 3718 }, { "epoch": 0.989095744680851, "grad_norm": 3.7442715167999268, "learning_rate": 9.414416776441656e-06, "loss": 1.1551, "step": 3719 }, { "epoch": 0.9893617021276596, "grad_norm": 3.4414875507354736, "learning_rate": 9.414003699220636e-06, "loss": 1.2135, "step": 3720 }, { "epoch": 0.9896276595744681, "grad_norm": 4.052205562591553, "learning_rate": 9.413590485425143e-06, "loss": 1.3299, "step": 3721 }, { "epoch": 0.9898936170212767, "grad_norm": 3.0953876972198486, "learning_rate": 9.413177135067964e-06, "loss": 1.1183, "step": 3722 }, { "epoch": 0.9901595744680851, "grad_norm": 3.767108678817749, "learning_rate": 9.41276364816189e-06, "loss": 1.325, "step": 3723 }, { "epoch": 0.9904255319148936, "grad_norm": 3.3017489910125732, "learning_rate": 9.412350024719713e-06, "loss": 1.2328, "step": 3724 }, { "epoch": 0.9906914893617021, "grad_norm": 3.5287554264068604, "learning_rate": 9.41193626475423e-06, "loss": 1.2442, "step": 3725 }, { "epoch": 0.9909574468085106, "grad_norm": 3.6898324489593506, "learning_rate": 9.411522368278243e-06, "loss": 1.2682, "step": 3726 }, { "epoch": 0.9912234042553192, "grad_norm": 3.9228873252868652, "learning_rate": 9.411108335304562e-06, "loss": 1.3415, "step": 3727 }, { "epoch": 0.9914893617021276, "grad_norm": 3.9011435508728027, "learning_rate": 9.410694165845996e-06, "loss": 1.2388, "step": 3728 }, { "epoch": 0.9917553191489362, "grad_norm": 3.714230537414551, "learning_rate": 9.41027985991536e-06, "loss": 1.2085, "step": 3729 }, { "epoch": 0.9920212765957447, "grad_norm": 3.627887010574341, "learning_rate": 9.409865417525473e-06, "loss": 1.2682, "step": 3730 }, { "epoch": 0.9922872340425531, "grad_norm": 3.4126439094543457, "learning_rate": 9.409450838689156e-06, "loss": 1.2089, "step": 3731 }, { "epoch": 0.9925531914893617, "grad_norm": 3.5555756092071533, "learning_rate": 9.409036123419239e-06, "loss": 1.2066, "step": 3732 }, { "epoch": 0.9928191489361702, "grad_norm": 3.5292632579803467, "learning_rate": 9.408621271728555e-06, "loss": 1.1913, "step": 3733 }, { "epoch": 0.9930851063829788, "grad_norm": 3.5443150997161865, "learning_rate": 9.408206283629937e-06, "loss": 1.2293, "step": 3734 }, { "epoch": 0.9933510638297872, "grad_norm": 3.8415119647979736, "learning_rate": 9.407791159136226e-06, "loss": 1.496, "step": 3735 }, { "epoch": 0.9936170212765958, "grad_norm": 3.647085189819336, "learning_rate": 9.407375898260267e-06, "loss": 1.1983, "step": 3736 }, { "epoch": 0.9938829787234043, "grad_norm": 3.2950799465179443, "learning_rate": 9.40696050101491e-06, "loss": 1.1298, "step": 3737 }, { "epoch": 0.9941489361702127, "grad_norm": 3.837249517440796, "learning_rate": 9.406544967413008e-06, "loss": 1.2763, "step": 3738 }, { "epoch": 0.9944148936170213, "grad_norm": 3.437069892883301, "learning_rate": 9.406129297467414e-06, "loss": 1.1689, "step": 3739 }, { "epoch": 0.9946808510638298, "grad_norm": 3.7600064277648926, "learning_rate": 9.405713491190992e-06, "loss": 1.4092, "step": 3740 }, { "epoch": 0.9949468085106383, "grad_norm": 3.547830104827881, "learning_rate": 9.405297548596607e-06, "loss": 1.3794, "step": 3741 }, { "epoch": 0.9952127659574468, "grad_norm": 3.673377752304077, "learning_rate": 9.404881469697132e-06, "loss": 1.1934, "step": 3742 }, { "epoch": 0.9954787234042554, "grad_norm": 3.6018290519714355, "learning_rate": 9.404465254505435e-06, "loss": 1.2228, "step": 3743 }, { "epoch": 0.9957446808510638, "grad_norm": 3.5014569759368896, "learning_rate": 9.4040489030344e-06, "loss": 1.1731, "step": 3744 }, { "epoch": 0.9960106382978723, "grad_norm": 3.6044108867645264, "learning_rate": 9.403632415296907e-06, "loss": 1.2917, "step": 3745 }, { "epoch": 0.9962765957446809, "grad_norm": 3.626147985458374, "learning_rate": 9.40321579130584e-06, "loss": 1.2297, "step": 3746 }, { "epoch": 0.9965425531914893, "grad_norm": 3.5548157691955566, "learning_rate": 9.402799031074095e-06, "loss": 1.2096, "step": 3747 }, { "epoch": 0.9968085106382979, "grad_norm": 4.016201019287109, "learning_rate": 9.402382134614563e-06, "loss": 1.2461, "step": 3748 }, { "epoch": 0.9970744680851064, "grad_norm": 3.2637929916381836, "learning_rate": 9.401965101940144e-06, "loss": 1.1531, "step": 3749 }, { "epoch": 0.9973404255319149, "grad_norm": 3.330240249633789, "learning_rate": 9.40154793306374e-06, "loss": 1.1598, "step": 3750 }, { "epoch": 0.9976063829787234, "grad_norm": 3.522907257080078, "learning_rate": 9.401130627998265e-06, "loss": 1.1563, "step": 3751 }, { "epoch": 0.997872340425532, "grad_norm": 3.462400197982788, "learning_rate": 9.400713186756625e-06, "loss": 1.0948, "step": 3752 }, { "epoch": 0.9981382978723404, "grad_norm": 3.6393964290618896, "learning_rate": 9.400295609351738e-06, "loss": 1.2499, "step": 3753 }, { "epoch": 0.9984042553191489, "grad_norm": 3.4382801055908203, "learning_rate": 9.399877895796526e-06, "loss": 1.2587, "step": 3754 }, { "epoch": 0.9986702127659575, "grad_norm": 3.769301414489746, "learning_rate": 9.399460046103908e-06, "loss": 1.283, "step": 3755 }, { "epoch": 0.9989361702127659, "grad_norm": 3.3904542922973633, "learning_rate": 9.399042060286819e-06, "loss": 1.3667, "step": 3756 }, { "epoch": 0.9992021276595745, "grad_norm": 3.413027763366699, "learning_rate": 9.398623938358188e-06, "loss": 1.1575, "step": 3757 }, { "epoch": 0.999468085106383, "grad_norm": 3.8313398361206055, "learning_rate": 9.398205680330954e-06, "loss": 1.1665, "step": 3758 }, { "epoch": 0.9997340425531915, "grad_norm": 3.5040853023529053, "learning_rate": 9.397787286218058e-06, "loss": 1.3182, "step": 3759 }, { "epoch": 1.0, "grad_norm": 3.6746809482574463, "learning_rate": 9.397368756032445e-06, "loss": 1.2287, "step": 3760 }, { "epoch": 1.0002659574468085, "grad_norm": 3.308379650115967, "learning_rate": 9.396950089787066e-06, "loss": 0.8299, "step": 3761 }, { "epoch": 1.000531914893617, "grad_norm": 3.8195013999938965, "learning_rate": 9.396531287494877e-06, "loss": 0.8431, "step": 3762 }, { "epoch": 1.0007978723404256, "grad_norm": 3.317417621612549, "learning_rate": 9.396112349168832e-06, "loss": 0.9087, "step": 3763 }, { "epoch": 1.001063829787234, "grad_norm": 3.6359126567840576, "learning_rate": 9.395693274821893e-06, "loss": 0.8605, "step": 3764 }, { "epoch": 1.0013297872340425, "grad_norm": 3.3946707248687744, "learning_rate": 9.39527406446703e-06, "loss": 0.9424, "step": 3765 }, { "epoch": 1.001595744680851, "grad_norm": 3.7910523414611816, "learning_rate": 9.394854718117214e-06, "loss": 0.7635, "step": 3766 }, { "epoch": 1.0018617021276597, "grad_norm": 3.847181558609009, "learning_rate": 9.394435235785417e-06, "loss": 0.8419, "step": 3767 }, { "epoch": 1.0021276595744681, "grad_norm": 3.5999948978424072, "learning_rate": 9.394015617484621e-06, "loss": 0.7906, "step": 3768 }, { "epoch": 1.0023936170212766, "grad_norm": 3.53528094291687, "learning_rate": 9.393595863227808e-06, "loss": 0.7652, "step": 3769 }, { "epoch": 1.002659574468085, "grad_norm": 4.102449417114258, "learning_rate": 9.393175973027967e-06, "loss": 0.837, "step": 3770 }, { "epoch": 1.0029255319148935, "grad_norm": 4.625784397125244, "learning_rate": 9.392755946898087e-06, "loss": 0.8694, "step": 3771 }, { "epoch": 1.0031914893617022, "grad_norm": 3.7955758571624756, "learning_rate": 9.392335784851168e-06, "loss": 0.7127, "step": 3772 }, { "epoch": 1.0034574468085107, "grad_norm": 4.6287970542907715, "learning_rate": 9.39191548690021e-06, "loss": 0.6634, "step": 3773 }, { "epoch": 1.0037234042553191, "grad_norm": 4.188403129577637, "learning_rate": 9.391495053058213e-06, "loss": 0.7676, "step": 3774 }, { "epoch": 1.0039893617021276, "grad_norm": 4.061558723449707, "learning_rate": 9.39107448333819e-06, "loss": 0.6863, "step": 3775 }, { "epoch": 1.004255319148936, "grad_norm": 3.9614672660827637, "learning_rate": 9.390653777753151e-06, "loss": 0.8902, "step": 3776 }, { "epoch": 1.0045212765957447, "grad_norm": 3.7978405952453613, "learning_rate": 9.390232936316116e-06, "loss": 0.8576, "step": 3777 }, { "epoch": 1.0047872340425532, "grad_norm": 4.081401348114014, "learning_rate": 9.389811959040106e-06, "loss": 0.9293, "step": 3778 }, { "epoch": 1.0050531914893617, "grad_norm": 4.4708123207092285, "learning_rate": 9.389390845938147e-06, "loss": 0.7971, "step": 3779 }, { "epoch": 1.0053191489361701, "grad_norm": 3.670398235321045, "learning_rate": 9.388969597023265e-06, "loss": 0.7746, "step": 3780 }, { "epoch": 1.0055851063829788, "grad_norm": 3.678659200668335, "learning_rate": 9.388548212308496e-06, "loss": 0.7505, "step": 3781 }, { "epoch": 1.0058510638297873, "grad_norm": 3.943781614303589, "learning_rate": 9.388126691806879e-06, "loss": 0.7205, "step": 3782 }, { "epoch": 1.0061170212765957, "grad_norm": 3.976630926132202, "learning_rate": 9.387705035531455e-06, "loss": 0.8597, "step": 3783 }, { "epoch": 1.0063829787234042, "grad_norm": 3.6376004219055176, "learning_rate": 9.387283243495273e-06, "loss": 0.7911, "step": 3784 }, { "epoch": 1.0066489361702127, "grad_norm": 3.698863983154297, "learning_rate": 9.386861315711382e-06, "loss": 0.7718, "step": 3785 }, { "epoch": 1.0069148936170214, "grad_norm": 3.553309679031372, "learning_rate": 9.386439252192836e-06, "loss": 0.8233, "step": 3786 }, { "epoch": 1.0071808510638298, "grad_norm": 3.588423252105713, "learning_rate": 9.386017052952694e-06, "loss": 0.782, "step": 3787 }, { "epoch": 1.0074468085106383, "grad_norm": 3.5977461338043213, "learning_rate": 9.385594718004023e-06, "loss": 0.8548, "step": 3788 }, { "epoch": 1.0077127659574467, "grad_norm": 4.447713375091553, "learning_rate": 9.385172247359887e-06, "loss": 0.833, "step": 3789 }, { "epoch": 1.0079787234042554, "grad_norm": 3.6044774055480957, "learning_rate": 9.384749641033358e-06, "loss": 0.8453, "step": 3790 }, { "epoch": 1.008244680851064, "grad_norm": 3.4909749031066895, "learning_rate": 9.384326899037515e-06, "loss": 0.7723, "step": 3791 }, { "epoch": 1.0085106382978724, "grad_norm": 3.8825156688690186, "learning_rate": 9.383904021385433e-06, "loss": 0.7219, "step": 3792 }, { "epoch": 1.0087765957446808, "grad_norm": 4.605208396911621, "learning_rate": 9.3834810080902e-06, "loss": 0.8625, "step": 3793 }, { "epoch": 1.0090425531914893, "grad_norm": 3.8827695846557617, "learning_rate": 9.383057859164904e-06, "loss": 0.7579, "step": 3794 }, { "epoch": 1.009308510638298, "grad_norm": 3.8152899742126465, "learning_rate": 9.382634574622637e-06, "loss": 0.7785, "step": 3795 }, { "epoch": 1.0095744680851064, "grad_norm": 3.9749300479888916, "learning_rate": 9.382211154476497e-06, "loss": 0.7768, "step": 3796 }, { "epoch": 1.0098404255319149, "grad_norm": 3.9352428913116455, "learning_rate": 9.381787598739586e-06, "loss": 0.9265, "step": 3797 }, { "epoch": 1.0101063829787233, "grad_norm": 3.8235480785369873, "learning_rate": 9.381363907425006e-06, "loss": 0.7915, "step": 3798 }, { "epoch": 1.0103723404255318, "grad_norm": 4.1063103675842285, "learning_rate": 9.380940080545869e-06, "loss": 0.8271, "step": 3799 }, { "epoch": 1.0106382978723405, "grad_norm": 3.7685892581939697, "learning_rate": 9.380516118115287e-06, "loss": 0.7611, "step": 3800 }, { "epoch": 1.010904255319149, "grad_norm": 3.679269790649414, "learning_rate": 9.380092020146379e-06, "loss": 0.7943, "step": 3801 }, { "epoch": 1.0111702127659574, "grad_norm": 3.7096617221832275, "learning_rate": 9.379667786652267e-06, "loss": 0.8254, "step": 3802 }, { "epoch": 1.0114361702127659, "grad_norm": 3.4425570964813232, "learning_rate": 9.379243417646077e-06, "loss": 0.7538, "step": 3803 }, { "epoch": 1.0117021276595746, "grad_norm": 3.324869155883789, "learning_rate": 9.378818913140941e-06, "loss": 0.6687, "step": 3804 }, { "epoch": 1.011968085106383, "grad_norm": 3.6117424964904785, "learning_rate": 9.378394273149992e-06, "loss": 0.8059, "step": 3805 }, { "epoch": 1.0122340425531915, "grad_norm": 3.843747615814209, "learning_rate": 9.377969497686369e-06, "loss": 0.7257, "step": 3806 }, { "epoch": 1.0125, "grad_norm": 3.997349977493286, "learning_rate": 9.377544586763216e-06, "loss": 0.837, "step": 3807 }, { "epoch": 1.0127659574468084, "grad_norm": 3.5746796131134033, "learning_rate": 9.377119540393677e-06, "loss": 0.7891, "step": 3808 }, { "epoch": 1.013031914893617, "grad_norm": 3.7787206172943115, "learning_rate": 9.37669435859091e-06, "loss": 0.7984, "step": 3809 }, { "epoch": 1.0132978723404256, "grad_norm": 4.2211174964904785, "learning_rate": 9.376269041368063e-06, "loss": 0.7274, "step": 3810 }, { "epoch": 1.013563829787234, "grad_norm": 3.591057300567627, "learning_rate": 9.375843588738302e-06, "loss": 0.807, "step": 3811 }, { "epoch": 1.0138297872340425, "grad_norm": 3.5017266273498535, "learning_rate": 9.375418000714787e-06, "loss": 0.7173, "step": 3812 }, { "epoch": 1.014095744680851, "grad_norm": 4.4692487716674805, "learning_rate": 9.374992277310688e-06, "loss": 0.7584, "step": 3813 }, { "epoch": 1.0143617021276596, "grad_norm": 4.453067302703857, "learning_rate": 9.374566418539178e-06, "loss": 0.8444, "step": 3814 }, { "epoch": 1.014627659574468, "grad_norm": 4.007133483886719, "learning_rate": 9.37414042441343e-06, "loss": 0.7163, "step": 3815 }, { "epoch": 1.0148936170212766, "grad_norm": 3.714021682739258, "learning_rate": 9.37371429494663e-06, "loss": 0.7979, "step": 3816 }, { "epoch": 1.015159574468085, "grad_norm": 4.196898460388184, "learning_rate": 9.37328803015196e-06, "loss": 0.8057, "step": 3817 }, { "epoch": 1.0154255319148937, "grad_norm": 3.6794686317443848, "learning_rate": 9.37286163004261e-06, "loss": 0.8608, "step": 3818 }, { "epoch": 1.0156914893617022, "grad_norm": 4.034078121185303, "learning_rate": 9.37243509463177e-06, "loss": 0.8794, "step": 3819 }, { "epoch": 1.0159574468085106, "grad_norm": 3.671816110610962, "learning_rate": 9.37200842393264e-06, "loss": 0.755, "step": 3820 }, { "epoch": 1.016223404255319, "grad_norm": 3.6856508255004883, "learning_rate": 9.371581617958424e-06, "loss": 0.7839, "step": 3821 }, { "epoch": 1.0164893617021276, "grad_norm": 4.332293510437012, "learning_rate": 9.371154676722326e-06, "loss": 0.8305, "step": 3822 }, { "epoch": 1.0167553191489362, "grad_norm": 4.032402038574219, "learning_rate": 9.370727600237557e-06, "loss": 0.8552, "step": 3823 }, { "epoch": 1.0170212765957447, "grad_norm": 4.2808756828308105, "learning_rate": 9.370300388517329e-06, "loss": 0.8609, "step": 3824 }, { "epoch": 1.0172872340425532, "grad_norm": 3.675684690475464, "learning_rate": 9.36987304157486e-06, "loss": 0.7307, "step": 3825 }, { "epoch": 1.0175531914893616, "grad_norm": 3.6821727752685547, "learning_rate": 9.369445559423376e-06, "loss": 0.8393, "step": 3826 }, { "epoch": 1.0178191489361703, "grad_norm": 4.112141132354736, "learning_rate": 9.369017942076101e-06, "loss": 0.8027, "step": 3827 }, { "epoch": 1.0180851063829788, "grad_norm": 3.8829188346862793, "learning_rate": 9.368590189546268e-06, "loss": 0.8558, "step": 3828 }, { "epoch": 1.0183510638297872, "grad_norm": 4.182821750640869, "learning_rate": 9.368162301847112e-06, "loss": 0.9872, "step": 3829 }, { "epoch": 1.0186170212765957, "grad_norm": 4.043810844421387, "learning_rate": 9.36773427899187e-06, "loss": 0.731, "step": 3830 }, { "epoch": 1.0188829787234042, "grad_norm": 3.6814448833465576, "learning_rate": 9.367306120993787e-06, "loss": 0.7434, "step": 3831 }, { "epoch": 1.0191489361702128, "grad_norm": 3.823333978652954, "learning_rate": 9.366877827866112e-06, "loss": 0.7962, "step": 3832 }, { "epoch": 1.0194148936170213, "grad_norm": 4.10197639465332, "learning_rate": 9.366449399622092e-06, "loss": 0.8655, "step": 3833 }, { "epoch": 1.0196808510638298, "grad_norm": 3.4033734798431396, "learning_rate": 9.366020836274991e-06, "loss": 0.6871, "step": 3834 }, { "epoch": 1.0199468085106382, "grad_norm": 3.9210493564605713, "learning_rate": 9.365592137838063e-06, "loss": 0.8913, "step": 3835 }, { "epoch": 1.0202127659574467, "grad_norm": 3.972930431365967, "learning_rate": 9.365163304324576e-06, "loss": 0.7394, "step": 3836 }, { "epoch": 1.0204787234042554, "grad_norm": 3.603489875793457, "learning_rate": 9.364734335747795e-06, "loss": 0.6501, "step": 3837 }, { "epoch": 1.0207446808510638, "grad_norm": 3.678868532180786, "learning_rate": 9.364305232120997e-06, "loss": 0.7685, "step": 3838 }, { "epoch": 1.0210106382978723, "grad_norm": 4.074692726135254, "learning_rate": 9.363875993457454e-06, "loss": 0.8085, "step": 3839 }, { "epoch": 1.0212765957446808, "grad_norm": 3.683279514312744, "learning_rate": 9.363446619770452e-06, "loss": 0.7703, "step": 3840 }, { "epoch": 1.0215425531914895, "grad_norm": 3.837007999420166, "learning_rate": 9.363017111073273e-06, "loss": 0.8403, "step": 3841 }, { "epoch": 1.021808510638298, "grad_norm": 4.0264973640441895, "learning_rate": 9.362587467379208e-06, "loss": 0.8001, "step": 3842 }, { "epoch": 1.0220744680851064, "grad_norm": 3.9169387817382812, "learning_rate": 9.362157688701551e-06, "loss": 0.7603, "step": 3843 }, { "epoch": 1.0223404255319148, "grad_norm": 3.4985976219177246, "learning_rate": 9.3617277750536e-06, "loss": 0.6856, "step": 3844 }, { "epoch": 1.0226063829787233, "grad_norm": 3.9737682342529297, "learning_rate": 9.361297726448656e-06, "loss": 0.8021, "step": 3845 }, { "epoch": 1.022872340425532, "grad_norm": 4.206306457519531, "learning_rate": 9.360867542900023e-06, "loss": 0.7726, "step": 3846 }, { "epoch": 1.0231382978723405, "grad_norm": 3.5013468265533447, "learning_rate": 9.360437224421017e-06, "loss": 0.7046, "step": 3847 }, { "epoch": 1.023404255319149, "grad_norm": 4.186954021453857, "learning_rate": 9.360006771024947e-06, "loss": 0.8574, "step": 3848 }, { "epoch": 1.0236702127659574, "grad_norm": 3.8380942344665527, "learning_rate": 9.359576182725136e-06, "loss": 0.8463, "step": 3849 }, { "epoch": 1.023936170212766, "grad_norm": 4.439043998718262, "learning_rate": 9.359145459534906e-06, "loss": 0.868, "step": 3850 }, { "epoch": 1.0242021276595745, "grad_norm": 3.555283546447754, "learning_rate": 9.358714601467581e-06, "loss": 0.7842, "step": 3851 }, { "epoch": 1.024468085106383, "grad_norm": 3.4938576221466064, "learning_rate": 9.358283608536498e-06, "loss": 0.8562, "step": 3852 }, { "epoch": 1.0247340425531914, "grad_norm": 3.709388256072998, "learning_rate": 9.357852480754985e-06, "loss": 0.7753, "step": 3853 }, { "epoch": 1.025, "grad_norm": 3.594524621963501, "learning_rate": 9.357421218136387e-06, "loss": 0.9016, "step": 3854 }, { "epoch": 1.0252659574468086, "grad_norm": 3.8423714637756348, "learning_rate": 9.356989820694046e-06, "loss": 0.918, "step": 3855 }, { "epoch": 1.025531914893617, "grad_norm": 4.120334625244141, "learning_rate": 9.356558288441312e-06, "loss": 0.8276, "step": 3856 }, { "epoch": 1.0257978723404255, "grad_norm": 3.7441205978393555, "learning_rate": 9.356126621391532e-06, "loss": 0.6485, "step": 3857 }, { "epoch": 1.026063829787234, "grad_norm": 3.652815341949463, "learning_rate": 9.35569481955807e-06, "loss": 0.8443, "step": 3858 }, { "epoch": 1.0263297872340424, "grad_norm": 3.8127315044403076, "learning_rate": 9.355262882954277e-06, "loss": 0.8928, "step": 3859 }, { "epoch": 1.0265957446808511, "grad_norm": 4.254662036895752, "learning_rate": 9.354830811593527e-06, "loss": 0.7228, "step": 3860 }, { "epoch": 1.0268617021276596, "grad_norm": 3.737208366394043, "learning_rate": 9.354398605489182e-06, "loss": 0.7144, "step": 3861 }, { "epoch": 1.027127659574468, "grad_norm": 4.630359172821045, "learning_rate": 9.353966264654619e-06, "loss": 1.0136, "step": 3862 }, { "epoch": 1.0273936170212765, "grad_norm": 4.139670372009277, "learning_rate": 9.353533789103213e-06, "loss": 0.7467, "step": 3863 }, { "epoch": 1.0276595744680852, "grad_norm": 3.5735762119293213, "learning_rate": 9.353101178848345e-06, "loss": 0.6863, "step": 3864 }, { "epoch": 1.0279255319148937, "grad_norm": 4.091590881347656, "learning_rate": 9.352668433903402e-06, "loss": 0.9083, "step": 3865 }, { "epoch": 1.0281914893617021, "grad_norm": 4.462408065795898, "learning_rate": 9.352235554281775e-06, "loss": 0.8134, "step": 3866 }, { "epoch": 1.0284574468085106, "grad_norm": 4.514068603515625, "learning_rate": 9.351802539996853e-06, "loss": 0.8516, "step": 3867 }, { "epoch": 1.028723404255319, "grad_norm": 4.771678447723389, "learning_rate": 9.351369391062037e-06, "loss": 0.8317, "step": 3868 }, { "epoch": 1.0289893617021277, "grad_norm": 3.9608962535858154, "learning_rate": 9.350936107490731e-06, "loss": 0.7668, "step": 3869 }, { "epoch": 1.0292553191489362, "grad_norm": 3.6606082916259766, "learning_rate": 9.350502689296337e-06, "loss": 0.8021, "step": 3870 }, { "epoch": 1.0295212765957447, "grad_norm": 3.395991563796997, "learning_rate": 9.35006913649227e-06, "loss": 0.7561, "step": 3871 }, { "epoch": 1.0297872340425531, "grad_norm": 3.9416377544403076, "learning_rate": 9.34963544909194e-06, "loss": 0.6551, "step": 3872 }, { "epoch": 1.0300531914893618, "grad_norm": 3.8515100479125977, "learning_rate": 9.34920162710877e-06, "loss": 0.9596, "step": 3873 }, { "epoch": 1.0303191489361703, "grad_norm": 3.532066583633423, "learning_rate": 9.34876767055618e-06, "loss": 0.7312, "step": 3874 }, { "epoch": 1.0305851063829787, "grad_norm": 3.523547887802124, "learning_rate": 9.3483335794476e-06, "loss": 0.9029, "step": 3875 }, { "epoch": 1.0308510638297872, "grad_norm": 3.8942482471466064, "learning_rate": 9.347899353796456e-06, "loss": 0.852, "step": 3876 }, { "epoch": 1.0311170212765957, "grad_norm": 3.8025577068328857, "learning_rate": 9.347464993616191e-06, "loss": 0.7704, "step": 3877 }, { "epoch": 1.0313829787234043, "grad_norm": 3.5986201763153076, "learning_rate": 9.347030498920239e-06, "loss": 0.8289, "step": 3878 }, { "epoch": 1.0316489361702128, "grad_norm": 4.27517032623291, "learning_rate": 9.346595869722044e-06, "loss": 0.9252, "step": 3879 }, { "epoch": 1.0319148936170213, "grad_norm": 3.845385789871216, "learning_rate": 9.346161106035056e-06, "loss": 0.7372, "step": 3880 }, { "epoch": 1.0321808510638297, "grad_norm": 3.875645875930786, "learning_rate": 9.345726207872728e-06, "loss": 0.9036, "step": 3881 }, { "epoch": 1.0324468085106382, "grad_norm": 4.004083156585693, "learning_rate": 9.345291175248514e-06, "loss": 0.8, "step": 3882 }, { "epoch": 1.0327127659574469, "grad_norm": 4.025826930999756, "learning_rate": 9.344856008175874e-06, "loss": 0.8063, "step": 3883 }, { "epoch": 1.0329787234042553, "grad_norm": 4.168485641479492, "learning_rate": 9.344420706668274e-06, "loss": 0.8712, "step": 3884 }, { "epoch": 1.0332446808510638, "grad_norm": 3.7525241374969482, "learning_rate": 9.343985270739184e-06, "loss": 0.8075, "step": 3885 }, { "epoch": 1.0335106382978723, "grad_norm": 4.079540729522705, "learning_rate": 9.343549700402073e-06, "loss": 0.7574, "step": 3886 }, { "epoch": 1.033776595744681, "grad_norm": 3.5480105876922607, "learning_rate": 9.34311399567042e-06, "loss": 0.8544, "step": 3887 }, { "epoch": 1.0340425531914894, "grad_norm": 3.6420836448669434, "learning_rate": 9.342678156557709e-06, "loss": 0.8279, "step": 3888 }, { "epoch": 1.0343085106382979, "grad_norm": 3.8541533946990967, "learning_rate": 9.342242183077422e-06, "loss": 0.8794, "step": 3889 }, { "epoch": 1.0345744680851063, "grad_norm": 3.5861008167266846, "learning_rate": 9.341806075243049e-06, "loss": 0.7949, "step": 3890 }, { "epoch": 1.0348404255319148, "grad_norm": 4.284236431121826, "learning_rate": 9.341369833068086e-06, "loss": 0.7882, "step": 3891 }, { "epoch": 1.0351063829787235, "grad_norm": 4.239330768585205, "learning_rate": 9.340933456566028e-06, "loss": 0.8299, "step": 3892 }, { "epoch": 1.035372340425532, "grad_norm": 4.633347988128662, "learning_rate": 9.340496945750377e-06, "loss": 0.9297, "step": 3893 }, { "epoch": 1.0356382978723404, "grad_norm": 4.2658538818359375, "learning_rate": 9.340060300634642e-06, "loss": 0.7928, "step": 3894 }, { "epoch": 1.0359042553191489, "grad_norm": 3.876652717590332, "learning_rate": 9.33962352123233e-06, "loss": 0.7742, "step": 3895 }, { "epoch": 1.0361702127659576, "grad_norm": 3.939422130584717, "learning_rate": 9.339186607556959e-06, "loss": 0.7676, "step": 3896 }, { "epoch": 1.036436170212766, "grad_norm": 3.9666736125946045, "learning_rate": 9.338749559622042e-06, "loss": 0.8759, "step": 3897 }, { "epoch": 1.0367021276595745, "grad_norm": 3.6032910346984863, "learning_rate": 9.338312377441108e-06, "loss": 0.6806, "step": 3898 }, { "epoch": 1.036968085106383, "grad_norm": 3.6236395835876465, "learning_rate": 9.337875061027681e-06, "loss": 0.8275, "step": 3899 }, { "epoch": 1.0372340425531914, "grad_norm": 4.132247447967529, "learning_rate": 9.337437610395292e-06, "loss": 0.8429, "step": 3900 }, { "epoch": 1.0375, "grad_norm": 3.7111639976501465, "learning_rate": 9.337000025557477e-06, "loss": 0.9638, "step": 3901 }, { "epoch": 1.0377659574468086, "grad_norm": 3.9870896339416504, "learning_rate": 9.336562306527775e-06, "loss": 0.7931, "step": 3902 }, { "epoch": 1.038031914893617, "grad_norm": 3.9265518188476562, "learning_rate": 9.336124453319729e-06, "loss": 0.7928, "step": 3903 }, { "epoch": 1.0382978723404255, "grad_norm": 3.5974245071411133, "learning_rate": 9.335686465946888e-06, "loss": 0.7127, "step": 3904 }, { "epoch": 1.038563829787234, "grad_norm": 3.6213388442993164, "learning_rate": 9.335248344422803e-06, "loss": 0.7669, "step": 3905 }, { "epoch": 1.0388297872340426, "grad_norm": 4.555843830108643, "learning_rate": 9.33481008876103e-06, "loss": 0.8885, "step": 3906 }, { "epoch": 1.039095744680851, "grad_norm": 4.553684234619141, "learning_rate": 9.33437169897513e-06, "loss": 0.9339, "step": 3907 }, { "epoch": 1.0393617021276595, "grad_norm": 4.390134811401367, "learning_rate": 9.333933175078665e-06, "loss": 0.887, "step": 3908 }, { "epoch": 1.039627659574468, "grad_norm": 4.3838677406311035, "learning_rate": 9.333494517085205e-06, "loss": 0.8234, "step": 3909 }, { "epoch": 1.0398936170212767, "grad_norm": 4.019488334655762, "learning_rate": 9.333055725008323e-06, "loss": 0.9096, "step": 3910 }, { "epoch": 1.0401595744680852, "grad_norm": 3.4591004848480225, "learning_rate": 9.332616798861596e-06, "loss": 0.7404, "step": 3911 }, { "epoch": 1.0404255319148936, "grad_norm": 4.587208271026611, "learning_rate": 9.332177738658603e-06, "loss": 0.8192, "step": 3912 }, { "epoch": 1.040691489361702, "grad_norm": 3.734438180923462, "learning_rate": 9.331738544412932e-06, "loss": 0.8286, "step": 3913 }, { "epoch": 1.0409574468085105, "grad_norm": 3.7644083499908447, "learning_rate": 9.33129921613817e-06, "loss": 0.8243, "step": 3914 }, { "epoch": 1.0412234042553192, "grad_norm": 3.412766456604004, "learning_rate": 9.33085975384791e-06, "loss": 0.8141, "step": 3915 }, { "epoch": 1.0414893617021277, "grad_norm": 3.1695566177368164, "learning_rate": 9.33042015755575e-06, "loss": 0.6531, "step": 3916 }, { "epoch": 1.0417553191489362, "grad_norm": 4.0986151695251465, "learning_rate": 9.329980427275293e-06, "loss": 0.8253, "step": 3917 }, { "epoch": 1.0420212765957446, "grad_norm": 3.9123079776763916, "learning_rate": 9.329540563020143e-06, "loss": 0.8211, "step": 3918 }, { "epoch": 1.0422872340425533, "grad_norm": 3.860915184020996, "learning_rate": 9.32910056480391e-06, "loss": 0.7886, "step": 3919 }, { "epoch": 1.0425531914893618, "grad_norm": 3.6465773582458496, "learning_rate": 9.328660432640211e-06, "loss": 0.7254, "step": 3920 }, { "epoch": 1.0428191489361702, "grad_norm": 4.174450874328613, "learning_rate": 9.328220166542659e-06, "loss": 0.8686, "step": 3921 }, { "epoch": 1.0430851063829787, "grad_norm": 3.563661575317383, "learning_rate": 9.32777976652488e-06, "loss": 0.8862, "step": 3922 }, { "epoch": 1.0433510638297872, "grad_norm": 3.976609468460083, "learning_rate": 9.3273392326005e-06, "loss": 0.9412, "step": 3923 }, { "epoch": 1.0436170212765958, "grad_norm": 3.979386568069458, "learning_rate": 9.32689856478315e-06, "loss": 0.767, "step": 3924 }, { "epoch": 1.0438829787234043, "grad_norm": 3.6504030227661133, "learning_rate": 9.326457763086463e-06, "loss": 0.7288, "step": 3925 }, { "epoch": 1.0441489361702128, "grad_norm": 3.5788464546203613, "learning_rate": 9.32601682752408e-06, "loss": 0.7756, "step": 3926 }, { "epoch": 1.0444148936170212, "grad_norm": 4.129055976867676, "learning_rate": 9.325575758109642e-06, "loss": 0.8129, "step": 3927 }, { "epoch": 1.0446808510638297, "grad_norm": 4.022395133972168, "learning_rate": 9.325134554856799e-06, "loss": 0.8346, "step": 3928 }, { "epoch": 1.0449468085106384, "grad_norm": 3.9106342792510986, "learning_rate": 9.3246932177792e-06, "loss": 0.7345, "step": 3929 }, { "epoch": 1.0452127659574468, "grad_norm": 5.765318870544434, "learning_rate": 9.324251746890501e-06, "loss": 1.0247, "step": 3930 }, { "epoch": 1.0454787234042553, "grad_norm": 3.858736276626587, "learning_rate": 9.323810142204361e-06, "loss": 0.8736, "step": 3931 }, { "epoch": 1.0457446808510638, "grad_norm": 3.313824415206909, "learning_rate": 9.323368403734445e-06, "loss": 0.8105, "step": 3932 }, { "epoch": 1.0460106382978724, "grad_norm": 3.7220394611358643, "learning_rate": 9.32292653149442e-06, "loss": 0.7904, "step": 3933 }, { "epoch": 1.046276595744681, "grad_norm": 3.852928638458252, "learning_rate": 9.32248452549796e-06, "loss": 0.7263, "step": 3934 }, { "epoch": 1.0465425531914894, "grad_norm": 3.9275519847869873, "learning_rate": 9.322042385758738e-06, "loss": 0.8318, "step": 3935 }, { "epoch": 1.0468085106382978, "grad_norm": 4.239774227142334, "learning_rate": 9.321600112290439e-06, "loss": 0.7238, "step": 3936 }, { "epoch": 1.0470744680851063, "grad_norm": 3.672391891479492, "learning_rate": 9.321157705106741e-06, "loss": 0.87, "step": 3937 }, { "epoch": 1.047340425531915, "grad_norm": 3.510413646697998, "learning_rate": 9.320715164221338e-06, "loss": 0.7332, "step": 3938 }, { "epoch": 1.0476063829787234, "grad_norm": 3.9943974018096924, "learning_rate": 9.32027248964792e-06, "loss": 0.7492, "step": 3939 }, { "epoch": 1.047872340425532, "grad_norm": 3.3832719326019287, "learning_rate": 9.319829681400185e-06, "loss": 0.7657, "step": 3940 }, { "epoch": 1.0481382978723404, "grad_norm": 3.761160135269165, "learning_rate": 9.319386739491834e-06, "loss": 0.7968, "step": 3941 }, { "epoch": 1.048404255319149, "grad_norm": 3.9942009449005127, "learning_rate": 9.31894366393657e-06, "loss": 0.8027, "step": 3942 }, { "epoch": 1.0486702127659575, "grad_norm": 3.8257179260253906, "learning_rate": 9.318500454748105e-06, "loss": 0.8245, "step": 3943 }, { "epoch": 1.048936170212766, "grad_norm": 4.181244850158691, "learning_rate": 9.318057111940153e-06, "loss": 0.7048, "step": 3944 }, { "epoch": 1.0492021276595744, "grad_norm": 4.021924018859863, "learning_rate": 9.317613635526431e-06, "loss": 0.8669, "step": 3945 }, { "epoch": 1.049468085106383, "grad_norm": 4.112471580505371, "learning_rate": 9.317170025520656e-06, "loss": 0.7719, "step": 3946 }, { "epoch": 1.0497340425531916, "grad_norm": 4.079671859741211, "learning_rate": 9.31672628193656e-06, "loss": 0.9156, "step": 3947 }, { "epoch": 1.05, "grad_norm": 3.6803247928619385, "learning_rate": 9.31628240478787e-06, "loss": 0.741, "step": 3948 }, { "epoch": 1.0502659574468085, "grad_norm": 3.8785572052001953, "learning_rate": 9.315838394088322e-06, "loss": 0.7652, "step": 3949 }, { "epoch": 1.050531914893617, "grad_norm": 3.9115874767303467, "learning_rate": 9.31539424985165e-06, "loss": 0.8373, "step": 3950 }, { "epoch": 1.0507978723404254, "grad_norm": 4.03147029876709, "learning_rate": 9.3149499720916e-06, "loss": 0.7918, "step": 3951 }, { "epoch": 1.0510638297872341, "grad_norm": 3.7957963943481445, "learning_rate": 9.31450556082192e-06, "loss": 0.8583, "step": 3952 }, { "epoch": 1.0513297872340426, "grad_norm": 3.83341646194458, "learning_rate": 9.314061016056354e-06, "loss": 0.8166, "step": 3953 }, { "epoch": 1.051595744680851, "grad_norm": 3.7149436473846436, "learning_rate": 9.313616337808664e-06, "loss": 0.7958, "step": 3954 }, { "epoch": 1.0518617021276595, "grad_norm": 3.941300392150879, "learning_rate": 9.313171526092606e-06, "loss": 0.8765, "step": 3955 }, { "epoch": 1.0521276595744682, "grad_norm": 3.688690423965454, "learning_rate": 9.312726580921942e-06, "loss": 0.7011, "step": 3956 }, { "epoch": 1.0523936170212767, "grad_norm": 3.683009147644043, "learning_rate": 9.31228150231044e-06, "loss": 0.7307, "step": 3957 }, { "epoch": 1.0526595744680851, "grad_norm": 3.816660165786743, "learning_rate": 9.311836290271872e-06, "loss": 0.8001, "step": 3958 }, { "epoch": 1.0529255319148936, "grad_norm": 3.8870654106140137, "learning_rate": 9.311390944820012e-06, "loss": 0.7563, "step": 3959 }, { "epoch": 1.053191489361702, "grad_norm": 4.011544704437256, "learning_rate": 9.31094546596864e-06, "loss": 0.946, "step": 3960 }, { "epoch": 1.0534574468085107, "grad_norm": 4.572283744812012, "learning_rate": 9.31049985373154e-06, "loss": 0.8803, "step": 3961 }, { "epoch": 1.0537234042553192, "grad_norm": 3.7621991634368896, "learning_rate": 9.310054108122499e-06, "loss": 0.8607, "step": 3962 }, { "epoch": 1.0539893617021276, "grad_norm": 3.4957644939422607, "learning_rate": 9.309608229155311e-06, "loss": 0.7627, "step": 3963 }, { "epoch": 1.054255319148936, "grad_norm": 4.007942199707031, "learning_rate": 9.30916221684377e-06, "loss": 0.7599, "step": 3964 }, { "epoch": 1.0545212765957448, "grad_norm": 3.790900945663452, "learning_rate": 9.308716071201676e-06, "loss": 0.6845, "step": 3965 }, { "epoch": 1.0547872340425533, "grad_norm": 4.06134557723999, "learning_rate": 9.308269792242833e-06, "loss": 0.8446, "step": 3966 }, { "epoch": 1.0550531914893617, "grad_norm": 3.927212715148926, "learning_rate": 9.30782337998105e-06, "loss": 0.8009, "step": 3967 }, { "epoch": 1.0553191489361702, "grad_norm": 3.9333722591400146, "learning_rate": 9.307376834430142e-06, "loss": 0.8184, "step": 3968 }, { "epoch": 1.0555851063829786, "grad_norm": 4.4977288246154785, "learning_rate": 9.306930155603923e-06, "loss": 0.841, "step": 3969 }, { "epoch": 1.0558510638297873, "grad_norm": 3.587890386581421, "learning_rate": 9.306483343516212e-06, "loss": 0.6937, "step": 3970 }, { "epoch": 1.0561170212765958, "grad_norm": 4.001445293426514, "learning_rate": 9.30603639818084e-06, "loss": 0.8711, "step": 3971 }, { "epoch": 1.0563829787234043, "grad_norm": 3.6268887519836426, "learning_rate": 9.30558931961163e-06, "loss": 0.7053, "step": 3972 }, { "epoch": 1.0566489361702127, "grad_norm": 3.929903030395508, "learning_rate": 9.305142107822415e-06, "loss": 0.8549, "step": 3973 }, { "epoch": 1.0569148936170212, "grad_norm": 3.7672524452209473, "learning_rate": 9.304694762827038e-06, "loss": 0.6872, "step": 3974 }, { "epoch": 1.0571808510638299, "grad_norm": 4.7689738273620605, "learning_rate": 9.304247284639335e-06, "loss": 0.8544, "step": 3975 }, { "epoch": 1.0574468085106383, "grad_norm": 3.8088295459747314, "learning_rate": 9.303799673273153e-06, "loss": 0.7047, "step": 3976 }, { "epoch": 1.0577127659574468, "grad_norm": 4.246236324310303, "learning_rate": 9.303351928742344e-06, "loss": 0.7887, "step": 3977 }, { "epoch": 1.0579787234042553, "grad_norm": 3.864558696746826, "learning_rate": 9.302904051060758e-06, "loss": 0.828, "step": 3978 }, { "epoch": 1.058244680851064, "grad_norm": 4.24592399597168, "learning_rate": 9.302456040242257e-06, "loss": 0.7851, "step": 3979 }, { "epoch": 1.0585106382978724, "grad_norm": 4.1537909507751465, "learning_rate": 9.302007896300697e-06, "loss": 0.8281, "step": 3980 }, { "epoch": 1.0587765957446809, "grad_norm": 4.180373668670654, "learning_rate": 9.30155961924995e-06, "loss": 0.8334, "step": 3981 }, { "epoch": 1.0590425531914893, "grad_norm": 3.3669097423553467, "learning_rate": 9.301111209103883e-06, "loss": 0.745, "step": 3982 }, { "epoch": 1.0593085106382978, "grad_norm": 3.8249645233154297, "learning_rate": 9.300662665876373e-06, "loss": 0.8035, "step": 3983 }, { "epoch": 1.0595744680851065, "grad_norm": 3.8265540599823, "learning_rate": 9.300213989581294e-06, "loss": 0.708, "step": 3984 }, { "epoch": 1.059840425531915, "grad_norm": 4.226235866546631, "learning_rate": 9.299765180232534e-06, "loss": 0.8594, "step": 3985 }, { "epoch": 1.0601063829787234, "grad_norm": 4.107953071594238, "learning_rate": 9.299316237843976e-06, "loss": 0.8162, "step": 3986 }, { "epoch": 1.0603723404255319, "grad_norm": 3.8606715202331543, "learning_rate": 9.298867162429511e-06, "loss": 0.7562, "step": 3987 }, { "epoch": 1.0606382978723403, "grad_norm": 3.6489405632019043, "learning_rate": 9.298417954003036e-06, "loss": 0.7331, "step": 3988 }, { "epoch": 1.060904255319149, "grad_norm": 4.5174150466918945, "learning_rate": 9.297968612578448e-06, "loss": 0.8392, "step": 3989 }, { "epoch": 1.0611702127659575, "grad_norm": 3.8880250453948975, "learning_rate": 9.29751913816965e-06, "loss": 0.8565, "step": 3990 }, { "epoch": 1.061436170212766, "grad_norm": 3.8482306003570557, "learning_rate": 9.297069530790552e-06, "loss": 0.6222, "step": 3991 }, { "epoch": 1.0617021276595744, "grad_norm": 3.9345664978027344, "learning_rate": 9.296619790455062e-06, "loss": 0.7166, "step": 3992 }, { "epoch": 1.061968085106383, "grad_norm": 4.360013961791992, "learning_rate": 9.296169917177099e-06, "loss": 0.7584, "step": 3993 }, { "epoch": 1.0622340425531915, "grad_norm": 3.7796449661254883, "learning_rate": 9.295719910970577e-06, "loss": 0.8688, "step": 3994 }, { "epoch": 1.0625, "grad_norm": 3.968502998352051, "learning_rate": 9.295269771849426e-06, "loss": 0.7795, "step": 3995 }, { "epoch": 1.0627659574468085, "grad_norm": 4.514654636383057, "learning_rate": 9.294819499827572e-06, "loss": 0.8955, "step": 3996 }, { "epoch": 1.063031914893617, "grad_norm": 3.8706483840942383, "learning_rate": 9.294369094918945e-06, "loss": 0.7875, "step": 3997 }, { "epoch": 1.0632978723404256, "grad_norm": 3.6928679943084717, "learning_rate": 9.293918557137483e-06, "loss": 0.7198, "step": 3998 }, { "epoch": 1.063563829787234, "grad_norm": 3.9840540885925293, "learning_rate": 9.293467886497123e-06, "loss": 0.8831, "step": 3999 }, { "epoch": 1.0638297872340425, "grad_norm": 4.153161525726318, "learning_rate": 9.293017083011814e-06, "loss": 0.8204, "step": 4000 }, { "epoch": 1.0638297872340425, "eval_loss": 1.3173630237579346, "eval_runtime": 13.912, "eval_samples_per_second": 28.752, "eval_steps_per_second": 3.594, "step": 4000 }, { "epoch": 1.064095744680851, "grad_norm": 3.50370717048645, "learning_rate": 9.2925661466955e-06, "loss": 0.6799, "step": 4001 }, { "epoch": 1.0643617021276595, "grad_norm": 3.481992244720459, "learning_rate": 9.292115077562138e-06, "loss": 0.6651, "step": 4002 }, { "epoch": 1.0646276595744681, "grad_norm": 3.986703634262085, "learning_rate": 9.291663875625681e-06, "loss": 0.713, "step": 4003 }, { "epoch": 1.0648936170212766, "grad_norm": 3.7703604698181152, "learning_rate": 9.291212540900091e-06, "loss": 0.8728, "step": 4004 }, { "epoch": 1.065159574468085, "grad_norm": 3.9758448600769043, "learning_rate": 9.290761073399333e-06, "loss": 0.8273, "step": 4005 }, { "epoch": 1.0654255319148935, "grad_norm": 3.999802350997925, "learning_rate": 9.290309473137376e-06, "loss": 0.8826, "step": 4006 }, { "epoch": 1.0656914893617022, "grad_norm": 4.072256088256836, "learning_rate": 9.289857740128192e-06, "loss": 0.8037, "step": 4007 }, { "epoch": 1.0659574468085107, "grad_norm": 3.619701623916626, "learning_rate": 9.289405874385759e-06, "loss": 0.6833, "step": 4008 }, { "epoch": 1.0662234042553191, "grad_norm": 4.227363586425781, "learning_rate": 9.288953875924057e-06, "loss": 0.8688, "step": 4009 }, { "epoch": 1.0664893617021276, "grad_norm": 3.589017629623413, "learning_rate": 9.288501744757073e-06, "loss": 0.6888, "step": 4010 }, { "epoch": 1.0667553191489363, "grad_norm": 3.9024956226348877, "learning_rate": 9.288049480898797e-06, "loss": 0.8349, "step": 4011 }, { "epoch": 1.0670212765957447, "grad_norm": 3.854668617248535, "learning_rate": 9.287597084363222e-06, "loss": 0.8158, "step": 4012 }, { "epoch": 1.0672872340425532, "grad_norm": 3.511909008026123, "learning_rate": 9.287144555164343e-06, "loss": 0.8076, "step": 4013 }, { "epoch": 1.0675531914893617, "grad_norm": 4.2021098136901855, "learning_rate": 9.286691893316165e-06, "loss": 0.8434, "step": 4014 }, { "epoch": 1.0678191489361701, "grad_norm": 3.823734760284424, "learning_rate": 9.286239098832693e-06, "loss": 0.8124, "step": 4015 }, { "epoch": 1.0680851063829788, "grad_norm": 3.6504952907562256, "learning_rate": 9.285786171727938e-06, "loss": 0.7402, "step": 4016 }, { "epoch": 1.0683510638297873, "grad_norm": 3.7579758167266846, "learning_rate": 9.28533311201591e-06, "loss": 0.8335, "step": 4017 }, { "epoch": 1.0686170212765957, "grad_norm": 3.902036428451538, "learning_rate": 9.284879919710631e-06, "loss": 0.8564, "step": 4018 }, { "epoch": 1.0688829787234042, "grad_norm": 3.6956422328948975, "learning_rate": 9.284426594826124e-06, "loss": 0.7766, "step": 4019 }, { "epoch": 1.0691489361702127, "grad_norm": 3.866909980773926, "learning_rate": 9.283973137376414e-06, "loss": 0.8988, "step": 4020 }, { "epoch": 1.0694148936170214, "grad_norm": 4.163184642791748, "learning_rate": 9.28351954737553e-06, "loss": 0.9235, "step": 4021 }, { "epoch": 1.0696808510638298, "grad_norm": 4.208329200744629, "learning_rate": 9.28306582483751e-06, "loss": 0.7734, "step": 4022 }, { "epoch": 1.0699468085106383, "grad_norm": 4.030316352844238, "learning_rate": 9.28261196977639e-06, "loss": 0.8427, "step": 4023 }, { "epoch": 1.0702127659574467, "grad_norm": 3.842853307723999, "learning_rate": 9.282157982206212e-06, "loss": 0.8647, "step": 4024 }, { "epoch": 1.0704787234042552, "grad_norm": 4.306194305419922, "learning_rate": 9.281703862141024e-06, "loss": 0.7107, "step": 4025 }, { "epoch": 1.070744680851064, "grad_norm": 4.034607887268066, "learning_rate": 9.28124960959488e-06, "loss": 0.76, "step": 4026 }, { "epoch": 1.0710106382978724, "grad_norm": 4.018486022949219, "learning_rate": 9.280795224581832e-06, "loss": 0.8058, "step": 4027 }, { "epoch": 1.0712765957446808, "grad_norm": 4.060681343078613, "learning_rate": 9.280340707115938e-06, "loss": 0.772, "step": 4028 }, { "epoch": 1.0715425531914893, "grad_norm": 3.8870697021484375, "learning_rate": 9.279886057211264e-06, "loss": 0.8036, "step": 4029 }, { "epoch": 1.071808510638298, "grad_norm": 3.455979585647583, "learning_rate": 9.279431274881876e-06, "loss": 0.6292, "step": 4030 }, { "epoch": 1.0720744680851064, "grad_norm": 3.5263242721557617, "learning_rate": 9.278976360141848e-06, "loss": 0.7937, "step": 4031 }, { "epoch": 1.0723404255319149, "grad_norm": 4.214826583862305, "learning_rate": 9.27852131300525e-06, "loss": 0.8888, "step": 4032 }, { "epoch": 1.0726063829787233, "grad_norm": 3.6315364837646484, "learning_rate": 9.278066133486167e-06, "loss": 0.7101, "step": 4033 }, { "epoch": 1.0728723404255318, "grad_norm": 4.311771869659424, "learning_rate": 9.277610821598682e-06, "loss": 0.8687, "step": 4034 }, { "epoch": 1.0731382978723405, "grad_norm": 3.720752716064453, "learning_rate": 9.277155377356881e-06, "loss": 0.709, "step": 4035 }, { "epoch": 1.073404255319149, "grad_norm": 3.8687169551849365, "learning_rate": 9.276699800774858e-06, "loss": 0.7483, "step": 4036 }, { "epoch": 1.0736702127659574, "grad_norm": 4.010682582855225, "learning_rate": 9.276244091866706e-06, "loss": 0.7954, "step": 4037 }, { "epoch": 1.0739361702127659, "grad_norm": 3.9716639518737793, "learning_rate": 9.27578825064653e-06, "loss": 0.8228, "step": 4038 }, { "epoch": 1.0742021276595746, "grad_norm": 3.6064131259918213, "learning_rate": 9.275332277128428e-06, "loss": 0.8019, "step": 4039 }, { "epoch": 1.074468085106383, "grad_norm": 3.986684560775757, "learning_rate": 9.274876171326514e-06, "loss": 0.7684, "step": 4040 }, { "epoch": 1.0747340425531915, "grad_norm": 3.6139955520629883, "learning_rate": 9.274419933254897e-06, "loss": 0.7885, "step": 4041 }, { "epoch": 1.075, "grad_norm": 4.203228950500488, "learning_rate": 9.273963562927695e-06, "loss": 0.8082, "step": 4042 }, { "epoch": 1.0752659574468084, "grad_norm": 4.109843730926514, "learning_rate": 9.27350706035903e-06, "loss": 0.6948, "step": 4043 }, { "epoch": 1.075531914893617, "grad_norm": 3.8464603424072266, "learning_rate": 9.273050425563023e-06, "loss": 0.8871, "step": 4044 }, { "epoch": 1.0757978723404256, "grad_norm": 3.8080790042877197, "learning_rate": 9.272593658553806e-06, "loss": 0.7375, "step": 4045 }, { "epoch": 1.076063829787234, "grad_norm": 3.829904556274414, "learning_rate": 9.272136759345512e-06, "loss": 0.7572, "step": 4046 }, { "epoch": 1.0763297872340425, "grad_norm": 4.1604390144348145, "learning_rate": 9.271679727952274e-06, "loss": 0.7503, "step": 4047 }, { "epoch": 1.076595744680851, "grad_norm": 3.538896322250366, "learning_rate": 9.271222564388238e-06, "loss": 0.7042, "step": 4048 }, { "epoch": 1.0768617021276596, "grad_norm": 3.960331439971924, "learning_rate": 9.270765268667547e-06, "loss": 0.8119, "step": 4049 }, { "epoch": 1.077127659574468, "grad_norm": 4.355499267578125, "learning_rate": 9.270307840804349e-06, "loss": 0.8219, "step": 4050 }, { "epoch": 1.0773936170212766, "grad_norm": 4.223673343658447, "learning_rate": 9.2698502808128e-06, "loss": 0.782, "step": 4051 }, { "epoch": 1.077659574468085, "grad_norm": 3.8911452293395996, "learning_rate": 9.269392588707056e-06, "loss": 0.8562, "step": 4052 }, { "epoch": 1.0779255319148937, "grad_norm": 3.9379541873931885, "learning_rate": 9.268934764501279e-06, "loss": 0.8103, "step": 4053 }, { "epoch": 1.0781914893617022, "grad_norm": 4.371243000030518, "learning_rate": 9.268476808209635e-06, "loss": 0.7773, "step": 4054 }, { "epoch": 1.0784574468085106, "grad_norm": 3.5743019580841064, "learning_rate": 9.26801871984629e-06, "loss": 0.8976, "step": 4055 }, { "epoch": 1.078723404255319, "grad_norm": 3.959336280822754, "learning_rate": 9.267560499425425e-06, "loss": 0.8294, "step": 4056 }, { "epoch": 1.0789893617021276, "grad_norm": 3.2908687591552734, "learning_rate": 9.267102146961211e-06, "loss": 0.7021, "step": 4057 }, { "epoch": 1.0792553191489362, "grad_norm": 3.952495574951172, "learning_rate": 9.266643662467834e-06, "loss": 0.8368, "step": 4058 }, { "epoch": 1.0795212765957447, "grad_norm": 3.691890239715576, "learning_rate": 9.266185045959478e-06, "loss": 0.7606, "step": 4059 }, { "epoch": 1.0797872340425532, "grad_norm": 4.092920780181885, "learning_rate": 9.265726297450332e-06, "loss": 0.7791, "step": 4060 }, { "epoch": 1.0800531914893616, "grad_norm": 4.004536151885986, "learning_rate": 9.265267416954595e-06, "loss": 0.7055, "step": 4061 }, { "epoch": 1.0803191489361703, "grad_norm": 3.7672064304351807, "learning_rate": 9.26480840448646e-06, "loss": 0.7552, "step": 4062 }, { "epoch": 1.0805851063829788, "grad_norm": 3.8815436363220215, "learning_rate": 9.264349260060134e-06, "loss": 0.7602, "step": 4063 }, { "epoch": 1.0808510638297872, "grad_norm": 4.021637916564941, "learning_rate": 9.26388998368982e-06, "loss": 0.7595, "step": 4064 }, { "epoch": 1.0811170212765957, "grad_norm": 3.9159035682678223, "learning_rate": 9.26343057538973e-06, "loss": 0.7554, "step": 4065 }, { "epoch": 1.0813829787234042, "grad_norm": 3.9444377422332764, "learning_rate": 9.26297103517408e-06, "loss": 0.6694, "step": 4066 }, { "epoch": 1.0816489361702128, "grad_norm": 3.8889427185058594, "learning_rate": 9.262511363057085e-06, "loss": 0.7356, "step": 4067 }, { "epoch": 1.0819148936170213, "grad_norm": 4.03524923324585, "learning_rate": 9.262051559052972e-06, "loss": 0.6715, "step": 4068 }, { "epoch": 1.0821808510638298, "grad_norm": 4.430936336517334, "learning_rate": 9.261591623175965e-06, "loss": 0.9173, "step": 4069 }, { "epoch": 1.0824468085106382, "grad_norm": 3.784855604171753, "learning_rate": 9.261131555440295e-06, "loss": 0.8472, "step": 4070 }, { "epoch": 1.0827127659574467, "grad_norm": 3.9647388458251953, "learning_rate": 9.260671355860196e-06, "loss": 0.6908, "step": 4071 }, { "epoch": 1.0829787234042554, "grad_norm": 4.330158710479736, "learning_rate": 9.260211024449913e-06, "loss": 0.7744, "step": 4072 }, { "epoch": 1.0832446808510638, "grad_norm": 3.934960126876831, "learning_rate": 9.259750561223682e-06, "loss": 0.7585, "step": 4073 }, { "epoch": 1.0835106382978723, "grad_norm": 4.234976291656494, "learning_rate": 9.259289966195754e-06, "loss": 0.7642, "step": 4074 }, { "epoch": 1.0837765957446808, "grad_norm": 4.297840118408203, "learning_rate": 9.25882923938038e-06, "loss": 0.8493, "step": 4075 }, { "epoch": 1.0840425531914895, "grad_norm": 3.9343340396881104, "learning_rate": 9.258368380791818e-06, "loss": 0.8649, "step": 4076 }, { "epoch": 1.084308510638298, "grad_norm": 4.02085018157959, "learning_rate": 9.257907390444322e-06, "loss": 0.7595, "step": 4077 }, { "epoch": 1.0845744680851064, "grad_norm": 4.010712146759033, "learning_rate": 9.257446268352158e-06, "loss": 0.9151, "step": 4078 }, { "epoch": 1.0848404255319148, "grad_norm": 3.8062400817871094, "learning_rate": 9.256985014529595e-06, "loss": 0.8318, "step": 4079 }, { "epoch": 1.0851063829787233, "grad_norm": 4.219789505004883, "learning_rate": 9.256523628990903e-06, "loss": 0.7924, "step": 4080 }, { "epoch": 1.085372340425532, "grad_norm": 3.7686777114868164, "learning_rate": 9.25606211175036e-06, "loss": 0.8027, "step": 4081 }, { "epoch": 1.0856382978723405, "grad_norm": 3.6773087978363037, "learning_rate": 9.255600462822241e-06, "loss": 0.7568, "step": 4082 }, { "epoch": 1.085904255319149, "grad_norm": 3.480522394180298, "learning_rate": 9.255138682220837e-06, "loss": 0.7156, "step": 4083 }, { "epoch": 1.0861702127659574, "grad_norm": 3.8398611545562744, "learning_rate": 9.254676769960429e-06, "loss": 0.7162, "step": 4084 }, { "epoch": 1.086436170212766, "grad_norm": 3.8505029678344727, "learning_rate": 9.254214726055314e-06, "loss": 0.8488, "step": 4085 }, { "epoch": 1.0867021276595745, "grad_norm": 4.238323211669922, "learning_rate": 9.253752550519787e-06, "loss": 0.8742, "step": 4086 }, { "epoch": 1.086968085106383, "grad_norm": 3.7396814823150635, "learning_rate": 9.253290243368149e-06, "loss": 0.8127, "step": 4087 }, { "epoch": 1.0872340425531914, "grad_norm": 4.44807767868042, "learning_rate": 9.2528278046147e-06, "loss": 0.8144, "step": 4088 }, { "epoch": 1.0875, "grad_norm": 3.88287091255188, "learning_rate": 9.252365234273754e-06, "loss": 0.691, "step": 4089 }, { "epoch": 1.0877659574468086, "grad_norm": 3.7738873958587646, "learning_rate": 9.251902532359622e-06, "loss": 0.7662, "step": 4090 }, { "epoch": 1.088031914893617, "grad_norm": 3.789278745651245, "learning_rate": 9.251439698886618e-06, "loss": 0.7773, "step": 4091 }, { "epoch": 1.0882978723404255, "grad_norm": 3.8501172065734863, "learning_rate": 9.250976733869065e-06, "loss": 0.795, "step": 4092 }, { "epoch": 1.088563829787234, "grad_norm": 4.324002265930176, "learning_rate": 9.250513637321287e-06, "loss": 0.7957, "step": 4093 }, { "epoch": 1.0888297872340424, "grad_norm": 3.598450183868408, "learning_rate": 9.250050409257612e-06, "loss": 0.8029, "step": 4094 }, { "epoch": 1.0890957446808511, "grad_norm": 3.749985694885254, "learning_rate": 9.249587049692375e-06, "loss": 0.7377, "step": 4095 }, { "epoch": 1.0893617021276596, "grad_norm": 3.7555527687072754, "learning_rate": 9.24912355863991e-06, "loss": 0.7276, "step": 4096 }, { "epoch": 1.089627659574468, "grad_norm": 3.826099395751953, "learning_rate": 9.248659936114558e-06, "loss": 0.9592, "step": 4097 }, { "epoch": 1.0898936170212765, "grad_norm": 4.4053263664245605, "learning_rate": 9.248196182130669e-06, "loss": 0.846, "step": 4098 }, { "epoch": 1.0901595744680852, "grad_norm": 3.7693631649017334, "learning_rate": 9.247732296702586e-06, "loss": 0.8702, "step": 4099 }, { "epoch": 1.0904255319148937, "grad_norm": 3.8193347454071045, "learning_rate": 9.247268279844666e-06, "loss": 0.8124, "step": 4100 }, { "epoch": 1.0906914893617021, "grad_norm": 3.5872762203216553, "learning_rate": 9.246804131571263e-06, "loss": 0.8409, "step": 4101 }, { "epoch": 1.0909574468085106, "grad_norm": 3.6679608821868896, "learning_rate": 9.246339851896742e-06, "loss": 0.8331, "step": 4102 }, { "epoch": 1.091223404255319, "grad_norm": 3.838644027709961, "learning_rate": 9.245875440835466e-06, "loss": 0.8683, "step": 4103 }, { "epoch": 1.0914893617021277, "grad_norm": 4.146610736846924, "learning_rate": 9.245410898401806e-06, "loss": 0.7721, "step": 4104 }, { "epoch": 1.0917553191489362, "grad_norm": 3.685303211212158, "learning_rate": 9.244946224610132e-06, "loss": 0.6993, "step": 4105 }, { "epoch": 1.0920212765957447, "grad_norm": 3.9541261196136475, "learning_rate": 9.244481419474824e-06, "loss": 0.7942, "step": 4106 }, { "epoch": 1.0922872340425531, "grad_norm": 4.122397422790527, "learning_rate": 9.244016483010266e-06, "loss": 0.7709, "step": 4107 }, { "epoch": 1.0925531914893618, "grad_norm": 4.400294303894043, "learning_rate": 9.24355141523084e-06, "loss": 0.8702, "step": 4108 }, { "epoch": 1.0928191489361703, "grad_norm": 4.555760383605957, "learning_rate": 9.243086216150938e-06, "loss": 0.8594, "step": 4109 }, { "epoch": 1.0930851063829787, "grad_norm": 4.033708095550537, "learning_rate": 9.242620885784952e-06, "loss": 0.9066, "step": 4110 }, { "epoch": 1.0933510638297872, "grad_norm": 3.908421754837036, "learning_rate": 9.24215542414728e-06, "loss": 0.7454, "step": 4111 }, { "epoch": 1.0936170212765957, "grad_norm": 3.8368232250213623, "learning_rate": 9.241689831252327e-06, "loss": 0.6895, "step": 4112 }, { "epoch": 1.0938829787234043, "grad_norm": 3.6774628162384033, "learning_rate": 9.241224107114495e-06, "loss": 0.8634, "step": 4113 }, { "epoch": 1.0941489361702128, "grad_norm": 4.185787677764893, "learning_rate": 9.240758251748195e-06, "loss": 0.8685, "step": 4114 }, { "epoch": 1.0944148936170213, "grad_norm": 3.8751626014709473, "learning_rate": 9.240292265167843e-06, "loss": 0.86, "step": 4115 }, { "epoch": 1.0946808510638297, "grad_norm": 4.215353965759277, "learning_rate": 9.239826147387857e-06, "loss": 0.8188, "step": 4116 }, { "epoch": 1.0949468085106382, "grad_norm": 3.7287204265594482, "learning_rate": 9.239359898422656e-06, "loss": 0.71, "step": 4117 }, { "epoch": 1.0952127659574469, "grad_norm": 3.8123693466186523, "learning_rate": 9.238893518286668e-06, "loss": 0.7727, "step": 4118 }, { "epoch": 1.0954787234042553, "grad_norm": 3.990419626235962, "learning_rate": 9.238427006994325e-06, "loss": 0.7953, "step": 4119 }, { "epoch": 1.0957446808510638, "grad_norm": 3.976417303085327, "learning_rate": 9.237960364560063e-06, "loss": 0.8596, "step": 4120 }, { "epoch": 1.0960106382978723, "grad_norm": 4.219186305999756, "learning_rate": 9.237493590998315e-06, "loss": 0.809, "step": 4121 }, { "epoch": 1.096276595744681, "grad_norm": 3.693594455718994, "learning_rate": 9.237026686323527e-06, "loss": 0.8066, "step": 4122 }, { "epoch": 1.0965425531914894, "grad_norm": 3.7492263317108154, "learning_rate": 9.236559650550143e-06, "loss": 0.7525, "step": 4123 }, { "epoch": 1.0968085106382979, "grad_norm": 4.333737850189209, "learning_rate": 9.236092483692617e-06, "loss": 0.8718, "step": 4124 }, { "epoch": 1.0970744680851063, "grad_norm": 3.505357503890991, "learning_rate": 9.235625185765403e-06, "loss": 0.8482, "step": 4125 }, { "epoch": 1.0973404255319148, "grad_norm": 4.302443027496338, "learning_rate": 9.235157756782957e-06, "loss": 1.0046, "step": 4126 }, { "epoch": 1.0976063829787235, "grad_norm": 3.8847270011901855, "learning_rate": 9.234690196759746e-06, "loss": 0.8921, "step": 4127 }, { "epoch": 1.097872340425532, "grad_norm": 3.976154327392578, "learning_rate": 9.234222505710232e-06, "loss": 0.7338, "step": 4128 }, { "epoch": 1.0981382978723404, "grad_norm": 3.829082489013672, "learning_rate": 9.233754683648891e-06, "loss": 0.7554, "step": 4129 }, { "epoch": 1.0984042553191489, "grad_norm": 3.693549633026123, "learning_rate": 9.233286730590195e-06, "loss": 0.7555, "step": 4130 }, { "epoch": 1.0986702127659576, "grad_norm": 3.9820609092712402, "learning_rate": 9.232818646548622e-06, "loss": 0.8567, "step": 4131 }, { "epoch": 1.098936170212766, "grad_norm": 3.9395439624786377, "learning_rate": 9.232350431538656e-06, "loss": 0.7728, "step": 4132 }, { "epoch": 1.0992021276595745, "grad_norm": 4.385442733764648, "learning_rate": 9.231882085574788e-06, "loss": 0.7803, "step": 4133 }, { "epoch": 1.099468085106383, "grad_norm": 4.260448932647705, "learning_rate": 9.231413608671504e-06, "loss": 0.8111, "step": 4134 }, { "epoch": 1.0997340425531914, "grad_norm": 3.9470431804656982, "learning_rate": 9.2309450008433e-06, "loss": 0.718, "step": 4135 }, { "epoch": 1.1, "grad_norm": 3.897451877593994, "learning_rate": 9.230476262104678e-06, "loss": 0.7257, "step": 4136 }, { "epoch": 1.1002659574468086, "grad_norm": 4.178949356079102, "learning_rate": 9.23000739247014e-06, "loss": 0.8704, "step": 4137 }, { "epoch": 1.100531914893617, "grad_norm": 3.9306554794311523, "learning_rate": 9.22953839195419e-06, "loss": 0.8856, "step": 4138 }, { "epoch": 1.1007978723404255, "grad_norm": 3.2699522972106934, "learning_rate": 9.229069260571346e-06, "loss": 0.7263, "step": 4139 }, { "epoch": 1.101063829787234, "grad_norm": 3.980687141418457, "learning_rate": 9.228599998336119e-06, "loss": 0.8805, "step": 4140 }, { "epoch": 1.1013297872340426, "grad_norm": 4.091682434082031, "learning_rate": 9.228130605263028e-06, "loss": 0.8572, "step": 4141 }, { "epoch": 1.101595744680851, "grad_norm": 3.8642654418945312, "learning_rate": 9.2276610813666e-06, "loss": 0.7285, "step": 4142 }, { "epoch": 1.1018617021276595, "grad_norm": 3.6476948261260986, "learning_rate": 9.227191426661359e-06, "loss": 0.7736, "step": 4143 }, { "epoch": 1.102127659574468, "grad_norm": 3.8674888610839844, "learning_rate": 9.22672164116184e-06, "loss": 0.6885, "step": 4144 }, { "epoch": 1.1023936170212767, "grad_norm": 3.6890833377838135, "learning_rate": 9.226251724882576e-06, "loss": 0.9683, "step": 4145 }, { "epoch": 1.1026595744680852, "grad_norm": 3.688188314437866, "learning_rate": 9.225781677838108e-06, "loss": 0.8236, "step": 4146 }, { "epoch": 1.1029255319148936, "grad_norm": 4.241778373718262, "learning_rate": 9.22531150004298e-06, "loss": 0.7666, "step": 4147 }, { "epoch": 1.103191489361702, "grad_norm": 3.8804636001586914, "learning_rate": 9.22484119151174e-06, "loss": 0.7547, "step": 4148 }, { "epoch": 1.1034574468085105, "grad_norm": 3.8728346824645996, "learning_rate": 9.224370752258938e-06, "loss": 0.7856, "step": 4149 }, { "epoch": 1.1037234042553192, "grad_norm": 3.4745118618011475, "learning_rate": 9.223900182299132e-06, "loss": 0.8213, "step": 4150 }, { "epoch": 1.1039893617021277, "grad_norm": 3.9133832454681396, "learning_rate": 9.223429481646881e-06, "loss": 0.8894, "step": 4151 }, { "epoch": 1.1042553191489362, "grad_norm": 3.5466485023498535, "learning_rate": 9.22295865031675e-06, "loss": 0.7024, "step": 4152 }, { "epoch": 1.1045212765957446, "grad_norm": 4.195438385009766, "learning_rate": 9.222487688323306e-06, "loss": 0.9108, "step": 4153 }, { "epoch": 1.1047872340425533, "grad_norm": 4.125967025756836, "learning_rate": 9.222016595681122e-06, "loss": 0.7909, "step": 4154 }, { "epoch": 1.1050531914893618, "grad_norm": 3.8983302116394043, "learning_rate": 9.221545372404774e-06, "loss": 0.8179, "step": 4155 }, { "epoch": 1.1053191489361702, "grad_norm": 4.264431953430176, "learning_rate": 9.22107401850884e-06, "loss": 0.8438, "step": 4156 }, { "epoch": 1.1055851063829787, "grad_norm": 3.9519243240356445, "learning_rate": 9.220602534007908e-06, "loss": 0.7254, "step": 4157 }, { "epoch": 1.1058510638297872, "grad_norm": 4.435789585113525, "learning_rate": 9.220130918916563e-06, "loss": 0.8453, "step": 4158 }, { "epoch": 1.1061170212765958, "grad_norm": 4.175622463226318, "learning_rate": 9.2196591732494e-06, "loss": 0.8253, "step": 4159 }, { "epoch": 1.1063829787234043, "grad_norm": 3.691840410232544, "learning_rate": 9.219187297021015e-06, "loss": 0.7372, "step": 4160 }, { "epoch": 1.1066489361702128, "grad_norm": 3.997159957885742, "learning_rate": 9.218715290246007e-06, "loss": 0.9002, "step": 4161 }, { "epoch": 1.1069148936170212, "grad_norm": 3.8894736766815186, "learning_rate": 9.21824315293898e-06, "loss": 0.8466, "step": 4162 }, { "epoch": 1.1071808510638297, "grad_norm": 4.081361293792725, "learning_rate": 9.217770885114544e-06, "loss": 0.8159, "step": 4163 }, { "epoch": 1.1074468085106384, "grad_norm": 3.6552507877349854, "learning_rate": 9.21729848678731e-06, "loss": 0.7608, "step": 4164 }, { "epoch": 1.1077127659574468, "grad_norm": 3.844689130783081, "learning_rate": 9.216825957971898e-06, "loss": 0.8599, "step": 4165 }, { "epoch": 1.1079787234042553, "grad_norm": 3.742281198501587, "learning_rate": 9.216353298682925e-06, "loss": 0.8188, "step": 4166 }, { "epoch": 1.1082446808510638, "grad_norm": 4.145520210266113, "learning_rate": 9.215880508935016e-06, "loss": 0.8485, "step": 4167 }, { "epoch": 1.1085106382978724, "grad_norm": 4.048991680145264, "learning_rate": 9.2154075887428e-06, "loss": 0.8058, "step": 4168 }, { "epoch": 1.108776595744681, "grad_norm": 3.9312491416931152, "learning_rate": 9.214934538120912e-06, "loss": 0.8728, "step": 4169 }, { "epoch": 1.1090425531914894, "grad_norm": 4.000396251678467, "learning_rate": 9.214461357083986e-06, "loss": 0.8695, "step": 4170 }, { "epoch": 1.1093085106382978, "grad_norm": 4.0020904541015625, "learning_rate": 9.213988045646664e-06, "loss": 0.7386, "step": 4171 }, { "epoch": 1.1095744680851063, "grad_norm": 3.527221441268921, "learning_rate": 9.21351460382359e-06, "loss": 0.8856, "step": 4172 }, { "epoch": 1.109840425531915, "grad_norm": 3.984145164489746, "learning_rate": 9.213041031629413e-06, "loss": 0.7518, "step": 4173 }, { "epoch": 1.1101063829787234, "grad_norm": 3.6558425426483154, "learning_rate": 9.212567329078787e-06, "loss": 0.7465, "step": 4174 }, { "epoch": 1.110372340425532, "grad_norm": 4.261702060699463, "learning_rate": 9.21209349618637e-06, "loss": 0.8813, "step": 4175 }, { "epoch": 1.1106382978723404, "grad_norm": 3.556643486022949, "learning_rate": 9.211619532966817e-06, "loss": 0.8007, "step": 4176 }, { "epoch": 1.110904255319149, "grad_norm": 3.8246734142303467, "learning_rate": 9.211145439434801e-06, "loss": 0.7599, "step": 4177 }, { "epoch": 1.1111702127659575, "grad_norm": 3.6221678256988525, "learning_rate": 9.210671215604985e-06, "loss": 0.8526, "step": 4178 }, { "epoch": 1.111436170212766, "grad_norm": 3.6839540004730225, "learning_rate": 9.210196861492045e-06, "loss": 0.88, "step": 4179 }, { "epoch": 1.1117021276595744, "grad_norm": 3.7845680713653564, "learning_rate": 9.209722377110657e-06, "loss": 0.7316, "step": 4180 }, { "epoch": 1.111968085106383, "grad_norm": 3.9798831939697266, "learning_rate": 9.209247762475502e-06, "loss": 0.7928, "step": 4181 }, { "epoch": 1.1122340425531916, "grad_norm": 3.394745349884033, "learning_rate": 9.208773017601265e-06, "loss": 0.7692, "step": 4182 }, { "epoch": 1.1125, "grad_norm": 3.9630630016326904, "learning_rate": 9.208298142502637e-06, "loss": 0.8699, "step": 4183 }, { "epoch": 1.1127659574468085, "grad_norm": 4.089821815490723, "learning_rate": 9.207823137194307e-06, "loss": 0.8295, "step": 4184 }, { "epoch": 1.113031914893617, "grad_norm": 3.949355125427246, "learning_rate": 9.20734800169098e-06, "loss": 0.8049, "step": 4185 }, { "epoch": 1.1132978723404254, "grad_norm": 3.588606119155884, "learning_rate": 9.206872736007348e-06, "loss": 0.7184, "step": 4186 }, { "epoch": 1.1135638297872341, "grad_norm": 4.689065933227539, "learning_rate": 9.206397340158122e-06, "loss": 0.8687, "step": 4187 }, { "epoch": 1.1138297872340426, "grad_norm": 3.685701847076416, "learning_rate": 9.20592181415801e-06, "loss": 0.7918, "step": 4188 }, { "epoch": 1.114095744680851, "grad_norm": 4.084209442138672, "learning_rate": 9.205446158021725e-06, "loss": 0.888, "step": 4189 }, { "epoch": 1.1143617021276595, "grad_norm": 3.9949495792388916, "learning_rate": 9.204970371763984e-06, "loss": 0.7975, "step": 4190 }, { "epoch": 1.1146276595744682, "grad_norm": 4.016841888427734, "learning_rate": 9.204494455399509e-06, "loss": 0.8413, "step": 4191 }, { "epoch": 1.1148936170212767, "grad_norm": 4.1810712814331055, "learning_rate": 9.204018408943026e-06, "loss": 0.7981, "step": 4192 }, { "epoch": 1.1151595744680851, "grad_norm": 3.305906295776367, "learning_rate": 9.203542232409263e-06, "loss": 0.6931, "step": 4193 }, { "epoch": 1.1154255319148936, "grad_norm": 4.138253688812256, "learning_rate": 9.203065925812955e-06, "loss": 0.7971, "step": 4194 }, { "epoch": 1.115691489361702, "grad_norm": 4.11892557144165, "learning_rate": 9.20258948916884e-06, "loss": 0.7175, "step": 4195 }, { "epoch": 1.1159574468085107, "grad_norm": 3.4274680614471436, "learning_rate": 9.202112922491657e-06, "loss": 0.7685, "step": 4196 }, { "epoch": 1.1162234042553192, "grad_norm": 3.894113540649414, "learning_rate": 9.201636225796151e-06, "loss": 0.6782, "step": 4197 }, { "epoch": 1.1164893617021276, "grad_norm": 4.417131423950195, "learning_rate": 9.201159399097077e-06, "loss": 0.7756, "step": 4198 }, { "epoch": 1.116755319148936, "grad_norm": 4.476882457733154, "learning_rate": 9.200682442409183e-06, "loss": 0.8896, "step": 4199 }, { "epoch": 1.1170212765957448, "grad_norm": 3.9255595207214355, "learning_rate": 9.200205355747228e-06, "loss": 0.669, "step": 4200 }, { "epoch": 1.1172872340425533, "grad_norm": 3.3451404571533203, "learning_rate": 9.199728139125976e-06, "loss": 0.6271, "step": 4201 }, { "epoch": 1.1175531914893617, "grad_norm": 4.113248825073242, "learning_rate": 9.199250792560187e-06, "loss": 0.8501, "step": 4202 }, { "epoch": 1.1178191489361702, "grad_norm": 3.8352253437042236, "learning_rate": 9.198773316064639e-06, "loss": 0.6881, "step": 4203 }, { "epoch": 1.1180851063829786, "grad_norm": 3.8396568298339844, "learning_rate": 9.1982957096541e-06, "loss": 0.695, "step": 4204 }, { "epoch": 1.1183510638297873, "grad_norm": 4.240661144256592, "learning_rate": 9.197817973343347e-06, "loss": 0.8287, "step": 4205 }, { "epoch": 1.1186170212765958, "grad_norm": 3.553846836090088, "learning_rate": 9.197340107147166e-06, "loss": 0.7441, "step": 4206 }, { "epoch": 1.1188829787234043, "grad_norm": 4.087765693664551, "learning_rate": 9.196862111080339e-06, "loss": 0.6896, "step": 4207 }, { "epoch": 1.1191489361702127, "grad_norm": 4.254801273345947, "learning_rate": 9.196383985157657e-06, "loss": 0.794, "step": 4208 }, { "epoch": 1.1194148936170212, "grad_norm": 3.8654487133026123, "learning_rate": 9.195905729393913e-06, "loss": 0.7891, "step": 4209 }, { "epoch": 1.1196808510638299, "grad_norm": 4.078755855560303, "learning_rate": 9.195427343803906e-06, "loss": 0.9686, "step": 4210 }, { "epoch": 1.1199468085106383, "grad_norm": 3.3730618953704834, "learning_rate": 9.19494882840244e-06, "loss": 0.7186, "step": 4211 }, { "epoch": 1.1202127659574468, "grad_norm": 3.944267511367798, "learning_rate": 9.194470183204315e-06, "loss": 0.7949, "step": 4212 }, { "epoch": 1.1204787234042553, "grad_norm": 3.8274521827697754, "learning_rate": 9.193991408224347e-06, "loss": 0.8237, "step": 4213 }, { "epoch": 1.1207446808510637, "grad_norm": 3.8445777893066406, "learning_rate": 9.193512503477345e-06, "loss": 0.7119, "step": 4214 }, { "epoch": 1.1210106382978724, "grad_norm": 4.098488807678223, "learning_rate": 9.19303346897813e-06, "loss": 0.9102, "step": 4215 }, { "epoch": 1.1212765957446809, "grad_norm": 4.096566200256348, "learning_rate": 9.192554304741522e-06, "loss": 0.8465, "step": 4216 }, { "epoch": 1.1215425531914893, "grad_norm": 3.770343065261841, "learning_rate": 9.192075010782348e-06, "loss": 0.8278, "step": 4217 }, { "epoch": 1.1218085106382978, "grad_norm": 3.843766689300537, "learning_rate": 9.191595587115439e-06, "loss": 0.8402, "step": 4218 }, { "epoch": 1.1220744680851065, "grad_norm": 4.594594478607178, "learning_rate": 9.191116033755625e-06, "loss": 0.8473, "step": 4219 }, { "epoch": 1.122340425531915, "grad_norm": 4.192259311676025, "learning_rate": 9.190636350717747e-06, "loss": 0.8356, "step": 4220 }, { "epoch": 1.1226063829787234, "grad_norm": 3.919210195541382, "learning_rate": 9.190156538016648e-06, "loss": 0.8494, "step": 4221 }, { "epoch": 1.1228723404255319, "grad_norm": 4.091637134552002, "learning_rate": 9.189676595667172e-06, "loss": 0.7264, "step": 4222 }, { "epoch": 1.1231382978723405, "grad_norm": 4.496889114379883, "learning_rate": 9.189196523684168e-06, "loss": 0.876, "step": 4223 }, { "epoch": 1.123404255319149, "grad_norm": 3.492234230041504, "learning_rate": 9.188716322082494e-06, "loss": 0.7568, "step": 4224 }, { "epoch": 1.1236702127659575, "grad_norm": 3.6598973274230957, "learning_rate": 9.188235990877004e-06, "loss": 0.683, "step": 4225 }, { "epoch": 1.123936170212766, "grad_norm": 4.073709964752197, "learning_rate": 9.18775553008256e-06, "loss": 0.7798, "step": 4226 }, { "epoch": 1.1242021276595744, "grad_norm": 4.100635528564453, "learning_rate": 9.18727493971403e-06, "loss": 0.8356, "step": 4227 }, { "epoch": 1.124468085106383, "grad_norm": 4.231848239898682, "learning_rate": 9.186794219786285e-06, "loss": 0.8528, "step": 4228 }, { "epoch": 1.1247340425531915, "grad_norm": 3.7461369037628174, "learning_rate": 9.186313370314196e-06, "loss": 0.7103, "step": 4229 }, { "epoch": 1.125, "grad_norm": 3.610039234161377, "learning_rate": 9.185832391312644e-06, "loss": 0.7271, "step": 4230 }, { "epoch": 1.1252659574468085, "grad_norm": 3.5538463592529297, "learning_rate": 9.18535128279651e-06, "loss": 0.82, "step": 4231 }, { "epoch": 1.125531914893617, "grad_norm": 3.878833293914795, "learning_rate": 9.184870044780677e-06, "loss": 0.8418, "step": 4232 }, { "epoch": 1.1257978723404256, "grad_norm": 4.012277126312256, "learning_rate": 9.184388677280038e-06, "loss": 0.8024, "step": 4233 }, { "epoch": 1.126063829787234, "grad_norm": 3.702630043029785, "learning_rate": 9.183907180309489e-06, "loss": 0.7978, "step": 4234 }, { "epoch": 1.1263297872340425, "grad_norm": 4.186684608459473, "learning_rate": 9.183425553883925e-06, "loss": 0.8459, "step": 4235 }, { "epoch": 1.126595744680851, "grad_norm": 4.011842727661133, "learning_rate": 9.18294379801825e-06, "loss": 0.7931, "step": 4236 }, { "epoch": 1.1268617021276595, "grad_norm": 4.870151042938232, "learning_rate": 9.182461912727368e-06, "loss": 0.9028, "step": 4237 }, { "epoch": 1.1271276595744681, "grad_norm": 3.5846457481384277, "learning_rate": 9.18197989802619e-06, "loss": 0.783, "step": 4238 }, { "epoch": 1.1273936170212766, "grad_norm": 3.910689115524292, "learning_rate": 9.181497753929629e-06, "loss": 0.8441, "step": 4239 }, { "epoch": 1.127659574468085, "grad_norm": 3.768601894378662, "learning_rate": 9.181015480452607e-06, "loss": 0.8207, "step": 4240 }, { "epoch": 1.1279255319148935, "grad_norm": 4.229056358337402, "learning_rate": 9.18053307761004e-06, "loss": 0.8025, "step": 4241 }, { "epoch": 1.1281914893617022, "grad_norm": 4.3545050621032715, "learning_rate": 9.180050545416861e-06, "loss": 0.8154, "step": 4242 }, { "epoch": 1.1284574468085107, "grad_norm": 4.138397693634033, "learning_rate": 9.179567883887997e-06, "loss": 0.8033, "step": 4243 }, { "epoch": 1.1287234042553191, "grad_norm": 3.9504189491271973, "learning_rate": 9.17908509303838e-06, "loss": 0.85, "step": 4244 }, { "epoch": 1.1289893617021276, "grad_norm": 3.9662301540374756, "learning_rate": 9.178602172882951e-06, "loss": 0.8327, "step": 4245 }, { "epoch": 1.1292553191489363, "grad_norm": 4.157631874084473, "learning_rate": 9.178119123436651e-06, "loss": 0.8558, "step": 4246 }, { "epoch": 1.1295212765957447, "grad_norm": 3.9172611236572266, "learning_rate": 9.177635944714424e-06, "loss": 0.9087, "step": 4247 }, { "epoch": 1.1297872340425532, "grad_norm": 3.9250762462615967, "learning_rate": 9.177152636731225e-06, "loss": 0.7709, "step": 4248 }, { "epoch": 1.1300531914893617, "grad_norm": 3.6299500465393066, "learning_rate": 9.176669199502004e-06, "loss": 0.717, "step": 4249 }, { "epoch": 1.1303191489361701, "grad_norm": 4.225446701049805, "learning_rate": 9.17618563304172e-06, "loss": 0.8766, "step": 4250 }, { "epoch": 1.1305851063829788, "grad_norm": 3.9178264141082764, "learning_rate": 9.175701937365337e-06, "loss": 0.7634, "step": 4251 }, { "epoch": 1.1308510638297873, "grad_norm": 3.905505657196045, "learning_rate": 9.175218112487821e-06, "loss": 0.7784, "step": 4252 }, { "epoch": 1.1311170212765957, "grad_norm": 4.228585243225098, "learning_rate": 9.174734158424138e-06, "loss": 0.8445, "step": 4253 }, { "epoch": 1.1313829787234042, "grad_norm": 3.9836041927337646, "learning_rate": 9.174250075189268e-06, "loss": 0.8252, "step": 4254 }, { "epoch": 1.1316489361702127, "grad_norm": 4.349749565124512, "learning_rate": 9.173765862798185e-06, "loss": 0.8154, "step": 4255 }, { "epoch": 1.1319148936170214, "grad_norm": 3.7815349102020264, "learning_rate": 9.17328152126587e-06, "loss": 0.7356, "step": 4256 }, { "epoch": 1.1321808510638298, "grad_norm": 3.9180119037628174, "learning_rate": 9.172797050607313e-06, "loss": 0.8098, "step": 4257 }, { "epoch": 1.1324468085106383, "grad_norm": 3.720789670944214, "learning_rate": 9.172312450837504e-06, "loss": 0.815, "step": 4258 }, { "epoch": 1.1327127659574467, "grad_norm": 4.155251502990723, "learning_rate": 9.171827721971434e-06, "loss": 0.8976, "step": 4259 }, { "epoch": 1.1329787234042552, "grad_norm": 4.600409030914307, "learning_rate": 9.171342864024103e-06, "loss": 0.8868, "step": 4260 }, { "epoch": 1.133244680851064, "grad_norm": 3.8379268646240234, "learning_rate": 9.170857877010512e-06, "loss": 0.7867, "step": 4261 }, { "epoch": 1.1335106382978724, "grad_norm": 4.109460830688477, "learning_rate": 9.170372760945668e-06, "loss": 0.7826, "step": 4262 }, { "epoch": 1.1337765957446808, "grad_norm": 3.895494222640991, "learning_rate": 9.16988751584458e-06, "loss": 0.854, "step": 4263 }, { "epoch": 1.1340425531914893, "grad_norm": 3.7237160205841064, "learning_rate": 9.169402141722264e-06, "loss": 0.7098, "step": 4264 }, { "epoch": 1.134308510638298, "grad_norm": 4.19631814956665, "learning_rate": 9.168916638593736e-06, "loss": 0.9218, "step": 4265 }, { "epoch": 1.1345744680851064, "grad_norm": 4.052074909210205, "learning_rate": 9.168431006474018e-06, "loss": 0.8367, "step": 4266 }, { "epoch": 1.1348404255319149, "grad_norm": 4.097432613372803, "learning_rate": 9.167945245378139e-06, "loss": 0.8705, "step": 4267 }, { "epoch": 1.1351063829787233, "grad_norm": 3.81488037109375, "learning_rate": 9.167459355321127e-06, "loss": 0.6803, "step": 4268 }, { "epoch": 1.135372340425532, "grad_norm": 4.266942501068115, "learning_rate": 9.166973336318015e-06, "loss": 0.8108, "step": 4269 }, { "epoch": 1.1356382978723405, "grad_norm": 3.9824750423431396, "learning_rate": 9.166487188383841e-06, "loss": 0.811, "step": 4270 }, { "epoch": 1.135904255319149, "grad_norm": 3.8896446228027344, "learning_rate": 9.16600091153365e-06, "loss": 0.8925, "step": 4271 }, { "epoch": 1.1361702127659574, "grad_norm": 4.690064907073975, "learning_rate": 9.165514505782484e-06, "loss": 1.1356, "step": 4272 }, { "epoch": 1.1364361702127659, "grad_norm": 4.304286479949951, "learning_rate": 9.165027971145397e-06, "loss": 0.8041, "step": 4273 }, { "epoch": 1.1367021276595746, "grad_norm": 4.315762519836426, "learning_rate": 9.16454130763744e-06, "loss": 0.7519, "step": 4274 }, { "epoch": 1.136968085106383, "grad_norm": 4.10341739654541, "learning_rate": 9.16405451527367e-06, "loss": 0.919, "step": 4275 }, { "epoch": 1.1372340425531915, "grad_norm": 3.7802481651306152, "learning_rate": 9.163567594069154e-06, "loss": 0.8271, "step": 4276 }, { "epoch": 1.1375, "grad_norm": 4.523904323577881, "learning_rate": 9.163080544038953e-06, "loss": 0.7865, "step": 4277 }, { "epoch": 1.1377659574468084, "grad_norm": 3.958662509918213, "learning_rate": 9.162593365198138e-06, "loss": 0.8165, "step": 4278 }, { "epoch": 1.138031914893617, "grad_norm": 3.8943662643432617, "learning_rate": 9.162106057561784e-06, "loss": 0.7951, "step": 4279 }, { "epoch": 1.1382978723404256, "grad_norm": 3.9076874256134033, "learning_rate": 9.161618621144967e-06, "loss": 0.8135, "step": 4280 }, { "epoch": 1.138563829787234, "grad_norm": 3.5434067249298096, "learning_rate": 9.161131055962773e-06, "loss": 0.7228, "step": 4281 }, { "epoch": 1.1388297872340425, "grad_norm": 4.137996673583984, "learning_rate": 9.160643362030284e-06, "loss": 0.7711, "step": 4282 }, { "epoch": 1.139095744680851, "grad_norm": 3.783001661300659, "learning_rate": 9.160155539362589e-06, "loss": 0.8494, "step": 4283 }, { "epoch": 1.1393617021276596, "grad_norm": 3.8411149978637695, "learning_rate": 9.159667587974786e-06, "loss": 0.7447, "step": 4284 }, { "epoch": 1.139627659574468, "grad_norm": 3.6387648582458496, "learning_rate": 9.15917950788197e-06, "loss": 0.8385, "step": 4285 }, { "epoch": 1.1398936170212766, "grad_norm": 4.564189910888672, "learning_rate": 9.158691299099241e-06, "loss": 0.7572, "step": 4286 }, { "epoch": 1.140159574468085, "grad_norm": 4.022932529449463, "learning_rate": 9.15820296164171e-06, "loss": 0.7129, "step": 4287 }, { "epoch": 1.1404255319148937, "grad_norm": 4.345612525939941, "learning_rate": 9.157714495524481e-06, "loss": 0.8371, "step": 4288 }, { "epoch": 1.1406914893617022, "grad_norm": 4.161421298980713, "learning_rate": 9.157225900762672e-06, "loss": 0.7528, "step": 4289 }, { "epoch": 1.1409574468085106, "grad_norm": 4.042864799499512, "learning_rate": 9.156737177371399e-06, "loss": 0.8491, "step": 4290 }, { "epoch": 1.141223404255319, "grad_norm": 3.8026928901672363, "learning_rate": 9.156248325365782e-06, "loss": 0.8444, "step": 4291 }, { "epoch": 1.1414893617021278, "grad_norm": 4.251069068908691, "learning_rate": 9.15575934476095e-06, "loss": 0.7857, "step": 4292 }, { "epoch": 1.1417553191489362, "grad_norm": 3.8531103134155273, "learning_rate": 9.155270235572031e-06, "loss": 0.867, "step": 4293 }, { "epoch": 1.1420212765957447, "grad_norm": 3.975175142288208, "learning_rate": 9.15478099781416e-06, "loss": 0.808, "step": 4294 }, { "epoch": 1.1422872340425532, "grad_norm": 3.695078134536743, "learning_rate": 9.154291631502471e-06, "loss": 0.7942, "step": 4295 }, { "epoch": 1.1425531914893616, "grad_norm": 3.8435237407684326, "learning_rate": 9.15380213665211e-06, "loss": 0.8701, "step": 4296 }, { "epoch": 1.1428191489361703, "grad_norm": 3.642451047897339, "learning_rate": 9.153312513278219e-06, "loss": 0.7479, "step": 4297 }, { "epoch": 1.1430851063829788, "grad_norm": 3.8612117767333984, "learning_rate": 9.15282276139595e-06, "loss": 0.8394, "step": 4298 }, { "epoch": 1.1433510638297872, "grad_norm": 3.818319082260132, "learning_rate": 9.152332881020454e-06, "loss": 0.789, "step": 4299 }, { "epoch": 1.1436170212765957, "grad_norm": 3.6774802207946777, "learning_rate": 9.15184287216689e-06, "loss": 0.7991, "step": 4300 }, { "epoch": 1.1438829787234042, "grad_norm": 4.338614463806152, "learning_rate": 9.15135273485042e-06, "loss": 0.8602, "step": 4301 }, { "epoch": 1.1441489361702128, "grad_norm": 3.9688498973846436, "learning_rate": 9.15086246908621e-06, "loss": 0.7759, "step": 4302 }, { "epoch": 1.1444148936170213, "grad_norm": 3.848708152770996, "learning_rate": 9.150372074889427e-06, "loss": 0.7635, "step": 4303 }, { "epoch": 1.1446808510638298, "grad_norm": 4.042501926422119, "learning_rate": 9.149881552275244e-06, "loss": 0.8029, "step": 4304 }, { "epoch": 1.1449468085106382, "grad_norm": 4.199094772338867, "learning_rate": 9.149390901258841e-06, "loss": 0.8343, "step": 4305 }, { "epoch": 1.1452127659574467, "grad_norm": 4.045470714569092, "learning_rate": 9.1489001218554e-06, "loss": 0.831, "step": 4306 }, { "epoch": 1.1454787234042554, "grad_norm": 3.7915914058685303, "learning_rate": 9.148409214080103e-06, "loss": 0.8476, "step": 4307 }, { "epoch": 1.1457446808510638, "grad_norm": 3.7452378273010254, "learning_rate": 9.14791817794814e-06, "loss": 0.776, "step": 4308 }, { "epoch": 1.1460106382978723, "grad_norm": 3.521505355834961, "learning_rate": 9.147427013474706e-06, "loss": 0.6753, "step": 4309 }, { "epoch": 1.1462765957446808, "grad_norm": 3.906930923461914, "learning_rate": 9.146935720674996e-06, "loss": 0.6909, "step": 4310 }, { "epoch": 1.1465425531914895, "grad_norm": 4.262080192565918, "learning_rate": 9.146444299564215e-06, "loss": 0.8444, "step": 4311 }, { "epoch": 1.146808510638298, "grad_norm": 4.085954666137695, "learning_rate": 9.145952750157563e-06, "loss": 0.7587, "step": 4312 }, { "epoch": 1.1470744680851064, "grad_norm": 3.9519617557525635, "learning_rate": 9.145461072470253e-06, "loss": 0.8757, "step": 4313 }, { "epoch": 1.1473404255319148, "grad_norm": 4.349664211273193, "learning_rate": 9.144969266517495e-06, "loss": 0.7766, "step": 4314 }, { "epoch": 1.1476063829787235, "grad_norm": 5.140100955963135, "learning_rate": 9.144477332314509e-06, "loss": 0.9414, "step": 4315 }, { "epoch": 1.147872340425532, "grad_norm": 3.641763210296631, "learning_rate": 9.143985269876516e-06, "loss": 0.7562, "step": 4316 }, { "epoch": 1.1481382978723405, "grad_norm": 3.641606092453003, "learning_rate": 9.143493079218738e-06, "loss": 0.7992, "step": 4317 }, { "epoch": 1.148404255319149, "grad_norm": 4.611671447753906, "learning_rate": 9.143000760356407e-06, "loss": 0.8306, "step": 4318 }, { "epoch": 1.1486702127659574, "grad_norm": 3.4973011016845703, "learning_rate": 9.142508313304754e-06, "loss": 0.7915, "step": 4319 }, { "epoch": 1.148936170212766, "grad_norm": 3.9405927658081055, "learning_rate": 9.142015738079017e-06, "loss": 0.8279, "step": 4320 }, { "epoch": 1.1492021276595745, "grad_norm": 4.37050199508667, "learning_rate": 9.141523034694436e-06, "loss": 0.8506, "step": 4321 }, { "epoch": 1.149468085106383, "grad_norm": 4.181821346282959, "learning_rate": 9.141030203166256e-06, "loss": 0.8439, "step": 4322 }, { "epoch": 1.1497340425531914, "grad_norm": 3.8523123264312744, "learning_rate": 9.140537243509729e-06, "loss": 0.7565, "step": 4323 }, { "epoch": 1.15, "grad_norm": 3.5637168884277344, "learning_rate": 9.140044155740102e-06, "loss": 0.7406, "step": 4324 }, { "epoch": 1.1502659574468086, "grad_norm": 3.8401317596435547, "learning_rate": 9.139550939872635e-06, "loss": 0.8231, "step": 4325 }, { "epoch": 1.150531914893617, "grad_norm": 4.033459186553955, "learning_rate": 9.139057595922587e-06, "loss": 0.7585, "step": 4326 }, { "epoch": 1.1507978723404255, "grad_norm": 4.144162654876709, "learning_rate": 9.138564123905225e-06, "loss": 0.8237, "step": 4327 }, { "epoch": 1.151063829787234, "grad_norm": 4.219383716583252, "learning_rate": 9.138070523835816e-06, "loss": 0.793, "step": 4328 }, { "epoch": 1.1513297872340424, "grad_norm": 4.144248962402344, "learning_rate": 9.137576795729635e-06, "loss": 0.743, "step": 4329 }, { "epoch": 1.1515957446808511, "grad_norm": 3.836845636367798, "learning_rate": 9.137082939601953e-06, "loss": 0.7829, "step": 4330 }, { "epoch": 1.1518617021276596, "grad_norm": 3.8342814445495605, "learning_rate": 9.136588955468057e-06, "loss": 0.7298, "step": 4331 }, { "epoch": 1.152127659574468, "grad_norm": 3.852695941925049, "learning_rate": 9.136094843343228e-06, "loss": 0.8051, "step": 4332 }, { "epoch": 1.1523936170212765, "grad_norm": 3.9740166664123535, "learning_rate": 9.135600603242753e-06, "loss": 0.8096, "step": 4333 }, { "epoch": 1.1526595744680852, "grad_norm": 4.557644367218018, "learning_rate": 9.13510623518193e-06, "loss": 0.8826, "step": 4334 }, { "epoch": 1.1529255319148937, "grad_norm": 4.095839500427246, "learning_rate": 9.13461173917605e-06, "loss": 0.7624, "step": 4335 }, { "epoch": 1.1531914893617021, "grad_norm": 3.6598823070526123, "learning_rate": 9.134117115240412e-06, "loss": 0.6786, "step": 4336 }, { "epoch": 1.1534574468085106, "grad_norm": 4.052873611450195, "learning_rate": 9.133622363390326e-06, "loss": 0.7476, "step": 4337 }, { "epoch": 1.1537234042553193, "grad_norm": 3.892709255218506, "learning_rate": 9.133127483641096e-06, "loss": 0.7902, "step": 4338 }, { "epoch": 1.1539893617021277, "grad_norm": 4.127117156982422, "learning_rate": 9.132632476008036e-06, "loss": 0.8427, "step": 4339 }, { "epoch": 1.1542553191489362, "grad_norm": 3.911402463912964, "learning_rate": 9.132137340506464e-06, "loss": 0.744, "step": 4340 }, { "epoch": 1.1545212765957447, "grad_norm": 4.6202826499938965, "learning_rate": 9.131642077151695e-06, "loss": 0.816, "step": 4341 }, { "epoch": 1.1547872340425531, "grad_norm": 3.967888593673706, "learning_rate": 9.131146685959055e-06, "loss": 0.8608, "step": 4342 }, { "epoch": 1.1550531914893618, "grad_norm": 3.7461965084075928, "learning_rate": 9.130651166943875e-06, "loss": 0.8002, "step": 4343 }, { "epoch": 1.1553191489361703, "grad_norm": 3.893925666809082, "learning_rate": 9.130155520121484e-06, "loss": 0.7651, "step": 4344 }, { "epoch": 1.1555851063829787, "grad_norm": 4.108353614807129, "learning_rate": 9.129659745507219e-06, "loss": 0.847, "step": 4345 }, { "epoch": 1.1558510638297872, "grad_norm": 3.766580104827881, "learning_rate": 9.129163843116417e-06, "loss": 0.7361, "step": 4346 }, { "epoch": 1.1561170212765957, "grad_norm": 4.005224227905273, "learning_rate": 9.128667812964428e-06, "loss": 0.846, "step": 4347 }, { "epoch": 1.1563829787234043, "grad_norm": 4.085299491882324, "learning_rate": 9.128171655066592e-06, "loss": 0.7435, "step": 4348 }, { "epoch": 1.1566489361702128, "grad_norm": 3.649341583251953, "learning_rate": 9.127675369438267e-06, "loss": 0.7848, "step": 4349 }, { "epoch": 1.1569148936170213, "grad_norm": 4.286210536956787, "learning_rate": 9.127178956094805e-06, "loss": 0.8657, "step": 4350 }, { "epoch": 1.1571808510638297, "grad_norm": 3.8484995365142822, "learning_rate": 9.12668241505157e-06, "loss": 0.7356, "step": 4351 }, { "epoch": 1.1574468085106382, "grad_norm": 3.80110239982605, "learning_rate": 9.12618574632392e-06, "loss": 0.8581, "step": 4352 }, { "epoch": 1.1577127659574469, "grad_norm": 4.16612434387207, "learning_rate": 9.125688949927223e-06, "loss": 0.9135, "step": 4353 }, { "epoch": 1.1579787234042553, "grad_norm": 4.107837677001953, "learning_rate": 9.125192025876855e-06, "loss": 0.8993, "step": 4354 }, { "epoch": 1.1582446808510638, "grad_norm": 3.7631843090057373, "learning_rate": 9.124694974188188e-06, "loss": 0.7997, "step": 4355 }, { "epoch": 1.1585106382978723, "grad_norm": 4.244007587432861, "learning_rate": 9.124197794876604e-06, "loss": 0.806, "step": 4356 }, { "epoch": 1.1587765957446807, "grad_norm": 3.4537291526794434, "learning_rate": 9.123700487957484e-06, "loss": 0.7259, "step": 4357 }, { "epoch": 1.1590425531914894, "grad_norm": 4.083813667297363, "learning_rate": 9.123203053446215e-06, "loss": 0.7935, "step": 4358 }, { "epoch": 1.1593085106382979, "grad_norm": 3.842515707015991, "learning_rate": 9.12270549135819e-06, "loss": 0.8403, "step": 4359 }, { "epoch": 1.1595744680851063, "grad_norm": 3.8198819160461426, "learning_rate": 9.122207801708802e-06, "loss": 0.8035, "step": 4360 }, { "epoch": 1.1598404255319148, "grad_norm": 4.05394172668457, "learning_rate": 9.121709984513453e-06, "loss": 0.6678, "step": 4361 }, { "epoch": 1.1601063829787235, "grad_norm": 3.8895061016082764, "learning_rate": 9.121212039787543e-06, "loss": 0.7822, "step": 4362 }, { "epoch": 1.160372340425532, "grad_norm": 4.040393829345703, "learning_rate": 9.12071396754648e-06, "loss": 0.8669, "step": 4363 }, { "epoch": 1.1606382978723404, "grad_norm": 3.8143858909606934, "learning_rate": 9.120215767805677e-06, "loss": 0.9251, "step": 4364 }, { "epoch": 1.1609042553191489, "grad_norm": 3.8011443614959717, "learning_rate": 9.119717440580547e-06, "loss": 0.7142, "step": 4365 }, { "epoch": 1.1611702127659576, "grad_norm": 4.147587776184082, "learning_rate": 9.119218985886506e-06, "loss": 0.8196, "step": 4366 }, { "epoch": 1.161436170212766, "grad_norm": 4.035295009613037, "learning_rate": 9.118720403738984e-06, "loss": 0.9006, "step": 4367 }, { "epoch": 1.1617021276595745, "grad_norm": 4.253767967224121, "learning_rate": 9.118221694153401e-06, "loss": 0.9149, "step": 4368 }, { "epoch": 1.161968085106383, "grad_norm": 3.7400970458984375, "learning_rate": 9.11772285714519e-06, "loss": 0.847, "step": 4369 }, { "epoch": 1.1622340425531914, "grad_norm": 4.12266731262207, "learning_rate": 9.117223892729788e-06, "loss": 0.8159, "step": 4370 }, { "epoch": 1.1625, "grad_norm": 3.939617395401001, "learning_rate": 9.11672480092263e-06, "loss": 0.8515, "step": 4371 }, { "epoch": 1.1627659574468086, "grad_norm": 3.597660541534424, "learning_rate": 9.11622558173916e-06, "loss": 0.7139, "step": 4372 }, { "epoch": 1.163031914893617, "grad_norm": 3.8929126262664795, "learning_rate": 9.115726235194825e-06, "loss": 0.755, "step": 4373 }, { "epoch": 1.1632978723404255, "grad_norm": 3.9748990535736084, "learning_rate": 9.115226761305071e-06, "loss": 0.9779, "step": 4374 }, { "epoch": 1.163563829787234, "grad_norm": 3.6702117919921875, "learning_rate": 9.11472716008536e-06, "loss": 0.7913, "step": 4375 }, { "epoch": 1.1638297872340426, "grad_norm": 3.5676674842834473, "learning_rate": 9.114227431551144e-06, "loss": 0.8714, "step": 4376 }, { "epoch": 1.164095744680851, "grad_norm": 3.871457576751709, "learning_rate": 9.113727575717887e-06, "loss": 0.7551, "step": 4377 }, { "epoch": 1.1643617021276595, "grad_norm": 3.709536552429199, "learning_rate": 9.113227592601057e-06, "loss": 0.7476, "step": 4378 }, { "epoch": 1.164627659574468, "grad_norm": 4.048936367034912, "learning_rate": 9.112727482216123e-06, "loss": 0.822, "step": 4379 }, { "epoch": 1.1648936170212765, "grad_norm": 4.941551685333252, "learning_rate": 9.112227244578557e-06, "loss": 0.942, "step": 4380 }, { "epoch": 1.1651595744680852, "grad_norm": 3.971956491470337, "learning_rate": 9.111726879703839e-06, "loss": 0.898, "step": 4381 }, { "epoch": 1.1654255319148936, "grad_norm": 4.139491558074951, "learning_rate": 9.111226387607452e-06, "loss": 0.9185, "step": 4382 }, { "epoch": 1.165691489361702, "grad_norm": 3.8217787742614746, "learning_rate": 9.110725768304878e-06, "loss": 0.8598, "step": 4383 }, { "epoch": 1.1659574468085105, "grad_norm": 3.656966209411621, "learning_rate": 9.11022502181161e-06, "loss": 0.7433, "step": 4384 }, { "epoch": 1.1662234042553192, "grad_norm": 4.29415225982666, "learning_rate": 9.10972414814314e-06, "loss": 0.7777, "step": 4385 }, { "epoch": 1.1664893617021277, "grad_norm": 3.9143810272216797, "learning_rate": 9.109223147314968e-06, "loss": 0.678, "step": 4386 }, { "epoch": 1.1667553191489362, "grad_norm": 4.056838512420654, "learning_rate": 9.108722019342592e-06, "loss": 0.6778, "step": 4387 }, { "epoch": 1.1670212765957446, "grad_norm": 3.9018867015838623, "learning_rate": 9.10822076424152e-06, "loss": 0.8195, "step": 4388 }, { "epoch": 1.1672872340425533, "grad_norm": 4.0093994140625, "learning_rate": 9.10771938202726e-06, "loss": 0.9474, "step": 4389 }, { "epoch": 1.1675531914893618, "grad_norm": 4.224606037139893, "learning_rate": 9.107217872715326e-06, "loss": 0.7376, "step": 4390 }, { "epoch": 1.1678191489361702, "grad_norm": 3.831489086151123, "learning_rate": 9.106716236321236e-06, "loss": 0.731, "step": 4391 }, { "epoch": 1.1680851063829787, "grad_norm": 3.8180394172668457, "learning_rate": 9.106214472860511e-06, "loss": 0.7458, "step": 4392 }, { "epoch": 1.1683510638297872, "grad_norm": 3.393148899078369, "learning_rate": 9.105712582348676e-06, "loss": 0.7216, "step": 4393 }, { "epoch": 1.1686170212765958, "grad_norm": 4.6142964363098145, "learning_rate": 9.105210564801259e-06, "loss": 0.7643, "step": 4394 }, { "epoch": 1.1688829787234043, "grad_norm": 4.428558826446533, "learning_rate": 9.104708420233794e-06, "loss": 0.8364, "step": 4395 }, { "epoch": 1.1691489361702128, "grad_norm": 4.209799766540527, "learning_rate": 9.104206148661819e-06, "loss": 0.7965, "step": 4396 }, { "epoch": 1.1694148936170212, "grad_norm": 4.0707831382751465, "learning_rate": 9.10370375010087e-06, "loss": 0.7676, "step": 4397 }, { "epoch": 1.1696808510638297, "grad_norm": 3.684016227722168, "learning_rate": 9.103201224566499e-06, "loss": 0.8018, "step": 4398 }, { "epoch": 1.1699468085106384, "grad_norm": 4.157726287841797, "learning_rate": 9.10269857207425e-06, "loss": 0.8431, "step": 4399 }, { "epoch": 1.1702127659574468, "grad_norm": 3.866776704788208, "learning_rate": 9.102195792639677e-06, "loss": 0.9013, "step": 4400 }, { "epoch": 1.1704787234042553, "grad_norm": 3.8174455165863037, "learning_rate": 9.101692886278336e-06, "loss": 0.8174, "step": 4401 }, { "epoch": 1.1707446808510638, "grad_norm": 4.051540851593018, "learning_rate": 9.101189853005788e-06, "loss": 0.8006, "step": 4402 }, { "epoch": 1.1710106382978722, "grad_norm": 4.115768909454346, "learning_rate": 9.100686692837598e-06, "loss": 0.8905, "step": 4403 }, { "epoch": 1.171276595744681, "grad_norm": 3.989694595336914, "learning_rate": 9.100183405789334e-06, "loss": 0.8763, "step": 4404 }, { "epoch": 1.1715425531914894, "grad_norm": 3.5945072174072266, "learning_rate": 9.099679991876567e-06, "loss": 0.7173, "step": 4405 }, { "epoch": 1.1718085106382978, "grad_norm": 3.627795934677124, "learning_rate": 9.099176451114876e-06, "loss": 0.7708, "step": 4406 }, { "epoch": 1.1720744680851063, "grad_norm": 4.366139888763428, "learning_rate": 9.098672783519837e-06, "loss": 0.7882, "step": 4407 }, { "epoch": 1.172340425531915, "grad_norm": 4.13855504989624, "learning_rate": 9.098168989107038e-06, "loss": 0.7776, "step": 4408 }, { "epoch": 1.1726063829787234, "grad_norm": 3.8078205585479736, "learning_rate": 9.097665067892066e-06, "loss": 0.7194, "step": 4409 }, { "epoch": 1.172872340425532, "grad_norm": 3.676452398300171, "learning_rate": 9.09716101989051e-06, "loss": 0.7386, "step": 4410 }, { "epoch": 1.1731382978723404, "grad_norm": 4.525330066680908, "learning_rate": 9.09665684511797e-06, "loss": 0.8734, "step": 4411 }, { "epoch": 1.173404255319149, "grad_norm": 4.38550329208374, "learning_rate": 9.096152543590045e-06, "loss": 0.8248, "step": 4412 }, { "epoch": 1.1736702127659575, "grad_norm": 4.337765693664551, "learning_rate": 9.095648115322336e-06, "loss": 0.8992, "step": 4413 }, { "epoch": 1.173936170212766, "grad_norm": 4.145912170410156, "learning_rate": 9.095143560330453e-06, "loss": 0.8119, "step": 4414 }, { "epoch": 1.1742021276595744, "grad_norm": 3.5085721015930176, "learning_rate": 9.094638878630007e-06, "loss": 0.744, "step": 4415 }, { "epoch": 1.174468085106383, "grad_norm": 4.225882053375244, "learning_rate": 9.094134070236614e-06, "loss": 0.8368, "step": 4416 }, { "epoch": 1.1747340425531916, "grad_norm": 4.2498273849487305, "learning_rate": 9.09362913516589e-06, "loss": 0.7281, "step": 4417 }, { "epoch": 1.175, "grad_norm": 3.8343684673309326, "learning_rate": 9.093124073433464e-06, "loss": 0.8521, "step": 4418 }, { "epoch": 1.1752659574468085, "grad_norm": 4.265048503875732, "learning_rate": 9.092618885054958e-06, "loss": 0.8624, "step": 4419 }, { "epoch": 1.175531914893617, "grad_norm": 4.251501560211182, "learning_rate": 9.092113570046005e-06, "loss": 0.7163, "step": 4420 }, { "epoch": 1.1757978723404254, "grad_norm": 3.9519202709198, "learning_rate": 9.091608128422243e-06, "loss": 0.8139, "step": 4421 }, { "epoch": 1.1760638297872341, "grad_norm": 3.785550832748413, "learning_rate": 9.091102560199306e-06, "loss": 0.7897, "step": 4422 }, { "epoch": 1.1763297872340426, "grad_norm": 4.2011260986328125, "learning_rate": 9.090596865392838e-06, "loss": 0.8119, "step": 4423 }, { "epoch": 1.176595744680851, "grad_norm": 3.7419655323028564, "learning_rate": 9.090091044018488e-06, "loss": 0.64, "step": 4424 }, { "epoch": 1.1768617021276595, "grad_norm": 3.561340093612671, "learning_rate": 9.089585096091906e-06, "loss": 0.7546, "step": 4425 }, { "epoch": 1.177127659574468, "grad_norm": 3.971997022628784, "learning_rate": 9.089079021628746e-06, "loss": 0.8783, "step": 4426 }, { "epoch": 1.1773936170212767, "grad_norm": 4.214608669281006, "learning_rate": 9.088572820644667e-06, "loss": 0.9312, "step": 4427 }, { "epoch": 1.1776595744680851, "grad_norm": 3.867511749267578, "learning_rate": 9.088066493155332e-06, "loss": 0.9171, "step": 4428 }, { "epoch": 1.1779255319148936, "grad_norm": 3.8267605304718018, "learning_rate": 9.087560039176407e-06, "loss": 0.7369, "step": 4429 }, { "epoch": 1.178191489361702, "grad_norm": 3.9210994243621826, "learning_rate": 9.08705345872356e-06, "loss": 0.7975, "step": 4430 }, { "epoch": 1.1784574468085107, "grad_norm": 3.820697069168091, "learning_rate": 9.086546751812467e-06, "loss": 0.7579, "step": 4431 }, { "epoch": 1.1787234042553192, "grad_norm": 4.319027423858643, "learning_rate": 9.086039918458806e-06, "loss": 0.7671, "step": 4432 }, { "epoch": 1.1789893617021276, "grad_norm": 3.768254280090332, "learning_rate": 9.085532958678262e-06, "loss": 0.7075, "step": 4433 }, { "epoch": 1.179255319148936, "grad_norm": 3.8115556240081787, "learning_rate": 9.085025872486516e-06, "loss": 0.6844, "step": 4434 }, { "epoch": 1.1795212765957448, "grad_norm": 3.6113126277923584, "learning_rate": 9.08451865989926e-06, "loss": 0.7161, "step": 4435 }, { "epoch": 1.1797872340425533, "grad_norm": 4.16688871383667, "learning_rate": 9.08401132093219e-06, "loss": 0.8756, "step": 4436 }, { "epoch": 1.1800531914893617, "grad_norm": 4.136419773101807, "learning_rate": 9.083503855600997e-06, "loss": 0.8072, "step": 4437 }, { "epoch": 1.1803191489361702, "grad_norm": 4.0323357582092285, "learning_rate": 9.08299626392139e-06, "loss": 0.7889, "step": 4438 }, { "epoch": 1.1805851063829786, "grad_norm": 3.848400354385376, "learning_rate": 9.082488545909072e-06, "loss": 0.8467, "step": 4439 }, { "epoch": 1.1808510638297873, "grad_norm": 3.8820831775665283, "learning_rate": 9.08198070157975e-06, "loss": 0.7926, "step": 4440 }, { "epoch": 1.1811170212765958, "grad_norm": 3.9585654735565186, "learning_rate": 9.08147273094914e-06, "loss": 0.8671, "step": 4441 }, { "epoch": 1.1813829787234043, "grad_norm": 4.736848831176758, "learning_rate": 9.080964634032958e-06, "loss": 0.8953, "step": 4442 }, { "epoch": 1.1816489361702127, "grad_norm": 4.1310343742370605, "learning_rate": 9.080456410846926e-06, "loss": 0.7878, "step": 4443 }, { "epoch": 1.1819148936170212, "grad_norm": 3.701655149459839, "learning_rate": 9.079948061406769e-06, "loss": 0.7205, "step": 4444 }, { "epoch": 1.1821808510638299, "grad_norm": 4.258152008056641, "learning_rate": 9.079439585728214e-06, "loss": 0.8573, "step": 4445 }, { "epoch": 1.1824468085106383, "grad_norm": 4.08727502822876, "learning_rate": 9.078930983826997e-06, "loss": 0.8661, "step": 4446 }, { "epoch": 1.1827127659574468, "grad_norm": 4.263191223144531, "learning_rate": 9.078422255718852e-06, "loss": 0.9975, "step": 4447 }, { "epoch": 1.1829787234042553, "grad_norm": 3.8881144523620605, "learning_rate": 9.07791340141952e-06, "loss": 0.8825, "step": 4448 }, { "epoch": 1.1832446808510637, "grad_norm": 4.034143924713135, "learning_rate": 9.077404420944746e-06, "loss": 0.7645, "step": 4449 }, { "epoch": 1.1835106382978724, "grad_norm": 3.6815900802612305, "learning_rate": 9.076895314310282e-06, "loss": 0.845, "step": 4450 }, { "epoch": 1.1837765957446809, "grad_norm": 4.061761379241943, "learning_rate": 9.076386081531873e-06, "loss": 0.715, "step": 4451 }, { "epoch": 1.1840425531914893, "grad_norm": 3.675588846206665, "learning_rate": 9.075876722625281e-06, "loss": 0.6865, "step": 4452 }, { "epoch": 1.1843085106382978, "grad_norm": 3.922511577606201, "learning_rate": 9.075367237606265e-06, "loss": 0.8139, "step": 4453 }, { "epoch": 1.1845744680851065, "grad_norm": 4.45919132232666, "learning_rate": 9.074857626490587e-06, "loss": 0.8832, "step": 4454 }, { "epoch": 1.184840425531915, "grad_norm": 3.8306045532226562, "learning_rate": 9.074347889294017e-06, "loss": 0.775, "step": 4455 }, { "epoch": 1.1851063829787234, "grad_norm": 4.380180358886719, "learning_rate": 9.073838026032328e-06, "loss": 0.8028, "step": 4456 }, { "epoch": 1.1853723404255319, "grad_norm": 3.6403377056121826, "learning_rate": 9.073328036721292e-06, "loss": 0.7365, "step": 4457 }, { "epoch": 1.1856382978723405, "grad_norm": 4.642416477203369, "learning_rate": 9.072817921376692e-06, "loss": 1.0456, "step": 4458 }, { "epoch": 1.185904255319149, "grad_norm": 4.2514753341674805, "learning_rate": 9.07230768001431e-06, "loss": 0.8752, "step": 4459 }, { "epoch": 1.1861702127659575, "grad_norm": 4.097993850708008, "learning_rate": 9.071797312649934e-06, "loss": 0.8805, "step": 4460 }, { "epoch": 1.186436170212766, "grad_norm": 3.6704015731811523, "learning_rate": 9.071286819299355e-06, "loss": 0.7362, "step": 4461 }, { "epoch": 1.1867021276595744, "grad_norm": 3.5198822021484375, "learning_rate": 9.070776199978369e-06, "loss": 0.6528, "step": 4462 }, { "epoch": 1.186968085106383, "grad_norm": 4.044826507568359, "learning_rate": 9.070265454702774e-06, "loss": 0.785, "step": 4463 }, { "epoch": 1.1872340425531915, "grad_norm": 3.775392770767212, "learning_rate": 9.069754583488375e-06, "loss": 0.7664, "step": 4464 }, { "epoch": 1.1875, "grad_norm": 3.9251670837402344, "learning_rate": 9.069243586350976e-06, "loss": 0.7694, "step": 4465 }, { "epoch": 1.1877659574468085, "grad_norm": 4.138858318328857, "learning_rate": 9.06873246330639e-06, "loss": 0.8734, "step": 4466 }, { "epoch": 1.188031914893617, "grad_norm": 3.8749899864196777, "learning_rate": 9.06822121437043e-06, "loss": 0.7114, "step": 4467 }, { "epoch": 1.1882978723404256, "grad_norm": 4.107519626617432, "learning_rate": 9.067709839558917e-06, "loss": 0.7998, "step": 4468 }, { "epoch": 1.188563829787234, "grad_norm": 3.6962497234344482, "learning_rate": 9.067198338887673e-06, "loss": 0.8317, "step": 4469 }, { "epoch": 1.1888297872340425, "grad_norm": 4.575094223022461, "learning_rate": 9.066686712372524e-06, "loss": 0.8399, "step": 4470 }, { "epoch": 1.189095744680851, "grad_norm": 4.391597747802734, "learning_rate": 9.0661749600293e-06, "loss": 0.8801, "step": 4471 }, { "epoch": 1.1893617021276595, "grad_norm": 3.650452136993408, "learning_rate": 9.065663081873834e-06, "loss": 0.7738, "step": 4472 }, { "epoch": 1.1896276595744681, "grad_norm": 4.12108039855957, "learning_rate": 9.065151077921968e-06, "loss": 0.8333, "step": 4473 }, { "epoch": 1.1898936170212766, "grad_norm": 4.204649925231934, "learning_rate": 9.064638948189539e-06, "loss": 0.8531, "step": 4474 }, { "epoch": 1.190159574468085, "grad_norm": 4.241077423095703, "learning_rate": 9.064126692692397e-06, "loss": 0.8215, "step": 4475 }, { "epoch": 1.1904255319148935, "grad_norm": 4.215181350708008, "learning_rate": 9.06361431144639e-06, "loss": 0.7595, "step": 4476 }, { "epoch": 1.1906914893617022, "grad_norm": 3.597543239593506, "learning_rate": 9.06310180446737e-06, "loss": 0.7967, "step": 4477 }, { "epoch": 1.1909574468085107, "grad_norm": 4.075351238250732, "learning_rate": 9.0625891717712e-06, "loss": 0.8158, "step": 4478 }, { "epoch": 1.1912234042553191, "grad_norm": 3.5748724937438965, "learning_rate": 9.062076413373735e-06, "loss": 0.733, "step": 4479 }, { "epoch": 1.1914893617021276, "grad_norm": 3.9107751846313477, "learning_rate": 9.061563529290845e-06, "loss": 0.8057, "step": 4480 }, { "epoch": 1.1917553191489363, "grad_norm": 4.108970642089844, "learning_rate": 9.061050519538397e-06, "loss": 0.9214, "step": 4481 }, { "epoch": 1.1920212765957447, "grad_norm": 3.9196219444274902, "learning_rate": 9.060537384132264e-06, "loss": 0.8046, "step": 4482 }, { "epoch": 1.1922872340425532, "grad_norm": 3.312999963760376, "learning_rate": 9.060024123088324e-06, "loss": 0.6791, "step": 4483 }, { "epoch": 1.1925531914893617, "grad_norm": 4.010212421417236, "learning_rate": 9.05951073642246e-06, "loss": 0.8244, "step": 4484 }, { "epoch": 1.1928191489361701, "grad_norm": 3.9299821853637695, "learning_rate": 9.05899722415055e-06, "loss": 0.7054, "step": 4485 }, { "epoch": 1.1930851063829788, "grad_norm": 4.205704212188721, "learning_rate": 9.05848358628849e-06, "loss": 0.9058, "step": 4486 }, { "epoch": 1.1933510638297873, "grad_norm": 4.133444309234619, "learning_rate": 9.057969822852168e-06, "loss": 0.8414, "step": 4487 }, { "epoch": 1.1936170212765957, "grad_norm": 3.7199227809906006, "learning_rate": 9.057455933857483e-06, "loss": 0.7884, "step": 4488 }, { "epoch": 1.1938829787234042, "grad_norm": 4.377199172973633, "learning_rate": 9.056941919320335e-06, "loss": 0.7732, "step": 4489 }, { "epoch": 1.1941489361702127, "grad_norm": 4.171092987060547, "learning_rate": 9.056427779256624e-06, "loss": 0.8652, "step": 4490 }, { "epoch": 1.1944148936170214, "grad_norm": 3.7670929431915283, "learning_rate": 9.055913513682267e-06, "loss": 0.7825, "step": 4491 }, { "epoch": 1.1946808510638298, "grad_norm": 3.9210784435272217, "learning_rate": 9.055399122613166e-06, "loss": 0.8515, "step": 4492 }, { "epoch": 1.1949468085106383, "grad_norm": 3.543363094329834, "learning_rate": 9.054884606065243e-06, "loss": 0.6883, "step": 4493 }, { "epoch": 1.1952127659574467, "grad_norm": 3.9357686042785645, "learning_rate": 9.054369964054418e-06, "loss": 0.7847, "step": 4494 }, { "epoch": 1.1954787234042552, "grad_norm": 3.5497348308563232, "learning_rate": 9.05385519659661e-06, "loss": 0.8664, "step": 4495 }, { "epoch": 1.195744680851064, "grad_norm": 4.09616756439209, "learning_rate": 9.053340303707752e-06, "loss": 0.7928, "step": 4496 }, { "epoch": 1.1960106382978724, "grad_norm": 4.135888576507568, "learning_rate": 9.052825285403771e-06, "loss": 0.8372, "step": 4497 }, { "epoch": 1.1962765957446808, "grad_norm": 4.014375686645508, "learning_rate": 9.052310141700605e-06, "loss": 0.7838, "step": 4498 }, { "epoch": 1.1965425531914893, "grad_norm": 4.164703369140625, "learning_rate": 9.051794872614193e-06, "loss": 0.7346, "step": 4499 }, { "epoch": 1.196808510638298, "grad_norm": 3.9445199966430664, "learning_rate": 9.051279478160475e-06, "loss": 0.7969, "step": 4500 }, { "epoch": 1.196808510638298, "eval_loss": 1.3114004135131836, "eval_runtime": 13.8708, "eval_samples_per_second": 28.838, "eval_steps_per_second": 3.605, "step": 4500 }, { "epoch": 1.1970744680851064, "grad_norm": 4.145724773406982, "learning_rate": 9.050763958355401e-06, "loss": 0.864, "step": 4501 }, { "epoch": 1.1973404255319149, "grad_norm": 3.9395062923431396, "learning_rate": 9.050248313214921e-06, "loss": 0.8854, "step": 4502 }, { "epoch": 1.1976063829787233, "grad_norm": 3.7419703006744385, "learning_rate": 9.04973254275499e-06, "loss": 0.778, "step": 4503 }, { "epoch": 1.197872340425532, "grad_norm": 3.620009422302246, "learning_rate": 9.049216646991568e-06, "loss": 0.6522, "step": 4504 }, { "epoch": 1.1981382978723405, "grad_norm": 4.093226909637451, "learning_rate": 9.048700625940613e-06, "loss": 0.7909, "step": 4505 }, { "epoch": 1.198404255319149, "grad_norm": 4.31190824508667, "learning_rate": 9.048184479618094e-06, "loss": 0.87, "step": 4506 }, { "epoch": 1.1986702127659574, "grad_norm": 3.5274550914764404, "learning_rate": 9.047668208039981e-06, "loss": 0.7015, "step": 4507 }, { "epoch": 1.1989361702127659, "grad_norm": 4.295877933502197, "learning_rate": 9.04715181122225e-06, "loss": 0.8673, "step": 4508 }, { "epoch": 1.1992021276595746, "grad_norm": 4.239846706390381, "learning_rate": 9.046635289180875e-06, "loss": 0.7815, "step": 4509 }, { "epoch": 1.199468085106383, "grad_norm": 4.294873237609863, "learning_rate": 9.046118641931841e-06, "loss": 0.8275, "step": 4510 }, { "epoch": 1.1997340425531915, "grad_norm": 4.2128586769104, "learning_rate": 9.045601869491131e-06, "loss": 0.885, "step": 4511 }, { "epoch": 1.2, "grad_norm": 4.04133415222168, "learning_rate": 9.045084971874738e-06, "loss": 0.6479, "step": 4512 }, { "epoch": 1.2002659574468084, "grad_norm": 4.300421714782715, "learning_rate": 9.044567949098653e-06, "loss": 0.7596, "step": 4513 }, { "epoch": 1.200531914893617, "grad_norm": 4.0186896324157715, "learning_rate": 9.044050801178873e-06, "loss": 0.9244, "step": 4514 }, { "epoch": 1.2007978723404256, "grad_norm": 3.989703416824341, "learning_rate": 9.043533528131401e-06, "loss": 0.8296, "step": 4515 }, { "epoch": 1.201063829787234, "grad_norm": 3.6627588272094727, "learning_rate": 9.043016129972239e-06, "loss": 0.6557, "step": 4516 }, { "epoch": 1.2013297872340425, "grad_norm": 4.000990867614746, "learning_rate": 9.042498606717401e-06, "loss": 0.8114, "step": 4517 }, { "epoch": 1.201595744680851, "grad_norm": 4.12056827545166, "learning_rate": 9.041980958382895e-06, "loss": 0.7866, "step": 4518 }, { "epoch": 1.2018617021276596, "grad_norm": 4.345433712005615, "learning_rate": 9.041463184984739e-06, "loss": 0.9222, "step": 4519 }, { "epoch": 1.202127659574468, "grad_norm": 3.629518747329712, "learning_rate": 9.040945286538954e-06, "loss": 0.6739, "step": 4520 }, { "epoch": 1.2023936170212766, "grad_norm": 4.012117862701416, "learning_rate": 9.040427263061563e-06, "loss": 0.8168, "step": 4521 }, { "epoch": 1.202659574468085, "grad_norm": 3.6947031021118164, "learning_rate": 9.039909114568597e-06, "loss": 0.7811, "step": 4522 }, { "epoch": 1.2029255319148937, "grad_norm": 4.276979446411133, "learning_rate": 9.039390841076086e-06, "loss": 0.9514, "step": 4523 }, { "epoch": 1.2031914893617022, "grad_norm": 3.970949411392212, "learning_rate": 9.038872442600066e-06, "loss": 0.832, "step": 4524 }, { "epoch": 1.2034574468085106, "grad_norm": 4.2050323486328125, "learning_rate": 9.038353919156579e-06, "loss": 0.838, "step": 4525 }, { "epoch": 1.203723404255319, "grad_norm": 3.872286319732666, "learning_rate": 9.037835270761667e-06, "loss": 0.8424, "step": 4526 }, { "epoch": 1.2039893617021278, "grad_norm": 4.053325653076172, "learning_rate": 9.037316497431377e-06, "loss": 0.8673, "step": 4527 }, { "epoch": 1.2042553191489362, "grad_norm": 3.982133388519287, "learning_rate": 9.036797599181762e-06, "loss": 0.7101, "step": 4528 }, { "epoch": 1.2045212765957447, "grad_norm": 4.298680782318115, "learning_rate": 9.036278576028876e-06, "loss": 0.8027, "step": 4529 }, { "epoch": 1.2047872340425532, "grad_norm": 3.7166576385498047, "learning_rate": 9.035759427988779e-06, "loss": 0.8048, "step": 4530 }, { "epoch": 1.2050531914893616, "grad_norm": 4.02637243270874, "learning_rate": 9.035240155077532e-06, "loss": 0.8519, "step": 4531 }, { "epoch": 1.2053191489361703, "grad_norm": 4.048903942108154, "learning_rate": 9.034720757311206e-06, "loss": 0.8076, "step": 4532 }, { "epoch": 1.2055851063829788, "grad_norm": 3.8102221488952637, "learning_rate": 9.034201234705869e-06, "loss": 0.8361, "step": 4533 }, { "epoch": 1.2058510638297872, "grad_norm": 4.269223213195801, "learning_rate": 9.033681587277596e-06, "loss": 0.9528, "step": 4534 }, { "epoch": 1.2061170212765957, "grad_norm": 4.001543998718262, "learning_rate": 9.033161815042465e-06, "loss": 0.8678, "step": 4535 }, { "epoch": 1.2063829787234042, "grad_norm": 4.034337997436523, "learning_rate": 9.032641918016559e-06, "loss": 0.7533, "step": 4536 }, { "epoch": 1.2066489361702128, "grad_norm": 3.7186598777770996, "learning_rate": 9.032121896215965e-06, "loss": 0.8469, "step": 4537 }, { "epoch": 1.2069148936170213, "grad_norm": 3.8396542072296143, "learning_rate": 9.03160174965677e-06, "loss": 0.7419, "step": 4538 }, { "epoch": 1.2071808510638298, "grad_norm": 3.971125602722168, "learning_rate": 9.031081478355074e-06, "loss": 0.7997, "step": 4539 }, { "epoch": 1.2074468085106382, "grad_norm": 3.9450175762176514, "learning_rate": 9.03056108232697e-06, "loss": 0.9049, "step": 4540 }, { "epoch": 1.2077127659574467, "grad_norm": 3.878206729888916, "learning_rate": 9.03004056158856e-06, "loss": 0.7389, "step": 4541 }, { "epoch": 1.2079787234042554, "grad_norm": 4.157868385314941, "learning_rate": 9.02951991615595e-06, "loss": 0.8474, "step": 4542 }, { "epoch": 1.2082446808510638, "grad_norm": 4.203000068664551, "learning_rate": 9.02899914604525e-06, "loss": 0.7146, "step": 4543 }, { "epoch": 1.2085106382978723, "grad_norm": 4.336871147155762, "learning_rate": 9.028478251272573e-06, "loss": 0.7901, "step": 4544 }, { "epoch": 1.2087765957446808, "grad_norm": 4.467360973358154, "learning_rate": 9.027957231854034e-06, "loss": 0.6987, "step": 4545 }, { "epoch": 1.2090425531914895, "grad_norm": 4.293298721313477, "learning_rate": 9.027436087805759e-06, "loss": 0.8706, "step": 4546 }, { "epoch": 1.209308510638298, "grad_norm": 4.344003200531006, "learning_rate": 9.026914819143867e-06, "loss": 0.8803, "step": 4547 }, { "epoch": 1.2095744680851064, "grad_norm": 3.9396615028381348, "learning_rate": 9.026393425884491e-06, "loss": 0.8195, "step": 4548 }, { "epoch": 1.2098404255319148, "grad_norm": 4.163116931915283, "learning_rate": 9.025871908043762e-06, "loss": 0.8396, "step": 4549 }, { "epoch": 1.2101063829787235, "grad_norm": 3.790417194366455, "learning_rate": 9.025350265637816e-06, "loss": 0.9279, "step": 4550 }, { "epoch": 1.210372340425532, "grad_norm": 3.6482441425323486, "learning_rate": 9.024828498682793e-06, "loss": 0.8154, "step": 4551 }, { "epoch": 1.2106382978723405, "grad_norm": 4.012534141540527, "learning_rate": 9.024306607194839e-06, "loss": 0.777, "step": 4552 }, { "epoch": 1.210904255319149, "grad_norm": 3.850843906402588, "learning_rate": 9.0237845911901e-06, "loss": 0.6989, "step": 4553 }, { "epoch": 1.2111702127659574, "grad_norm": 3.810297966003418, "learning_rate": 9.023262450684727e-06, "loss": 0.8284, "step": 4554 }, { "epoch": 1.211436170212766, "grad_norm": 3.643862247467041, "learning_rate": 9.022740185694877e-06, "loss": 0.9392, "step": 4555 }, { "epoch": 1.2117021276595745, "grad_norm": 3.707839012145996, "learning_rate": 9.022217796236711e-06, "loss": 0.794, "step": 4556 }, { "epoch": 1.211968085106383, "grad_norm": 4.23673152923584, "learning_rate": 9.02169528232639e-06, "loss": 0.7546, "step": 4557 }, { "epoch": 1.2122340425531914, "grad_norm": 4.236415386199951, "learning_rate": 9.021172643980082e-06, "loss": 0.9645, "step": 4558 }, { "epoch": 1.2125, "grad_norm": 3.956615686416626, "learning_rate": 9.02064988121396e-06, "loss": 0.9095, "step": 4559 }, { "epoch": 1.2127659574468086, "grad_norm": 4.126330852508545, "learning_rate": 9.020126994044194e-06, "loss": 0.7762, "step": 4560 }, { "epoch": 1.213031914893617, "grad_norm": 4.501354694366455, "learning_rate": 9.019603982486967e-06, "loss": 0.873, "step": 4561 }, { "epoch": 1.2132978723404255, "grad_norm": 4.185324192047119, "learning_rate": 9.01908084655846e-06, "loss": 0.8071, "step": 4562 }, { "epoch": 1.213563829787234, "grad_norm": 4.112594127655029, "learning_rate": 9.018557586274858e-06, "loss": 0.7762, "step": 4563 }, { "epoch": 1.2138297872340424, "grad_norm": 3.841365098953247, "learning_rate": 9.018034201652357e-06, "loss": 0.8042, "step": 4564 }, { "epoch": 1.2140957446808511, "grad_norm": 3.9603569507598877, "learning_rate": 9.017510692707144e-06, "loss": 0.6254, "step": 4565 }, { "epoch": 1.2143617021276596, "grad_norm": 3.6832830905914307, "learning_rate": 9.016987059455422e-06, "loss": 0.7013, "step": 4566 }, { "epoch": 1.214627659574468, "grad_norm": 4.155395030975342, "learning_rate": 9.01646330191339e-06, "loss": 0.8052, "step": 4567 }, { "epoch": 1.2148936170212765, "grad_norm": 3.9648375511169434, "learning_rate": 9.015939420097255e-06, "loss": 0.778, "step": 4568 }, { "epoch": 1.2151595744680852, "grad_norm": 3.8621366024017334, "learning_rate": 9.015415414023226e-06, "loss": 0.7851, "step": 4569 }, { "epoch": 1.2154255319148937, "grad_norm": 4.207528114318848, "learning_rate": 9.014891283707517e-06, "loss": 0.9192, "step": 4570 }, { "epoch": 1.2156914893617021, "grad_norm": 4.204238414764404, "learning_rate": 9.014367029166344e-06, "loss": 0.8175, "step": 4571 }, { "epoch": 1.2159574468085106, "grad_norm": 4.0870537757873535, "learning_rate": 9.013842650415927e-06, "loss": 0.8294, "step": 4572 }, { "epoch": 1.2162234042553193, "grad_norm": 4.164912700653076, "learning_rate": 9.013318147472497e-06, "loss": 0.8457, "step": 4573 }, { "epoch": 1.2164893617021277, "grad_norm": 4.122684478759766, "learning_rate": 9.012793520352276e-06, "loss": 0.7565, "step": 4574 }, { "epoch": 1.2167553191489362, "grad_norm": 4.155274391174316, "learning_rate": 9.012268769071499e-06, "loss": 0.7522, "step": 4575 }, { "epoch": 1.2170212765957447, "grad_norm": 4.182219505310059, "learning_rate": 9.011743893646402e-06, "loss": 0.842, "step": 4576 }, { "epoch": 1.2172872340425531, "grad_norm": 3.9600305557250977, "learning_rate": 9.011218894093226e-06, "loss": 0.7938, "step": 4577 }, { "epoch": 1.2175531914893618, "grad_norm": 3.977374792098999, "learning_rate": 9.010693770428217e-06, "loss": 0.7021, "step": 4578 }, { "epoch": 1.2178191489361703, "grad_norm": 4.227469444274902, "learning_rate": 9.010168522667617e-06, "loss": 0.8016, "step": 4579 }, { "epoch": 1.2180851063829787, "grad_norm": 3.7802317142486572, "learning_rate": 9.009643150827683e-06, "loss": 0.7565, "step": 4580 }, { "epoch": 1.2183510638297872, "grad_norm": 3.9615867137908936, "learning_rate": 9.00911765492467e-06, "loss": 0.8134, "step": 4581 }, { "epoch": 1.2186170212765957, "grad_norm": 3.852104902267456, "learning_rate": 9.008592034974836e-06, "loss": 0.7654, "step": 4582 }, { "epoch": 1.2188829787234043, "grad_norm": 3.5889623165130615, "learning_rate": 9.008066290994443e-06, "loss": 0.816, "step": 4583 }, { "epoch": 1.2191489361702128, "grad_norm": 3.7613863945007324, "learning_rate": 9.007540422999762e-06, "loss": 0.7356, "step": 4584 }, { "epoch": 1.2194148936170213, "grad_norm": 4.141067981719971, "learning_rate": 9.007014431007064e-06, "loss": 0.8445, "step": 4585 }, { "epoch": 1.2196808510638297, "grad_norm": 3.842954635620117, "learning_rate": 9.00648831503262e-06, "loss": 0.7844, "step": 4586 }, { "epoch": 1.2199468085106382, "grad_norm": 3.799661159515381, "learning_rate": 9.00596207509271e-06, "loss": 0.8777, "step": 4587 }, { "epoch": 1.2202127659574469, "grad_norm": 4.335452079772949, "learning_rate": 9.005435711203619e-06, "loss": 0.936, "step": 4588 }, { "epoch": 1.2204787234042553, "grad_norm": 3.905426025390625, "learning_rate": 9.004909223381628e-06, "loss": 0.7583, "step": 4589 }, { "epoch": 1.2207446808510638, "grad_norm": 3.950054168701172, "learning_rate": 9.004382611643032e-06, "loss": 0.8512, "step": 4590 }, { "epoch": 1.2210106382978723, "grad_norm": 4.1044135093688965, "learning_rate": 9.003855876004124e-06, "loss": 0.7941, "step": 4591 }, { "epoch": 1.2212765957446807, "grad_norm": 3.908524751663208, "learning_rate": 9.003329016481201e-06, "loss": 0.7502, "step": 4592 }, { "epoch": 1.2215425531914894, "grad_norm": 3.6956968307495117, "learning_rate": 9.002802033090564e-06, "loss": 0.7847, "step": 4593 }, { "epoch": 1.2218085106382979, "grad_norm": 4.292162895202637, "learning_rate": 9.00227492584852e-06, "loss": 0.7966, "step": 4594 }, { "epoch": 1.2220744680851063, "grad_norm": 4.15654993057251, "learning_rate": 9.001747694771378e-06, "loss": 0.7523, "step": 4595 }, { "epoch": 1.2223404255319148, "grad_norm": 3.5688204765319824, "learning_rate": 9.00122033987545e-06, "loss": 0.6891, "step": 4596 }, { "epoch": 1.2226063829787235, "grad_norm": 3.962028980255127, "learning_rate": 9.000692861177056e-06, "loss": 0.7285, "step": 4597 }, { "epoch": 1.222872340425532, "grad_norm": 4.2762651443481445, "learning_rate": 9.000165258692512e-06, "loss": 0.8359, "step": 4598 }, { "epoch": 1.2231382978723404, "grad_norm": 4.260420799255371, "learning_rate": 8.999637532438145e-06, "loss": 0.9171, "step": 4599 }, { "epoch": 1.2234042553191489, "grad_norm": 4.032958507537842, "learning_rate": 8.999109682430288e-06, "loss": 0.8082, "step": 4600 }, { "epoch": 1.2236702127659576, "grad_norm": 3.772594690322876, "learning_rate": 8.998581708685264e-06, "loss": 0.8029, "step": 4601 }, { "epoch": 1.223936170212766, "grad_norm": 4.074283123016357, "learning_rate": 8.998053611219418e-06, "loss": 0.729, "step": 4602 }, { "epoch": 1.2242021276595745, "grad_norm": 3.5871801376342773, "learning_rate": 8.997525390049084e-06, "loss": 0.8645, "step": 4603 }, { "epoch": 1.224468085106383, "grad_norm": 3.789030075073242, "learning_rate": 8.996997045190608e-06, "loss": 0.7226, "step": 4604 }, { "epoch": 1.2247340425531914, "grad_norm": 3.840949296951294, "learning_rate": 8.996468576660337e-06, "loss": 0.8817, "step": 4605 }, { "epoch": 1.225, "grad_norm": 4.251964569091797, "learning_rate": 8.995939984474624e-06, "loss": 0.7567, "step": 4606 }, { "epoch": 1.2252659574468086, "grad_norm": 3.7050812244415283, "learning_rate": 8.995411268649823e-06, "loss": 0.8609, "step": 4607 }, { "epoch": 1.225531914893617, "grad_norm": 4.209064483642578, "learning_rate": 8.994882429202294e-06, "loss": 0.8653, "step": 4608 }, { "epoch": 1.2257978723404255, "grad_norm": 4.214296340942383, "learning_rate": 8.994353466148399e-06, "loss": 0.8262, "step": 4609 }, { "epoch": 1.226063829787234, "grad_norm": 3.9574646949768066, "learning_rate": 8.993824379504505e-06, "loss": 0.7383, "step": 4610 }, { "epoch": 1.2263297872340426, "grad_norm": 4.194293975830078, "learning_rate": 8.993295169286982e-06, "loss": 0.7483, "step": 4611 }, { "epoch": 1.226595744680851, "grad_norm": 3.9258837699890137, "learning_rate": 8.992765835512205e-06, "loss": 0.7151, "step": 4612 }, { "epoch": 1.2268617021276595, "grad_norm": 3.662429094314575, "learning_rate": 8.992236378196552e-06, "loss": 0.8595, "step": 4613 }, { "epoch": 1.227127659574468, "grad_norm": 3.745591640472412, "learning_rate": 8.991706797356407e-06, "loss": 0.8065, "step": 4614 }, { "epoch": 1.2273936170212765, "grad_norm": 3.8420639038085938, "learning_rate": 8.991177093008153e-06, "loss": 0.7613, "step": 4615 }, { "epoch": 1.2276595744680852, "grad_norm": 3.994805097579956, "learning_rate": 8.990647265168179e-06, "loss": 0.7919, "step": 4616 }, { "epoch": 1.2279255319148936, "grad_norm": 4.0484514236450195, "learning_rate": 8.990117313852882e-06, "loss": 0.9, "step": 4617 }, { "epoch": 1.228191489361702, "grad_norm": 3.999068260192871, "learning_rate": 8.989587239078658e-06, "loss": 0.7472, "step": 4618 }, { "epoch": 1.2284574468085105, "grad_norm": 3.9625680446624756, "learning_rate": 8.989057040861905e-06, "loss": 1.0265, "step": 4619 }, { "epoch": 1.2287234042553192, "grad_norm": 4.0248284339904785, "learning_rate": 8.988526719219035e-06, "loss": 0.7525, "step": 4620 }, { "epoch": 1.2289893617021277, "grad_norm": 3.985003709793091, "learning_rate": 8.987996274166449e-06, "loss": 0.8491, "step": 4621 }, { "epoch": 1.2292553191489362, "grad_norm": 3.5832836627960205, "learning_rate": 8.987465705720565e-06, "loss": 0.6647, "step": 4622 }, { "epoch": 1.2295212765957446, "grad_norm": 3.5431840419769287, "learning_rate": 8.986935013897796e-06, "loss": 0.7142, "step": 4623 }, { "epoch": 1.2297872340425533, "grad_norm": 3.745082139968872, "learning_rate": 8.986404198714561e-06, "loss": 0.6538, "step": 4624 }, { "epoch": 1.2300531914893618, "grad_norm": 3.653146982192993, "learning_rate": 8.98587326018729e-06, "loss": 0.7833, "step": 4625 }, { "epoch": 1.2303191489361702, "grad_norm": 3.9238173961639404, "learning_rate": 8.985342198332407e-06, "loss": 0.8265, "step": 4626 }, { "epoch": 1.2305851063829787, "grad_norm": 4.6217265129089355, "learning_rate": 8.984811013166345e-06, "loss": 0.9442, "step": 4627 }, { "epoch": 1.2308510638297872, "grad_norm": 3.7040395736694336, "learning_rate": 8.98427970470554e-06, "loss": 0.8234, "step": 4628 }, { "epoch": 1.2311170212765958, "grad_norm": 3.8721320629119873, "learning_rate": 8.983748272966426e-06, "loss": 0.8997, "step": 4629 }, { "epoch": 1.2313829787234043, "grad_norm": 3.5621466636657715, "learning_rate": 8.983216717965453e-06, "loss": 0.8186, "step": 4630 }, { "epoch": 1.2316489361702128, "grad_norm": 3.854879379272461, "learning_rate": 8.982685039719064e-06, "loss": 0.773, "step": 4631 }, { "epoch": 1.2319148936170212, "grad_norm": 3.9702491760253906, "learning_rate": 8.982153238243712e-06, "loss": 0.8645, "step": 4632 }, { "epoch": 1.2321808510638297, "grad_norm": 4.122603416442871, "learning_rate": 8.981621313555849e-06, "loss": 0.7651, "step": 4633 }, { "epoch": 1.2324468085106384, "grad_norm": 4.362513065338135, "learning_rate": 8.981089265671936e-06, "loss": 0.8279, "step": 4634 }, { "epoch": 1.2327127659574468, "grad_norm": 4.333089351654053, "learning_rate": 8.980557094608433e-06, "loss": 0.8613, "step": 4635 }, { "epoch": 1.2329787234042553, "grad_norm": 3.9214844703674316, "learning_rate": 8.980024800381807e-06, "loss": 0.8316, "step": 4636 }, { "epoch": 1.2332446808510638, "grad_norm": 3.9786224365234375, "learning_rate": 8.979492383008528e-06, "loss": 0.8405, "step": 4637 }, { "epoch": 1.2335106382978722, "grad_norm": 4.105279445648193, "learning_rate": 8.978959842505071e-06, "loss": 0.8187, "step": 4638 }, { "epoch": 1.233776595744681, "grad_norm": 4.662153244018555, "learning_rate": 8.97842717888791e-06, "loss": 0.8309, "step": 4639 }, { "epoch": 1.2340425531914894, "grad_norm": 4.0390400886535645, "learning_rate": 8.977894392173527e-06, "loss": 0.823, "step": 4640 }, { "epoch": 1.2343085106382978, "grad_norm": 3.574883222579956, "learning_rate": 8.97736148237841e-06, "loss": 0.899, "step": 4641 }, { "epoch": 1.2345744680851063, "grad_norm": 3.9242796897888184, "learning_rate": 8.976828449519047e-06, "loss": 0.9994, "step": 4642 }, { "epoch": 1.234840425531915, "grad_norm": 3.9096062183380127, "learning_rate": 8.976295293611927e-06, "loss": 0.907, "step": 4643 }, { "epoch": 1.2351063829787234, "grad_norm": 4.211862087249756, "learning_rate": 8.97576201467355e-06, "loss": 0.807, "step": 4644 }, { "epoch": 1.235372340425532, "grad_norm": 3.7779862880706787, "learning_rate": 8.975228612720415e-06, "loss": 0.7325, "step": 4645 }, { "epoch": 1.2356382978723404, "grad_norm": 4.162439823150635, "learning_rate": 8.974695087769027e-06, "loss": 0.9018, "step": 4646 }, { "epoch": 1.235904255319149, "grad_norm": 3.9376440048217773, "learning_rate": 8.974161439835894e-06, "loss": 0.7467, "step": 4647 }, { "epoch": 1.2361702127659575, "grad_norm": 3.728128433227539, "learning_rate": 8.973627668937528e-06, "loss": 0.6471, "step": 4648 }, { "epoch": 1.236436170212766, "grad_norm": 4.1924967765808105, "learning_rate": 8.97309377509044e-06, "loss": 0.8827, "step": 4649 }, { "epoch": 1.2367021276595744, "grad_norm": 3.9644808769226074, "learning_rate": 8.972559758311156e-06, "loss": 0.737, "step": 4650 }, { "epoch": 1.236968085106383, "grad_norm": 4.276489734649658, "learning_rate": 8.972025618616195e-06, "loss": 0.7805, "step": 4651 }, { "epoch": 1.2372340425531916, "grad_norm": 4.115257263183594, "learning_rate": 8.971491356022086e-06, "loss": 0.8479, "step": 4652 }, { "epoch": 1.2375, "grad_norm": 4.143589019775391, "learning_rate": 8.970956970545356e-06, "loss": 0.7716, "step": 4653 }, { "epoch": 1.2377659574468085, "grad_norm": 3.872377634048462, "learning_rate": 8.970422462202543e-06, "loss": 0.7949, "step": 4654 }, { "epoch": 1.238031914893617, "grad_norm": 3.9074594974517822, "learning_rate": 8.969887831010185e-06, "loss": 0.818, "step": 4655 }, { "epoch": 1.2382978723404254, "grad_norm": 3.7083117961883545, "learning_rate": 8.969353076984823e-06, "loss": 0.823, "step": 4656 }, { "epoch": 1.2385638297872341, "grad_norm": 3.952829122543335, "learning_rate": 8.968818200143005e-06, "loss": 0.7928, "step": 4657 }, { "epoch": 1.2388297872340426, "grad_norm": 4.015969276428223, "learning_rate": 8.96828320050128e-06, "loss": 0.8713, "step": 4658 }, { "epoch": 1.239095744680851, "grad_norm": 4.456661701202393, "learning_rate": 8.967748078076197e-06, "loss": 0.8482, "step": 4659 }, { "epoch": 1.2393617021276595, "grad_norm": 3.8664846420288086, "learning_rate": 8.96721283288432e-06, "loss": 0.7526, "step": 4660 }, { "epoch": 1.239627659574468, "grad_norm": 4.358894348144531, "learning_rate": 8.966677464942206e-06, "loss": 0.7756, "step": 4661 }, { "epoch": 1.2398936170212767, "grad_norm": 3.8991811275482178, "learning_rate": 8.96614197426642e-06, "loss": 0.7629, "step": 4662 }, { "epoch": 1.2401595744680851, "grad_norm": 3.752913236618042, "learning_rate": 8.965606360873533e-06, "loss": 0.7598, "step": 4663 }, { "epoch": 1.2404255319148936, "grad_norm": 4.097616672515869, "learning_rate": 8.965070624780117e-06, "loss": 0.7635, "step": 4664 }, { "epoch": 1.240691489361702, "grad_norm": 3.855180025100708, "learning_rate": 8.964534766002747e-06, "loss": 0.8571, "step": 4665 }, { "epoch": 1.2409574468085107, "grad_norm": 4.117387771606445, "learning_rate": 8.963998784558001e-06, "loss": 0.8517, "step": 4666 }, { "epoch": 1.2412234042553192, "grad_norm": 4.247325897216797, "learning_rate": 8.963462680462469e-06, "loss": 0.7862, "step": 4667 }, { "epoch": 1.2414893617021276, "grad_norm": 4.604616165161133, "learning_rate": 8.962926453732734e-06, "loss": 0.8325, "step": 4668 }, { "epoch": 1.241755319148936, "grad_norm": 4.283206462860107, "learning_rate": 8.96239010438539e-06, "loss": 0.7897, "step": 4669 }, { "epoch": 1.2420212765957448, "grad_norm": 4.039552688598633, "learning_rate": 8.96185363243703e-06, "loss": 0.8889, "step": 4670 }, { "epoch": 1.2422872340425533, "grad_norm": 3.6952388286590576, "learning_rate": 8.961317037904253e-06, "loss": 0.7318, "step": 4671 }, { "epoch": 1.2425531914893617, "grad_norm": 4.330514907836914, "learning_rate": 8.960780320803665e-06, "loss": 0.8473, "step": 4672 }, { "epoch": 1.2428191489361702, "grad_norm": 3.8652656078338623, "learning_rate": 8.960243481151869e-06, "loss": 0.7744, "step": 4673 }, { "epoch": 1.2430851063829786, "grad_norm": 4.232844352722168, "learning_rate": 8.959706518965479e-06, "loss": 0.7232, "step": 4674 }, { "epoch": 1.2433510638297873, "grad_norm": 3.9439735412597656, "learning_rate": 8.959169434261106e-06, "loss": 0.7025, "step": 4675 }, { "epoch": 1.2436170212765958, "grad_norm": 3.876521587371826, "learning_rate": 8.958632227055369e-06, "loss": 0.6779, "step": 4676 }, { "epoch": 1.2438829787234043, "grad_norm": 3.7715842723846436, "learning_rate": 8.95809489736489e-06, "loss": 0.7331, "step": 4677 }, { "epoch": 1.2441489361702127, "grad_norm": 4.344306945800781, "learning_rate": 8.957557445206297e-06, "loss": 0.797, "step": 4678 }, { "epoch": 1.2444148936170212, "grad_norm": 3.924248218536377, "learning_rate": 8.957019870596216e-06, "loss": 0.9321, "step": 4679 }, { "epoch": 1.2446808510638299, "grad_norm": 3.8048911094665527, "learning_rate": 8.956482173551281e-06, "loss": 0.7405, "step": 4680 }, { "epoch": 1.2449468085106383, "grad_norm": 4.218112468719482, "learning_rate": 8.95594435408813e-06, "loss": 0.8395, "step": 4681 }, { "epoch": 1.2452127659574468, "grad_norm": 3.683992385864258, "learning_rate": 8.955406412223402e-06, "loss": 0.7261, "step": 4682 }, { "epoch": 1.2454787234042553, "grad_norm": 4.05771541595459, "learning_rate": 8.954868347973742e-06, "loss": 0.85, "step": 4683 }, { "epoch": 1.2457446808510637, "grad_norm": 4.423064708709717, "learning_rate": 8.954330161355803e-06, "loss": 0.8632, "step": 4684 }, { "epoch": 1.2460106382978724, "grad_norm": 4.039585113525391, "learning_rate": 8.953791852386229e-06, "loss": 0.8078, "step": 4685 }, { "epoch": 1.2462765957446809, "grad_norm": 4.336376190185547, "learning_rate": 8.953253421081682e-06, "loss": 0.807, "step": 4686 }, { "epoch": 1.2465425531914893, "grad_norm": 4.025651454925537, "learning_rate": 8.95271486745882e-06, "loss": 0.8651, "step": 4687 }, { "epoch": 1.2468085106382978, "grad_norm": 3.839545488357544, "learning_rate": 8.952176191534305e-06, "loss": 0.7696, "step": 4688 }, { "epoch": 1.2470744680851065, "grad_norm": 3.4037442207336426, "learning_rate": 8.951637393324806e-06, "loss": 0.7827, "step": 4689 }, { "epoch": 1.247340425531915, "grad_norm": 4.202190399169922, "learning_rate": 8.951098472846994e-06, "loss": 0.6717, "step": 4690 }, { "epoch": 1.2476063829787234, "grad_norm": 4.145596027374268, "learning_rate": 8.950559430117542e-06, "loss": 0.8201, "step": 4691 }, { "epoch": 1.2478723404255319, "grad_norm": 4.066543102264404, "learning_rate": 8.950020265153133e-06, "loss": 0.7651, "step": 4692 }, { "epoch": 1.2481382978723405, "grad_norm": 3.9612643718719482, "learning_rate": 8.949480977970444e-06, "loss": 0.7625, "step": 4693 }, { "epoch": 1.248404255319149, "grad_norm": 3.6797444820404053, "learning_rate": 8.948941568586165e-06, "loss": 0.7396, "step": 4694 }, { "epoch": 1.2486702127659575, "grad_norm": 4.5470662117004395, "learning_rate": 8.948402037016984e-06, "loss": 0.831, "step": 4695 }, { "epoch": 1.248936170212766, "grad_norm": 3.3565194606781006, "learning_rate": 8.947862383279594e-06, "loss": 0.6773, "step": 4696 }, { "epoch": 1.2492021276595744, "grad_norm": 4.042359352111816, "learning_rate": 8.947322607390694e-06, "loss": 0.8052, "step": 4697 }, { "epoch": 1.249468085106383, "grad_norm": 3.909513235092163, "learning_rate": 8.946782709366988e-06, "loss": 0.8849, "step": 4698 }, { "epoch": 1.2497340425531915, "grad_norm": 4.553561687469482, "learning_rate": 8.946242689225175e-06, "loss": 0.9048, "step": 4699 }, { "epoch": 1.25, "grad_norm": 4.289936542510986, "learning_rate": 8.94570254698197e-06, "loss": 0.8465, "step": 4700 }, { "epoch": 1.2502659574468085, "grad_norm": 3.7364187240600586, "learning_rate": 8.94516228265408e-06, "loss": 0.9081, "step": 4701 }, { "epoch": 1.250531914893617, "grad_norm": 3.8869049549102783, "learning_rate": 8.944621896258226e-06, "loss": 0.7625, "step": 4702 }, { "epoch": 1.2507978723404256, "grad_norm": 4.203104019165039, "learning_rate": 8.944081387811126e-06, "loss": 0.7822, "step": 4703 }, { "epoch": 1.251063829787234, "grad_norm": 3.810011148452759, "learning_rate": 8.943540757329503e-06, "loss": 0.9403, "step": 4704 }, { "epoch": 1.2513297872340425, "grad_norm": 3.795477867126465, "learning_rate": 8.943000004830087e-06, "loss": 0.7856, "step": 4705 }, { "epoch": 1.251595744680851, "grad_norm": 4.174344062805176, "learning_rate": 8.942459130329608e-06, "loss": 0.8522, "step": 4706 }, { "epoch": 1.2518617021276595, "grad_norm": 3.6374874114990234, "learning_rate": 8.941918133844803e-06, "loss": 0.8471, "step": 4707 }, { "epoch": 1.2521276595744681, "grad_norm": 3.645719528198242, "learning_rate": 8.941377015392407e-06, "loss": 0.7564, "step": 4708 }, { "epoch": 1.2523936170212766, "grad_norm": 4.238284587860107, "learning_rate": 8.94083577498917e-06, "loss": 0.9556, "step": 4709 }, { "epoch": 1.252659574468085, "grad_norm": 4.101098537445068, "learning_rate": 8.940294412651831e-06, "loss": 0.9095, "step": 4710 }, { "epoch": 1.2529255319148938, "grad_norm": 3.56626296043396, "learning_rate": 8.939752928397146e-06, "loss": 0.7358, "step": 4711 }, { "epoch": 1.253191489361702, "grad_norm": 3.680903434753418, "learning_rate": 8.939211322241866e-06, "loss": 0.7556, "step": 4712 }, { "epoch": 1.2534574468085107, "grad_norm": 4.173125267028809, "learning_rate": 8.938669594202748e-06, "loss": 0.7488, "step": 4713 }, { "epoch": 1.2537234042553191, "grad_norm": 4.197647571563721, "learning_rate": 8.938127744296559e-06, "loss": 0.8367, "step": 4714 }, { "epoch": 1.2539893617021276, "grad_norm": 3.5184898376464844, "learning_rate": 8.937585772540058e-06, "loss": 0.7586, "step": 4715 }, { "epoch": 1.2542553191489363, "grad_norm": 4.331880569458008, "learning_rate": 8.93704367895002e-06, "loss": 0.9277, "step": 4716 }, { "epoch": 1.2545212765957447, "grad_norm": 4.3062238693237305, "learning_rate": 8.936501463543213e-06, "loss": 0.7798, "step": 4717 }, { "epoch": 1.2547872340425532, "grad_norm": 4.3987956047058105, "learning_rate": 8.935959126336418e-06, "loss": 0.8121, "step": 4718 }, { "epoch": 1.2550531914893617, "grad_norm": 3.8964762687683105, "learning_rate": 8.935416667346412e-06, "loss": 0.8318, "step": 4719 }, { "epoch": 1.2553191489361701, "grad_norm": 4.110397815704346, "learning_rate": 8.934874086589981e-06, "loss": 0.7502, "step": 4720 }, { "epoch": 1.2555851063829788, "grad_norm": 3.531947135925293, "learning_rate": 8.934331384083914e-06, "loss": 0.7613, "step": 4721 }, { "epoch": 1.2558510638297873, "grad_norm": 3.8877408504486084, "learning_rate": 8.933788559845001e-06, "loss": 0.7568, "step": 4722 }, { "epoch": 1.2561170212765957, "grad_norm": 3.653062582015991, "learning_rate": 8.93324561389004e-06, "loss": 0.7156, "step": 4723 }, { "epoch": 1.2563829787234042, "grad_norm": 3.9823882579803467, "learning_rate": 8.932702546235827e-06, "loss": 0.8349, "step": 4724 }, { "epoch": 1.2566489361702127, "grad_norm": 3.867664337158203, "learning_rate": 8.932159356899169e-06, "loss": 0.7605, "step": 4725 }, { "epoch": 1.2569148936170214, "grad_norm": 3.945042371749878, "learning_rate": 8.93161604589687e-06, "loss": 0.698, "step": 4726 }, { "epoch": 1.2571808510638298, "grad_norm": 4.207972049713135, "learning_rate": 8.93107261324574e-06, "loss": 0.9514, "step": 4727 }, { "epoch": 1.2574468085106383, "grad_norm": 3.8403220176696777, "learning_rate": 8.930529058962597e-06, "loss": 0.7912, "step": 4728 }, { "epoch": 1.2577127659574467, "grad_norm": 3.9817752838134766, "learning_rate": 8.929985383064257e-06, "loss": 0.752, "step": 4729 }, { "epoch": 1.2579787234042552, "grad_norm": 3.786790132522583, "learning_rate": 8.929441585567543e-06, "loss": 0.7753, "step": 4730 }, { "epoch": 1.258244680851064, "grad_norm": 3.5705316066741943, "learning_rate": 8.928897666489278e-06, "loss": 0.6983, "step": 4731 }, { "epoch": 1.2585106382978724, "grad_norm": 3.8111605644226074, "learning_rate": 8.928353625846294e-06, "loss": 0.9261, "step": 4732 }, { "epoch": 1.2587765957446808, "grad_norm": 3.8016891479492188, "learning_rate": 8.927809463655424e-06, "loss": 0.9297, "step": 4733 }, { "epoch": 1.2590425531914895, "grad_norm": 3.998060941696167, "learning_rate": 8.927265179933506e-06, "loss": 0.8105, "step": 4734 }, { "epoch": 1.2593085106382977, "grad_norm": 3.4611032009124756, "learning_rate": 8.926720774697379e-06, "loss": 0.7404, "step": 4735 }, { "epoch": 1.2595744680851064, "grad_norm": 4.086428165435791, "learning_rate": 8.926176247963886e-06, "loss": 0.7905, "step": 4736 }, { "epoch": 1.2598404255319149, "grad_norm": 4.124720573425293, "learning_rate": 8.92563159974988e-06, "loss": 0.9439, "step": 4737 }, { "epoch": 1.2601063829787233, "grad_norm": 3.536327600479126, "learning_rate": 8.92508683007221e-06, "loss": 0.7992, "step": 4738 }, { "epoch": 1.260372340425532, "grad_norm": 3.884551763534546, "learning_rate": 8.924541938947731e-06, "loss": 0.8708, "step": 4739 }, { "epoch": 1.2606382978723405, "grad_norm": 4.106461048126221, "learning_rate": 8.923996926393306e-06, "loss": 0.8013, "step": 4740 }, { "epoch": 1.260904255319149, "grad_norm": 3.6707823276519775, "learning_rate": 8.923451792425795e-06, "loss": 0.7818, "step": 4741 }, { "epoch": 1.2611702127659574, "grad_norm": 4.26462984085083, "learning_rate": 8.922906537062066e-06, "loss": 0.9622, "step": 4742 }, { "epoch": 1.2614361702127659, "grad_norm": 4.356677055358887, "learning_rate": 8.92236116031899e-06, "loss": 0.9918, "step": 4743 }, { "epoch": 1.2617021276595746, "grad_norm": 3.735673427581787, "learning_rate": 8.921815662213442e-06, "loss": 0.6767, "step": 4744 }, { "epoch": 1.261968085106383, "grad_norm": 3.9601590633392334, "learning_rate": 8.9212700427623e-06, "loss": 0.8667, "step": 4745 }, { "epoch": 1.2622340425531915, "grad_norm": 3.9646952152252197, "learning_rate": 8.920724301982446e-06, "loss": 0.7383, "step": 4746 }, { "epoch": 1.2625, "grad_norm": 3.402167320251465, "learning_rate": 8.920178439890765e-06, "loss": 0.7373, "step": 4747 }, { "epoch": 1.2627659574468084, "grad_norm": 4.096093654632568, "learning_rate": 8.91963245650415e-06, "loss": 0.7765, "step": 4748 }, { "epoch": 1.263031914893617, "grad_norm": 3.612751007080078, "learning_rate": 8.91908635183949e-06, "loss": 0.8401, "step": 4749 }, { "epoch": 1.2632978723404256, "grad_norm": 4.043914318084717, "learning_rate": 8.918540125913686e-06, "loss": 0.7371, "step": 4750 }, { "epoch": 1.263563829787234, "grad_norm": 3.865091562271118, "learning_rate": 8.917993778743636e-06, "loss": 0.6962, "step": 4751 }, { "epoch": 1.2638297872340425, "grad_norm": 4.154531478881836, "learning_rate": 8.917447310346245e-06, "loss": 0.8158, "step": 4752 }, { "epoch": 1.264095744680851, "grad_norm": 3.6052658557891846, "learning_rate": 8.916900720738423e-06, "loss": 0.7131, "step": 4753 }, { "epoch": 1.2643617021276596, "grad_norm": 4.163410186767578, "learning_rate": 8.916354009937081e-06, "loss": 0.8955, "step": 4754 }, { "epoch": 1.264627659574468, "grad_norm": 3.979421377182007, "learning_rate": 8.915807177959133e-06, "loss": 0.8712, "step": 4755 }, { "epoch": 1.2648936170212766, "grad_norm": 3.4931585788726807, "learning_rate": 8.915260224821504e-06, "loss": 0.8079, "step": 4756 }, { "epoch": 1.265159574468085, "grad_norm": 3.8094661235809326, "learning_rate": 8.914713150541113e-06, "loss": 0.8143, "step": 4757 }, { "epoch": 1.2654255319148935, "grad_norm": 4.149999618530273, "learning_rate": 8.914165955134886e-06, "loss": 0.789, "step": 4758 }, { "epoch": 1.2656914893617022, "grad_norm": 3.9979913234710693, "learning_rate": 8.913618638619757e-06, "loss": 0.8312, "step": 4759 }, { "epoch": 1.2659574468085106, "grad_norm": 4.05308723449707, "learning_rate": 8.91307120101266e-06, "loss": 0.8029, "step": 4760 }, { "epoch": 1.266223404255319, "grad_norm": 4.013595104217529, "learning_rate": 8.912523642330533e-06, "loss": 0.8625, "step": 4761 }, { "epoch": 1.2664893617021278, "grad_norm": 3.932847023010254, "learning_rate": 8.911975962590319e-06, "loss": 0.8532, "step": 4762 }, { "epoch": 1.2667553191489362, "grad_norm": 4.163691520690918, "learning_rate": 8.911428161808962e-06, "loss": 0.9048, "step": 4763 }, { "epoch": 1.2670212765957447, "grad_norm": 4.368598461151123, "learning_rate": 8.910880240003413e-06, "loss": 0.7907, "step": 4764 }, { "epoch": 1.2672872340425532, "grad_norm": 4.071594715118408, "learning_rate": 8.910332197190623e-06, "loss": 0.8764, "step": 4765 }, { "epoch": 1.2675531914893616, "grad_norm": 3.6952078342437744, "learning_rate": 8.909784033387552e-06, "loss": 0.8343, "step": 4766 }, { "epoch": 1.2678191489361703, "grad_norm": 3.967707872390747, "learning_rate": 8.909235748611161e-06, "loss": 0.7465, "step": 4767 }, { "epoch": 1.2680851063829788, "grad_norm": 4.079662799835205, "learning_rate": 8.908687342878413e-06, "loss": 0.8126, "step": 4768 }, { "epoch": 1.2683510638297872, "grad_norm": 3.95373272895813, "learning_rate": 8.908138816206275e-06, "loss": 0.7309, "step": 4769 }, { "epoch": 1.2686170212765957, "grad_norm": 3.959603786468506, "learning_rate": 8.907590168611724e-06, "loss": 0.7635, "step": 4770 }, { "epoch": 1.2688829787234042, "grad_norm": 3.9669322967529297, "learning_rate": 8.90704140011173e-06, "loss": 0.9031, "step": 4771 }, { "epoch": 1.2691489361702128, "grad_norm": 4.063694477081299, "learning_rate": 8.906492510723276e-06, "loss": 0.8292, "step": 4772 }, { "epoch": 1.2694148936170213, "grad_norm": 3.9221720695495605, "learning_rate": 8.905943500463344e-06, "loss": 0.7683, "step": 4773 }, { "epoch": 1.2696808510638298, "grad_norm": 3.9919097423553467, "learning_rate": 8.905394369348921e-06, "loss": 0.7647, "step": 4774 }, { "epoch": 1.2699468085106382, "grad_norm": 3.8253092765808105, "learning_rate": 8.904845117397e-06, "loss": 0.7056, "step": 4775 }, { "epoch": 1.2702127659574467, "grad_norm": 3.5580105781555176, "learning_rate": 8.904295744624572e-06, "loss": 0.7939, "step": 4776 }, { "epoch": 1.2704787234042554, "grad_norm": 3.987231492996216, "learning_rate": 8.903746251048638e-06, "loss": 0.8708, "step": 4777 }, { "epoch": 1.2707446808510638, "grad_norm": 3.8669490814208984, "learning_rate": 8.903196636686198e-06, "loss": 0.776, "step": 4778 }, { "epoch": 1.2710106382978723, "grad_norm": 3.940711259841919, "learning_rate": 8.902646901554258e-06, "loss": 0.7831, "step": 4779 }, { "epoch": 1.2712765957446808, "grad_norm": 4.304079055786133, "learning_rate": 8.90209704566983e-06, "loss": 0.8243, "step": 4780 }, { "epoch": 1.2715425531914892, "grad_norm": 4.165473937988281, "learning_rate": 8.901547069049924e-06, "loss": 0.8804, "step": 4781 }, { "epoch": 1.271808510638298, "grad_norm": 3.84690260887146, "learning_rate": 8.900996971711558e-06, "loss": 0.8067, "step": 4782 }, { "epoch": 1.2720744680851064, "grad_norm": 3.9118542671203613, "learning_rate": 8.900446753671754e-06, "loss": 0.8676, "step": 4783 }, { "epoch": 1.2723404255319148, "grad_norm": 4.110815525054932, "learning_rate": 8.899896414947534e-06, "loss": 0.6605, "step": 4784 }, { "epoch": 1.2726063829787235, "grad_norm": 3.7008938789367676, "learning_rate": 8.899345955555928e-06, "loss": 0.7201, "step": 4785 }, { "epoch": 1.272872340425532, "grad_norm": 4.3613691329956055, "learning_rate": 8.898795375513966e-06, "loss": 0.806, "step": 4786 }, { "epoch": 1.2731382978723405, "grad_norm": 4.315506458282471, "learning_rate": 8.898244674838687e-06, "loss": 0.8599, "step": 4787 }, { "epoch": 1.273404255319149, "grad_norm": 3.8863260746002197, "learning_rate": 8.897693853547127e-06, "loss": 0.7735, "step": 4788 }, { "epoch": 1.2736702127659574, "grad_norm": 4.221061706542969, "learning_rate": 8.89714291165633e-06, "loss": 0.9449, "step": 4789 }, { "epoch": 1.273936170212766, "grad_norm": 3.727510929107666, "learning_rate": 8.896591849183343e-06, "loss": 0.8311, "step": 4790 }, { "epoch": 1.2742021276595745, "grad_norm": 3.9543018341064453, "learning_rate": 8.896040666145218e-06, "loss": 0.6876, "step": 4791 }, { "epoch": 1.274468085106383, "grad_norm": 3.7465333938598633, "learning_rate": 8.895489362559007e-06, "loss": 0.7677, "step": 4792 }, { "epoch": 1.2747340425531914, "grad_norm": 4.069217205047607, "learning_rate": 8.894937938441768e-06, "loss": 0.8168, "step": 4793 }, { "epoch": 1.275, "grad_norm": 4.367965221405029, "learning_rate": 8.894386393810563e-06, "loss": 0.7627, "step": 4794 }, { "epoch": 1.2752659574468086, "grad_norm": 3.4115452766418457, "learning_rate": 8.893834728682459e-06, "loss": 0.6498, "step": 4795 }, { "epoch": 1.275531914893617, "grad_norm": 3.94594669342041, "learning_rate": 8.893282943074524e-06, "loss": 0.7735, "step": 4796 }, { "epoch": 1.2757978723404255, "grad_norm": 3.6856279373168945, "learning_rate": 8.89273103700383e-06, "loss": 0.8616, "step": 4797 }, { "epoch": 1.276063829787234, "grad_norm": 3.8516628742218018, "learning_rate": 8.892179010487456e-06, "loss": 0.8549, "step": 4798 }, { "epoch": 1.2763297872340424, "grad_norm": 4.085914611816406, "learning_rate": 8.891626863542479e-06, "loss": 0.7623, "step": 4799 }, { "epoch": 1.2765957446808511, "grad_norm": 3.8456547260284424, "learning_rate": 8.891074596185987e-06, "loss": 0.8117, "step": 4800 }, { "epoch": 1.2768617021276596, "grad_norm": 4.302917003631592, "learning_rate": 8.890522208435067e-06, "loss": 0.8329, "step": 4801 }, { "epoch": 1.277127659574468, "grad_norm": 4.0489912033081055, "learning_rate": 8.889969700306807e-06, "loss": 0.8957, "step": 4802 }, { "epoch": 1.2773936170212765, "grad_norm": 4.2099199295043945, "learning_rate": 8.889417071818306e-06, "loss": 0.7582, "step": 4803 }, { "epoch": 1.277659574468085, "grad_norm": 3.925480842590332, "learning_rate": 8.888864322986658e-06, "loss": 0.814, "step": 4804 }, { "epoch": 1.2779255319148937, "grad_norm": 3.9066643714904785, "learning_rate": 8.888311453828973e-06, "loss": 0.798, "step": 4805 }, { "epoch": 1.2781914893617021, "grad_norm": 3.6610445976257324, "learning_rate": 8.887758464362352e-06, "loss": 0.708, "step": 4806 }, { "epoch": 1.2784574468085106, "grad_norm": 3.639225482940674, "learning_rate": 8.887205354603908e-06, "loss": 0.9377, "step": 4807 }, { "epoch": 1.2787234042553193, "grad_norm": 4.213227272033691, "learning_rate": 8.886652124570753e-06, "loss": 0.8664, "step": 4808 }, { "epoch": 1.2789893617021277, "grad_norm": 3.916071653366089, "learning_rate": 8.886098774280006e-06, "loss": 0.8438, "step": 4809 }, { "epoch": 1.2792553191489362, "grad_norm": 3.6656155586242676, "learning_rate": 8.885545303748786e-06, "loss": 0.8395, "step": 4810 }, { "epoch": 1.2795212765957447, "grad_norm": 3.8457565307617188, "learning_rate": 8.884991712994223e-06, "loss": 0.7528, "step": 4811 }, { "epoch": 1.2797872340425531, "grad_norm": 4.223479270935059, "learning_rate": 8.88443800203344e-06, "loss": 0.8702, "step": 4812 }, { "epoch": 1.2800531914893618, "grad_norm": 3.9296419620513916, "learning_rate": 8.88388417088357e-06, "loss": 0.8804, "step": 4813 }, { "epoch": 1.2803191489361703, "grad_norm": 4.048618316650391, "learning_rate": 8.883330219561754e-06, "loss": 0.8696, "step": 4814 }, { "epoch": 1.2805851063829787, "grad_norm": 3.960580825805664, "learning_rate": 8.882776148085129e-06, "loss": 0.7783, "step": 4815 }, { "epoch": 1.2808510638297872, "grad_norm": 4.032505035400391, "learning_rate": 8.882221956470838e-06, "loss": 0.8208, "step": 4816 }, { "epoch": 1.2811170212765957, "grad_norm": 4.192906379699707, "learning_rate": 8.881667644736028e-06, "loss": 0.8411, "step": 4817 }, { "epoch": 1.2813829787234043, "grad_norm": 3.9931344985961914, "learning_rate": 8.881113212897851e-06, "loss": 0.8844, "step": 4818 }, { "epoch": 1.2816489361702128, "grad_norm": 4.1028923988342285, "learning_rate": 8.880558660973462e-06, "loss": 0.7664, "step": 4819 }, { "epoch": 1.2819148936170213, "grad_norm": 4.039322376251221, "learning_rate": 8.880003988980019e-06, "loss": 0.8436, "step": 4820 }, { "epoch": 1.2821808510638297, "grad_norm": 4.0381388664245605, "learning_rate": 8.879449196934687e-06, "loss": 0.749, "step": 4821 }, { "epoch": 1.2824468085106382, "grad_norm": 4.3847222328186035, "learning_rate": 8.878894284854626e-06, "loss": 0.8086, "step": 4822 }, { "epoch": 1.2827127659574469, "grad_norm": 4.213246822357178, "learning_rate": 8.878339252757011e-06, "loss": 0.9063, "step": 4823 }, { "epoch": 1.2829787234042553, "grad_norm": 4.628039360046387, "learning_rate": 8.877784100659013e-06, "loss": 0.9035, "step": 4824 }, { "epoch": 1.2832446808510638, "grad_norm": 3.940800905227661, "learning_rate": 8.877228828577809e-06, "loss": 0.8975, "step": 4825 }, { "epoch": 1.2835106382978723, "grad_norm": 3.82865571975708, "learning_rate": 8.87667343653058e-06, "loss": 0.7283, "step": 4826 }, { "epoch": 1.2837765957446807, "grad_norm": 4.173588752746582, "learning_rate": 8.876117924534511e-06, "loss": 0.8323, "step": 4827 }, { "epoch": 1.2840425531914894, "grad_norm": 3.6624155044555664, "learning_rate": 8.87556229260679e-06, "loss": 0.8799, "step": 4828 }, { "epoch": 1.2843085106382979, "grad_norm": 3.8801040649414062, "learning_rate": 8.875006540764607e-06, "loss": 0.7246, "step": 4829 }, { "epoch": 1.2845744680851063, "grad_norm": 3.9223177433013916, "learning_rate": 8.874450669025161e-06, "loss": 0.8083, "step": 4830 }, { "epoch": 1.284840425531915, "grad_norm": 3.640429735183716, "learning_rate": 8.87389467740565e-06, "loss": 0.8996, "step": 4831 }, { "epoch": 1.2851063829787235, "grad_norm": 3.7746853828430176, "learning_rate": 8.873338565923275e-06, "loss": 0.6899, "step": 4832 }, { "epoch": 1.285372340425532, "grad_norm": 4.439557075500488, "learning_rate": 8.872782334595246e-06, "loss": 0.9741, "step": 4833 }, { "epoch": 1.2856382978723404, "grad_norm": 4.051036834716797, "learning_rate": 8.872225983438774e-06, "loss": 0.8935, "step": 4834 }, { "epoch": 1.2859042553191489, "grad_norm": 4.3584370613098145, "learning_rate": 8.871669512471068e-06, "loss": 0.8499, "step": 4835 }, { "epoch": 1.2861702127659576, "grad_norm": 3.96370792388916, "learning_rate": 8.87111292170935e-06, "loss": 0.8756, "step": 4836 }, { "epoch": 1.286436170212766, "grad_norm": 3.8416450023651123, "learning_rate": 8.87055621117084e-06, "loss": 0.7347, "step": 4837 }, { "epoch": 1.2867021276595745, "grad_norm": 3.84533429145813, "learning_rate": 8.869999380872765e-06, "loss": 0.7894, "step": 4838 }, { "epoch": 1.286968085106383, "grad_norm": 4.616893768310547, "learning_rate": 8.869442430832351e-06, "loss": 0.8618, "step": 4839 }, { "epoch": 1.2872340425531914, "grad_norm": 3.9372458457946777, "learning_rate": 8.868885361066835e-06, "loss": 0.785, "step": 4840 }, { "epoch": 1.2875, "grad_norm": 3.895632743835449, "learning_rate": 8.868328171593448e-06, "loss": 0.7812, "step": 4841 }, { "epoch": 1.2877659574468086, "grad_norm": 4.029928684234619, "learning_rate": 8.867770862429434e-06, "loss": 0.8724, "step": 4842 }, { "epoch": 1.288031914893617, "grad_norm": 3.8094303607940674, "learning_rate": 8.867213433592037e-06, "loss": 0.791, "step": 4843 }, { "epoch": 1.2882978723404255, "grad_norm": 3.862415313720703, "learning_rate": 8.866655885098502e-06, "loss": 0.8223, "step": 4844 }, { "epoch": 1.288563829787234, "grad_norm": 4.023502826690674, "learning_rate": 8.866098216966081e-06, "loss": 0.8339, "step": 4845 }, { "epoch": 1.2888297872340426, "grad_norm": 3.7530012130737305, "learning_rate": 8.865540429212031e-06, "loss": 0.7766, "step": 4846 }, { "epoch": 1.289095744680851, "grad_norm": 3.7417378425598145, "learning_rate": 8.864982521853609e-06, "loss": 0.9348, "step": 4847 }, { "epoch": 1.2893617021276595, "grad_norm": 4.337246417999268, "learning_rate": 8.864424494908076e-06, "loss": 0.8423, "step": 4848 }, { "epoch": 1.289627659574468, "grad_norm": 4.149337291717529, "learning_rate": 8.8638663483927e-06, "loss": 0.9212, "step": 4849 }, { "epoch": 1.2898936170212765, "grad_norm": 4.155276298522949, "learning_rate": 8.86330808232475e-06, "loss": 0.9331, "step": 4850 }, { "epoch": 1.2901595744680852, "grad_norm": 3.66481876373291, "learning_rate": 8.8627496967215e-06, "loss": 0.7795, "step": 4851 }, { "epoch": 1.2904255319148936, "grad_norm": 4.018246650695801, "learning_rate": 8.862191191600227e-06, "loss": 0.8021, "step": 4852 }, { "epoch": 1.290691489361702, "grad_norm": 4.123905658721924, "learning_rate": 8.86163256697821e-06, "loss": 0.8106, "step": 4853 }, { "epoch": 1.2909574468085108, "grad_norm": 4.097765922546387, "learning_rate": 8.861073822872735e-06, "loss": 0.8006, "step": 4854 }, { "epoch": 1.2912234042553192, "grad_norm": 4.317656517028809, "learning_rate": 8.86051495930109e-06, "loss": 0.8026, "step": 4855 }, { "epoch": 1.2914893617021277, "grad_norm": 3.8379859924316406, "learning_rate": 8.859955976280568e-06, "loss": 0.813, "step": 4856 }, { "epoch": 1.2917553191489362, "grad_norm": 4.173714637756348, "learning_rate": 8.859396873828461e-06, "loss": 0.8064, "step": 4857 }, { "epoch": 1.2920212765957446, "grad_norm": 4.439601898193359, "learning_rate": 8.858837651962073e-06, "loss": 0.8187, "step": 4858 }, { "epoch": 1.2922872340425533, "grad_norm": 3.970308542251587, "learning_rate": 8.858278310698705e-06, "loss": 0.7977, "step": 4859 }, { "epoch": 1.2925531914893618, "grad_norm": 3.7830026149749756, "learning_rate": 8.857718850055663e-06, "loss": 0.7371, "step": 4860 }, { "epoch": 1.2928191489361702, "grad_norm": 3.9715933799743652, "learning_rate": 8.857159270050258e-06, "loss": 0.9022, "step": 4861 }, { "epoch": 1.2930851063829787, "grad_norm": 3.824910879135132, "learning_rate": 8.856599570699805e-06, "loss": 0.7895, "step": 4862 }, { "epoch": 1.2933510638297872, "grad_norm": 4.079301357269287, "learning_rate": 8.856039752021619e-06, "loss": 0.8215, "step": 4863 }, { "epoch": 1.2936170212765958, "grad_norm": 3.722262382507324, "learning_rate": 8.855479814033024e-06, "loss": 0.7611, "step": 4864 }, { "epoch": 1.2938829787234043, "grad_norm": 3.853123664855957, "learning_rate": 8.854919756751343e-06, "loss": 0.7494, "step": 4865 }, { "epoch": 1.2941489361702128, "grad_norm": 3.9518027305603027, "learning_rate": 8.854359580193907e-06, "loss": 0.7751, "step": 4866 }, { "epoch": 1.2944148936170212, "grad_norm": 4.295631408691406, "learning_rate": 8.853799284378048e-06, "loss": 0.8227, "step": 4867 }, { "epoch": 1.2946808510638297, "grad_norm": 3.7936043739318848, "learning_rate": 8.853238869321104e-06, "loss": 0.7634, "step": 4868 }, { "epoch": 1.2949468085106384, "grad_norm": 4.017428874969482, "learning_rate": 8.85267833504041e-06, "loss": 0.732, "step": 4869 }, { "epoch": 1.2952127659574468, "grad_norm": 4.081499099731445, "learning_rate": 8.852117681553312e-06, "loss": 0.8568, "step": 4870 }, { "epoch": 1.2954787234042553, "grad_norm": 4.4456281661987305, "learning_rate": 8.851556908877159e-06, "loss": 0.8038, "step": 4871 }, { "epoch": 1.2957446808510638, "grad_norm": 4.371933460235596, "learning_rate": 8.8509960170293e-06, "loss": 0.7515, "step": 4872 }, { "epoch": 1.2960106382978722, "grad_norm": 3.5804035663604736, "learning_rate": 8.85043500602709e-06, "loss": 0.7818, "step": 4873 }, { "epoch": 1.296276595744681, "grad_norm": 4.176633834838867, "learning_rate": 8.849873875887888e-06, "loss": 0.8217, "step": 4874 }, { "epoch": 1.2965425531914894, "grad_norm": 3.9609858989715576, "learning_rate": 8.849312626629055e-06, "loss": 0.8517, "step": 4875 }, { "epoch": 1.2968085106382978, "grad_norm": 4.5829291343688965, "learning_rate": 8.848751258267959e-06, "loss": 1.0122, "step": 4876 }, { "epoch": 1.2970744680851065, "grad_norm": 3.677952766418457, "learning_rate": 8.848189770821965e-06, "loss": 0.8094, "step": 4877 }, { "epoch": 1.297340425531915, "grad_norm": 4.067968368530273, "learning_rate": 8.84762816430845e-06, "loss": 0.8764, "step": 4878 }, { "epoch": 1.2976063829787234, "grad_norm": 3.8500382900238037, "learning_rate": 8.847066438744792e-06, "loss": 0.8741, "step": 4879 }, { "epoch": 1.297872340425532, "grad_norm": 3.8818368911743164, "learning_rate": 8.846504594148366e-06, "loss": 0.8485, "step": 4880 }, { "epoch": 1.2981382978723404, "grad_norm": 3.9118518829345703, "learning_rate": 8.84594263053656e-06, "loss": 0.9005, "step": 4881 }, { "epoch": 1.298404255319149, "grad_norm": 3.889709711074829, "learning_rate": 8.84538054792676e-06, "loss": 0.9367, "step": 4882 }, { "epoch": 1.2986702127659575, "grad_norm": 3.9546077251434326, "learning_rate": 8.844818346336361e-06, "loss": 0.8102, "step": 4883 }, { "epoch": 1.298936170212766, "grad_norm": 4.036288738250732, "learning_rate": 8.844256025782754e-06, "loss": 0.9124, "step": 4884 }, { "epoch": 1.2992021276595744, "grad_norm": 3.9991087913513184, "learning_rate": 8.84369358628334e-06, "loss": 0.7885, "step": 4885 }, { "epoch": 1.299468085106383, "grad_norm": 3.767066478729248, "learning_rate": 8.84313102785552e-06, "loss": 0.8147, "step": 4886 }, { "epoch": 1.2997340425531916, "grad_norm": 3.645434617996216, "learning_rate": 8.842568350516702e-06, "loss": 0.7238, "step": 4887 }, { "epoch": 1.3, "grad_norm": 3.777766466140747, "learning_rate": 8.842005554284296e-06, "loss": 0.816, "step": 4888 }, { "epoch": 1.3002659574468085, "grad_norm": 3.8868510723114014, "learning_rate": 8.841442639175714e-06, "loss": 0.8835, "step": 4889 }, { "epoch": 1.300531914893617, "grad_norm": 4.271452903747559, "learning_rate": 8.840879605208374e-06, "loss": 0.8119, "step": 4890 }, { "epoch": 1.3007978723404254, "grad_norm": 3.4486215114593506, "learning_rate": 8.840316452399697e-06, "loss": 0.7602, "step": 4891 }, { "epoch": 1.3010638297872341, "grad_norm": 3.726085901260376, "learning_rate": 8.839753180767108e-06, "loss": 0.7252, "step": 4892 }, { "epoch": 1.3013297872340426, "grad_norm": 4.51430082321167, "learning_rate": 8.839189790328033e-06, "loss": 0.8133, "step": 4893 }, { "epoch": 1.301595744680851, "grad_norm": 4.0574469566345215, "learning_rate": 8.838626281099908e-06, "loss": 0.8436, "step": 4894 }, { "epoch": 1.3018617021276595, "grad_norm": 4.096327304840088, "learning_rate": 8.838062653100165e-06, "loss": 0.8056, "step": 4895 }, { "epoch": 1.302127659574468, "grad_norm": 4.048945903778076, "learning_rate": 8.837498906346247e-06, "loss": 0.8764, "step": 4896 }, { "epoch": 1.3023936170212767, "grad_norm": 3.9284706115722656, "learning_rate": 8.836935040855591e-06, "loss": 0.7626, "step": 4897 }, { "epoch": 1.3026595744680851, "grad_norm": 3.914583444595337, "learning_rate": 8.83637105664565e-06, "loss": 0.7855, "step": 4898 }, { "epoch": 1.3029255319148936, "grad_norm": 4.442378520965576, "learning_rate": 8.835806953733871e-06, "loss": 0.8103, "step": 4899 }, { "epoch": 1.3031914893617023, "grad_norm": 3.8343191146850586, "learning_rate": 8.83524273213771e-06, "loss": 0.8425, "step": 4900 }, { "epoch": 1.3034574468085105, "grad_norm": 4.154768943786621, "learning_rate": 8.834678391874623e-06, "loss": 0.7792, "step": 4901 }, { "epoch": 1.3037234042553192, "grad_norm": 4.136390209197998, "learning_rate": 8.834113932962071e-06, "loss": 0.8578, "step": 4902 }, { "epoch": 1.3039893617021276, "grad_norm": 4.139702320098877, "learning_rate": 8.833549355417518e-06, "loss": 0.724, "step": 4903 }, { "epoch": 1.304255319148936, "grad_norm": 4.213815689086914, "learning_rate": 8.83298465925844e-06, "loss": 0.7892, "step": 4904 }, { "epoch": 1.3045212765957448, "grad_norm": 4.048974990844727, "learning_rate": 8.832419844502298e-06, "loss": 0.829, "step": 4905 }, { "epoch": 1.3047872340425533, "grad_norm": 4.729825496673584, "learning_rate": 8.831854911166577e-06, "loss": 0.9176, "step": 4906 }, { "epoch": 1.3050531914893617, "grad_norm": 3.5801501274108887, "learning_rate": 8.831289859268753e-06, "loss": 0.724, "step": 4907 }, { "epoch": 1.3053191489361702, "grad_norm": 4.097287654876709, "learning_rate": 8.83072468882631e-06, "loss": 0.8299, "step": 4908 }, { "epoch": 1.3055851063829786, "grad_norm": 4.027351379394531, "learning_rate": 8.830159399856734e-06, "loss": 0.9384, "step": 4909 }, { "epoch": 1.3058510638297873, "grad_norm": 4.275338649749756, "learning_rate": 8.829593992377518e-06, "loss": 0.7921, "step": 4910 }, { "epoch": 1.3061170212765958, "grad_norm": 4.1409220695495605, "learning_rate": 8.829028466406156e-06, "loss": 0.8888, "step": 4911 }, { "epoch": 1.3063829787234043, "grad_norm": 3.6458733081817627, "learning_rate": 8.828462821960143e-06, "loss": 0.7371, "step": 4912 }, { "epoch": 1.3066489361702127, "grad_norm": 3.8695321083068848, "learning_rate": 8.827897059056983e-06, "loss": 0.8467, "step": 4913 }, { "epoch": 1.3069148936170212, "grad_norm": 3.693190336227417, "learning_rate": 8.827331177714183e-06, "loss": 0.8182, "step": 4914 }, { "epoch": 1.3071808510638299, "grad_norm": 3.806725263595581, "learning_rate": 8.826765177949248e-06, "loss": 0.8669, "step": 4915 }, { "epoch": 1.3074468085106383, "grad_norm": 3.970451593399048, "learning_rate": 8.826199059779695e-06, "loss": 0.9024, "step": 4916 }, { "epoch": 1.3077127659574468, "grad_norm": 3.7471280097961426, "learning_rate": 8.825632823223037e-06, "loss": 0.7707, "step": 4917 }, { "epoch": 1.3079787234042553, "grad_norm": 4.0794267654418945, "learning_rate": 8.825066468296796e-06, "loss": 0.8489, "step": 4918 }, { "epoch": 1.3082446808510637, "grad_norm": 3.681044578552246, "learning_rate": 8.824499995018494e-06, "loss": 0.7854, "step": 4919 }, { "epoch": 1.3085106382978724, "grad_norm": 3.9300031661987305, "learning_rate": 8.82393340340566e-06, "loss": 0.8076, "step": 4920 }, { "epoch": 1.3087765957446809, "grad_norm": 3.5358026027679443, "learning_rate": 8.823366693475826e-06, "loss": 0.7239, "step": 4921 }, { "epoch": 1.3090425531914893, "grad_norm": 3.7831380367279053, "learning_rate": 8.822799865246522e-06, "loss": 0.8004, "step": 4922 }, { "epoch": 1.309308510638298, "grad_norm": 3.6898906230926514, "learning_rate": 8.822232918735292e-06, "loss": 0.765, "step": 4923 }, { "epoch": 1.3095744680851062, "grad_norm": 3.685541868209839, "learning_rate": 8.821665853959673e-06, "loss": 0.9544, "step": 4924 }, { "epoch": 1.309840425531915, "grad_norm": 4.169592380523682, "learning_rate": 8.821098670937215e-06, "loss": 0.9082, "step": 4925 }, { "epoch": 1.3101063829787234, "grad_norm": 3.870544910430908, "learning_rate": 8.820531369685464e-06, "loss": 0.7508, "step": 4926 }, { "epoch": 1.3103723404255319, "grad_norm": 3.920816659927368, "learning_rate": 8.819963950221976e-06, "loss": 0.849, "step": 4927 }, { "epoch": 1.3106382978723405, "grad_norm": 3.8789918422698975, "learning_rate": 8.819396412564305e-06, "loss": 0.7916, "step": 4928 }, { "epoch": 1.310904255319149, "grad_norm": 3.8481719493865967, "learning_rate": 8.818828756730012e-06, "loss": 0.7985, "step": 4929 }, { "epoch": 1.3111702127659575, "grad_norm": 4.481472015380859, "learning_rate": 8.818260982736662e-06, "loss": 0.7636, "step": 4930 }, { "epoch": 1.311436170212766, "grad_norm": 3.4751243591308594, "learning_rate": 8.81769309060182e-06, "loss": 0.7336, "step": 4931 }, { "epoch": 1.3117021276595744, "grad_norm": 4.149890899658203, "learning_rate": 8.81712508034306e-06, "loss": 0.8473, "step": 4932 }, { "epoch": 1.311968085106383, "grad_norm": 3.9108872413635254, "learning_rate": 8.816556951977955e-06, "loss": 0.7656, "step": 4933 }, { "epoch": 1.3122340425531915, "grad_norm": 3.8704488277435303, "learning_rate": 8.815988705524086e-06, "loss": 0.8214, "step": 4934 }, { "epoch": 1.3125, "grad_norm": 4.183962821960449, "learning_rate": 8.815420340999034e-06, "loss": 0.8411, "step": 4935 }, { "epoch": 1.3127659574468085, "grad_norm": 3.7032434940338135, "learning_rate": 8.814851858420384e-06, "loss": 0.8455, "step": 4936 }, { "epoch": 1.313031914893617, "grad_norm": 3.5762336254119873, "learning_rate": 8.814283257805724e-06, "loss": 0.7208, "step": 4937 }, { "epoch": 1.3132978723404256, "grad_norm": 4.197664260864258, "learning_rate": 8.813714539172653e-06, "loss": 0.8642, "step": 4938 }, { "epoch": 1.313563829787234, "grad_norm": 3.5386626720428467, "learning_rate": 8.81314570253876e-06, "loss": 0.6846, "step": 4939 }, { "epoch": 1.3138297872340425, "grad_norm": 4.332328796386719, "learning_rate": 8.812576747921653e-06, "loss": 0.7862, "step": 4940 }, { "epoch": 1.314095744680851, "grad_norm": 3.6495919227600098, "learning_rate": 8.81200767533893e-06, "loss": 0.676, "step": 4941 }, { "epoch": 1.3143617021276595, "grad_norm": 3.717625617980957, "learning_rate": 8.811438484808204e-06, "loss": 0.8879, "step": 4942 }, { "epoch": 1.3146276595744681, "grad_norm": 4.201274394989014, "learning_rate": 8.810869176347082e-06, "loss": 0.9174, "step": 4943 }, { "epoch": 1.3148936170212766, "grad_norm": 3.3899879455566406, "learning_rate": 8.810299749973182e-06, "loss": 0.7209, "step": 4944 }, { "epoch": 1.315159574468085, "grad_norm": 3.821558713912964, "learning_rate": 8.80973020570412e-06, "loss": 0.647, "step": 4945 }, { "epoch": 1.3154255319148938, "grad_norm": 4.011831760406494, "learning_rate": 8.809160543557523e-06, "loss": 0.8387, "step": 4946 }, { "epoch": 1.315691489361702, "grad_norm": 4.121433258056641, "learning_rate": 8.80859076355101e-06, "loss": 0.7835, "step": 4947 }, { "epoch": 1.3159574468085107, "grad_norm": 4.066422462463379, "learning_rate": 8.808020865702218e-06, "loss": 0.7569, "step": 4948 }, { "epoch": 1.3162234042553191, "grad_norm": 3.7616024017333984, "learning_rate": 8.807450850028776e-06, "loss": 0.7514, "step": 4949 }, { "epoch": 1.3164893617021276, "grad_norm": 3.809521198272705, "learning_rate": 8.806880716548322e-06, "loss": 0.8212, "step": 4950 }, { "epoch": 1.3167553191489363, "grad_norm": 3.664140224456787, "learning_rate": 8.806310465278496e-06, "loss": 0.8303, "step": 4951 }, { "epoch": 1.3170212765957447, "grad_norm": 3.978876829147339, "learning_rate": 8.805740096236943e-06, "loss": 0.8149, "step": 4952 }, { "epoch": 1.3172872340425532, "grad_norm": 4.436275959014893, "learning_rate": 8.805169609441312e-06, "loss": 0.9033, "step": 4953 }, { "epoch": 1.3175531914893617, "grad_norm": 3.9355101585388184, "learning_rate": 8.804599004909251e-06, "loss": 0.8599, "step": 4954 }, { "epoch": 1.3178191489361701, "grad_norm": 3.6748297214508057, "learning_rate": 8.80402828265842e-06, "loss": 0.6637, "step": 4955 }, { "epoch": 1.3180851063829788, "grad_norm": 3.953321695327759, "learning_rate": 8.803457442706473e-06, "loss": 0.7684, "step": 4956 }, { "epoch": 1.3183510638297873, "grad_norm": 3.9680938720703125, "learning_rate": 8.802886485071078e-06, "loss": 0.8377, "step": 4957 }, { "epoch": 1.3186170212765957, "grad_norm": 3.608375072479248, "learning_rate": 8.802315409769894e-06, "loss": 0.7671, "step": 4958 }, { "epoch": 1.3188829787234042, "grad_norm": 3.7180373668670654, "learning_rate": 8.801744216820596e-06, "loss": 0.794, "step": 4959 }, { "epoch": 1.3191489361702127, "grad_norm": 3.490082263946533, "learning_rate": 8.801172906240857e-06, "loss": 0.7993, "step": 4960 }, { "epoch": 1.3194148936170214, "grad_norm": 3.9783389568328857, "learning_rate": 8.800601478048351e-06, "loss": 0.7455, "step": 4961 }, { "epoch": 1.3196808510638298, "grad_norm": 4.333663463592529, "learning_rate": 8.800029932260764e-06, "loss": 0.8772, "step": 4962 }, { "epoch": 1.3199468085106383, "grad_norm": 3.9584553241729736, "learning_rate": 8.799458268895774e-06, "loss": 0.8622, "step": 4963 }, { "epoch": 1.3202127659574467, "grad_norm": 4.271299362182617, "learning_rate": 8.798886487971073e-06, "loss": 0.7591, "step": 4964 }, { "epoch": 1.3204787234042552, "grad_norm": 4.128324508666992, "learning_rate": 8.798314589504348e-06, "loss": 0.7294, "step": 4965 }, { "epoch": 1.320744680851064, "grad_norm": 3.613626718521118, "learning_rate": 8.797742573513302e-06, "loss": 0.8173, "step": 4966 }, { "epoch": 1.3210106382978724, "grad_norm": 3.665271043777466, "learning_rate": 8.797170440015627e-06, "loss": 0.7592, "step": 4967 }, { "epoch": 1.3212765957446808, "grad_norm": 4.036754608154297, "learning_rate": 8.79659818902903e-06, "loss": 0.7705, "step": 4968 }, { "epoch": 1.3215425531914895, "grad_norm": 4.09188175201416, "learning_rate": 8.796025820571213e-06, "loss": 0.9028, "step": 4969 }, { "epoch": 1.3218085106382977, "grad_norm": 3.8270485401153564, "learning_rate": 8.795453334659889e-06, "loss": 0.7337, "step": 4970 }, { "epoch": 1.3220744680851064, "grad_norm": 4.005841255187988, "learning_rate": 8.794880731312771e-06, "loss": 0.8789, "step": 4971 }, { "epoch": 1.3223404255319149, "grad_norm": 3.894681930541992, "learning_rate": 8.794308010547574e-06, "loss": 0.7452, "step": 4972 }, { "epoch": 1.3226063829787233, "grad_norm": 3.7697856426239014, "learning_rate": 8.79373517238202e-06, "loss": 0.7111, "step": 4973 }, { "epoch": 1.322872340425532, "grad_norm": 4.162429332733154, "learning_rate": 8.793162216833835e-06, "loss": 0.8352, "step": 4974 }, { "epoch": 1.3231382978723405, "grad_norm": 4.8362298011779785, "learning_rate": 8.792589143920743e-06, "loss": 0.8807, "step": 4975 }, { "epoch": 1.323404255319149, "grad_norm": 4.283027172088623, "learning_rate": 8.792015953660478e-06, "loss": 0.9241, "step": 4976 }, { "epoch": 1.3236702127659574, "grad_norm": 3.7246296405792236, "learning_rate": 8.791442646070776e-06, "loss": 0.8158, "step": 4977 }, { "epoch": 1.3239361702127659, "grad_norm": 3.9116530418395996, "learning_rate": 8.790869221169374e-06, "loss": 0.7603, "step": 4978 }, { "epoch": 1.3242021276595746, "grad_norm": 4.164322853088379, "learning_rate": 8.790295678974015e-06, "loss": 0.7518, "step": 4979 }, { "epoch": 1.324468085106383, "grad_norm": 3.459543228149414, "learning_rate": 8.789722019502444e-06, "loss": 0.8216, "step": 4980 }, { "epoch": 1.3247340425531915, "grad_norm": 3.4385783672332764, "learning_rate": 8.789148242772414e-06, "loss": 0.5722, "step": 4981 }, { "epoch": 1.325, "grad_norm": 3.881467580795288, "learning_rate": 8.788574348801676e-06, "loss": 0.7652, "step": 4982 }, { "epoch": 1.3252659574468084, "grad_norm": 3.8028674125671387, "learning_rate": 8.788000337607984e-06, "loss": 0.7125, "step": 4983 }, { "epoch": 1.325531914893617, "grad_norm": 3.595238447189331, "learning_rate": 8.787426209209104e-06, "loss": 0.6849, "step": 4984 }, { "epoch": 1.3257978723404256, "grad_norm": 4.597902774810791, "learning_rate": 8.786851963622799e-06, "loss": 0.8314, "step": 4985 }, { "epoch": 1.326063829787234, "grad_norm": 4.151714324951172, "learning_rate": 8.786277600866834e-06, "loss": 0.8624, "step": 4986 }, { "epoch": 1.3263297872340425, "grad_norm": 3.7185237407684326, "learning_rate": 8.785703120958984e-06, "loss": 0.7547, "step": 4987 }, { "epoch": 1.326595744680851, "grad_norm": 3.964048385620117, "learning_rate": 8.785128523917022e-06, "loss": 0.8626, "step": 4988 }, { "epoch": 1.3268617021276596, "grad_norm": 3.9490604400634766, "learning_rate": 8.784553809758724e-06, "loss": 0.7927, "step": 4989 }, { "epoch": 1.327127659574468, "grad_norm": 3.736051321029663, "learning_rate": 8.783978978501879e-06, "loss": 0.7581, "step": 4990 }, { "epoch": 1.3273936170212766, "grad_norm": 4.048060417175293, "learning_rate": 8.783404030164269e-06, "loss": 0.8141, "step": 4991 }, { "epoch": 1.327659574468085, "grad_norm": 3.542971134185791, "learning_rate": 8.782828964763683e-06, "loss": 0.8244, "step": 4992 }, { "epoch": 1.3279255319148935, "grad_norm": 4.4042439460754395, "learning_rate": 8.782253782317914e-06, "loss": 0.7623, "step": 4993 }, { "epoch": 1.3281914893617022, "grad_norm": 4.011150360107422, "learning_rate": 8.781678482844763e-06, "loss": 0.7879, "step": 4994 }, { "epoch": 1.3284574468085106, "grad_norm": 3.9396347999572754, "learning_rate": 8.781103066362024e-06, "loss": 0.8731, "step": 4995 }, { "epoch": 1.328723404255319, "grad_norm": 4.063819408416748, "learning_rate": 8.780527532887506e-06, "loss": 0.7255, "step": 4996 }, { "epoch": 1.3289893617021278, "grad_norm": 3.684864044189453, "learning_rate": 8.779951882439016e-06, "loss": 0.7447, "step": 4997 }, { "epoch": 1.3292553191489362, "grad_norm": 4.3980207443237305, "learning_rate": 8.77937611503436e-06, "loss": 0.8104, "step": 4998 }, { "epoch": 1.3295212765957447, "grad_norm": 4.019001483917236, "learning_rate": 8.778800230691363e-06, "loss": 0.7426, "step": 4999 }, { "epoch": 1.3297872340425532, "grad_norm": 4.1492486000061035, "learning_rate": 8.778224229427836e-06, "loss": 0.7929, "step": 5000 }, { "epoch": 1.3297872340425532, "eval_loss": 1.2957489490509033, "eval_runtime": 14.7283, "eval_samples_per_second": 27.159, "eval_steps_per_second": 3.395, "step": 5000 }, { "epoch": 1.3300531914893616, "grad_norm": 3.742830753326416, "learning_rate": 8.777648111261601e-06, "loss": 0.6807, "step": 5001 }, { "epoch": 1.3303191489361703, "grad_norm": 4.3522114753723145, "learning_rate": 8.77707187621049e-06, "loss": 0.8048, "step": 5002 }, { "epoch": 1.3305851063829788, "grad_norm": 3.7916550636291504, "learning_rate": 8.776495524292325e-06, "loss": 0.8209, "step": 5003 }, { "epoch": 1.3308510638297872, "grad_norm": 3.642531156539917, "learning_rate": 8.775919055524941e-06, "loss": 0.7274, "step": 5004 }, { "epoch": 1.3311170212765957, "grad_norm": 3.885079860687256, "learning_rate": 8.775342469926178e-06, "loss": 0.8305, "step": 5005 }, { "epoch": 1.3313829787234042, "grad_norm": 3.816824436187744, "learning_rate": 8.774765767513876e-06, "loss": 0.7605, "step": 5006 }, { "epoch": 1.3316489361702128, "grad_norm": 4.696832656860352, "learning_rate": 8.774188948305874e-06, "loss": 0.8907, "step": 5007 }, { "epoch": 1.3319148936170213, "grad_norm": 4.030970096588135, "learning_rate": 8.773612012320023e-06, "loss": 0.9613, "step": 5008 }, { "epoch": 1.3321808510638298, "grad_norm": 4.046240329742432, "learning_rate": 8.773034959574173e-06, "loss": 0.7066, "step": 5009 }, { "epoch": 1.3324468085106382, "grad_norm": 3.916098117828369, "learning_rate": 8.77245779008618e-06, "loss": 0.7762, "step": 5010 }, { "epoch": 1.3327127659574467, "grad_norm": 4.096320629119873, "learning_rate": 8.771880503873902e-06, "loss": 0.7222, "step": 5011 }, { "epoch": 1.3329787234042554, "grad_norm": 4.3136467933654785, "learning_rate": 8.771303100955199e-06, "loss": 0.8265, "step": 5012 }, { "epoch": 1.3332446808510638, "grad_norm": 3.972031593322754, "learning_rate": 8.770725581347938e-06, "loss": 0.7263, "step": 5013 }, { "epoch": 1.3335106382978723, "grad_norm": 4.295060634613037, "learning_rate": 8.770147945069988e-06, "loss": 0.8489, "step": 5014 }, { "epoch": 1.3337765957446808, "grad_norm": 3.8986477851867676, "learning_rate": 8.769570192139224e-06, "loss": 0.7101, "step": 5015 }, { "epoch": 1.3340425531914892, "grad_norm": 3.8135452270507812, "learning_rate": 8.768992322573518e-06, "loss": 0.7885, "step": 5016 }, { "epoch": 1.334308510638298, "grad_norm": 3.727550983428955, "learning_rate": 8.768414336390752e-06, "loss": 0.8622, "step": 5017 }, { "epoch": 1.3345744680851064, "grad_norm": 4.012676239013672, "learning_rate": 8.76783623360881e-06, "loss": 0.8938, "step": 5018 }, { "epoch": 1.3348404255319148, "grad_norm": 4.344918727874756, "learning_rate": 8.767258014245578e-06, "loss": 0.8228, "step": 5019 }, { "epoch": 1.3351063829787235, "grad_norm": 3.9926249980926514, "learning_rate": 8.76667967831895e-06, "loss": 0.6513, "step": 5020 }, { "epoch": 1.335372340425532, "grad_norm": 4.119525909423828, "learning_rate": 8.766101225846816e-06, "loss": 0.7887, "step": 5021 }, { "epoch": 1.3356382978723405, "grad_norm": 4.538883686065674, "learning_rate": 8.765522656847077e-06, "loss": 0.796, "step": 5022 }, { "epoch": 1.335904255319149, "grad_norm": 3.7550501823425293, "learning_rate": 8.764943971337633e-06, "loss": 0.7695, "step": 5023 }, { "epoch": 1.3361702127659574, "grad_norm": 3.611605405807495, "learning_rate": 8.76436516933639e-06, "loss": 0.7483, "step": 5024 }, { "epoch": 1.336436170212766, "grad_norm": 4.187867164611816, "learning_rate": 8.763786250861258e-06, "loss": 0.8277, "step": 5025 }, { "epoch": 1.3367021276595745, "grad_norm": 3.9223055839538574, "learning_rate": 8.763207215930147e-06, "loss": 0.7724, "step": 5026 }, { "epoch": 1.336968085106383, "grad_norm": 4.048906326293945, "learning_rate": 8.762628064560975e-06, "loss": 0.7923, "step": 5027 }, { "epoch": 1.3372340425531914, "grad_norm": 4.241153240203857, "learning_rate": 8.762048796771659e-06, "loss": 0.8776, "step": 5028 }, { "epoch": 1.3375, "grad_norm": 3.759209632873535, "learning_rate": 8.761469412580126e-06, "loss": 0.7554, "step": 5029 }, { "epoch": 1.3377659574468086, "grad_norm": 3.8906912803649902, "learning_rate": 8.760889912004297e-06, "loss": 0.6977, "step": 5030 }, { "epoch": 1.338031914893617, "grad_norm": 3.9501161575317383, "learning_rate": 8.760310295062112e-06, "loss": 0.9481, "step": 5031 }, { "epoch": 1.3382978723404255, "grad_norm": 3.918553590774536, "learning_rate": 8.759730561771494e-06, "loss": 0.7882, "step": 5032 }, { "epoch": 1.338563829787234, "grad_norm": 4.063170909881592, "learning_rate": 8.759150712150388e-06, "loss": 0.8415, "step": 5033 }, { "epoch": 1.3388297872340424, "grad_norm": 3.863600015640259, "learning_rate": 8.758570746216732e-06, "loss": 0.807, "step": 5034 }, { "epoch": 1.3390957446808511, "grad_norm": 3.9519717693328857, "learning_rate": 8.757990663988474e-06, "loss": 0.8594, "step": 5035 }, { "epoch": 1.3393617021276596, "grad_norm": 4.245703220367432, "learning_rate": 8.75741046548356e-06, "loss": 0.7987, "step": 5036 }, { "epoch": 1.339627659574468, "grad_norm": 4.1299729347229, "learning_rate": 8.75683015071994e-06, "loss": 0.9377, "step": 5037 }, { "epoch": 1.3398936170212765, "grad_norm": 3.744929552078247, "learning_rate": 8.756249719715576e-06, "loss": 0.6875, "step": 5038 }, { "epoch": 1.340159574468085, "grad_norm": 3.7629339694976807, "learning_rate": 8.75566917248842e-06, "loss": 0.7619, "step": 5039 }, { "epoch": 1.3404255319148937, "grad_norm": 4.09276819229126, "learning_rate": 8.75508850905644e-06, "loss": 0.7618, "step": 5040 }, { "epoch": 1.3406914893617021, "grad_norm": 4.220356464385986, "learning_rate": 8.7545077294376e-06, "loss": 0.9246, "step": 5041 }, { "epoch": 1.3409574468085106, "grad_norm": 3.9419326782226562, "learning_rate": 8.753926833649871e-06, "loss": 0.7463, "step": 5042 }, { "epoch": 1.3412234042553193, "grad_norm": 4.060051918029785, "learning_rate": 8.753345821711224e-06, "loss": 0.9061, "step": 5043 }, { "epoch": 1.3414893617021277, "grad_norm": 3.7086057662963867, "learning_rate": 8.75276469363964e-06, "loss": 0.8177, "step": 5044 }, { "epoch": 1.3417553191489362, "grad_norm": 4.173861503601074, "learning_rate": 8.752183449453098e-06, "loss": 0.8117, "step": 5045 }, { "epoch": 1.3420212765957447, "grad_norm": 4.282475471496582, "learning_rate": 8.75160208916958e-06, "loss": 0.8352, "step": 5046 }, { "epoch": 1.3422872340425531, "grad_norm": 3.9250497817993164, "learning_rate": 8.75102061280708e-06, "loss": 0.8292, "step": 5047 }, { "epoch": 1.3425531914893618, "grad_norm": 4.28936767578125, "learning_rate": 8.750439020383584e-06, "loss": 0.8269, "step": 5048 }, { "epoch": 1.3428191489361703, "grad_norm": 4.007338523864746, "learning_rate": 8.749857311917089e-06, "loss": 0.8376, "step": 5049 }, { "epoch": 1.3430851063829787, "grad_norm": 3.741140842437744, "learning_rate": 8.749275487425595e-06, "loss": 0.7936, "step": 5050 }, { "epoch": 1.3433510638297872, "grad_norm": 3.8448450565338135, "learning_rate": 8.748693546927101e-06, "loss": 0.8088, "step": 5051 }, { "epoch": 1.3436170212765957, "grad_norm": 4.5769782066345215, "learning_rate": 8.748111490439617e-06, "loss": 0.8315, "step": 5052 }, { "epoch": 1.3438829787234043, "grad_norm": 4.1284871101379395, "learning_rate": 8.74752931798115e-06, "loss": 0.8866, "step": 5053 }, { "epoch": 1.3441489361702128, "grad_norm": 3.9224517345428467, "learning_rate": 8.746947029569715e-06, "loss": 0.6403, "step": 5054 }, { "epoch": 1.3444148936170213, "grad_norm": 4.114837169647217, "learning_rate": 8.746364625223326e-06, "loss": 0.7303, "step": 5055 }, { "epoch": 1.3446808510638297, "grad_norm": 3.9492406845092773, "learning_rate": 8.745782104960006e-06, "loss": 0.7462, "step": 5056 }, { "epoch": 1.3449468085106382, "grad_norm": 3.5633533000946045, "learning_rate": 8.745199468797775e-06, "loss": 0.8241, "step": 5057 }, { "epoch": 1.3452127659574469, "grad_norm": 3.9602227210998535, "learning_rate": 8.744616716754665e-06, "loss": 0.8142, "step": 5058 }, { "epoch": 1.3454787234042553, "grad_norm": 3.6486499309539795, "learning_rate": 8.744033848848705e-06, "loss": 0.7932, "step": 5059 }, { "epoch": 1.3457446808510638, "grad_norm": 3.9516966342926025, "learning_rate": 8.743450865097929e-06, "loss": 0.7334, "step": 5060 }, { "epoch": 1.3460106382978723, "grad_norm": 4.261397361755371, "learning_rate": 8.742867765520377e-06, "loss": 0.7549, "step": 5061 }, { "epoch": 1.3462765957446807, "grad_norm": 4.082563877105713, "learning_rate": 8.742284550134088e-06, "loss": 0.8306, "step": 5062 }, { "epoch": 1.3465425531914894, "grad_norm": 3.9603230953216553, "learning_rate": 8.74170121895711e-06, "loss": 0.832, "step": 5063 }, { "epoch": 1.3468085106382979, "grad_norm": 4.0057692527771, "learning_rate": 8.741117772007492e-06, "loss": 0.783, "step": 5064 }, { "epoch": 1.3470744680851063, "grad_norm": 4.130981922149658, "learning_rate": 8.740534209303285e-06, "loss": 0.6476, "step": 5065 }, { "epoch": 1.347340425531915, "grad_norm": 3.641900062561035, "learning_rate": 8.739950530862544e-06, "loss": 0.9809, "step": 5066 }, { "epoch": 1.3476063829787235, "grad_norm": 3.607656955718994, "learning_rate": 8.739366736703331e-06, "loss": 0.7784, "step": 5067 }, { "epoch": 1.347872340425532, "grad_norm": 4.068065166473389, "learning_rate": 8.73878282684371e-06, "loss": 0.9063, "step": 5068 }, { "epoch": 1.3481382978723404, "grad_norm": 3.952601671218872, "learning_rate": 8.738198801301745e-06, "loss": 0.9279, "step": 5069 }, { "epoch": 1.3484042553191489, "grad_norm": 4.016735553741455, "learning_rate": 8.737614660095507e-06, "loss": 0.7658, "step": 5070 }, { "epoch": 1.3486702127659576, "grad_norm": 3.669020891189575, "learning_rate": 8.737030403243074e-06, "loss": 0.6806, "step": 5071 }, { "epoch": 1.348936170212766, "grad_norm": 3.659308910369873, "learning_rate": 8.736446030762518e-06, "loss": 0.7539, "step": 5072 }, { "epoch": 1.3492021276595745, "grad_norm": 3.9839887619018555, "learning_rate": 8.735861542671924e-06, "loss": 0.7342, "step": 5073 }, { "epoch": 1.349468085106383, "grad_norm": 3.9134328365325928, "learning_rate": 8.735276938989375e-06, "loss": 0.8636, "step": 5074 }, { "epoch": 1.3497340425531914, "grad_norm": 3.841643810272217, "learning_rate": 8.73469221973296e-06, "loss": 0.7273, "step": 5075 }, { "epoch": 1.35, "grad_norm": 3.903296947479248, "learning_rate": 8.734107384920771e-06, "loss": 0.8596, "step": 5076 }, { "epoch": 1.3502659574468086, "grad_norm": 4.10729455947876, "learning_rate": 8.733522434570901e-06, "loss": 0.8268, "step": 5077 }, { "epoch": 1.350531914893617, "grad_norm": 3.913231611251831, "learning_rate": 8.732937368701453e-06, "loss": 0.8017, "step": 5078 }, { "epoch": 1.3507978723404255, "grad_norm": 3.795318365097046, "learning_rate": 8.732352187330528e-06, "loss": 0.6833, "step": 5079 }, { "epoch": 1.351063829787234, "grad_norm": 3.991790294647217, "learning_rate": 8.731766890476232e-06, "loss": 0.7068, "step": 5080 }, { "epoch": 1.3513297872340426, "grad_norm": 4.177598476409912, "learning_rate": 8.731181478156673e-06, "loss": 0.806, "step": 5081 }, { "epoch": 1.351595744680851, "grad_norm": 3.855368137359619, "learning_rate": 8.730595950389968e-06, "loss": 0.7752, "step": 5082 }, { "epoch": 1.3518617021276595, "grad_norm": 4.333880424499512, "learning_rate": 8.730010307194232e-06, "loss": 0.771, "step": 5083 }, { "epoch": 1.352127659574468, "grad_norm": 3.9861552715301514, "learning_rate": 8.729424548587585e-06, "loss": 0.873, "step": 5084 }, { "epoch": 1.3523936170212765, "grad_norm": 4.271336078643799, "learning_rate": 8.728838674588151e-06, "loss": 0.8345, "step": 5085 }, { "epoch": 1.3526595744680852, "grad_norm": 4.418639659881592, "learning_rate": 8.72825268521406e-06, "loss": 0.9593, "step": 5086 }, { "epoch": 1.3529255319148936, "grad_norm": 4.122128963470459, "learning_rate": 8.72766658048344e-06, "loss": 0.6917, "step": 5087 }, { "epoch": 1.353191489361702, "grad_norm": 3.9738972187042236, "learning_rate": 8.727080360414428e-06, "loss": 0.7446, "step": 5088 }, { "epoch": 1.3534574468085108, "grad_norm": 4.067488670349121, "learning_rate": 8.726494025025162e-06, "loss": 0.6886, "step": 5089 }, { "epoch": 1.3537234042553192, "grad_norm": 3.782886028289795, "learning_rate": 8.725907574333783e-06, "loss": 0.8159, "step": 5090 }, { "epoch": 1.3539893617021277, "grad_norm": 3.9360549449920654, "learning_rate": 8.725321008358436e-06, "loss": 0.8189, "step": 5091 }, { "epoch": 1.3542553191489362, "grad_norm": 4.132941246032715, "learning_rate": 8.724734327117273e-06, "loss": 0.9677, "step": 5092 }, { "epoch": 1.3545212765957446, "grad_norm": 4.25277042388916, "learning_rate": 8.724147530628442e-06, "loss": 0.8653, "step": 5093 }, { "epoch": 1.3547872340425533, "grad_norm": 3.962684392929077, "learning_rate": 8.723560618910103e-06, "loss": 0.6903, "step": 5094 }, { "epoch": 1.3550531914893618, "grad_norm": 3.9663078784942627, "learning_rate": 8.722973591980414e-06, "loss": 0.7572, "step": 5095 }, { "epoch": 1.3553191489361702, "grad_norm": 4.48624849319458, "learning_rate": 8.722386449857541e-06, "loss": 0.9056, "step": 5096 }, { "epoch": 1.3555851063829787, "grad_norm": 3.8394525051116943, "learning_rate": 8.721799192559646e-06, "loss": 0.7721, "step": 5097 }, { "epoch": 1.3558510638297872, "grad_norm": 4.599715232849121, "learning_rate": 8.721211820104903e-06, "loss": 1.0118, "step": 5098 }, { "epoch": 1.3561170212765958, "grad_norm": 4.1499528884887695, "learning_rate": 8.720624332511484e-06, "loss": 0.8979, "step": 5099 }, { "epoch": 1.3563829787234043, "grad_norm": 3.8984806537628174, "learning_rate": 8.72003672979757e-06, "loss": 0.8824, "step": 5100 }, { "epoch": 1.3566489361702128, "grad_norm": 3.709800958633423, "learning_rate": 8.71944901198134e-06, "loss": 0.8053, "step": 5101 }, { "epoch": 1.3569148936170212, "grad_norm": 3.4785032272338867, "learning_rate": 8.718861179080975e-06, "loss": 0.6898, "step": 5102 }, { "epoch": 1.3571808510638297, "grad_norm": 3.8457705974578857, "learning_rate": 8.71827323111467e-06, "loss": 0.75, "step": 5103 }, { "epoch": 1.3574468085106384, "grad_norm": 3.66109299659729, "learning_rate": 8.71768516810061e-06, "loss": 0.7255, "step": 5104 }, { "epoch": 1.3577127659574468, "grad_norm": 3.6998486518859863, "learning_rate": 8.717096990056999e-06, "loss": 0.8202, "step": 5105 }, { "epoch": 1.3579787234042553, "grad_norm": 4.291678428649902, "learning_rate": 8.716508697002027e-06, "loss": 0.9424, "step": 5106 }, { "epoch": 1.3582446808510638, "grad_norm": 3.870074987411499, "learning_rate": 8.715920288953901e-06, "loss": 0.8821, "step": 5107 }, { "epoch": 1.3585106382978722, "grad_norm": 3.469759702682495, "learning_rate": 8.715331765930828e-06, "loss": 0.745, "step": 5108 }, { "epoch": 1.358776595744681, "grad_norm": 4.048684597015381, "learning_rate": 8.714743127951014e-06, "loss": 0.9526, "step": 5109 }, { "epoch": 1.3590425531914894, "grad_norm": 4.060766696929932, "learning_rate": 8.714154375032675e-06, "loss": 0.7971, "step": 5110 }, { "epoch": 1.3593085106382978, "grad_norm": 4.004628658294678, "learning_rate": 8.713565507194027e-06, "loss": 0.8302, "step": 5111 }, { "epoch": 1.3595744680851065, "grad_norm": 4.034252166748047, "learning_rate": 8.712976524453289e-06, "loss": 0.8873, "step": 5112 }, { "epoch": 1.359840425531915, "grad_norm": 3.9113869667053223, "learning_rate": 8.712387426828685e-06, "loss": 0.7514, "step": 5113 }, { "epoch": 1.3601063829787234, "grad_norm": 3.977827787399292, "learning_rate": 8.711798214338445e-06, "loss": 0.8099, "step": 5114 }, { "epoch": 1.360372340425532, "grad_norm": 4.005003929138184, "learning_rate": 8.711208887000797e-06, "loss": 0.8888, "step": 5115 }, { "epoch": 1.3606382978723404, "grad_norm": 3.7809715270996094, "learning_rate": 8.710619444833977e-06, "loss": 0.8131, "step": 5116 }, { "epoch": 1.360904255319149, "grad_norm": 3.8309693336486816, "learning_rate": 8.710029887856224e-06, "loss": 0.6836, "step": 5117 }, { "epoch": 1.3611702127659575, "grad_norm": 3.7106757164001465, "learning_rate": 8.709440216085777e-06, "loss": 0.8079, "step": 5118 }, { "epoch": 1.361436170212766, "grad_norm": 4.386137962341309, "learning_rate": 8.708850429540882e-06, "loss": 0.8484, "step": 5119 }, { "epoch": 1.3617021276595744, "grad_norm": 4.305933952331543, "learning_rate": 8.708260528239788e-06, "loss": 0.9357, "step": 5120 }, { "epoch": 1.361968085106383, "grad_norm": 4.107351303100586, "learning_rate": 8.70767051220075e-06, "loss": 0.8932, "step": 5121 }, { "epoch": 1.3622340425531916, "grad_norm": 3.7665624618530273, "learning_rate": 8.707080381442016e-06, "loss": 0.7792, "step": 5122 }, { "epoch": 1.3625, "grad_norm": 4.177657604217529, "learning_rate": 8.706490135981856e-06, "loss": 0.8046, "step": 5123 }, { "epoch": 1.3627659574468085, "grad_norm": 4.132664203643799, "learning_rate": 8.705899775838525e-06, "loss": 0.8516, "step": 5124 }, { "epoch": 1.363031914893617, "grad_norm": 4.0525288581848145, "learning_rate": 8.70530930103029e-06, "loss": 0.8747, "step": 5125 }, { "epoch": 1.3632978723404254, "grad_norm": 4.088098526000977, "learning_rate": 8.704718711575424e-06, "loss": 0.6531, "step": 5126 }, { "epoch": 1.3635638297872341, "grad_norm": 3.944594144821167, "learning_rate": 8.704128007492201e-06, "loss": 0.8084, "step": 5127 }, { "epoch": 1.3638297872340426, "grad_norm": 4.340763092041016, "learning_rate": 8.703537188798894e-06, "loss": 0.8186, "step": 5128 }, { "epoch": 1.364095744680851, "grad_norm": 3.9249961376190186, "learning_rate": 8.702946255513787e-06, "loss": 0.8166, "step": 5129 }, { "epoch": 1.3643617021276595, "grad_norm": 3.667654275894165, "learning_rate": 8.702355207655164e-06, "loss": 0.8432, "step": 5130 }, { "epoch": 1.364627659574468, "grad_norm": 3.6376404762268066, "learning_rate": 8.70176404524131e-06, "loss": 0.7878, "step": 5131 }, { "epoch": 1.3648936170212767, "grad_norm": 3.9054555892944336, "learning_rate": 8.70117276829052e-06, "loss": 0.7763, "step": 5132 }, { "epoch": 1.3651595744680851, "grad_norm": 4.0739288330078125, "learning_rate": 8.700581376821086e-06, "loss": 0.728, "step": 5133 }, { "epoch": 1.3654255319148936, "grad_norm": 3.8359971046447754, "learning_rate": 8.699989870851308e-06, "loss": 0.8314, "step": 5134 }, { "epoch": 1.3656914893617023, "grad_norm": 3.708594799041748, "learning_rate": 8.699398250399486e-06, "loss": 0.7632, "step": 5135 }, { "epoch": 1.3659574468085105, "grad_norm": 3.9665486812591553, "learning_rate": 8.698806515483928e-06, "loss": 0.8794, "step": 5136 }, { "epoch": 1.3662234042553192, "grad_norm": 4.699567794799805, "learning_rate": 8.698214666122941e-06, "loss": 1.0106, "step": 5137 }, { "epoch": 1.3664893617021276, "grad_norm": 3.8563220500946045, "learning_rate": 8.697622702334839e-06, "loss": 0.7451, "step": 5138 }, { "epoch": 1.366755319148936, "grad_norm": 4.188748359680176, "learning_rate": 8.697030624137937e-06, "loss": 0.7481, "step": 5139 }, { "epoch": 1.3670212765957448, "grad_norm": 3.891820192337036, "learning_rate": 8.696438431550553e-06, "loss": 0.8304, "step": 5140 }, { "epoch": 1.3672872340425533, "grad_norm": 4.065185546875, "learning_rate": 8.695846124591015e-06, "loss": 0.8912, "step": 5141 }, { "epoch": 1.3675531914893617, "grad_norm": 3.466252326965332, "learning_rate": 8.695253703277644e-06, "loss": 0.7941, "step": 5142 }, { "epoch": 1.3678191489361702, "grad_norm": 3.7102415561676025, "learning_rate": 8.694661167628772e-06, "loss": 0.6821, "step": 5143 }, { "epoch": 1.3680851063829786, "grad_norm": 4.1319260597229, "learning_rate": 8.694068517662735e-06, "loss": 0.9666, "step": 5144 }, { "epoch": 1.3683510638297873, "grad_norm": 3.870607852935791, "learning_rate": 8.693475753397869e-06, "loss": 0.8806, "step": 5145 }, { "epoch": 1.3686170212765958, "grad_norm": 3.9953293800354004, "learning_rate": 8.692882874852515e-06, "loss": 0.8558, "step": 5146 }, { "epoch": 1.3688829787234043, "grad_norm": 4.429169178009033, "learning_rate": 8.692289882045015e-06, "loss": 0.7949, "step": 5147 }, { "epoch": 1.3691489361702127, "grad_norm": 3.895005464553833, "learning_rate": 8.691696774993721e-06, "loss": 0.7547, "step": 5148 }, { "epoch": 1.3694148936170212, "grad_norm": 4.446406841278076, "learning_rate": 8.691103553716981e-06, "loss": 0.8757, "step": 5149 }, { "epoch": 1.3696808510638299, "grad_norm": 4.012157440185547, "learning_rate": 8.690510218233153e-06, "loss": 0.9106, "step": 5150 }, { "epoch": 1.3699468085106383, "grad_norm": 3.966068983078003, "learning_rate": 8.689916768560593e-06, "loss": 0.7194, "step": 5151 }, { "epoch": 1.3702127659574468, "grad_norm": 3.9841232299804688, "learning_rate": 8.689323204717663e-06, "loss": 0.8174, "step": 5152 }, { "epoch": 1.3704787234042553, "grad_norm": 4.248937129974365, "learning_rate": 8.688729526722732e-06, "loss": 0.8107, "step": 5153 }, { "epoch": 1.3707446808510637, "grad_norm": 3.6485583782196045, "learning_rate": 8.688135734594165e-06, "loss": 0.8828, "step": 5154 }, { "epoch": 1.3710106382978724, "grad_norm": 4.1670966148376465, "learning_rate": 8.687541828350334e-06, "loss": 0.8604, "step": 5155 }, { "epoch": 1.3712765957446809, "grad_norm": 4.121282577514648, "learning_rate": 8.686947808009621e-06, "loss": 0.8228, "step": 5156 }, { "epoch": 1.3715425531914893, "grad_norm": 3.781928539276123, "learning_rate": 8.6863536735904e-06, "loss": 0.7416, "step": 5157 }, { "epoch": 1.371808510638298, "grad_norm": 3.688425064086914, "learning_rate": 8.685759425111056e-06, "loss": 0.7902, "step": 5158 }, { "epoch": 1.3720744680851062, "grad_norm": 3.922410488128662, "learning_rate": 8.685165062589975e-06, "loss": 0.8117, "step": 5159 }, { "epoch": 1.372340425531915, "grad_norm": 4.217987060546875, "learning_rate": 8.68457058604555e-06, "loss": 0.9173, "step": 5160 }, { "epoch": 1.3726063829787234, "grad_norm": 4.135257244110107, "learning_rate": 8.683975995496173e-06, "loss": 0.7474, "step": 5161 }, { "epoch": 1.3728723404255319, "grad_norm": 3.7882463932037354, "learning_rate": 8.68338129096024e-06, "loss": 0.8153, "step": 5162 }, { "epoch": 1.3731382978723405, "grad_norm": 3.6793859004974365, "learning_rate": 8.682786472456155e-06, "loss": 0.6914, "step": 5163 }, { "epoch": 1.373404255319149, "grad_norm": 4.030581951141357, "learning_rate": 8.682191540002318e-06, "loss": 0.778, "step": 5164 }, { "epoch": 1.3736702127659575, "grad_norm": 3.8380470275878906, "learning_rate": 8.681596493617141e-06, "loss": 0.7522, "step": 5165 }, { "epoch": 1.373936170212766, "grad_norm": 4.138343334197998, "learning_rate": 8.681001333319035e-06, "loss": 0.843, "step": 5166 }, { "epoch": 1.3742021276595744, "grad_norm": 3.723407030105591, "learning_rate": 8.680406059126412e-06, "loss": 0.7799, "step": 5167 }, { "epoch": 1.374468085106383, "grad_norm": 3.8985822200775146, "learning_rate": 8.679810671057695e-06, "loss": 0.7446, "step": 5168 }, { "epoch": 1.3747340425531915, "grad_norm": 4.534223556518555, "learning_rate": 8.679215169131301e-06, "loss": 0.8734, "step": 5169 }, { "epoch": 1.375, "grad_norm": 3.75278639793396, "learning_rate": 8.67861955336566e-06, "loss": 0.8435, "step": 5170 }, { "epoch": 1.3752659574468085, "grad_norm": 4.094736099243164, "learning_rate": 8.678023823779196e-06, "loss": 0.7671, "step": 5171 }, { "epoch": 1.375531914893617, "grad_norm": 3.920642137527466, "learning_rate": 8.677427980390348e-06, "loss": 0.7937, "step": 5172 }, { "epoch": 1.3757978723404256, "grad_norm": 3.5799460411071777, "learning_rate": 8.676832023217545e-06, "loss": 0.8206, "step": 5173 }, { "epoch": 1.376063829787234, "grad_norm": 3.8929152488708496, "learning_rate": 8.676235952279233e-06, "loss": 0.837, "step": 5174 }, { "epoch": 1.3763297872340425, "grad_norm": 3.7762844562530518, "learning_rate": 8.675639767593851e-06, "loss": 0.8191, "step": 5175 }, { "epoch": 1.376595744680851, "grad_norm": 4.34854793548584, "learning_rate": 8.675043469179849e-06, "loss": 0.9724, "step": 5176 }, { "epoch": 1.3768617021276595, "grad_norm": 4.143275260925293, "learning_rate": 8.674447057055673e-06, "loss": 0.7607, "step": 5177 }, { "epoch": 1.3771276595744681, "grad_norm": 3.8602356910705566, "learning_rate": 8.673850531239781e-06, "loss": 0.8241, "step": 5178 }, { "epoch": 1.3773936170212766, "grad_norm": 4.238362789154053, "learning_rate": 8.673253891750626e-06, "loss": 0.75, "step": 5179 }, { "epoch": 1.377659574468085, "grad_norm": 4.423724174499512, "learning_rate": 8.672657138606672e-06, "loss": 0.8929, "step": 5180 }, { "epoch": 1.3779255319148938, "grad_norm": 3.5237340927124023, "learning_rate": 8.672060271826381e-06, "loss": 0.6877, "step": 5181 }, { "epoch": 1.378191489361702, "grad_norm": 3.615936756134033, "learning_rate": 8.671463291428223e-06, "loss": 0.7091, "step": 5182 }, { "epoch": 1.3784574468085107, "grad_norm": 3.587336778640747, "learning_rate": 8.67086619743067e-06, "loss": 0.8266, "step": 5183 }, { "epoch": 1.3787234042553191, "grad_norm": 4.141132831573486, "learning_rate": 8.670268989852192e-06, "loss": 0.7199, "step": 5184 }, { "epoch": 1.3789893617021276, "grad_norm": 4.076261520385742, "learning_rate": 8.669671668711272e-06, "loss": 0.7788, "step": 5185 }, { "epoch": 1.3792553191489363, "grad_norm": 4.020741939544678, "learning_rate": 8.66907423402639e-06, "loss": 0.8652, "step": 5186 }, { "epoch": 1.3795212765957447, "grad_norm": 3.8059983253479004, "learning_rate": 8.668476685816029e-06, "loss": 0.8151, "step": 5187 }, { "epoch": 1.3797872340425532, "grad_norm": 4.055500030517578, "learning_rate": 8.667879024098682e-06, "loss": 0.7985, "step": 5188 }, { "epoch": 1.3800531914893617, "grad_norm": 3.8605387210845947, "learning_rate": 8.66728124889284e-06, "loss": 0.8602, "step": 5189 }, { "epoch": 1.3803191489361701, "grad_norm": 3.781041383743286, "learning_rate": 8.666683360216998e-06, "loss": 0.815, "step": 5190 }, { "epoch": 1.3805851063829788, "grad_norm": 4.160099029541016, "learning_rate": 8.666085358089655e-06, "loss": 0.8366, "step": 5191 }, { "epoch": 1.3808510638297873, "grad_norm": 4.079177379608154, "learning_rate": 8.665487242529316e-06, "loss": 0.9131, "step": 5192 }, { "epoch": 1.3811170212765957, "grad_norm": 4.033502578735352, "learning_rate": 8.664889013554484e-06, "loss": 0.7588, "step": 5193 }, { "epoch": 1.3813829787234042, "grad_norm": 3.969634771347046, "learning_rate": 8.664290671183675e-06, "loss": 0.9422, "step": 5194 }, { "epoch": 1.3816489361702127, "grad_norm": 3.9259159564971924, "learning_rate": 8.663692215435396e-06, "loss": 0.7046, "step": 5195 }, { "epoch": 1.3819148936170214, "grad_norm": 4.086988925933838, "learning_rate": 8.663093646328166e-06, "loss": 0.8629, "step": 5196 }, { "epoch": 1.3821808510638298, "grad_norm": 4.083224773406982, "learning_rate": 8.662494963880508e-06, "loss": 0.8992, "step": 5197 }, { "epoch": 1.3824468085106383, "grad_norm": 4.1260881423950195, "learning_rate": 8.66189616811094e-06, "loss": 0.8958, "step": 5198 }, { "epoch": 1.3827127659574467, "grad_norm": 3.9255919456481934, "learning_rate": 8.661297259037998e-06, "loss": 0.8155, "step": 5199 }, { "epoch": 1.3829787234042552, "grad_norm": 4.030576705932617, "learning_rate": 8.660698236680205e-06, "loss": 0.901, "step": 5200 }, { "epoch": 1.383244680851064, "grad_norm": 4.204456329345703, "learning_rate": 8.660099101056098e-06, "loss": 0.8021, "step": 5201 }, { "epoch": 1.3835106382978724, "grad_norm": 3.743723154067993, "learning_rate": 8.659499852184218e-06, "loss": 0.8411, "step": 5202 }, { "epoch": 1.3837765957446808, "grad_norm": 3.8044793605804443, "learning_rate": 8.658900490083102e-06, "loss": 0.6985, "step": 5203 }, { "epoch": 1.3840425531914895, "grad_norm": 3.762624740600586, "learning_rate": 8.658301014771298e-06, "loss": 0.7873, "step": 5204 }, { "epoch": 1.3843085106382977, "grad_norm": 3.8245599269866943, "learning_rate": 8.657701426267355e-06, "loss": 0.7773, "step": 5205 }, { "epoch": 1.3845744680851064, "grad_norm": 3.875678062438965, "learning_rate": 8.65710172458982e-06, "loss": 0.9493, "step": 5206 }, { "epoch": 1.3848404255319149, "grad_norm": 4.034217834472656, "learning_rate": 8.656501909757255e-06, "loss": 0.8742, "step": 5207 }, { "epoch": 1.3851063829787233, "grad_norm": 3.7253971099853516, "learning_rate": 8.655901981788216e-06, "loss": 0.7408, "step": 5208 }, { "epoch": 1.385372340425532, "grad_norm": 4.211146354675293, "learning_rate": 8.655301940701262e-06, "loss": 0.8107, "step": 5209 }, { "epoch": 1.3856382978723405, "grad_norm": 4.0121378898620605, "learning_rate": 8.654701786514965e-06, "loss": 0.8808, "step": 5210 }, { "epoch": 1.385904255319149, "grad_norm": 4.111256122589111, "learning_rate": 8.654101519247892e-06, "loss": 0.8339, "step": 5211 }, { "epoch": 1.3861702127659574, "grad_norm": 3.683849811553955, "learning_rate": 8.653501138918615e-06, "loss": 0.8046, "step": 5212 }, { "epoch": 1.3864361702127659, "grad_norm": 4.3086957931518555, "learning_rate": 8.652900645545711e-06, "loss": 0.8217, "step": 5213 }, { "epoch": 1.3867021276595746, "grad_norm": 4.064043998718262, "learning_rate": 8.65230003914776e-06, "loss": 0.9811, "step": 5214 }, { "epoch": 1.386968085106383, "grad_norm": 3.8175463676452637, "learning_rate": 8.651699319743348e-06, "loss": 0.879, "step": 5215 }, { "epoch": 1.3872340425531915, "grad_norm": 4.500128269195557, "learning_rate": 8.651098487351057e-06, "loss": 0.6979, "step": 5216 }, { "epoch": 1.3875, "grad_norm": 4.019436836242676, "learning_rate": 8.650497541989483e-06, "loss": 0.8766, "step": 5217 }, { "epoch": 1.3877659574468084, "grad_norm": 3.5277206897735596, "learning_rate": 8.649896483677213e-06, "loss": 0.8292, "step": 5218 }, { "epoch": 1.388031914893617, "grad_norm": 3.918307065963745, "learning_rate": 8.649295312432853e-06, "loss": 0.7684, "step": 5219 }, { "epoch": 1.3882978723404256, "grad_norm": 3.9739909172058105, "learning_rate": 8.648694028274998e-06, "loss": 0.743, "step": 5220 }, { "epoch": 1.388563829787234, "grad_norm": 3.6508398056030273, "learning_rate": 8.648092631222253e-06, "loss": 0.7689, "step": 5221 }, { "epoch": 1.3888297872340425, "grad_norm": 3.846869468688965, "learning_rate": 8.647491121293228e-06, "loss": 0.741, "step": 5222 }, { "epoch": 1.389095744680851, "grad_norm": 3.8481643199920654, "learning_rate": 8.646889498506532e-06, "loss": 0.8665, "step": 5223 }, { "epoch": 1.3893617021276596, "grad_norm": 4.380584239959717, "learning_rate": 8.646287762880783e-06, "loss": 0.8029, "step": 5224 }, { "epoch": 1.389627659574468, "grad_norm": 3.8931496143341064, "learning_rate": 8.645685914434596e-06, "loss": 0.8964, "step": 5225 }, { "epoch": 1.3898936170212766, "grad_norm": 3.976508378982544, "learning_rate": 8.645083953186596e-06, "loss": 0.8707, "step": 5226 }, { "epoch": 1.390159574468085, "grad_norm": 3.606631278991699, "learning_rate": 8.644481879155406e-06, "loss": 0.7476, "step": 5227 }, { "epoch": 1.3904255319148935, "grad_norm": 4.043211936950684, "learning_rate": 8.643879692359655e-06, "loss": 0.7478, "step": 5228 }, { "epoch": 1.3906914893617022, "grad_norm": 3.9135618209838867, "learning_rate": 8.643277392817976e-06, "loss": 0.7469, "step": 5229 }, { "epoch": 1.3909574468085106, "grad_norm": 3.747793674468994, "learning_rate": 8.642674980549008e-06, "loss": 0.8092, "step": 5230 }, { "epoch": 1.391223404255319, "grad_norm": 4.33275032043457, "learning_rate": 8.642072455571383e-06, "loss": 0.7867, "step": 5231 }, { "epoch": 1.3914893617021278, "grad_norm": 4.364730358123779, "learning_rate": 8.641469817903752e-06, "loss": 0.8545, "step": 5232 }, { "epoch": 1.3917553191489362, "grad_norm": 3.848296880722046, "learning_rate": 8.640867067564757e-06, "loss": 0.8735, "step": 5233 }, { "epoch": 1.3920212765957447, "grad_norm": 3.8391952514648438, "learning_rate": 8.640264204573049e-06, "loss": 0.8439, "step": 5234 }, { "epoch": 1.3922872340425532, "grad_norm": 4.061415672302246, "learning_rate": 8.639661228947278e-06, "loss": 0.7702, "step": 5235 }, { "epoch": 1.3925531914893616, "grad_norm": 4.175765037536621, "learning_rate": 8.639058140706105e-06, "loss": 0.8053, "step": 5236 }, { "epoch": 1.3928191489361703, "grad_norm": 3.840773105621338, "learning_rate": 8.638454939868188e-06, "loss": 0.7192, "step": 5237 }, { "epoch": 1.3930851063829788, "grad_norm": 3.76470947265625, "learning_rate": 8.637851626452191e-06, "loss": 0.7634, "step": 5238 }, { "epoch": 1.3933510638297872, "grad_norm": 3.903261184692383, "learning_rate": 8.637248200476783e-06, "loss": 0.7672, "step": 5239 }, { "epoch": 1.3936170212765957, "grad_norm": 4.356569290161133, "learning_rate": 8.636644661960634e-06, "loss": 0.8834, "step": 5240 }, { "epoch": 1.3938829787234042, "grad_norm": 4.116570949554443, "learning_rate": 8.636041010922416e-06, "loss": 0.7715, "step": 5241 }, { "epoch": 1.3941489361702128, "grad_norm": 3.9501302242279053, "learning_rate": 8.635437247380809e-06, "loss": 0.7663, "step": 5242 }, { "epoch": 1.3944148936170213, "grad_norm": 4.226482391357422, "learning_rate": 8.634833371354492e-06, "loss": 0.8156, "step": 5243 }, { "epoch": 1.3946808510638298, "grad_norm": 4.047403335571289, "learning_rate": 8.634229382862152e-06, "loss": 0.8982, "step": 5244 }, { "epoch": 1.3949468085106382, "grad_norm": 4.245815753936768, "learning_rate": 8.633625281922477e-06, "loss": 0.8558, "step": 5245 }, { "epoch": 1.3952127659574467, "grad_norm": 3.9995036125183105, "learning_rate": 8.633021068554155e-06, "loss": 0.8246, "step": 5246 }, { "epoch": 1.3954787234042554, "grad_norm": 4.213914394378662, "learning_rate": 8.632416742775886e-06, "loss": 0.7979, "step": 5247 }, { "epoch": 1.3957446808510638, "grad_norm": 4.043915748596191, "learning_rate": 8.631812304606367e-06, "loss": 0.8903, "step": 5248 }, { "epoch": 1.3960106382978723, "grad_norm": 3.995999336242676, "learning_rate": 8.631207754064299e-06, "loss": 0.7445, "step": 5249 }, { "epoch": 1.3962765957446808, "grad_norm": 3.6424171924591064, "learning_rate": 8.630603091168385e-06, "loss": 0.6922, "step": 5250 }, { "epoch": 1.3965425531914892, "grad_norm": 4.3226118087768555, "learning_rate": 8.62999831593734e-06, "loss": 0.8686, "step": 5251 }, { "epoch": 1.396808510638298, "grad_norm": 3.89966082572937, "learning_rate": 8.629393428389873e-06, "loss": 0.7592, "step": 5252 }, { "epoch": 1.3970744680851064, "grad_norm": 4.409592151641846, "learning_rate": 8.628788428544698e-06, "loss": 0.952, "step": 5253 }, { "epoch": 1.3973404255319148, "grad_norm": 3.884060859680176, "learning_rate": 8.62818331642054e-06, "loss": 0.83, "step": 5254 }, { "epoch": 1.3976063829787235, "grad_norm": 3.480745792388916, "learning_rate": 8.627578092036117e-06, "loss": 0.7324, "step": 5255 }, { "epoch": 1.397872340425532, "grad_norm": 3.862119436264038, "learning_rate": 8.626972755410156e-06, "loss": 0.7555, "step": 5256 }, { "epoch": 1.3981382978723405, "grad_norm": 4.149264335632324, "learning_rate": 8.626367306561387e-06, "loss": 0.7649, "step": 5257 }, { "epoch": 1.398404255319149, "grad_norm": 3.6122639179229736, "learning_rate": 8.625761745508547e-06, "loss": 0.7959, "step": 5258 }, { "epoch": 1.3986702127659574, "grad_norm": 3.611455202102661, "learning_rate": 8.625156072270367e-06, "loss": 0.8546, "step": 5259 }, { "epoch": 1.398936170212766, "grad_norm": 4.0274858474731445, "learning_rate": 8.624550286865592e-06, "loss": 0.818, "step": 5260 }, { "epoch": 1.3992021276595745, "grad_norm": 4.080778121948242, "learning_rate": 8.623944389312962e-06, "loss": 0.8599, "step": 5261 }, { "epoch": 1.399468085106383, "grad_norm": 4.097471237182617, "learning_rate": 8.623338379631227e-06, "loss": 0.8178, "step": 5262 }, { "epoch": 1.3997340425531914, "grad_norm": 3.6200075149536133, "learning_rate": 8.622732257839137e-06, "loss": 0.8381, "step": 5263 }, { "epoch": 1.4, "grad_norm": 4.054747581481934, "learning_rate": 8.622126023955446e-06, "loss": 0.9865, "step": 5264 }, { "epoch": 1.4002659574468086, "grad_norm": 4.653242111206055, "learning_rate": 8.62151967799891e-06, "loss": 0.8813, "step": 5265 }, { "epoch": 1.400531914893617, "grad_norm": 4.182617664337158, "learning_rate": 8.620913219988291e-06, "loss": 0.7061, "step": 5266 }, { "epoch": 1.4007978723404255, "grad_norm": 3.594130277633667, "learning_rate": 8.620306649942356e-06, "loss": 0.7468, "step": 5267 }, { "epoch": 1.401063829787234, "grad_norm": 4.210184574127197, "learning_rate": 8.619699967879868e-06, "loss": 0.9574, "step": 5268 }, { "epoch": 1.4013297872340424, "grad_norm": 4.212064743041992, "learning_rate": 8.619093173819603e-06, "loss": 0.8027, "step": 5269 }, { "epoch": 1.4015957446808511, "grad_norm": 4.000636100769043, "learning_rate": 8.618486267780334e-06, "loss": 0.8482, "step": 5270 }, { "epoch": 1.4018617021276596, "grad_norm": 4.396604537963867, "learning_rate": 8.617879249780841e-06, "loss": 0.8989, "step": 5271 }, { "epoch": 1.402127659574468, "grad_norm": 3.6377105712890625, "learning_rate": 8.617272119839903e-06, "loss": 0.7686, "step": 5272 }, { "epoch": 1.4023936170212765, "grad_norm": 3.8942556381225586, "learning_rate": 8.616664877976308e-06, "loss": 0.8185, "step": 5273 }, { "epoch": 1.402659574468085, "grad_norm": 3.9607818126678467, "learning_rate": 8.616057524208843e-06, "loss": 0.6682, "step": 5274 }, { "epoch": 1.4029255319148937, "grad_norm": 4.523376941680908, "learning_rate": 8.615450058556301e-06, "loss": 0.8093, "step": 5275 }, { "epoch": 1.4031914893617021, "grad_norm": 4.111645221710205, "learning_rate": 8.614842481037476e-06, "loss": 0.8694, "step": 5276 }, { "epoch": 1.4034574468085106, "grad_norm": 3.7978808879852295, "learning_rate": 8.61423479167117e-06, "loss": 0.7477, "step": 5277 }, { "epoch": 1.4037234042553193, "grad_norm": 3.669728994369507, "learning_rate": 8.613626990476186e-06, "loss": 0.7951, "step": 5278 }, { "epoch": 1.4039893617021277, "grad_norm": 4.3240251541137695, "learning_rate": 8.613019077471325e-06, "loss": 0.8721, "step": 5279 }, { "epoch": 1.4042553191489362, "grad_norm": 3.702890157699585, "learning_rate": 8.6124110526754e-06, "loss": 0.6856, "step": 5280 }, { "epoch": 1.4045212765957447, "grad_norm": 4.085876941680908, "learning_rate": 8.611802916107225e-06, "loss": 0.7458, "step": 5281 }, { "epoch": 1.4047872340425531, "grad_norm": 4.095217704772949, "learning_rate": 8.611194667785615e-06, "loss": 0.821, "step": 5282 }, { "epoch": 1.4050531914893618, "grad_norm": 3.8958888053894043, "learning_rate": 8.610586307729393e-06, "loss": 0.7271, "step": 5283 }, { "epoch": 1.4053191489361703, "grad_norm": 3.696851968765259, "learning_rate": 8.609977835957378e-06, "loss": 0.7236, "step": 5284 }, { "epoch": 1.4055851063829787, "grad_norm": 4.185340404510498, "learning_rate": 8.609369252488398e-06, "loss": 0.9089, "step": 5285 }, { "epoch": 1.4058510638297872, "grad_norm": 4.072790622711182, "learning_rate": 8.608760557341284e-06, "loss": 0.761, "step": 5286 }, { "epoch": 1.4061170212765957, "grad_norm": 3.8811473846435547, "learning_rate": 8.60815175053487e-06, "loss": 0.8021, "step": 5287 }, { "epoch": 1.4063829787234043, "grad_norm": 4.050495624542236, "learning_rate": 8.607542832087993e-06, "loss": 0.7736, "step": 5288 }, { "epoch": 1.4066489361702128, "grad_norm": 3.903702735900879, "learning_rate": 8.606933802019493e-06, "loss": 0.8525, "step": 5289 }, { "epoch": 1.4069148936170213, "grad_norm": 3.618151903152466, "learning_rate": 8.606324660348214e-06, "loss": 0.7992, "step": 5290 }, { "epoch": 1.4071808510638297, "grad_norm": 3.910585641860962, "learning_rate": 8.605715407093005e-06, "loss": 0.8235, "step": 5291 }, { "epoch": 1.4074468085106382, "grad_norm": 4.317497253417969, "learning_rate": 8.605106042272715e-06, "loss": 0.8737, "step": 5292 }, { "epoch": 1.4077127659574469, "grad_norm": 4.357272624969482, "learning_rate": 8.6044965659062e-06, "loss": 0.787, "step": 5293 }, { "epoch": 1.4079787234042553, "grad_norm": 4.051640033721924, "learning_rate": 8.603886978012317e-06, "loss": 0.8513, "step": 5294 }, { "epoch": 1.4082446808510638, "grad_norm": 4.226726055145264, "learning_rate": 8.60327727860993e-06, "loss": 0.717, "step": 5295 }, { "epoch": 1.4085106382978723, "grad_norm": 3.7265825271606445, "learning_rate": 8.6026674677179e-06, "loss": 0.7177, "step": 5296 }, { "epoch": 1.4087765957446807, "grad_norm": 3.866156816482544, "learning_rate": 8.602057545355096e-06, "loss": 0.78, "step": 5297 }, { "epoch": 1.4090425531914894, "grad_norm": 3.843125820159912, "learning_rate": 8.601447511540392e-06, "loss": 0.8847, "step": 5298 }, { "epoch": 1.4093085106382979, "grad_norm": 3.813894033432007, "learning_rate": 8.600837366292663e-06, "loss": 0.7, "step": 5299 }, { "epoch": 1.4095744680851063, "grad_norm": 4.289909362792969, "learning_rate": 8.600227109630785e-06, "loss": 0.7832, "step": 5300 }, { "epoch": 1.409840425531915, "grad_norm": 4.330870151519775, "learning_rate": 8.599616741573642e-06, "loss": 0.9482, "step": 5301 }, { "epoch": 1.4101063829787235, "grad_norm": 3.625694990158081, "learning_rate": 8.599006262140117e-06, "loss": 0.6515, "step": 5302 }, { "epoch": 1.410372340425532, "grad_norm": 4.081284999847412, "learning_rate": 8.598395671349104e-06, "loss": 0.9656, "step": 5303 }, { "epoch": 1.4106382978723404, "grad_norm": 4.240716457366943, "learning_rate": 8.59778496921949e-06, "loss": 0.8328, "step": 5304 }, { "epoch": 1.4109042553191489, "grad_norm": 3.9750494956970215, "learning_rate": 8.597174155770174e-06, "loss": 0.7686, "step": 5305 }, { "epoch": 1.4111702127659576, "grad_norm": 3.6305007934570312, "learning_rate": 8.596563231020054e-06, "loss": 0.7059, "step": 5306 }, { "epoch": 1.411436170212766, "grad_norm": 3.9132840633392334, "learning_rate": 8.595952194988034e-06, "loss": 0.8509, "step": 5307 }, { "epoch": 1.4117021276595745, "grad_norm": 4.162221431732178, "learning_rate": 8.59534104769302e-06, "loss": 0.82, "step": 5308 }, { "epoch": 1.411968085106383, "grad_norm": 4.090907096862793, "learning_rate": 8.594729789153919e-06, "loss": 0.9025, "step": 5309 }, { "epoch": 1.4122340425531914, "grad_norm": 4.178388595581055, "learning_rate": 8.594118419389648e-06, "loss": 0.8537, "step": 5310 }, { "epoch": 1.4125, "grad_norm": 3.5532939434051514, "learning_rate": 8.59350693841912e-06, "loss": 0.684, "step": 5311 }, { "epoch": 1.4127659574468086, "grad_norm": 3.9625163078308105, "learning_rate": 8.592895346261258e-06, "loss": 0.7501, "step": 5312 }, { "epoch": 1.413031914893617, "grad_norm": 3.4592795372009277, "learning_rate": 8.592283642934983e-06, "loss": 0.8845, "step": 5313 }, { "epoch": 1.4132978723404255, "grad_norm": 4.265946865081787, "learning_rate": 8.591671828459222e-06, "loss": 0.8354, "step": 5314 }, { "epoch": 1.413563829787234, "grad_norm": 4.301452159881592, "learning_rate": 8.591059902852907e-06, "loss": 0.9654, "step": 5315 }, { "epoch": 1.4138297872340426, "grad_norm": 3.953643560409546, "learning_rate": 8.59044786613497e-06, "loss": 0.8592, "step": 5316 }, { "epoch": 1.414095744680851, "grad_norm": 3.8107998371124268, "learning_rate": 8.589835718324349e-06, "loss": 0.7486, "step": 5317 }, { "epoch": 1.4143617021276595, "grad_norm": 4.148920059204102, "learning_rate": 8.589223459439987e-06, "loss": 0.8111, "step": 5318 }, { "epoch": 1.414627659574468, "grad_norm": 3.7461628913879395, "learning_rate": 8.588611089500821e-06, "loss": 0.7551, "step": 5319 }, { "epoch": 1.4148936170212765, "grad_norm": 4.387768268585205, "learning_rate": 8.587998608525806e-06, "loss": 0.933, "step": 5320 }, { "epoch": 1.4151595744680852, "grad_norm": 3.419297933578491, "learning_rate": 8.587386016533887e-06, "loss": 0.7643, "step": 5321 }, { "epoch": 1.4154255319148936, "grad_norm": 3.7075390815734863, "learning_rate": 8.586773313544023e-06, "loss": 0.7818, "step": 5322 }, { "epoch": 1.415691489361702, "grad_norm": 4.141719341278076, "learning_rate": 8.586160499575168e-06, "loss": 0.912, "step": 5323 }, { "epoch": 1.4159574468085108, "grad_norm": 4.2602386474609375, "learning_rate": 8.585547574646287e-06, "loss": 0.834, "step": 5324 }, { "epoch": 1.4162234042553192, "grad_norm": 4.043152332305908, "learning_rate": 8.584934538776342e-06, "loss": 0.6793, "step": 5325 }, { "epoch": 1.4164893617021277, "grad_norm": 4.062325954437256, "learning_rate": 8.584321391984301e-06, "loss": 0.8172, "step": 5326 }, { "epoch": 1.4167553191489362, "grad_norm": 3.731950044631958, "learning_rate": 8.583708134289138e-06, "loss": 0.6754, "step": 5327 }, { "epoch": 1.4170212765957446, "grad_norm": 4.3393940925598145, "learning_rate": 8.583094765709823e-06, "loss": 0.8304, "step": 5328 }, { "epoch": 1.4172872340425533, "grad_norm": 4.178645610809326, "learning_rate": 8.582481286265341e-06, "loss": 0.9168, "step": 5329 }, { "epoch": 1.4175531914893618, "grad_norm": 3.5687899589538574, "learning_rate": 8.581867695974667e-06, "loss": 0.6632, "step": 5330 }, { "epoch": 1.4178191489361702, "grad_norm": 3.7236688137054443, "learning_rate": 8.58125399485679e-06, "loss": 0.6788, "step": 5331 }, { "epoch": 1.4180851063829787, "grad_norm": 3.8592636585235596, "learning_rate": 8.5806401829307e-06, "loss": 0.8632, "step": 5332 }, { "epoch": 1.4183510638297872, "grad_norm": 3.7756807804107666, "learning_rate": 8.580026260215384e-06, "loss": 0.6994, "step": 5333 }, { "epoch": 1.4186170212765958, "grad_norm": 3.481576919555664, "learning_rate": 8.579412226729843e-06, "loss": 0.8748, "step": 5334 }, { "epoch": 1.4188829787234043, "grad_norm": 3.908369779586792, "learning_rate": 8.578798082493074e-06, "loss": 0.7567, "step": 5335 }, { "epoch": 1.4191489361702128, "grad_norm": 4.084057807922363, "learning_rate": 8.578183827524076e-06, "loss": 0.9174, "step": 5336 }, { "epoch": 1.4194148936170212, "grad_norm": 4.469969749450684, "learning_rate": 8.57756946184186e-06, "loss": 0.9547, "step": 5337 }, { "epoch": 1.4196808510638297, "grad_norm": 3.8578479290008545, "learning_rate": 8.576954985465431e-06, "loss": 0.8135, "step": 5338 }, { "epoch": 1.4199468085106384, "grad_norm": 3.7595484256744385, "learning_rate": 8.576340398413804e-06, "loss": 0.7724, "step": 5339 }, { "epoch": 1.4202127659574468, "grad_norm": 4.005858898162842, "learning_rate": 8.575725700705995e-06, "loss": 0.8386, "step": 5340 }, { "epoch": 1.4204787234042553, "grad_norm": 4.103984355926514, "learning_rate": 8.575110892361022e-06, "loss": 0.9413, "step": 5341 }, { "epoch": 1.4207446808510638, "grad_norm": 3.5380845069885254, "learning_rate": 8.57449597339791e-06, "loss": 0.8393, "step": 5342 }, { "epoch": 1.4210106382978722, "grad_norm": 3.589729070663452, "learning_rate": 8.573880943835684e-06, "loss": 0.7789, "step": 5343 }, { "epoch": 1.421276595744681, "grad_norm": 4.016366004943848, "learning_rate": 8.573265803693374e-06, "loss": 0.7377, "step": 5344 }, { "epoch": 1.4215425531914894, "grad_norm": 3.708329439163208, "learning_rate": 8.572650552990012e-06, "loss": 0.8608, "step": 5345 }, { "epoch": 1.4218085106382978, "grad_norm": 4.192487716674805, "learning_rate": 8.572035191744637e-06, "loss": 0.7963, "step": 5346 }, { "epoch": 1.4220744680851065, "grad_norm": 3.561629056930542, "learning_rate": 8.571419719976287e-06, "loss": 0.8004, "step": 5347 }, { "epoch": 1.422340425531915, "grad_norm": 3.7709176540374756, "learning_rate": 8.570804137704005e-06, "loss": 0.7012, "step": 5348 }, { "epoch": 1.4226063829787234, "grad_norm": 3.842339515686035, "learning_rate": 8.57018844494684e-06, "loss": 0.8063, "step": 5349 }, { "epoch": 1.422872340425532, "grad_norm": 4.014485836029053, "learning_rate": 8.56957264172384e-06, "loss": 0.681, "step": 5350 }, { "epoch": 1.4231382978723404, "grad_norm": 3.9877431392669678, "learning_rate": 8.568956728054061e-06, "loss": 0.9011, "step": 5351 }, { "epoch": 1.423404255319149, "grad_norm": 3.9741530418395996, "learning_rate": 8.568340703956558e-06, "loss": 0.8245, "step": 5352 }, { "epoch": 1.4236702127659575, "grad_norm": 4.008678436279297, "learning_rate": 8.567724569450393e-06, "loss": 0.8588, "step": 5353 }, { "epoch": 1.423936170212766, "grad_norm": 4.2688679695129395, "learning_rate": 8.56710832455463e-06, "loss": 0.8026, "step": 5354 }, { "epoch": 1.4242021276595744, "grad_norm": 4.144524097442627, "learning_rate": 8.566491969288333e-06, "loss": 0.7977, "step": 5355 }, { "epoch": 1.424468085106383, "grad_norm": 4.431448459625244, "learning_rate": 8.565875503670578e-06, "loss": 0.9466, "step": 5356 }, { "epoch": 1.4247340425531916, "grad_norm": 3.9344115257263184, "learning_rate": 8.565258927720436e-06, "loss": 0.7571, "step": 5357 }, { "epoch": 1.425, "grad_norm": 4.618174076080322, "learning_rate": 8.564642241456986e-06, "loss": 0.92, "step": 5358 }, { "epoch": 1.4252659574468085, "grad_norm": 4.515613079071045, "learning_rate": 8.564025444899308e-06, "loss": 0.8339, "step": 5359 }, { "epoch": 1.425531914893617, "grad_norm": 3.8892219066619873, "learning_rate": 8.563408538066486e-06, "loss": 0.6946, "step": 5360 }, { "epoch": 1.4257978723404254, "grad_norm": 3.8335928916931152, "learning_rate": 8.562791520977608e-06, "loss": 0.7894, "step": 5361 }, { "epoch": 1.4260638297872341, "grad_norm": 3.8898446559906006, "learning_rate": 8.562174393651767e-06, "loss": 0.6504, "step": 5362 }, { "epoch": 1.4263297872340426, "grad_norm": 3.916454553604126, "learning_rate": 8.561557156108055e-06, "loss": 0.8178, "step": 5363 }, { "epoch": 1.426595744680851, "grad_norm": 4.594573020935059, "learning_rate": 8.560939808365571e-06, "loss": 0.8554, "step": 5364 }, { "epoch": 1.4268617021276595, "grad_norm": 3.920474052429199, "learning_rate": 8.56032235044342e-06, "loss": 0.9173, "step": 5365 }, { "epoch": 1.427127659574468, "grad_norm": 3.8437423706054688, "learning_rate": 8.5597047823607e-06, "loss": 0.7551, "step": 5366 }, { "epoch": 1.4273936170212767, "grad_norm": 3.631983518600464, "learning_rate": 8.559087104136525e-06, "loss": 0.8889, "step": 5367 }, { "epoch": 1.4276595744680851, "grad_norm": 3.7418458461761475, "learning_rate": 8.558469315790005e-06, "loss": 0.7964, "step": 5368 }, { "epoch": 1.4279255319148936, "grad_norm": 4.14785099029541, "learning_rate": 8.557851417340252e-06, "loss": 0.8312, "step": 5369 }, { "epoch": 1.4281914893617023, "grad_norm": 4.0224103927612305, "learning_rate": 8.55723340880639e-06, "loss": 0.9175, "step": 5370 }, { "epoch": 1.4284574468085105, "grad_norm": 3.899369478225708, "learning_rate": 8.556615290207538e-06, "loss": 0.776, "step": 5371 }, { "epoch": 1.4287234042553192, "grad_norm": 3.869248628616333, "learning_rate": 8.555997061562821e-06, "loss": 0.7417, "step": 5372 }, { "epoch": 1.4289893617021276, "grad_norm": 3.8381667137145996, "learning_rate": 8.555378722891367e-06, "loss": 0.7887, "step": 5373 }, { "epoch": 1.429255319148936, "grad_norm": 4.0374674797058105, "learning_rate": 8.55476027421231e-06, "loss": 0.7039, "step": 5374 }, { "epoch": 1.4295212765957448, "grad_norm": 4.473758220672607, "learning_rate": 8.554141715544788e-06, "loss": 0.8829, "step": 5375 }, { "epoch": 1.4297872340425533, "grad_norm": 3.995429277420044, "learning_rate": 8.553523046907934e-06, "loss": 0.8441, "step": 5376 }, { "epoch": 1.4300531914893617, "grad_norm": 3.942129373550415, "learning_rate": 8.552904268320895e-06, "loss": 0.8657, "step": 5377 }, { "epoch": 1.4303191489361702, "grad_norm": 4.163167953491211, "learning_rate": 8.552285379802811e-06, "loss": 0.7497, "step": 5378 }, { "epoch": 1.4305851063829786, "grad_norm": 3.926020860671997, "learning_rate": 8.551666381372839e-06, "loss": 0.8265, "step": 5379 }, { "epoch": 1.4308510638297873, "grad_norm": 3.686615228652954, "learning_rate": 8.551047273050126e-06, "loss": 0.694, "step": 5380 }, { "epoch": 1.4311170212765958, "grad_norm": 4.436965465545654, "learning_rate": 8.55042805485383e-06, "loss": 0.929, "step": 5381 }, { "epoch": 1.4313829787234043, "grad_norm": 4.103221416473389, "learning_rate": 8.549808726803108e-06, "loss": 0.7724, "step": 5382 }, { "epoch": 1.4316489361702127, "grad_norm": 3.994560718536377, "learning_rate": 8.549189288917127e-06, "loss": 0.6845, "step": 5383 }, { "epoch": 1.4319148936170212, "grad_norm": 4.3197712898254395, "learning_rate": 8.548569741215049e-06, "loss": 0.8348, "step": 5384 }, { "epoch": 1.4321808510638299, "grad_norm": 4.51045560836792, "learning_rate": 8.547950083716047e-06, "loss": 0.8659, "step": 5385 }, { "epoch": 1.4324468085106383, "grad_norm": 4.250168323516846, "learning_rate": 8.54733031643929e-06, "loss": 0.9424, "step": 5386 }, { "epoch": 1.4327127659574468, "grad_norm": 3.6297523975372314, "learning_rate": 8.54671043940396e-06, "loss": 0.8464, "step": 5387 }, { "epoch": 1.4329787234042553, "grad_norm": 3.914750099182129, "learning_rate": 8.54609045262923e-06, "loss": 0.9345, "step": 5388 }, { "epoch": 1.4332446808510637, "grad_norm": 4.086660385131836, "learning_rate": 8.545470356134289e-06, "loss": 0.8161, "step": 5389 }, { "epoch": 1.4335106382978724, "grad_norm": 3.657174825668335, "learning_rate": 8.54485014993832e-06, "loss": 0.8184, "step": 5390 }, { "epoch": 1.4337765957446809, "grad_norm": 4.197863578796387, "learning_rate": 8.544229834060512e-06, "loss": 0.8937, "step": 5391 }, { "epoch": 1.4340425531914893, "grad_norm": 4.215087413787842, "learning_rate": 8.543609408520062e-06, "loss": 0.8149, "step": 5392 }, { "epoch": 1.434308510638298, "grad_norm": 4.2908101081848145, "learning_rate": 8.542988873336164e-06, "loss": 0.7731, "step": 5393 }, { "epoch": 1.4345744680851062, "grad_norm": 3.921720266342163, "learning_rate": 8.54236822852802e-06, "loss": 0.7697, "step": 5394 }, { "epoch": 1.434840425531915, "grad_norm": 4.464201927185059, "learning_rate": 8.54174747411483e-06, "loss": 0.8365, "step": 5395 }, { "epoch": 1.4351063829787234, "grad_norm": 3.9795491695404053, "learning_rate": 8.541126610115806e-06, "loss": 0.8086, "step": 5396 }, { "epoch": 1.4353723404255319, "grad_norm": 4.0533766746521, "learning_rate": 8.540505636550153e-06, "loss": 0.7996, "step": 5397 }, { "epoch": 1.4356382978723405, "grad_norm": 4.261003494262695, "learning_rate": 8.53988455343709e-06, "loss": 0.7748, "step": 5398 }, { "epoch": 1.435904255319149, "grad_norm": 4.159748077392578, "learning_rate": 8.53926336079583e-06, "loss": 0.8867, "step": 5399 }, { "epoch": 1.4361702127659575, "grad_norm": 3.9314358234405518, "learning_rate": 8.538642058645595e-06, "loss": 0.8713, "step": 5400 }, { "epoch": 1.436436170212766, "grad_norm": 3.8043625354766846, "learning_rate": 8.538020647005607e-06, "loss": 0.7276, "step": 5401 }, { "epoch": 1.4367021276595744, "grad_norm": 4.576129913330078, "learning_rate": 8.537399125895096e-06, "loss": 0.7822, "step": 5402 }, { "epoch": 1.436968085106383, "grad_norm": 3.801168918609619, "learning_rate": 8.53677749533329e-06, "loss": 0.8445, "step": 5403 }, { "epoch": 1.4372340425531915, "grad_norm": 3.763317108154297, "learning_rate": 8.536155755339427e-06, "loss": 0.7572, "step": 5404 }, { "epoch": 1.4375, "grad_norm": 4.1881256103515625, "learning_rate": 8.535533905932739e-06, "loss": 0.8398, "step": 5405 }, { "epoch": 1.4377659574468085, "grad_norm": 3.61997127532959, "learning_rate": 8.534911947132469e-06, "loss": 0.674, "step": 5406 }, { "epoch": 1.438031914893617, "grad_norm": 3.6583242416381836, "learning_rate": 8.534289878957863e-06, "loss": 0.6655, "step": 5407 }, { "epoch": 1.4382978723404256, "grad_norm": 3.9012091159820557, "learning_rate": 8.533667701428167e-06, "loss": 0.6869, "step": 5408 }, { "epoch": 1.438563829787234, "grad_norm": 3.890615463256836, "learning_rate": 8.53304541456263e-06, "loss": 0.8431, "step": 5409 }, { "epoch": 1.4388297872340425, "grad_norm": 3.8987715244293213, "learning_rate": 8.532423018380511e-06, "loss": 0.8705, "step": 5410 }, { "epoch": 1.439095744680851, "grad_norm": 4.005768775939941, "learning_rate": 8.531800512901066e-06, "loss": 0.8555, "step": 5411 }, { "epoch": 1.4393617021276595, "grad_norm": 3.9035804271698, "learning_rate": 8.531177898143552e-06, "loss": 0.7811, "step": 5412 }, { "epoch": 1.4396276595744681, "grad_norm": 4.260951995849609, "learning_rate": 8.530555174127236e-06, "loss": 0.9168, "step": 5413 }, { "epoch": 1.4398936170212766, "grad_norm": 4.07423210144043, "learning_rate": 8.529932340871388e-06, "loss": 0.7437, "step": 5414 }, { "epoch": 1.440159574468085, "grad_norm": 3.9797050952911377, "learning_rate": 8.529309398395275e-06, "loss": 0.707, "step": 5415 }, { "epoch": 1.4404255319148938, "grad_norm": 3.7319893836975098, "learning_rate": 8.528686346718177e-06, "loss": 0.7089, "step": 5416 }, { "epoch": 1.440691489361702, "grad_norm": 4.224223613739014, "learning_rate": 8.528063185859367e-06, "loss": 0.786, "step": 5417 }, { "epoch": 1.4409574468085107, "grad_norm": 4.449718952178955, "learning_rate": 8.527439915838129e-06, "loss": 0.8129, "step": 5418 }, { "epoch": 1.4412234042553191, "grad_norm": 3.991421937942505, "learning_rate": 8.526816536673748e-06, "loss": 0.9446, "step": 5419 }, { "epoch": 1.4414893617021276, "grad_norm": 3.5149245262145996, "learning_rate": 8.52619304838551e-06, "loss": 0.738, "step": 5420 }, { "epoch": 1.4417553191489363, "grad_norm": 4.034007549285889, "learning_rate": 8.525569450992707e-06, "loss": 0.8011, "step": 5421 }, { "epoch": 1.4420212765957447, "grad_norm": 4.191031455993652, "learning_rate": 8.524945744514634e-06, "loss": 0.9352, "step": 5422 }, { "epoch": 1.4422872340425532, "grad_norm": 3.4210205078125, "learning_rate": 8.524321928970591e-06, "loss": 0.7345, "step": 5423 }, { "epoch": 1.4425531914893617, "grad_norm": 3.573930263519287, "learning_rate": 8.523698004379878e-06, "loss": 0.6936, "step": 5424 }, { "epoch": 1.4428191489361701, "grad_norm": 3.847769260406494, "learning_rate": 8.523073970761799e-06, "loss": 0.7465, "step": 5425 }, { "epoch": 1.4430851063829788, "grad_norm": 3.526007652282715, "learning_rate": 8.522449828135663e-06, "loss": 0.8042, "step": 5426 }, { "epoch": 1.4433510638297873, "grad_norm": 3.3529438972473145, "learning_rate": 8.521825576520784e-06, "loss": 0.6523, "step": 5427 }, { "epoch": 1.4436170212765957, "grad_norm": 3.608856678009033, "learning_rate": 8.521201215936474e-06, "loss": 0.753, "step": 5428 }, { "epoch": 1.4438829787234042, "grad_norm": 3.78037691116333, "learning_rate": 8.520576746402052e-06, "loss": 0.9188, "step": 5429 }, { "epoch": 1.4441489361702127, "grad_norm": 3.6370112895965576, "learning_rate": 8.519952167936842e-06, "loss": 0.7606, "step": 5430 }, { "epoch": 1.4444148936170214, "grad_norm": 4.091804504394531, "learning_rate": 8.519327480560169e-06, "loss": 0.8833, "step": 5431 }, { "epoch": 1.4446808510638298, "grad_norm": 4.076303482055664, "learning_rate": 8.518702684291358e-06, "loss": 0.7852, "step": 5432 }, { "epoch": 1.4449468085106383, "grad_norm": 3.845811605453491, "learning_rate": 8.518077779149744e-06, "loss": 0.7455, "step": 5433 }, { "epoch": 1.4452127659574467, "grad_norm": 4.302513599395752, "learning_rate": 8.517452765154661e-06, "loss": 0.7273, "step": 5434 }, { "epoch": 1.4454787234042552, "grad_norm": 3.78494930267334, "learning_rate": 8.516827642325447e-06, "loss": 0.7468, "step": 5435 }, { "epoch": 1.445744680851064, "grad_norm": 3.9590561389923096, "learning_rate": 8.516202410681446e-06, "loss": 0.9023, "step": 5436 }, { "epoch": 1.4460106382978724, "grad_norm": 4.2443766593933105, "learning_rate": 8.515577070242005e-06, "loss": 0.9363, "step": 5437 }, { "epoch": 1.4462765957446808, "grad_norm": 3.511875867843628, "learning_rate": 8.514951621026468e-06, "loss": 0.7257, "step": 5438 }, { "epoch": 1.4465425531914895, "grad_norm": 3.931488513946533, "learning_rate": 8.51432606305419e-06, "loss": 0.794, "step": 5439 }, { "epoch": 1.4468085106382977, "grad_norm": 4.520570755004883, "learning_rate": 8.513700396344527e-06, "loss": 0.9367, "step": 5440 }, { "epoch": 1.4470744680851064, "grad_norm": 4.023960113525391, "learning_rate": 8.513074620916835e-06, "loss": 0.8083, "step": 5441 }, { "epoch": 1.4473404255319149, "grad_norm": 3.8863484859466553, "learning_rate": 8.512448736790479e-06, "loss": 0.7789, "step": 5442 }, { "epoch": 1.4476063829787233, "grad_norm": 3.4847662448883057, "learning_rate": 8.511822743984824e-06, "loss": 0.6853, "step": 5443 }, { "epoch": 1.447872340425532, "grad_norm": 3.668828010559082, "learning_rate": 8.511196642519237e-06, "loss": 0.8037, "step": 5444 }, { "epoch": 1.4481382978723405, "grad_norm": 3.801157236099243, "learning_rate": 8.510570432413095e-06, "loss": 0.8393, "step": 5445 }, { "epoch": 1.448404255319149, "grad_norm": 4.479011535644531, "learning_rate": 8.509944113685769e-06, "loss": 0.9082, "step": 5446 }, { "epoch": 1.4486702127659574, "grad_norm": 4.385382652282715, "learning_rate": 8.509317686356638e-06, "loss": 0.9118, "step": 5447 }, { "epoch": 1.4489361702127659, "grad_norm": 4.001799583435059, "learning_rate": 8.50869115044509e-06, "loss": 0.7022, "step": 5448 }, { "epoch": 1.4492021276595746, "grad_norm": 4.2879228591918945, "learning_rate": 8.508064505970503e-06, "loss": 0.8253, "step": 5449 }, { "epoch": 1.449468085106383, "grad_norm": 3.933523654937744, "learning_rate": 8.507437752952271e-06, "loss": 0.8163, "step": 5450 }, { "epoch": 1.4497340425531915, "grad_norm": 4.011867046356201, "learning_rate": 8.506810891409786e-06, "loss": 0.8196, "step": 5451 }, { "epoch": 1.45, "grad_norm": 4.269194602966309, "learning_rate": 8.506183921362443e-06, "loss": 0.7912, "step": 5452 }, { "epoch": 1.4502659574468084, "grad_norm": 4.043778896331787, "learning_rate": 8.505556842829643e-06, "loss": 0.7842, "step": 5453 }, { "epoch": 1.450531914893617, "grad_norm": 4.532417297363281, "learning_rate": 8.504929655830785e-06, "loss": 0.9794, "step": 5454 }, { "epoch": 1.4507978723404256, "grad_norm": 3.571371555328369, "learning_rate": 8.504302360385276e-06, "loss": 0.8234, "step": 5455 }, { "epoch": 1.451063829787234, "grad_norm": 3.6812736988067627, "learning_rate": 8.50367495651253e-06, "loss": 0.8207, "step": 5456 }, { "epoch": 1.4513297872340425, "grad_norm": 3.88917875289917, "learning_rate": 8.503047444231954e-06, "loss": 0.8452, "step": 5457 }, { "epoch": 1.451595744680851, "grad_norm": 3.7152698040008545, "learning_rate": 8.502419823562964e-06, "loss": 0.7018, "step": 5458 }, { "epoch": 1.4518617021276596, "grad_norm": 3.9872684478759766, "learning_rate": 8.501792094524983e-06, "loss": 0.9355, "step": 5459 }, { "epoch": 1.452127659574468, "grad_norm": 3.8965933322906494, "learning_rate": 8.501164257137431e-06, "loss": 0.7547, "step": 5460 }, { "epoch": 1.4523936170212766, "grad_norm": 4.248835563659668, "learning_rate": 8.500536311419735e-06, "loss": 0.8456, "step": 5461 }, { "epoch": 1.452659574468085, "grad_norm": 4.09518575668335, "learning_rate": 8.499908257391324e-06, "loss": 0.8698, "step": 5462 }, { "epoch": 1.4529255319148935, "grad_norm": 4.262086391448975, "learning_rate": 8.49928009507163e-06, "loss": 0.761, "step": 5463 }, { "epoch": 1.4531914893617022, "grad_norm": 3.634997606277466, "learning_rate": 8.49865182448009e-06, "loss": 0.7712, "step": 5464 }, { "epoch": 1.4534574468085106, "grad_norm": 4.407344818115234, "learning_rate": 8.498023445636145e-06, "loss": 0.8103, "step": 5465 }, { "epoch": 1.453723404255319, "grad_norm": 3.926379680633545, "learning_rate": 8.497394958559236e-06, "loss": 0.7233, "step": 5466 }, { "epoch": 1.4539893617021278, "grad_norm": 4.115360736846924, "learning_rate": 8.496766363268809e-06, "loss": 0.9513, "step": 5467 }, { "epoch": 1.4542553191489362, "grad_norm": 4.249356269836426, "learning_rate": 8.496137659784313e-06, "loss": 0.7799, "step": 5468 }, { "epoch": 1.4545212765957447, "grad_norm": 3.9418179988861084, "learning_rate": 8.495508848125202e-06, "loss": 0.7216, "step": 5469 }, { "epoch": 1.4547872340425532, "grad_norm": 4.33933687210083, "learning_rate": 8.494879928310934e-06, "loss": 0.8312, "step": 5470 }, { "epoch": 1.4550531914893616, "grad_norm": 4.497339248657227, "learning_rate": 8.494250900360963e-06, "loss": 0.6842, "step": 5471 }, { "epoch": 1.4553191489361703, "grad_norm": 4.439492225646973, "learning_rate": 8.493621764294757e-06, "loss": 0.8134, "step": 5472 }, { "epoch": 1.4555851063829788, "grad_norm": 4.622555255889893, "learning_rate": 8.49299252013178e-06, "loss": 0.878, "step": 5473 }, { "epoch": 1.4558510638297872, "grad_norm": 4.369466781616211, "learning_rate": 8.492363167891502e-06, "loss": 0.7228, "step": 5474 }, { "epoch": 1.4561170212765957, "grad_norm": 4.223091125488281, "learning_rate": 8.491733707593395e-06, "loss": 0.8303, "step": 5475 }, { "epoch": 1.4563829787234042, "grad_norm": 4.063412189483643, "learning_rate": 8.491104139256936e-06, "loss": 0.8504, "step": 5476 }, { "epoch": 1.4566489361702128, "grad_norm": 4.342689514160156, "learning_rate": 8.490474462901605e-06, "loss": 0.841, "step": 5477 }, { "epoch": 1.4569148936170213, "grad_norm": 4.090299129486084, "learning_rate": 8.489844678546886e-06, "loss": 0.8391, "step": 5478 }, { "epoch": 1.4571808510638298, "grad_norm": 3.786254644393921, "learning_rate": 8.489214786212263e-06, "loss": 0.8498, "step": 5479 }, { "epoch": 1.4574468085106382, "grad_norm": 4.191230297088623, "learning_rate": 8.488584785917226e-06, "loss": 0.7906, "step": 5480 }, { "epoch": 1.4577127659574467, "grad_norm": 3.928368330001831, "learning_rate": 8.487954677681269e-06, "loss": 0.8001, "step": 5481 }, { "epoch": 1.4579787234042554, "grad_norm": 3.579162836074829, "learning_rate": 8.487324461523887e-06, "loss": 0.8023, "step": 5482 }, { "epoch": 1.4582446808510638, "grad_norm": 3.6825640201568604, "learning_rate": 8.486694137464582e-06, "loss": 0.7853, "step": 5483 }, { "epoch": 1.4585106382978723, "grad_norm": 4.125916004180908, "learning_rate": 8.486063705522853e-06, "loss": 0.7216, "step": 5484 }, { "epoch": 1.4587765957446808, "grad_norm": 4.086201190948486, "learning_rate": 8.48543316571821e-06, "loss": 0.7723, "step": 5485 }, { "epoch": 1.4590425531914892, "grad_norm": 3.6054461002349854, "learning_rate": 8.484802518070161e-06, "loss": 0.7561, "step": 5486 }, { "epoch": 1.459308510638298, "grad_norm": 3.9755938053131104, "learning_rate": 8.48417176259822e-06, "loss": 0.7914, "step": 5487 }, { "epoch": 1.4595744680851064, "grad_norm": 3.4087741374969482, "learning_rate": 8.483540899321901e-06, "loss": 0.8288, "step": 5488 }, { "epoch": 1.4598404255319148, "grad_norm": 4.220149517059326, "learning_rate": 8.482909928260726e-06, "loss": 0.9088, "step": 5489 }, { "epoch": 1.4601063829787235, "grad_norm": 4.157181262969971, "learning_rate": 8.482278849434218e-06, "loss": 0.8727, "step": 5490 }, { "epoch": 1.460372340425532, "grad_norm": 4.077250003814697, "learning_rate": 8.481647662861901e-06, "loss": 0.7891, "step": 5491 }, { "epoch": 1.4606382978723405, "grad_norm": 3.9751412868499756, "learning_rate": 8.481016368563308e-06, "loss": 0.8363, "step": 5492 }, { "epoch": 1.460904255319149, "grad_norm": 4.07692813873291, "learning_rate": 8.480384966557969e-06, "loss": 1.0291, "step": 5493 }, { "epoch": 1.4611702127659574, "grad_norm": 3.963118553161621, "learning_rate": 8.479753456865422e-06, "loss": 0.778, "step": 5494 }, { "epoch": 1.461436170212766, "grad_norm": 4.359419822692871, "learning_rate": 8.479121839505205e-06, "loss": 0.8413, "step": 5495 }, { "epoch": 1.4617021276595745, "grad_norm": 4.071464538574219, "learning_rate": 8.478490114496862e-06, "loss": 0.802, "step": 5496 }, { "epoch": 1.461968085106383, "grad_norm": 4.090579509735107, "learning_rate": 8.477858281859941e-06, "loss": 0.8182, "step": 5497 }, { "epoch": 1.4622340425531914, "grad_norm": 4.3386006355285645, "learning_rate": 8.47722634161399e-06, "loss": 0.7349, "step": 5498 }, { "epoch": 1.4625, "grad_norm": 3.489248275756836, "learning_rate": 8.476594293778561e-06, "loss": 0.7918, "step": 5499 }, { "epoch": 1.4627659574468086, "grad_norm": 3.849106788635254, "learning_rate": 8.475962138373212e-06, "loss": 0.7986, "step": 5500 }, { "epoch": 1.4627659574468086, "eval_loss": 1.2964370250701904, "eval_runtime": 13.6602, "eval_samples_per_second": 29.282, "eval_steps_per_second": 3.66, "step": 5500 }, { "epoch": 1.463031914893617, "grad_norm": 3.9225049018859863, "learning_rate": 8.475329875417502e-06, "loss": 0.7197, "step": 5501 }, { "epoch": 1.4632978723404255, "grad_norm": 3.952686071395874, "learning_rate": 8.474697504930994e-06, "loss": 0.8378, "step": 5502 }, { "epoch": 1.463563829787234, "grad_norm": 3.452550172805786, "learning_rate": 8.474065026933254e-06, "loss": 0.8279, "step": 5503 }, { "epoch": 1.4638297872340424, "grad_norm": 3.6807174682617188, "learning_rate": 8.473432441443852e-06, "loss": 0.8527, "step": 5504 }, { "epoch": 1.4640957446808511, "grad_norm": 3.6200850009918213, "learning_rate": 8.472799748482361e-06, "loss": 0.7749, "step": 5505 }, { "epoch": 1.4643617021276596, "grad_norm": 4.591206073760986, "learning_rate": 8.472166948068357e-06, "loss": 0.8827, "step": 5506 }, { "epoch": 1.464627659574468, "grad_norm": 3.7772765159606934, "learning_rate": 8.471534040221419e-06, "loss": 0.8578, "step": 5507 }, { "epoch": 1.4648936170212765, "grad_norm": 3.75657057762146, "learning_rate": 8.47090102496113e-06, "loss": 0.8552, "step": 5508 }, { "epoch": 1.465159574468085, "grad_norm": 3.635420322418213, "learning_rate": 8.470267902307079e-06, "loss": 0.7732, "step": 5509 }, { "epoch": 1.4654255319148937, "grad_norm": 4.403695583343506, "learning_rate": 8.469634672278853e-06, "loss": 0.9379, "step": 5510 }, { "epoch": 1.4656914893617021, "grad_norm": 3.849709987640381, "learning_rate": 8.469001334896044e-06, "loss": 0.7691, "step": 5511 }, { "epoch": 1.4659574468085106, "grad_norm": 3.580702066421509, "learning_rate": 8.46836789017825e-06, "loss": 0.7887, "step": 5512 }, { "epoch": 1.4662234042553193, "grad_norm": 4.184311866760254, "learning_rate": 8.46773433814507e-06, "loss": 0.9119, "step": 5513 }, { "epoch": 1.4664893617021277, "grad_norm": 4.308862686157227, "learning_rate": 8.467100678816108e-06, "loss": 0.8483, "step": 5514 }, { "epoch": 1.4667553191489362, "grad_norm": 3.799316883087158, "learning_rate": 8.466466912210967e-06, "loss": 0.8143, "step": 5515 }, { "epoch": 1.4670212765957447, "grad_norm": 3.673563003540039, "learning_rate": 8.465833038349259e-06, "loss": 0.7485, "step": 5516 }, { "epoch": 1.4672872340425531, "grad_norm": 4.07314395904541, "learning_rate": 8.465199057250597e-06, "loss": 0.8663, "step": 5517 }, { "epoch": 1.4675531914893618, "grad_norm": 3.6095144748687744, "learning_rate": 8.464564968934595e-06, "loss": 0.6752, "step": 5518 }, { "epoch": 1.4678191489361703, "grad_norm": 3.661813735961914, "learning_rate": 8.463930773420874e-06, "loss": 0.8518, "step": 5519 }, { "epoch": 1.4680851063829787, "grad_norm": 4.36665153503418, "learning_rate": 8.463296470729058e-06, "loss": 0.7581, "step": 5520 }, { "epoch": 1.4683510638297872, "grad_norm": 4.145575046539307, "learning_rate": 8.462662060878772e-06, "loss": 0.8582, "step": 5521 }, { "epoch": 1.4686170212765957, "grad_norm": 3.805684804916382, "learning_rate": 8.462027543889644e-06, "loss": 0.718, "step": 5522 }, { "epoch": 1.4688829787234043, "grad_norm": 3.7820284366607666, "learning_rate": 8.461392919781309e-06, "loss": 0.7179, "step": 5523 }, { "epoch": 1.4691489361702128, "grad_norm": 4.097955226898193, "learning_rate": 8.460758188573399e-06, "loss": 0.7764, "step": 5524 }, { "epoch": 1.4694148936170213, "grad_norm": 4.177279472351074, "learning_rate": 8.46012335028556e-06, "loss": 0.8168, "step": 5525 }, { "epoch": 1.4696808510638297, "grad_norm": 4.4050679206848145, "learning_rate": 8.459488404937426e-06, "loss": 0.8876, "step": 5526 }, { "epoch": 1.4699468085106382, "grad_norm": 3.7400434017181396, "learning_rate": 8.458853352548651e-06, "loss": 0.8693, "step": 5527 }, { "epoch": 1.4702127659574469, "grad_norm": 3.909196138381958, "learning_rate": 8.458218193138881e-06, "loss": 0.8237, "step": 5528 }, { "epoch": 1.4704787234042553, "grad_norm": 3.941265344619751, "learning_rate": 8.457582926727768e-06, "loss": 0.9123, "step": 5529 }, { "epoch": 1.4707446808510638, "grad_norm": 3.8149471282958984, "learning_rate": 8.456947553334966e-06, "loss": 0.6899, "step": 5530 }, { "epoch": 1.4710106382978723, "grad_norm": 3.6952855587005615, "learning_rate": 8.45631207298014e-06, "loss": 0.7128, "step": 5531 }, { "epoch": 1.4712765957446807, "grad_norm": 3.9754221439361572, "learning_rate": 8.45567648568295e-06, "loss": 0.9245, "step": 5532 }, { "epoch": 1.4715425531914894, "grad_norm": 4.337751388549805, "learning_rate": 8.455040791463057e-06, "loss": 0.8776, "step": 5533 }, { "epoch": 1.4718085106382979, "grad_norm": 3.7709763050079346, "learning_rate": 8.454404990340137e-06, "loss": 0.6869, "step": 5534 }, { "epoch": 1.4720744680851063, "grad_norm": 4.196871280670166, "learning_rate": 8.453769082333858e-06, "loss": 0.8704, "step": 5535 }, { "epoch": 1.472340425531915, "grad_norm": 3.957577705383301, "learning_rate": 8.453133067463898e-06, "loss": 0.7857, "step": 5536 }, { "epoch": 1.4726063829787235, "grad_norm": 3.942445993423462, "learning_rate": 8.452496945749934e-06, "loss": 0.875, "step": 5537 }, { "epoch": 1.472872340425532, "grad_norm": 4.122093200683594, "learning_rate": 8.451860717211653e-06, "loss": 0.8047, "step": 5538 }, { "epoch": 1.4731382978723404, "grad_norm": 3.8919665813446045, "learning_rate": 8.451224381868735e-06, "loss": 0.9631, "step": 5539 }, { "epoch": 1.4734042553191489, "grad_norm": 4.186689376831055, "learning_rate": 8.45058793974087e-06, "loss": 0.8028, "step": 5540 }, { "epoch": 1.4736702127659576, "grad_norm": 4.130399703979492, "learning_rate": 8.449951390847754e-06, "loss": 0.7659, "step": 5541 }, { "epoch": 1.473936170212766, "grad_norm": 3.8741462230682373, "learning_rate": 8.44931473520908e-06, "loss": 0.74, "step": 5542 }, { "epoch": 1.4742021276595745, "grad_norm": 4.210333824157715, "learning_rate": 8.448677972844546e-06, "loss": 0.7675, "step": 5543 }, { "epoch": 1.474468085106383, "grad_norm": 3.959024429321289, "learning_rate": 8.448041103773857e-06, "loss": 0.8771, "step": 5544 }, { "epoch": 1.4747340425531914, "grad_norm": 3.9098892211914062, "learning_rate": 8.447404128016715e-06, "loss": 0.8756, "step": 5545 }, { "epoch": 1.475, "grad_norm": 3.9612808227539062, "learning_rate": 8.446767045592829e-06, "loss": 0.7888, "step": 5546 }, { "epoch": 1.4752659574468086, "grad_norm": 3.754507303237915, "learning_rate": 8.446129856521917e-06, "loss": 0.8611, "step": 5547 }, { "epoch": 1.475531914893617, "grad_norm": 3.97927188873291, "learning_rate": 8.445492560823686e-06, "loss": 0.7937, "step": 5548 }, { "epoch": 1.4757978723404255, "grad_norm": 3.8864712715148926, "learning_rate": 8.44485515851786e-06, "loss": 0.7687, "step": 5549 }, { "epoch": 1.476063829787234, "grad_norm": 3.407346487045288, "learning_rate": 8.44421764962416e-06, "loss": 0.8368, "step": 5550 }, { "epoch": 1.4763297872340426, "grad_norm": 4.162166118621826, "learning_rate": 8.44358003416231e-06, "loss": 0.7305, "step": 5551 }, { "epoch": 1.476595744680851, "grad_norm": 4.198580741882324, "learning_rate": 8.44294231215204e-06, "loss": 0.9471, "step": 5552 }, { "epoch": 1.4768617021276595, "grad_norm": 3.6172430515289307, "learning_rate": 8.44230448361308e-06, "loss": 0.84, "step": 5553 }, { "epoch": 1.477127659574468, "grad_norm": 3.573073387145996, "learning_rate": 8.441666548565169e-06, "loss": 0.8333, "step": 5554 }, { "epoch": 1.4773936170212765, "grad_norm": 3.864596128463745, "learning_rate": 8.441028507028041e-06, "loss": 0.7169, "step": 5555 }, { "epoch": 1.4776595744680852, "grad_norm": 3.62256121635437, "learning_rate": 8.44039035902144e-06, "loss": 0.8163, "step": 5556 }, { "epoch": 1.4779255319148936, "grad_norm": 3.8395614624023438, "learning_rate": 8.43975210456511e-06, "loss": 0.7796, "step": 5557 }, { "epoch": 1.478191489361702, "grad_norm": 3.980595111846924, "learning_rate": 8.439113743678801e-06, "loss": 0.9652, "step": 5558 }, { "epoch": 1.4784574468085108, "grad_norm": 3.7857303619384766, "learning_rate": 8.438475276382264e-06, "loss": 0.9076, "step": 5559 }, { "epoch": 1.4787234042553192, "grad_norm": 3.4477193355560303, "learning_rate": 8.437836702695253e-06, "loss": 0.727, "step": 5560 }, { "epoch": 1.4789893617021277, "grad_norm": 3.9439425468444824, "learning_rate": 8.437198022637527e-06, "loss": 0.7404, "step": 5561 }, { "epoch": 1.4792553191489362, "grad_norm": 3.8489301204681396, "learning_rate": 8.436559236228849e-06, "loss": 0.7598, "step": 5562 }, { "epoch": 1.4795212765957446, "grad_norm": 3.9537103176116943, "learning_rate": 8.435920343488978e-06, "loss": 0.81, "step": 5563 }, { "epoch": 1.4797872340425533, "grad_norm": 4.361562252044678, "learning_rate": 8.435281344437691e-06, "loss": 0.9021, "step": 5564 }, { "epoch": 1.4800531914893618, "grad_norm": 4.177056789398193, "learning_rate": 8.434642239094752e-06, "loss": 0.7916, "step": 5565 }, { "epoch": 1.4803191489361702, "grad_norm": 4.249316215515137, "learning_rate": 8.43400302747994e-06, "loss": 0.8578, "step": 5566 }, { "epoch": 1.4805851063829787, "grad_norm": 4.1586198806762695, "learning_rate": 8.43336370961303e-06, "loss": 0.7918, "step": 5567 }, { "epoch": 1.4808510638297872, "grad_norm": 3.8984861373901367, "learning_rate": 8.432724285513804e-06, "loss": 0.8302, "step": 5568 }, { "epoch": 1.4811170212765958, "grad_norm": 4.403296947479248, "learning_rate": 8.43208475520205e-06, "loss": 0.9246, "step": 5569 }, { "epoch": 1.4813829787234043, "grad_norm": 4.00664758682251, "learning_rate": 8.43144511869755e-06, "loss": 0.7915, "step": 5570 }, { "epoch": 1.4816489361702128, "grad_norm": 4.43447732925415, "learning_rate": 8.4308053760201e-06, "loss": 0.811, "step": 5571 }, { "epoch": 1.4819148936170212, "grad_norm": 4.107089519500732, "learning_rate": 8.43016552718949e-06, "loss": 0.9385, "step": 5572 }, { "epoch": 1.4821808510638297, "grad_norm": 4.0541229248046875, "learning_rate": 8.429525572225521e-06, "loss": 0.7683, "step": 5573 }, { "epoch": 1.4824468085106384, "grad_norm": 3.8049004077911377, "learning_rate": 8.428885511147994e-06, "loss": 0.8483, "step": 5574 }, { "epoch": 1.4827127659574468, "grad_norm": 4.220947265625, "learning_rate": 8.42824534397671e-06, "loss": 0.8209, "step": 5575 }, { "epoch": 1.4829787234042553, "grad_norm": 3.299015998840332, "learning_rate": 8.427605070731482e-06, "loss": 0.6946, "step": 5576 }, { "epoch": 1.4832446808510638, "grad_norm": 4.028343677520752, "learning_rate": 8.426964691432116e-06, "loss": 0.7912, "step": 5577 }, { "epoch": 1.4835106382978722, "grad_norm": 3.6714823246002197, "learning_rate": 8.426324206098429e-06, "loss": 0.7487, "step": 5578 }, { "epoch": 1.483776595744681, "grad_norm": 3.8498239517211914, "learning_rate": 8.425683614750235e-06, "loss": 0.7929, "step": 5579 }, { "epoch": 1.4840425531914894, "grad_norm": 3.6556410789489746, "learning_rate": 8.425042917407358e-06, "loss": 0.7774, "step": 5580 }, { "epoch": 1.4843085106382978, "grad_norm": 3.908780336380005, "learning_rate": 8.424402114089618e-06, "loss": 0.7533, "step": 5581 }, { "epoch": 1.4845744680851065, "grad_norm": 4.054098129272461, "learning_rate": 8.42376120481685e-06, "loss": 0.8575, "step": 5582 }, { "epoch": 1.484840425531915, "grad_norm": 4.667778968811035, "learning_rate": 8.423120189608876e-06, "loss": 0.8906, "step": 5583 }, { "epoch": 1.4851063829787234, "grad_norm": 3.960300922393799, "learning_rate": 8.422479068485531e-06, "loss": 0.7737, "step": 5584 }, { "epoch": 1.485372340425532, "grad_norm": 4.355529308319092, "learning_rate": 8.421837841466657e-06, "loss": 0.8904, "step": 5585 }, { "epoch": 1.4856382978723404, "grad_norm": 4.450819969177246, "learning_rate": 8.42119650857209e-06, "loss": 0.8558, "step": 5586 }, { "epoch": 1.485904255319149, "grad_norm": 3.8777942657470703, "learning_rate": 8.420555069821679e-06, "loss": 0.8021, "step": 5587 }, { "epoch": 1.4861702127659575, "grad_norm": 3.9618871212005615, "learning_rate": 8.419913525235264e-06, "loss": 0.8717, "step": 5588 }, { "epoch": 1.486436170212766, "grad_norm": 3.7627811431884766, "learning_rate": 8.419271874832697e-06, "loss": 0.7337, "step": 5589 }, { "epoch": 1.4867021276595744, "grad_norm": 3.9509243965148926, "learning_rate": 8.418630118633835e-06, "loss": 0.8209, "step": 5590 }, { "epoch": 1.486968085106383, "grad_norm": 3.8642148971557617, "learning_rate": 8.417988256658532e-06, "loss": 0.7907, "step": 5591 }, { "epoch": 1.4872340425531916, "grad_norm": 3.917509078979492, "learning_rate": 8.417346288926646e-06, "loss": 0.8037, "step": 5592 }, { "epoch": 1.4875, "grad_norm": 3.5143251419067383, "learning_rate": 8.416704215458042e-06, "loss": 0.8127, "step": 5593 }, { "epoch": 1.4877659574468085, "grad_norm": 4.229488372802734, "learning_rate": 8.41606203627259e-06, "loss": 0.8681, "step": 5594 }, { "epoch": 1.488031914893617, "grad_norm": 3.636591911315918, "learning_rate": 8.415419751390155e-06, "loss": 0.8858, "step": 5595 }, { "epoch": 1.4882978723404254, "grad_norm": 3.9129700660705566, "learning_rate": 8.414777360830611e-06, "loss": 0.8607, "step": 5596 }, { "epoch": 1.4885638297872341, "grad_norm": 4.00184965133667, "learning_rate": 8.414134864613837e-06, "loss": 0.7551, "step": 5597 }, { "epoch": 1.4888297872340426, "grad_norm": 3.9038429260253906, "learning_rate": 8.413492262759708e-06, "loss": 0.7195, "step": 5598 }, { "epoch": 1.489095744680851, "grad_norm": 3.802076816558838, "learning_rate": 8.412849555288111e-06, "loss": 0.8092, "step": 5599 }, { "epoch": 1.4893617021276595, "grad_norm": 4.020835876464844, "learning_rate": 8.41220674221893e-06, "loss": 0.8439, "step": 5600 }, { "epoch": 1.489627659574468, "grad_norm": 4.310454845428467, "learning_rate": 8.411563823572057e-06, "loss": 0.959, "step": 5601 }, { "epoch": 1.4898936170212767, "grad_norm": 4.212212085723877, "learning_rate": 8.410920799367382e-06, "loss": 0.784, "step": 5602 }, { "epoch": 1.4901595744680851, "grad_norm": 3.9010252952575684, "learning_rate": 8.4102776696248e-06, "loss": 0.7156, "step": 5603 }, { "epoch": 1.4904255319148936, "grad_norm": 4.061422348022461, "learning_rate": 8.409634434364214e-06, "loss": 0.8524, "step": 5604 }, { "epoch": 1.4906914893617023, "grad_norm": 4.281171798706055, "learning_rate": 8.408991093605524e-06, "loss": 0.8344, "step": 5605 }, { "epoch": 1.4909574468085105, "grad_norm": 4.274752616882324, "learning_rate": 8.408347647368634e-06, "loss": 0.8106, "step": 5606 }, { "epoch": 1.4912234042553192, "grad_norm": 3.9846606254577637, "learning_rate": 8.407704095673454e-06, "loss": 0.7059, "step": 5607 }, { "epoch": 1.4914893617021276, "grad_norm": 4.1280436515808105, "learning_rate": 8.4070604385399e-06, "loss": 0.8267, "step": 5608 }, { "epoch": 1.491755319148936, "grad_norm": 3.7875635623931885, "learning_rate": 8.406416675987884e-06, "loss": 0.8078, "step": 5609 }, { "epoch": 1.4920212765957448, "grad_norm": 4.4207444190979, "learning_rate": 8.405772808037326e-06, "loss": 0.8452, "step": 5610 }, { "epoch": 1.4922872340425533, "grad_norm": 3.9423201084136963, "learning_rate": 8.405128834708147e-06, "loss": 0.7491, "step": 5611 }, { "epoch": 1.4925531914893617, "grad_norm": 3.669431686401367, "learning_rate": 8.404484756020272e-06, "loss": 0.7232, "step": 5612 }, { "epoch": 1.4928191489361702, "grad_norm": 4.371226787567139, "learning_rate": 8.403840571993631e-06, "loss": 0.7899, "step": 5613 }, { "epoch": 1.4930851063829786, "grad_norm": 4.185215950012207, "learning_rate": 8.403196282648156e-06, "loss": 0.9727, "step": 5614 }, { "epoch": 1.4933510638297873, "grad_norm": 3.5517239570617676, "learning_rate": 8.402551888003781e-06, "loss": 0.805, "step": 5615 }, { "epoch": 1.4936170212765958, "grad_norm": 3.4188995361328125, "learning_rate": 8.401907388080443e-06, "loss": 0.7345, "step": 5616 }, { "epoch": 1.4938829787234043, "grad_norm": 3.7187201976776123, "learning_rate": 8.401262782898087e-06, "loss": 0.7147, "step": 5617 }, { "epoch": 1.4941489361702127, "grad_norm": 4.5645976066589355, "learning_rate": 8.400618072476655e-06, "loss": 0.8707, "step": 5618 }, { "epoch": 1.4944148936170212, "grad_norm": 3.7568912506103516, "learning_rate": 8.399973256836097e-06, "loss": 0.8637, "step": 5619 }, { "epoch": 1.4946808510638299, "grad_norm": 4.120610237121582, "learning_rate": 8.399328335996362e-06, "loss": 0.8749, "step": 5620 }, { "epoch": 1.4949468085106383, "grad_norm": 3.780111312866211, "learning_rate": 8.398683309977407e-06, "loss": 0.739, "step": 5621 }, { "epoch": 1.4952127659574468, "grad_norm": 4.050705909729004, "learning_rate": 8.39803817879919e-06, "loss": 0.869, "step": 5622 }, { "epoch": 1.4954787234042553, "grad_norm": 3.941727876663208, "learning_rate": 8.39739294248167e-06, "loss": 0.8147, "step": 5623 }, { "epoch": 1.4957446808510637, "grad_norm": 4.117156505584717, "learning_rate": 8.396747601044812e-06, "loss": 0.843, "step": 5624 }, { "epoch": 1.4960106382978724, "grad_norm": 3.813788890838623, "learning_rate": 8.396102154508584e-06, "loss": 0.7214, "step": 5625 }, { "epoch": 1.4962765957446809, "grad_norm": 4.435267448425293, "learning_rate": 8.395456602892957e-06, "loss": 0.9548, "step": 5626 }, { "epoch": 1.4965425531914893, "grad_norm": 4.178934097290039, "learning_rate": 8.394810946217905e-06, "loss": 0.797, "step": 5627 }, { "epoch": 1.496808510638298, "grad_norm": 4.201347827911377, "learning_rate": 8.394165184503406e-06, "loss": 0.8086, "step": 5628 }, { "epoch": 1.4970744680851062, "grad_norm": 4.090775489807129, "learning_rate": 8.39351931776944e-06, "loss": 0.8206, "step": 5629 }, { "epoch": 1.497340425531915, "grad_norm": 3.81706166267395, "learning_rate": 8.392873346035992e-06, "loss": 0.7876, "step": 5630 }, { "epoch": 1.4976063829787234, "grad_norm": 4.212119102478027, "learning_rate": 8.392227269323046e-06, "loss": 0.8634, "step": 5631 }, { "epoch": 1.4978723404255319, "grad_norm": 4.333573818206787, "learning_rate": 8.391581087650596e-06, "loss": 0.8157, "step": 5632 }, { "epoch": 1.4981382978723405, "grad_norm": 4.08198356628418, "learning_rate": 8.390934801038632e-06, "loss": 0.8804, "step": 5633 }, { "epoch": 1.498404255319149, "grad_norm": 3.6360666751861572, "learning_rate": 8.390288409507156e-06, "loss": 0.6327, "step": 5634 }, { "epoch": 1.4986702127659575, "grad_norm": 4.428205490112305, "learning_rate": 8.389641913076163e-06, "loss": 0.8857, "step": 5635 }, { "epoch": 1.498936170212766, "grad_norm": 4.506261825561523, "learning_rate": 8.388995311765657e-06, "loss": 0.8376, "step": 5636 }, { "epoch": 1.4992021276595744, "grad_norm": 3.7618744373321533, "learning_rate": 8.388348605595649e-06, "loss": 0.8656, "step": 5637 }, { "epoch": 1.499468085106383, "grad_norm": 3.843425750732422, "learning_rate": 8.387701794586145e-06, "loss": 0.7474, "step": 5638 }, { "epoch": 1.4997340425531915, "grad_norm": 3.933223009109497, "learning_rate": 8.387054878757157e-06, "loss": 0.9316, "step": 5639 }, { "epoch": 1.5, "grad_norm": 3.8141305446624756, "learning_rate": 8.386407858128707e-06, "loss": 0.7359, "step": 5640 }, { "epoch": 1.5002659574468085, "grad_norm": 4.184633731842041, "learning_rate": 8.385760732720809e-06, "loss": 0.8206, "step": 5641 }, { "epoch": 1.500531914893617, "grad_norm": 3.9276089668273926, "learning_rate": 8.385113502553487e-06, "loss": 0.8148, "step": 5642 }, { "epoch": 1.5007978723404256, "grad_norm": 4.084725856781006, "learning_rate": 8.384466167646768e-06, "loss": 0.8435, "step": 5643 }, { "epoch": 1.501063829787234, "grad_norm": 4.092894077301025, "learning_rate": 8.383818728020681e-06, "loss": 0.7876, "step": 5644 }, { "epoch": 1.5013297872340425, "grad_norm": 3.6473567485809326, "learning_rate": 8.383171183695258e-06, "loss": 0.7427, "step": 5645 }, { "epoch": 1.5015957446808512, "grad_norm": 4.224092483520508, "learning_rate": 8.382523534690537e-06, "loss": 0.8959, "step": 5646 }, { "epoch": 1.5018617021276595, "grad_norm": 4.414750576019287, "learning_rate": 8.381875781026553e-06, "loss": 0.746, "step": 5647 }, { "epoch": 1.5021276595744681, "grad_norm": 4.199521064758301, "learning_rate": 8.381227922723353e-06, "loss": 0.8083, "step": 5648 }, { "epoch": 1.5023936170212766, "grad_norm": 3.8716115951538086, "learning_rate": 8.380579959800981e-06, "loss": 0.7007, "step": 5649 }, { "epoch": 1.502659574468085, "grad_norm": 4.189701080322266, "learning_rate": 8.379931892279483e-06, "loss": 0.7694, "step": 5650 }, { "epoch": 1.5029255319148938, "grad_norm": 3.577147960662842, "learning_rate": 8.379283720178913e-06, "loss": 0.7776, "step": 5651 }, { "epoch": 1.503191489361702, "grad_norm": 4.009932994842529, "learning_rate": 8.378635443519327e-06, "loss": 0.7633, "step": 5652 }, { "epoch": 1.5034574468085107, "grad_norm": 4.129024505615234, "learning_rate": 8.377987062320782e-06, "loss": 0.7067, "step": 5653 }, { "epoch": 1.5037234042553191, "grad_norm": 3.6017751693725586, "learning_rate": 8.37733857660334e-06, "loss": 0.7983, "step": 5654 }, { "epoch": 1.5039893617021276, "grad_norm": 3.799006223678589, "learning_rate": 8.376689986387066e-06, "loss": 0.8479, "step": 5655 }, { "epoch": 1.5042553191489363, "grad_norm": 4.5062575340271, "learning_rate": 8.376041291692028e-06, "loss": 0.8298, "step": 5656 }, { "epoch": 1.5045212765957445, "grad_norm": 3.729353666305542, "learning_rate": 8.3753924925383e-06, "loss": 0.7688, "step": 5657 }, { "epoch": 1.5047872340425532, "grad_norm": 4.237773418426514, "learning_rate": 8.374743588945951e-06, "loss": 0.9623, "step": 5658 }, { "epoch": 1.5050531914893617, "grad_norm": 3.5734505653381348, "learning_rate": 8.374094580935064e-06, "loss": 0.6333, "step": 5659 }, { "epoch": 1.5053191489361701, "grad_norm": 3.711700677871704, "learning_rate": 8.373445468525719e-06, "loss": 0.8401, "step": 5660 }, { "epoch": 1.5055851063829788, "grad_norm": 3.8051505088806152, "learning_rate": 8.372796251737995e-06, "loss": 0.7845, "step": 5661 }, { "epoch": 1.5058510638297873, "grad_norm": 3.983067750930786, "learning_rate": 8.372146930591988e-06, "loss": 0.8886, "step": 5662 }, { "epoch": 1.5061170212765957, "grad_norm": 3.872107744216919, "learning_rate": 8.371497505107784e-06, "loss": 0.8892, "step": 5663 }, { "epoch": 1.5063829787234042, "grad_norm": 4.311370849609375, "learning_rate": 8.370847975305479e-06, "loss": 0.8369, "step": 5664 }, { "epoch": 1.5066489361702127, "grad_norm": 3.470078706741333, "learning_rate": 8.370198341205167e-06, "loss": 0.7035, "step": 5665 }, { "epoch": 1.5069148936170214, "grad_norm": 3.7826905250549316, "learning_rate": 8.369548602826951e-06, "loss": 0.8478, "step": 5666 }, { "epoch": 1.5071808510638298, "grad_norm": 4.1136603355407715, "learning_rate": 8.368898760190933e-06, "loss": 0.7812, "step": 5667 }, { "epoch": 1.5074468085106383, "grad_norm": 3.856652021408081, "learning_rate": 8.368248813317221e-06, "loss": 0.7926, "step": 5668 }, { "epoch": 1.507712765957447, "grad_norm": 4.0616865158081055, "learning_rate": 8.367598762225929e-06, "loss": 0.7884, "step": 5669 }, { "epoch": 1.5079787234042552, "grad_norm": 4.08623743057251, "learning_rate": 8.366948606937161e-06, "loss": 0.8499, "step": 5670 }, { "epoch": 1.508244680851064, "grad_norm": 4.225100517272949, "learning_rate": 8.366298347471043e-06, "loss": 0.8145, "step": 5671 }, { "epoch": 1.5085106382978724, "grad_norm": 4.046361923217773, "learning_rate": 8.36564798384769e-06, "loss": 0.6879, "step": 5672 }, { "epoch": 1.5087765957446808, "grad_norm": 4.1829833984375, "learning_rate": 8.364997516087224e-06, "loss": 0.7828, "step": 5673 }, { "epoch": 1.5090425531914895, "grad_norm": 3.750427484512329, "learning_rate": 8.364346944209774e-06, "loss": 0.7639, "step": 5674 }, { "epoch": 1.5093085106382977, "grad_norm": 4.194416522979736, "learning_rate": 8.36369626823547e-06, "loss": 0.8308, "step": 5675 }, { "epoch": 1.5095744680851064, "grad_norm": 4.148036003112793, "learning_rate": 8.363045488184443e-06, "loss": 0.7443, "step": 5676 }, { "epoch": 1.5098404255319149, "grad_norm": 3.7398674488067627, "learning_rate": 8.362394604076827e-06, "loss": 0.8633, "step": 5677 }, { "epoch": 1.5101063829787233, "grad_norm": 3.8514955043792725, "learning_rate": 8.361743615932765e-06, "loss": 0.797, "step": 5678 }, { "epoch": 1.510372340425532, "grad_norm": 4.254388809204102, "learning_rate": 8.361092523772396e-06, "loss": 0.8425, "step": 5679 }, { "epoch": 1.5106382978723403, "grad_norm": 4.257145881652832, "learning_rate": 8.360441327615868e-06, "loss": 0.7964, "step": 5680 }, { "epoch": 1.510904255319149, "grad_norm": 3.9065487384796143, "learning_rate": 8.35979002748333e-06, "loss": 0.837, "step": 5681 }, { "epoch": 1.5111702127659574, "grad_norm": 4.575162410736084, "learning_rate": 8.359138623394931e-06, "loss": 0.9485, "step": 5682 }, { "epoch": 1.5114361702127659, "grad_norm": 4.180033206939697, "learning_rate": 8.35848711537083e-06, "loss": 0.8287, "step": 5683 }, { "epoch": 1.5117021276595746, "grad_norm": 4.284930229187012, "learning_rate": 8.357835503431182e-06, "loss": 0.8548, "step": 5684 }, { "epoch": 1.511968085106383, "grad_norm": 3.8655450344085693, "learning_rate": 8.357183787596151e-06, "loss": 0.7792, "step": 5685 }, { "epoch": 1.5122340425531915, "grad_norm": 3.840792655944824, "learning_rate": 8.356531967885899e-06, "loss": 0.7953, "step": 5686 }, { "epoch": 1.5125, "grad_norm": 3.675896406173706, "learning_rate": 8.355880044320599e-06, "loss": 0.7667, "step": 5687 }, { "epoch": 1.5127659574468084, "grad_norm": 3.6345510482788086, "learning_rate": 8.355228016920417e-06, "loss": 0.8588, "step": 5688 }, { "epoch": 1.513031914893617, "grad_norm": 3.8645408153533936, "learning_rate": 8.354575885705532e-06, "loss": 0.862, "step": 5689 }, { "epoch": 1.5132978723404256, "grad_norm": 4.727093696594238, "learning_rate": 8.353923650696119e-06, "loss": 0.8419, "step": 5690 }, { "epoch": 1.513563829787234, "grad_norm": 4.074021816253662, "learning_rate": 8.353271311912357e-06, "loss": 0.7486, "step": 5691 }, { "epoch": 1.5138297872340427, "grad_norm": 3.9446327686309814, "learning_rate": 8.352618869374435e-06, "loss": 0.7721, "step": 5692 }, { "epoch": 1.514095744680851, "grad_norm": 3.839276075363159, "learning_rate": 8.351966323102538e-06, "loss": 0.7744, "step": 5693 }, { "epoch": 1.5143617021276596, "grad_norm": 4.190333366394043, "learning_rate": 8.351313673116856e-06, "loss": 0.8085, "step": 5694 }, { "epoch": 1.514627659574468, "grad_norm": 3.8334741592407227, "learning_rate": 8.350660919437585e-06, "loss": 0.933, "step": 5695 }, { "epoch": 1.5148936170212766, "grad_norm": 3.766174793243408, "learning_rate": 8.350008062084918e-06, "loss": 0.7537, "step": 5696 }, { "epoch": 1.5151595744680852, "grad_norm": 4.281386852264404, "learning_rate": 8.349355101079058e-06, "loss": 0.8714, "step": 5697 }, { "epoch": 1.5154255319148935, "grad_norm": 3.8533146381378174, "learning_rate": 8.348702036440209e-06, "loss": 0.8423, "step": 5698 }, { "epoch": 1.5156914893617022, "grad_norm": 4.271562099456787, "learning_rate": 8.348048868188574e-06, "loss": 0.9832, "step": 5699 }, { "epoch": 1.5159574468085106, "grad_norm": 4.475942611694336, "learning_rate": 8.347395596344365e-06, "loss": 0.9984, "step": 5700 }, { "epoch": 1.516223404255319, "grad_norm": 4.308716773986816, "learning_rate": 8.346742220927798e-06, "loss": 0.8947, "step": 5701 }, { "epoch": 1.5164893617021278, "grad_norm": 4.1707587242126465, "learning_rate": 8.346088741959085e-06, "loss": 0.9077, "step": 5702 }, { "epoch": 1.516755319148936, "grad_norm": 4.016225337982178, "learning_rate": 8.345435159458445e-06, "loss": 0.9186, "step": 5703 }, { "epoch": 1.5170212765957447, "grad_norm": 4.131173133850098, "learning_rate": 8.344781473446106e-06, "loss": 0.708, "step": 5704 }, { "epoch": 1.5172872340425532, "grad_norm": 4.118223667144775, "learning_rate": 8.344127683942289e-06, "loss": 0.815, "step": 5705 }, { "epoch": 1.5175531914893616, "grad_norm": 4.08048677444458, "learning_rate": 8.343473790967223e-06, "loss": 0.7402, "step": 5706 }, { "epoch": 1.5178191489361703, "grad_norm": 4.256683826446533, "learning_rate": 8.342819794541143e-06, "loss": 0.9272, "step": 5707 }, { "epoch": 1.5180851063829788, "grad_norm": 3.6859428882598877, "learning_rate": 8.34216569468428e-06, "loss": 0.8052, "step": 5708 }, { "epoch": 1.5183510638297872, "grad_norm": 4.601988315582275, "learning_rate": 8.341511491416877e-06, "loss": 0.7638, "step": 5709 }, { "epoch": 1.5186170212765957, "grad_norm": 3.8631575107574463, "learning_rate": 8.340857184759178e-06, "loss": 0.8282, "step": 5710 }, { "epoch": 1.5188829787234042, "grad_norm": 4.184502124786377, "learning_rate": 8.34020277473142e-06, "loss": 0.8513, "step": 5711 }, { "epoch": 1.5191489361702128, "grad_norm": 3.9446780681610107, "learning_rate": 8.339548261353856e-06, "loss": 0.6634, "step": 5712 }, { "epoch": 1.5194148936170213, "grad_norm": 3.9360363483428955, "learning_rate": 8.338893644646739e-06, "loss": 0.7769, "step": 5713 }, { "epoch": 1.5196808510638298, "grad_norm": 3.235274314880371, "learning_rate": 8.33823892463032e-06, "loss": 0.7531, "step": 5714 }, { "epoch": 1.5199468085106385, "grad_norm": 3.941875696182251, "learning_rate": 8.337584101324859e-06, "loss": 0.7937, "step": 5715 }, { "epoch": 1.5202127659574467, "grad_norm": 3.7710206508636475, "learning_rate": 8.336929174750616e-06, "loss": 0.8403, "step": 5716 }, { "epoch": 1.5204787234042554, "grad_norm": 4.109030246734619, "learning_rate": 8.336274144927855e-06, "loss": 0.6704, "step": 5717 }, { "epoch": 1.5207446808510638, "grad_norm": 3.7918636798858643, "learning_rate": 8.335619011876846e-06, "loss": 0.7756, "step": 5718 }, { "epoch": 1.5210106382978723, "grad_norm": 3.633254051208496, "learning_rate": 8.334963775617854e-06, "loss": 0.7325, "step": 5719 }, { "epoch": 1.521276595744681, "grad_norm": 3.994147539138794, "learning_rate": 8.334308436171159e-06, "loss": 0.8936, "step": 5720 }, { "epoch": 1.5215425531914892, "grad_norm": 3.5977087020874023, "learning_rate": 8.333652993557035e-06, "loss": 0.8429, "step": 5721 }, { "epoch": 1.521808510638298, "grad_norm": 3.7515316009521484, "learning_rate": 8.332997447795763e-06, "loss": 0.8329, "step": 5722 }, { "epoch": 1.5220744680851064, "grad_norm": 3.969116449356079, "learning_rate": 8.332341798907624e-06, "loss": 0.804, "step": 5723 }, { "epoch": 1.5223404255319148, "grad_norm": 3.915306329727173, "learning_rate": 8.331686046912908e-06, "loss": 0.9369, "step": 5724 }, { "epoch": 1.5226063829787235, "grad_norm": 3.7423787117004395, "learning_rate": 8.331030191831904e-06, "loss": 0.8416, "step": 5725 }, { "epoch": 1.5228723404255318, "grad_norm": 3.554068088531494, "learning_rate": 8.3303742336849e-06, "loss": 0.7121, "step": 5726 }, { "epoch": 1.5231382978723405, "grad_norm": 4.019564628601074, "learning_rate": 8.3297181724922e-06, "loss": 0.7882, "step": 5727 }, { "epoch": 1.523404255319149, "grad_norm": 4.351405143737793, "learning_rate": 8.3290620082741e-06, "loss": 0.8769, "step": 5728 }, { "epoch": 1.5236702127659574, "grad_norm": 3.942936658859253, "learning_rate": 8.328405741050901e-06, "loss": 0.924, "step": 5729 }, { "epoch": 1.523936170212766, "grad_norm": 4.362167835235596, "learning_rate": 8.327749370842909e-06, "loss": 0.8015, "step": 5730 }, { "epoch": 1.5242021276595743, "grad_norm": 3.7932353019714355, "learning_rate": 8.327092897670432e-06, "loss": 0.7993, "step": 5731 }, { "epoch": 1.524468085106383, "grad_norm": 3.8214194774627686, "learning_rate": 8.326436321553785e-06, "loss": 0.7971, "step": 5732 }, { "epoch": 1.5247340425531914, "grad_norm": 4.244415760040283, "learning_rate": 8.325779642513283e-06, "loss": 0.7253, "step": 5733 }, { "epoch": 1.525, "grad_norm": 4.184083938598633, "learning_rate": 8.325122860569241e-06, "loss": 0.7849, "step": 5734 }, { "epoch": 1.5252659574468086, "grad_norm": 4.359492301940918, "learning_rate": 8.324465975741986e-06, "loss": 0.8228, "step": 5735 }, { "epoch": 1.525531914893617, "grad_norm": 3.8751020431518555, "learning_rate": 8.323808988051837e-06, "loss": 0.7288, "step": 5736 }, { "epoch": 1.5257978723404255, "grad_norm": 4.366562843322754, "learning_rate": 8.323151897519126e-06, "loss": 0.8452, "step": 5737 }, { "epoch": 1.5260638297872342, "grad_norm": 4.116846561431885, "learning_rate": 8.322494704164182e-06, "loss": 0.9376, "step": 5738 }, { "epoch": 1.5263297872340424, "grad_norm": 4.062334060668945, "learning_rate": 8.321837408007341e-06, "loss": 0.855, "step": 5739 }, { "epoch": 1.5265957446808511, "grad_norm": 4.4059014320373535, "learning_rate": 8.321180009068937e-06, "loss": 0.8832, "step": 5740 }, { "epoch": 1.5268617021276596, "grad_norm": 4.124050140380859, "learning_rate": 8.320522507369315e-06, "loss": 0.7446, "step": 5741 }, { "epoch": 1.527127659574468, "grad_norm": 3.721942901611328, "learning_rate": 8.319864902928819e-06, "loss": 0.8547, "step": 5742 }, { "epoch": 1.5273936170212767, "grad_norm": 3.816612720489502, "learning_rate": 8.31920719576779e-06, "loss": 0.8478, "step": 5743 }, { "epoch": 1.527659574468085, "grad_norm": 4.217785835266113, "learning_rate": 8.318549385906587e-06, "loss": 0.8573, "step": 5744 }, { "epoch": 1.5279255319148937, "grad_norm": 4.105627536773682, "learning_rate": 8.317891473365558e-06, "loss": 0.8891, "step": 5745 }, { "epoch": 1.5281914893617021, "grad_norm": 4.537158966064453, "learning_rate": 8.317233458165059e-06, "loss": 0.9119, "step": 5746 }, { "epoch": 1.5284574468085106, "grad_norm": 4.287096977233887, "learning_rate": 8.31657534032545e-06, "loss": 0.8465, "step": 5747 }, { "epoch": 1.5287234042553193, "grad_norm": 4.125601291656494, "learning_rate": 8.315917119867098e-06, "loss": 0.7537, "step": 5748 }, { "epoch": 1.5289893617021275, "grad_norm": 4.014163017272949, "learning_rate": 8.315258796810366e-06, "loss": 0.7572, "step": 5749 }, { "epoch": 1.5292553191489362, "grad_norm": 3.912703514099121, "learning_rate": 8.314600371175623e-06, "loss": 0.7825, "step": 5750 }, { "epoch": 1.5295212765957447, "grad_norm": 3.731410264968872, "learning_rate": 8.313941842983243e-06, "loss": 0.9015, "step": 5751 }, { "epoch": 1.5297872340425531, "grad_norm": 4.122485160827637, "learning_rate": 8.313283212253598e-06, "loss": 0.8381, "step": 5752 }, { "epoch": 1.5300531914893618, "grad_norm": 4.2268757820129395, "learning_rate": 8.312624479007072e-06, "loss": 0.788, "step": 5753 }, { "epoch": 1.53031914893617, "grad_norm": 4.129693508148193, "learning_rate": 8.311965643264042e-06, "loss": 0.6951, "step": 5754 }, { "epoch": 1.5305851063829787, "grad_norm": 4.038047790527344, "learning_rate": 8.311306705044898e-06, "loss": 0.834, "step": 5755 }, { "epoch": 1.5308510638297872, "grad_norm": 3.85589599609375, "learning_rate": 8.310647664370026e-06, "loss": 0.8583, "step": 5756 }, { "epoch": 1.5311170212765957, "grad_norm": 3.889176845550537, "learning_rate": 8.309988521259816e-06, "loss": 0.8361, "step": 5757 }, { "epoch": 1.5313829787234043, "grad_norm": 4.0538458824157715, "learning_rate": 8.309329275734664e-06, "loss": 0.6951, "step": 5758 }, { "epoch": 1.5316489361702128, "grad_norm": 4.010767936706543, "learning_rate": 8.30866992781497e-06, "loss": 0.8313, "step": 5759 }, { "epoch": 1.5319148936170213, "grad_norm": 3.897259473800659, "learning_rate": 8.30801047752113e-06, "loss": 0.7736, "step": 5760 }, { "epoch": 1.53218085106383, "grad_norm": 4.07016134262085, "learning_rate": 8.307350924873553e-06, "loss": 0.8231, "step": 5761 }, { "epoch": 1.5324468085106382, "grad_norm": 3.886470317840576, "learning_rate": 8.306691269892646e-06, "loss": 0.8535, "step": 5762 }, { "epoch": 1.5327127659574469, "grad_norm": 3.458498477935791, "learning_rate": 8.306031512598815e-06, "loss": 0.7291, "step": 5763 }, { "epoch": 1.5329787234042553, "grad_norm": 3.6657865047454834, "learning_rate": 8.305371653012479e-06, "loss": 0.8239, "step": 5764 }, { "epoch": 1.5332446808510638, "grad_norm": 4.054435729980469, "learning_rate": 8.304711691154052e-06, "loss": 0.7947, "step": 5765 }, { "epoch": 1.5335106382978725, "grad_norm": 4.395258903503418, "learning_rate": 8.304051627043952e-06, "loss": 0.8615, "step": 5766 }, { "epoch": 1.5337765957446807, "grad_norm": 4.212094306945801, "learning_rate": 8.303391460702607e-06, "loss": 0.7645, "step": 5767 }, { "epoch": 1.5340425531914894, "grad_norm": 4.2090044021606445, "learning_rate": 8.302731192150441e-06, "loss": 0.8463, "step": 5768 }, { "epoch": 1.5343085106382979, "grad_norm": 3.734283685684204, "learning_rate": 8.302070821407882e-06, "loss": 0.7986, "step": 5769 }, { "epoch": 1.5345744680851063, "grad_norm": 4.0931291580200195, "learning_rate": 8.301410348495366e-06, "loss": 0.7541, "step": 5770 }, { "epoch": 1.534840425531915, "grad_norm": 3.604841470718384, "learning_rate": 8.300749773433325e-06, "loss": 0.8511, "step": 5771 }, { "epoch": 1.5351063829787233, "grad_norm": 3.881558895111084, "learning_rate": 8.300089096242201e-06, "loss": 0.7382, "step": 5772 }, { "epoch": 1.535372340425532, "grad_norm": 3.472681760787964, "learning_rate": 8.299428316942435e-06, "loss": 0.7106, "step": 5773 }, { "epoch": 1.5356382978723404, "grad_norm": 3.5763661861419678, "learning_rate": 8.298767435554473e-06, "loss": 0.6924, "step": 5774 }, { "epoch": 1.5359042553191489, "grad_norm": 3.965982437133789, "learning_rate": 8.298106452098761e-06, "loss": 0.8163, "step": 5775 }, { "epoch": 1.5361702127659576, "grad_norm": 3.9243502616882324, "learning_rate": 8.297445366595754e-06, "loss": 0.8372, "step": 5776 }, { "epoch": 1.5364361702127658, "grad_norm": 3.8713953495025635, "learning_rate": 8.296784179065904e-06, "loss": 0.7919, "step": 5777 }, { "epoch": 1.5367021276595745, "grad_norm": 3.7591898441314697, "learning_rate": 8.29612288952967e-06, "loss": 0.8597, "step": 5778 }, { "epoch": 1.536968085106383, "grad_norm": 4.25253438949585, "learning_rate": 8.295461498007513e-06, "loss": 1.0482, "step": 5779 }, { "epoch": 1.5372340425531914, "grad_norm": 3.846035957336426, "learning_rate": 8.294800004519895e-06, "loss": 0.8348, "step": 5780 }, { "epoch": 1.5375, "grad_norm": 3.652987003326416, "learning_rate": 8.29413840908729e-06, "loss": 0.7409, "step": 5781 }, { "epoch": 1.5377659574468086, "grad_norm": 4.131805419921875, "learning_rate": 8.293476711730163e-06, "loss": 0.8703, "step": 5782 }, { "epoch": 1.538031914893617, "grad_norm": 4.142578125, "learning_rate": 8.292814912468988e-06, "loss": 0.881, "step": 5783 }, { "epoch": 1.5382978723404257, "grad_norm": 3.5386013984680176, "learning_rate": 8.292153011324242e-06, "loss": 0.7984, "step": 5784 }, { "epoch": 1.538563829787234, "grad_norm": 4.26931619644165, "learning_rate": 8.291491008316409e-06, "loss": 0.8968, "step": 5785 }, { "epoch": 1.5388297872340426, "grad_norm": 4.214763164520264, "learning_rate": 8.290828903465965e-06, "loss": 0.7912, "step": 5786 }, { "epoch": 1.539095744680851, "grad_norm": 4.008779525756836, "learning_rate": 8.290166696793405e-06, "loss": 0.8708, "step": 5787 }, { "epoch": 1.5393617021276595, "grad_norm": 3.722784996032715, "learning_rate": 8.28950438831921e-06, "loss": 0.8047, "step": 5788 }, { "epoch": 1.5396276595744682, "grad_norm": 3.9850144386291504, "learning_rate": 8.288841978063877e-06, "loss": 0.8583, "step": 5789 }, { "epoch": 1.5398936170212765, "grad_norm": 3.7640953063964844, "learning_rate": 8.288179466047903e-06, "loss": 0.899, "step": 5790 }, { "epoch": 1.5401595744680852, "grad_norm": 3.9535369873046875, "learning_rate": 8.287516852291784e-06, "loss": 0.671, "step": 5791 }, { "epoch": 1.5404255319148936, "grad_norm": 3.784611940383911, "learning_rate": 8.28685413681602e-06, "loss": 0.955, "step": 5792 }, { "epoch": 1.540691489361702, "grad_norm": 4.205324172973633, "learning_rate": 8.286191319641123e-06, "loss": 0.8411, "step": 5793 }, { "epoch": 1.5409574468085108, "grad_norm": 4.253503322601318, "learning_rate": 8.285528400787597e-06, "loss": 0.7707, "step": 5794 }, { "epoch": 1.541223404255319, "grad_norm": 3.7679977416992188, "learning_rate": 8.284865380275953e-06, "loss": 0.9103, "step": 5795 }, { "epoch": 1.5414893617021277, "grad_norm": 4.094081878662109, "learning_rate": 8.284202258126706e-06, "loss": 0.9798, "step": 5796 }, { "epoch": 1.5417553191489362, "grad_norm": 4.189050674438477, "learning_rate": 8.283539034360376e-06, "loss": 0.8641, "step": 5797 }, { "epoch": 1.5420212765957446, "grad_norm": 4.017099857330322, "learning_rate": 8.282875708997482e-06, "loss": 0.8214, "step": 5798 }, { "epoch": 1.5422872340425533, "grad_norm": 3.6189417839050293, "learning_rate": 8.282212282058549e-06, "loss": 0.7486, "step": 5799 }, { "epoch": 1.5425531914893615, "grad_norm": 4.480672359466553, "learning_rate": 8.281548753564101e-06, "loss": 0.9041, "step": 5800 }, { "epoch": 1.5428191489361702, "grad_norm": 4.047300338745117, "learning_rate": 8.280885123534673e-06, "loss": 0.9519, "step": 5801 }, { "epoch": 1.5430851063829787, "grad_norm": 4.379581928253174, "learning_rate": 8.280221391990797e-06, "loss": 0.9203, "step": 5802 }, { "epoch": 1.5433510638297872, "grad_norm": 4.053439140319824, "learning_rate": 8.279557558953009e-06, "loss": 0.7759, "step": 5803 }, { "epoch": 1.5436170212765958, "grad_norm": 3.927568197250366, "learning_rate": 8.278893624441849e-06, "loss": 0.7132, "step": 5804 }, { "epoch": 1.5438829787234043, "grad_norm": 4.322382926940918, "learning_rate": 8.278229588477857e-06, "loss": 0.8272, "step": 5805 }, { "epoch": 1.5441489361702128, "grad_norm": 3.6044352054595947, "learning_rate": 8.277565451081587e-06, "loss": 0.7487, "step": 5806 }, { "epoch": 1.5444148936170212, "grad_norm": 3.7423501014709473, "learning_rate": 8.27690121227358e-06, "loss": 0.7342, "step": 5807 }, { "epoch": 1.5446808510638297, "grad_norm": 3.7679383754730225, "learning_rate": 8.27623687207439e-06, "loss": 0.7897, "step": 5808 }, { "epoch": 1.5449468085106384, "grad_norm": 3.7263903617858887, "learning_rate": 8.275572430504578e-06, "loss": 0.8311, "step": 5809 }, { "epoch": 1.5452127659574468, "grad_norm": 3.551025390625, "learning_rate": 8.274907887584695e-06, "loss": 0.6916, "step": 5810 }, { "epoch": 1.5454787234042553, "grad_norm": 3.8874595165252686, "learning_rate": 8.274243243335307e-06, "loss": 0.8246, "step": 5811 }, { "epoch": 1.545744680851064, "grad_norm": 3.7710976600646973, "learning_rate": 8.27357849777698e-06, "loss": 0.8668, "step": 5812 }, { "epoch": 1.5460106382978722, "grad_norm": 4.312849044799805, "learning_rate": 8.272913650930277e-06, "loss": 0.9206, "step": 5813 }, { "epoch": 1.546276595744681, "grad_norm": 4.059734344482422, "learning_rate": 8.272248702815776e-06, "loss": 0.77, "step": 5814 }, { "epoch": 1.5465425531914894, "grad_norm": 3.781832456588745, "learning_rate": 8.271583653454046e-06, "loss": 0.7643, "step": 5815 }, { "epoch": 1.5468085106382978, "grad_norm": 3.607161045074463, "learning_rate": 8.270918502865663e-06, "loss": 0.7721, "step": 5816 }, { "epoch": 1.5470744680851065, "grad_norm": 3.986572504043579, "learning_rate": 8.270253251071214e-06, "loss": 0.6967, "step": 5817 }, { "epoch": 1.5473404255319148, "grad_norm": 3.9674570560455322, "learning_rate": 8.269587898091277e-06, "loss": 0.7986, "step": 5818 }, { "epoch": 1.5476063829787234, "grad_norm": 3.794405698776245, "learning_rate": 8.268922443946444e-06, "loss": 0.7897, "step": 5819 }, { "epoch": 1.547872340425532, "grad_norm": 3.5226500034332275, "learning_rate": 8.2682568886573e-06, "loss": 0.7474, "step": 5820 }, { "epoch": 1.5481382978723404, "grad_norm": 3.692884922027588, "learning_rate": 8.267591232244439e-06, "loss": 0.9286, "step": 5821 }, { "epoch": 1.548404255319149, "grad_norm": 4.193415641784668, "learning_rate": 8.266925474728459e-06, "loss": 0.7917, "step": 5822 }, { "epoch": 1.5486702127659573, "grad_norm": 3.877485752105713, "learning_rate": 8.266259616129959e-06, "loss": 0.8366, "step": 5823 }, { "epoch": 1.548936170212766, "grad_norm": 3.8126795291900635, "learning_rate": 8.26559365646954e-06, "loss": 0.7591, "step": 5824 }, { "epoch": 1.5492021276595744, "grad_norm": 4.233253479003906, "learning_rate": 8.264927595767808e-06, "loss": 0.8596, "step": 5825 }, { "epoch": 1.549468085106383, "grad_norm": 4.092543601989746, "learning_rate": 8.264261434045374e-06, "loss": 0.7732, "step": 5826 }, { "epoch": 1.5497340425531916, "grad_norm": 4.047788619995117, "learning_rate": 8.263595171322847e-06, "loss": 0.8763, "step": 5827 }, { "epoch": 1.55, "grad_norm": 3.990832805633545, "learning_rate": 8.262928807620843e-06, "loss": 0.8313, "step": 5828 }, { "epoch": 1.5502659574468085, "grad_norm": 3.948673725128174, "learning_rate": 8.262262342959981e-06, "loss": 0.8937, "step": 5829 }, { "epoch": 1.550531914893617, "grad_norm": 4.302928924560547, "learning_rate": 8.261595777360881e-06, "loss": 0.7945, "step": 5830 }, { "epoch": 1.5507978723404254, "grad_norm": 3.8130292892456055, "learning_rate": 8.260929110844166e-06, "loss": 0.7971, "step": 5831 }, { "epoch": 1.5510638297872341, "grad_norm": 3.7944552898406982, "learning_rate": 8.260262343430468e-06, "loss": 0.7268, "step": 5832 }, { "epoch": 1.5513297872340426, "grad_norm": 3.765657424926758, "learning_rate": 8.259595475140412e-06, "loss": 0.7289, "step": 5833 }, { "epoch": 1.551595744680851, "grad_norm": 4.215806484222412, "learning_rate": 8.258928505994635e-06, "loss": 0.8254, "step": 5834 }, { "epoch": 1.5518617021276597, "grad_norm": 3.7282323837280273, "learning_rate": 8.258261436013774e-06, "loss": 0.8426, "step": 5835 }, { "epoch": 1.552127659574468, "grad_norm": 4.05489444732666, "learning_rate": 8.257594265218468e-06, "loss": 0.832, "step": 5836 }, { "epoch": 1.5523936170212767, "grad_norm": 4.3416666984558105, "learning_rate": 8.256926993629358e-06, "loss": 0.844, "step": 5837 }, { "epoch": 1.5526595744680851, "grad_norm": 4.158813953399658, "learning_rate": 8.256259621267095e-06, "loss": 0.7328, "step": 5838 }, { "epoch": 1.5529255319148936, "grad_norm": 4.071340560913086, "learning_rate": 8.255592148152325e-06, "loss": 0.7983, "step": 5839 }, { "epoch": 1.5531914893617023, "grad_norm": 3.988938093185425, "learning_rate": 8.254924574305698e-06, "loss": 0.7863, "step": 5840 }, { "epoch": 1.5534574468085105, "grad_norm": 3.8350539207458496, "learning_rate": 8.254256899747876e-06, "loss": 0.8347, "step": 5841 }, { "epoch": 1.5537234042553192, "grad_norm": 3.7759451866149902, "learning_rate": 8.253589124499513e-06, "loss": 0.7486, "step": 5842 }, { "epoch": 1.5539893617021276, "grad_norm": 4.114711284637451, "learning_rate": 8.252921248581272e-06, "loss": 0.8939, "step": 5843 }, { "epoch": 1.554255319148936, "grad_norm": 4.071899890899658, "learning_rate": 8.252253272013816e-06, "loss": 0.7912, "step": 5844 }, { "epoch": 1.5545212765957448, "grad_norm": 3.5732295513153076, "learning_rate": 8.251585194817816e-06, "loss": 0.7897, "step": 5845 }, { "epoch": 1.554787234042553, "grad_norm": 3.884356737136841, "learning_rate": 8.250917017013943e-06, "loss": 0.8328, "step": 5846 }, { "epoch": 1.5550531914893617, "grad_norm": 4.147099018096924, "learning_rate": 8.250248738622868e-06, "loss": 0.8425, "step": 5847 }, { "epoch": 1.5553191489361702, "grad_norm": 4.285495758056641, "learning_rate": 8.249580359665272e-06, "loss": 0.9088, "step": 5848 }, { "epoch": 1.5555851063829786, "grad_norm": 3.903362512588501, "learning_rate": 8.248911880161832e-06, "loss": 0.8711, "step": 5849 }, { "epoch": 1.5558510638297873, "grad_norm": 3.910297155380249, "learning_rate": 8.248243300133236e-06, "loss": 0.8571, "step": 5850 }, { "epoch": 1.5561170212765958, "grad_norm": 3.7283291816711426, "learning_rate": 8.247574619600165e-06, "loss": 0.8114, "step": 5851 }, { "epoch": 1.5563829787234043, "grad_norm": 4.2508864402771, "learning_rate": 8.246905838583315e-06, "loss": 0.8498, "step": 5852 }, { "epoch": 1.5566489361702127, "grad_norm": 3.5398671627044678, "learning_rate": 8.246236957103374e-06, "loss": 0.7013, "step": 5853 }, { "epoch": 1.5569148936170212, "grad_norm": 3.609945297241211, "learning_rate": 8.245567975181037e-06, "loss": 0.7113, "step": 5854 }, { "epoch": 1.5571808510638299, "grad_norm": 3.550767660140991, "learning_rate": 8.244898892837009e-06, "loss": 0.753, "step": 5855 }, { "epoch": 1.5574468085106383, "grad_norm": 4.197300434112549, "learning_rate": 8.244229710091986e-06, "loss": 0.7006, "step": 5856 }, { "epoch": 1.5577127659574468, "grad_norm": 3.916386842727661, "learning_rate": 8.243560426966678e-06, "loss": 0.7071, "step": 5857 }, { "epoch": 1.5579787234042555, "grad_norm": 4.1130218505859375, "learning_rate": 8.242891043481793e-06, "loss": 0.8622, "step": 5858 }, { "epoch": 1.5582446808510637, "grad_norm": 3.9336955547332764, "learning_rate": 8.242221559658039e-06, "loss": 0.7626, "step": 5859 }, { "epoch": 1.5585106382978724, "grad_norm": 4.237149715423584, "learning_rate": 8.241551975516133e-06, "loss": 0.8566, "step": 5860 }, { "epoch": 1.5587765957446809, "grad_norm": 4.12725305557251, "learning_rate": 8.240882291076794e-06, "loss": 0.7879, "step": 5861 }, { "epoch": 1.5590425531914893, "grad_norm": 4.043492794036865, "learning_rate": 8.240212506360738e-06, "loss": 0.8772, "step": 5862 }, { "epoch": 1.559308510638298, "grad_norm": 3.8735363483428955, "learning_rate": 8.239542621388696e-06, "loss": 0.9265, "step": 5863 }, { "epoch": 1.5595744680851062, "grad_norm": 4.195898056030273, "learning_rate": 8.23887263618139e-06, "loss": 0.9022, "step": 5864 }, { "epoch": 1.559840425531915, "grad_norm": 3.4813778400421143, "learning_rate": 8.23820255075955e-06, "loss": 0.7605, "step": 5865 }, { "epoch": 1.5601063829787234, "grad_norm": 3.5564541816711426, "learning_rate": 8.237532365143909e-06, "loss": 0.7148, "step": 5866 }, { "epoch": 1.5603723404255319, "grad_norm": 4.291294097900391, "learning_rate": 8.236862079355208e-06, "loss": 1.022, "step": 5867 }, { "epoch": 1.5606382978723405, "grad_norm": 3.761632204055786, "learning_rate": 8.236191693414184e-06, "loss": 0.8673, "step": 5868 }, { "epoch": 1.5609042553191488, "grad_norm": 3.8336169719696045, "learning_rate": 8.235521207341577e-06, "loss": 0.7979, "step": 5869 }, { "epoch": 1.5611702127659575, "grad_norm": 3.8964157104492188, "learning_rate": 8.234850621158135e-06, "loss": 0.7466, "step": 5870 }, { "epoch": 1.561436170212766, "grad_norm": 3.8827109336853027, "learning_rate": 8.234179934884605e-06, "loss": 0.953, "step": 5871 }, { "epoch": 1.5617021276595744, "grad_norm": 4.318760395050049, "learning_rate": 8.23350914854174e-06, "loss": 0.8975, "step": 5872 }, { "epoch": 1.561968085106383, "grad_norm": 3.927676200866699, "learning_rate": 8.232838262150298e-06, "loss": 0.8148, "step": 5873 }, { "epoch": 1.5622340425531915, "grad_norm": 4.160933017730713, "learning_rate": 8.23216727573103e-06, "loss": 0.7736, "step": 5874 }, { "epoch": 1.5625, "grad_norm": 4.034573078155518, "learning_rate": 8.231496189304704e-06, "loss": 0.7754, "step": 5875 }, { "epoch": 1.5627659574468085, "grad_norm": 4.033196926116943, "learning_rate": 8.230825002892081e-06, "loss": 0.8588, "step": 5876 }, { "epoch": 1.563031914893617, "grad_norm": 3.949902057647705, "learning_rate": 8.23015371651393e-06, "loss": 0.8279, "step": 5877 }, { "epoch": 1.5632978723404256, "grad_norm": 3.8417794704437256, "learning_rate": 8.229482330191016e-06, "loss": 0.7201, "step": 5878 }, { "epoch": 1.563563829787234, "grad_norm": 3.836516857147217, "learning_rate": 8.22881084394412e-06, "loss": 0.9244, "step": 5879 }, { "epoch": 1.5638297872340425, "grad_norm": 3.882302761077881, "learning_rate": 8.228139257794012e-06, "loss": 0.7944, "step": 5880 }, { "epoch": 1.5640957446808512, "grad_norm": 4.163621425628662, "learning_rate": 8.227467571761478e-06, "loss": 0.7916, "step": 5881 }, { "epoch": 1.5643617021276595, "grad_norm": 3.8937926292419434, "learning_rate": 8.226795785867294e-06, "loss": 0.7165, "step": 5882 }, { "epoch": 1.5646276595744681, "grad_norm": 4.019950866699219, "learning_rate": 8.226123900132252e-06, "loss": 0.8444, "step": 5883 }, { "epoch": 1.5648936170212766, "grad_norm": 3.9146535396575928, "learning_rate": 8.225451914577137e-06, "loss": 0.7472, "step": 5884 }, { "epoch": 1.565159574468085, "grad_norm": 4.430140018463135, "learning_rate": 8.224779829222742e-06, "loss": 0.8139, "step": 5885 }, { "epoch": 1.5654255319148938, "grad_norm": 3.8101890087127686, "learning_rate": 8.224107644089863e-06, "loss": 0.8198, "step": 5886 }, { "epoch": 1.565691489361702, "grad_norm": 3.603240966796875, "learning_rate": 8.223435359199297e-06, "loss": 0.7507, "step": 5887 }, { "epoch": 1.5659574468085107, "grad_norm": 3.993999719619751, "learning_rate": 8.222762974571848e-06, "loss": 0.6875, "step": 5888 }, { "epoch": 1.5662234042553191, "grad_norm": 4.127441883087158, "learning_rate": 8.222090490228316e-06, "loss": 0.7653, "step": 5889 }, { "epoch": 1.5664893617021276, "grad_norm": 4.082408428192139, "learning_rate": 8.22141790618951e-06, "loss": 0.8506, "step": 5890 }, { "epoch": 1.5667553191489363, "grad_norm": 4.1307806968688965, "learning_rate": 8.220745222476243e-06, "loss": 0.7614, "step": 5891 }, { "epoch": 1.5670212765957445, "grad_norm": 3.9022128582000732, "learning_rate": 8.220072439109326e-06, "loss": 0.8563, "step": 5892 }, { "epoch": 1.5672872340425532, "grad_norm": 3.8020009994506836, "learning_rate": 8.219399556109578e-06, "loss": 0.8016, "step": 5893 }, { "epoch": 1.5675531914893617, "grad_norm": 4.383156776428223, "learning_rate": 8.218726573497817e-06, "loss": 0.7956, "step": 5894 }, { "epoch": 1.5678191489361701, "grad_norm": 4.414666175842285, "learning_rate": 8.218053491294864e-06, "loss": 0.8215, "step": 5895 }, { "epoch": 1.5680851063829788, "grad_norm": 4.223287105560303, "learning_rate": 8.21738030952155e-06, "loss": 0.8466, "step": 5896 }, { "epoch": 1.5683510638297873, "grad_norm": 4.012655735015869, "learning_rate": 8.216707028198699e-06, "loss": 0.7384, "step": 5897 }, { "epoch": 1.5686170212765957, "grad_norm": 4.301409721374512, "learning_rate": 8.216033647347145e-06, "loss": 0.7748, "step": 5898 }, { "epoch": 1.5688829787234042, "grad_norm": 4.148224353790283, "learning_rate": 8.215360166987728e-06, "loss": 0.8227, "step": 5899 }, { "epoch": 1.5691489361702127, "grad_norm": 4.055191993713379, "learning_rate": 8.214686587141277e-06, "loss": 0.7811, "step": 5900 }, { "epoch": 1.5694148936170214, "grad_norm": 3.9274792671203613, "learning_rate": 8.21401290782864e-06, "loss": 0.7934, "step": 5901 }, { "epoch": 1.5696808510638298, "grad_norm": 3.762334108352661, "learning_rate": 8.213339129070658e-06, "loss": 0.7967, "step": 5902 }, { "epoch": 1.5699468085106383, "grad_norm": 4.094070911407471, "learning_rate": 8.212665250888184e-06, "loss": 0.8637, "step": 5903 }, { "epoch": 1.570212765957447, "grad_norm": 3.871859550476074, "learning_rate": 8.21199127330206e-06, "loss": 0.7181, "step": 5904 }, { "epoch": 1.5704787234042552, "grad_norm": 4.029532432556152, "learning_rate": 8.211317196333149e-06, "loss": 0.756, "step": 5905 }, { "epoch": 1.570744680851064, "grad_norm": 3.982078790664673, "learning_rate": 8.2106430200023e-06, "loss": 0.7437, "step": 5906 }, { "epoch": 1.5710106382978724, "grad_norm": 4.319076061248779, "learning_rate": 8.209968744330375e-06, "loss": 0.8517, "step": 5907 }, { "epoch": 1.5712765957446808, "grad_norm": 3.5704493522644043, "learning_rate": 8.20929436933824e-06, "loss": 0.7369, "step": 5908 }, { "epoch": 1.5715425531914895, "grad_norm": 3.825941562652588, "learning_rate": 8.208619895046759e-06, "loss": 0.7644, "step": 5909 }, { "epoch": 1.5718085106382977, "grad_norm": 3.535365581512451, "learning_rate": 8.2079453214768e-06, "loss": 0.8191, "step": 5910 }, { "epoch": 1.5720744680851064, "grad_norm": 4.012056827545166, "learning_rate": 8.207270648649235e-06, "loss": 0.805, "step": 5911 }, { "epoch": 1.5723404255319149, "grad_norm": 3.670342206954956, "learning_rate": 8.20659587658494e-06, "loss": 0.7253, "step": 5912 }, { "epoch": 1.5726063829787233, "grad_norm": 3.5404562950134277, "learning_rate": 8.205921005304796e-06, "loss": 0.7078, "step": 5913 }, { "epoch": 1.572872340425532, "grad_norm": 4.304678916931152, "learning_rate": 8.20524603482968e-06, "loss": 0.8129, "step": 5914 }, { "epoch": 1.5731382978723403, "grad_norm": 3.6795125007629395, "learning_rate": 8.204570965180476e-06, "loss": 0.7669, "step": 5915 }, { "epoch": 1.573404255319149, "grad_norm": 3.8298754692077637, "learning_rate": 8.203895796378076e-06, "loss": 0.7803, "step": 5916 }, { "epoch": 1.5736702127659574, "grad_norm": 4.399144649505615, "learning_rate": 8.203220528443367e-06, "loss": 0.9503, "step": 5917 }, { "epoch": 1.5739361702127659, "grad_norm": 4.104849815368652, "learning_rate": 8.202545161397242e-06, "loss": 0.8586, "step": 5918 }, { "epoch": 1.5742021276595746, "grad_norm": 4.923317909240723, "learning_rate": 8.201869695260603e-06, "loss": 0.815, "step": 5919 }, { "epoch": 1.574468085106383, "grad_norm": 3.845151424407959, "learning_rate": 8.201194130054342e-06, "loss": 0.8449, "step": 5920 }, { "epoch": 1.5747340425531915, "grad_norm": 4.074094295501709, "learning_rate": 8.200518465799367e-06, "loss": 0.7569, "step": 5921 }, { "epoch": 1.575, "grad_norm": 4.062026023864746, "learning_rate": 8.199842702516584e-06, "loss": 0.8712, "step": 5922 }, { "epoch": 1.5752659574468084, "grad_norm": 4.046767711639404, "learning_rate": 8.199166840226898e-06, "loss": 0.8318, "step": 5923 }, { "epoch": 1.575531914893617, "grad_norm": 3.813408851623535, "learning_rate": 8.198490878951224e-06, "loss": 0.7493, "step": 5924 }, { "epoch": 1.5757978723404256, "grad_norm": 4.108468055725098, "learning_rate": 8.19781481871048e-06, "loss": 0.7867, "step": 5925 }, { "epoch": 1.576063829787234, "grad_norm": 3.9004015922546387, "learning_rate": 8.197138659525576e-06, "loss": 0.7384, "step": 5926 }, { "epoch": 1.5763297872340427, "grad_norm": 4.14080286026001, "learning_rate": 8.19646240141744e-06, "loss": 0.7755, "step": 5927 }, { "epoch": 1.576595744680851, "grad_norm": 3.8850128650665283, "learning_rate": 8.195786044406992e-06, "loss": 0.7689, "step": 5928 }, { "epoch": 1.5768617021276596, "grad_norm": 3.973543882369995, "learning_rate": 8.195109588515163e-06, "loss": 0.7336, "step": 5929 }, { "epoch": 1.577127659574468, "grad_norm": 3.7367260456085205, "learning_rate": 8.194433033762882e-06, "loss": 0.8511, "step": 5930 }, { "epoch": 1.5773936170212766, "grad_norm": 3.7051467895507812, "learning_rate": 8.193756380171081e-06, "loss": 0.7696, "step": 5931 }, { "epoch": 1.5776595744680852, "grad_norm": 3.612755298614502, "learning_rate": 8.193079627760697e-06, "loss": 0.7733, "step": 5932 }, { "epoch": 1.5779255319148935, "grad_norm": 4.524839401245117, "learning_rate": 8.19240277655267e-06, "loss": 0.8047, "step": 5933 }, { "epoch": 1.5781914893617022, "grad_norm": 4.2709059715271, "learning_rate": 8.191725826567943e-06, "loss": 0.9173, "step": 5934 }, { "epoch": 1.5784574468085106, "grad_norm": 4.062780857086182, "learning_rate": 8.191048777827462e-06, "loss": 0.755, "step": 5935 }, { "epoch": 1.578723404255319, "grad_norm": 4.253462314605713, "learning_rate": 8.190371630352174e-06, "loss": 0.9102, "step": 5936 }, { "epoch": 1.5789893617021278, "grad_norm": 3.578122854232788, "learning_rate": 8.189694384163032e-06, "loss": 0.6755, "step": 5937 }, { "epoch": 1.579255319148936, "grad_norm": 3.9935173988342285, "learning_rate": 8.189017039280989e-06, "loss": 0.8196, "step": 5938 }, { "epoch": 1.5795212765957447, "grad_norm": 3.9614062309265137, "learning_rate": 8.188339595727004e-06, "loss": 0.7896, "step": 5939 }, { "epoch": 1.5797872340425532, "grad_norm": 3.7698519229888916, "learning_rate": 8.187662053522039e-06, "loss": 0.785, "step": 5940 }, { "epoch": 1.5800531914893616, "grad_norm": 4.328986167907715, "learning_rate": 8.186984412687058e-06, "loss": 0.87, "step": 5941 }, { "epoch": 1.5803191489361703, "grad_norm": 4.169852256774902, "learning_rate": 8.186306673243025e-06, "loss": 0.8594, "step": 5942 }, { "epoch": 1.5805851063829788, "grad_norm": 4.010345458984375, "learning_rate": 8.185628835210915e-06, "loss": 0.913, "step": 5943 }, { "epoch": 1.5808510638297872, "grad_norm": 3.9177587032318115, "learning_rate": 8.184950898611696e-06, "loss": 0.9157, "step": 5944 }, { "epoch": 1.5811170212765957, "grad_norm": 4.508220672607422, "learning_rate": 8.184272863466348e-06, "loss": 0.8951, "step": 5945 }, { "epoch": 1.5813829787234042, "grad_norm": 3.5971477031707764, "learning_rate": 8.183594729795848e-06, "loss": 0.7883, "step": 5946 }, { "epoch": 1.5816489361702128, "grad_norm": 4.1539998054504395, "learning_rate": 8.182916497621177e-06, "loss": 0.8599, "step": 5947 }, { "epoch": 1.5819148936170213, "grad_norm": 3.9577205181121826, "learning_rate": 8.182238166963325e-06, "loss": 0.8107, "step": 5948 }, { "epoch": 1.5821808510638298, "grad_norm": 3.921849250793457, "learning_rate": 8.181559737843274e-06, "loss": 0.8452, "step": 5949 }, { "epoch": 1.5824468085106385, "grad_norm": 3.6595895290374756, "learning_rate": 8.18088121028202e-06, "loss": 0.8332, "step": 5950 }, { "epoch": 1.5827127659574467, "grad_norm": 4.248002052307129, "learning_rate": 8.18020258430056e-06, "loss": 0.928, "step": 5951 }, { "epoch": 1.5829787234042554, "grad_norm": 3.584662437438965, "learning_rate": 8.179523859919884e-06, "loss": 0.7684, "step": 5952 }, { "epoch": 1.5832446808510638, "grad_norm": 3.5269956588745117, "learning_rate": 8.178845037160997e-06, "loss": 0.7553, "step": 5953 }, { "epoch": 1.5835106382978723, "grad_norm": 4.2691731452941895, "learning_rate": 8.178166116044904e-06, "loss": 0.8211, "step": 5954 }, { "epoch": 1.583776595744681, "grad_norm": 4.050920009613037, "learning_rate": 8.177487096592607e-06, "loss": 0.9221, "step": 5955 }, { "epoch": 1.5840425531914892, "grad_norm": 4.290426731109619, "learning_rate": 8.17680797882512e-06, "loss": 0.7909, "step": 5956 }, { "epoch": 1.584308510638298, "grad_norm": 3.8692431449890137, "learning_rate": 8.176128762763451e-06, "loss": 0.7887, "step": 5957 }, { "epoch": 1.5845744680851064, "grad_norm": 4.173573017120361, "learning_rate": 8.175449448428621e-06, "loss": 0.7535, "step": 5958 }, { "epoch": 1.5848404255319148, "grad_norm": 4.186033248901367, "learning_rate": 8.174770035841647e-06, "loss": 0.8673, "step": 5959 }, { "epoch": 1.5851063829787235, "grad_norm": 4.015555381774902, "learning_rate": 8.17409052502355e-06, "loss": 0.8815, "step": 5960 }, { "epoch": 1.5853723404255318, "grad_norm": 3.864473342895508, "learning_rate": 8.173410915995354e-06, "loss": 0.8684, "step": 5961 }, { "epoch": 1.5856382978723405, "grad_norm": 3.6198973655700684, "learning_rate": 8.172731208778089e-06, "loss": 0.7445, "step": 5962 }, { "epoch": 1.585904255319149, "grad_norm": 3.7900218963623047, "learning_rate": 8.172051403392784e-06, "loss": 0.7331, "step": 5963 }, { "epoch": 1.5861702127659574, "grad_norm": 4.163589954376221, "learning_rate": 8.171371499860475e-06, "loss": 0.8528, "step": 5964 }, { "epoch": 1.586436170212766, "grad_norm": 4.275415420532227, "learning_rate": 8.170691498202196e-06, "loss": 0.8435, "step": 5965 }, { "epoch": 1.5867021276595743, "grad_norm": 3.969174861907959, "learning_rate": 8.170011398438992e-06, "loss": 0.8812, "step": 5966 }, { "epoch": 1.586968085106383, "grad_norm": 4.086930751800537, "learning_rate": 8.169331200591901e-06, "loss": 0.8988, "step": 5967 }, { "epoch": 1.5872340425531914, "grad_norm": 4.444678783416748, "learning_rate": 8.168650904681973e-06, "loss": 0.9295, "step": 5968 }, { "epoch": 1.5875, "grad_norm": 3.7711548805236816, "learning_rate": 8.167970510730254e-06, "loss": 0.7715, "step": 5969 }, { "epoch": 1.5877659574468086, "grad_norm": 3.800588369369507, "learning_rate": 8.167290018757797e-06, "loss": 0.8273, "step": 5970 }, { "epoch": 1.588031914893617, "grad_norm": 4.506065845489502, "learning_rate": 8.16660942878566e-06, "loss": 0.7786, "step": 5971 }, { "epoch": 1.5882978723404255, "grad_norm": 3.8182950019836426, "learning_rate": 8.165928740834896e-06, "loss": 0.6682, "step": 5972 }, { "epoch": 1.5885638297872342, "grad_norm": 4.040492534637451, "learning_rate": 8.165247954926572e-06, "loss": 0.7333, "step": 5973 }, { "epoch": 1.5888297872340424, "grad_norm": 4.233337879180908, "learning_rate": 8.164567071081747e-06, "loss": 0.7931, "step": 5974 }, { "epoch": 1.5890957446808511, "grad_norm": 4.0191969871521, "learning_rate": 8.163886089321493e-06, "loss": 0.8279, "step": 5975 }, { "epoch": 1.5893617021276596, "grad_norm": 3.9428741931915283, "learning_rate": 8.163205009666879e-06, "loss": 0.7945, "step": 5976 }, { "epoch": 1.589627659574468, "grad_norm": 4.383618354797363, "learning_rate": 8.162523832138977e-06, "loss": 0.8961, "step": 5977 }, { "epoch": 1.5898936170212767, "grad_norm": 4.313653945922852, "learning_rate": 8.161842556758863e-06, "loss": 0.927, "step": 5978 }, { "epoch": 1.590159574468085, "grad_norm": 4.137526988983154, "learning_rate": 8.161161183547619e-06, "loss": 0.833, "step": 5979 }, { "epoch": 1.5904255319148937, "grad_norm": 3.9024994373321533, "learning_rate": 8.160479712526326e-06, "loss": 0.8324, "step": 5980 }, { "epoch": 1.5906914893617021, "grad_norm": 3.745685577392578, "learning_rate": 8.159798143716069e-06, "loss": 0.7946, "step": 5981 }, { "epoch": 1.5909574468085106, "grad_norm": 4.142686367034912, "learning_rate": 8.159116477137938e-06, "loss": 0.8469, "step": 5982 }, { "epoch": 1.5912234042553193, "grad_norm": 4.332526683807373, "learning_rate": 8.158434712813024e-06, "loss": 0.8398, "step": 5983 }, { "epoch": 1.5914893617021275, "grad_norm": 4.1822028160095215, "learning_rate": 8.157752850762422e-06, "loss": 0.8182, "step": 5984 }, { "epoch": 1.5917553191489362, "grad_norm": 3.797029972076416, "learning_rate": 8.157070891007227e-06, "loss": 0.8219, "step": 5985 }, { "epoch": 1.5920212765957447, "grad_norm": 3.6281862258911133, "learning_rate": 8.156388833568543e-06, "loss": 0.7788, "step": 5986 }, { "epoch": 1.5922872340425531, "grad_norm": 3.963622570037842, "learning_rate": 8.155706678467472e-06, "loss": 0.8121, "step": 5987 }, { "epoch": 1.5925531914893618, "grad_norm": 3.965254068374634, "learning_rate": 8.15502442572512e-06, "loss": 0.9758, "step": 5988 }, { "epoch": 1.59281914893617, "grad_norm": 3.7290945053100586, "learning_rate": 8.1543420753626e-06, "loss": 0.7913, "step": 5989 }, { "epoch": 1.5930851063829787, "grad_norm": 3.5423686504364014, "learning_rate": 8.15365962740102e-06, "loss": 0.6702, "step": 5990 }, { "epoch": 1.5933510638297872, "grad_norm": 4.0960540771484375, "learning_rate": 8.1529770818615e-06, "loss": 0.976, "step": 5991 }, { "epoch": 1.5936170212765957, "grad_norm": 3.9374215602874756, "learning_rate": 8.152294438765157e-06, "loss": 0.7726, "step": 5992 }, { "epoch": 1.5938829787234043, "grad_norm": 4.123393535614014, "learning_rate": 8.15161169813311e-06, "loss": 0.7414, "step": 5993 }, { "epoch": 1.5941489361702128, "grad_norm": 3.7125062942504883, "learning_rate": 8.150928859986488e-06, "loss": 0.8094, "step": 5994 }, { "epoch": 1.5944148936170213, "grad_norm": 3.6186742782592773, "learning_rate": 8.15024592434642e-06, "loss": 0.8291, "step": 5995 }, { "epoch": 1.59468085106383, "grad_norm": 3.9349913597106934, "learning_rate": 8.14956289123403e-06, "loss": 0.8469, "step": 5996 }, { "epoch": 1.5949468085106382, "grad_norm": 4.224155426025391, "learning_rate": 8.148879760670459e-06, "loss": 0.8178, "step": 5997 }, { "epoch": 1.5952127659574469, "grad_norm": 4.03489351272583, "learning_rate": 8.14819653267684e-06, "loss": 1.0682, "step": 5998 }, { "epoch": 1.5954787234042553, "grad_norm": 3.757615566253662, "learning_rate": 8.147513207274314e-06, "loss": 0.9454, "step": 5999 }, { "epoch": 1.5957446808510638, "grad_norm": 3.69804048538208, "learning_rate": 8.146829784484024e-06, "loss": 0.6988, "step": 6000 }, { "epoch": 1.5957446808510638, "eval_loss": 1.2842473983764648, "eval_runtime": 13.4375, "eval_samples_per_second": 29.767, "eval_steps_per_second": 3.721, "step": 6000 }, { "epoch": 1.5960106382978725, "grad_norm": 3.8672168254852295, "learning_rate": 8.146146264327113e-06, "loss": 0.8893, "step": 6001 }, { "epoch": 1.5962765957446807, "grad_norm": 3.7445380687713623, "learning_rate": 8.145462646824734e-06, "loss": 0.8237, "step": 6002 }, { "epoch": 1.5965425531914894, "grad_norm": 3.7135863304138184, "learning_rate": 8.144778931998038e-06, "loss": 0.6954, "step": 6003 }, { "epoch": 1.5968085106382979, "grad_norm": 3.946181058883667, "learning_rate": 8.144095119868178e-06, "loss": 0.8022, "step": 6004 }, { "epoch": 1.5970744680851063, "grad_norm": 3.866457223892212, "learning_rate": 8.143411210456314e-06, "loss": 0.7848, "step": 6005 }, { "epoch": 1.597340425531915, "grad_norm": 3.9514496326446533, "learning_rate": 8.142727203783608e-06, "loss": 0.8287, "step": 6006 }, { "epoch": 1.5976063829787233, "grad_norm": 3.780092239379883, "learning_rate": 8.142043099871219e-06, "loss": 0.731, "step": 6007 }, { "epoch": 1.597872340425532, "grad_norm": 3.832037925720215, "learning_rate": 8.141358898740319e-06, "loss": 0.8207, "step": 6008 }, { "epoch": 1.5981382978723404, "grad_norm": 3.7208633422851562, "learning_rate": 8.140674600412076e-06, "loss": 0.7905, "step": 6009 }, { "epoch": 1.5984042553191489, "grad_norm": 3.5873775482177734, "learning_rate": 8.139990204907662e-06, "loss": 0.7042, "step": 6010 }, { "epoch": 1.5986702127659576, "grad_norm": 4.138782024383545, "learning_rate": 8.139305712248256e-06, "loss": 0.8231, "step": 6011 }, { "epoch": 1.5989361702127658, "grad_norm": 4.014845371246338, "learning_rate": 8.138621122455034e-06, "loss": 0.7606, "step": 6012 }, { "epoch": 1.5992021276595745, "grad_norm": 3.997772693634033, "learning_rate": 8.13793643554918e-06, "loss": 0.8122, "step": 6013 }, { "epoch": 1.599468085106383, "grad_norm": 3.3885183334350586, "learning_rate": 8.137251651551878e-06, "loss": 0.7245, "step": 6014 }, { "epoch": 1.5997340425531914, "grad_norm": 3.9096522331237793, "learning_rate": 8.136566770484316e-06, "loss": 0.7919, "step": 6015 }, { "epoch": 1.6, "grad_norm": 4.008962154388428, "learning_rate": 8.135881792367686e-06, "loss": 0.8683, "step": 6016 }, { "epoch": 1.6002659574468086, "grad_norm": 3.9772658348083496, "learning_rate": 8.13519671722318e-06, "loss": 0.7775, "step": 6017 }, { "epoch": 1.600531914893617, "grad_norm": 4.593280792236328, "learning_rate": 8.134511545071998e-06, "loss": 0.8959, "step": 6018 }, { "epoch": 1.6007978723404257, "grad_norm": 3.9730031490325928, "learning_rate": 8.133826275935337e-06, "loss": 0.8394, "step": 6019 }, { "epoch": 1.601063829787234, "grad_norm": 4.224338531494141, "learning_rate": 8.133140909834402e-06, "loss": 0.7961, "step": 6020 }, { "epoch": 1.6013297872340426, "grad_norm": 3.759888172149658, "learning_rate": 8.132455446790399e-06, "loss": 0.8531, "step": 6021 }, { "epoch": 1.601595744680851, "grad_norm": 3.5629312992095947, "learning_rate": 8.131769886824535e-06, "loss": 0.8102, "step": 6022 }, { "epoch": 1.6018617021276595, "grad_norm": 3.5515568256378174, "learning_rate": 8.131084229958024e-06, "loss": 0.7867, "step": 6023 }, { "epoch": 1.6021276595744682, "grad_norm": 4.148061275482178, "learning_rate": 8.130398476212081e-06, "loss": 0.8708, "step": 6024 }, { "epoch": 1.6023936170212765, "grad_norm": 4.018913745880127, "learning_rate": 8.129712625607924e-06, "loss": 0.771, "step": 6025 }, { "epoch": 1.6026595744680852, "grad_norm": 4.379147052764893, "learning_rate": 8.129026678166772e-06, "loss": 0.8199, "step": 6026 }, { "epoch": 1.6029255319148936, "grad_norm": 3.568890333175659, "learning_rate": 8.128340633909852e-06, "loss": 0.705, "step": 6027 }, { "epoch": 1.603191489361702, "grad_norm": 3.6377384662628174, "learning_rate": 8.127654492858388e-06, "loss": 0.6958, "step": 6028 }, { "epoch": 1.6034574468085108, "grad_norm": 4.233497142791748, "learning_rate": 8.126968255033614e-06, "loss": 0.8446, "step": 6029 }, { "epoch": 1.603723404255319, "grad_norm": 4.239995956420898, "learning_rate": 8.126281920456758e-06, "loss": 0.813, "step": 6030 }, { "epoch": 1.6039893617021277, "grad_norm": 3.8521575927734375, "learning_rate": 8.12559548914906e-06, "loss": 0.7906, "step": 6031 }, { "epoch": 1.6042553191489362, "grad_norm": 3.567471742630005, "learning_rate": 8.124908961131759e-06, "loss": 0.6709, "step": 6032 }, { "epoch": 1.6045212765957446, "grad_norm": 3.527024030685425, "learning_rate": 8.124222336426094e-06, "loss": 0.7508, "step": 6033 }, { "epoch": 1.6047872340425533, "grad_norm": 4.134167671203613, "learning_rate": 8.123535615053312e-06, "loss": 0.8233, "step": 6034 }, { "epoch": 1.6050531914893615, "grad_norm": 3.62556791305542, "learning_rate": 8.12284879703466e-06, "loss": 0.7347, "step": 6035 }, { "epoch": 1.6053191489361702, "grad_norm": 4.534690856933594, "learning_rate": 8.12216188239139e-06, "loss": 0.9258, "step": 6036 }, { "epoch": 1.6055851063829787, "grad_norm": 3.8855905532836914, "learning_rate": 8.121474871144757e-06, "loss": 0.7215, "step": 6037 }, { "epoch": 1.6058510638297872, "grad_norm": 3.889317274093628, "learning_rate": 8.120787763316014e-06, "loss": 0.7557, "step": 6038 }, { "epoch": 1.6061170212765958, "grad_norm": 4.091339588165283, "learning_rate": 8.120100558926425e-06, "loss": 0.8053, "step": 6039 }, { "epoch": 1.6063829787234043, "grad_norm": 4.249019622802734, "learning_rate": 8.11941325799725e-06, "loss": 0.837, "step": 6040 }, { "epoch": 1.6066489361702128, "grad_norm": 4.165124416351318, "learning_rate": 8.118725860549756e-06, "loss": 0.8762, "step": 6041 }, { "epoch": 1.6069148936170212, "grad_norm": 4.028770923614502, "learning_rate": 8.118038366605212e-06, "loss": 0.8456, "step": 6042 }, { "epoch": 1.6071808510638297, "grad_norm": 3.60648250579834, "learning_rate": 8.117350776184892e-06, "loss": 0.688, "step": 6043 }, { "epoch": 1.6074468085106384, "grad_norm": 3.6444270610809326, "learning_rate": 8.116663089310067e-06, "loss": 0.8199, "step": 6044 }, { "epoch": 1.6077127659574468, "grad_norm": 4.073156833648682, "learning_rate": 8.115975306002018e-06, "loss": 0.9758, "step": 6045 }, { "epoch": 1.6079787234042553, "grad_norm": 4.100760459899902, "learning_rate": 8.115287426282022e-06, "loss": 0.9357, "step": 6046 }, { "epoch": 1.608244680851064, "grad_norm": 4.134888648986816, "learning_rate": 8.114599450171366e-06, "loss": 0.7536, "step": 6047 }, { "epoch": 1.6085106382978722, "grad_norm": 3.8742432594299316, "learning_rate": 8.113911377691338e-06, "loss": 0.7832, "step": 6048 }, { "epoch": 1.608776595744681, "grad_norm": 4.110736846923828, "learning_rate": 8.113223208863224e-06, "loss": 0.7098, "step": 6049 }, { "epoch": 1.6090425531914894, "grad_norm": 3.972907304763794, "learning_rate": 8.11253494370832e-06, "loss": 0.8414, "step": 6050 }, { "epoch": 1.6093085106382978, "grad_norm": 3.984872817993164, "learning_rate": 8.111846582247917e-06, "loss": 0.9063, "step": 6051 }, { "epoch": 1.6095744680851065, "grad_norm": 4.114076614379883, "learning_rate": 8.11115812450332e-06, "loss": 0.8774, "step": 6052 }, { "epoch": 1.6098404255319148, "grad_norm": 3.8898861408233643, "learning_rate": 8.110469570495828e-06, "loss": 0.6855, "step": 6053 }, { "epoch": 1.6101063829787234, "grad_norm": 3.620485544204712, "learning_rate": 8.109780920246743e-06, "loss": 0.8566, "step": 6054 }, { "epoch": 1.610372340425532, "grad_norm": 4.412075519561768, "learning_rate": 8.109092173777376e-06, "loss": 0.8386, "step": 6055 }, { "epoch": 1.6106382978723404, "grad_norm": 4.396791934967041, "learning_rate": 8.108403331109038e-06, "loss": 0.7074, "step": 6056 }, { "epoch": 1.610904255319149, "grad_norm": 4.347930431365967, "learning_rate": 8.10771439226304e-06, "loss": 0.8188, "step": 6057 }, { "epoch": 1.6111702127659573, "grad_norm": 3.751016855239868, "learning_rate": 8.1070253572607e-06, "loss": 0.7469, "step": 6058 }, { "epoch": 1.611436170212766, "grad_norm": 4.112164497375488, "learning_rate": 8.106336226123339e-06, "loss": 0.8259, "step": 6059 }, { "epoch": 1.6117021276595744, "grad_norm": 4.112537860870361, "learning_rate": 8.105646998872275e-06, "loss": 0.8493, "step": 6060 }, { "epoch": 1.611968085106383, "grad_norm": 4.171288967132568, "learning_rate": 8.104957675528837e-06, "loss": 0.9249, "step": 6061 }, { "epoch": 1.6122340425531916, "grad_norm": 4.331489086151123, "learning_rate": 8.104268256114354e-06, "loss": 0.9123, "step": 6062 }, { "epoch": 1.6125, "grad_norm": 4.148106575012207, "learning_rate": 8.103578740650157e-06, "loss": 0.7654, "step": 6063 }, { "epoch": 1.6127659574468085, "grad_norm": 3.72057843208313, "learning_rate": 8.102889129157578e-06, "loss": 0.8049, "step": 6064 }, { "epoch": 1.613031914893617, "grad_norm": 3.9282565116882324, "learning_rate": 8.102199421657957e-06, "loss": 0.7639, "step": 6065 }, { "epoch": 1.6132978723404254, "grad_norm": 3.8103582859039307, "learning_rate": 8.101509618172634e-06, "loss": 0.8689, "step": 6066 }, { "epoch": 1.6135638297872341, "grad_norm": 4.2297539710998535, "learning_rate": 8.10081971872295e-06, "loss": 0.9582, "step": 6067 }, { "epoch": 1.6138297872340426, "grad_norm": 4.653298854827881, "learning_rate": 8.100129723330255e-06, "loss": 0.9946, "step": 6068 }, { "epoch": 1.614095744680851, "grad_norm": 3.7969958782196045, "learning_rate": 8.099439632015896e-06, "loss": 0.7852, "step": 6069 }, { "epoch": 1.6143617021276597, "grad_norm": 4.072946071624756, "learning_rate": 8.098749444801226e-06, "loss": 0.79, "step": 6070 }, { "epoch": 1.614627659574468, "grad_norm": 3.9592959880828857, "learning_rate": 8.0980591617076e-06, "loss": 0.7815, "step": 6071 }, { "epoch": 1.6148936170212767, "grad_norm": 4.4633588790893555, "learning_rate": 8.097368782756374e-06, "loss": 0.7754, "step": 6072 }, { "epoch": 1.6151595744680851, "grad_norm": 4.381833553314209, "learning_rate": 8.096678307968913e-06, "loss": 0.9649, "step": 6073 }, { "epoch": 1.6154255319148936, "grad_norm": 4.433225154876709, "learning_rate": 8.095987737366578e-06, "loss": 0.9376, "step": 6074 }, { "epoch": 1.6156914893617023, "grad_norm": 3.7621006965637207, "learning_rate": 8.095297070970738e-06, "loss": 0.7577, "step": 6075 }, { "epoch": 1.6159574468085105, "grad_norm": 3.4518826007843018, "learning_rate": 8.094606308802764e-06, "loss": 0.816, "step": 6076 }, { "epoch": 1.6162234042553192, "grad_norm": 4.059780120849609, "learning_rate": 8.093915450884025e-06, "loss": 0.8319, "step": 6077 }, { "epoch": 1.6164893617021276, "grad_norm": 3.8527324199676514, "learning_rate": 8.093224497235899e-06, "loss": 0.8826, "step": 6078 }, { "epoch": 1.616755319148936, "grad_norm": 3.3895418643951416, "learning_rate": 8.092533447879766e-06, "loss": 0.73, "step": 6079 }, { "epoch": 1.6170212765957448, "grad_norm": 3.9259166717529297, "learning_rate": 8.091842302837009e-06, "loss": 0.8569, "step": 6080 }, { "epoch": 1.617287234042553, "grad_norm": 3.5704541206359863, "learning_rate": 8.091151062129008e-06, "loss": 0.8113, "step": 6081 }, { "epoch": 1.6175531914893617, "grad_norm": 3.8313138484954834, "learning_rate": 8.090459725777156e-06, "loss": 0.7352, "step": 6082 }, { "epoch": 1.6178191489361702, "grad_norm": 4.403858184814453, "learning_rate": 8.089768293802842e-06, "loss": 0.7757, "step": 6083 }, { "epoch": 1.6180851063829786, "grad_norm": 4.078790664672852, "learning_rate": 8.089076766227457e-06, "loss": 0.8444, "step": 6084 }, { "epoch": 1.6183510638297873, "grad_norm": 4.103868007659912, "learning_rate": 8.088385143072402e-06, "loss": 0.7451, "step": 6085 }, { "epoch": 1.6186170212765958, "grad_norm": 3.906527042388916, "learning_rate": 8.087693424359073e-06, "loss": 0.7095, "step": 6086 }, { "epoch": 1.6188829787234043, "grad_norm": 4.909295082092285, "learning_rate": 8.087001610108874e-06, "loss": 0.8277, "step": 6087 }, { "epoch": 1.6191489361702127, "grad_norm": 5.194472312927246, "learning_rate": 8.086309700343211e-06, "loss": 0.8959, "step": 6088 }, { "epoch": 1.6194148936170212, "grad_norm": 3.6174070835113525, "learning_rate": 8.085617695083493e-06, "loss": 0.7838, "step": 6089 }, { "epoch": 1.6196808510638299, "grad_norm": 3.5253570079803467, "learning_rate": 8.08492559435113e-06, "loss": 0.7633, "step": 6090 }, { "epoch": 1.6199468085106383, "grad_norm": 4.330216884613037, "learning_rate": 8.084233398167537e-06, "loss": 0.8669, "step": 6091 }, { "epoch": 1.6202127659574468, "grad_norm": 3.792811393737793, "learning_rate": 8.083541106554131e-06, "loss": 0.8782, "step": 6092 }, { "epoch": 1.6204787234042555, "grad_norm": 3.888946533203125, "learning_rate": 8.082848719532335e-06, "loss": 0.8816, "step": 6093 }, { "epoch": 1.6207446808510637, "grad_norm": 3.9346768856048584, "learning_rate": 8.082156237123567e-06, "loss": 0.6887, "step": 6094 }, { "epoch": 1.6210106382978724, "grad_norm": 3.7470414638519287, "learning_rate": 8.081463659349258e-06, "loss": 0.7622, "step": 6095 }, { "epoch": 1.6212765957446809, "grad_norm": 3.9194772243499756, "learning_rate": 8.080770986230835e-06, "loss": 0.768, "step": 6096 }, { "epoch": 1.6215425531914893, "grad_norm": 3.7921671867370605, "learning_rate": 8.08007821778973e-06, "loss": 0.8936, "step": 6097 }, { "epoch": 1.621808510638298, "grad_norm": 3.8893918991088867, "learning_rate": 8.07938535404738e-06, "loss": 0.835, "step": 6098 }, { "epoch": 1.6220744680851062, "grad_norm": 3.7834744453430176, "learning_rate": 8.07869239502522e-06, "loss": 0.7374, "step": 6099 }, { "epoch": 1.622340425531915, "grad_norm": 3.867154598236084, "learning_rate": 8.077999340744694e-06, "loss": 0.7935, "step": 6100 }, { "epoch": 1.6226063829787234, "grad_norm": 4.853170394897461, "learning_rate": 8.077306191227244e-06, "loss": 0.7786, "step": 6101 }, { "epoch": 1.6228723404255319, "grad_norm": 4.339568614959717, "learning_rate": 8.076612946494317e-06, "loss": 0.6722, "step": 6102 }, { "epoch": 1.6231382978723405, "grad_norm": 3.6707983016967773, "learning_rate": 8.075919606567363e-06, "loss": 0.8792, "step": 6103 }, { "epoch": 1.6234042553191488, "grad_norm": 3.867652177810669, "learning_rate": 8.075226171467835e-06, "loss": 0.7879, "step": 6104 }, { "epoch": 1.6236702127659575, "grad_norm": 3.5733299255371094, "learning_rate": 8.07453264121719e-06, "loss": 0.7921, "step": 6105 }, { "epoch": 1.623936170212766, "grad_norm": 3.7665045261383057, "learning_rate": 8.073839015836884e-06, "loss": 0.9738, "step": 6106 }, { "epoch": 1.6242021276595744, "grad_norm": 4.237964153289795, "learning_rate": 8.07314529534838e-06, "loss": 0.869, "step": 6107 }, { "epoch": 1.624468085106383, "grad_norm": 3.797464370727539, "learning_rate": 8.072451479773143e-06, "loss": 0.8445, "step": 6108 }, { "epoch": 1.6247340425531915, "grad_norm": 3.9559130668640137, "learning_rate": 8.071757569132639e-06, "loss": 0.848, "step": 6109 }, { "epoch": 1.625, "grad_norm": 3.7033722400665283, "learning_rate": 8.071063563448341e-06, "loss": 0.8571, "step": 6110 }, { "epoch": 1.6252659574468085, "grad_norm": 3.696049451828003, "learning_rate": 8.070369462741719e-06, "loss": 0.8649, "step": 6111 }, { "epoch": 1.625531914893617, "grad_norm": 3.495377540588379, "learning_rate": 8.06967526703425e-06, "loss": 0.7691, "step": 6112 }, { "epoch": 1.6257978723404256, "grad_norm": 3.9298911094665527, "learning_rate": 8.068980976347416e-06, "loss": 0.7793, "step": 6113 }, { "epoch": 1.626063829787234, "grad_norm": 3.756425380706787, "learning_rate": 8.068286590702697e-06, "loss": 0.8161, "step": 6114 }, { "epoch": 1.6263297872340425, "grad_norm": 4.13591194152832, "learning_rate": 8.067592110121576e-06, "loss": 0.8543, "step": 6115 }, { "epoch": 1.6265957446808512, "grad_norm": 4.203410625457764, "learning_rate": 8.066897534625547e-06, "loss": 0.7607, "step": 6116 }, { "epoch": 1.6268617021276595, "grad_norm": 4.2013983726501465, "learning_rate": 8.066202864236096e-06, "loss": 0.8248, "step": 6117 }, { "epoch": 1.6271276595744681, "grad_norm": 4.034732341766357, "learning_rate": 8.065508098974719e-06, "loss": 0.804, "step": 6118 }, { "epoch": 1.6273936170212766, "grad_norm": 4.180783271789551, "learning_rate": 8.06481323886291e-06, "loss": 0.8354, "step": 6119 }, { "epoch": 1.627659574468085, "grad_norm": 3.9474117755889893, "learning_rate": 8.064118283922173e-06, "loss": 0.8622, "step": 6120 }, { "epoch": 1.6279255319148938, "grad_norm": 3.8866050243377686, "learning_rate": 8.063423234174008e-06, "loss": 0.7197, "step": 6121 }, { "epoch": 1.628191489361702, "grad_norm": 4.463206768035889, "learning_rate": 8.062728089639921e-06, "loss": 0.9226, "step": 6122 }, { "epoch": 1.6284574468085107, "grad_norm": 3.982656717300415, "learning_rate": 8.062032850341423e-06, "loss": 0.7225, "step": 6123 }, { "epoch": 1.6287234042553191, "grad_norm": 3.9853739738464355, "learning_rate": 8.061337516300024e-06, "loss": 0.6711, "step": 6124 }, { "epoch": 1.6289893617021276, "grad_norm": 3.823125123977661, "learning_rate": 8.060642087537233e-06, "loss": 0.8944, "step": 6125 }, { "epoch": 1.6292553191489363, "grad_norm": 4.082576274871826, "learning_rate": 8.059946564074577e-06, "loss": 0.8235, "step": 6126 }, { "epoch": 1.6295212765957445, "grad_norm": 4.3164472579956055, "learning_rate": 8.05925094593357e-06, "loss": 0.8086, "step": 6127 }, { "epoch": 1.6297872340425532, "grad_norm": 3.8943753242492676, "learning_rate": 8.058555233135737e-06, "loss": 0.7088, "step": 6128 }, { "epoch": 1.6300531914893617, "grad_norm": 4.248415470123291, "learning_rate": 8.057859425702605e-06, "loss": 0.8011, "step": 6129 }, { "epoch": 1.6303191489361701, "grad_norm": 3.8152194023132324, "learning_rate": 8.057163523655702e-06, "loss": 0.7437, "step": 6130 }, { "epoch": 1.6305851063829788, "grad_norm": 4.243065357208252, "learning_rate": 8.056467527016559e-06, "loss": 0.8156, "step": 6131 }, { "epoch": 1.6308510638297873, "grad_norm": 4.148963928222656, "learning_rate": 8.055771435806714e-06, "loss": 0.8538, "step": 6132 }, { "epoch": 1.6311170212765957, "grad_norm": 3.848583698272705, "learning_rate": 8.0550752500477e-06, "loss": 0.7818, "step": 6133 }, { "epoch": 1.6313829787234042, "grad_norm": 4.185320854187012, "learning_rate": 8.054378969761062e-06, "loss": 0.85, "step": 6134 }, { "epoch": 1.6316489361702127, "grad_norm": 4.244765758514404, "learning_rate": 8.053682594968346e-06, "loss": 0.8856, "step": 6135 }, { "epoch": 1.6319148936170214, "grad_norm": 3.8420188426971436, "learning_rate": 8.052986125691091e-06, "loss": 0.7745, "step": 6136 }, { "epoch": 1.6321808510638298, "grad_norm": 4.029837131500244, "learning_rate": 8.052289561950852e-06, "loss": 0.8724, "step": 6137 }, { "epoch": 1.6324468085106383, "grad_norm": 3.9027750492095947, "learning_rate": 8.051592903769182e-06, "loss": 0.7405, "step": 6138 }, { "epoch": 1.632712765957447, "grad_norm": 4.00022554397583, "learning_rate": 8.050896151167632e-06, "loss": 0.7677, "step": 6139 }, { "epoch": 1.6329787234042552, "grad_norm": 4.150446891784668, "learning_rate": 8.050199304167766e-06, "loss": 0.7348, "step": 6140 }, { "epoch": 1.633244680851064, "grad_norm": 4.308548927307129, "learning_rate": 8.04950236279114e-06, "loss": 0.8106, "step": 6141 }, { "epoch": 1.6335106382978724, "grad_norm": 3.9967095851898193, "learning_rate": 8.048805327059321e-06, "loss": 0.7345, "step": 6142 }, { "epoch": 1.6337765957446808, "grad_norm": 3.783818244934082, "learning_rate": 8.048108196993879e-06, "loss": 0.716, "step": 6143 }, { "epoch": 1.6340425531914895, "grad_norm": 3.8823726177215576, "learning_rate": 8.047410972616376e-06, "loss": 0.778, "step": 6144 }, { "epoch": 1.6343085106382977, "grad_norm": 4.007701873779297, "learning_rate": 8.046713653948393e-06, "loss": 0.9691, "step": 6145 }, { "epoch": 1.6345744680851064, "grad_norm": 4.14747428894043, "learning_rate": 8.0460162410115e-06, "loss": 0.8201, "step": 6146 }, { "epoch": 1.6348404255319149, "grad_norm": 4.101099967956543, "learning_rate": 8.045318733827278e-06, "loss": 0.8864, "step": 6147 }, { "epoch": 1.6351063829787233, "grad_norm": 3.709555149078369, "learning_rate": 8.044621132417311e-06, "loss": 0.7185, "step": 6148 }, { "epoch": 1.635372340425532, "grad_norm": 4.0000481605529785, "learning_rate": 8.043923436803182e-06, "loss": 0.8816, "step": 6149 }, { "epoch": 1.6356382978723403, "grad_norm": 4.075678825378418, "learning_rate": 8.043225647006475e-06, "loss": 0.8192, "step": 6150 }, { "epoch": 1.635904255319149, "grad_norm": 4.004273891448975, "learning_rate": 8.042527763048787e-06, "loss": 0.9374, "step": 6151 }, { "epoch": 1.6361702127659574, "grad_norm": 3.904745101928711, "learning_rate": 8.041829784951706e-06, "loss": 0.7701, "step": 6152 }, { "epoch": 1.6364361702127659, "grad_norm": 3.7361650466918945, "learning_rate": 8.04113171273683e-06, "loss": 0.6875, "step": 6153 }, { "epoch": 1.6367021276595746, "grad_norm": 3.9355521202087402, "learning_rate": 8.040433546425759e-06, "loss": 0.828, "step": 6154 }, { "epoch": 1.636968085106383, "grad_norm": 3.615612745285034, "learning_rate": 8.039735286040095e-06, "loss": 0.8136, "step": 6155 }, { "epoch": 1.6372340425531915, "grad_norm": 3.900493621826172, "learning_rate": 8.03903693160144e-06, "loss": 0.7782, "step": 6156 }, { "epoch": 1.6375, "grad_norm": 4.175507068634033, "learning_rate": 8.038338483131408e-06, "loss": 0.8486, "step": 6157 }, { "epoch": 1.6377659574468084, "grad_norm": 4.02733039855957, "learning_rate": 8.037639940651603e-06, "loss": 0.7591, "step": 6158 }, { "epoch": 1.638031914893617, "grad_norm": 4.006030559539795, "learning_rate": 8.036941304183643e-06, "loss": 0.8453, "step": 6159 }, { "epoch": 1.6382978723404256, "grad_norm": 3.9777238368988037, "learning_rate": 8.036242573749142e-06, "loss": 0.7623, "step": 6160 }, { "epoch": 1.638563829787234, "grad_norm": 3.7054030895233154, "learning_rate": 8.035543749369724e-06, "loss": 0.7552, "step": 6161 }, { "epoch": 1.6388297872340427, "grad_norm": 4.149451732635498, "learning_rate": 8.034844831067006e-06, "loss": 0.6954, "step": 6162 }, { "epoch": 1.639095744680851, "grad_norm": 4.144680500030518, "learning_rate": 8.034145818862618e-06, "loss": 0.8583, "step": 6163 }, { "epoch": 1.6393617021276596, "grad_norm": 3.732167959213257, "learning_rate": 8.033446712778184e-06, "loss": 0.7437, "step": 6164 }, { "epoch": 1.639627659574468, "grad_norm": 4.176260471343994, "learning_rate": 8.032747512835338e-06, "loss": 0.9089, "step": 6165 }, { "epoch": 1.6398936170212766, "grad_norm": 3.9875879287719727, "learning_rate": 8.032048219055712e-06, "loss": 0.7776, "step": 6166 }, { "epoch": 1.6401595744680852, "grad_norm": 3.942016839981079, "learning_rate": 8.031348831460948e-06, "loss": 0.752, "step": 6167 }, { "epoch": 1.6404255319148935, "grad_norm": 4.088458061218262, "learning_rate": 8.030649350072679e-06, "loss": 0.8339, "step": 6168 }, { "epoch": 1.6406914893617022, "grad_norm": 4.712299346923828, "learning_rate": 8.029949774912552e-06, "loss": 0.942, "step": 6169 }, { "epoch": 1.6409574468085106, "grad_norm": 3.5929760932922363, "learning_rate": 8.029250106002212e-06, "loss": 0.7309, "step": 6170 }, { "epoch": 1.641223404255319, "grad_norm": 4.059690475463867, "learning_rate": 8.028550343363306e-06, "loss": 0.8479, "step": 6171 }, { "epoch": 1.6414893617021278, "grad_norm": 4.054781436920166, "learning_rate": 8.027850487017488e-06, "loss": 0.9293, "step": 6172 }, { "epoch": 1.641755319148936, "grad_norm": 3.754241466522217, "learning_rate": 8.027150536986411e-06, "loss": 0.7714, "step": 6173 }, { "epoch": 1.6420212765957447, "grad_norm": 3.6258599758148193, "learning_rate": 8.026450493291731e-06, "loss": 0.725, "step": 6174 }, { "epoch": 1.6422872340425532, "grad_norm": 4.247791290283203, "learning_rate": 8.025750355955112e-06, "loss": 0.7394, "step": 6175 }, { "epoch": 1.6425531914893616, "grad_norm": 3.7767536640167236, "learning_rate": 8.025050124998213e-06, "loss": 0.757, "step": 6176 }, { "epoch": 1.6428191489361703, "grad_norm": 3.970726490020752, "learning_rate": 8.0243498004427e-06, "loss": 0.7449, "step": 6177 }, { "epoch": 1.6430851063829788, "grad_norm": 4.161791801452637, "learning_rate": 8.023649382310246e-06, "loss": 0.8939, "step": 6178 }, { "epoch": 1.6433510638297872, "grad_norm": 3.9791698455810547, "learning_rate": 8.02294887062252e-06, "loss": 0.7553, "step": 6179 }, { "epoch": 1.6436170212765957, "grad_norm": 3.881882905960083, "learning_rate": 8.022248265401196e-06, "loss": 0.7806, "step": 6180 }, { "epoch": 1.6438829787234042, "grad_norm": 4.165888786315918, "learning_rate": 8.021547566667952e-06, "loss": 0.7756, "step": 6181 }, { "epoch": 1.6441489361702128, "grad_norm": 4.053508281707764, "learning_rate": 8.02084677444447e-06, "loss": 0.7472, "step": 6182 }, { "epoch": 1.6444148936170213, "grad_norm": 4.370820045471191, "learning_rate": 8.020145888752431e-06, "loss": 0.858, "step": 6183 }, { "epoch": 1.6446808510638298, "grad_norm": 4.108578205108643, "learning_rate": 8.019444909613524e-06, "loss": 0.8644, "step": 6184 }, { "epoch": 1.6449468085106385, "grad_norm": 3.9922139644622803, "learning_rate": 8.018743837049433e-06, "loss": 0.7846, "step": 6185 }, { "epoch": 1.6452127659574467, "grad_norm": 3.711470127105713, "learning_rate": 8.018042671081858e-06, "loss": 0.685, "step": 6186 }, { "epoch": 1.6454787234042554, "grad_norm": 3.7997970581054688, "learning_rate": 8.01734141173249e-06, "loss": 0.7726, "step": 6187 }, { "epoch": 1.6457446808510638, "grad_norm": 4.349726676940918, "learning_rate": 8.016640059023023e-06, "loss": 0.9296, "step": 6188 }, { "epoch": 1.6460106382978723, "grad_norm": 3.8738739490509033, "learning_rate": 8.01593861297516e-06, "loss": 0.9472, "step": 6189 }, { "epoch": 1.646276595744681, "grad_norm": 4.002452850341797, "learning_rate": 8.015237073610607e-06, "loss": 0.7488, "step": 6190 }, { "epoch": 1.6465425531914892, "grad_norm": 4.017054557800293, "learning_rate": 8.01453544095107e-06, "loss": 0.9446, "step": 6191 }, { "epoch": 1.646808510638298, "grad_norm": 4.123724460601807, "learning_rate": 8.013833715018256e-06, "loss": 0.9052, "step": 6192 }, { "epoch": 1.6470744680851064, "grad_norm": 3.664494752883911, "learning_rate": 8.013131895833879e-06, "loss": 0.7421, "step": 6193 }, { "epoch": 1.6473404255319148, "grad_norm": 3.7503373622894287, "learning_rate": 8.012429983419654e-06, "loss": 0.7293, "step": 6194 }, { "epoch": 1.6476063829787235, "grad_norm": 4.248551845550537, "learning_rate": 8.0117279777973e-06, "loss": 0.664, "step": 6195 }, { "epoch": 1.6478723404255318, "grad_norm": 4.146711349487305, "learning_rate": 8.011025878988534e-06, "loss": 0.8164, "step": 6196 }, { "epoch": 1.6481382978723405, "grad_norm": 3.8372318744659424, "learning_rate": 8.010323687015083e-06, "loss": 0.7173, "step": 6197 }, { "epoch": 1.648404255319149, "grad_norm": 4.206233501434326, "learning_rate": 8.009621401898671e-06, "loss": 0.8324, "step": 6198 }, { "epoch": 1.6486702127659574, "grad_norm": 3.9302217960357666, "learning_rate": 8.008919023661033e-06, "loss": 0.8095, "step": 6199 }, { "epoch": 1.648936170212766, "grad_norm": 3.8333635330200195, "learning_rate": 8.008216552323896e-06, "loss": 0.6761, "step": 6200 }, { "epoch": 1.6492021276595743, "grad_norm": 4.308274269104004, "learning_rate": 8.007513987908997e-06, "loss": 0.9286, "step": 6201 }, { "epoch": 1.649468085106383, "grad_norm": 3.9875328540802, "learning_rate": 8.006811330438076e-06, "loss": 0.8439, "step": 6202 }, { "epoch": 1.6497340425531914, "grad_norm": 3.9723567962646484, "learning_rate": 8.006108579932869e-06, "loss": 0.743, "step": 6203 }, { "epoch": 1.65, "grad_norm": 3.6594903469085693, "learning_rate": 8.005405736415127e-06, "loss": 0.8403, "step": 6204 }, { "epoch": 1.6502659574468086, "grad_norm": 3.7459709644317627, "learning_rate": 8.00470279990659e-06, "loss": 0.7611, "step": 6205 }, { "epoch": 1.650531914893617, "grad_norm": 4.077069282531738, "learning_rate": 8.003999770429013e-06, "loss": 0.8415, "step": 6206 }, { "epoch": 1.6507978723404255, "grad_norm": 4.072371482849121, "learning_rate": 8.003296648004146e-06, "loss": 0.8709, "step": 6207 }, { "epoch": 1.6510638297872342, "grad_norm": 4.159237861633301, "learning_rate": 8.002593432653743e-06, "loss": 0.802, "step": 6208 }, { "epoch": 1.6513297872340424, "grad_norm": 4.047359943389893, "learning_rate": 8.001890124399565e-06, "loss": 0.7666, "step": 6209 }, { "epoch": 1.6515957446808511, "grad_norm": 3.548340320587158, "learning_rate": 8.001186723263374e-06, "loss": 0.8141, "step": 6210 }, { "epoch": 1.6518617021276596, "grad_norm": 4.3510050773620605, "learning_rate": 8.00048322926693e-06, "loss": 0.7908, "step": 6211 }, { "epoch": 1.652127659574468, "grad_norm": 3.642498254776001, "learning_rate": 7.999779642432003e-06, "loss": 0.8594, "step": 6212 }, { "epoch": 1.6523936170212767, "grad_norm": 3.804325819015503, "learning_rate": 7.999075962780363e-06, "loss": 0.7736, "step": 6213 }, { "epoch": 1.652659574468085, "grad_norm": 4.080993175506592, "learning_rate": 7.998372190333781e-06, "loss": 0.8834, "step": 6214 }, { "epoch": 1.6529255319148937, "grad_norm": 4.291904449462891, "learning_rate": 7.997668325114033e-06, "loss": 0.8433, "step": 6215 }, { "epoch": 1.6531914893617021, "grad_norm": 3.4936020374298096, "learning_rate": 7.996964367142899e-06, "loss": 0.7045, "step": 6216 }, { "epoch": 1.6534574468085106, "grad_norm": 4.251427173614502, "learning_rate": 7.996260316442157e-06, "loss": 0.8487, "step": 6217 }, { "epoch": 1.6537234042553193, "grad_norm": 3.810161828994751, "learning_rate": 7.995556173033594e-06, "loss": 0.7715, "step": 6218 }, { "epoch": 1.6539893617021275, "grad_norm": 3.8157644271850586, "learning_rate": 7.994851936938996e-06, "loss": 0.8408, "step": 6219 }, { "epoch": 1.6542553191489362, "grad_norm": 3.614837646484375, "learning_rate": 7.994147608180153e-06, "loss": 0.7829, "step": 6220 }, { "epoch": 1.6545212765957447, "grad_norm": 4.262511253356934, "learning_rate": 7.99344318677886e-06, "loss": 0.8728, "step": 6221 }, { "epoch": 1.6547872340425531, "grad_norm": 4.14133358001709, "learning_rate": 7.992738672756909e-06, "loss": 0.8611, "step": 6222 }, { "epoch": 1.6550531914893618, "grad_norm": 4.4198737144470215, "learning_rate": 7.992034066136099e-06, "loss": 0.8825, "step": 6223 }, { "epoch": 1.65531914893617, "grad_norm": 4.433263778686523, "learning_rate": 7.991329366938232e-06, "loss": 0.9547, "step": 6224 }, { "epoch": 1.6555851063829787, "grad_norm": 4.354765892028809, "learning_rate": 7.990624575185116e-06, "loss": 0.9415, "step": 6225 }, { "epoch": 1.6558510638297872, "grad_norm": 4.149988174438477, "learning_rate": 7.98991969089855e-06, "loss": 0.7804, "step": 6226 }, { "epoch": 1.6561170212765957, "grad_norm": 3.833970546722412, "learning_rate": 7.98921471410035e-06, "loss": 0.7944, "step": 6227 }, { "epoch": 1.6563829787234043, "grad_norm": 3.816167116165161, "learning_rate": 7.98850964481233e-06, "loss": 0.8054, "step": 6228 }, { "epoch": 1.6566489361702128, "grad_norm": 3.758295774459839, "learning_rate": 7.987804483056301e-06, "loss": 0.7724, "step": 6229 }, { "epoch": 1.6569148936170213, "grad_norm": 4.2231669425964355, "learning_rate": 7.987099228854083e-06, "loss": 0.8713, "step": 6230 }, { "epoch": 1.65718085106383, "grad_norm": 4.497824192047119, "learning_rate": 7.9863938822275e-06, "loss": 0.9629, "step": 6231 }, { "epoch": 1.6574468085106382, "grad_norm": 3.9088895320892334, "learning_rate": 7.985688443198371e-06, "loss": 0.7597, "step": 6232 }, { "epoch": 1.6577127659574469, "grad_norm": 3.699256658554077, "learning_rate": 7.984982911788528e-06, "loss": 0.8468, "step": 6233 }, { "epoch": 1.6579787234042553, "grad_norm": 3.8971588611602783, "learning_rate": 7.9842772880198e-06, "loss": 0.8377, "step": 6234 }, { "epoch": 1.6582446808510638, "grad_norm": 3.8062503337860107, "learning_rate": 7.98357157191402e-06, "loss": 0.6739, "step": 6235 }, { "epoch": 1.6585106382978725, "grad_norm": 3.7170534133911133, "learning_rate": 7.982865763493022e-06, "loss": 0.7505, "step": 6236 }, { "epoch": 1.6587765957446807, "grad_norm": 3.678074598312378, "learning_rate": 7.982159862778645e-06, "loss": 0.7589, "step": 6237 }, { "epoch": 1.6590425531914894, "grad_norm": 3.895219326019287, "learning_rate": 7.98145386979273e-06, "loss": 0.6712, "step": 6238 }, { "epoch": 1.6593085106382979, "grad_norm": 4.339925765991211, "learning_rate": 7.980747784557123e-06, "loss": 0.9584, "step": 6239 }, { "epoch": 1.6595744680851063, "grad_norm": 3.8446319103240967, "learning_rate": 7.98004160709367e-06, "loss": 0.7287, "step": 6240 }, { "epoch": 1.659840425531915, "grad_norm": 3.852252960205078, "learning_rate": 7.979335337424222e-06, "loss": 0.9698, "step": 6241 }, { "epoch": 1.6601063829787233, "grad_norm": 3.7780802249908447, "learning_rate": 7.97862897557063e-06, "loss": 0.8085, "step": 6242 }, { "epoch": 1.660372340425532, "grad_norm": 3.954035758972168, "learning_rate": 7.97792252155475e-06, "loss": 0.8768, "step": 6243 }, { "epoch": 1.6606382978723404, "grad_norm": 3.267712116241455, "learning_rate": 7.977215975398442e-06, "loss": 0.6974, "step": 6244 }, { "epoch": 1.6609042553191489, "grad_norm": 3.534168243408203, "learning_rate": 7.976509337123567e-06, "loss": 0.8029, "step": 6245 }, { "epoch": 1.6611702127659576, "grad_norm": 3.9597525596618652, "learning_rate": 7.975802606751989e-06, "loss": 0.7754, "step": 6246 }, { "epoch": 1.6614361702127658, "grad_norm": 4.123916149139404, "learning_rate": 7.975095784305572e-06, "loss": 0.8451, "step": 6247 }, { "epoch": 1.6617021276595745, "grad_norm": 3.989689588546753, "learning_rate": 7.97438886980619e-06, "loss": 0.7707, "step": 6248 }, { "epoch": 1.661968085106383, "grad_norm": 4.045599937438965, "learning_rate": 7.973681863275715e-06, "loss": 0.7474, "step": 6249 }, { "epoch": 1.6622340425531914, "grad_norm": 4.4239420890808105, "learning_rate": 7.972974764736023e-06, "loss": 0.7858, "step": 6250 }, { "epoch": 1.6625, "grad_norm": 3.499119520187378, "learning_rate": 7.972267574208991e-06, "loss": 0.7021, "step": 6251 }, { "epoch": 1.6627659574468086, "grad_norm": 4.45729923248291, "learning_rate": 7.971560291716501e-06, "loss": 0.9094, "step": 6252 }, { "epoch": 1.663031914893617, "grad_norm": 4.242092609405518, "learning_rate": 7.970852917280434e-06, "loss": 0.8807, "step": 6253 }, { "epoch": 1.6632978723404257, "grad_norm": 3.947512149810791, "learning_rate": 7.970145450922684e-06, "loss": 0.8778, "step": 6254 }, { "epoch": 1.663563829787234, "grad_norm": 5.4790167808532715, "learning_rate": 7.969437892665134e-06, "loss": 0.8196, "step": 6255 }, { "epoch": 1.6638297872340426, "grad_norm": 3.856820583343506, "learning_rate": 7.968730242529681e-06, "loss": 0.7653, "step": 6256 }, { "epoch": 1.664095744680851, "grad_norm": 4.446346759796143, "learning_rate": 7.968022500538219e-06, "loss": 0.9374, "step": 6257 }, { "epoch": 1.6643617021276595, "grad_norm": 4.079642295837402, "learning_rate": 7.967314666712647e-06, "loss": 0.8123, "step": 6258 }, { "epoch": 1.6646276595744682, "grad_norm": 4.338622570037842, "learning_rate": 7.966606741074864e-06, "loss": 0.7508, "step": 6259 }, { "epoch": 1.6648936170212765, "grad_norm": 3.974862813949585, "learning_rate": 7.965898723646777e-06, "loss": 0.8222, "step": 6260 }, { "epoch": 1.6651595744680852, "grad_norm": 4.263228416442871, "learning_rate": 7.96519061445029e-06, "loss": 0.9591, "step": 6261 }, { "epoch": 1.6654255319148936, "grad_norm": 3.6377105712890625, "learning_rate": 7.964482413507316e-06, "loss": 0.7791, "step": 6262 }, { "epoch": 1.665691489361702, "grad_norm": 3.3404452800750732, "learning_rate": 7.963774120839767e-06, "loss": 0.7668, "step": 6263 }, { "epoch": 1.6659574468085108, "grad_norm": 3.6252615451812744, "learning_rate": 7.963065736469555e-06, "loss": 0.7628, "step": 6264 }, { "epoch": 1.666223404255319, "grad_norm": 4.053292751312256, "learning_rate": 7.9623572604186e-06, "loss": 0.9255, "step": 6265 }, { "epoch": 1.6664893617021277, "grad_norm": 3.612187385559082, "learning_rate": 7.961648692708826e-06, "loss": 0.7864, "step": 6266 }, { "epoch": 1.6667553191489362, "grad_norm": 4.19817590713501, "learning_rate": 7.960940033362152e-06, "loss": 0.8414, "step": 6267 }, { "epoch": 1.6670212765957446, "grad_norm": 3.919515371322632, "learning_rate": 7.960231282400509e-06, "loss": 0.7358, "step": 6268 }, { "epoch": 1.6672872340425533, "grad_norm": 4.0831732749938965, "learning_rate": 7.959522439845825e-06, "loss": 0.7613, "step": 6269 }, { "epoch": 1.6675531914893615, "grad_norm": 4.200259685516357, "learning_rate": 7.958813505720031e-06, "loss": 0.9464, "step": 6270 }, { "epoch": 1.6678191489361702, "grad_norm": 4.281257152557373, "learning_rate": 7.958104480045066e-06, "loss": 0.8795, "step": 6271 }, { "epoch": 1.6680851063829787, "grad_norm": 3.907784938812256, "learning_rate": 7.957395362842864e-06, "loss": 0.6676, "step": 6272 }, { "epoch": 1.6683510638297872, "grad_norm": 4.122792720794678, "learning_rate": 7.956686154135368e-06, "loss": 0.7808, "step": 6273 }, { "epoch": 1.6686170212765958, "grad_norm": 4.015087127685547, "learning_rate": 7.95597685394452e-06, "loss": 0.8536, "step": 6274 }, { "epoch": 1.6688829787234043, "grad_norm": 3.8058676719665527, "learning_rate": 7.95526746229227e-06, "loss": 0.8526, "step": 6275 }, { "epoch": 1.6691489361702128, "grad_norm": 4.022008895874023, "learning_rate": 7.954557979200562e-06, "loss": 0.7642, "step": 6276 }, { "epoch": 1.6694148936170212, "grad_norm": 3.820610284805298, "learning_rate": 7.953848404691354e-06, "loss": 0.8786, "step": 6277 }, { "epoch": 1.6696808510638297, "grad_norm": 3.6477434635162354, "learning_rate": 7.9531387387866e-06, "loss": 0.8277, "step": 6278 }, { "epoch": 1.6699468085106384, "grad_norm": 4.075412273406982, "learning_rate": 7.952428981508254e-06, "loss": 0.8095, "step": 6279 }, { "epoch": 1.6702127659574468, "grad_norm": 4.030799388885498, "learning_rate": 7.951719132878279e-06, "loss": 0.7007, "step": 6280 }, { "epoch": 1.6704787234042553, "grad_norm": 4.039961338043213, "learning_rate": 7.95100919291864e-06, "loss": 0.8829, "step": 6281 }, { "epoch": 1.670744680851064, "grad_norm": 3.8483259677886963, "learning_rate": 7.950299161651303e-06, "loss": 0.7494, "step": 6282 }, { "epoch": 1.6710106382978722, "grad_norm": 3.8535609245300293, "learning_rate": 7.949589039098235e-06, "loss": 0.7572, "step": 6283 }, { "epoch": 1.671276595744681, "grad_norm": 4.3112311363220215, "learning_rate": 7.94887882528141e-06, "loss": 0.9061, "step": 6284 }, { "epoch": 1.6715425531914894, "grad_norm": 3.8851253986358643, "learning_rate": 7.948168520222802e-06, "loss": 0.9334, "step": 6285 }, { "epoch": 1.6718085106382978, "grad_norm": 4.051077842712402, "learning_rate": 7.94745812394439e-06, "loss": 0.8568, "step": 6286 }, { "epoch": 1.6720744680851065, "grad_norm": 3.8714540004730225, "learning_rate": 7.946747636468153e-06, "loss": 0.8496, "step": 6287 }, { "epoch": 1.6723404255319148, "grad_norm": 3.9510905742645264, "learning_rate": 7.946037057816075e-06, "loss": 0.8367, "step": 6288 }, { "epoch": 1.6726063829787234, "grad_norm": 4.504206657409668, "learning_rate": 7.945326388010141e-06, "loss": 0.8716, "step": 6289 }, { "epoch": 1.672872340425532, "grad_norm": 4.116037845611572, "learning_rate": 7.944615627072341e-06, "loss": 0.8481, "step": 6290 }, { "epoch": 1.6731382978723404, "grad_norm": 3.539327383041382, "learning_rate": 7.943904775024667e-06, "loss": 0.6687, "step": 6291 }, { "epoch": 1.673404255319149, "grad_norm": 4.1150898933410645, "learning_rate": 7.943193831889112e-06, "loss": 0.9299, "step": 6292 }, { "epoch": 1.6736702127659573, "grad_norm": 4.379646301269531, "learning_rate": 7.942482797687675e-06, "loss": 0.8867, "step": 6293 }, { "epoch": 1.673936170212766, "grad_norm": 3.6255533695220947, "learning_rate": 7.941771672442358e-06, "loss": 0.6831, "step": 6294 }, { "epoch": 1.6742021276595744, "grad_norm": 4.358723163604736, "learning_rate": 7.94106045617516e-06, "loss": 0.6923, "step": 6295 }, { "epoch": 1.674468085106383, "grad_norm": 3.967379093170166, "learning_rate": 7.94034914890809e-06, "loss": 0.8413, "step": 6296 }, { "epoch": 1.6747340425531916, "grad_norm": 4.233070373535156, "learning_rate": 7.939637750663153e-06, "loss": 0.9755, "step": 6297 }, { "epoch": 1.675, "grad_norm": 3.4149739742279053, "learning_rate": 7.938926261462366e-06, "loss": 0.6741, "step": 6298 }, { "epoch": 1.6752659574468085, "grad_norm": 4.045546054840088, "learning_rate": 7.938214681327739e-06, "loss": 0.8484, "step": 6299 }, { "epoch": 1.675531914893617, "grad_norm": 4.123802185058594, "learning_rate": 7.93750301028129e-06, "loss": 0.8398, "step": 6300 }, { "epoch": 1.6757978723404254, "grad_norm": 3.7821900844573975, "learning_rate": 7.936791248345041e-06, "loss": 0.7785, "step": 6301 }, { "epoch": 1.6760638297872341, "grad_norm": 3.6713192462921143, "learning_rate": 7.936079395541013e-06, "loss": 0.7191, "step": 6302 }, { "epoch": 1.6763297872340426, "grad_norm": 4.085387706756592, "learning_rate": 7.935367451891232e-06, "loss": 0.684, "step": 6303 }, { "epoch": 1.676595744680851, "grad_norm": 3.6555123329162598, "learning_rate": 7.934655417417724e-06, "loss": 0.7526, "step": 6304 }, { "epoch": 1.6768617021276597, "grad_norm": 3.9464025497436523, "learning_rate": 7.933943292142524e-06, "loss": 0.7544, "step": 6305 }, { "epoch": 1.677127659574468, "grad_norm": 3.74369215965271, "learning_rate": 7.933231076087662e-06, "loss": 0.7524, "step": 6306 }, { "epoch": 1.6773936170212767, "grad_norm": 4.703025817871094, "learning_rate": 7.932518769275179e-06, "loss": 0.8955, "step": 6307 }, { "epoch": 1.6776595744680851, "grad_norm": 4.241019248962402, "learning_rate": 7.931806371727111e-06, "loss": 0.7727, "step": 6308 }, { "epoch": 1.6779255319148936, "grad_norm": 4.029513359069824, "learning_rate": 7.931093883465503e-06, "loss": 0.7951, "step": 6309 }, { "epoch": 1.6781914893617023, "grad_norm": 3.7332520484924316, "learning_rate": 7.930381304512401e-06, "loss": 0.7148, "step": 6310 }, { "epoch": 1.6784574468085105, "grad_norm": 3.734999179840088, "learning_rate": 7.92966863488985e-06, "loss": 0.7856, "step": 6311 }, { "epoch": 1.6787234042553192, "grad_norm": 4.164159774780273, "learning_rate": 7.928955874619902e-06, "loss": 0.8163, "step": 6312 }, { "epoch": 1.6789893617021276, "grad_norm": 4.043959617614746, "learning_rate": 7.928243023724611e-06, "loss": 0.8262, "step": 6313 }, { "epoch": 1.679255319148936, "grad_norm": 3.5217018127441406, "learning_rate": 7.927530082226034e-06, "loss": 0.7066, "step": 6314 }, { "epoch": 1.6795212765957448, "grad_norm": 4.035088539123535, "learning_rate": 7.926817050146227e-06, "loss": 0.9041, "step": 6315 }, { "epoch": 1.679787234042553, "grad_norm": 3.8981032371520996, "learning_rate": 7.926103927507257e-06, "loss": 0.8896, "step": 6316 }, { "epoch": 1.6800531914893617, "grad_norm": 3.613386392593384, "learning_rate": 7.925390714331185e-06, "loss": 0.8692, "step": 6317 }, { "epoch": 1.6803191489361702, "grad_norm": 4.042194843292236, "learning_rate": 7.924677410640081e-06, "loss": 0.8251, "step": 6318 }, { "epoch": 1.6805851063829786, "grad_norm": 3.749028444290161, "learning_rate": 7.923964016456014e-06, "loss": 0.8519, "step": 6319 }, { "epoch": 1.6808510638297873, "grad_norm": 3.482661008834839, "learning_rate": 7.92325053180106e-06, "loss": 0.6798, "step": 6320 }, { "epoch": 1.6811170212765958, "grad_norm": 3.876594066619873, "learning_rate": 7.92253695669729e-06, "loss": 0.8437, "step": 6321 }, { "epoch": 1.6813829787234043, "grad_norm": 3.941342830657959, "learning_rate": 7.921823291166785e-06, "loss": 0.7915, "step": 6322 }, { "epoch": 1.6816489361702127, "grad_norm": 4.015593528747559, "learning_rate": 7.92110953523163e-06, "loss": 0.8184, "step": 6323 }, { "epoch": 1.6819148936170212, "grad_norm": 4.370626449584961, "learning_rate": 7.920395688913906e-06, "loss": 0.962, "step": 6324 }, { "epoch": 1.6821808510638299, "grad_norm": 3.7897567749023438, "learning_rate": 7.919681752235701e-06, "loss": 0.9113, "step": 6325 }, { "epoch": 1.6824468085106383, "grad_norm": 3.8005380630493164, "learning_rate": 7.918967725219104e-06, "loss": 0.869, "step": 6326 }, { "epoch": 1.6827127659574468, "grad_norm": 4.056982040405273, "learning_rate": 7.918253607886212e-06, "loss": 0.8451, "step": 6327 }, { "epoch": 1.6829787234042555, "grad_norm": 3.5084946155548096, "learning_rate": 7.917539400259116e-06, "loss": 0.7714, "step": 6328 }, { "epoch": 1.6832446808510637, "grad_norm": 3.9143457412719727, "learning_rate": 7.916825102359914e-06, "loss": 0.8663, "step": 6329 }, { "epoch": 1.6835106382978724, "grad_norm": 3.867074966430664, "learning_rate": 7.916110714210711e-06, "loss": 0.8741, "step": 6330 }, { "epoch": 1.6837765957446809, "grad_norm": 3.8426260948181152, "learning_rate": 7.91539623583361e-06, "loss": 0.8347, "step": 6331 }, { "epoch": 1.6840425531914893, "grad_norm": 3.8092234134674072, "learning_rate": 7.914681667250714e-06, "loss": 0.8565, "step": 6332 }, { "epoch": 1.684308510638298, "grad_norm": 3.754821538925171, "learning_rate": 7.913967008484138e-06, "loss": 0.6845, "step": 6333 }, { "epoch": 1.6845744680851062, "grad_norm": 4.067741394042969, "learning_rate": 7.913252259555992e-06, "loss": 0.7716, "step": 6334 }, { "epoch": 1.684840425531915, "grad_norm": 4.096173286437988, "learning_rate": 7.91253742048839e-06, "loss": 0.8299, "step": 6335 }, { "epoch": 1.6851063829787234, "grad_norm": 4.119457721710205, "learning_rate": 7.911822491303453e-06, "loss": 0.8621, "step": 6336 }, { "epoch": 1.6853723404255319, "grad_norm": 4.278772354125977, "learning_rate": 7.911107472023298e-06, "loss": 0.8446, "step": 6337 }, { "epoch": 1.6856382978723405, "grad_norm": 3.7795321941375732, "learning_rate": 7.910392362670051e-06, "loss": 0.6943, "step": 6338 }, { "epoch": 1.6859042553191488, "grad_norm": 3.9733240604400635, "learning_rate": 7.909677163265838e-06, "loss": 0.6562, "step": 6339 }, { "epoch": 1.6861702127659575, "grad_norm": 4.160102844238281, "learning_rate": 7.908961873832788e-06, "loss": 0.7915, "step": 6340 }, { "epoch": 1.686436170212766, "grad_norm": 4.3431525230407715, "learning_rate": 7.908246494393032e-06, "loss": 0.8474, "step": 6341 }, { "epoch": 1.6867021276595744, "grad_norm": 4.230860233306885, "learning_rate": 7.907531024968705e-06, "loss": 0.7098, "step": 6342 }, { "epoch": 1.686968085106383, "grad_norm": 4.223114967346191, "learning_rate": 7.906815465581945e-06, "loss": 0.7278, "step": 6343 }, { "epoch": 1.6872340425531915, "grad_norm": 4.246336460113525, "learning_rate": 7.906099816254895e-06, "loss": 0.825, "step": 6344 }, { "epoch": 1.6875, "grad_norm": 3.5722670555114746, "learning_rate": 7.905384077009693e-06, "loss": 0.8907, "step": 6345 }, { "epoch": 1.6877659574468085, "grad_norm": 4.00727653503418, "learning_rate": 7.904668247868486e-06, "loss": 0.7821, "step": 6346 }, { "epoch": 1.688031914893617, "grad_norm": 3.889538049697876, "learning_rate": 7.903952328853426e-06, "loss": 0.7967, "step": 6347 }, { "epoch": 1.6882978723404256, "grad_norm": 3.923154830932617, "learning_rate": 7.90323631998666e-06, "loss": 0.8152, "step": 6348 }, { "epoch": 1.688563829787234, "grad_norm": 4.059485912322998, "learning_rate": 7.902520221290345e-06, "loss": 0.7824, "step": 6349 }, { "epoch": 1.6888297872340425, "grad_norm": 4.1757378578186035, "learning_rate": 7.901804032786637e-06, "loss": 0.8839, "step": 6350 }, { "epoch": 1.6890957446808512, "grad_norm": 3.6736671924591064, "learning_rate": 7.901087754497694e-06, "loss": 0.684, "step": 6351 }, { "epoch": 1.6893617021276595, "grad_norm": 4.116995811462402, "learning_rate": 7.900371386445682e-06, "loss": 0.9625, "step": 6352 }, { "epoch": 1.6896276595744681, "grad_norm": 3.686619758605957, "learning_rate": 7.899654928652765e-06, "loss": 0.8667, "step": 6353 }, { "epoch": 1.6898936170212766, "grad_norm": 4.151339054107666, "learning_rate": 7.89893838114111e-06, "loss": 0.8102, "step": 6354 }, { "epoch": 1.690159574468085, "grad_norm": 3.7917020320892334, "learning_rate": 7.898221743932887e-06, "loss": 0.934, "step": 6355 }, { "epoch": 1.6904255319148938, "grad_norm": 3.5394623279571533, "learning_rate": 7.897505017050272e-06, "loss": 0.7577, "step": 6356 }, { "epoch": 1.690691489361702, "grad_norm": 4.058946132659912, "learning_rate": 7.896788200515442e-06, "loss": 0.7536, "step": 6357 }, { "epoch": 1.6909574468085107, "grad_norm": 3.8410744667053223, "learning_rate": 7.896071294350574e-06, "loss": 0.8212, "step": 6358 }, { "epoch": 1.6912234042553191, "grad_norm": 3.915674924850464, "learning_rate": 7.89535429857785e-06, "loss": 0.8288, "step": 6359 }, { "epoch": 1.6914893617021276, "grad_norm": 3.954108715057373, "learning_rate": 7.894637213219454e-06, "loss": 0.7738, "step": 6360 }, { "epoch": 1.6917553191489363, "grad_norm": 4.220264434814453, "learning_rate": 7.893920038297575e-06, "loss": 0.7686, "step": 6361 }, { "epoch": 1.6920212765957445, "grad_norm": 4.50542688369751, "learning_rate": 7.893202773834404e-06, "loss": 0.825, "step": 6362 }, { "epoch": 1.6922872340425532, "grad_norm": 4.274563312530518, "learning_rate": 7.892485419852131e-06, "loss": 0.8119, "step": 6363 }, { "epoch": 1.6925531914893617, "grad_norm": 3.8938279151916504, "learning_rate": 7.891767976372957e-06, "loss": 0.9073, "step": 6364 }, { "epoch": 1.6928191489361701, "grad_norm": 3.949944257736206, "learning_rate": 7.891050443419074e-06, "loss": 0.757, "step": 6365 }, { "epoch": 1.6930851063829788, "grad_norm": 4.313665866851807, "learning_rate": 7.890332821012687e-06, "loss": 0.8997, "step": 6366 }, { "epoch": 1.6933510638297873, "grad_norm": 4.165764331817627, "learning_rate": 7.889615109176e-06, "loss": 0.8262, "step": 6367 }, { "epoch": 1.6936170212765957, "grad_norm": 3.462186336517334, "learning_rate": 7.88889730793122e-06, "loss": 0.6989, "step": 6368 }, { "epoch": 1.6938829787234042, "grad_norm": 4.610195159912109, "learning_rate": 7.888179417300556e-06, "loss": 0.924, "step": 6369 }, { "epoch": 1.6941489361702127, "grad_norm": 3.8986306190490723, "learning_rate": 7.887461437306221e-06, "loss": 0.8204, "step": 6370 }, { "epoch": 1.6944148936170214, "grad_norm": 3.9623425006866455, "learning_rate": 7.886743367970428e-06, "loss": 0.8856, "step": 6371 }, { "epoch": 1.6946808510638298, "grad_norm": 3.7937700748443604, "learning_rate": 7.886025209315396e-06, "loss": 0.905, "step": 6372 }, { "epoch": 1.6949468085106383, "grad_norm": 3.6256890296936035, "learning_rate": 7.885306961363347e-06, "loss": 0.7097, "step": 6373 }, { "epoch": 1.695212765957447, "grad_norm": 4.079528331756592, "learning_rate": 7.884588624136505e-06, "loss": 0.8255, "step": 6374 }, { "epoch": 1.6954787234042552, "grad_norm": 3.7182741165161133, "learning_rate": 7.883870197657094e-06, "loss": 0.671, "step": 6375 }, { "epoch": 1.695744680851064, "grad_norm": 3.2320377826690674, "learning_rate": 7.883151681947343e-06, "loss": 0.6876, "step": 6376 }, { "epoch": 1.6960106382978724, "grad_norm": 3.610546588897705, "learning_rate": 7.882433077029484e-06, "loss": 0.7904, "step": 6377 }, { "epoch": 1.6962765957446808, "grad_norm": 3.8851020336151123, "learning_rate": 7.881714382925753e-06, "loss": 0.7701, "step": 6378 }, { "epoch": 1.6965425531914895, "grad_norm": 3.727907657623291, "learning_rate": 7.880995599658387e-06, "loss": 0.8374, "step": 6379 }, { "epoch": 1.6968085106382977, "grad_norm": 3.564770221710205, "learning_rate": 7.880276727249623e-06, "loss": 0.6483, "step": 6380 }, { "epoch": 1.6970744680851064, "grad_norm": 4.088687419891357, "learning_rate": 7.879557765721707e-06, "loss": 0.7902, "step": 6381 }, { "epoch": 1.6973404255319149, "grad_norm": 4.087176322937012, "learning_rate": 7.878838715096883e-06, "loss": 0.8723, "step": 6382 }, { "epoch": 1.6976063829787233, "grad_norm": 3.7613840103149414, "learning_rate": 7.878119575397401e-06, "loss": 0.7559, "step": 6383 }, { "epoch": 1.697872340425532, "grad_norm": 4.426526069641113, "learning_rate": 7.87740034664551e-06, "loss": 1.1472, "step": 6384 }, { "epoch": 1.6981382978723403, "grad_norm": 3.5922887325286865, "learning_rate": 7.876681028863464e-06, "loss": 0.8193, "step": 6385 }, { "epoch": 1.698404255319149, "grad_norm": 4.141395092010498, "learning_rate": 7.875961622073523e-06, "loss": 0.8629, "step": 6386 }, { "epoch": 1.6986702127659574, "grad_norm": 3.894594669342041, "learning_rate": 7.875242126297939e-06, "loss": 0.8301, "step": 6387 }, { "epoch": 1.6989361702127659, "grad_norm": 3.929243564605713, "learning_rate": 7.87452254155898e-06, "loss": 0.8301, "step": 6388 }, { "epoch": 1.6992021276595746, "grad_norm": 3.575058698654175, "learning_rate": 7.87380286787891e-06, "loss": 0.7595, "step": 6389 }, { "epoch": 1.699468085106383, "grad_norm": 3.9643123149871826, "learning_rate": 7.873083105279996e-06, "loss": 0.8527, "step": 6390 }, { "epoch": 1.6997340425531915, "grad_norm": 3.8817079067230225, "learning_rate": 7.872363253784508e-06, "loss": 0.6764, "step": 6391 }, { "epoch": 1.7, "grad_norm": 4.209853649139404, "learning_rate": 7.871643313414718e-06, "loss": 0.8082, "step": 6392 }, { "epoch": 1.7002659574468084, "grad_norm": 3.9260003566741943, "learning_rate": 7.870923284192904e-06, "loss": 0.7839, "step": 6393 }, { "epoch": 1.700531914893617, "grad_norm": 3.726177453994751, "learning_rate": 7.870203166141343e-06, "loss": 0.721, "step": 6394 }, { "epoch": 1.7007978723404256, "grad_norm": 4.2059326171875, "learning_rate": 7.869482959282318e-06, "loss": 0.7346, "step": 6395 }, { "epoch": 1.701063829787234, "grad_norm": 4.017068862915039, "learning_rate": 7.868762663638111e-06, "loss": 0.6286, "step": 6396 }, { "epoch": 1.7013297872340427, "grad_norm": 3.6799540519714355, "learning_rate": 7.86804227923101e-06, "loss": 0.7389, "step": 6397 }, { "epoch": 1.701595744680851, "grad_norm": 3.797459602355957, "learning_rate": 7.867321806083303e-06, "loss": 0.7271, "step": 6398 }, { "epoch": 1.7018617021276596, "grad_norm": 3.9897758960723877, "learning_rate": 7.866601244217284e-06, "loss": 0.8449, "step": 6399 }, { "epoch": 1.702127659574468, "grad_norm": 4.305942058563232, "learning_rate": 7.86588059365525e-06, "loss": 0.8108, "step": 6400 }, { "epoch": 1.7023936170212766, "grad_norm": 3.727057456970215, "learning_rate": 7.865159854419493e-06, "loss": 0.801, "step": 6401 }, { "epoch": 1.7026595744680852, "grad_norm": 3.9825263023376465, "learning_rate": 7.864439026532318e-06, "loss": 0.8026, "step": 6402 }, { "epoch": 1.7029255319148935, "grad_norm": 3.602372884750366, "learning_rate": 7.863718110016025e-06, "loss": 0.6829, "step": 6403 }, { "epoch": 1.7031914893617022, "grad_norm": 4.175540447235107, "learning_rate": 7.862997104892924e-06, "loss": 0.7491, "step": 6404 }, { "epoch": 1.7034574468085106, "grad_norm": 3.7469863891601562, "learning_rate": 7.862276011185323e-06, "loss": 0.6495, "step": 6405 }, { "epoch": 1.703723404255319, "grad_norm": 3.860929012298584, "learning_rate": 7.861554828915531e-06, "loss": 0.8538, "step": 6406 }, { "epoch": 1.7039893617021278, "grad_norm": 3.6298773288726807, "learning_rate": 7.860833558105863e-06, "loss": 0.7653, "step": 6407 }, { "epoch": 1.704255319148936, "grad_norm": 3.6208910942077637, "learning_rate": 7.860112198778638e-06, "loss": 0.8272, "step": 6408 }, { "epoch": 1.7045212765957447, "grad_norm": 3.9331130981445312, "learning_rate": 7.859390750956172e-06, "loss": 0.802, "step": 6409 }, { "epoch": 1.7047872340425532, "grad_norm": 3.843306303024292, "learning_rate": 7.858669214660792e-06, "loss": 0.8426, "step": 6410 }, { "epoch": 1.7050531914893616, "grad_norm": 3.844093084335327, "learning_rate": 7.857947589914819e-06, "loss": 0.7836, "step": 6411 }, { "epoch": 1.7053191489361703, "grad_norm": 3.7956225872039795, "learning_rate": 7.857225876740585e-06, "loss": 0.7151, "step": 6412 }, { "epoch": 1.7055851063829788, "grad_norm": 3.568847417831421, "learning_rate": 7.856504075160416e-06, "loss": 0.8406, "step": 6413 }, { "epoch": 1.7058510638297872, "grad_norm": 5.6517462730407715, "learning_rate": 7.855782185196648e-06, "loss": 0.8804, "step": 6414 }, { "epoch": 1.7061170212765957, "grad_norm": 3.6728999614715576, "learning_rate": 7.855060206871618e-06, "loss": 0.7445, "step": 6415 }, { "epoch": 1.7063829787234042, "grad_norm": 4.358402729034424, "learning_rate": 7.854338140207662e-06, "loss": 0.7949, "step": 6416 }, { "epoch": 1.7066489361702128, "grad_norm": 4.032132625579834, "learning_rate": 7.853615985227126e-06, "loss": 0.8492, "step": 6417 }, { "epoch": 1.7069148936170213, "grad_norm": 4.185794353485107, "learning_rate": 7.85289374195235e-06, "loss": 0.9054, "step": 6418 }, { "epoch": 1.7071808510638298, "grad_norm": 4.639225006103516, "learning_rate": 7.852171410405684e-06, "loss": 0.9118, "step": 6419 }, { "epoch": 1.7074468085106385, "grad_norm": 3.67490816116333, "learning_rate": 7.851448990609476e-06, "loss": 0.8046, "step": 6420 }, { "epoch": 1.7077127659574467, "grad_norm": 3.879056692123413, "learning_rate": 7.850726482586078e-06, "loss": 0.6831, "step": 6421 }, { "epoch": 1.7079787234042554, "grad_norm": 3.963789463043213, "learning_rate": 7.850003886357847e-06, "loss": 0.7881, "step": 6422 }, { "epoch": 1.7082446808510638, "grad_norm": 4.229506015777588, "learning_rate": 7.849281201947142e-06, "loss": 0.8157, "step": 6423 }, { "epoch": 1.7085106382978723, "grad_norm": 4.29874849319458, "learning_rate": 7.84855842937632e-06, "loss": 0.9049, "step": 6424 }, { "epoch": 1.708776595744681, "grad_norm": 3.8917417526245117, "learning_rate": 7.847835568667746e-06, "loss": 0.7922, "step": 6425 }, { "epoch": 1.7090425531914892, "grad_norm": 3.8562116622924805, "learning_rate": 7.847112619843789e-06, "loss": 0.7363, "step": 6426 }, { "epoch": 1.709308510638298, "grad_norm": 4.495066165924072, "learning_rate": 7.846389582926814e-06, "loss": 0.977, "step": 6427 }, { "epoch": 1.7095744680851064, "grad_norm": 3.899489164352417, "learning_rate": 7.845666457939193e-06, "loss": 0.7289, "step": 6428 }, { "epoch": 1.7098404255319148, "grad_norm": 3.9472427368164062, "learning_rate": 7.844943244903303e-06, "loss": 0.8273, "step": 6429 }, { "epoch": 1.7101063829787235, "grad_norm": 4.187959671020508, "learning_rate": 7.84421994384152e-06, "loss": 0.8658, "step": 6430 }, { "epoch": 1.7103723404255318, "grad_norm": 4.103062152862549, "learning_rate": 7.843496554776222e-06, "loss": 0.8097, "step": 6431 }, { "epoch": 1.7106382978723405, "grad_norm": 3.977741241455078, "learning_rate": 7.842773077729793e-06, "loss": 0.799, "step": 6432 }, { "epoch": 1.710904255319149, "grad_norm": 3.8812167644500732, "learning_rate": 7.842049512724618e-06, "loss": 0.6743, "step": 6433 }, { "epoch": 1.7111702127659574, "grad_norm": 4.060866832733154, "learning_rate": 7.841325859783086e-06, "loss": 0.7479, "step": 6434 }, { "epoch": 1.711436170212766, "grad_norm": 4.428943634033203, "learning_rate": 7.840602118927584e-06, "loss": 0.9101, "step": 6435 }, { "epoch": 1.7117021276595743, "grad_norm": 3.989323139190674, "learning_rate": 7.83987829018051e-06, "loss": 0.8308, "step": 6436 }, { "epoch": 1.711968085106383, "grad_norm": 4.173738479614258, "learning_rate": 7.83915437356426e-06, "loss": 0.8025, "step": 6437 }, { "epoch": 1.7122340425531914, "grad_norm": 3.7683372497558594, "learning_rate": 7.838430369101227e-06, "loss": 0.8168, "step": 6438 }, { "epoch": 1.7125, "grad_norm": 3.9382693767547607, "learning_rate": 7.837706276813819e-06, "loss": 0.8469, "step": 6439 }, { "epoch": 1.7127659574468086, "grad_norm": 4.1283278465271, "learning_rate": 7.836982096724438e-06, "loss": 0.7938, "step": 6440 }, { "epoch": 1.713031914893617, "grad_norm": 4.033618927001953, "learning_rate": 7.836257828855489e-06, "loss": 0.8479, "step": 6441 }, { "epoch": 1.7132978723404255, "grad_norm": 4.25187349319458, "learning_rate": 7.835533473229385e-06, "loss": 0.8507, "step": 6442 }, { "epoch": 1.7135638297872342, "grad_norm": 4.031279563903809, "learning_rate": 7.834809029868538e-06, "loss": 0.8444, "step": 6443 }, { "epoch": 1.7138297872340424, "grad_norm": 3.5434410572052, "learning_rate": 7.834084498795361e-06, "loss": 0.6862, "step": 6444 }, { "epoch": 1.7140957446808511, "grad_norm": 4.158623218536377, "learning_rate": 7.833359880032272e-06, "loss": 0.8362, "step": 6445 }, { "epoch": 1.7143617021276596, "grad_norm": 4.039031982421875, "learning_rate": 7.832635173601692e-06, "loss": 0.8806, "step": 6446 }, { "epoch": 1.714627659574468, "grad_norm": 4.09163236618042, "learning_rate": 7.831910379526047e-06, "loss": 0.9957, "step": 6447 }, { "epoch": 1.7148936170212767, "grad_norm": 3.4675064086914062, "learning_rate": 7.831185497827758e-06, "loss": 0.7451, "step": 6448 }, { "epoch": 1.715159574468085, "grad_norm": 3.6473426818847656, "learning_rate": 7.830460528529258e-06, "loss": 0.7436, "step": 6449 }, { "epoch": 1.7154255319148937, "grad_norm": 3.779623508453369, "learning_rate": 7.829735471652978e-06, "loss": 0.7522, "step": 6450 }, { "epoch": 1.7156914893617021, "grad_norm": 3.759127616882324, "learning_rate": 7.829010327221348e-06, "loss": 0.8186, "step": 6451 }, { "epoch": 1.7159574468085106, "grad_norm": 3.606985330581665, "learning_rate": 7.828285095256808e-06, "loss": 0.8916, "step": 6452 }, { "epoch": 1.7162234042553193, "grad_norm": 3.6981024742126465, "learning_rate": 7.8275597757818e-06, "loss": 0.7967, "step": 6453 }, { "epoch": 1.7164893617021275, "grad_norm": 3.8665547370910645, "learning_rate": 7.826834368818761e-06, "loss": 0.731, "step": 6454 }, { "epoch": 1.7167553191489362, "grad_norm": 3.547314167022705, "learning_rate": 7.826108874390141e-06, "loss": 0.7793, "step": 6455 }, { "epoch": 1.7170212765957447, "grad_norm": 3.823787212371826, "learning_rate": 7.825383292518383e-06, "loss": 0.7854, "step": 6456 }, { "epoch": 1.7172872340425531, "grad_norm": 4.252329349517822, "learning_rate": 7.82465762322594e-06, "loss": 0.9033, "step": 6457 }, { "epoch": 1.7175531914893618, "grad_norm": 3.9819960594177246, "learning_rate": 7.823931866535264e-06, "loss": 0.9616, "step": 6458 }, { "epoch": 1.71781914893617, "grad_norm": 4.099963665008545, "learning_rate": 7.823206022468812e-06, "loss": 0.8145, "step": 6459 }, { "epoch": 1.7180851063829787, "grad_norm": 4.146093368530273, "learning_rate": 7.82248009104904e-06, "loss": 0.7693, "step": 6460 }, { "epoch": 1.7183510638297872, "grad_norm": 3.9053497314453125, "learning_rate": 7.821754072298414e-06, "loss": 0.8287, "step": 6461 }, { "epoch": 1.7186170212765957, "grad_norm": 4.186066150665283, "learning_rate": 7.821027966239393e-06, "loss": 0.7655, "step": 6462 }, { "epoch": 1.7188829787234043, "grad_norm": 4.364232540130615, "learning_rate": 7.820301772894445e-06, "loss": 0.7746, "step": 6463 }, { "epoch": 1.7191489361702128, "grad_norm": 3.838639736175537, "learning_rate": 7.81957549228604e-06, "loss": 0.8342, "step": 6464 }, { "epoch": 1.7194148936170213, "grad_norm": 4.181699752807617, "learning_rate": 7.818849124436651e-06, "loss": 0.8181, "step": 6465 }, { "epoch": 1.71968085106383, "grad_norm": 4.069806098937988, "learning_rate": 7.818122669368751e-06, "loss": 0.7486, "step": 6466 }, { "epoch": 1.7199468085106382, "grad_norm": 3.9210989475250244, "learning_rate": 7.817396127104815e-06, "loss": 0.8064, "step": 6467 }, { "epoch": 1.7202127659574469, "grad_norm": 3.3825418949127197, "learning_rate": 7.816669497667328e-06, "loss": 0.7276, "step": 6468 }, { "epoch": 1.7204787234042553, "grad_norm": 4.07489013671875, "learning_rate": 7.815942781078772e-06, "loss": 0.7628, "step": 6469 }, { "epoch": 1.7207446808510638, "grad_norm": 4.20849084854126, "learning_rate": 7.815215977361628e-06, "loss": 0.822, "step": 6470 }, { "epoch": 1.7210106382978725, "grad_norm": 4.13023567199707, "learning_rate": 7.814489086538388e-06, "loss": 0.8117, "step": 6471 }, { "epoch": 1.7212765957446807, "grad_norm": 4.143436431884766, "learning_rate": 7.813762108631544e-06, "loss": 0.8769, "step": 6472 }, { "epoch": 1.7215425531914894, "grad_norm": 3.954219102859497, "learning_rate": 7.813035043663585e-06, "loss": 0.7836, "step": 6473 }, { "epoch": 1.7218085106382979, "grad_norm": 3.688133478164673, "learning_rate": 7.81230789165701e-06, "loss": 0.8905, "step": 6474 }, { "epoch": 1.7220744680851063, "grad_norm": 4.443986892700195, "learning_rate": 7.811580652634319e-06, "loss": 0.8933, "step": 6475 }, { "epoch": 1.722340425531915, "grad_norm": 3.791365146636963, "learning_rate": 7.810853326618012e-06, "loss": 0.8278, "step": 6476 }, { "epoch": 1.7226063829787233, "grad_norm": 4.167088031768799, "learning_rate": 7.810125913630593e-06, "loss": 0.7669, "step": 6477 }, { "epoch": 1.722872340425532, "grad_norm": 3.4958133697509766, "learning_rate": 7.80939841369457e-06, "loss": 0.7095, "step": 6478 }, { "epoch": 1.7231382978723404, "grad_norm": 4.2002339363098145, "learning_rate": 7.808670826832455e-06, "loss": 0.7463, "step": 6479 }, { "epoch": 1.7234042553191489, "grad_norm": 3.795557737350464, "learning_rate": 7.807943153066754e-06, "loss": 0.6731, "step": 6480 }, { "epoch": 1.7236702127659576, "grad_norm": 3.272183895111084, "learning_rate": 7.807215392419988e-06, "loss": 0.6116, "step": 6481 }, { "epoch": 1.7239361702127658, "grad_norm": 4.027061462402344, "learning_rate": 7.806487544914672e-06, "loss": 0.8122, "step": 6482 }, { "epoch": 1.7242021276595745, "grad_norm": 3.5909063816070557, "learning_rate": 7.805759610573327e-06, "loss": 0.7915, "step": 6483 }, { "epoch": 1.724468085106383, "grad_norm": 4.0041961669921875, "learning_rate": 7.805031589418477e-06, "loss": 0.6859, "step": 6484 }, { "epoch": 1.7247340425531914, "grad_norm": 3.9270341396331787, "learning_rate": 7.804303481472645e-06, "loss": 0.7585, "step": 6485 }, { "epoch": 1.725, "grad_norm": 4.444969654083252, "learning_rate": 7.803575286758365e-06, "loss": 0.8409, "step": 6486 }, { "epoch": 1.7252659574468086, "grad_norm": 4.4063262939453125, "learning_rate": 7.802847005298162e-06, "loss": 1.0173, "step": 6487 }, { "epoch": 1.725531914893617, "grad_norm": 4.078791618347168, "learning_rate": 7.802118637114575e-06, "loss": 0.8106, "step": 6488 }, { "epoch": 1.7257978723404257, "grad_norm": 3.8760604858398438, "learning_rate": 7.801390182230137e-06, "loss": 0.7751, "step": 6489 }, { "epoch": 1.726063829787234, "grad_norm": 4.180771350860596, "learning_rate": 7.800661640667388e-06, "loss": 0.8671, "step": 6490 }, { "epoch": 1.7263297872340426, "grad_norm": 3.921558380126953, "learning_rate": 7.799933012448872e-06, "loss": 0.8414, "step": 6491 }, { "epoch": 1.726595744680851, "grad_norm": 3.8960835933685303, "learning_rate": 7.799204297597129e-06, "loss": 0.7135, "step": 6492 }, { "epoch": 1.7268617021276595, "grad_norm": 3.834841251373291, "learning_rate": 7.798475496134714e-06, "loss": 0.7374, "step": 6493 }, { "epoch": 1.7271276595744682, "grad_norm": 3.5948872566223145, "learning_rate": 7.79774660808417e-06, "loss": 0.7354, "step": 6494 }, { "epoch": 1.7273936170212765, "grad_norm": 3.763976573944092, "learning_rate": 7.797017633468052e-06, "loss": 0.9162, "step": 6495 }, { "epoch": 1.7276595744680852, "grad_norm": 3.8534562587738037, "learning_rate": 7.796288572308914e-06, "loss": 0.8713, "step": 6496 }, { "epoch": 1.7279255319148936, "grad_norm": 4.049807071685791, "learning_rate": 7.795559424629317e-06, "loss": 0.8404, "step": 6497 }, { "epoch": 1.728191489361702, "grad_norm": 3.8596930503845215, "learning_rate": 7.79483019045182e-06, "loss": 0.7868, "step": 6498 }, { "epoch": 1.7284574468085108, "grad_norm": 4.452897071838379, "learning_rate": 7.794100869798986e-06, "loss": 0.9168, "step": 6499 }, { "epoch": 1.728723404255319, "grad_norm": 3.7102370262145996, "learning_rate": 7.79337146269338e-06, "loss": 0.9201, "step": 6500 }, { "epoch": 1.728723404255319, "eval_loss": 1.2800854444503784, "eval_runtime": 13.8491, "eval_samples_per_second": 28.883, "eval_steps_per_second": 3.61, "step": 6500 }, { "epoch": 1.7289893617021277, "grad_norm": 4.088536262512207, "learning_rate": 7.792641969157574e-06, "loss": 0.8304, "step": 6501 }, { "epoch": 1.7292553191489362, "grad_norm": 3.8640379905700684, "learning_rate": 7.791912389214138e-06, "loss": 0.77, "step": 6502 }, { "epoch": 1.7295212765957446, "grad_norm": 3.927625894546509, "learning_rate": 7.791182722885644e-06, "loss": 0.7303, "step": 6503 }, { "epoch": 1.7297872340425533, "grad_norm": 3.960904598236084, "learning_rate": 7.790452970194673e-06, "loss": 0.8346, "step": 6504 }, { "epoch": 1.7300531914893615, "grad_norm": 3.953512191772461, "learning_rate": 7.7897231311638e-06, "loss": 0.6958, "step": 6505 }, { "epoch": 1.7303191489361702, "grad_norm": 3.7672922611236572, "learning_rate": 7.788993205815606e-06, "loss": 0.7887, "step": 6506 }, { "epoch": 1.7305851063829787, "grad_norm": 4.269046783447266, "learning_rate": 7.788263194172684e-06, "loss": 0.9836, "step": 6507 }, { "epoch": 1.7308510638297872, "grad_norm": 3.96058988571167, "learning_rate": 7.787533096257613e-06, "loss": 0.9103, "step": 6508 }, { "epoch": 1.7311170212765958, "grad_norm": 3.9208950996398926, "learning_rate": 7.786802912092986e-06, "loss": 0.819, "step": 6509 }, { "epoch": 1.7313829787234043, "grad_norm": 3.600135326385498, "learning_rate": 7.786072641701397e-06, "loss": 0.8122, "step": 6510 }, { "epoch": 1.7316489361702128, "grad_norm": 3.9716193675994873, "learning_rate": 7.78534228510544e-06, "loss": 0.7281, "step": 6511 }, { "epoch": 1.7319148936170212, "grad_norm": 4.222037315368652, "learning_rate": 7.784611842327711e-06, "loss": 0.8926, "step": 6512 }, { "epoch": 1.7321808510638297, "grad_norm": 3.3642852306365967, "learning_rate": 7.783881313390816e-06, "loss": 0.7014, "step": 6513 }, { "epoch": 1.7324468085106384, "grad_norm": 4.051825046539307, "learning_rate": 7.783150698317354e-06, "loss": 0.7602, "step": 6514 }, { "epoch": 1.7327127659574468, "grad_norm": 4.036343574523926, "learning_rate": 7.782419997129934e-06, "loss": 0.8381, "step": 6515 }, { "epoch": 1.7329787234042553, "grad_norm": 3.722576856613159, "learning_rate": 7.781689209851163e-06, "loss": 0.8737, "step": 6516 }, { "epoch": 1.733244680851064, "grad_norm": 4.037721157073975, "learning_rate": 7.780958336503653e-06, "loss": 0.8382, "step": 6517 }, { "epoch": 1.7335106382978722, "grad_norm": 4.075493812561035, "learning_rate": 7.780227377110016e-06, "loss": 0.8215, "step": 6518 }, { "epoch": 1.733776595744681, "grad_norm": 3.9683899879455566, "learning_rate": 7.779496331692872e-06, "loss": 0.8797, "step": 6519 }, { "epoch": 1.7340425531914894, "grad_norm": 3.871469259262085, "learning_rate": 7.77876520027484e-06, "loss": 0.7388, "step": 6520 }, { "epoch": 1.7343085106382978, "grad_norm": 3.950624465942383, "learning_rate": 7.778033982878539e-06, "loss": 0.7502, "step": 6521 }, { "epoch": 1.7345744680851065, "grad_norm": 4.015387058258057, "learning_rate": 7.777302679526596e-06, "loss": 0.9874, "step": 6522 }, { "epoch": 1.7348404255319148, "grad_norm": 4.03596830368042, "learning_rate": 7.776571290241642e-06, "loss": 0.7633, "step": 6523 }, { "epoch": 1.7351063829787234, "grad_norm": 4.029125213623047, "learning_rate": 7.775839815046299e-06, "loss": 0.7994, "step": 6524 }, { "epoch": 1.735372340425532, "grad_norm": 4.058604717254639, "learning_rate": 7.775108253963207e-06, "loss": 0.7391, "step": 6525 }, { "epoch": 1.7356382978723404, "grad_norm": 3.862391948699951, "learning_rate": 7.774376607014995e-06, "loss": 0.9032, "step": 6526 }, { "epoch": 1.735904255319149, "grad_norm": 3.903395414352417, "learning_rate": 7.773644874224306e-06, "loss": 0.8429, "step": 6527 }, { "epoch": 1.7361702127659573, "grad_norm": 3.8711469173431396, "learning_rate": 7.77291305561378e-06, "loss": 0.807, "step": 6528 }, { "epoch": 1.736436170212766, "grad_norm": 3.977463483810425, "learning_rate": 7.77218115120606e-06, "loss": 0.7929, "step": 6529 }, { "epoch": 1.7367021276595744, "grad_norm": 3.7397544384002686, "learning_rate": 7.77144916102379e-06, "loss": 0.8478, "step": 6530 }, { "epoch": 1.736968085106383, "grad_norm": 3.6703922748565674, "learning_rate": 7.770717085089618e-06, "loss": 0.6432, "step": 6531 }, { "epoch": 1.7372340425531916, "grad_norm": 4.170365333557129, "learning_rate": 7.7699849234262e-06, "loss": 0.7565, "step": 6532 }, { "epoch": 1.7375, "grad_norm": 3.6264007091522217, "learning_rate": 7.769252676056186e-06, "loss": 0.7635, "step": 6533 }, { "epoch": 1.7377659574468085, "grad_norm": 3.9042675495147705, "learning_rate": 7.768520343002235e-06, "loss": 0.9037, "step": 6534 }, { "epoch": 1.738031914893617, "grad_norm": 4.19412899017334, "learning_rate": 7.767787924287005e-06, "loss": 0.8516, "step": 6535 }, { "epoch": 1.7382978723404254, "grad_norm": 3.869814157485962, "learning_rate": 7.767055419933157e-06, "loss": 0.7815, "step": 6536 }, { "epoch": 1.7385638297872341, "grad_norm": 3.712411642074585, "learning_rate": 7.766322829963357e-06, "loss": 0.6676, "step": 6537 }, { "epoch": 1.7388297872340426, "grad_norm": 4.046865463256836, "learning_rate": 7.76559015440027e-06, "loss": 0.8799, "step": 6538 }, { "epoch": 1.739095744680851, "grad_norm": 3.908235549926758, "learning_rate": 7.76485739326657e-06, "loss": 0.7999, "step": 6539 }, { "epoch": 1.7393617021276597, "grad_norm": 4.396571159362793, "learning_rate": 7.764124546584926e-06, "loss": 0.8813, "step": 6540 }, { "epoch": 1.739627659574468, "grad_norm": 3.7259883880615234, "learning_rate": 7.763391614378014e-06, "loss": 0.8519, "step": 6541 }, { "epoch": 1.7398936170212767, "grad_norm": 3.7457261085510254, "learning_rate": 7.762658596668514e-06, "loss": 0.7913, "step": 6542 }, { "epoch": 1.7401595744680851, "grad_norm": 3.66605544090271, "learning_rate": 7.7619254934791e-06, "loss": 0.8122, "step": 6543 }, { "epoch": 1.7404255319148936, "grad_norm": 3.8894519805908203, "learning_rate": 7.761192304832463e-06, "loss": 0.6829, "step": 6544 }, { "epoch": 1.7406914893617023, "grad_norm": 3.4376041889190674, "learning_rate": 7.760459030751285e-06, "loss": 0.6903, "step": 6545 }, { "epoch": 1.7409574468085105, "grad_norm": 4.00453519821167, "learning_rate": 7.759725671258254e-06, "loss": 0.8714, "step": 6546 }, { "epoch": 1.7412234042553192, "grad_norm": 3.9484405517578125, "learning_rate": 7.758992226376062e-06, "loss": 0.9567, "step": 6547 }, { "epoch": 1.7414893617021276, "grad_norm": 3.885755777359009, "learning_rate": 7.7582586961274e-06, "loss": 0.7928, "step": 6548 }, { "epoch": 1.741755319148936, "grad_norm": 3.8768088817596436, "learning_rate": 7.757525080534968e-06, "loss": 0.7554, "step": 6549 }, { "epoch": 1.7420212765957448, "grad_norm": 3.7053639888763428, "learning_rate": 7.756791379621461e-06, "loss": 0.8122, "step": 6550 }, { "epoch": 1.742287234042553, "grad_norm": 3.9800238609313965, "learning_rate": 7.756057593409588e-06, "loss": 0.8505, "step": 6551 }, { "epoch": 1.7425531914893617, "grad_norm": 3.586451768875122, "learning_rate": 7.755323721922045e-06, "loss": 0.7435, "step": 6552 }, { "epoch": 1.7428191489361702, "grad_norm": 4.315957069396973, "learning_rate": 7.754589765181543e-06, "loss": 0.8308, "step": 6553 }, { "epoch": 1.7430851063829786, "grad_norm": 3.764915704727173, "learning_rate": 7.75385572321079e-06, "loss": 0.7939, "step": 6554 }, { "epoch": 1.7433510638297873, "grad_norm": 3.9177279472351074, "learning_rate": 7.7531215960325e-06, "loss": 0.8557, "step": 6555 }, { "epoch": 1.7436170212765958, "grad_norm": 3.802114248275757, "learning_rate": 7.752387383669384e-06, "loss": 0.7933, "step": 6556 }, { "epoch": 1.7438829787234043, "grad_norm": 4.129657745361328, "learning_rate": 7.751653086144164e-06, "loss": 0.8744, "step": 6557 }, { "epoch": 1.7441489361702127, "grad_norm": 4.201019763946533, "learning_rate": 7.750918703479558e-06, "loss": 0.7875, "step": 6558 }, { "epoch": 1.7444148936170212, "grad_norm": 4.305670261383057, "learning_rate": 7.750184235698285e-06, "loss": 0.8137, "step": 6559 }, { "epoch": 1.7446808510638299, "grad_norm": 3.571631908416748, "learning_rate": 7.749449682823077e-06, "loss": 0.7308, "step": 6560 }, { "epoch": 1.7449468085106383, "grad_norm": 4.124020576477051, "learning_rate": 7.74871504487666e-06, "loss": 0.9546, "step": 6561 }, { "epoch": 1.7452127659574468, "grad_norm": 4.1722588539123535, "learning_rate": 7.74798032188176e-06, "loss": 0.787, "step": 6562 }, { "epoch": 1.7454787234042555, "grad_norm": 4.017617225646973, "learning_rate": 7.747245513861115e-06, "loss": 0.8655, "step": 6563 }, { "epoch": 1.7457446808510637, "grad_norm": 4.122082233428955, "learning_rate": 7.74651062083746e-06, "loss": 0.9471, "step": 6564 }, { "epoch": 1.7460106382978724, "grad_norm": 4.254493713378906, "learning_rate": 7.745775642833532e-06, "loss": 0.8313, "step": 6565 }, { "epoch": 1.7462765957446809, "grad_norm": 3.856379985809326, "learning_rate": 7.745040579872073e-06, "loss": 0.9207, "step": 6566 }, { "epoch": 1.7465425531914893, "grad_norm": 4.020528316497803, "learning_rate": 7.744305431975827e-06, "loss": 0.7029, "step": 6567 }, { "epoch": 1.746808510638298, "grad_norm": 4.091069221496582, "learning_rate": 7.743570199167539e-06, "loss": 0.8682, "step": 6568 }, { "epoch": 1.7470744680851062, "grad_norm": 3.8805131912231445, "learning_rate": 7.742834881469959e-06, "loss": 0.8366, "step": 6569 }, { "epoch": 1.747340425531915, "grad_norm": 3.5972797870635986, "learning_rate": 7.742099478905837e-06, "loss": 0.784, "step": 6570 }, { "epoch": 1.7476063829787234, "grad_norm": 3.655684232711792, "learning_rate": 7.741363991497932e-06, "loss": 0.7849, "step": 6571 }, { "epoch": 1.7478723404255319, "grad_norm": 3.854562520980835, "learning_rate": 7.740628419268996e-06, "loss": 0.7961, "step": 6572 }, { "epoch": 1.7481382978723405, "grad_norm": 3.5972256660461426, "learning_rate": 7.73989276224179e-06, "loss": 0.8045, "step": 6573 }, { "epoch": 1.7484042553191488, "grad_norm": 4.087411880493164, "learning_rate": 7.739157020439077e-06, "loss": 0.8889, "step": 6574 }, { "epoch": 1.7486702127659575, "grad_norm": 4.145167350769043, "learning_rate": 7.738421193883618e-06, "loss": 0.8542, "step": 6575 }, { "epoch": 1.748936170212766, "grad_norm": 4.064332008361816, "learning_rate": 7.737685282598187e-06, "loss": 0.8523, "step": 6576 }, { "epoch": 1.7492021276595744, "grad_norm": 4.075108051300049, "learning_rate": 7.736949286605549e-06, "loss": 0.8839, "step": 6577 }, { "epoch": 1.749468085106383, "grad_norm": 4.157843112945557, "learning_rate": 7.736213205928476e-06, "loss": 0.9253, "step": 6578 }, { "epoch": 1.7497340425531915, "grad_norm": 3.978928327560425, "learning_rate": 7.735477040589745e-06, "loss": 0.8454, "step": 6579 }, { "epoch": 1.75, "grad_norm": 3.7294394969940186, "learning_rate": 7.734740790612137e-06, "loss": 0.7877, "step": 6580 }, { "epoch": 1.7502659574468085, "grad_norm": 4.367574214935303, "learning_rate": 7.734004456018424e-06, "loss": 0.7477, "step": 6581 }, { "epoch": 1.750531914893617, "grad_norm": 3.952146291732788, "learning_rate": 7.733268036831398e-06, "loss": 0.7725, "step": 6582 }, { "epoch": 1.7507978723404256, "grad_norm": 4.400146961212158, "learning_rate": 7.73253153307384e-06, "loss": 0.8059, "step": 6583 }, { "epoch": 1.751063829787234, "grad_norm": 4.003587245941162, "learning_rate": 7.73179494476854e-06, "loss": 0.8549, "step": 6584 }, { "epoch": 1.7513297872340425, "grad_norm": 3.898470640182495, "learning_rate": 7.731058271938286e-06, "loss": 0.7925, "step": 6585 }, { "epoch": 1.7515957446808512, "grad_norm": 3.6899170875549316, "learning_rate": 7.730321514605877e-06, "loss": 0.7535, "step": 6586 }, { "epoch": 1.7518617021276595, "grad_norm": 3.996615171432495, "learning_rate": 7.729584672794102e-06, "loss": 0.8278, "step": 6587 }, { "epoch": 1.7521276595744681, "grad_norm": 4.020608901977539, "learning_rate": 7.728847746525764e-06, "loss": 0.7233, "step": 6588 }, { "epoch": 1.7523936170212766, "grad_norm": 4.504430294036865, "learning_rate": 7.728110735823666e-06, "loss": 0.8254, "step": 6589 }, { "epoch": 1.752659574468085, "grad_norm": 3.7418766021728516, "learning_rate": 7.72737364071061e-06, "loss": 0.8151, "step": 6590 }, { "epoch": 1.7529255319148938, "grad_norm": 4.577789783477783, "learning_rate": 7.7266364612094e-06, "loss": 0.9276, "step": 6591 }, { "epoch": 1.753191489361702, "grad_norm": 4.067131042480469, "learning_rate": 7.72589919734285e-06, "loss": 0.8282, "step": 6592 }, { "epoch": 1.7534574468085107, "grad_norm": 4.11132287979126, "learning_rate": 7.725161849133769e-06, "loss": 0.8663, "step": 6593 }, { "epoch": 1.7537234042553191, "grad_norm": 3.8996002674102783, "learning_rate": 7.724424416604972e-06, "loss": 0.9631, "step": 6594 }, { "epoch": 1.7539893617021276, "grad_norm": 3.911623954772949, "learning_rate": 7.723686899779277e-06, "loss": 0.8082, "step": 6595 }, { "epoch": 1.7542553191489363, "grad_norm": 4.957215785980225, "learning_rate": 7.7229492986795e-06, "loss": 0.8758, "step": 6596 }, { "epoch": 1.7545212765957445, "grad_norm": 4.114643573760986, "learning_rate": 7.722211613328467e-06, "loss": 0.7665, "step": 6597 }, { "epoch": 1.7547872340425532, "grad_norm": 3.4866108894348145, "learning_rate": 7.721473843749e-06, "loss": 0.7636, "step": 6598 }, { "epoch": 1.7550531914893617, "grad_norm": 3.798917055130005, "learning_rate": 7.72073598996393e-06, "loss": 0.7645, "step": 6599 }, { "epoch": 1.7553191489361701, "grad_norm": 4.327617168426514, "learning_rate": 7.719998051996087e-06, "loss": 0.8174, "step": 6600 }, { "epoch": 1.7555851063829788, "grad_norm": 3.7455971240997314, "learning_rate": 7.719260029868299e-06, "loss": 0.7484, "step": 6601 }, { "epoch": 1.7558510638297873, "grad_norm": 3.4463014602661133, "learning_rate": 7.718521923603404e-06, "loss": 0.692, "step": 6602 }, { "epoch": 1.7561170212765957, "grad_norm": 3.920140027999878, "learning_rate": 7.717783733224243e-06, "loss": 0.9122, "step": 6603 }, { "epoch": 1.7563829787234042, "grad_norm": 4.227574825286865, "learning_rate": 7.717045458753651e-06, "loss": 0.7812, "step": 6604 }, { "epoch": 1.7566489361702127, "grad_norm": 4.23086404800415, "learning_rate": 7.716307100214472e-06, "loss": 0.829, "step": 6605 }, { "epoch": 1.7569148936170214, "grad_norm": 3.5714340209960938, "learning_rate": 7.715568657629557e-06, "loss": 0.8676, "step": 6606 }, { "epoch": 1.7571808510638298, "grad_norm": 4.220118045806885, "learning_rate": 7.71483013102175e-06, "loss": 0.7351, "step": 6607 }, { "epoch": 1.7574468085106383, "grad_norm": 3.8862133026123047, "learning_rate": 7.7140915204139e-06, "loss": 0.7836, "step": 6608 }, { "epoch": 1.757712765957447, "grad_norm": 3.9056966304779053, "learning_rate": 7.713352825828865e-06, "loss": 0.7439, "step": 6609 }, { "epoch": 1.7579787234042552, "grad_norm": 4.519630432128906, "learning_rate": 7.712614047289498e-06, "loss": 0.9618, "step": 6610 }, { "epoch": 1.758244680851064, "grad_norm": 3.756225109100342, "learning_rate": 7.711875184818659e-06, "loss": 0.7612, "step": 6611 }, { "epoch": 1.7585106382978724, "grad_norm": 4.109426498413086, "learning_rate": 7.71113623843921e-06, "loss": 0.8828, "step": 6612 }, { "epoch": 1.7587765957446808, "grad_norm": 4.274012565612793, "learning_rate": 7.710397208174012e-06, "loss": 0.8212, "step": 6613 }, { "epoch": 1.7590425531914895, "grad_norm": 4.489198207855225, "learning_rate": 7.709658094045933e-06, "loss": 0.9358, "step": 6614 }, { "epoch": 1.7593085106382977, "grad_norm": 3.796844005584717, "learning_rate": 7.708918896077843e-06, "loss": 0.8092, "step": 6615 }, { "epoch": 1.7595744680851064, "grad_norm": 4.139426231384277, "learning_rate": 7.708179614292614e-06, "loss": 0.7859, "step": 6616 }, { "epoch": 1.7598404255319149, "grad_norm": 4.109641075134277, "learning_rate": 7.707440248713118e-06, "loss": 0.7763, "step": 6617 }, { "epoch": 1.7601063829787233, "grad_norm": 4.1055521965026855, "learning_rate": 7.706700799362235e-06, "loss": 0.7225, "step": 6618 }, { "epoch": 1.760372340425532, "grad_norm": 4.071004390716553, "learning_rate": 7.70596126626284e-06, "loss": 0.7714, "step": 6619 }, { "epoch": 1.7606382978723403, "grad_norm": 4.117389678955078, "learning_rate": 7.705221649437819e-06, "loss": 0.8, "step": 6620 }, { "epoch": 1.760904255319149, "grad_norm": 3.617248058319092, "learning_rate": 7.704481948910057e-06, "loss": 0.8286, "step": 6621 }, { "epoch": 1.7611702127659574, "grad_norm": 3.6249337196350098, "learning_rate": 7.703742164702436e-06, "loss": 0.732, "step": 6622 }, { "epoch": 1.7614361702127659, "grad_norm": 3.584951400756836, "learning_rate": 7.703002296837849e-06, "loss": 0.859, "step": 6623 }, { "epoch": 1.7617021276595746, "grad_norm": 3.908857822418213, "learning_rate": 7.70226234533919e-06, "loss": 0.8112, "step": 6624 }, { "epoch": 1.761968085106383, "grad_norm": 4.350627422332764, "learning_rate": 7.701522310229353e-06, "loss": 0.9676, "step": 6625 }, { "epoch": 1.7622340425531915, "grad_norm": 3.7733817100524902, "learning_rate": 7.700782191531236e-06, "loss": 0.7312, "step": 6626 }, { "epoch": 1.7625, "grad_norm": 3.822552442550659, "learning_rate": 7.700041989267738e-06, "loss": 0.6901, "step": 6627 }, { "epoch": 1.7627659574468084, "grad_norm": 3.9083547592163086, "learning_rate": 7.69930170346176e-06, "loss": 0.7498, "step": 6628 }, { "epoch": 1.763031914893617, "grad_norm": 4.126950263977051, "learning_rate": 7.69856133413621e-06, "loss": 0.7975, "step": 6629 }, { "epoch": 1.7632978723404256, "grad_norm": 4.27503776550293, "learning_rate": 7.697820881313994e-06, "loss": 0.7927, "step": 6630 }, { "epoch": 1.763563829787234, "grad_norm": 4.2161407470703125, "learning_rate": 7.697080345018024e-06, "loss": 0.8779, "step": 6631 }, { "epoch": 1.7638297872340427, "grad_norm": 4.142273426055908, "learning_rate": 7.696339725271215e-06, "loss": 0.8069, "step": 6632 }, { "epoch": 1.764095744680851, "grad_norm": 4.17659330368042, "learning_rate": 7.695599022096478e-06, "loss": 0.7439, "step": 6633 }, { "epoch": 1.7643617021276596, "grad_norm": 4.072018623352051, "learning_rate": 7.694858235516735e-06, "loss": 0.8364, "step": 6634 }, { "epoch": 1.764627659574468, "grad_norm": 3.6811084747314453, "learning_rate": 7.694117365554905e-06, "loss": 0.8986, "step": 6635 }, { "epoch": 1.7648936170212766, "grad_norm": 3.924104928970337, "learning_rate": 7.693376412233913e-06, "loss": 0.7906, "step": 6636 }, { "epoch": 1.7651595744680852, "grad_norm": 4.180627822875977, "learning_rate": 7.69263537557668e-06, "loss": 0.814, "step": 6637 }, { "epoch": 1.7654255319148935, "grad_norm": 3.74808931350708, "learning_rate": 7.691894255606143e-06, "loss": 0.8623, "step": 6638 }, { "epoch": 1.7656914893617022, "grad_norm": 3.8845086097717285, "learning_rate": 7.691153052345227e-06, "loss": 0.8279, "step": 6639 }, { "epoch": 1.7659574468085106, "grad_norm": 3.6786465644836426, "learning_rate": 7.690411765816864e-06, "loss": 0.8579, "step": 6640 }, { "epoch": 1.766223404255319, "grad_norm": 4.260414123535156, "learning_rate": 7.689670396043997e-06, "loss": 0.8473, "step": 6641 }, { "epoch": 1.7664893617021278, "grad_norm": 3.757199287414551, "learning_rate": 7.688928943049558e-06, "loss": 0.8065, "step": 6642 }, { "epoch": 1.766755319148936, "grad_norm": 4.010439872741699, "learning_rate": 7.688187406856494e-06, "loss": 0.8412, "step": 6643 }, { "epoch": 1.7670212765957447, "grad_norm": 4.193131923675537, "learning_rate": 7.687445787487746e-06, "loss": 0.7638, "step": 6644 }, { "epoch": 1.7672872340425532, "grad_norm": 3.7920022010803223, "learning_rate": 7.686704084966263e-06, "loss": 0.7628, "step": 6645 }, { "epoch": 1.7675531914893616, "grad_norm": 3.6464099884033203, "learning_rate": 7.68596229931499e-06, "loss": 0.7547, "step": 6646 }, { "epoch": 1.7678191489361703, "grad_norm": 3.7222912311553955, "learning_rate": 7.685220430556883e-06, "loss": 0.6741, "step": 6647 }, { "epoch": 1.7680851063829788, "grad_norm": 3.48502254486084, "learning_rate": 7.684478478714892e-06, "loss": 0.6893, "step": 6648 }, { "epoch": 1.7683510638297872, "grad_norm": 4.072755813598633, "learning_rate": 7.683736443811978e-06, "loss": 0.8487, "step": 6649 }, { "epoch": 1.7686170212765957, "grad_norm": 3.5753612518310547, "learning_rate": 7.682994325871098e-06, "loss": 0.8314, "step": 6650 }, { "epoch": 1.7688829787234042, "grad_norm": 4.951267242431641, "learning_rate": 7.682252124915216e-06, "loss": 0.9956, "step": 6651 }, { "epoch": 1.7691489361702128, "grad_norm": 4.200650691986084, "learning_rate": 7.681509840967294e-06, "loss": 0.7119, "step": 6652 }, { "epoch": 1.7694148936170213, "grad_norm": 3.4650633335113525, "learning_rate": 7.6807674740503e-06, "loss": 0.843, "step": 6653 }, { "epoch": 1.7696808510638298, "grad_norm": 4.049907207489014, "learning_rate": 7.680025024187206e-06, "loss": 0.7776, "step": 6654 }, { "epoch": 1.7699468085106385, "grad_norm": 3.934799909591675, "learning_rate": 7.67928249140098e-06, "loss": 0.7957, "step": 6655 }, { "epoch": 1.7702127659574467, "grad_norm": 4.14153528213501, "learning_rate": 7.678539875714604e-06, "loss": 0.7445, "step": 6656 }, { "epoch": 1.7704787234042554, "grad_norm": 3.816898822784424, "learning_rate": 7.677797177151047e-06, "loss": 0.8869, "step": 6657 }, { "epoch": 1.7707446808510638, "grad_norm": 4.405877113342285, "learning_rate": 7.677054395733292e-06, "loss": 0.9004, "step": 6658 }, { "epoch": 1.7710106382978723, "grad_norm": 4.069585800170898, "learning_rate": 7.676311531484324e-06, "loss": 0.7907, "step": 6659 }, { "epoch": 1.771276595744681, "grad_norm": 3.9655072689056396, "learning_rate": 7.675568584427125e-06, "loss": 0.8069, "step": 6660 }, { "epoch": 1.7715425531914892, "grad_norm": 3.8515357971191406, "learning_rate": 7.674825554584686e-06, "loss": 0.8013, "step": 6661 }, { "epoch": 1.771808510638298, "grad_norm": 4.2742438316345215, "learning_rate": 7.674082441979993e-06, "loss": 0.9655, "step": 6662 }, { "epoch": 1.7720744680851064, "grad_norm": 4.425269603729248, "learning_rate": 7.67333924663604e-06, "loss": 0.872, "step": 6663 }, { "epoch": 1.7723404255319148, "grad_norm": 4.043865203857422, "learning_rate": 7.672595968575827e-06, "loss": 0.8425, "step": 6664 }, { "epoch": 1.7726063829787235, "grad_norm": 3.77255916595459, "learning_rate": 7.671852607822346e-06, "loss": 0.6711, "step": 6665 }, { "epoch": 1.7728723404255318, "grad_norm": 3.8917951583862305, "learning_rate": 7.671109164398598e-06, "loss": 0.7429, "step": 6666 }, { "epoch": 1.7731382978723405, "grad_norm": 4.034469127655029, "learning_rate": 7.67036563832759e-06, "loss": 0.884, "step": 6667 }, { "epoch": 1.773404255319149, "grad_norm": 4.177572727203369, "learning_rate": 7.669622029632323e-06, "loss": 0.7823, "step": 6668 }, { "epoch": 1.7736702127659574, "grad_norm": 3.816012382507324, "learning_rate": 7.668878338335808e-06, "loss": 0.8012, "step": 6669 }, { "epoch": 1.773936170212766, "grad_norm": 3.6478235721588135, "learning_rate": 7.668134564461057e-06, "loss": 0.8071, "step": 6670 }, { "epoch": 1.7742021276595743, "grad_norm": 4.1651177406311035, "learning_rate": 7.66739070803108e-06, "loss": 0.882, "step": 6671 }, { "epoch": 1.774468085106383, "grad_norm": 4.032572269439697, "learning_rate": 7.666646769068894e-06, "loss": 0.7804, "step": 6672 }, { "epoch": 1.7747340425531914, "grad_norm": 4.481500148773193, "learning_rate": 7.665902747597516e-06, "loss": 0.8824, "step": 6673 }, { "epoch": 1.775, "grad_norm": 3.6887848377227783, "learning_rate": 7.66515864363997e-06, "loss": 0.8179, "step": 6674 }, { "epoch": 1.7752659574468086, "grad_norm": 3.5154476165771484, "learning_rate": 7.664414457219277e-06, "loss": 0.8015, "step": 6675 }, { "epoch": 1.775531914893617, "grad_norm": 3.9713804721832275, "learning_rate": 7.663670188358464e-06, "loss": 0.8426, "step": 6676 }, { "epoch": 1.7757978723404255, "grad_norm": 4.082159996032715, "learning_rate": 7.66292583708056e-06, "loss": 0.81, "step": 6677 }, { "epoch": 1.7760638297872342, "grad_norm": 3.8582613468170166, "learning_rate": 7.662181403408593e-06, "loss": 0.7965, "step": 6678 }, { "epoch": 1.7763297872340424, "grad_norm": 4.068000793457031, "learning_rate": 7.661436887365603e-06, "loss": 0.8332, "step": 6679 }, { "epoch": 1.7765957446808511, "grad_norm": 4.067226409912109, "learning_rate": 7.660692288974618e-06, "loss": 0.8399, "step": 6680 }, { "epoch": 1.7768617021276596, "grad_norm": 3.885331392288208, "learning_rate": 7.659947608258684e-06, "loss": 0.8701, "step": 6681 }, { "epoch": 1.777127659574468, "grad_norm": 3.792872905731201, "learning_rate": 7.659202845240839e-06, "loss": 0.8379, "step": 6682 }, { "epoch": 1.7773936170212767, "grad_norm": 3.553959369659424, "learning_rate": 7.658457999944124e-06, "loss": 0.6874, "step": 6683 }, { "epoch": 1.777659574468085, "grad_norm": 4.169983386993408, "learning_rate": 7.657713072391591e-06, "loss": 0.7569, "step": 6684 }, { "epoch": 1.7779255319148937, "grad_norm": 4.05847692489624, "learning_rate": 7.656968062606288e-06, "loss": 0.8497, "step": 6685 }, { "epoch": 1.7781914893617021, "grad_norm": 4.117887496948242, "learning_rate": 7.656222970611263e-06, "loss": 0.708, "step": 6686 }, { "epoch": 1.7784574468085106, "grad_norm": 3.683126211166382, "learning_rate": 7.655477796429571e-06, "loss": 0.7568, "step": 6687 }, { "epoch": 1.7787234042553193, "grad_norm": 3.6990060806274414, "learning_rate": 7.654732540084273e-06, "loss": 0.7721, "step": 6688 }, { "epoch": 1.7789893617021275, "grad_norm": 3.917276620864868, "learning_rate": 7.653987201598422e-06, "loss": 0.8214, "step": 6689 }, { "epoch": 1.7792553191489362, "grad_norm": 4.091401100158691, "learning_rate": 7.653241780995083e-06, "loss": 0.7312, "step": 6690 }, { "epoch": 1.7795212765957447, "grad_norm": 4.167940139770508, "learning_rate": 7.652496278297319e-06, "loss": 0.9115, "step": 6691 }, { "epoch": 1.7797872340425531, "grad_norm": 3.9726510047912598, "learning_rate": 7.651750693528197e-06, "loss": 0.7857, "step": 6692 }, { "epoch": 1.7800531914893618, "grad_norm": 3.7973427772521973, "learning_rate": 7.651005026710786e-06, "loss": 0.8594, "step": 6693 }, { "epoch": 1.78031914893617, "grad_norm": 3.932386875152588, "learning_rate": 7.65025927786816e-06, "loss": 0.7873, "step": 6694 }, { "epoch": 1.7805851063829787, "grad_norm": 3.6921486854553223, "learning_rate": 7.64951344702339e-06, "loss": 0.7569, "step": 6695 }, { "epoch": 1.7808510638297872, "grad_norm": 4.060511589050293, "learning_rate": 7.648767534199556e-06, "loss": 0.7533, "step": 6696 }, { "epoch": 1.7811170212765957, "grad_norm": 4.142321586608887, "learning_rate": 7.648021539419737e-06, "loss": 0.7836, "step": 6697 }, { "epoch": 1.7813829787234043, "grad_norm": 4.071194648742676, "learning_rate": 7.647275462707011e-06, "loss": 0.7489, "step": 6698 }, { "epoch": 1.7816489361702128, "grad_norm": 4.006459712982178, "learning_rate": 7.646529304084469e-06, "loss": 0.812, "step": 6699 }, { "epoch": 1.7819148936170213, "grad_norm": 3.6437671184539795, "learning_rate": 7.64578306357519e-06, "loss": 0.7105, "step": 6700 }, { "epoch": 1.78218085106383, "grad_norm": 4.094074249267578, "learning_rate": 7.645036741202271e-06, "loss": 0.9633, "step": 6701 }, { "epoch": 1.7824468085106382, "grad_norm": 4.029351711273193, "learning_rate": 7.6442903369888e-06, "loss": 0.8999, "step": 6702 }, { "epoch": 1.7827127659574469, "grad_norm": 3.8068792819976807, "learning_rate": 7.643543850957872e-06, "loss": 0.7305, "step": 6703 }, { "epoch": 1.7829787234042553, "grad_norm": 4.074723243713379, "learning_rate": 7.642797283132586e-06, "loss": 0.8502, "step": 6704 }, { "epoch": 1.7832446808510638, "grad_norm": 3.3582799434661865, "learning_rate": 7.642050633536042e-06, "loss": 0.7219, "step": 6705 }, { "epoch": 1.7835106382978725, "grad_norm": 3.6337673664093018, "learning_rate": 7.641303902191339e-06, "loss": 0.7843, "step": 6706 }, { "epoch": 1.7837765957446807, "grad_norm": 4.376511573791504, "learning_rate": 7.640557089121583e-06, "loss": 0.9737, "step": 6707 }, { "epoch": 1.7840425531914894, "grad_norm": 3.6106109619140625, "learning_rate": 7.639810194349884e-06, "loss": 0.7549, "step": 6708 }, { "epoch": 1.7843085106382979, "grad_norm": 3.9676499366760254, "learning_rate": 7.639063217899348e-06, "loss": 0.8951, "step": 6709 }, { "epoch": 1.7845744680851063, "grad_norm": 3.7763378620147705, "learning_rate": 7.638316159793089e-06, "loss": 0.8431, "step": 6710 }, { "epoch": 1.784840425531915, "grad_norm": 3.744365930557251, "learning_rate": 7.637569020054221e-06, "loss": 0.8697, "step": 6711 }, { "epoch": 1.7851063829787233, "grad_norm": 3.4194390773773193, "learning_rate": 7.636821798705864e-06, "loss": 0.8979, "step": 6712 }, { "epoch": 1.785372340425532, "grad_norm": 3.804483413696289, "learning_rate": 7.636074495771134e-06, "loss": 0.8484, "step": 6713 }, { "epoch": 1.7856382978723404, "grad_norm": 4.089145660400391, "learning_rate": 7.635327111273158e-06, "loss": 0.892, "step": 6714 }, { "epoch": 1.7859042553191489, "grad_norm": 4.051761150360107, "learning_rate": 7.634579645235056e-06, "loss": 0.8972, "step": 6715 }, { "epoch": 1.7861702127659576, "grad_norm": 4.0280961990356445, "learning_rate": 7.633832097679959e-06, "loss": 0.8125, "step": 6716 }, { "epoch": 1.7864361702127658, "grad_norm": 4.206244468688965, "learning_rate": 7.633084468630996e-06, "loss": 0.7675, "step": 6717 }, { "epoch": 1.7867021276595745, "grad_norm": 3.4746177196502686, "learning_rate": 7.6323367581113e-06, "loss": 0.7079, "step": 6718 }, { "epoch": 1.786968085106383, "grad_norm": 3.8518667221069336, "learning_rate": 7.631588966144003e-06, "loss": 0.965, "step": 6719 }, { "epoch": 1.7872340425531914, "grad_norm": 3.605275869369507, "learning_rate": 7.630841092752248e-06, "loss": 0.7733, "step": 6720 }, { "epoch": 1.7875, "grad_norm": 4.255527019500732, "learning_rate": 7.63009313795917e-06, "loss": 0.8645, "step": 6721 }, { "epoch": 1.7877659574468086, "grad_norm": 3.93906307220459, "learning_rate": 7.629345101787917e-06, "loss": 0.8449, "step": 6722 }, { "epoch": 1.788031914893617, "grad_norm": 4.351909160614014, "learning_rate": 7.628596984261629e-06, "loss": 0.8644, "step": 6723 }, { "epoch": 1.7882978723404257, "grad_norm": 3.7165818214416504, "learning_rate": 7.627848785403456e-06, "loss": 0.7284, "step": 6724 }, { "epoch": 1.788563829787234, "grad_norm": 3.9665300846099854, "learning_rate": 7.6271005052365465e-06, "loss": 0.8396, "step": 6725 }, { "epoch": 1.7888297872340426, "grad_norm": 3.951260566711426, "learning_rate": 7.6263521437840544e-06, "loss": 0.9464, "step": 6726 }, { "epoch": 1.789095744680851, "grad_norm": 4.499269008636475, "learning_rate": 7.625603701069135e-06, "loss": 0.9031, "step": 6727 }, { "epoch": 1.7893617021276595, "grad_norm": 3.931673526763916, "learning_rate": 7.6248551771149474e-06, "loss": 0.823, "step": 6728 }, { "epoch": 1.7896276595744682, "grad_norm": 4.128811836242676, "learning_rate": 7.624106571944648e-06, "loss": 0.7497, "step": 6729 }, { "epoch": 1.7898936170212765, "grad_norm": 3.873683452606201, "learning_rate": 7.623357885581403e-06, "loss": 0.8247, "step": 6730 }, { "epoch": 1.7901595744680852, "grad_norm": 3.7852728366851807, "learning_rate": 7.6226091180483765e-06, "loss": 0.8774, "step": 6731 }, { "epoch": 1.7904255319148936, "grad_norm": 3.885965585708618, "learning_rate": 7.621860269368735e-06, "loss": 0.7561, "step": 6732 }, { "epoch": 1.790691489361702, "grad_norm": 4.435214519500732, "learning_rate": 7.6211113395656515e-06, "loss": 0.9338, "step": 6733 }, { "epoch": 1.7909574468085108, "grad_norm": 4.548224449157715, "learning_rate": 7.6203623286622955e-06, "loss": 0.8323, "step": 6734 }, { "epoch": 1.791223404255319, "grad_norm": 3.8655712604522705, "learning_rate": 7.619613236681845e-06, "loss": 0.8654, "step": 6735 }, { "epoch": 1.7914893617021277, "grad_norm": 3.7102363109588623, "learning_rate": 7.618864063647477e-06, "loss": 0.8015, "step": 6736 }, { "epoch": 1.7917553191489362, "grad_norm": 4.260025978088379, "learning_rate": 7.6181148095823705e-06, "loss": 0.7977, "step": 6737 }, { "epoch": 1.7920212765957446, "grad_norm": 4.112497806549072, "learning_rate": 7.6173654745097106e-06, "loss": 0.7763, "step": 6738 }, { "epoch": 1.7922872340425533, "grad_norm": 3.998528003692627, "learning_rate": 7.6166160584526795e-06, "loss": 0.8215, "step": 6739 }, { "epoch": 1.7925531914893615, "grad_norm": 3.6492180824279785, "learning_rate": 7.615866561434468e-06, "loss": 0.7239, "step": 6740 }, { "epoch": 1.7928191489361702, "grad_norm": 3.8486714363098145, "learning_rate": 7.615116983478266e-06, "loss": 0.8435, "step": 6741 }, { "epoch": 1.7930851063829787, "grad_norm": 3.863814353942871, "learning_rate": 7.614367324607263e-06, "loss": 0.8033, "step": 6742 }, { "epoch": 1.7933510638297872, "grad_norm": 3.88749098777771, "learning_rate": 7.613617584844662e-06, "loss": 0.8072, "step": 6743 }, { "epoch": 1.7936170212765958, "grad_norm": 3.9917871952056885, "learning_rate": 7.612867764213651e-06, "loss": 0.8138, "step": 6744 }, { "epoch": 1.7938829787234043, "grad_norm": 4.009222507476807, "learning_rate": 7.612117862737437e-06, "loss": 0.7131, "step": 6745 }, { "epoch": 1.7941489361702128, "grad_norm": 4.001763343811035, "learning_rate": 7.611367880439221e-06, "loss": 0.9487, "step": 6746 }, { "epoch": 1.7944148936170212, "grad_norm": 4.2233805656433105, "learning_rate": 7.610617817342207e-06, "loss": 0.7244, "step": 6747 }, { "epoch": 1.7946808510638297, "grad_norm": 3.7131550312042236, "learning_rate": 7.609867673469607e-06, "loss": 0.8303, "step": 6748 }, { "epoch": 1.7949468085106384, "grad_norm": 4.046380519866943, "learning_rate": 7.609117448844626e-06, "loss": 0.8372, "step": 6749 }, { "epoch": 1.7952127659574468, "grad_norm": 4.070696830749512, "learning_rate": 7.60836714349048e-06, "loss": 0.8259, "step": 6750 }, { "epoch": 1.7954787234042553, "grad_norm": 3.893247604370117, "learning_rate": 7.607616757430383e-06, "loss": 0.8598, "step": 6751 }, { "epoch": 1.795744680851064, "grad_norm": 3.7077648639678955, "learning_rate": 7.606866290687555e-06, "loss": 0.8036, "step": 6752 }, { "epoch": 1.7960106382978722, "grad_norm": 4.3204450607299805, "learning_rate": 7.606115743285213e-06, "loss": 0.8424, "step": 6753 }, { "epoch": 1.796276595744681, "grad_norm": 3.3555731773376465, "learning_rate": 7.605365115246581e-06, "loss": 0.8369, "step": 6754 }, { "epoch": 1.7965425531914894, "grad_norm": 3.561962842941284, "learning_rate": 7.604614406594888e-06, "loss": 0.7841, "step": 6755 }, { "epoch": 1.7968085106382978, "grad_norm": 4.0263166427612305, "learning_rate": 7.6038636173533565e-06, "loss": 0.7135, "step": 6756 }, { "epoch": 1.7970744680851065, "grad_norm": 3.8524928092956543, "learning_rate": 7.603112747545218e-06, "loss": 0.8327, "step": 6757 }, { "epoch": 1.7973404255319148, "grad_norm": 3.5046606063842773, "learning_rate": 7.602361797193709e-06, "loss": 0.8162, "step": 6758 }, { "epoch": 1.7976063829787234, "grad_norm": 4.547070503234863, "learning_rate": 7.60161076632206e-06, "loss": 0.8014, "step": 6759 }, { "epoch": 1.797872340425532, "grad_norm": 4.453802585601807, "learning_rate": 7.600859654953513e-06, "loss": 0.9287, "step": 6760 }, { "epoch": 1.7981382978723404, "grad_norm": 4.324093818664551, "learning_rate": 7.6001084631113046e-06, "loss": 0.848, "step": 6761 }, { "epoch": 1.798404255319149, "grad_norm": 4.146725177764893, "learning_rate": 7.599357190818679e-06, "loss": 0.8875, "step": 6762 }, { "epoch": 1.7986702127659573, "grad_norm": 4.132041931152344, "learning_rate": 7.598605838098882e-06, "loss": 0.8413, "step": 6763 }, { "epoch": 1.798936170212766, "grad_norm": 3.829908847808838, "learning_rate": 7.59785440497516e-06, "loss": 0.843, "step": 6764 }, { "epoch": 1.7992021276595744, "grad_norm": 4.308759689331055, "learning_rate": 7.597102891470766e-06, "loss": 0.7839, "step": 6765 }, { "epoch": 1.799468085106383, "grad_norm": 3.6383216381073, "learning_rate": 7.59635129760895e-06, "loss": 0.608, "step": 6766 }, { "epoch": 1.7997340425531916, "grad_norm": 3.6101510524749756, "learning_rate": 7.595599623412968e-06, "loss": 0.7246, "step": 6767 }, { "epoch": 1.8, "grad_norm": 3.51635479927063, "learning_rate": 7.594847868906076e-06, "loss": 0.798, "step": 6768 }, { "epoch": 1.8002659574468085, "grad_norm": 3.927917718887329, "learning_rate": 7.594096034111538e-06, "loss": 0.8229, "step": 6769 }, { "epoch": 1.800531914893617, "grad_norm": 4.29150390625, "learning_rate": 7.5933441190526146e-06, "loss": 0.922, "step": 6770 }, { "epoch": 1.8007978723404254, "grad_norm": 3.8685336112976074, "learning_rate": 7.592592123752569e-06, "loss": 0.7242, "step": 6771 }, { "epoch": 1.8010638297872341, "grad_norm": 3.9335358142852783, "learning_rate": 7.591840048234673e-06, "loss": 0.8717, "step": 6772 }, { "epoch": 1.8013297872340426, "grad_norm": 4.033020496368408, "learning_rate": 7.591087892522193e-06, "loss": 0.8129, "step": 6773 }, { "epoch": 1.801595744680851, "grad_norm": 4.348812580108643, "learning_rate": 7.590335656638403e-06, "loss": 0.8352, "step": 6774 }, { "epoch": 1.8018617021276597, "grad_norm": 3.683743476867676, "learning_rate": 7.589583340606579e-06, "loss": 0.8427, "step": 6775 }, { "epoch": 1.802127659574468, "grad_norm": 3.782118797302246, "learning_rate": 7.588830944449996e-06, "loss": 0.8659, "step": 6776 }, { "epoch": 1.8023936170212767, "grad_norm": 4.097870826721191, "learning_rate": 7.5880784681919365e-06, "loss": 0.7472, "step": 6777 }, { "epoch": 1.8026595744680851, "grad_norm": 3.921733856201172, "learning_rate": 7.587325911855681e-06, "loss": 0.8388, "step": 6778 }, { "epoch": 1.8029255319148936, "grad_norm": 4.305613994598389, "learning_rate": 7.586573275464517e-06, "loss": 1.0133, "step": 6779 }, { "epoch": 1.8031914893617023, "grad_norm": 4.13943338394165, "learning_rate": 7.58582055904173e-06, "loss": 0.7861, "step": 6780 }, { "epoch": 1.8034574468085105, "grad_norm": 4.047939777374268, "learning_rate": 7.585067762610612e-06, "loss": 0.8422, "step": 6781 }, { "epoch": 1.8037234042553192, "grad_norm": 3.8695991039276123, "learning_rate": 7.584314886194451e-06, "loss": 0.8365, "step": 6782 }, { "epoch": 1.8039893617021276, "grad_norm": 3.7691190242767334, "learning_rate": 7.583561929816547e-06, "loss": 0.8293, "step": 6783 }, { "epoch": 1.804255319148936, "grad_norm": 4.062473773956299, "learning_rate": 7.5828088935001954e-06, "loss": 0.8118, "step": 6784 }, { "epoch": 1.8045212765957448, "grad_norm": 4.588931560516357, "learning_rate": 7.582055777268693e-06, "loss": 0.8835, "step": 6785 }, { "epoch": 1.804787234042553, "grad_norm": 3.1973307132720947, "learning_rate": 7.581302581145346e-06, "loss": 0.6728, "step": 6786 }, { "epoch": 1.8050531914893617, "grad_norm": 4.123830318450928, "learning_rate": 7.5805493051534605e-06, "loss": 0.9315, "step": 6787 }, { "epoch": 1.8053191489361702, "grad_norm": 3.992337942123413, "learning_rate": 7.57979594931634e-06, "loss": 0.7951, "step": 6788 }, { "epoch": 1.8055851063829786, "grad_norm": 3.456594467163086, "learning_rate": 7.579042513657294e-06, "loss": 0.8114, "step": 6789 }, { "epoch": 1.8058510638297873, "grad_norm": 4.029353618621826, "learning_rate": 7.578288998199638e-06, "loss": 0.895, "step": 6790 }, { "epoch": 1.8061170212765958, "grad_norm": 4.027595520019531, "learning_rate": 7.577535402966683e-06, "loss": 0.8416, "step": 6791 }, { "epoch": 1.8063829787234043, "grad_norm": 3.8989861011505127, "learning_rate": 7.5767817279817505e-06, "loss": 0.8275, "step": 6792 }, { "epoch": 1.8066489361702127, "grad_norm": 4.1814961433410645, "learning_rate": 7.576027973268155e-06, "loss": 0.7388, "step": 6793 }, { "epoch": 1.8069148936170212, "grad_norm": 3.8830153942108154, "learning_rate": 7.575274138849223e-06, "loss": 0.7622, "step": 6794 }, { "epoch": 1.8071808510638299, "grad_norm": 3.6945488452911377, "learning_rate": 7.574520224748276e-06, "loss": 0.6767, "step": 6795 }, { "epoch": 1.8074468085106383, "grad_norm": 3.8499093055725098, "learning_rate": 7.5737662309886415e-06, "loss": 0.8128, "step": 6796 }, { "epoch": 1.8077127659574468, "grad_norm": 4.120965480804443, "learning_rate": 7.573012157593651e-06, "loss": 0.8356, "step": 6797 }, { "epoch": 1.8079787234042555, "grad_norm": 3.9702072143554688, "learning_rate": 7.572258004586635e-06, "loss": 0.773, "step": 6798 }, { "epoch": 1.8082446808510637, "grad_norm": 3.910039186477661, "learning_rate": 7.5715037719909266e-06, "loss": 0.7577, "step": 6799 }, { "epoch": 1.8085106382978724, "grad_norm": 3.9392266273498535, "learning_rate": 7.570749459829865e-06, "loss": 0.9043, "step": 6800 }, { "epoch": 1.8087765957446809, "grad_norm": 3.9405999183654785, "learning_rate": 7.56999506812679e-06, "loss": 0.8526, "step": 6801 }, { "epoch": 1.8090425531914893, "grad_norm": 3.701950788497925, "learning_rate": 7.569240596905038e-06, "loss": 0.7136, "step": 6802 }, { "epoch": 1.809308510638298, "grad_norm": 3.7333173751831055, "learning_rate": 7.568486046187959e-06, "loss": 0.8191, "step": 6803 }, { "epoch": 1.8095744680851062, "grad_norm": 3.9274251461029053, "learning_rate": 7.567731415998898e-06, "loss": 0.8371, "step": 6804 }, { "epoch": 1.809840425531915, "grad_norm": 4.320472240447998, "learning_rate": 7.566976706361204e-06, "loss": 0.8743, "step": 6805 }, { "epoch": 1.8101063829787234, "grad_norm": 4.124827861785889, "learning_rate": 7.566221917298228e-06, "loss": 0.8599, "step": 6806 }, { "epoch": 1.8103723404255319, "grad_norm": 4.09792947769165, "learning_rate": 7.565467048833325e-06, "loss": 0.782, "step": 6807 }, { "epoch": 1.8106382978723405, "grad_norm": 4.003774166107178, "learning_rate": 7.56471210098985e-06, "loss": 0.7946, "step": 6808 }, { "epoch": 1.8109042553191488, "grad_norm": 4.259424686431885, "learning_rate": 7.563957073791164e-06, "loss": 0.8328, "step": 6809 }, { "epoch": 1.8111702127659575, "grad_norm": 3.9565248489379883, "learning_rate": 7.563201967260627e-06, "loss": 0.8544, "step": 6810 }, { "epoch": 1.811436170212766, "grad_norm": 3.88087797164917, "learning_rate": 7.562446781421604e-06, "loss": 0.7987, "step": 6811 }, { "epoch": 1.8117021276595744, "grad_norm": 3.9190945625305176, "learning_rate": 7.5616915162974594e-06, "loss": 0.8162, "step": 6812 }, { "epoch": 1.811968085106383, "grad_norm": 3.700688600540161, "learning_rate": 7.560936171911564e-06, "loss": 0.7738, "step": 6813 }, { "epoch": 1.8122340425531915, "grad_norm": 4.023971080780029, "learning_rate": 7.560180748287289e-06, "loss": 0.8266, "step": 6814 }, { "epoch": 1.8125, "grad_norm": 4.754519462585449, "learning_rate": 7.559425245448006e-06, "loss": 1.0779, "step": 6815 }, { "epoch": 1.8127659574468085, "grad_norm": 4.043941497802734, "learning_rate": 7.558669663417093e-06, "loss": 0.7789, "step": 6816 }, { "epoch": 1.813031914893617, "grad_norm": 4.064941883087158, "learning_rate": 7.557914002217929e-06, "loss": 0.8235, "step": 6817 }, { "epoch": 1.8132978723404256, "grad_norm": 4.2770562171936035, "learning_rate": 7.5571582618738936e-06, "loss": 0.8647, "step": 6818 }, { "epoch": 1.813563829787234, "grad_norm": 3.758079767227173, "learning_rate": 7.55640244240837e-06, "loss": 0.765, "step": 6819 }, { "epoch": 1.8138297872340425, "grad_norm": 4.024742603302002, "learning_rate": 7.555646543844747e-06, "loss": 0.9143, "step": 6820 }, { "epoch": 1.8140957446808512, "grad_norm": 4.142058372497559, "learning_rate": 7.55489056620641e-06, "loss": 0.8872, "step": 6821 }, { "epoch": 1.8143617021276595, "grad_norm": 4.0311455726623535, "learning_rate": 7.554134509516751e-06, "loss": 0.7628, "step": 6822 }, { "epoch": 1.8146276595744681, "grad_norm": 3.73848032951355, "learning_rate": 7.553378373799163e-06, "loss": 0.807, "step": 6823 }, { "epoch": 1.8148936170212766, "grad_norm": 3.553116798400879, "learning_rate": 7.552622159077041e-06, "loss": 0.8166, "step": 6824 }, { "epoch": 1.815159574468085, "grad_norm": 3.678316116333008, "learning_rate": 7.5518658653737844e-06, "loss": 0.8462, "step": 6825 }, { "epoch": 1.8154255319148938, "grad_norm": 4.440575122833252, "learning_rate": 7.551109492712795e-06, "loss": 0.8861, "step": 6826 }, { "epoch": 1.815691489361702, "grad_norm": 4.359316825866699, "learning_rate": 7.550353041117473e-06, "loss": 0.8025, "step": 6827 }, { "epoch": 1.8159574468085107, "grad_norm": 3.976832389831543, "learning_rate": 7.549596510611226e-06, "loss": 0.8486, "step": 6828 }, { "epoch": 1.8162234042553191, "grad_norm": 3.64974308013916, "learning_rate": 7.54883990121746e-06, "loss": 0.6982, "step": 6829 }, { "epoch": 1.8164893617021276, "grad_norm": 4.051089286804199, "learning_rate": 7.548083212959588e-06, "loss": 0.8417, "step": 6830 }, { "epoch": 1.8167553191489363, "grad_norm": 3.949113130569458, "learning_rate": 7.547326445861021e-06, "loss": 0.7382, "step": 6831 }, { "epoch": 1.8170212765957445, "grad_norm": 3.896155834197998, "learning_rate": 7.546569599945174e-06, "loss": 0.9312, "step": 6832 }, { "epoch": 1.8172872340425532, "grad_norm": 4.127990245819092, "learning_rate": 7.545812675235467e-06, "loss": 0.9422, "step": 6833 }, { "epoch": 1.8175531914893617, "grad_norm": 3.8345584869384766, "learning_rate": 7.545055671755316e-06, "loss": 0.8672, "step": 6834 }, { "epoch": 1.8178191489361701, "grad_norm": 3.544022560119629, "learning_rate": 7.544298589528148e-06, "loss": 0.8378, "step": 6835 }, { "epoch": 1.8180851063829788, "grad_norm": 3.773446798324585, "learning_rate": 7.543541428577386e-06, "loss": 0.7617, "step": 6836 }, { "epoch": 1.8183510638297873, "grad_norm": 4.245392322540283, "learning_rate": 7.542784188926456e-06, "loss": 0.7689, "step": 6837 }, { "epoch": 1.8186170212765957, "grad_norm": 4.0154924392700195, "learning_rate": 7.542026870598791e-06, "loss": 0.7467, "step": 6838 }, { "epoch": 1.8188829787234042, "grad_norm": 4.492767810821533, "learning_rate": 7.5412694736178206e-06, "loss": 0.9573, "step": 6839 }, { "epoch": 1.8191489361702127, "grad_norm": 3.7740705013275146, "learning_rate": 7.540511998006982e-06, "loss": 0.6853, "step": 6840 }, { "epoch": 1.8194148936170214, "grad_norm": 4.6515655517578125, "learning_rate": 7.539754443789709e-06, "loss": 0.9875, "step": 6841 }, { "epoch": 1.8196808510638298, "grad_norm": 4.019815921783447, "learning_rate": 7.5389968109894465e-06, "loss": 0.7956, "step": 6842 }, { "epoch": 1.8199468085106383, "grad_norm": 3.8876473903656006, "learning_rate": 7.5382390996296315e-06, "loss": 0.8368, "step": 6843 }, { "epoch": 1.820212765957447, "grad_norm": 4.036003112792969, "learning_rate": 7.537481309733709e-06, "loss": 0.7615, "step": 6844 }, { "epoch": 1.8204787234042552, "grad_norm": 3.9731733798980713, "learning_rate": 7.53672344132513e-06, "loss": 0.8408, "step": 6845 }, { "epoch": 1.820744680851064, "grad_norm": 4.149892807006836, "learning_rate": 7.53596549442734e-06, "loss": 0.7553, "step": 6846 }, { "epoch": 1.8210106382978724, "grad_norm": 3.9756197929382324, "learning_rate": 7.535207469063791e-06, "loss": 0.8429, "step": 6847 }, { "epoch": 1.8212765957446808, "grad_norm": 4.044477939605713, "learning_rate": 7.53444936525794e-06, "loss": 0.7761, "step": 6848 }, { "epoch": 1.8215425531914895, "grad_norm": 3.613596200942993, "learning_rate": 7.53369118303324e-06, "loss": 0.808, "step": 6849 }, { "epoch": 1.8218085106382977, "grad_norm": 4.789092540740967, "learning_rate": 7.532932922413152e-06, "loss": 0.8992, "step": 6850 }, { "epoch": 1.8220744680851064, "grad_norm": 3.8128976821899414, "learning_rate": 7.532174583421138e-06, "loss": 0.7259, "step": 6851 }, { "epoch": 1.8223404255319149, "grad_norm": 3.685126781463623, "learning_rate": 7.53141616608066e-06, "loss": 0.7971, "step": 6852 }, { "epoch": 1.8226063829787233, "grad_norm": 3.8787617683410645, "learning_rate": 7.5306576704151865e-06, "loss": 0.7447, "step": 6853 }, { "epoch": 1.822872340425532, "grad_norm": 4.506245136260986, "learning_rate": 7.529899096448185e-06, "loss": 0.8898, "step": 6854 }, { "epoch": 1.8231382978723403, "grad_norm": 4.238636016845703, "learning_rate": 7.529140444203127e-06, "loss": 0.8057, "step": 6855 }, { "epoch": 1.823404255319149, "grad_norm": 4.039521217346191, "learning_rate": 7.528381713703485e-06, "loss": 0.772, "step": 6856 }, { "epoch": 1.8236702127659574, "grad_norm": 3.6089868545532227, "learning_rate": 7.5276229049727375e-06, "loss": 0.8194, "step": 6857 }, { "epoch": 1.8239361702127659, "grad_norm": 3.4110054969787598, "learning_rate": 7.52686401803436e-06, "loss": 0.6902, "step": 6858 }, { "epoch": 1.8242021276595746, "grad_norm": 3.6139302253723145, "learning_rate": 7.526105052911836e-06, "loss": 0.8318, "step": 6859 }, { "epoch": 1.824468085106383, "grad_norm": 4.215152740478516, "learning_rate": 7.525346009628647e-06, "loss": 0.8303, "step": 6860 }, { "epoch": 1.8247340425531915, "grad_norm": 3.8578953742980957, "learning_rate": 7.524586888208278e-06, "loss": 0.8625, "step": 6861 }, { "epoch": 1.825, "grad_norm": 3.8874824047088623, "learning_rate": 7.52382768867422e-06, "loss": 0.7106, "step": 6862 }, { "epoch": 1.8252659574468084, "grad_norm": 3.746168851852417, "learning_rate": 7.5230684110499604e-06, "loss": 0.8753, "step": 6863 }, { "epoch": 1.825531914893617, "grad_norm": 3.70993971824646, "learning_rate": 7.522309055358995e-06, "loss": 0.7393, "step": 6864 }, { "epoch": 1.8257978723404256, "grad_norm": 3.599679470062256, "learning_rate": 7.5215496216248175e-06, "loss": 0.893, "step": 6865 }, { "epoch": 1.826063829787234, "grad_norm": 3.7604589462280273, "learning_rate": 7.520790109870926e-06, "loss": 0.7966, "step": 6866 }, { "epoch": 1.8263297872340427, "grad_norm": 3.9113166332244873, "learning_rate": 7.5200305201208205e-06, "loss": 0.8071, "step": 6867 }, { "epoch": 1.826595744680851, "grad_norm": 4.262864112854004, "learning_rate": 7.519270852398002e-06, "loss": 0.7942, "step": 6868 }, { "epoch": 1.8268617021276596, "grad_norm": 4.096951007843018, "learning_rate": 7.5185111067259804e-06, "loss": 0.717, "step": 6869 }, { "epoch": 1.827127659574468, "grad_norm": 4.112506866455078, "learning_rate": 7.517751283128258e-06, "loss": 0.8871, "step": 6870 }, { "epoch": 1.8273936170212766, "grad_norm": 3.5203890800476074, "learning_rate": 7.516991381628347e-06, "loss": 0.796, "step": 6871 }, { "epoch": 1.8276595744680852, "grad_norm": 3.556929588317871, "learning_rate": 7.516231402249758e-06, "loss": 0.8346, "step": 6872 }, { "epoch": 1.8279255319148935, "grad_norm": 3.3509085178375244, "learning_rate": 7.51547134501601e-06, "loss": 0.7763, "step": 6873 }, { "epoch": 1.8281914893617022, "grad_norm": 4.3177103996276855, "learning_rate": 7.514711209950615e-06, "loss": 0.7943, "step": 6874 }, { "epoch": 1.8284574468085106, "grad_norm": 3.8919661045074463, "learning_rate": 7.513950997077094e-06, "loss": 0.7541, "step": 6875 }, { "epoch": 1.828723404255319, "grad_norm": 3.506849765777588, "learning_rate": 7.513190706418969e-06, "loss": 0.8451, "step": 6876 }, { "epoch": 1.8289893617021278, "grad_norm": 4.711544513702393, "learning_rate": 7.512430337999768e-06, "loss": 0.9569, "step": 6877 }, { "epoch": 1.829255319148936, "grad_norm": 4.111194610595703, "learning_rate": 7.511669891843011e-06, "loss": 0.9289, "step": 6878 }, { "epoch": 1.8295212765957447, "grad_norm": 3.4928982257843018, "learning_rate": 7.510909367972231e-06, "loss": 0.7627, "step": 6879 }, { "epoch": 1.8297872340425532, "grad_norm": 3.737337827682495, "learning_rate": 7.5101487664109605e-06, "loss": 0.7463, "step": 6880 }, { "epoch": 1.8300531914893616, "grad_norm": 3.4611358642578125, "learning_rate": 7.50938808718273e-06, "loss": 0.7764, "step": 6881 }, { "epoch": 1.8303191489361703, "grad_norm": 3.901796817779541, "learning_rate": 7.508627330311078e-06, "loss": 0.9079, "step": 6882 }, { "epoch": 1.8305851063829788, "grad_norm": 3.8375611305236816, "learning_rate": 7.507866495819543e-06, "loss": 0.7861, "step": 6883 }, { "epoch": 1.8308510638297872, "grad_norm": 3.7982888221740723, "learning_rate": 7.507105583731666e-06, "loss": 0.8905, "step": 6884 }, { "epoch": 1.8311170212765957, "grad_norm": 3.70542573928833, "learning_rate": 7.506344594070991e-06, "loss": 0.7173, "step": 6885 }, { "epoch": 1.8313829787234042, "grad_norm": 3.7828474044799805, "learning_rate": 7.505583526861064e-06, "loss": 0.8687, "step": 6886 }, { "epoch": 1.8316489361702128, "grad_norm": 4.376963138580322, "learning_rate": 7.504822382125432e-06, "loss": 0.982, "step": 6887 }, { "epoch": 1.8319148936170213, "grad_norm": 3.9631431102752686, "learning_rate": 7.504061159887646e-06, "loss": 0.8186, "step": 6888 }, { "epoch": 1.8321808510638298, "grad_norm": 4.296795845031738, "learning_rate": 7.5032998601712605e-06, "loss": 0.8346, "step": 6889 }, { "epoch": 1.8324468085106385, "grad_norm": 3.889289617538452, "learning_rate": 7.502538482999829e-06, "loss": 0.8344, "step": 6890 }, { "epoch": 1.8327127659574467, "grad_norm": 4.060772895812988, "learning_rate": 7.50177702839691e-06, "loss": 0.7625, "step": 6891 }, { "epoch": 1.8329787234042554, "grad_norm": 3.6209208965301514, "learning_rate": 7.501015496386066e-06, "loss": 0.779, "step": 6892 }, { "epoch": 1.8332446808510638, "grad_norm": 3.7519564628601074, "learning_rate": 7.5002538869908556e-06, "loss": 0.7245, "step": 6893 }, { "epoch": 1.8335106382978723, "grad_norm": 3.842135190963745, "learning_rate": 7.499492200234849e-06, "loss": 0.7977, "step": 6894 }, { "epoch": 1.833776595744681, "grad_norm": 4.067161560058594, "learning_rate": 7.498730436141609e-06, "loss": 0.8287, "step": 6895 }, { "epoch": 1.8340425531914892, "grad_norm": 3.8573522567749023, "learning_rate": 7.497968594734708e-06, "loss": 0.7012, "step": 6896 }, { "epoch": 1.834308510638298, "grad_norm": 3.792734146118164, "learning_rate": 7.4972066760377184e-06, "loss": 0.7986, "step": 6897 }, { "epoch": 1.8345744680851064, "grad_norm": 4.287036418914795, "learning_rate": 7.496444680074213e-06, "loss": 0.8091, "step": 6898 }, { "epoch": 1.8348404255319148, "grad_norm": 3.9161949157714844, "learning_rate": 7.49568260686777e-06, "loss": 0.8796, "step": 6899 }, { "epoch": 1.8351063829787235, "grad_norm": 3.8841638565063477, "learning_rate": 7.49492045644197e-06, "loss": 0.8827, "step": 6900 }, { "epoch": 1.8353723404255318, "grad_norm": 3.770533323287964, "learning_rate": 7.494158228820393e-06, "loss": 0.7671, "step": 6901 }, { "epoch": 1.8356382978723405, "grad_norm": 4.155034065246582, "learning_rate": 7.493395924026623e-06, "loss": 0.8533, "step": 6902 }, { "epoch": 1.835904255319149, "grad_norm": 3.911745071411133, "learning_rate": 7.492633542084249e-06, "loss": 0.82, "step": 6903 }, { "epoch": 1.8361702127659574, "grad_norm": 3.444728136062622, "learning_rate": 7.491871083016858e-06, "loss": 0.7717, "step": 6904 }, { "epoch": 1.836436170212766, "grad_norm": 4.003023147583008, "learning_rate": 7.491108546848041e-06, "loss": 0.7351, "step": 6905 }, { "epoch": 1.8367021276595743, "grad_norm": 3.9087607860565186, "learning_rate": 7.490345933601395e-06, "loss": 0.8509, "step": 6906 }, { "epoch": 1.836968085106383, "grad_norm": 4.098905086517334, "learning_rate": 7.489583243300511e-06, "loss": 0.9289, "step": 6907 }, { "epoch": 1.8372340425531914, "grad_norm": 4.120253562927246, "learning_rate": 7.488820475968992e-06, "loss": 0.8707, "step": 6908 }, { "epoch": 1.8375, "grad_norm": 4.324950218200684, "learning_rate": 7.488057631630438e-06, "loss": 0.7811, "step": 6909 }, { "epoch": 1.8377659574468086, "grad_norm": 4.5706634521484375, "learning_rate": 7.4872947103084495e-06, "loss": 0.8641, "step": 6910 }, { "epoch": 1.838031914893617, "grad_norm": 4.22561502456665, "learning_rate": 7.486531712026634e-06, "loss": 0.794, "step": 6911 }, { "epoch": 1.8382978723404255, "grad_norm": 4.015974521636963, "learning_rate": 7.485768636808603e-06, "loss": 0.8757, "step": 6912 }, { "epoch": 1.8385638297872342, "grad_norm": 3.7457127571105957, "learning_rate": 7.48500548467796e-06, "loss": 0.8682, "step": 6913 }, { "epoch": 1.8388297872340424, "grad_norm": 3.964571714401245, "learning_rate": 7.484242255658322e-06, "loss": 0.7431, "step": 6914 }, { "epoch": 1.8390957446808511, "grad_norm": 3.838426351547241, "learning_rate": 7.4834789497733065e-06, "loss": 0.7413, "step": 6915 }, { "epoch": 1.8393617021276596, "grad_norm": 3.7367520332336426, "learning_rate": 7.4827155670465264e-06, "loss": 0.8366, "step": 6916 }, { "epoch": 1.839627659574468, "grad_norm": 3.9056553840637207, "learning_rate": 7.481952107501604e-06, "loss": 0.7134, "step": 6917 }, { "epoch": 1.8398936170212767, "grad_norm": 4.098144054412842, "learning_rate": 7.481188571162161e-06, "loss": 0.7744, "step": 6918 }, { "epoch": 1.840159574468085, "grad_norm": 4.067973613739014, "learning_rate": 7.480424958051823e-06, "loss": 0.8143, "step": 6919 }, { "epoch": 1.8404255319148937, "grad_norm": 3.9194462299346924, "learning_rate": 7.479661268194217e-06, "loss": 0.8335, "step": 6920 }, { "epoch": 1.8406914893617021, "grad_norm": 4.130805492401123, "learning_rate": 7.4788975016129704e-06, "loss": 0.769, "step": 6921 }, { "epoch": 1.8409574468085106, "grad_norm": 3.580792188644409, "learning_rate": 7.478133658331716e-06, "loss": 0.7743, "step": 6922 }, { "epoch": 1.8412234042553193, "grad_norm": 3.78035569190979, "learning_rate": 7.477369738374092e-06, "loss": 0.8619, "step": 6923 }, { "epoch": 1.8414893617021275, "grad_norm": 3.8400089740753174, "learning_rate": 7.476605741763729e-06, "loss": 0.8161, "step": 6924 }, { "epoch": 1.8417553191489362, "grad_norm": 3.7448103427886963, "learning_rate": 7.475841668524268e-06, "loss": 0.8305, "step": 6925 }, { "epoch": 1.8420212765957447, "grad_norm": 3.828014850616455, "learning_rate": 7.475077518679352e-06, "loss": 0.8424, "step": 6926 }, { "epoch": 1.8422872340425531, "grad_norm": 3.776527166366577, "learning_rate": 7.474313292252624e-06, "loss": 0.9811, "step": 6927 }, { "epoch": 1.8425531914893618, "grad_norm": 4.294341564178467, "learning_rate": 7.473548989267728e-06, "loss": 0.8375, "step": 6928 }, { "epoch": 1.84281914893617, "grad_norm": 4.230419158935547, "learning_rate": 7.472784609748316e-06, "loss": 0.7886, "step": 6929 }, { "epoch": 1.8430851063829787, "grad_norm": 4.243613243103027, "learning_rate": 7.472020153718036e-06, "loss": 0.8787, "step": 6930 }, { "epoch": 1.8433510638297872, "grad_norm": 4.046195983886719, "learning_rate": 7.471255621200541e-06, "loss": 0.7344, "step": 6931 }, { "epoch": 1.8436170212765957, "grad_norm": 3.4666972160339355, "learning_rate": 7.470491012219488e-06, "loss": 0.8123, "step": 6932 }, { "epoch": 1.8438829787234043, "grad_norm": 4.226772785186768, "learning_rate": 7.469726326798535e-06, "loss": 0.7765, "step": 6933 }, { "epoch": 1.8441489361702128, "grad_norm": 4.348804950714111, "learning_rate": 7.468961564961341e-06, "loss": 0.8481, "step": 6934 }, { "epoch": 1.8444148936170213, "grad_norm": 3.7085683345794678, "learning_rate": 7.4681967267315715e-06, "loss": 0.7717, "step": 6935 }, { "epoch": 1.84468085106383, "grad_norm": 3.670295238494873, "learning_rate": 7.4674318121328856e-06, "loss": 0.7074, "step": 6936 }, { "epoch": 1.8449468085106382, "grad_norm": 4.235050678253174, "learning_rate": 7.466666821188957e-06, "loss": 0.9085, "step": 6937 }, { "epoch": 1.8452127659574469, "grad_norm": 4.282822132110596, "learning_rate": 7.465901753923452e-06, "loss": 0.8641, "step": 6938 }, { "epoch": 1.8454787234042553, "grad_norm": 3.9703402519226074, "learning_rate": 7.465136610360044e-06, "loss": 0.7331, "step": 6939 }, { "epoch": 1.8457446808510638, "grad_norm": 3.793503522872925, "learning_rate": 7.4643713905224065e-06, "loss": 0.8122, "step": 6940 }, { "epoch": 1.8460106382978725, "grad_norm": 4.120753288269043, "learning_rate": 7.463606094434218e-06, "loss": 0.8822, "step": 6941 }, { "epoch": 1.8462765957446807, "grad_norm": 4.266670227050781, "learning_rate": 7.462840722119155e-06, "loss": 0.8363, "step": 6942 }, { "epoch": 1.8465425531914894, "grad_norm": 3.998488664627075, "learning_rate": 7.462075273600901e-06, "loss": 0.895, "step": 6943 }, { "epoch": 1.8468085106382979, "grad_norm": 3.923610210418701, "learning_rate": 7.461309748903138e-06, "loss": 0.8406, "step": 6944 }, { "epoch": 1.8470744680851063, "grad_norm": 4.076598644256592, "learning_rate": 7.460544148049555e-06, "loss": 0.7919, "step": 6945 }, { "epoch": 1.847340425531915, "grad_norm": 4.171792507171631, "learning_rate": 7.459778471063839e-06, "loss": 0.9616, "step": 6946 }, { "epoch": 1.8476063829787233, "grad_norm": 4.327701091766357, "learning_rate": 7.45901271796968e-06, "loss": 0.8918, "step": 6947 }, { "epoch": 1.847872340425532, "grad_norm": 4.035894393920898, "learning_rate": 7.4582468887907746e-06, "loss": 0.7007, "step": 6948 }, { "epoch": 1.8481382978723404, "grad_norm": 3.9794068336486816, "learning_rate": 7.457480983550813e-06, "loss": 0.8622, "step": 6949 }, { "epoch": 1.8484042553191489, "grad_norm": 3.988560914993286, "learning_rate": 7.4567150022735e-06, "loss": 0.7892, "step": 6950 }, { "epoch": 1.8486702127659576, "grad_norm": 3.761817216873169, "learning_rate": 7.455948944982529e-06, "loss": 0.7549, "step": 6951 }, { "epoch": 1.8489361702127658, "grad_norm": 3.962528944015503, "learning_rate": 7.455182811701609e-06, "loss": 0.7874, "step": 6952 }, { "epoch": 1.8492021276595745, "grad_norm": 4.180268287658691, "learning_rate": 7.454416602454441e-06, "loss": 0.8401, "step": 6953 }, { "epoch": 1.849468085106383, "grad_norm": 3.7611262798309326, "learning_rate": 7.453650317264734e-06, "loss": 0.8463, "step": 6954 }, { "epoch": 1.8497340425531914, "grad_norm": 3.7269387245178223, "learning_rate": 7.452883956156197e-06, "loss": 0.7884, "step": 6955 }, { "epoch": 1.85, "grad_norm": 4.998419284820557, "learning_rate": 7.452117519152542e-06, "loss": 0.861, "step": 6956 }, { "epoch": 1.8502659574468086, "grad_norm": 4.210315704345703, "learning_rate": 7.4513510062774845e-06, "loss": 0.8083, "step": 6957 }, { "epoch": 1.850531914893617, "grad_norm": 4.184957027435303, "learning_rate": 7.4505844175547405e-06, "loss": 0.7648, "step": 6958 }, { "epoch": 1.8507978723404257, "grad_norm": 3.883157730102539, "learning_rate": 7.44981775300803e-06, "loss": 0.789, "step": 6959 }, { "epoch": 1.851063829787234, "grad_norm": 3.930384397506714, "learning_rate": 7.449051012661073e-06, "loss": 0.7467, "step": 6960 }, { "epoch": 1.8513297872340426, "grad_norm": 4.148220062255859, "learning_rate": 7.448284196537594e-06, "loss": 0.8692, "step": 6961 }, { "epoch": 1.851595744680851, "grad_norm": 4.141353607177734, "learning_rate": 7.4475173046613205e-06, "loss": 0.8553, "step": 6962 }, { "epoch": 1.8518617021276595, "grad_norm": 3.8646962642669678, "learning_rate": 7.4467503370559806e-06, "loss": 0.7953, "step": 6963 }, { "epoch": 1.8521276595744682, "grad_norm": 3.765763759613037, "learning_rate": 7.445983293745302e-06, "loss": 0.7173, "step": 6964 }, { "epoch": 1.8523936170212765, "grad_norm": 3.5731546878814697, "learning_rate": 7.445216174753022e-06, "loss": 0.7643, "step": 6965 }, { "epoch": 1.8526595744680852, "grad_norm": 3.3962113857269287, "learning_rate": 7.444448980102875e-06, "loss": 0.7694, "step": 6966 }, { "epoch": 1.8529255319148936, "grad_norm": 4.201429843902588, "learning_rate": 7.4436817098186e-06, "loss": 0.9388, "step": 6967 }, { "epoch": 1.853191489361702, "grad_norm": 4.063852787017822, "learning_rate": 7.442914363923933e-06, "loss": 0.8472, "step": 6968 }, { "epoch": 1.8534574468085108, "grad_norm": 4.6696696281433105, "learning_rate": 7.442146942442621e-06, "loss": 0.8739, "step": 6969 }, { "epoch": 1.853723404255319, "grad_norm": 3.5337836742401123, "learning_rate": 7.4413794453984065e-06, "loss": 0.7506, "step": 6970 }, { "epoch": 1.8539893617021277, "grad_norm": 4.372726917266846, "learning_rate": 7.440611872815038e-06, "loss": 0.824, "step": 6971 }, { "epoch": 1.8542553191489362, "grad_norm": 4.04209566116333, "learning_rate": 7.439844224716265e-06, "loss": 0.8098, "step": 6972 }, { "epoch": 1.8545212765957446, "grad_norm": 3.8578147888183594, "learning_rate": 7.439076501125839e-06, "loss": 0.7585, "step": 6973 }, { "epoch": 1.8547872340425533, "grad_norm": 4.210418701171875, "learning_rate": 7.4383087020675145e-06, "loss": 0.7915, "step": 6974 }, { "epoch": 1.8550531914893615, "grad_norm": 3.4614603519439697, "learning_rate": 7.4375408275650475e-06, "loss": 0.7506, "step": 6975 }, { "epoch": 1.8553191489361702, "grad_norm": 4.220035076141357, "learning_rate": 7.436772877642199e-06, "loss": 0.8875, "step": 6976 }, { "epoch": 1.8555851063829787, "grad_norm": 4.095662593841553, "learning_rate": 7.436004852322727e-06, "loss": 0.8973, "step": 6977 }, { "epoch": 1.8558510638297872, "grad_norm": 4.23422908782959, "learning_rate": 7.435236751630397e-06, "loss": 0.699, "step": 6978 }, { "epoch": 1.8561170212765958, "grad_norm": 3.976768970489502, "learning_rate": 7.434468575588976e-06, "loss": 0.781, "step": 6979 }, { "epoch": 1.8563829787234043, "grad_norm": 4.405401229858398, "learning_rate": 7.43370032422223e-06, "loss": 0.7388, "step": 6980 }, { "epoch": 1.8566489361702128, "grad_norm": 4.096654891967773, "learning_rate": 7.432931997553929e-06, "loss": 0.8305, "step": 6981 }, { "epoch": 1.8569148936170212, "grad_norm": 3.9386327266693115, "learning_rate": 7.432163595607851e-06, "loss": 0.775, "step": 6982 }, { "epoch": 1.8571808510638297, "grad_norm": 4.111544609069824, "learning_rate": 7.431395118407766e-06, "loss": 0.9179, "step": 6983 }, { "epoch": 1.8574468085106384, "grad_norm": 3.3650224208831787, "learning_rate": 7.4306265659774525e-06, "loss": 0.8286, "step": 6984 }, { "epoch": 1.8577127659574468, "grad_norm": 4.099471569061279, "learning_rate": 7.429857938340693e-06, "loss": 0.8789, "step": 6985 }, { "epoch": 1.8579787234042553, "grad_norm": 4.082056999206543, "learning_rate": 7.429089235521267e-06, "loss": 0.8938, "step": 6986 }, { "epoch": 1.858244680851064, "grad_norm": 4.1304545402526855, "learning_rate": 7.428320457542962e-06, "loss": 0.8639, "step": 6987 }, { "epoch": 1.8585106382978722, "grad_norm": 3.941922426223755, "learning_rate": 7.427551604429562e-06, "loss": 0.7966, "step": 6988 }, { "epoch": 1.858776595744681, "grad_norm": 3.8861730098724365, "learning_rate": 7.426782676204857e-06, "loss": 0.8282, "step": 6989 }, { "epoch": 1.8590425531914894, "grad_norm": 3.8917558193206787, "learning_rate": 7.426013672892639e-06, "loss": 0.7213, "step": 6990 }, { "epoch": 1.8593085106382978, "grad_norm": 4.324743747711182, "learning_rate": 7.4252445945167005e-06, "loss": 0.9627, "step": 6991 }, { "epoch": 1.8595744680851065, "grad_norm": 3.6545021533966064, "learning_rate": 7.42447544110084e-06, "loss": 0.742, "step": 6992 }, { "epoch": 1.8598404255319148, "grad_norm": 4.201162338256836, "learning_rate": 7.423706212668855e-06, "loss": 0.8343, "step": 6993 }, { "epoch": 1.8601063829787234, "grad_norm": 3.67588472366333, "learning_rate": 7.4229369092445465e-06, "loss": 0.7863, "step": 6994 }, { "epoch": 1.860372340425532, "grad_norm": 3.3527588844299316, "learning_rate": 7.422167530851716e-06, "loss": 0.7513, "step": 6995 }, { "epoch": 1.8606382978723404, "grad_norm": 3.977691888809204, "learning_rate": 7.421398077514172e-06, "loss": 0.7507, "step": 6996 }, { "epoch": 1.860904255319149, "grad_norm": 4.172175407409668, "learning_rate": 7.420628549255719e-06, "loss": 0.8395, "step": 6997 }, { "epoch": 1.8611702127659573, "grad_norm": 3.738621473312378, "learning_rate": 7.41985894610017e-06, "loss": 0.8366, "step": 6998 }, { "epoch": 1.861436170212766, "grad_norm": 4.003189563751221, "learning_rate": 7.4190892680713366e-06, "loss": 0.9032, "step": 6999 }, { "epoch": 1.8617021276595744, "grad_norm": 3.872437000274658, "learning_rate": 7.418319515193032e-06, "loss": 0.8052, "step": 7000 }, { "epoch": 1.8617021276595744, "eval_loss": 1.269985556602478, "eval_runtime": 14.1914, "eval_samples_per_second": 28.186, "eval_steps_per_second": 3.523, "step": 7000 }, { "epoch": 1.861968085106383, "grad_norm": 4.005687713623047, "learning_rate": 7.417549687489074e-06, "loss": 0.7515, "step": 7001 }, { "epoch": 1.8622340425531916, "grad_norm": 3.833047866821289, "learning_rate": 7.416779784983284e-06, "loss": 0.8487, "step": 7002 }, { "epoch": 1.8625, "grad_norm": 3.902536392211914, "learning_rate": 7.416009807699481e-06, "loss": 0.7448, "step": 7003 }, { "epoch": 1.8627659574468085, "grad_norm": 4.018909931182861, "learning_rate": 7.41523975566149e-06, "loss": 0.8619, "step": 7004 }, { "epoch": 1.863031914893617, "grad_norm": 3.7916078567504883, "learning_rate": 7.414469628893137e-06, "loss": 0.7254, "step": 7005 }, { "epoch": 1.8632978723404254, "grad_norm": 3.662709951400757, "learning_rate": 7.413699427418253e-06, "loss": 0.8801, "step": 7006 }, { "epoch": 1.8635638297872341, "grad_norm": 3.8417561054229736, "learning_rate": 7.412929151260665e-06, "loss": 0.9611, "step": 7007 }, { "epoch": 1.8638297872340426, "grad_norm": 3.8474161624908447, "learning_rate": 7.412158800444208e-06, "loss": 0.7215, "step": 7008 }, { "epoch": 1.864095744680851, "grad_norm": 3.4360055923461914, "learning_rate": 7.411388374992719e-06, "loss": 0.7885, "step": 7009 }, { "epoch": 1.8643617021276597, "grad_norm": 3.902475357055664, "learning_rate": 7.410617874930034e-06, "loss": 0.8199, "step": 7010 }, { "epoch": 1.864627659574468, "grad_norm": 4.08276891708374, "learning_rate": 7.409847300279993e-06, "loss": 0.793, "step": 7011 }, { "epoch": 1.8648936170212767, "grad_norm": 4.242387294769287, "learning_rate": 7.4090766510664405e-06, "loss": 0.9345, "step": 7012 }, { "epoch": 1.8651595744680851, "grad_norm": 3.8312370777130127, "learning_rate": 7.40830592731322e-06, "loss": 0.8151, "step": 7013 }, { "epoch": 1.8654255319148936, "grad_norm": 4.087930679321289, "learning_rate": 7.407535129044179e-06, "loss": 0.936, "step": 7014 }, { "epoch": 1.8656914893617023, "grad_norm": 4.200309753417969, "learning_rate": 7.4067642562831656e-06, "loss": 0.8345, "step": 7015 }, { "epoch": 1.8659574468085105, "grad_norm": 3.7283883094787598, "learning_rate": 7.4059933090540315e-06, "loss": 0.7398, "step": 7016 }, { "epoch": 1.8662234042553192, "grad_norm": 4.288913249969482, "learning_rate": 7.4052222873806345e-06, "loss": 0.9314, "step": 7017 }, { "epoch": 1.8664893617021276, "grad_norm": 4.077908515930176, "learning_rate": 7.404451191286825e-06, "loss": 0.8331, "step": 7018 }, { "epoch": 1.866755319148936, "grad_norm": 4.040445804595947, "learning_rate": 7.403680020796468e-06, "loss": 0.8054, "step": 7019 }, { "epoch": 1.8670212765957448, "grad_norm": 4.416097164154053, "learning_rate": 7.402908775933419e-06, "loss": 0.7164, "step": 7020 }, { "epoch": 1.867287234042553, "grad_norm": 3.8552403450012207, "learning_rate": 7.402137456721544e-06, "loss": 0.8274, "step": 7021 }, { "epoch": 1.8675531914893617, "grad_norm": 4.477870941162109, "learning_rate": 7.401366063184709e-06, "loss": 0.9087, "step": 7022 }, { "epoch": 1.8678191489361702, "grad_norm": 4.315149784088135, "learning_rate": 7.4005945953467794e-06, "loss": 0.8275, "step": 7023 }, { "epoch": 1.8680851063829786, "grad_norm": 4.013988971710205, "learning_rate": 7.3998230532316275e-06, "loss": 0.7935, "step": 7024 }, { "epoch": 1.8683510638297873, "grad_norm": 4.538480281829834, "learning_rate": 7.399051436863125e-06, "loss": 0.7913, "step": 7025 }, { "epoch": 1.8686170212765958, "grad_norm": 3.814431667327881, "learning_rate": 7.398279746265144e-06, "loss": 0.8819, "step": 7026 }, { "epoch": 1.8688829787234043, "grad_norm": 4.128929615020752, "learning_rate": 7.397507981461567e-06, "loss": 0.7733, "step": 7027 }, { "epoch": 1.8691489361702127, "grad_norm": 4.266568660736084, "learning_rate": 7.3967361424762696e-06, "loss": 0.8756, "step": 7028 }, { "epoch": 1.8694148936170212, "grad_norm": 3.817857265472412, "learning_rate": 7.3959642293331336e-06, "loss": 0.8247, "step": 7029 }, { "epoch": 1.8696808510638299, "grad_norm": 4.07396125793457, "learning_rate": 7.395192242056044e-06, "loss": 0.7925, "step": 7030 }, { "epoch": 1.8699468085106383, "grad_norm": 3.3347582817077637, "learning_rate": 7.3944201806688865e-06, "loss": 0.647, "step": 7031 }, { "epoch": 1.8702127659574468, "grad_norm": 3.7496252059936523, "learning_rate": 7.393648045195548e-06, "loss": 0.884, "step": 7032 }, { "epoch": 1.8704787234042555, "grad_norm": 3.871969223022461, "learning_rate": 7.392875835659923e-06, "loss": 0.7962, "step": 7033 }, { "epoch": 1.8707446808510637, "grad_norm": 4.357855796813965, "learning_rate": 7.392103552085901e-06, "loss": 0.8063, "step": 7034 }, { "epoch": 1.8710106382978724, "grad_norm": 3.7552926540374756, "learning_rate": 7.391331194497379e-06, "loss": 0.7611, "step": 7035 }, { "epoch": 1.8712765957446809, "grad_norm": 4.20325231552124, "learning_rate": 7.390558762918254e-06, "loss": 0.8825, "step": 7036 }, { "epoch": 1.8715425531914893, "grad_norm": 3.433969020843506, "learning_rate": 7.389786257372428e-06, "loss": 0.6822, "step": 7037 }, { "epoch": 1.871808510638298, "grad_norm": 3.9316911697387695, "learning_rate": 7.3890136778837995e-06, "loss": 0.8302, "step": 7038 }, { "epoch": 1.8720744680851062, "grad_norm": 3.7068655490875244, "learning_rate": 7.388241024476276e-06, "loss": 0.8207, "step": 7039 }, { "epoch": 1.872340425531915, "grad_norm": 3.7558844089508057, "learning_rate": 7.387468297173764e-06, "loss": 0.8916, "step": 7040 }, { "epoch": 1.8726063829787234, "grad_norm": 3.663325786590576, "learning_rate": 7.386695496000172e-06, "loss": 0.8461, "step": 7041 }, { "epoch": 1.8728723404255319, "grad_norm": 3.7792584896087646, "learning_rate": 7.38592262097941e-06, "loss": 0.775, "step": 7042 }, { "epoch": 1.8731382978723405, "grad_norm": 3.6168766021728516, "learning_rate": 7.385149672135394e-06, "loss": 0.7552, "step": 7043 }, { "epoch": 1.8734042553191488, "grad_norm": 3.5428271293640137, "learning_rate": 7.384376649492039e-06, "loss": 0.8633, "step": 7044 }, { "epoch": 1.8736702127659575, "grad_norm": 4.00286340713501, "learning_rate": 7.383603553073262e-06, "loss": 0.7895, "step": 7045 }, { "epoch": 1.873936170212766, "grad_norm": 4.0529890060424805, "learning_rate": 7.382830382902986e-06, "loss": 0.7161, "step": 7046 }, { "epoch": 1.8742021276595744, "grad_norm": 4.5928425788879395, "learning_rate": 7.382057139005132e-06, "loss": 0.8454, "step": 7047 }, { "epoch": 1.874468085106383, "grad_norm": 3.7979865074157715, "learning_rate": 7.381283821403626e-06, "loss": 0.8475, "step": 7048 }, { "epoch": 1.8747340425531915, "grad_norm": 3.9232993125915527, "learning_rate": 7.380510430122396e-06, "loss": 0.8079, "step": 7049 }, { "epoch": 1.875, "grad_norm": 4.084567546844482, "learning_rate": 7.379736965185369e-06, "loss": 0.8926, "step": 7050 }, { "epoch": 1.8752659574468085, "grad_norm": 3.967013359069824, "learning_rate": 7.378963426616479e-06, "loss": 0.8136, "step": 7051 }, { "epoch": 1.875531914893617, "grad_norm": 4.18993616104126, "learning_rate": 7.378189814439659e-06, "loss": 0.663, "step": 7052 }, { "epoch": 1.8757978723404256, "grad_norm": 3.4214327335357666, "learning_rate": 7.377416128678847e-06, "loss": 0.7142, "step": 7053 }, { "epoch": 1.876063829787234, "grad_norm": 4.111138343811035, "learning_rate": 7.37664236935798e-06, "loss": 0.8517, "step": 7054 }, { "epoch": 1.8763297872340425, "grad_norm": 4.020641326904297, "learning_rate": 7.375868536501001e-06, "loss": 0.7649, "step": 7055 }, { "epoch": 1.8765957446808512, "grad_norm": 3.6159451007843018, "learning_rate": 7.375094630131852e-06, "loss": 0.7219, "step": 7056 }, { "epoch": 1.8768617021276595, "grad_norm": 4.138524532318115, "learning_rate": 7.374320650274479e-06, "loss": 0.7374, "step": 7057 }, { "epoch": 1.8771276595744681, "grad_norm": 4.114788055419922, "learning_rate": 7.373546596952829e-06, "loss": 0.9118, "step": 7058 }, { "epoch": 1.8773936170212766, "grad_norm": 3.8229057788848877, "learning_rate": 7.372772470190852e-06, "loss": 0.7109, "step": 7059 }, { "epoch": 1.877659574468085, "grad_norm": 3.9543075561523438, "learning_rate": 7.371998270012504e-06, "loss": 0.7616, "step": 7060 }, { "epoch": 1.8779255319148938, "grad_norm": 3.862529754638672, "learning_rate": 7.3712239964417345e-06, "loss": 0.8719, "step": 7061 }, { "epoch": 1.878191489361702, "grad_norm": 3.855138063430786, "learning_rate": 7.370449649502504e-06, "loss": 0.7093, "step": 7062 }, { "epoch": 1.8784574468085107, "grad_norm": 4.169119358062744, "learning_rate": 7.36967522921877e-06, "loss": 0.8817, "step": 7063 }, { "epoch": 1.8787234042553191, "grad_norm": 3.8987720012664795, "learning_rate": 7.368900735614494e-06, "loss": 0.7522, "step": 7064 }, { "epoch": 1.8789893617021276, "grad_norm": 3.938058853149414, "learning_rate": 7.36812616871364e-06, "loss": 0.7694, "step": 7065 }, { "epoch": 1.8792553191489363, "grad_norm": 3.7450876235961914, "learning_rate": 7.367351528540176e-06, "loss": 0.7283, "step": 7066 }, { "epoch": 1.8795212765957445, "grad_norm": 3.9045193195343018, "learning_rate": 7.366576815118067e-06, "loss": 0.735, "step": 7067 }, { "epoch": 1.8797872340425532, "grad_norm": 3.4928138256073, "learning_rate": 7.365802028471285e-06, "loss": 0.7537, "step": 7068 }, { "epoch": 1.8800531914893617, "grad_norm": 3.8254666328430176, "learning_rate": 7.365027168623804e-06, "loss": 0.8252, "step": 7069 }, { "epoch": 1.8803191489361701, "grad_norm": 4.039599418640137, "learning_rate": 7.364252235599596e-06, "loss": 0.78, "step": 7070 }, { "epoch": 1.8805851063829788, "grad_norm": 4.29962158203125, "learning_rate": 7.363477229422642e-06, "loss": 0.8651, "step": 7071 }, { "epoch": 1.8808510638297873, "grad_norm": 3.891298294067383, "learning_rate": 7.3627021501169196e-06, "loss": 0.7887, "step": 7072 }, { "epoch": 1.8811170212765957, "grad_norm": 3.8227875232696533, "learning_rate": 7.36192699770641e-06, "loss": 0.8563, "step": 7073 }, { "epoch": 1.8813829787234042, "grad_norm": 3.881826639175415, "learning_rate": 7.3611517722151e-06, "loss": 0.7518, "step": 7074 }, { "epoch": 1.8816489361702127, "grad_norm": 3.529783248901367, "learning_rate": 7.360376473666973e-06, "loss": 0.7086, "step": 7075 }, { "epoch": 1.8819148936170214, "grad_norm": 3.710423231124878, "learning_rate": 7.359601102086018e-06, "loss": 0.8141, "step": 7076 }, { "epoch": 1.8821808510638298, "grad_norm": 4.26459264755249, "learning_rate": 7.358825657496228e-06, "loss": 0.8523, "step": 7077 }, { "epoch": 1.8824468085106383, "grad_norm": 3.9186158180236816, "learning_rate": 7.358050139921595e-06, "loss": 0.806, "step": 7078 }, { "epoch": 1.882712765957447, "grad_norm": 3.5147833824157715, "learning_rate": 7.3572745493861155e-06, "loss": 0.742, "step": 7079 }, { "epoch": 1.8829787234042552, "grad_norm": 3.834606885910034, "learning_rate": 7.356498885913784e-06, "loss": 0.9308, "step": 7080 }, { "epoch": 1.883244680851064, "grad_norm": 3.989713191986084, "learning_rate": 7.355723149528604e-06, "loss": 0.8085, "step": 7081 }, { "epoch": 1.8835106382978724, "grad_norm": 4.148540019989014, "learning_rate": 7.354947340254576e-06, "loss": 0.7697, "step": 7082 }, { "epoch": 1.8837765957446808, "grad_norm": 3.6128063201904297, "learning_rate": 7.354171458115704e-06, "loss": 0.7755, "step": 7083 }, { "epoch": 1.8840425531914895, "grad_norm": 4.31196928024292, "learning_rate": 7.353395503135996e-06, "loss": 0.7754, "step": 7084 }, { "epoch": 1.8843085106382977, "grad_norm": 3.750534772872925, "learning_rate": 7.35261947533946e-06, "loss": 0.8237, "step": 7085 }, { "epoch": 1.8845744680851064, "grad_norm": 3.8344967365264893, "learning_rate": 7.351843374750108e-06, "loss": 0.832, "step": 7086 }, { "epoch": 1.8848404255319149, "grad_norm": 3.5898144245147705, "learning_rate": 7.351067201391952e-06, "loss": 0.737, "step": 7087 }, { "epoch": 1.8851063829787233, "grad_norm": 3.8664729595184326, "learning_rate": 7.35029095528901e-06, "loss": 0.8636, "step": 7088 }, { "epoch": 1.885372340425532, "grad_norm": 4.382975101470947, "learning_rate": 7.349514636465298e-06, "loss": 0.8923, "step": 7089 }, { "epoch": 1.8856382978723403, "grad_norm": 4.070766448974609, "learning_rate": 7.348738244944837e-06, "loss": 0.8651, "step": 7090 }, { "epoch": 1.885904255319149, "grad_norm": 4.187519073486328, "learning_rate": 7.347961780751649e-06, "loss": 0.8492, "step": 7091 }, { "epoch": 1.8861702127659574, "grad_norm": 3.7398457527160645, "learning_rate": 7.347185243909761e-06, "loss": 0.7936, "step": 7092 }, { "epoch": 1.8864361702127659, "grad_norm": 3.758314609527588, "learning_rate": 7.346408634443196e-06, "loss": 0.9086, "step": 7093 }, { "epoch": 1.8867021276595746, "grad_norm": 3.800701856613159, "learning_rate": 7.345631952375986e-06, "loss": 0.8418, "step": 7094 }, { "epoch": 1.886968085106383, "grad_norm": 4.155978202819824, "learning_rate": 7.3448551977321615e-06, "loss": 0.9388, "step": 7095 }, { "epoch": 1.8872340425531915, "grad_norm": 3.9163780212402344, "learning_rate": 7.344078370535757e-06, "loss": 0.7108, "step": 7096 }, { "epoch": 1.8875, "grad_norm": 3.312629222869873, "learning_rate": 7.343301470810809e-06, "loss": 0.6591, "step": 7097 }, { "epoch": 1.8877659574468084, "grad_norm": 4.259210586547852, "learning_rate": 7.342524498581352e-06, "loss": 0.9209, "step": 7098 }, { "epoch": 1.888031914893617, "grad_norm": 4.158624649047852, "learning_rate": 7.34174745387143e-06, "loss": 0.8084, "step": 7099 }, { "epoch": 1.8882978723404256, "grad_norm": 4.25371789932251, "learning_rate": 7.340970336705084e-06, "loss": 0.8624, "step": 7100 }, { "epoch": 1.888563829787234, "grad_norm": 3.780513286590576, "learning_rate": 7.340193147106362e-06, "loss": 0.7879, "step": 7101 }, { "epoch": 1.8888297872340427, "grad_norm": 4.191688537597656, "learning_rate": 7.339415885099307e-06, "loss": 0.7785, "step": 7102 }, { "epoch": 1.889095744680851, "grad_norm": 4.398171901702881, "learning_rate": 7.33863855070797e-06, "loss": 0.8883, "step": 7103 }, { "epoch": 1.8893617021276596, "grad_norm": 3.6488990783691406, "learning_rate": 7.337861143956404e-06, "loss": 0.8097, "step": 7104 }, { "epoch": 1.889627659574468, "grad_norm": 4.0780487060546875, "learning_rate": 7.3370836648686616e-06, "loss": 0.7897, "step": 7105 }, { "epoch": 1.8898936170212766, "grad_norm": 4.089003562927246, "learning_rate": 7.336306113468799e-06, "loss": 0.9653, "step": 7106 }, { "epoch": 1.8901595744680852, "grad_norm": 4.446435928344727, "learning_rate": 7.335528489780874e-06, "loss": 0.8947, "step": 7107 }, { "epoch": 1.8904255319148935, "grad_norm": 3.880557060241699, "learning_rate": 7.334750793828947e-06, "loss": 0.9184, "step": 7108 }, { "epoch": 1.8906914893617022, "grad_norm": 4.0276899337768555, "learning_rate": 7.3339730256370834e-06, "loss": 0.7444, "step": 7109 }, { "epoch": 1.8909574468085106, "grad_norm": 4.381673336029053, "learning_rate": 7.333195185229346e-06, "loss": 0.7789, "step": 7110 }, { "epoch": 1.891223404255319, "grad_norm": 4.908472537994385, "learning_rate": 7.3324172726298015e-06, "loss": 0.8258, "step": 7111 }, { "epoch": 1.8914893617021278, "grad_norm": 4.257655143737793, "learning_rate": 7.331639287862522e-06, "loss": 0.8343, "step": 7112 }, { "epoch": 1.891755319148936, "grad_norm": 3.902233600616455, "learning_rate": 7.330861230951577e-06, "loss": 0.7672, "step": 7113 }, { "epoch": 1.8920212765957447, "grad_norm": 4.111093044281006, "learning_rate": 7.3300831019210415e-06, "loss": 0.9128, "step": 7114 }, { "epoch": 1.8922872340425532, "grad_norm": 4.477164268493652, "learning_rate": 7.329304900794991e-06, "loss": 0.9389, "step": 7115 }, { "epoch": 1.8925531914893616, "grad_norm": 4.585188388824463, "learning_rate": 7.328526627597505e-06, "loss": 0.8127, "step": 7116 }, { "epoch": 1.8928191489361703, "grad_norm": 3.906665086746216, "learning_rate": 7.327748282352664e-06, "loss": 0.7996, "step": 7117 }, { "epoch": 1.8930851063829788, "grad_norm": 4.213885307312012, "learning_rate": 7.32696986508455e-06, "loss": 0.8334, "step": 7118 }, { "epoch": 1.8933510638297872, "grad_norm": 4.066798686981201, "learning_rate": 7.326191375817249e-06, "loss": 0.8217, "step": 7119 }, { "epoch": 1.8936170212765957, "grad_norm": 3.510889768600464, "learning_rate": 7.325412814574847e-06, "loss": 0.7864, "step": 7120 }, { "epoch": 1.8938829787234042, "grad_norm": 3.888808250427246, "learning_rate": 7.324634181381436e-06, "loss": 0.7519, "step": 7121 }, { "epoch": 1.8941489361702128, "grad_norm": 3.9174201488494873, "learning_rate": 7.323855476261106e-06, "loss": 0.6913, "step": 7122 }, { "epoch": 1.8944148936170213, "grad_norm": 4.041181564331055, "learning_rate": 7.323076699237951e-06, "loss": 0.6076, "step": 7123 }, { "epoch": 1.8946808510638298, "grad_norm": 3.841498851776123, "learning_rate": 7.322297850336069e-06, "loss": 0.8645, "step": 7124 }, { "epoch": 1.8949468085106385, "grad_norm": 3.5201406478881836, "learning_rate": 7.3215189295795565e-06, "loss": 0.7253, "step": 7125 }, { "epoch": 1.8952127659574467, "grad_norm": 3.9525210857391357, "learning_rate": 7.320739936992514e-06, "loss": 0.8073, "step": 7126 }, { "epoch": 1.8954787234042554, "grad_norm": 3.8624043464660645, "learning_rate": 7.319960872599048e-06, "loss": 0.8157, "step": 7127 }, { "epoch": 1.8957446808510638, "grad_norm": 4.123876571655273, "learning_rate": 7.31918173642326e-06, "loss": 0.8038, "step": 7128 }, { "epoch": 1.8960106382978723, "grad_norm": 3.812316417694092, "learning_rate": 7.318402528489258e-06, "loss": 0.7421, "step": 7129 }, { "epoch": 1.896276595744681, "grad_norm": 4.009311199188232, "learning_rate": 7.317623248821153e-06, "loss": 0.835, "step": 7130 }, { "epoch": 1.8965425531914892, "grad_norm": 4.297110557556152, "learning_rate": 7.316843897443055e-06, "loss": 0.7093, "step": 7131 }, { "epoch": 1.896808510638298, "grad_norm": 4.034492015838623, "learning_rate": 7.316064474379081e-06, "loss": 0.7682, "step": 7132 }, { "epoch": 1.8970744680851064, "grad_norm": 4.544641494750977, "learning_rate": 7.315284979653344e-06, "loss": 0.8832, "step": 7133 }, { "epoch": 1.8973404255319148, "grad_norm": 4.383004188537598, "learning_rate": 7.314505413289964e-06, "loss": 0.892, "step": 7134 }, { "epoch": 1.8976063829787235, "grad_norm": 3.52055025100708, "learning_rate": 7.313725775313061e-06, "loss": 0.7965, "step": 7135 }, { "epoch": 1.8978723404255318, "grad_norm": 3.933687925338745, "learning_rate": 7.31294606574676e-06, "loss": 0.7829, "step": 7136 }, { "epoch": 1.8981382978723405, "grad_norm": 4.500588417053223, "learning_rate": 7.312166284615183e-06, "loss": 0.8802, "step": 7137 }, { "epoch": 1.898404255319149, "grad_norm": 3.9210360050201416, "learning_rate": 7.31138643194246e-06, "loss": 0.7418, "step": 7138 }, { "epoch": 1.8986702127659574, "grad_norm": 4.024209022521973, "learning_rate": 7.3106065077527175e-06, "loss": 0.8769, "step": 7139 }, { "epoch": 1.898936170212766, "grad_norm": 4.242138862609863, "learning_rate": 7.3098265120700915e-06, "loss": 0.8789, "step": 7140 }, { "epoch": 1.8992021276595743, "grad_norm": 3.6798341274261475, "learning_rate": 7.309046444918712e-06, "loss": 0.7971, "step": 7141 }, { "epoch": 1.899468085106383, "grad_norm": 4.092346668243408, "learning_rate": 7.308266306322719e-06, "loss": 0.7864, "step": 7142 }, { "epoch": 1.8997340425531914, "grad_norm": 4.132681846618652, "learning_rate": 7.307486096306247e-06, "loss": 0.8868, "step": 7143 }, { "epoch": 1.9, "grad_norm": 3.893075942993164, "learning_rate": 7.30670581489344e-06, "loss": 0.9096, "step": 7144 }, { "epoch": 1.9002659574468086, "grad_norm": 3.807593822479248, "learning_rate": 7.305925462108439e-06, "loss": 0.7444, "step": 7145 }, { "epoch": 1.900531914893617, "grad_norm": 3.6460392475128174, "learning_rate": 7.305145037975388e-06, "loss": 0.74, "step": 7146 }, { "epoch": 1.9007978723404255, "grad_norm": 3.5041310787200928, "learning_rate": 7.304364542518435e-06, "loss": 0.8561, "step": 7147 }, { "epoch": 1.9010638297872342, "grad_norm": 4.359119892120361, "learning_rate": 7.303583975761732e-06, "loss": 0.735, "step": 7148 }, { "epoch": 1.9013297872340424, "grad_norm": 4.176085948944092, "learning_rate": 7.302803337729429e-06, "loss": 0.8723, "step": 7149 }, { "epoch": 1.9015957446808511, "grad_norm": 3.764272689819336, "learning_rate": 7.302022628445678e-06, "loss": 0.8359, "step": 7150 }, { "epoch": 1.9018617021276596, "grad_norm": 3.8661603927612305, "learning_rate": 7.301241847934637e-06, "loss": 0.9286, "step": 7151 }, { "epoch": 1.902127659574468, "grad_norm": 3.493070363998413, "learning_rate": 7.300460996220464e-06, "loss": 0.7439, "step": 7152 }, { "epoch": 1.9023936170212767, "grad_norm": 3.425701379776001, "learning_rate": 7.2996800733273196e-06, "loss": 0.8468, "step": 7153 }, { "epoch": 1.902659574468085, "grad_norm": 3.9553513526916504, "learning_rate": 7.298899079279365e-06, "loss": 0.8075, "step": 7154 }, { "epoch": 1.9029255319148937, "grad_norm": 3.900907516479492, "learning_rate": 7.298118014100766e-06, "loss": 0.8969, "step": 7155 }, { "epoch": 1.9031914893617021, "grad_norm": 3.8822121620178223, "learning_rate": 7.297336877815693e-06, "loss": 0.8685, "step": 7156 }, { "epoch": 1.9034574468085106, "grad_norm": 3.847317695617676, "learning_rate": 7.29655567044831e-06, "loss": 0.7251, "step": 7157 }, { "epoch": 1.9037234042553193, "grad_norm": 3.5498738288879395, "learning_rate": 7.295774392022791e-06, "loss": 0.7035, "step": 7158 }, { "epoch": 1.9039893617021275, "grad_norm": 3.658343553543091, "learning_rate": 7.2949930425633095e-06, "loss": 0.7414, "step": 7159 }, { "epoch": 1.9042553191489362, "grad_norm": 3.804388999938965, "learning_rate": 7.2942116220940406e-06, "loss": 0.8057, "step": 7160 }, { "epoch": 1.9045212765957447, "grad_norm": 3.876521348953247, "learning_rate": 7.293430130639163e-06, "loss": 0.886, "step": 7161 }, { "epoch": 1.9047872340425531, "grad_norm": 3.969161033630371, "learning_rate": 7.292648568222859e-06, "loss": 0.9049, "step": 7162 }, { "epoch": 1.9050531914893618, "grad_norm": 4.049928188323975, "learning_rate": 7.2918669348693075e-06, "loss": 0.8954, "step": 7163 }, { "epoch": 1.90531914893617, "grad_norm": 3.997854232788086, "learning_rate": 7.291085230602694e-06, "loss": 0.9063, "step": 7164 }, { "epoch": 1.9055851063829787, "grad_norm": 4.090554237365723, "learning_rate": 7.290303455447208e-06, "loss": 0.8132, "step": 7165 }, { "epoch": 1.9058510638297872, "grad_norm": 3.8804330825805664, "learning_rate": 7.289521609427035e-06, "loss": 0.8245, "step": 7166 }, { "epoch": 1.9061170212765957, "grad_norm": 3.7036948204040527, "learning_rate": 7.288739692566367e-06, "loss": 0.891, "step": 7167 }, { "epoch": 1.9063829787234043, "grad_norm": 3.8350512981414795, "learning_rate": 7.2879577048894e-06, "loss": 0.7912, "step": 7168 }, { "epoch": 1.9066489361702128, "grad_norm": 3.3897817134857178, "learning_rate": 7.287175646420327e-06, "loss": 0.8327, "step": 7169 }, { "epoch": 1.9069148936170213, "grad_norm": 4.037939548492432, "learning_rate": 7.2863935171833465e-06, "loss": 0.8793, "step": 7170 }, { "epoch": 1.90718085106383, "grad_norm": 3.7813265323638916, "learning_rate": 7.285611317202661e-06, "loss": 0.8551, "step": 7171 }, { "epoch": 1.9074468085106382, "grad_norm": 3.916761636734009, "learning_rate": 7.284829046502467e-06, "loss": 0.7564, "step": 7172 }, { "epoch": 1.9077127659574469, "grad_norm": 3.843834400177002, "learning_rate": 7.284046705106974e-06, "loss": 0.8456, "step": 7173 }, { "epoch": 1.9079787234042553, "grad_norm": 3.752497434616089, "learning_rate": 7.2832642930403876e-06, "loss": 0.8221, "step": 7174 }, { "epoch": 1.9082446808510638, "grad_norm": 4.00820779800415, "learning_rate": 7.282481810326915e-06, "loss": 0.9672, "step": 7175 }, { "epoch": 1.9085106382978725, "grad_norm": 4.226334571838379, "learning_rate": 7.281699256990766e-06, "loss": 0.8973, "step": 7176 }, { "epoch": 1.9087765957446807, "grad_norm": 3.871880531311035, "learning_rate": 7.280916633056159e-06, "loss": 0.8204, "step": 7177 }, { "epoch": 1.9090425531914894, "grad_norm": 4.339875221252441, "learning_rate": 7.280133938547304e-06, "loss": 0.8958, "step": 7178 }, { "epoch": 1.9093085106382979, "grad_norm": 3.7419753074645996, "learning_rate": 7.27935117348842e-06, "loss": 0.789, "step": 7179 }, { "epoch": 1.9095744680851063, "grad_norm": 4.0317888259887695, "learning_rate": 7.278568337903729e-06, "loss": 0.7995, "step": 7180 }, { "epoch": 1.909840425531915, "grad_norm": 3.9452288150787354, "learning_rate": 7.277785431817449e-06, "loss": 0.8576, "step": 7181 }, { "epoch": 1.9101063829787233, "grad_norm": 3.957437753677368, "learning_rate": 7.277002455253807e-06, "loss": 0.8532, "step": 7182 }, { "epoch": 1.910372340425532, "grad_norm": 3.9327943325042725, "learning_rate": 7.276219408237029e-06, "loss": 0.8155, "step": 7183 }, { "epoch": 1.9106382978723404, "grad_norm": 4.20408296585083, "learning_rate": 7.27543629079134e-06, "loss": 0.8285, "step": 7184 }, { "epoch": 1.9109042553191489, "grad_norm": 4.2042341232299805, "learning_rate": 7.274653102940974e-06, "loss": 0.8624, "step": 7185 }, { "epoch": 1.9111702127659576, "grad_norm": 4.000115871429443, "learning_rate": 7.2738698447101645e-06, "loss": 0.8343, "step": 7186 }, { "epoch": 1.9114361702127658, "grad_norm": 4.323785305023193, "learning_rate": 7.273086516123145e-06, "loss": 0.7525, "step": 7187 }, { "epoch": 1.9117021276595745, "grad_norm": 3.9202396869659424, "learning_rate": 7.27230311720415e-06, "loss": 0.9014, "step": 7188 }, { "epoch": 1.911968085106383, "grad_norm": 3.924821615219116, "learning_rate": 7.271519647977422e-06, "loss": 0.8206, "step": 7189 }, { "epoch": 1.9122340425531914, "grad_norm": 3.9752979278564453, "learning_rate": 7.270736108467202e-06, "loss": 0.9627, "step": 7190 }, { "epoch": 1.9125, "grad_norm": 3.7932825088500977, "learning_rate": 7.269952498697734e-06, "loss": 0.8227, "step": 7191 }, { "epoch": 1.9127659574468086, "grad_norm": 4.589715480804443, "learning_rate": 7.2691688186932626e-06, "loss": 0.9176, "step": 7192 }, { "epoch": 1.913031914893617, "grad_norm": 4.00385856628418, "learning_rate": 7.268385068478037e-06, "loss": 0.7602, "step": 7193 }, { "epoch": 1.9132978723404257, "grad_norm": 4.291144847869873, "learning_rate": 7.267601248076307e-06, "loss": 1.0254, "step": 7194 }, { "epoch": 1.913563829787234, "grad_norm": 3.699037790298462, "learning_rate": 7.2668173575123234e-06, "loss": 0.8528, "step": 7195 }, { "epoch": 1.9138297872340426, "grad_norm": 3.936768054962158, "learning_rate": 7.266033396810343e-06, "loss": 0.7172, "step": 7196 }, { "epoch": 1.914095744680851, "grad_norm": 3.23809814453125, "learning_rate": 7.265249365994621e-06, "loss": 0.6519, "step": 7197 }, { "epoch": 1.9143617021276595, "grad_norm": 4.3691020011901855, "learning_rate": 7.2644652650894155e-06, "loss": 0.8097, "step": 7198 }, { "epoch": 1.9146276595744682, "grad_norm": 4.070173263549805, "learning_rate": 7.263681094118989e-06, "loss": 1.0137, "step": 7199 }, { "epoch": 1.9148936170212765, "grad_norm": 3.9889721870422363, "learning_rate": 7.262896853107606e-06, "loss": 0.8935, "step": 7200 }, { "epoch": 1.9151595744680852, "grad_norm": 3.6993491649627686, "learning_rate": 7.262112542079529e-06, "loss": 0.7445, "step": 7201 }, { "epoch": 1.9154255319148936, "grad_norm": 4.081962585449219, "learning_rate": 7.261328161059026e-06, "loss": 1.0239, "step": 7202 }, { "epoch": 1.915691489361702, "grad_norm": 4.065913677215576, "learning_rate": 7.260543710070369e-06, "loss": 0.9063, "step": 7203 }, { "epoch": 1.9159574468085108, "grad_norm": 3.7012364864349365, "learning_rate": 7.259759189137827e-06, "loss": 0.9102, "step": 7204 }, { "epoch": 1.916223404255319, "grad_norm": 4.341013431549072, "learning_rate": 7.258974598285674e-06, "loss": 0.8309, "step": 7205 }, { "epoch": 1.9164893617021277, "grad_norm": 3.8948628902435303, "learning_rate": 7.258189937538189e-06, "loss": 0.786, "step": 7206 }, { "epoch": 1.9167553191489362, "grad_norm": 4.040065288543701, "learning_rate": 7.257405206919649e-06, "loss": 0.7283, "step": 7207 }, { "epoch": 1.9170212765957446, "grad_norm": 3.775395631790161, "learning_rate": 7.256620406454333e-06, "loss": 0.7441, "step": 7208 }, { "epoch": 1.9172872340425533, "grad_norm": 4.277199745178223, "learning_rate": 7.255835536166525e-06, "loss": 0.8784, "step": 7209 }, { "epoch": 1.9175531914893615, "grad_norm": 4.311332702636719, "learning_rate": 7.25505059608051e-06, "loss": 0.911, "step": 7210 }, { "epoch": 1.9178191489361702, "grad_norm": 3.843778371810913, "learning_rate": 7.254265586220574e-06, "loss": 0.7906, "step": 7211 }, { "epoch": 1.9180851063829787, "grad_norm": 4.064030647277832, "learning_rate": 7.253480506611008e-06, "loss": 0.8904, "step": 7212 }, { "epoch": 1.9183510638297872, "grad_norm": 3.85115385055542, "learning_rate": 7.252695357276101e-06, "loss": 0.7148, "step": 7213 }, { "epoch": 1.9186170212765958, "grad_norm": 3.716801643371582, "learning_rate": 7.251910138240147e-06, "loss": 0.7956, "step": 7214 }, { "epoch": 1.9188829787234043, "grad_norm": 3.7296745777130127, "learning_rate": 7.251124849527442e-06, "loss": 0.8143, "step": 7215 }, { "epoch": 1.9191489361702128, "grad_norm": 3.9987385272979736, "learning_rate": 7.250339491162284e-06, "loss": 0.8333, "step": 7216 }, { "epoch": 1.9194148936170212, "grad_norm": 3.8190033435821533, "learning_rate": 7.2495540631689745e-06, "loss": 0.8476, "step": 7217 }, { "epoch": 1.9196808510638297, "grad_norm": 4.055121898651123, "learning_rate": 7.248768565571811e-06, "loss": 0.8605, "step": 7218 }, { "epoch": 1.9199468085106384, "grad_norm": 4.3670525550842285, "learning_rate": 7.247982998395102e-06, "loss": 0.8381, "step": 7219 }, { "epoch": 1.9202127659574468, "grad_norm": 4.680405139923096, "learning_rate": 7.247197361663152e-06, "loss": 0.9635, "step": 7220 }, { "epoch": 1.9204787234042553, "grad_norm": 4.1340460777282715, "learning_rate": 7.24641165540027e-06, "loss": 0.8125, "step": 7221 }, { "epoch": 1.920744680851064, "grad_norm": 4.003271102905273, "learning_rate": 7.245625879630767e-06, "loss": 0.8934, "step": 7222 }, { "epoch": 1.9210106382978722, "grad_norm": 4.222568035125732, "learning_rate": 7.244840034378955e-06, "loss": 1.0299, "step": 7223 }, { "epoch": 1.921276595744681, "grad_norm": 3.762643337249756, "learning_rate": 7.244054119669148e-06, "loss": 0.6798, "step": 7224 }, { "epoch": 1.9215425531914894, "grad_norm": 4.137721538543701, "learning_rate": 7.243268135525666e-06, "loss": 0.8147, "step": 7225 }, { "epoch": 1.9218085106382978, "grad_norm": 4.0250139236450195, "learning_rate": 7.242482081972827e-06, "loss": 0.8394, "step": 7226 }, { "epoch": 1.9220744680851065, "grad_norm": 3.7539706230163574, "learning_rate": 7.241695959034951e-06, "loss": 0.8293, "step": 7227 }, { "epoch": 1.9223404255319148, "grad_norm": 4.054415225982666, "learning_rate": 7.2409097667363635e-06, "loss": 0.9107, "step": 7228 }, { "epoch": 1.9226063829787234, "grad_norm": 4.380495548248291, "learning_rate": 7.2401235051013885e-06, "loss": 0.8641, "step": 7229 }, { "epoch": 1.922872340425532, "grad_norm": 4.061448097229004, "learning_rate": 7.239337174154357e-06, "loss": 0.8332, "step": 7230 }, { "epoch": 1.9231382978723404, "grad_norm": 4.095539093017578, "learning_rate": 7.2385507739195945e-06, "loss": 0.828, "step": 7231 }, { "epoch": 1.923404255319149, "grad_norm": 4.271059513092041, "learning_rate": 7.2377643044214375e-06, "loss": 0.8365, "step": 7232 }, { "epoch": 1.9236702127659573, "grad_norm": 3.9962894916534424, "learning_rate": 7.236977765684216e-06, "loss": 0.6932, "step": 7233 }, { "epoch": 1.923936170212766, "grad_norm": 4.267841339111328, "learning_rate": 7.236191157732272e-06, "loss": 0.8561, "step": 7234 }, { "epoch": 1.9242021276595744, "grad_norm": 4.299777030944824, "learning_rate": 7.2354044805899385e-06, "loss": 0.864, "step": 7235 }, { "epoch": 1.924468085106383, "grad_norm": 4.053724765777588, "learning_rate": 7.234617734281558e-06, "loss": 0.8643, "step": 7236 }, { "epoch": 1.9247340425531916, "grad_norm": 4.541396141052246, "learning_rate": 7.2338309188314745e-06, "loss": 0.793, "step": 7237 }, { "epoch": 1.925, "grad_norm": 4.2436676025390625, "learning_rate": 7.233044034264034e-06, "loss": 0.7894, "step": 7238 }, { "epoch": 1.9252659574468085, "grad_norm": 4.764181613922119, "learning_rate": 7.23225708060358e-06, "loss": 0.7979, "step": 7239 }, { "epoch": 1.925531914893617, "grad_norm": 4.301015377044678, "learning_rate": 7.2314700578744635e-06, "loss": 0.8022, "step": 7240 }, { "epoch": 1.9257978723404254, "grad_norm": 3.9735851287841797, "learning_rate": 7.230682966101038e-06, "loss": 0.7377, "step": 7241 }, { "epoch": 1.9260638297872341, "grad_norm": 4.120856285095215, "learning_rate": 7.229895805307654e-06, "loss": 0.7386, "step": 7242 }, { "epoch": 1.9263297872340426, "grad_norm": 4.618571758270264, "learning_rate": 7.229108575518668e-06, "loss": 0.8771, "step": 7243 }, { "epoch": 1.926595744680851, "grad_norm": 3.679917573928833, "learning_rate": 7.22832127675844e-06, "loss": 0.8137, "step": 7244 }, { "epoch": 1.9268617021276597, "grad_norm": 4.480624198913574, "learning_rate": 7.227533909051327e-06, "loss": 0.8955, "step": 7245 }, { "epoch": 1.927127659574468, "grad_norm": 3.715806722640991, "learning_rate": 7.226746472421692e-06, "loss": 0.8023, "step": 7246 }, { "epoch": 1.9273936170212767, "grad_norm": 4.008445739746094, "learning_rate": 7.2259589668939005e-06, "loss": 0.8584, "step": 7247 }, { "epoch": 1.9276595744680851, "grad_norm": 4.211793899536133, "learning_rate": 7.225171392492316e-06, "loss": 0.8412, "step": 7248 }, { "epoch": 1.9279255319148936, "grad_norm": 4.422094821929932, "learning_rate": 7.224383749241311e-06, "loss": 0.811, "step": 7249 }, { "epoch": 1.9281914893617023, "grad_norm": 3.894848108291626, "learning_rate": 7.223596037165252e-06, "loss": 0.9126, "step": 7250 }, { "epoch": 1.9284574468085105, "grad_norm": 3.9139139652252197, "learning_rate": 7.222808256288515e-06, "loss": 0.7837, "step": 7251 }, { "epoch": 1.9287234042553192, "grad_norm": 4.1469197273254395, "learning_rate": 7.222020406635474e-06, "loss": 0.7134, "step": 7252 }, { "epoch": 1.9289893617021276, "grad_norm": 3.5331952571868896, "learning_rate": 7.2212324882305045e-06, "loss": 0.7372, "step": 7253 }, { "epoch": 1.929255319148936, "grad_norm": 3.312333822250366, "learning_rate": 7.220444501097986e-06, "loss": 0.7583, "step": 7254 }, { "epoch": 1.9295212765957448, "grad_norm": 4.264598846435547, "learning_rate": 7.2196564452623015e-06, "loss": 0.8354, "step": 7255 }, { "epoch": 1.929787234042553, "grad_norm": 4.467483997344971, "learning_rate": 7.2188683207478326e-06, "loss": 0.8728, "step": 7256 }, { "epoch": 1.9300531914893617, "grad_norm": 3.850327730178833, "learning_rate": 7.218080127578966e-06, "loss": 0.8222, "step": 7257 }, { "epoch": 1.9303191489361702, "grad_norm": 3.970350980758667, "learning_rate": 7.217291865780089e-06, "loss": 0.8979, "step": 7258 }, { "epoch": 1.9305851063829786, "grad_norm": 3.9415476322174072, "learning_rate": 7.21650353537559e-06, "loss": 0.7552, "step": 7259 }, { "epoch": 1.9308510638297873, "grad_norm": 3.566114664077759, "learning_rate": 7.215715136389862e-06, "loss": 0.8683, "step": 7260 }, { "epoch": 1.9311170212765958, "grad_norm": 3.991467237472534, "learning_rate": 7.2149266688473005e-06, "loss": 0.7815, "step": 7261 }, { "epoch": 1.9313829787234043, "grad_norm": 4.0647406578063965, "learning_rate": 7.214138132772299e-06, "loss": 0.7483, "step": 7262 }, { "epoch": 1.9316489361702127, "grad_norm": 4.495807647705078, "learning_rate": 7.213349528189258e-06, "loss": 0.9067, "step": 7263 }, { "epoch": 1.9319148936170212, "grad_norm": 4.034248352050781, "learning_rate": 7.212560855122576e-06, "loss": 0.7541, "step": 7264 }, { "epoch": 1.9321808510638299, "grad_norm": 3.8755152225494385, "learning_rate": 7.211772113596656e-06, "loss": 0.8805, "step": 7265 }, { "epoch": 1.9324468085106383, "grad_norm": 3.655921220779419, "learning_rate": 7.210983303635901e-06, "loss": 0.7864, "step": 7266 }, { "epoch": 1.9327127659574468, "grad_norm": 4.281502723693848, "learning_rate": 7.210194425264723e-06, "loss": 0.9595, "step": 7267 }, { "epoch": 1.9329787234042555, "grad_norm": 3.8239359855651855, "learning_rate": 7.209405478507525e-06, "loss": 0.7896, "step": 7268 }, { "epoch": 1.9332446808510637, "grad_norm": 3.9340760707855225, "learning_rate": 7.20861646338872e-06, "loss": 0.855, "step": 7269 }, { "epoch": 1.9335106382978724, "grad_norm": 3.6993649005889893, "learning_rate": 7.207827379932724e-06, "loss": 0.774, "step": 7270 }, { "epoch": 1.9337765957446809, "grad_norm": 4.12832498550415, "learning_rate": 7.2070382281639466e-06, "loss": 0.8031, "step": 7271 }, { "epoch": 1.9340425531914893, "grad_norm": 3.675234079360962, "learning_rate": 7.206249008106808e-06, "loss": 0.7203, "step": 7272 }, { "epoch": 1.934308510638298, "grad_norm": 4.341015338897705, "learning_rate": 7.20545971978573e-06, "loss": 0.7099, "step": 7273 }, { "epoch": 1.9345744680851062, "grad_norm": 4.289004802703857, "learning_rate": 7.2046703632251295e-06, "loss": 0.8558, "step": 7274 }, { "epoch": 1.934840425531915, "grad_norm": 3.8868236541748047, "learning_rate": 7.203880938449432e-06, "loss": 0.8851, "step": 7275 }, { "epoch": 1.9351063829787234, "grad_norm": 4.085642337799072, "learning_rate": 7.2030914454830645e-06, "loss": 0.7872, "step": 7276 }, { "epoch": 1.9353723404255319, "grad_norm": 3.6767923831939697, "learning_rate": 7.202301884350454e-06, "loss": 0.712, "step": 7277 }, { "epoch": 1.9356382978723405, "grad_norm": 4.32539176940918, "learning_rate": 7.201512255076031e-06, "loss": 0.9707, "step": 7278 }, { "epoch": 1.9359042553191488, "grad_norm": 3.729510545730591, "learning_rate": 7.2007225576842255e-06, "loss": 0.8447, "step": 7279 }, { "epoch": 1.9361702127659575, "grad_norm": 4.127895832061768, "learning_rate": 7.1999327921994735e-06, "loss": 0.8129, "step": 7280 }, { "epoch": 1.936436170212766, "grad_norm": 3.7349631786346436, "learning_rate": 7.199142958646211e-06, "loss": 0.6886, "step": 7281 }, { "epoch": 1.9367021276595744, "grad_norm": 3.900869369506836, "learning_rate": 7.198353057048876e-06, "loss": 0.7183, "step": 7282 }, { "epoch": 1.936968085106383, "grad_norm": 4.21663761138916, "learning_rate": 7.197563087431909e-06, "loss": 0.9005, "step": 7283 }, { "epoch": 1.9372340425531915, "grad_norm": 3.992421865463257, "learning_rate": 7.196773049819753e-06, "loss": 0.8604, "step": 7284 }, { "epoch": 1.9375, "grad_norm": 4.140373229980469, "learning_rate": 7.195982944236853e-06, "loss": 0.9231, "step": 7285 }, { "epoch": 1.9377659574468085, "grad_norm": 3.9591143131256104, "learning_rate": 7.1951927707076545e-06, "loss": 0.9934, "step": 7286 }, { "epoch": 1.938031914893617, "grad_norm": 4.134740352630615, "learning_rate": 7.194402529256608e-06, "loss": 0.8869, "step": 7287 }, { "epoch": 1.9382978723404256, "grad_norm": 3.9935176372528076, "learning_rate": 7.193612219908161e-06, "loss": 0.7377, "step": 7288 }, { "epoch": 1.938563829787234, "grad_norm": 4.432157039642334, "learning_rate": 7.192821842686772e-06, "loss": 0.864, "step": 7289 }, { "epoch": 1.9388297872340425, "grad_norm": 4.096209526062012, "learning_rate": 7.1920313976168935e-06, "loss": 0.8539, "step": 7290 }, { "epoch": 1.9390957446808512, "grad_norm": 3.792664051055908, "learning_rate": 7.191240884722982e-06, "loss": 0.8195, "step": 7291 }, { "epoch": 1.9393617021276595, "grad_norm": 3.759690046310425, "learning_rate": 7.190450304029497e-06, "loss": 0.7395, "step": 7292 }, { "epoch": 1.9396276595744681, "grad_norm": 3.7826247215270996, "learning_rate": 7.1896596555609025e-06, "loss": 0.7206, "step": 7293 }, { "epoch": 1.9398936170212766, "grad_norm": 3.8327670097351074, "learning_rate": 7.1888689393416575e-06, "loss": 0.9116, "step": 7294 }, { "epoch": 1.940159574468085, "grad_norm": 3.965418815612793, "learning_rate": 7.188078155396232e-06, "loss": 0.8134, "step": 7295 }, { "epoch": 1.9404255319148938, "grad_norm": 3.9271137714385986, "learning_rate": 7.187287303749093e-06, "loss": 0.705, "step": 7296 }, { "epoch": 1.940691489361702, "grad_norm": 4.100310325622559, "learning_rate": 7.186496384424708e-06, "loss": 0.8471, "step": 7297 }, { "epoch": 1.9409574468085107, "grad_norm": 3.9107069969177246, "learning_rate": 7.185705397447552e-06, "loss": 0.8495, "step": 7298 }, { "epoch": 1.9412234042553191, "grad_norm": 4.238333225250244, "learning_rate": 7.1849143428420975e-06, "loss": 0.7926, "step": 7299 }, { "epoch": 1.9414893617021276, "grad_norm": 4.412265777587891, "learning_rate": 7.18412322063282e-06, "loss": 0.947, "step": 7300 }, { "epoch": 1.9417553191489363, "grad_norm": 3.686246156692505, "learning_rate": 7.183332030844199e-06, "loss": 0.7733, "step": 7301 }, { "epoch": 1.9420212765957445, "grad_norm": 3.924842596054077, "learning_rate": 7.182540773500715e-06, "loss": 0.9132, "step": 7302 }, { "epoch": 1.9422872340425532, "grad_norm": 3.5468335151672363, "learning_rate": 7.181749448626849e-06, "loss": 0.8032, "step": 7303 }, { "epoch": 1.9425531914893617, "grad_norm": 3.618908166885376, "learning_rate": 7.180958056247087e-06, "loss": 0.8473, "step": 7304 }, { "epoch": 1.9428191489361701, "grad_norm": 3.575326919555664, "learning_rate": 7.180166596385915e-06, "loss": 0.7703, "step": 7305 }, { "epoch": 1.9430851063829788, "grad_norm": 4.315759658813477, "learning_rate": 7.179375069067821e-06, "loss": 0.823, "step": 7306 }, { "epoch": 1.9433510638297873, "grad_norm": 3.9836225509643555, "learning_rate": 7.178583474317295e-06, "loss": 0.6672, "step": 7307 }, { "epoch": 1.9436170212765957, "grad_norm": 4.030239105224609, "learning_rate": 7.177791812158835e-06, "loss": 0.806, "step": 7308 }, { "epoch": 1.9438829787234042, "grad_norm": 3.8376708030700684, "learning_rate": 7.17700008261693e-06, "loss": 0.7224, "step": 7309 }, { "epoch": 1.9441489361702127, "grad_norm": 4.117557048797607, "learning_rate": 7.176208285716079e-06, "loss": 0.8359, "step": 7310 }, { "epoch": 1.9444148936170214, "grad_norm": 4.3215012550354, "learning_rate": 7.175416421480783e-06, "loss": 0.7143, "step": 7311 }, { "epoch": 1.9446808510638298, "grad_norm": 3.8996849060058594, "learning_rate": 7.174624489935541e-06, "loss": 0.806, "step": 7312 }, { "epoch": 1.9449468085106383, "grad_norm": 3.478804588317871, "learning_rate": 7.173832491104858e-06, "loss": 0.7916, "step": 7313 }, { "epoch": 1.945212765957447, "grad_norm": 3.8935012817382812, "learning_rate": 7.173040425013236e-06, "loss": 0.719, "step": 7314 }, { "epoch": 1.9454787234042552, "grad_norm": 3.9126412868499756, "learning_rate": 7.172248291685187e-06, "loss": 0.6975, "step": 7315 }, { "epoch": 1.945744680851064, "grad_norm": 3.790658712387085, "learning_rate": 7.171456091145217e-06, "loss": 0.8119, "step": 7316 }, { "epoch": 1.9460106382978724, "grad_norm": 4.477363109588623, "learning_rate": 7.170663823417839e-06, "loss": 0.8697, "step": 7317 }, { "epoch": 1.9462765957446808, "grad_norm": 4.502041816711426, "learning_rate": 7.1698714885275665e-06, "loss": 0.9479, "step": 7318 }, { "epoch": 1.9465425531914895, "grad_norm": 3.928950071334839, "learning_rate": 7.169079086498915e-06, "loss": 0.7123, "step": 7319 }, { "epoch": 1.9468085106382977, "grad_norm": 3.781550168991089, "learning_rate": 7.168286617356406e-06, "loss": 0.7275, "step": 7320 }, { "epoch": 1.9470744680851064, "grad_norm": 4.246979236602783, "learning_rate": 7.167494081124553e-06, "loss": 0.885, "step": 7321 }, { "epoch": 1.9473404255319149, "grad_norm": 4.124865531921387, "learning_rate": 7.166701477827882e-06, "loss": 0.8088, "step": 7322 }, { "epoch": 1.9476063829787233, "grad_norm": 4.21986198425293, "learning_rate": 7.165908807490916e-06, "loss": 0.9175, "step": 7323 }, { "epoch": 1.947872340425532, "grad_norm": 4.153756618499756, "learning_rate": 7.165116070138183e-06, "loss": 0.8633, "step": 7324 }, { "epoch": 1.9481382978723403, "grad_norm": 3.5365302562713623, "learning_rate": 7.164323265794209e-06, "loss": 0.8274, "step": 7325 }, { "epoch": 1.948404255319149, "grad_norm": 4.312306880950928, "learning_rate": 7.1635303944835246e-06, "loss": 0.847, "step": 7326 }, { "epoch": 1.9486702127659574, "grad_norm": 4.010374069213867, "learning_rate": 7.162737456230662e-06, "loss": 0.82, "step": 7327 }, { "epoch": 1.9489361702127659, "grad_norm": 5.155407905578613, "learning_rate": 7.161944451060157e-06, "loss": 0.9241, "step": 7328 }, { "epoch": 1.9492021276595746, "grad_norm": 3.665374279022217, "learning_rate": 7.161151378996545e-06, "loss": 0.8255, "step": 7329 }, { "epoch": 1.949468085106383, "grad_norm": 3.6932079792022705, "learning_rate": 7.1603582400643646e-06, "loss": 0.8187, "step": 7330 }, { "epoch": 1.9497340425531915, "grad_norm": 3.555961847305298, "learning_rate": 7.159565034288157e-06, "loss": 0.7523, "step": 7331 }, { "epoch": 1.95, "grad_norm": 4.505660533905029, "learning_rate": 7.158771761692464e-06, "loss": 0.7903, "step": 7332 }, { "epoch": 1.9502659574468084, "grad_norm": 3.616476058959961, "learning_rate": 7.157978422301832e-06, "loss": 0.8853, "step": 7333 }, { "epoch": 1.950531914893617, "grad_norm": 4.25620698928833, "learning_rate": 7.157185016140809e-06, "loss": 0.8566, "step": 7334 }, { "epoch": 1.9507978723404256, "grad_norm": 3.9593820571899414, "learning_rate": 7.156391543233938e-06, "loss": 0.7797, "step": 7335 }, { "epoch": 1.951063829787234, "grad_norm": 4.379816055297852, "learning_rate": 7.155598003605776e-06, "loss": 0.9148, "step": 7336 }, { "epoch": 1.9513297872340427, "grad_norm": 3.731823205947876, "learning_rate": 7.154804397280873e-06, "loss": 0.7223, "step": 7337 }, { "epoch": 1.951595744680851, "grad_norm": 3.8849217891693115, "learning_rate": 7.154010724283786e-06, "loss": 0.8446, "step": 7338 }, { "epoch": 1.9518617021276596, "grad_norm": 3.7477874755859375, "learning_rate": 7.15321698463907e-06, "loss": 0.6922, "step": 7339 }, { "epoch": 1.952127659574468, "grad_norm": 4.323108673095703, "learning_rate": 7.152423178371286e-06, "loss": 0.8153, "step": 7340 }, { "epoch": 1.9523936170212766, "grad_norm": 4.16124153137207, "learning_rate": 7.1516293055049944e-06, "loss": 0.8003, "step": 7341 }, { "epoch": 1.9526595744680852, "grad_norm": 4.236426830291748, "learning_rate": 7.150835366064759e-06, "loss": 0.7843, "step": 7342 }, { "epoch": 1.9529255319148935, "grad_norm": 3.637660026550293, "learning_rate": 7.1500413600751465e-06, "loss": 0.7665, "step": 7343 }, { "epoch": 1.9531914893617022, "grad_norm": 3.838202476501465, "learning_rate": 7.14924728756072e-06, "loss": 0.7723, "step": 7344 }, { "epoch": 1.9534574468085106, "grad_norm": 4.209107875823975, "learning_rate": 7.148453148546055e-06, "loss": 0.8646, "step": 7345 }, { "epoch": 1.953723404255319, "grad_norm": 3.9335439205169678, "learning_rate": 7.147658943055718e-06, "loss": 0.6881, "step": 7346 }, { "epoch": 1.9539893617021278, "grad_norm": 3.6025755405426025, "learning_rate": 7.1468646711142855e-06, "loss": 0.6567, "step": 7347 }, { "epoch": 1.954255319148936, "grad_norm": 3.8079092502593994, "learning_rate": 7.146070332746332e-06, "loss": 0.7122, "step": 7348 }, { "epoch": 1.9545212765957447, "grad_norm": 4.033806800842285, "learning_rate": 7.145275927976436e-06, "loss": 0.7522, "step": 7349 }, { "epoch": 1.9547872340425532, "grad_norm": 4.1563310623168945, "learning_rate": 7.144481456829178e-06, "loss": 0.7998, "step": 7350 }, { "epoch": 1.9550531914893616, "grad_norm": 4.061034202575684, "learning_rate": 7.143686919329138e-06, "loss": 0.9232, "step": 7351 }, { "epoch": 1.9553191489361703, "grad_norm": 4.174419403076172, "learning_rate": 7.1428923155009e-06, "loss": 0.6807, "step": 7352 }, { "epoch": 1.9555851063829788, "grad_norm": 3.6197104454040527, "learning_rate": 7.142097645369052e-06, "loss": 0.8129, "step": 7353 }, { "epoch": 1.9558510638297872, "grad_norm": 4.288638591766357, "learning_rate": 7.141302908958181e-06, "loss": 0.9342, "step": 7354 }, { "epoch": 1.9561170212765957, "grad_norm": 3.9184861183166504, "learning_rate": 7.140508106292876e-06, "loss": 0.7052, "step": 7355 }, { "epoch": 1.9563829787234042, "grad_norm": 4.214428901672363, "learning_rate": 7.1397132373977295e-06, "loss": 0.8679, "step": 7356 }, { "epoch": 1.9566489361702128, "grad_norm": 4.283886909484863, "learning_rate": 7.138918302297338e-06, "loss": 0.8816, "step": 7357 }, { "epoch": 1.9569148936170213, "grad_norm": 3.77843976020813, "learning_rate": 7.138123301016295e-06, "loss": 0.7901, "step": 7358 }, { "epoch": 1.9571808510638298, "grad_norm": 3.9347009658813477, "learning_rate": 7.137328233579201e-06, "loss": 0.7385, "step": 7359 }, { "epoch": 1.9574468085106385, "grad_norm": 3.9841034412384033, "learning_rate": 7.136533100010654e-06, "loss": 0.7738, "step": 7360 }, { "epoch": 1.9577127659574467, "grad_norm": 3.536179780960083, "learning_rate": 7.1357379003352565e-06, "loss": 0.8311, "step": 7361 }, { "epoch": 1.9579787234042554, "grad_norm": 4.386892318725586, "learning_rate": 7.134942634577615e-06, "loss": 0.9451, "step": 7362 }, { "epoch": 1.9582446808510638, "grad_norm": 3.738041877746582, "learning_rate": 7.1341473027623355e-06, "loss": 0.6454, "step": 7363 }, { "epoch": 1.9585106382978723, "grad_norm": 3.718473434448242, "learning_rate": 7.133351904914024e-06, "loss": 0.8613, "step": 7364 }, { "epoch": 1.958776595744681, "grad_norm": 4.3047661781311035, "learning_rate": 7.132556441057294e-06, "loss": 0.7499, "step": 7365 }, { "epoch": 1.9590425531914892, "grad_norm": 3.821338415145874, "learning_rate": 7.131760911216756e-06, "loss": 0.737, "step": 7366 }, { "epoch": 1.959308510638298, "grad_norm": 3.7964980602264404, "learning_rate": 7.130965315417027e-06, "loss": 0.8637, "step": 7367 }, { "epoch": 1.9595744680851064, "grad_norm": 3.9412569999694824, "learning_rate": 7.130169653682721e-06, "loss": 0.6788, "step": 7368 }, { "epoch": 1.9598404255319148, "grad_norm": 4.125255584716797, "learning_rate": 7.129373926038459e-06, "loss": 0.86, "step": 7369 }, { "epoch": 1.9601063829787235, "grad_norm": 3.7982115745544434, "learning_rate": 7.128578132508859e-06, "loss": 0.9386, "step": 7370 }, { "epoch": 1.9603723404255318, "grad_norm": 3.9143412113189697, "learning_rate": 7.1277822731185475e-06, "loss": 0.911, "step": 7371 }, { "epoch": 1.9606382978723405, "grad_norm": 4.226142883300781, "learning_rate": 7.126986347892146e-06, "loss": 0.7375, "step": 7372 }, { "epoch": 1.960904255319149, "grad_norm": 3.8393430709838867, "learning_rate": 7.126190356854283e-06, "loss": 0.8341, "step": 7373 }, { "epoch": 1.9611702127659574, "grad_norm": 4.1616926193237305, "learning_rate": 7.1253943000295865e-06, "loss": 0.8532, "step": 7374 }, { "epoch": 1.961436170212766, "grad_norm": 3.9134316444396973, "learning_rate": 7.12459817744269e-06, "loss": 0.7566, "step": 7375 }, { "epoch": 1.9617021276595743, "grad_norm": 3.930948495864868, "learning_rate": 7.123801989118223e-06, "loss": 0.7781, "step": 7376 }, { "epoch": 1.961968085106383, "grad_norm": 3.913886785507202, "learning_rate": 7.1230057350808234e-06, "loss": 0.8081, "step": 7377 }, { "epoch": 1.9622340425531914, "grad_norm": 4.381828308105469, "learning_rate": 7.122209415355125e-06, "loss": 0.9048, "step": 7378 }, { "epoch": 1.9625, "grad_norm": 3.839282512664795, "learning_rate": 7.121413029965769e-06, "loss": 0.7002, "step": 7379 }, { "epoch": 1.9627659574468086, "grad_norm": 4.018161773681641, "learning_rate": 7.120616578937397e-06, "loss": 0.803, "step": 7380 }, { "epoch": 1.963031914893617, "grad_norm": 4.220311164855957, "learning_rate": 7.1198200622946516e-06, "loss": 0.8337, "step": 7381 }, { "epoch": 1.9632978723404255, "grad_norm": 3.790156841278076, "learning_rate": 7.119023480062176e-06, "loss": 0.7224, "step": 7382 }, { "epoch": 1.9635638297872342, "grad_norm": 4.560417652130127, "learning_rate": 7.1182268322646205e-06, "loss": 0.8584, "step": 7383 }, { "epoch": 1.9638297872340424, "grad_norm": 4.3043999671936035, "learning_rate": 7.117430118926633e-06, "loss": 0.8294, "step": 7384 }, { "epoch": 1.9640957446808511, "grad_norm": 3.781405210494995, "learning_rate": 7.116633340072863e-06, "loss": 0.7876, "step": 7385 }, { "epoch": 1.9643617021276596, "grad_norm": 3.986027956008911, "learning_rate": 7.115836495727968e-06, "loss": 0.7581, "step": 7386 }, { "epoch": 1.964627659574468, "grad_norm": 3.9813320636749268, "learning_rate": 7.1150395859165985e-06, "loss": 0.9021, "step": 7387 }, { "epoch": 1.9648936170212767, "grad_norm": 4.043676376342773, "learning_rate": 7.114242610663415e-06, "loss": 0.791, "step": 7388 }, { "epoch": 1.965159574468085, "grad_norm": 4.014968395233154, "learning_rate": 7.113445569993076e-06, "loss": 0.7437, "step": 7389 }, { "epoch": 1.9654255319148937, "grad_norm": 3.8244807720184326, "learning_rate": 7.1126484639302425e-06, "loss": 0.7376, "step": 7390 }, { "epoch": 1.9656914893617021, "grad_norm": 3.804473400115967, "learning_rate": 7.111851292499579e-06, "loss": 0.8358, "step": 7391 }, { "epoch": 1.9659574468085106, "grad_norm": 3.598792552947998, "learning_rate": 7.111054055725749e-06, "loss": 0.7728, "step": 7392 }, { "epoch": 1.9662234042553193, "grad_norm": 4.2588677406311035, "learning_rate": 7.110256753633421e-06, "loss": 0.884, "step": 7393 }, { "epoch": 1.9664893617021275, "grad_norm": 3.7859714031219482, "learning_rate": 7.109459386247265e-06, "loss": 0.6813, "step": 7394 }, { "epoch": 1.9667553191489362, "grad_norm": 4.303823471069336, "learning_rate": 7.108661953591953e-06, "loss": 0.9044, "step": 7395 }, { "epoch": 1.9670212765957447, "grad_norm": 3.953003406524658, "learning_rate": 7.107864455692156e-06, "loss": 0.7632, "step": 7396 }, { "epoch": 1.9672872340425531, "grad_norm": 4.125672817230225, "learning_rate": 7.107066892572552e-06, "loss": 0.7153, "step": 7397 }, { "epoch": 1.9675531914893618, "grad_norm": 4.01138973236084, "learning_rate": 7.106269264257817e-06, "loss": 0.8052, "step": 7398 }, { "epoch": 1.96781914893617, "grad_norm": 3.7055439949035645, "learning_rate": 7.10547157077263e-06, "loss": 0.7684, "step": 7399 }, { "epoch": 1.9680851063829787, "grad_norm": 4.636490821838379, "learning_rate": 7.104673812141676e-06, "loss": 0.7504, "step": 7400 }, { "epoch": 1.9683510638297872, "grad_norm": 3.961894989013672, "learning_rate": 7.103875988389636e-06, "loss": 0.9316, "step": 7401 }, { "epoch": 1.9686170212765957, "grad_norm": 3.978306770324707, "learning_rate": 7.103078099541194e-06, "loss": 0.8276, "step": 7402 }, { "epoch": 1.9688829787234043, "grad_norm": 3.9166336059570312, "learning_rate": 7.102280145621041e-06, "loss": 0.7308, "step": 7403 }, { "epoch": 1.9691489361702128, "grad_norm": 3.680129289627075, "learning_rate": 7.101482126653865e-06, "loss": 0.8355, "step": 7404 }, { "epoch": 1.9694148936170213, "grad_norm": 4.1183857917785645, "learning_rate": 7.1006840426643576e-06, "loss": 0.7782, "step": 7405 }, { "epoch": 1.96968085106383, "grad_norm": 4.286891460418701, "learning_rate": 7.099885893677213e-06, "loss": 0.8094, "step": 7406 }, { "epoch": 1.9699468085106382, "grad_norm": 4.037398338317871, "learning_rate": 7.099087679717127e-06, "loss": 0.8141, "step": 7407 }, { "epoch": 1.9702127659574469, "grad_norm": 3.8752505779266357, "learning_rate": 7.098289400808795e-06, "loss": 0.7824, "step": 7408 }, { "epoch": 1.9704787234042553, "grad_norm": 3.7574338912963867, "learning_rate": 7.0974910569769195e-06, "loss": 0.6398, "step": 7409 }, { "epoch": 1.9707446808510638, "grad_norm": 3.918271064758301, "learning_rate": 7.096692648246203e-06, "loss": 0.7949, "step": 7410 }, { "epoch": 1.9710106382978725, "grad_norm": 4.124891757965088, "learning_rate": 7.095894174641345e-06, "loss": 0.9578, "step": 7411 }, { "epoch": 1.9712765957446807, "grad_norm": 3.764817953109741, "learning_rate": 7.0950956361870536e-06, "loss": 0.8013, "step": 7412 }, { "epoch": 1.9715425531914894, "grad_norm": 4.22829008102417, "learning_rate": 7.094297032908037e-06, "loss": 0.7897, "step": 7413 }, { "epoch": 1.9718085106382979, "grad_norm": 4.174428462982178, "learning_rate": 7.093498364829006e-06, "loss": 0.8182, "step": 7414 }, { "epoch": 1.9720744680851063, "grad_norm": 4.265493392944336, "learning_rate": 7.09269963197467e-06, "loss": 0.7067, "step": 7415 }, { "epoch": 1.972340425531915, "grad_norm": 3.417632579803467, "learning_rate": 7.091900834369743e-06, "loss": 0.6767, "step": 7416 }, { "epoch": 1.9726063829787233, "grad_norm": 3.931145429611206, "learning_rate": 7.09110197203894e-06, "loss": 0.7581, "step": 7417 }, { "epoch": 1.972872340425532, "grad_norm": 3.808061361312866, "learning_rate": 7.090303045006983e-06, "loss": 0.88, "step": 7418 }, { "epoch": 1.9731382978723404, "grad_norm": 4.074621677398682, "learning_rate": 7.089504053298587e-06, "loss": 0.8391, "step": 7419 }, { "epoch": 1.9734042553191489, "grad_norm": 3.7446646690368652, "learning_rate": 7.0887049969384756e-06, "loss": 0.778, "step": 7420 }, { "epoch": 1.9736702127659576, "grad_norm": 4.311694622039795, "learning_rate": 7.087905875951373e-06, "loss": 0.6362, "step": 7421 }, { "epoch": 1.9739361702127658, "grad_norm": 3.7492148876190186, "learning_rate": 7.087106690362003e-06, "loss": 0.85, "step": 7422 }, { "epoch": 1.9742021276595745, "grad_norm": 3.8154044151306152, "learning_rate": 7.086307440195096e-06, "loss": 0.8229, "step": 7423 }, { "epoch": 1.974468085106383, "grad_norm": 3.8786826133728027, "learning_rate": 7.085508125475381e-06, "loss": 0.8001, "step": 7424 }, { "epoch": 1.9747340425531914, "grad_norm": 3.972696304321289, "learning_rate": 7.084708746227589e-06, "loss": 0.9101, "step": 7425 }, { "epoch": 1.975, "grad_norm": 4.224587440490723, "learning_rate": 7.083909302476453e-06, "loss": 0.7869, "step": 7426 }, { "epoch": 1.9752659574468086, "grad_norm": 3.700507164001465, "learning_rate": 7.08310979424671e-06, "loss": 0.7123, "step": 7427 }, { "epoch": 1.975531914893617, "grad_norm": 3.8128812313079834, "learning_rate": 7.082310221563098e-06, "loss": 0.7205, "step": 7428 }, { "epoch": 1.9757978723404257, "grad_norm": 4.028718948364258, "learning_rate": 7.081510584450355e-06, "loss": 0.9249, "step": 7429 }, { "epoch": 1.976063829787234, "grad_norm": 3.798619270324707, "learning_rate": 7.080710882933225e-06, "loss": 0.7412, "step": 7430 }, { "epoch": 1.9763297872340426, "grad_norm": 4.599943161010742, "learning_rate": 7.07991111703645e-06, "loss": 0.8713, "step": 7431 }, { "epoch": 1.976595744680851, "grad_norm": 4.6581854820251465, "learning_rate": 7.079111286784775e-06, "loss": 0.8165, "step": 7432 }, { "epoch": 1.9768617021276595, "grad_norm": 3.9097495079040527, "learning_rate": 7.078311392202951e-06, "loss": 0.7803, "step": 7433 }, { "epoch": 1.9771276595744682, "grad_norm": 4.4464802742004395, "learning_rate": 7.077511433315725e-06, "loss": 0.9244, "step": 7434 }, { "epoch": 1.9773936170212765, "grad_norm": 4.222725868225098, "learning_rate": 7.076711410147849e-06, "loss": 0.9159, "step": 7435 }, { "epoch": 1.9776595744680852, "grad_norm": 3.8437206745147705, "learning_rate": 7.075911322724077e-06, "loss": 0.7657, "step": 7436 }, { "epoch": 1.9779255319148936, "grad_norm": 3.891757011413574, "learning_rate": 7.075111171069165e-06, "loss": 0.574, "step": 7437 }, { "epoch": 1.978191489361702, "grad_norm": 3.8077917098999023, "learning_rate": 7.074310955207869e-06, "loss": 0.713, "step": 7438 }, { "epoch": 1.9784574468085108, "grad_norm": 3.8292224407196045, "learning_rate": 7.073510675164952e-06, "loss": 0.8645, "step": 7439 }, { "epoch": 1.978723404255319, "grad_norm": 3.931783437728882, "learning_rate": 7.072710330965171e-06, "loss": 0.7588, "step": 7440 }, { "epoch": 1.9789893617021277, "grad_norm": 3.6988885402679443, "learning_rate": 7.071909922633293e-06, "loss": 0.8146, "step": 7441 }, { "epoch": 1.9792553191489362, "grad_norm": 3.7726998329162598, "learning_rate": 7.071109450194085e-06, "loss": 0.8082, "step": 7442 }, { "epoch": 1.9795212765957446, "grad_norm": 4.304258346557617, "learning_rate": 7.070308913672309e-06, "loss": 0.8142, "step": 7443 }, { "epoch": 1.9797872340425533, "grad_norm": 3.6615335941314697, "learning_rate": 7.069508313092739e-06, "loss": 0.7409, "step": 7444 }, { "epoch": 1.9800531914893615, "grad_norm": 4.02711296081543, "learning_rate": 7.068707648480145e-06, "loss": 0.8662, "step": 7445 }, { "epoch": 1.9803191489361702, "grad_norm": 3.48976993560791, "learning_rate": 7.067906919859301e-06, "loss": 0.7655, "step": 7446 }, { "epoch": 1.9805851063829787, "grad_norm": 4.168039321899414, "learning_rate": 7.067106127254983e-06, "loss": 0.8516, "step": 7447 }, { "epoch": 1.9808510638297872, "grad_norm": 3.757882833480835, "learning_rate": 7.066305270691965e-06, "loss": 0.7557, "step": 7448 }, { "epoch": 1.9811170212765958, "grad_norm": 4.09896183013916, "learning_rate": 7.065504350195031e-06, "loss": 0.7227, "step": 7449 }, { "epoch": 1.9813829787234043, "grad_norm": 3.6728386878967285, "learning_rate": 7.064703365788961e-06, "loss": 0.8711, "step": 7450 }, { "epoch": 1.9816489361702128, "grad_norm": 4.336848735809326, "learning_rate": 7.063902317498537e-06, "loss": 0.8427, "step": 7451 }, { "epoch": 1.9819148936170212, "grad_norm": 3.715324640274048, "learning_rate": 7.063101205348546e-06, "loss": 0.8392, "step": 7452 }, { "epoch": 1.9821808510638297, "grad_norm": 3.8472211360931396, "learning_rate": 7.062300029363775e-06, "loss": 0.8386, "step": 7453 }, { "epoch": 1.9824468085106384, "grad_norm": 4.4139533042907715, "learning_rate": 7.061498789569012e-06, "loss": 0.7736, "step": 7454 }, { "epoch": 1.9827127659574468, "grad_norm": 4.422085285186768, "learning_rate": 7.06069748598905e-06, "loss": 0.8175, "step": 7455 }, { "epoch": 1.9829787234042553, "grad_norm": 4.3708696365356445, "learning_rate": 7.059896118648681e-06, "loss": 0.8802, "step": 7456 }, { "epoch": 1.983244680851064, "grad_norm": 3.6612091064453125, "learning_rate": 7.059094687572701e-06, "loss": 0.73, "step": 7457 }, { "epoch": 1.9835106382978722, "grad_norm": 4.2330780029296875, "learning_rate": 7.058293192785907e-06, "loss": 0.7638, "step": 7458 }, { "epoch": 1.983776595744681, "grad_norm": 4.289926528930664, "learning_rate": 7.0574916343130995e-06, "loss": 0.7821, "step": 7459 }, { "epoch": 1.9840425531914894, "grad_norm": 4.122095108032227, "learning_rate": 7.0566900121790775e-06, "loss": 0.9189, "step": 7460 }, { "epoch": 1.9843085106382978, "grad_norm": 3.974686861038208, "learning_rate": 7.055888326408645e-06, "loss": 0.7231, "step": 7461 }, { "epoch": 1.9845744680851065, "grad_norm": 3.515641450881958, "learning_rate": 7.055086577026608e-06, "loss": 0.8235, "step": 7462 }, { "epoch": 1.9848404255319148, "grad_norm": 4.1052565574646, "learning_rate": 7.0542847640577725e-06, "loss": 0.7862, "step": 7463 }, { "epoch": 1.9851063829787234, "grad_norm": 3.889636516571045, "learning_rate": 7.0534828875269466e-06, "loss": 0.7854, "step": 7464 }, { "epoch": 1.985372340425532, "grad_norm": 4.208193778991699, "learning_rate": 7.052680947458944e-06, "loss": 0.7854, "step": 7465 }, { "epoch": 1.9856382978723404, "grad_norm": 4.233124732971191, "learning_rate": 7.051878943878575e-06, "loss": 0.7895, "step": 7466 }, { "epoch": 1.985904255319149, "grad_norm": 4.030735969543457, "learning_rate": 7.051076876810656e-06, "loss": 0.8551, "step": 7467 }, { "epoch": 1.9861702127659573, "grad_norm": 3.666236639022827, "learning_rate": 7.050274746280005e-06, "loss": 0.7758, "step": 7468 }, { "epoch": 1.986436170212766, "grad_norm": 3.7510082721710205, "learning_rate": 7.0494725523114375e-06, "loss": 0.9323, "step": 7469 }, { "epoch": 1.9867021276595744, "grad_norm": 3.9435558319091797, "learning_rate": 7.048670294929777e-06, "loss": 0.9059, "step": 7470 }, { "epoch": 1.986968085106383, "grad_norm": 3.691020965576172, "learning_rate": 7.047867974159845e-06, "loss": 0.7602, "step": 7471 }, { "epoch": 1.9872340425531916, "grad_norm": 3.697643518447876, "learning_rate": 7.047065590026467e-06, "loss": 0.7624, "step": 7472 }, { "epoch": 1.9875, "grad_norm": 3.759286880493164, "learning_rate": 7.04626314255447e-06, "loss": 0.8639, "step": 7473 }, { "epoch": 1.9877659574468085, "grad_norm": 4.054465293884277, "learning_rate": 7.045460631768684e-06, "loss": 0.7268, "step": 7474 }, { "epoch": 1.988031914893617, "grad_norm": 4.61219596862793, "learning_rate": 7.0446580576939346e-06, "loss": 0.9591, "step": 7475 }, { "epoch": 1.9882978723404254, "grad_norm": 4.135398864746094, "learning_rate": 7.04385542035506e-06, "loss": 0.9273, "step": 7476 }, { "epoch": 1.9885638297872341, "grad_norm": 3.8725779056549072, "learning_rate": 7.043052719776891e-06, "loss": 0.803, "step": 7477 }, { "epoch": 1.9888297872340426, "grad_norm": 3.9959404468536377, "learning_rate": 7.042249955984265e-06, "loss": 0.8572, "step": 7478 }, { "epoch": 1.989095744680851, "grad_norm": 3.542355537414551, "learning_rate": 7.041447129002023e-06, "loss": 0.8041, "step": 7479 }, { "epoch": 1.9893617021276597, "grad_norm": 4.780427932739258, "learning_rate": 7.0406442388550016e-06, "loss": 0.88, "step": 7480 }, { "epoch": 1.989627659574468, "grad_norm": 3.5344386100769043, "learning_rate": 7.039841285568045e-06, "loss": 0.7503, "step": 7481 }, { "epoch": 1.9898936170212767, "grad_norm": 3.8678970336914062, "learning_rate": 7.039038269165999e-06, "loss": 0.74, "step": 7482 }, { "epoch": 1.9901595744680851, "grad_norm": 3.366485834121704, "learning_rate": 7.038235189673706e-06, "loss": 0.7804, "step": 7483 }, { "epoch": 1.9904255319148936, "grad_norm": 3.5538713932037354, "learning_rate": 7.037432047116018e-06, "loss": 0.7362, "step": 7484 }, { "epoch": 1.9906914893617023, "grad_norm": 4.539484977722168, "learning_rate": 7.036628841517783e-06, "loss": 0.8812, "step": 7485 }, { "epoch": 1.9909574468085105, "grad_norm": 3.830280065536499, "learning_rate": 7.035825572903854e-06, "loss": 0.809, "step": 7486 }, { "epoch": 1.9912234042553192, "grad_norm": 4.038280963897705, "learning_rate": 7.035022241299083e-06, "loss": 0.7987, "step": 7487 }, { "epoch": 1.9914893617021276, "grad_norm": 4.29449462890625, "learning_rate": 7.034218846728331e-06, "loss": 0.8703, "step": 7488 }, { "epoch": 1.991755319148936, "grad_norm": 4.56672477722168, "learning_rate": 7.033415389216452e-06, "loss": 0.9195, "step": 7489 }, { "epoch": 1.9920212765957448, "grad_norm": 4.10626745223999, "learning_rate": 7.032611868788306e-06, "loss": 0.7476, "step": 7490 }, { "epoch": 1.992287234042553, "grad_norm": 3.6163523197174072, "learning_rate": 7.031808285468756e-06, "loss": 0.8082, "step": 7491 }, { "epoch": 1.9925531914893617, "grad_norm": 4.114681243896484, "learning_rate": 7.031004639282666e-06, "loss": 0.9355, "step": 7492 }, { "epoch": 1.9928191489361702, "grad_norm": 3.9397499561309814, "learning_rate": 7.0302009302549e-06, "loss": 0.7364, "step": 7493 }, { "epoch": 1.9930851063829786, "grad_norm": 3.4797003269195557, "learning_rate": 7.029397158410329e-06, "loss": 0.8413, "step": 7494 }, { "epoch": 1.9933510638297873, "grad_norm": 4.215932369232178, "learning_rate": 7.028593323773819e-06, "loss": 0.8095, "step": 7495 }, { "epoch": 1.9936170212765958, "grad_norm": 3.694060802459717, "learning_rate": 7.027789426370244e-06, "loss": 0.8051, "step": 7496 }, { "epoch": 1.9938829787234043, "grad_norm": 4.0490875244140625, "learning_rate": 7.026985466224477e-06, "loss": 0.874, "step": 7497 }, { "epoch": 1.9941489361702127, "grad_norm": 4.0154194831848145, "learning_rate": 7.026181443361392e-06, "loss": 0.807, "step": 7498 }, { "epoch": 1.9944148936170212, "grad_norm": 3.8070061206817627, "learning_rate": 7.025377357805867e-06, "loss": 0.8078, "step": 7499 }, { "epoch": 1.9946808510638299, "grad_norm": 4.185990810394287, "learning_rate": 7.024573209582783e-06, "loss": 0.7529, "step": 7500 }, { "epoch": 1.9946808510638299, "eval_loss": 1.260877251625061, "eval_runtime": 13.905, "eval_samples_per_second": 28.767, "eval_steps_per_second": 3.596, "step": 7500 }, { "epoch": 1.9949468085106383, "grad_norm": 3.18033504486084, "learning_rate": 7.023768998717022e-06, "loss": 0.7159, "step": 7501 }, { "epoch": 1.9952127659574468, "grad_norm": 3.839970111846924, "learning_rate": 7.022964725233463e-06, "loss": 0.7902, "step": 7502 }, { "epoch": 1.9954787234042555, "grad_norm": 4.011384963989258, "learning_rate": 7.022160389156995e-06, "loss": 0.7596, "step": 7503 }, { "epoch": 1.9957446808510637, "grad_norm": 3.67543888092041, "learning_rate": 7.0213559905125016e-06, "loss": 0.7987, "step": 7504 }, { "epoch": 1.9960106382978724, "grad_norm": 4.240528583526611, "learning_rate": 7.020551529324877e-06, "loss": 0.8651, "step": 7505 }, { "epoch": 1.9962765957446809, "grad_norm": 3.9020180702209473, "learning_rate": 7.0197470056190075e-06, "loss": 0.9205, "step": 7506 }, { "epoch": 1.9965425531914893, "grad_norm": 4.0633368492126465, "learning_rate": 7.0189424194197875e-06, "loss": 0.8294, "step": 7507 }, { "epoch": 1.996808510638298, "grad_norm": 3.88988995552063, "learning_rate": 7.018137770752114e-06, "loss": 0.861, "step": 7508 }, { "epoch": 1.9970744680851062, "grad_norm": 3.5177197456359863, "learning_rate": 7.01733305964088e-06, "loss": 0.772, "step": 7509 }, { "epoch": 1.997340425531915, "grad_norm": 3.661116123199463, "learning_rate": 7.016528286110986e-06, "loss": 0.7985, "step": 7510 }, { "epoch": 1.9976063829787234, "grad_norm": 4.28385591506958, "learning_rate": 7.015723450187334e-06, "loss": 0.9045, "step": 7511 }, { "epoch": 1.9978723404255319, "grad_norm": 3.899296522140503, "learning_rate": 7.014918551894824e-06, "loss": 0.7558, "step": 7512 }, { "epoch": 1.9981382978723405, "grad_norm": 3.9070241451263428, "learning_rate": 7.014113591258361e-06, "loss": 0.8287, "step": 7513 }, { "epoch": 1.9984042553191488, "grad_norm": 3.7345831394195557, "learning_rate": 7.013308568302855e-06, "loss": 0.781, "step": 7514 }, { "epoch": 1.9986702127659575, "grad_norm": 3.6665847301483154, "learning_rate": 7.012503483053209e-06, "loss": 0.9715, "step": 7515 }, { "epoch": 1.998936170212766, "grad_norm": 3.48984956741333, "learning_rate": 7.011698335534336e-06, "loss": 0.6823, "step": 7516 }, { "epoch": 1.9992021276595744, "grad_norm": 3.7711336612701416, "learning_rate": 7.01089312577115e-06, "loss": 0.8192, "step": 7517 }, { "epoch": 1.999468085106383, "grad_norm": 4.02569580078125, "learning_rate": 7.0100878537885605e-06, "loss": 0.856, "step": 7518 }, { "epoch": 1.9997340425531915, "grad_norm": 4.044494152069092, "learning_rate": 7.009282519611488e-06, "loss": 0.8349, "step": 7519 }, { "epoch": 2.0, "grad_norm": 3.897979259490967, "learning_rate": 7.008477123264849e-06, "loss": 0.6436, "step": 7520 } ], "logging_steps": 1.0, "max_steps": 18800, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.4371418007171236e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }