{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 200, "global_step": 1215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.8335, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.752, "step": 2 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 2.7803, "step": 3 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 2.5798, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 2.7038, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 2.1478, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.00014, "loss": 1.7468, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.00016, "loss": 1.6426, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.00018, "loss": 0.9787, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 0.7736, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.00019999997902092877, "loss": 0.5849, "step": 11 }, { "epoch": 0.03, "learning_rate": 0.00019999991608372393, "loss": 0.4335, "step": 12 }, { "epoch": 0.03, "learning_rate": 0.00019999981118841182, "loss": 0.3929, "step": 13 }, { "epoch": 0.03, "learning_rate": 0.00019999966433503652, "loss": 0.1443, "step": 14 }, { "epoch": 0.04, "learning_rate": 0.00019999947552365961, "loss": 0.1243, "step": 15 }, { "epoch": 0.04, "learning_rate": 0.0001999992447543603, "loss": 0.4306, "step": 16 }, { "epoch": 0.04, "learning_rate": 0.00019999897202723545, "loss": 0.2558, "step": 17 }, { "epoch": 0.04, "learning_rate": 0.00019999865734239946, "loss": 0.1647, "step": 18 }, { "epoch": 0.05, "learning_rate": 0.0001999983006999844, "loss": 0.191, "step": 19 }, { "epoch": 0.05, "learning_rate": 0.00019999790210013988, "loss": 0.259, "step": 20 }, { "epoch": 0.05, "learning_rate": 0.00019999746154303317, "loss": 0.0401, "step": 21 }, { "epoch": 0.05, "learning_rate": 0.0001999969790288491, "loss": 0.0613, "step": 22 }, { "epoch": 0.06, "learning_rate": 0.00019999645455779014, "loss": 0.2135, "step": 23 }, { "epoch": 0.06, "learning_rate": 0.00019999588813007633, "loss": 0.0816, "step": 24 }, { "epoch": 0.06, "learning_rate": 0.0001999952797459453, "loss": 0.0543, "step": 25 }, { "epoch": 0.06, "learning_rate": 0.00019999462940565243, "loss": 0.1945, "step": 26 }, { "epoch": 0.07, "learning_rate": 0.0001999939371094705, "loss": 0.0461, "step": 27 }, { "epoch": 0.07, "learning_rate": 0.00019999320285769, "loss": 0.1266, "step": 28 }, { "epoch": 0.07, "learning_rate": 0.000199992426650619, "loss": 0.0274, "step": 29 }, { "epoch": 0.07, "learning_rate": 0.0001999916084885832, "loss": 0.1039, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.00019999074837192589, "loss": 0.0169, "step": 31 }, { "epoch": 0.08, "learning_rate": 0.00019998984630100792, "loss": 0.1045, "step": 32 }, { "epoch": 0.08, "learning_rate": 0.00019998890227620783, "loss": 0.0236, "step": 33 }, { "epoch": 0.08, "learning_rate": 0.0001999879162979217, "loss": 0.0125, "step": 34 }, { "epoch": 0.09, "learning_rate": 0.00019998688836656323, "loss": 0.0317, "step": 35 }, { "epoch": 0.09, "learning_rate": 0.0001999858184825637, "loss": 0.0216, "step": 36 }, { "epoch": 0.09, "learning_rate": 0.00019998470664637203, "loss": 0.0583, "step": 37 }, { "epoch": 0.09, "learning_rate": 0.00019998355285845475, "loss": 0.0534, "step": 38 }, { "epoch": 0.1, "learning_rate": 0.0001999823571192959, "loss": 0.1213, "step": 39 }, { "epoch": 0.1, "learning_rate": 0.0001999811194293973, "loss": 0.2095, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00019997983978927813, "loss": 0.0122, "step": 41 }, { "epoch": 0.1, "learning_rate": 0.00019997851819947537, "loss": 0.2262, "step": 42 }, { "epoch": 0.11, "learning_rate": 0.00019997715466054357, "loss": 0.0329, "step": 43 }, { "epoch": 0.11, "learning_rate": 0.00019997574917305478, "loss": 0.0907, "step": 44 }, { "epoch": 0.11, "learning_rate": 0.00019997430173759875, "loss": 0.0111, "step": 45 }, { "epoch": 0.11, "learning_rate": 0.00019997281235478278, "loss": 0.1297, "step": 46 }, { "epoch": 0.12, "learning_rate": 0.00019997128102523183, "loss": 0.0805, "step": 47 }, { "epoch": 0.12, "learning_rate": 0.00019996970774958836, "loss": 0.0527, "step": 48 }, { "epoch": 0.12, "learning_rate": 0.00019996809252851251, "loss": 0.0117, "step": 49 }, { "epoch": 0.12, "learning_rate": 0.00019996643536268204, "loss": 0.0287, "step": 50 }, { "epoch": 0.13, "learning_rate": 0.0001999647362527922, "loss": 0.0106, "step": 51 }, { "epoch": 0.13, "learning_rate": 0.0001999629951995559, "loss": 0.0564, "step": 52 }, { "epoch": 0.13, "learning_rate": 0.00019996121220370374, "loss": 0.0318, "step": 53 }, { "epoch": 0.13, "learning_rate": 0.00019995938726598373, "loss": 0.0829, "step": 54 }, { "epoch": 0.14, "learning_rate": 0.00019995752038716168, "loss": 0.0564, "step": 55 }, { "epoch": 0.14, "learning_rate": 0.00019995561156802079, "loss": 0.0852, "step": 56 }, { "epoch": 0.14, "learning_rate": 0.00019995366080936204, "loss": 0.0146, "step": 57 }, { "epoch": 0.14, "learning_rate": 0.0001999516681120039, "loss": 0.0446, "step": 58 }, { "epoch": 0.15, "learning_rate": 0.00019994963347678247, "loss": 0.0103, "step": 59 }, { "epoch": 0.15, "learning_rate": 0.00019994755690455152, "loss": 0.0147, "step": 60 }, { "epoch": 0.15, "learning_rate": 0.00019994543839618221, "loss": 0.0479, "step": 61 }, { "epoch": 0.15, "learning_rate": 0.0001999432779525635, "loss": 0.0028, "step": 62 }, { "epoch": 0.16, "learning_rate": 0.0001999410755746019, "loss": 0.0316, "step": 63 }, { "epoch": 0.16, "learning_rate": 0.0001999388312632214, "loss": 0.113, "step": 64 }, { "epoch": 0.16, "learning_rate": 0.0001999365450193638, "loss": 0.0148, "step": 65 }, { "epoch": 0.16, "learning_rate": 0.00019993421684398824, "loss": 0.0437, "step": 66 }, { "epoch": 0.17, "learning_rate": 0.00019993184673807162, "loss": 0.0287, "step": 67 }, { "epoch": 0.17, "learning_rate": 0.00019992943470260844, "loss": 0.0088, "step": 68 }, { "epoch": 0.17, "learning_rate": 0.00019992698073861064, "loss": 0.0971, "step": 69 }, { "epoch": 0.17, "learning_rate": 0.00019992448484710797, "loss": 0.0557, "step": 70 }, { "epoch": 0.18, "learning_rate": 0.0001999219470291476, "loss": 0.0609, "step": 71 }, { "epoch": 0.18, "learning_rate": 0.00019991936728579437, "loss": 0.0033, "step": 72 }, { "epoch": 0.18, "learning_rate": 0.0001999167456181307, "loss": 0.0853, "step": 73 }, { "epoch": 0.18, "learning_rate": 0.00019991408202725655, "loss": 0.0019, "step": 74 }, { "epoch": 0.19, "learning_rate": 0.00019991137651428957, "loss": 0.0121, "step": 75 }, { "epoch": 0.19, "learning_rate": 0.0001999086290803649, "loss": 0.016, "step": 76 }, { "epoch": 0.19, "learning_rate": 0.00019990583972663534, "loss": 0.0014, "step": 77 }, { "epoch": 0.19, "learning_rate": 0.00019990300845427125, "loss": 0.1069, "step": 78 }, { "epoch": 0.2, "learning_rate": 0.00019990013526446056, "loss": 0.0157, "step": 79 }, { "epoch": 0.2, "learning_rate": 0.0001998972201584088, "loss": 0.0891, "step": 80 }, { "epoch": 0.2, "learning_rate": 0.0001998942631373391, "loss": 0.0495, "step": 81 }, { "epoch": 0.2, "learning_rate": 0.00019989126420249221, "loss": 0.0441, "step": 82 }, { "epoch": 0.2, "learning_rate": 0.00019988822335512637, "loss": 0.079, "step": 83 }, { "epoch": 0.21, "learning_rate": 0.00019988514059651752, "loss": 0.0699, "step": 84 }, { "epoch": 0.21, "learning_rate": 0.0001998820159279591, "loss": 0.0009, "step": 85 }, { "epoch": 0.21, "learning_rate": 0.00019987884935076213, "loss": 0.0007, "step": 86 }, { "epoch": 0.21, "learning_rate": 0.0001998756408662553, "loss": 0.061, "step": 87 }, { "epoch": 0.22, "learning_rate": 0.00019987239047578482, "loss": 0.0012, "step": 88 }, { "epoch": 0.22, "learning_rate": 0.00019986909818071446, "loss": 0.0657, "step": 89 }, { "epoch": 0.22, "learning_rate": 0.00019986576398242566, "loss": 0.0024, "step": 90 }, { "epoch": 0.22, "learning_rate": 0.00019986238788231733, "loss": 0.0411, "step": 91 }, { "epoch": 0.23, "learning_rate": 0.00019985896988180605, "loss": 0.0813, "step": 92 }, { "epoch": 0.23, "learning_rate": 0.00019985550998232596, "loss": 0.0013, "step": 93 }, { "epoch": 0.23, "learning_rate": 0.00019985200818532875, "loss": 0.1736, "step": 94 }, { "epoch": 0.23, "learning_rate": 0.0001998484644922837, "loss": 0.0006, "step": 95 }, { "epoch": 0.24, "learning_rate": 0.0001998448789046777, "loss": 0.0013, "step": 96 }, { "epoch": 0.24, "learning_rate": 0.0001998412514240152, "loss": 0.0813, "step": 97 }, { "epoch": 0.24, "learning_rate": 0.00019983758205181822, "loss": 0.0002, "step": 98 }, { "epoch": 0.24, "learning_rate": 0.00019983387078962631, "loss": 0.0006, "step": 99 }, { "epoch": 0.25, "learning_rate": 0.00019983011763899673, "loss": 0.1275, "step": 100 }, { "epoch": 0.25, "learning_rate": 0.00019982632260150417, "loss": 0.0737, "step": 101 }, { "epoch": 0.25, "learning_rate": 0.00019982248567874098, "loss": 0.162, "step": 102 }, { "epoch": 0.25, "learning_rate": 0.00019981860687231706, "loss": 0.2331, "step": 103 }, { "epoch": 0.26, "learning_rate": 0.00019981468618385988, "loss": 0.0005, "step": 104 }, { "epoch": 0.26, "learning_rate": 0.0001998107236150145, "loss": 0.001, "step": 105 }, { "epoch": 0.26, "learning_rate": 0.00019980671916744352, "loss": 0.0136, "step": 106 }, { "epoch": 0.26, "learning_rate": 0.00019980267284282717, "loss": 0.1149, "step": 107 }, { "epoch": 0.27, "learning_rate": 0.00019979858464286317, "loss": 0.1266, "step": 108 }, { "epoch": 0.27, "learning_rate": 0.00019979445456926688, "loss": 0.0724, "step": 109 }, { "epoch": 0.27, "learning_rate": 0.00019979028262377118, "loss": 0.1143, "step": 110 }, { "epoch": 0.27, "learning_rate": 0.00019978606880812657, "loss": 0.0084, "step": 111 }, { "epoch": 0.28, "learning_rate": 0.00019978181312410104, "loss": 0.0657, "step": 112 }, { "epoch": 0.28, "learning_rate": 0.00019977751557348025, "loss": 0.1512, "step": 113 }, { "epoch": 0.28, "learning_rate": 0.00019977317615806737, "loss": 0.0051, "step": 114 }, { "epoch": 0.28, "learning_rate": 0.0001997687948796831, "loss": 0.0093, "step": 115 }, { "epoch": 0.29, "learning_rate": 0.00019976437174016573, "loss": 0.0067, "step": 116 }, { "epoch": 0.29, "learning_rate": 0.0001997599067413712, "loss": 0.0949, "step": 117 }, { "epoch": 0.29, "learning_rate": 0.00019975539988517288, "loss": 0.0393, "step": 118 }, { "epoch": 0.29, "learning_rate": 0.00019975085117346177, "loss": 0.0364, "step": 119 }, { "epoch": 0.3, "learning_rate": 0.00019974626060814647, "loss": 0.1306, "step": 120 }, { "epoch": 0.3, "learning_rate": 0.00019974162819115306, "loss": 0.0019, "step": 121 }, { "epoch": 0.3, "learning_rate": 0.0001997369539244252, "loss": 0.0029, "step": 122 }, { "epoch": 0.3, "learning_rate": 0.00019973223780992414, "loss": 0.0159, "step": 123 }, { "epoch": 0.31, "learning_rate": 0.0001997274798496287, "loss": 0.0019, "step": 124 }, { "epoch": 0.31, "learning_rate": 0.0001997226800455352, "loss": 0.0194, "step": 125 }, { "epoch": 0.31, "learning_rate": 0.00019971783839965756, "loss": 0.0037, "step": 126 }, { "epoch": 0.31, "learning_rate": 0.00019971295491402725, "loss": 0.0442, "step": 127 }, { "epoch": 0.32, "learning_rate": 0.00019970802959069328, "loss": 0.003, "step": 128 }, { "epoch": 0.32, "learning_rate": 0.00019970306243172222, "loss": 0.0023, "step": 129 }, { "epoch": 0.32, "learning_rate": 0.00019969805343919821, "loss": 0.0198, "step": 130 }, { "epoch": 0.32, "learning_rate": 0.00019969300261522293, "loss": 0.0025, "step": 131 }, { "epoch": 0.33, "learning_rate": 0.0001996879099619156, "loss": 0.0574, "step": 132 }, { "epoch": 0.33, "learning_rate": 0.00019968277548141302, "loss": 0.0796, "step": 133 }, { "epoch": 0.33, "learning_rate": 0.00019967759917586953, "loss": 0.0326, "step": 134 }, { "epoch": 0.33, "learning_rate": 0.00019967238104745696, "loss": 0.0021, "step": 135 }, { "epoch": 0.34, "learning_rate": 0.00019966712109836476, "loss": 0.0008, "step": 136 }, { "epoch": 0.34, "learning_rate": 0.00019966181933079997, "loss": 0.0943, "step": 137 }, { "epoch": 0.34, "learning_rate": 0.000199656475746987, "loss": 0.002, "step": 138 }, { "epoch": 0.34, "learning_rate": 0.00019965109034916808, "loss": 0.0451, "step": 139 }, { "epoch": 0.35, "learning_rate": 0.00019964566313960264, "loss": 0.0485, "step": 140 }, { "epoch": 0.35, "learning_rate": 0.000199640194120568, "loss": 0.0186, "step": 141 }, { "epoch": 0.35, "learning_rate": 0.0001996346832943587, "loss": 0.0305, "step": 142 }, { "epoch": 0.35, "learning_rate": 0.0001996291306632871, "loss": 0.0625, "step": 143 }, { "epoch": 0.36, "learning_rate": 0.00019962353622968295, "loss": 0.0183, "step": 144 }, { "epoch": 0.36, "learning_rate": 0.00019961789999589356, "loss": 0.0043, "step": 145 }, { "epoch": 0.36, "learning_rate": 0.00019961222196428378, "loss": 0.0493, "step": 146 }, { "epoch": 0.36, "learning_rate": 0.00019960650213723602, "loss": 0.1198, "step": 147 }, { "epoch": 0.37, "learning_rate": 0.0001996007405171502, "loss": 0.0005, "step": 148 }, { "epoch": 0.37, "learning_rate": 0.00019959493710644384, "loss": 0.0006, "step": 149 }, { "epoch": 0.37, "learning_rate": 0.00019958909190755187, "loss": 0.1201, "step": 150 }, { "epoch": 0.37, "learning_rate": 0.00019958320492292686, "loss": 0.0012, "step": 151 }, { "epoch": 0.38, "learning_rate": 0.00019957727615503888, "loss": 0.0025, "step": 152 }, { "epoch": 0.38, "learning_rate": 0.00019957130560637552, "loss": 0.1595, "step": 153 }, { "epoch": 0.38, "learning_rate": 0.00019956529327944198, "loss": 0.0003, "step": 154 }, { "epoch": 0.38, "learning_rate": 0.0001995592391767608, "loss": 0.0014, "step": 155 }, { "epoch": 0.39, "learning_rate": 0.00019955314330087225, "loss": 0.1316, "step": 156 }, { "epoch": 0.39, "learning_rate": 0.00019954700565433405, "loss": 0.0015, "step": 157 }, { "epoch": 0.39, "learning_rate": 0.00019954082623972142, "loss": 0.0015, "step": 158 }, { "epoch": 0.39, "learning_rate": 0.0001995346050596271, "loss": 0.0018, "step": 159 }, { "epoch": 0.4, "learning_rate": 0.0001995283421166614, "loss": 0.0638, "step": 160 }, { "epoch": 0.4, "learning_rate": 0.00019952203741345218, "loss": 0.0283, "step": 161 }, { "epoch": 0.4, "learning_rate": 0.00019951569095264473, "loss": 0.0005, "step": 162 }, { "epoch": 0.4, "learning_rate": 0.0001995093027369019, "loss": 0.0013, "step": 163 }, { "epoch": 0.4, "learning_rate": 0.0001995028727689041, "loss": 0.0828, "step": 164 }, { "epoch": 0.41, "learning_rate": 0.00019949640105134918, "loss": 0.0007, "step": 165 }, { "epoch": 0.41, "learning_rate": 0.00019948988758695263, "loss": 0.0588, "step": 166 }, { "epoch": 0.41, "learning_rate": 0.00019948333237844733, "loss": 0.0166, "step": 167 }, { "epoch": 0.41, "learning_rate": 0.00019947673542858367, "loss": 0.2658, "step": 168 }, { "epoch": 0.42, "learning_rate": 0.00019947009674012973, "loss": 0.0063, "step": 169 }, { "epoch": 0.42, "learning_rate": 0.00019946341631587087, "loss": 0.053, "step": 170 }, { "epoch": 0.42, "learning_rate": 0.0001994566941586101, "loss": 0.0007, "step": 171 }, { "epoch": 0.42, "learning_rate": 0.00019944993027116797, "loss": 0.0009, "step": 172 }, { "epoch": 0.43, "learning_rate": 0.0001994431246563824, "loss": 0.0003, "step": 173 }, { "epoch": 0.43, "learning_rate": 0.00019943627731710897, "loss": 0.0008, "step": 174 }, { "epoch": 0.43, "learning_rate": 0.00019942938825622065, "loss": 0.0834, "step": 175 }, { "epoch": 0.43, "learning_rate": 0.00019942245747660796, "loss": 0.0174, "step": 176 }, { "epoch": 0.44, "learning_rate": 0.00019941548498117896, "loss": 0.0963, "step": 177 }, { "epoch": 0.44, "learning_rate": 0.00019940847077285916, "loss": 0.0361, "step": 178 }, { "epoch": 0.44, "learning_rate": 0.0001994014148545916, "loss": 0.0408, "step": 179 }, { "epoch": 0.44, "learning_rate": 0.0001993943172293368, "loss": 0.0011, "step": 180 }, { "epoch": 0.45, "learning_rate": 0.0001993871779000728, "loss": 0.0801, "step": 181 }, { "epoch": 0.45, "learning_rate": 0.0001993799968697951, "loss": 0.1176, "step": 182 }, { "epoch": 0.45, "learning_rate": 0.00019937277414151677, "loss": 0.1212, "step": 183 }, { "epoch": 0.45, "learning_rate": 0.00019936550971826834, "loss": 0.0458, "step": 184 }, { "epoch": 0.46, "learning_rate": 0.00019935820360309777, "loss": 0.0543, "step": 185 }, { "epoch": 0.46, "learning_rate": 0.00019935085579907063, "loss": 0.0518, "step": 186 }, { "epoch": 0.46, "learning_rate": 0.0001993434663092699, "loss": 0.0015, "step": 187 }, { "epoch": 0.46, "learning_rate": 0.00019933603513679605, "loss": 0.0704, "step": 188 }, { "epoch": 0.47, "learning_rate": 0.00019932856228476706, "loss": 0.0628, "step": 189 }, { "epoch": 0.47, "learning_rate": 0.00019932104775631846, "loss": 0.0602, "step": 190 }, { "epoch": 0.47, "learning_rate": 0.00019931349155460315, "loss": 0.0025, "step": 191 }, { "epoch": 0.47, "learning_rate": 0.0001993058936827916, "loss": 0.0032, "step": 192 }, { "epoch": 0.48, "learning_rate": 0.00019929825414407172, "loss": 0.1927, "step": 193 }, { "epoch": 0.48, "learning_rate": 0.00019929057294164893, "loss": 0.003, "step": 194 }, { "epoch": 0.48, "learning_rate": 0.0001992828500787461, "loss": 0.0027, "step": 195 }, { "epoch": 0.48, "learning_rate": 0.0001992750855586036, "loss": 0.0225, "step": 196 }, { "epoch": 0.49, "learning_rate": 0.00019926727938447933, "loss": 0.0018, "step": 197 }, { "epoch": 0.49, "learning_rate": 0.00019925943155964856, "loss": 0.0019, "step": 198 }, { "epoch": 0.49, "learning_rate": 0.0001992515420874041, "loss": 0.0015, "step": 199 }, { "epoch": 0.49, "learning_rate": 0.00019924361097105623, "loss": 0.0411, "step": 200 }, { "epoch": 0.49, "eval_loss": 0.02292541041970253, "eval_runtime": 126.0288, "eval_samples_per_second": 1.016, "eval_steps_per_second": 0.341, "step": 200 }, { "epoch": 0.5, "learning_rate": 0.0001992356382139327, "loss": 0.0512, "step": 201 }, { "epoch": 0.5, "learning_rate": 0.00019922762381937878, "loss": 0.0013, "step": 202 }, { "epoch": 0.5, "learning_rate": 0.00019921956779075708, "loss": 0.0022, "step": 203 }, { "epoch": 0.5, "learning_rate": 0.0001992114701314478, "loss": 0.0513, "step": 204 }, { "epoch": 0.51, "learning_rate": 0.00019920333084484857, "loss": 0.0621, "step": 205 }, { "epoch": 0.51, "learning_rate": 0.00019919514993437445, "loss": 0.0032, "step": 206 }, { "epoch": 0.51, "learning_rate": 0.00019918692740345802, "loss": 0.0048, "step": 207 }, { "epoch": 0.51, "learning_rate": 0.00019917866325554938, "loss": 0.001, "step": 208 }, { "epoch": 0.52, "learning_rate": 0.00019917035749411586, "loss": 0.0006, "step": 209 }, { "epoch": 0.52, "learning_rate": 0.00019916201012264254, "loss": 0.0666, "step": 210 }, { "epoch": 0.52, "learning_rate": 0.00019915362114463172, "loss": 0.049, "step": 211 }, { "epoch": 0.52, "learning_rate": 0.0001991451905636033, "loss": 0.0005, "step": 212 }, { "epoch": 0.53, "learning_rate": 0.00019913671838309464, "loss": 0.0025, "step": 213 }, { "epoch": 0.53, "learning_rate": 0.00019912820460666044, "loss": 0.0006, "step": 214 }, { "epoch": 0.53, "learning_rate": 0.00019911964923787295, "loss": 0.062, "step": 215 }, { "epoch": 0.53, "learning_rate": 0.00019911105228032186, "loss": 0.0012, "step": 216 }, { "epoch": 0.54, "learning_rate": 0.00019910241373761426, "loss": 0.0817, "step": 217 }, { "epoch": 0.54, "learning_rate": 0.00019909373361337476, "loss": 0.0003, "step": 218 }, { "epoch": 0.54, "learning_rate": 0.00019908501191124534, "loss": 0.0002, "step": 219 }, { "epoch": 0.54, "learning_rate": 0.0001990762486348855, "loss": 0.0434, "step": 220 }, { "epoch": 0.55, "learning_rate": 0.00019906744378797212, "loss": 0.0994, "step": 221 }, { "epoch": 0.55, "learning_rate": 0.00019905859737419956, "loss": 0.0506, "step": 222 }, { "epoch": 0.55, "learning_rate": 0.00019904970939727963, "loss": 0.059, "step": 223 }, { "epoch": 0.55, "learning_rate": 0.00019904077986094152, "loss": 0.0042, "step": 224 }, { "epoch": 0.56, "learning_rate": 0.00019903180876893194, "loss": 0.0667, "step": 225 }, { "epoch": 0.56, "learning_rate": 0.00019902279612501493, "loss": 0.002, "step": 226 }, { "epoch": 0.56, "learning_rate": 0.00019901374193297212, "loss": 0.0003, "step": 227 }, { "epoch": 0.56, "learning_rate": 0.0001990046461966024, "loss": 0.0003, "step": 228 }, { "epoch": 0.57, "learning_rate": 0.00019899550891972222, "loss": 0.0002, "step": 229 }, { "epoch": 0.57, "learning_rate": 0.00019898633010616542, "loss": 0.0025, "step": 230 }, { "epoch": 0.57, "learning_rate": 0.00019897710975978321, "loss": 0.1237, "step": 231 }, { "epoch": 0.57, "learning_rate": 0.0001989678478844443, "loss": 0.0625, "step": 232 }, { "epoch": 0.58, "learning_rate": 0.00019895854448403482, "loss": 0.0569, "step": 233 }, { "epoch": 0.58, "learning_rate": 0.00019894919956245824, "loss": 0.001, "step": 234 }, { "epoch": 0.58, "learning_rate": 0.00019893981312363562, "loss": 0.0846, "step": 235 }, { "epoch": 0.58, "learning_rate": 0.00019893038517150525, "loss": 0.0325, "step": 236 }, { "epoch": 0.59, "learning_rate": 0.00019892091571002297, "loss": 0.0696, "step": 237 }, { "epoch": 0.59, "learning_rate": 0.00019891140474316194, "loss": 0.0006, "step": 238 }, { "epoch": 0.59, "learning_rate": 0.00019890185227491283, "loss": 0.0002, "step": 239 }, { "epoch": 0.59, "learning_rate": 0.00019889225830928365, "loss": 0.0002, "step": 240 }, { "epoch": 0.6, "learning_rate": 0.00019888262285029987, "loss": 0.0008, "step": 241 }, { "epoch": 0.6, "learning_rate": 0.00019887294590200435, "loss": 0.0013, "step": 242 }, { "epoch": 0.6, "learning_rate": 0.00019886322746845737, "loss": 0.0006, "step": 243 }, { "epoch": 0.6, "learning_rate": 0.00019885346755373656, "loss": 0.0004, "step": 244 }, { "epoch": 0.6, "learning_rate": 0.00019884366616193706, "loss": 0.0468, "step": 245 }, { "epoch": 0.61, "learning_rate": 0.00019883382329717128, "loss": 0.0017, "step": 246 }, { "epoch": 0.61, "learning_rate": 0.00019882393896356913, "loss": 0.0741, "step": 247 }, { "epoch": 0.61, "learning_rate": 0.00019881401316527793, "loss": 0.0004, "step": 248 }, { "epoch": 0.61, "learning_rate": 0.00019880404590646232, "loss": 0.1221, "step": 249 }, { "epoch": 0.62, "learning_rate": 0.0001987940371913044, "loss": 0.0582, "step": 250 }, { "epoch": 0.62, "learning_rate": 0.00019878398702400364, "loss": 0.0689, "step": 251 }, { "epoch": 0.62, "learning_rate": 0.00019877389540877687, "loss": 0.1289, "step": 252 }, { "epoch": 0.62, "learning_rate": 0.0001987637623498584, "loss": 0.0026, "step": 253 }, { "epoch": 0.63, "learning_rate": 0.0001987535878514998, "loss": 0.0419, "step": 254 }, { "epoch": 0.63, "learning_rate": 0.0001987433719179702, "loss": 0.001, "step": 255 }, { "epoch": 0.63, "learning_rate": 0.0001987331145535559, "loss": 0.0003, "step": 256 }, { "epoch": 0.63, "learning_rate": 0.00019872281576256077, "loss": 0.0004, "step": 257 }, { "epoch": 0.64, "learning_rate": 0.000198712475549306, "loss": 0.0006, "step": 258 }, { "epoch": 0.64, "learning_rate": 0.00019870209391813012, "loss": 0.0495, "step": 259 }, { "epoch": 0.64, "learning_rate": 0.00019869167087338907, "loss": 0.1258, "step": 260 }, { "epoch": 0.64, "learning_rate": 0.0001986812064194562, "loss": 0.0003, "step": 261 }, { "epoch": 0.65, "learning_rate": 0.00019867070056072214, "loss": 0.069, "step": 262 }, { "epoch": 0.65, "learning_rate": 0.00019866015330159505, "loss": 0.1022, "step": 263 }, { "epoch": 0.65, "learning_rate": 0.00019864956464650025, "loss": 0.0012, "step": 264 }, { "epoch": 0.65, "learning_rate": 0.00019863893459988062, "loss": 0.1075, "step": 265 }, { "epoch": 0.66, "learning_rate": 0.00019862826316619628, "loss": 0.0002, "step": 266 }, { "epoch": 0.66, "learning_rate": 0.00019861755034992484, "loss": 0.0004, "step": 267 }, { "epoch": 0.66, "learning_rate": 0.0001986067961555611, "loss": 0.0006, "step": 268 }, { "epoch": 0.66, "learning_rate": 0.0001985960005876174, "loss": 0.0541, "step": 269 }, { "epoch": 0.67, "learning_rate": 0.00019858516365062334, "loss": 0.1308, "step": 270 }, { "epoch": 0.67, "learning_rate": 0.00019857428534912587, "loss": 0.0062, "step": 271 }, { "epoch": 0.67, "learning_rate": 0.00019856336568768935, "loss": 0.0382, "step": 272 }, { "epoch": 0.67, "learning_rate": 0.00019855240467089543, "loss": 0.0003, "step": 273 }, { "epoch": 0.68, "learning_rate": 0.00019854140230334322, "loss": 0.0043, "step": 274 }, { "epoch": 0.68, "learning_rate": 0.00019853035858964906, "loss": 0.0373, "step": 275 }, { "epoch": 0.68, "learning_rate": 0.0001985192735344467, "loss": 0.0516, "step": 276 }, { "epoch": 0.68, "learning_rate": 0.00019850814714238716, "loss": 0.0004, "step": 277 }, { "epoch": 0.69, "learning_rate": 0.00019849697941813898, "loss": 0.1028, "step": 278 }, { "epoch": 0.69, "learning_rate": 0.00019848577036638788, "loss": 0.2622, "step": 279 }, { "epoch": 0.69, "learning_rate": 0.00019847451999183694, "loss": 0.0004, "step": 280 }, { "epoch": 0.69, "learning_rate": 0.00019846322829920662, "loss": 0.0006, "step": 281 }, { "epoch": 0.7, "learning_rate": 0.00019845189529323475, "loss": 0.1482, "step": 282 }, { "epoch": 0.7, "learning_rate": 0.00019844052097867638, "loss": 0.038, "step": 283 }, { "epoch": 0.7, "learning_rate": 0.00019842910536030403, "loss": 0.0107, "step": 284 }, { "epoch": 0.7, "learning_rate": 0.00019841764844290744, "loss": 0.035, "step": 285 }, { "epoch": 0.71, "learning_rate": 0.00019840615023129372, "loss": 0.035, "step": 286 }, { "epoch": 0.71, "learning_rate": 0.00019839461073028732, "loss": 0.0033, "step": 287 }, { "epoch": 0.71, "learning_rate": 0.00019838302994472997, "loss": 0.0442, "step": 288 }, { "epoch": 0.71, "learning_rate": 0.00019837140787948082, "loss": 0.0135, "step": 289 }, { "epoch": 0.72, "learning_rate": 0.0001983597445394162, "loss": 0.0014, "step": 290 }, { "epoch": 0.72, "learning_rate": 0.00019834803992942987, "loss": 0.1629, "step": 291 }, { "epoch": 0.72, "learning_rate": 0.00019833629405443284, "loss": 0.0867, "step": 292 }, { "epoch": 0.72, "learning_rate": 0.00019832450691935352, "loss": 0.0388, "step": 293 }, { "epoch": 0.73, "learning_rate": 0.0001983126785291375, "loss": 0.0014, "step": 294 }, { "epoch": 0.73, "learning_rate": 0.00019830080888874778, "loss": 0.0028, "step": 295 }, { "epoch": 0.73, "learning_rate": 0.00019828889800316466, "loss": 0.1433, "step": 296 }, { "epoch": 0.73, "learning_rate": 0.0001982769458773857, "loss": 0.1458, "step": 297 }, { "epoch": 0.74, "learning_rate": 0.00019826495251642578, "loss": 0.0005, "step": 298 }, { "epoch": 0.74, "learning_rate": 0.00019825291792531716, "loss": 0.0114, "step": 299 }, { "epoch": 0.74, "learning_rate": 0.00019824084210910925, "loss": 0.0508, "step": 300 }, { "epoch": 0.74, "learning_rate": 0.0001982287250728689, "loss": 0.0006, "step": 301 }, { "epoch": 0.75, "learning_rate": 0.00019821656682168012, "loss": 0.0377, "step": 302 }, { "epoch": 0.75, "learning_rate": 0.00019820436736064435, "loss": 0.0383, "step": 303 }, { "epoch": 0.75, "learning_rate": 0.00019819212669488026, "loss": 0.0052, "step": 304 }, { "epoch": 0.75, "learning_rate": 0.00019817984482952376, "loss": 0.0008, "step": 305 }, { "epoch": 0.76, "learning_rate": 0.00019816752176972813, "loss": 0.0619, "step": 306 }, { "epoch": 0.76, "learning_rate": 0.00019815515752066387, "loss": 0.0866, "step": 307 }, { "epoch": 0.76, "learning_rate": 0.0001981427520875188, "loss": 0.002, "step": 308 }, { "epoch": 0.76, "learning_rate": 0.00019813030547549803, "loss": 0.0405, "step": 309 }, { "epoch": 0.77, "learning_rate": 0.0001981178176898239, "loss": 0.0656, "step": 310 }, { "epoch": 0.77, "learning_rate": 0.00019810528873573607, "loss": 0.0043, "step": 311 }, { "epoch": 0.77, "learning_rate": 0.00019809271861849145, "loss": 0.0045, "step": 312 }, { "epoch": 0.77, "learning_rate": 0.00019808010734336423, "loss": 0.0021, "step": 313 }, { "epoch": 0.78, "learning_rate": 0.00019806745491564586, "loss": 0.001, "step": 314 }, { "epoch": 0.78, "learning_rate": 0.00019805476134064507, "loss": 0.09, "step": 315 }, { "epoch": 0.78, "learning_rate": 0.0001980420266236878, "loss": 0.0011, "step": 316 }, { "epoch": 0.78, "learning_rate": 0.0001980292507701174, "loss": 0.0013, "step": 317 }, { "epoch": 0.79, "learning_rate": 0.0001980164337852943, "loss": 0.0007, "step": 318 }, { "epoch": 0.79, "learning_rate": 0.00019800357567459633, "loss": 0.1487, "step": 319 }, { "epoch": 0.79, "learning_rate": 0.00019799067644341844, "loss": 0.098, "step": 320 }, { "epoch": 0.79, "learning_rate": 0.00019797773609717297, "loss": 0.0006, "step": 321 }, { "epoch": 0.8, "learning_rate": 0.00019796475464128942, "loss": 0.0469, "step": 322 }, { "epoch": 0.8, "learning_rate": 0.00019795173208121458, "loss": 0.0003, "step": 323 }, { "epoch": 0.8, "learning_rate": 0.00019793866842241243, "loss": 0.0424, "step": 324 }, { "epoch": 0.8, "learning_rate": 0.00019792556367036432, "loss": 0.0003, "step": 325 }, { "epoch": 0.8, "learning_rate": 0.00019791241783056874, "loss": 0.0003, "step": 326 }, { "epoch": 0.81, "learning_rate": 0.00019789923090854136, "loss": 0.0243, "step": 327 }, { "epoch": 0.81, "learning_rate": 0.00019788600290981525, "loss": 0.0383, "step": 328 }, { "epoch": 0.81, "learning_rate": 0.00019787273383994062, "loss": 0.0005, "step": 329 }, { "epoch": 0.81, "learning_rate": 0.0001978594237044849, "loss": 0.0329, "step": 330 }, { "epoch": 0.82, "learning_rate": 0.00019784607250903277, "loss": 0.0013, "step": 331 }, { "epoch": 0.82, "learning_rate": 0.0001978326802591862, "loss": 0.0363, "step": 332 }, { "epoch": 0.82, "learning_rate": 0.00019781924696056426, "loss": 0.1015, "step": 333 }, { "epoch": 0.82, "learning_rate": 0.00019780577261880336, "loss": 0.0383, "step": 334 }, { "epoch": 0.83, "learning_rate": 0.00019779225723955707, "loss": 0.001, "step": 335 }, { "epoch": 0.83, "learning_rate": 0.0001977787008284962, "loss": 0.0498, "step": 336 }, { "epoch": 0.83, "learning_rate": 0.00019776510339130873, "loss": 0.095, "step": 337 }, { "epoch": 0.83, "learning_rate": 0.00019775146493369994, "loss": 0.0004, "step": 338 }, { "epoch": 0.84, "learning_rate": 0.00019773778546139227, "loss": 0.0002, "step": 339 }, { "epoch": 0.84, "learning_rate": 0.0001977240649801253, "loss": 0.0003, "step": 340 }, { "epoch": 0.84, "learning_rate": 0.000197710303495656, "loss": 0.0011, "step": 341 }, { "epoch": 0.84, "learning_rate": 0.00019769650101375837, "loss": 0.0017, "step": 342 }, { "epoch": 0.85, "learning_rate": 0.00019768265754022365, "loss": 0.0576, "step": 343 }, { "epoch": 0.85, "learning_rate": 0.00019766877308086036, "loss": 0.0002, "step": 344 }, { "epoch": 0.85, "learning_rate": 0.00019765484764149415, "loss": 0.0175, "step": 345 }, { "epoch": 0.85, "learning_rate": 0.00019764088122796783, "loss": 0.0002, "step": 346 }, { "epoch": 0.86, "learning_rate": 0.0001976268738461415, "loss": 0.0008, "step": 347 }, { "epoch": 0.86, "learning_rate": 0.0001976128255018924, "loss": 0.0006, "step": 348 }, { "epoch": 0.86, "learning_rate": 0.0001975987362011149, "loss": 0.1749, "step": 349 }, { "epoch": 0.86, "learning_rate": 0.00019758460594972068, "loss": 0.0748, "step": 350 }, { "epoch": 0.87, "learning_rate": 0.00019757043475363847, "loss": 0.0698, "step": 351 }, { "epoch": 0.87, "learning_rate": 0.00019755622261881427, "loss": 0.0002, "step": 352 }, { "epoch": 0.87, "learning_rate": 0.00019754196955121123, "loss": 0.0004, "step": 353 }, { "epoch": 0.87, "learning_rate": 0.00019752767555680968, "loss": 0.0491, "step": 354 }, { "epoch": 0.88, "learning_rate": 0.00019751334064160706, "loss": 0.0708, "step": 355 }, { "epoch": 0.88, "learning_rate": 0.00019749896481161808, "loss": 0.0673, "step": 356 }, { "epoch": 0.88, "learning_rate": 0.00019748454807287457, "loss": 0.0002, "step": 357 }, { "epoch": 0.88, "learning_rate": 0.00019747009043142555, "loss": 0.0689, "step": 358 }, { "epoch": 0.89, "learning_rate": 0.0001974555918933371, "loss": 0.001, "step": 359 }, { "epoch": 0.89, "learning_rate": 0.00019744105246469263, "loss": 0.0496, "step": 360 }, { "epoch": 0.89, "learning_rate": 0.00019742647215159254, "loss": 0.0034, "step": 361 }, { "epoch": 0.89, "learning_rate": 0.00019741185096015448, "loss": 0.0498, "step": 362 }, { "epoch": 0.9, "learning_rate": 0.00019739718889651327, "loss": 0.02, "step": 363 }, { "epoch": 0.9, "learning_rate": 0.00019738248596682078, "loss": 0.0358, "step": 364 }, { "epoch": 0.9, "learning_rate": 0.00019736774217724614, "loss": 0.0929, "step": 365 }, { "epoch": 0.9, "learning_rate": 0.0001973529575339755, "loss": 0.0001, "step": 366 }, { "epoch": 0.91, "learning_rate": 0.00019733813204321233, "loss": 0.0018, "step": 367 }, { "epoch": 0.91, "learning_rate": 0.00019732326571117703, "loss": 0.0017, "step": 368 }, { "epoch": 0.91, "learning_rate": 0.00019730835854410726, "loss": 0.0001, "step": 369 }, { "epoch": 0.91, "learning_rate": 0.00019729341054825782, "loss": 0.0688, "step": 370 }, { "epoch": 0.92, "learning_rate": 0.0001972784217299006, "loss": 0.0004, "step": 371 }, { "epoch": 0.92, "learning_rate": 0.00019726339209532462, "loss": 0.0652, "step": 372 }, { "epoch": 0.92, "learning_rate": 0.00019724832165083603, "loss": 0.002, "step": 373 }, { "epoch": 0.92, "learning_rate": 0.00019723321040275815, "loss": 0.0565, "step": 374 }, { "epoch": 0.93, "learning_rate": 0.00019721805835743134, "loss": 0.0345, "step": 375 }, { "epoch": 0.93, "learning_rate": 0.0001972028655212131, "loss": 0.0005, "step": 376 }, { "epoch": 0.93, "learning_rate": 0.00019718763190047808, "loss": 0.0726, "step": 377 }, { "epoch": 0.93, "learning_rate": 0.00019717235750161806, "loss": 0.08, "step": 378 }, { "epoch": 0.94, "learning_rate": 0.00019715704233104185, "loss": 0.0486, "step": 379 }, { "epoch": 0.94, "learning_rate": 0.00019714168639517544, "loss": 0.0006, "step": 380 }, { "epoch": 0.94, "learning_rate": 0.00019712628970046189, "loss": 0.0001, "step": 381 }, { "epoch": 0.94, "learning_rate": 0.00019711085225336132, "loss": 0.0653, "step": 382 }, { "epoch": 0.95, "learning_rate": 0.00019709537406035105, "loss": 0.1022, "step": 383 }, { "epoch": 0.95, "learning_rate": 0.00019707985512792543, "loss": 0.0005, "step": 384 }, { "epoch": 0.95, "learning_rate": 0.00019706429546259593, "loss": 0.0345, "step": 385 }, { "epoch": 0.95, "learning_rate": 0.00019704869507089105, "loss": 0.0003, "step": 386 }, { "epoch": 0.96, "learning_rate": 0.00019703305395935648, "loss": 0.0231, "step": 387 }, { "epoch": 0.96, "learning_rate": 0.0001970173721345549, "loss": 0.2725, "step": 388 }, { "epoch": 0.96, "learning_rate": 0.00019700164960306614, "loss": 0.0806, "step": 389 }, { "epoch": 0.96, "learning_rate": 0.00019698588637148703, "loss": 0.086, "step": 390 }, { "epoch": 0.97, "learning_rate": 0.0001969700824464316, "loss": 0.0044, "step": 391 }, { "epoch": 0.97, "learning_rate": 0.00019695423783453088, "loss": 0.0033, "step": 392 }, { "epoch": 0.97, "learning_rate": 0.00019693835254243287, "loss": 0.0028, "step": 393 }, { "epoch": 0.97, "learning_rate": 0.00019692242657680286, "loss": 0.0513, "step": 394 }, { "epoch": 0.98, "learning_rate": 0.00019690645994432305, "loss": 0.0038, "step": 395 }, { "epoch": 0.98, "learning_rate": 0.00019689045265169273, "loss": 0.0032, "step": 396 }, { "epoch": 0.98, "learning_rate": 0.0001968744047056283, "loss": 0.0026, "step": 397 }, { "epoch": 0.98, "learning_rate": 0.0001968583161128631, "loss": 0.0099, "step": 398 }, { "epoch": 0.99, "learning_rate": 0.00019684218688014772, "loss": 0.0675, "step": 399 }, { "epoch": 0.99, "learning_rate": 0.0001968260170142496, "loss": 0.0425, "step": 400 }, { "epoch": 0.99, "eval_loss": 0.019980641081929207, "eval_runtime": 126.04, "eval_samples_per_second": 1.016, "eval_steps_per_second": 0.341, "step": 400 }, { "epoch": 0.99, "learning_rate": 0.00019680980652195333, "loss": 0.0016, "step": 401 }, { "epoch": 0.99, "learning_rate": 0.00019679355541006054, "loss": 0.0017, "step": 402 }, { "epoch": 1.0, "learning_rate": 0.0001967772636853899, "loss": 0.0009, "step": 403 }, { "epoch": 1.0, "learning_rate": 0.00019676093135477713, "loss": 0.095, "step": 404 }, { "epoch": 1.0, "learning_rate": 0.00019674455842507492, "loss": 0.0646, "step": 405 }, { "epoch": 1.0, "learning_rate": 0.0001967281449031531, "loss": 0.0014, "step": 406 }, { "epoch": 1.0, "learning_rate": 0.00019671169079589848, "loss": 0.001, "step": 407 }, { "epoch": 1.01, "learning_rate": 0.00019669519611021486, "loss": 0.0223, "step": 408 }, { "epoch": 1.01, "learning_rate": 0.00019667866085302312, "loss": 0.0006, "step": 409 }, { "epoch": 1.01, "learning_rate": 0.00019666208503126112, "loss": 0.0609, "step": 410 }, { "epoch": 1.01, "learning_rate": 0.00019664546865188386, "loss": 0.1217, "step": 411 }, { "epoch": 1.02, "learning_rate": 0.00019662881172186313, "loss": 0.0006, "step": 412 }, { "epoch": 1.02, "learning_rate": 0.00019661211424818798, "loss": 0.0005, "step": 413 }, { "epoch": 1.02, "learning_rate": 0.00019659537623786428, "loss": 0.0518, "step": 414 }, { "epoch": 1.02, "learning_rate": 0.00019657859769791505, "loss": 0.0364, "step": 415 }, { "epoch": 1.03, "learning_rate": 0.00019656177863538026, "loss": 0.0003, "step": 416 }, { "epoch": 1.03, "learning_rate": 0.0001965449190573168, "loss": 0.0604, "step": 417 }, { "epoch": 1.03, "learning_rate": 0.00019652801897079869, "loss": 0.0569, "step": 418 }, { "epoch": 1.03, "learning_rate": 0.0001965110783829169, "loss": 0.0875, "step": 419 }, { "epoch": 1.04, "learning_rate": 0.00019649409730077935, "loss": 0.0554, "step": 420 }, { "epoch": 1.04, "learning_rate": 0.00019647707573151098, "loss": 0.0004, "step": 421 }, { "epoch": 1.04, "learning_rate": 0.00019646001368225382, "loss": 0.035, "step": 422 }, { "epoch": 1.04, "learning_rate": 0.00019644291116016667, "loss": 0.0007, "step": 423 }, { "epoch": 1.05, "learning_rate": 0.0001964257681724255, "loss": 0.0064, "step": 424 }, { "epoch": 1.05, "learning_rate": 0.00019640858472622316, "loss": 0.0008, "step": 425 }, { "epoch": 1.05, "learning_rate": 0.00019639136082876953, "loss": 0.0009, "step": 426 }, { "epoch": 1.05, "learning_rate": 0.0001963740964872914, "loss": 0.0027, "step": 427 }, { "epoch": 1.06, "learning_rate": 0.00019635679170903258, "loss": 0.0356, "step": 428 }, { "epoch": 1.06, "learning_rate": 0.00019633944650125388, "loss": 0.0013, "step": 429 }, { "epoch": 1.06, "learning_rate": 0.00019632206087123296, "loss": 0.0179, "step": 430 }, { "epoch": 1.06, "learning_rate": 0.00019630463482626454, "loss": 0.0477, "step": 431 }, { "epoch": 1.07, "learning_rate": 0.00019628716837366027, "loss": 0.0004, "step": 432 }, { "epoch": 1.07, "learning_rate": 0.00019626966152074874, "loss": 0.0795, "step": 433 }, { "epoch": 1.07, "learning_rate": 0.00019625211427487548, "loss": 0.1192, "step": 434 }, { "epoch": 1.07, "learning_rate": 0.00019623452664340306, "loss": 0.0606, "step": 435 }, { "epoch": 1.08, "learning_rate": 0.00019621689863371083, "loss": 0.0004, "step": 436 }, { "epoch": 1.08, "learning_rate": 0.0001961992302531952, "loss": 0.1189, "step": 437 }, { "epoch": 1.08, "learning_rate": 0.00019618152150926955, "loss": 0.0015, "step": 438 }, { "epoch": 1.08, "learning_rate": 0.00019616377240936407, "loss": 0.0004, "step": 439 }, { "epoch": 1.09, "learning_rate": 0.000196145982960926, "loss": 0.0004, "step": 440 }, { "epoch": 1.09, "learning_rate": 0.00019612815317141945, "loss": 0.0003, "step": 441 }, { "epoch": 1.09, "learning_rate": 0.00019611028304832546, "loss": 0.0003, "step": 442 }, { "epoch": 1.09, "learning_rate": 0.000196092372599142, "loss": 0.0153, "step": 443 }, { "epoch": 1.1, "learning_rate": 0.000196074421831384, "loss": 0.0725, "step": 444 }, { "epoch": 1.1, "learning_rate": 0.00019605643075258321, "loss": 0.0005, "step": 445 }, { "epoch": 1.1, "learning_rate": 0.00019603839937028838, "loss": 0.0005, "step": 446 }, { "epoch": 1.1, "learning_rate": 0.00019602032769206517, "loss": 0.0003, "step": 447 }, { "epoch": 1.11, "learning_rate": 0.00019600221572549606, "loss": 0.0416, "step": 448 }, { "epoch": 1.11, "learning_rate": 0.00019598406347818054, "loss": 0.0712, "step": 449 }, { "epoch": 1.11, "learning_rate": 0.00019596587095773495, "loss": 0.0005, "step": 450 }, { "epoch": 1.11, "learning_rate": 0.00019594763817179254, "loss": 0.0095, "step": 451 }, { "epoch": 1.12, "learning_rate": 0.00019592936512800342, "loss": 0.0294, "step": 452 }, { "epoch": 1.12, "learning_rate": 0.00019591105183403462, "loss": 0.0164, "step": 453 }, { "epoch": 1.12, "learning_rate": 0.00019589269829757008, "loss": 0.0021, "step": 454 }, { "epoch": 1.12, "learning_rate": 0.0001958743045263106, "loss": 0.0017, "step": 455 }, { "epoch": 1.13, "learning_rate": 0.00019585587052797389, "loss": 0.0439, "step": 456 }, { "epoch": 1.13, "learning_rate": 0.00019583739631029445, "loss": 0.0014, "step": 457 }, { "epoch": 1.13, "learning_rate": 0.00019581888188102375, "loss": 0.0944, "step": 458 }, { "epoch": 1.13, "learning_rate": 0.0001958003272479301, "loss": 0.0444, "step": 459 }, { "epoch": 1.14, "learning_rate": 0.00019578173241879872, "loss": 0.0063, "step": 460 }, { "epoch": 1.14, "learning_rate": 0.0001957630974014316, "loss": 0.0054, "step": 461 }, { "epoch": 1.14, "learning_rate": 0.00019574442220364767, "loss": 0.0567, "step": 462 }, { "epoch": 1.14, "learning_rate": 0.0001957257068332827, "loss": 0.0019, "step": 463 }, { "epoch": 1.15, "learning_rate": 0.00019570695129818926, "loss": 0.0355, "step": 464 }, { "epoch": 1.15, "learning_rate": 0.0001956881556062369, "loss": 0.0891, "step": 465 }, { "epoch": 1.15, "learning_rate": 0.0001956693197653119, "loss": 0.0026, "step": 466 }, { "epoch": 1.15, "learning_rate": 0.00019565044378331745, "loss": 0.0609, "step": 467 }, { "epoch": 1.16, "learning_rate": 0.00019563152766817354, "loss": 0.1015, "step": 468 }, { "epoch": 1.16, "learning_rate": 0.00019561257142781705, "loss": 0.0434, "step": 469 }, { "epoch": 1.16, "learning_rate": 0.00019559357507020162, "loss": 0.0016, "step": 470 }, { "epoch": 1.16, "learning_rate": 0.0001955745386032978, "loss": 0.0339, "step": 471 }, { "epoch": 1.17, "learning_rate": 0.00019555546203509297, "loss": 0.0305, "step": 472 }, { "epoch": 1.17, "learning_rate": 0.00019553634537359122, "loss": 0.0003, "step": 473 }, { "epoch": 1.17, "learning_rate": 0.00019551718862681364, "loss": 0.001, "step": 474 }, { "epoch": 1.17, "learning_rate": 0.00019549799180279792, "loss": 0.0016, "step": 475 }, { "epoch": 1.18, "learning_rate": 0.00019547875490959885, "loss": 0.0265, "step": 476 }, { "epoch": 1.18, "learning_rate": 0.00019545947795528777, "loss": 0.0248, "step": 477 }, { "epoch": 1.18, "learning_rate": 0.00019544016094795295, "loss": 0.0443, "step": 478 }, { "epoch": 1.18, "learning_rate": 0.00019542080389569946, "loss": 0.0005, "step": 479 }, { "epoch": 1.19, "learning_rate": 0.00019540140680664913, "loss": 0.0005, "step": 480 }, { "epoch": 1.19, "learning_rate": 0.00019538196968894067, "loss": 0.0001, "step": 481 }, { "epoch": 1.19, "learning_rate": 0.00019536249255072948, "loss": 0.0002, "step": 482 }, { "epoch": 1.19, "learning_rate": 0.00019534297540018785, "loss": 0.0002, "step": 483 }, { "epoch": 1.2, "learning_rate": 0.00019532341824550479, "loss": 0.0526, "step": 484 }, { "epoch": 1.2, "learning_rate": 0.0001953038210948861, "loss": 0.0002, "step": 485 }, { "epoch": 1.2, "learning_rate": 0.0001952841839565544, "loss": 0.0631, "step": 486 }, { "epoch": 1.2, "learning_rate": 0.0001952645068387491, "loss": 0.0252, "step": 487 }, { "epoch": 1.2, "learning_rate": 0.0001952447897497263, "loss": 0.0002, "step": 488 }, { "epoch": 1.21, "learning_rate": 0.00019522503269775899, "loss": 0.056, "step": 489 }, { "epoch": 1.21, "learning_rate": 0.00019520523569113677, "loss": 0.0001, "step": 490 }, { "epoch": 1.21, "learning_rate": 0.00019518539873816617, "loss": 0.0591, "step": 491 }, { "epoch": 1.21, "learning_rate": 0.00019516552184717037, "loss": 0.0257, "step": 492 }, { "epoch": 1.22, "learning_rate": 0.00019514560502648936, "loss": 0.0989, "step": 493 }, { "epoch": 1.22, "learning_rate": 0.00019512564828447988, "loss": 0.0679, "step": 494 }, { "epoch": 1.22, "learning_rate": 0.00019510565162951537, "loss": 0.0002, "step": 495 }, { "epoch": 1.22, "learning_rate": 0.0001950856150699861, "loss": 0.0662, "step": 496 }, { "epoch": 1.23, "learning_rate": 0.00019506553861429898, "loss": 0.0004, "step": 497 }, { "epoch": 1.23, "learning_rate": 0.0001950454222708778, "loss": 0.0464, "step": 498 }, { "epoch": 1.23, "learning_rate": 0.00019502526604816295, "loss": 0.0657, "step": 499 }, { "epoch": 1.23, "learning_rate": 0.0001950050699546116, "loss": 0.0001, "step": 500 }, { "epoch": 1.24, "learning_rate": 0.00019498483399869767, "loss": 0.0327, "step": 501 }, { "epoch": 1.24, "learning_rate": 0.0001949645581889118, "loss": 0.0701, "step": 502 }, { "epoch": 1.24, "learning_rate": 0.00019494424253376134, "loss": 0.0003, "step": 503 }, { "epoch": 1.24, "learning_rate": 0.00019492388704177036, "loss": 0.0001, "step": 504 }, { "epoch": 1.25, "learning_rate": 0.00019490349172147963, "loss": 0.0002, "step": 505 }, { "epoch": 1.25, "learning_rate": 0.00019488305658144667, "loss": 0.0633, "step": 506 }, { "epoch": 1.25, "learning_rate": 0.00019486258163024567, "loss": 0.0066, "step": 507 }, { "epoch": 1.25, "learning_rate": 0.00019484206687646753, "loss": 0.0001, "step": 508 }, { "epoch": 1.26, "learning_rate": 0.0001948215123287199, "loss": 0.0001, "step": 509 }, { "epoch": 1.26, "learning_rate": 0.00019480091799562704, "loss": 0.003, "step": 510 }, { "epoch": 1.26, "learning_rate": 0.00019478028388583, "loss": 0.0005, "step": 511 }, { "epoch": 1.26, "learning_rate": 0.00019475961000798645, "loss": 0.0002, "step": 512 }, { "epoch": 1.27, "learning_rate": 0.00019473889637077073, "loss": 0.0002, "step": 513 }, { "epoch": 1.27, "learning_rate": 0.0001947181429828739, "loss": 0.0594, "step": 514 }, { "epoch": 1.27, "learning_rate": 0.00019469734985300371, "loss": 0.0003, "step": 515 }, { "epoch": 1.27, "learning_rate": 0.00019467651698988462, "loss": 0.0012, "step": 516 }, { "epoch": 1.28, "learning_rate": 0.00019465564440225767, "loss": 0.0003, "step": 517 }, { "epoch": 1.28, "learning_rate": 0.0001946347320988806, "loss": 0.0001, "step": 518 }, { "epoch": 1.28, "learning_rate": 0.00019461378008852785, "loss": 0.0029, "step": 519 }, { "epoch": 1.28, "learning_rate": 0.00019459278837999046, "loss": 0.0017, "step": 520 }, { "epoch": 1.29, "learning_rate": 0.0001945717569820762, "loss": 0.009, "step": 521 }, { "epoch": 1.29, "learning_rate": 0.00019455068590360942, "loss": 0.0014, "step": 522 }, { "epoch": 1.29, "learning_rate": 0.00019452957515343118, "loss": 0.0007, "step": 523 }, { "epoch": 1.29, "learning_rate": 0.00019450842474039913, "loss": 0.0011, "step": 524 }, { "epoch": 1.3, "learning_rate": 0.00019448723467338763, "loss": 0.0279, "step": 525 }, { "epoch": 1.3, "learning_rate": 0.00019446600496128758, "loss": 0.0443, "step": 526 }, { "epoch": 1.3, "learning_rate": 0.00019444473561300668, "loss": 0.0261, "step": 527 }, { "epoch": 1.3, "learning_rate": 0.00019442342663746902, "loss": 0.06, "step": 528 }, { "epoch": 1.31, "learning_rate": 0.00019440207804361553, "loss": 0.0002, "step": 529 }, { "epoch": 1.31, "learning_rate": 0.00019438068984040365, "loss": 0.0514, "step": 530 }, { "epoch": 1.31, "learning_rate": 0.0001943592620368075, "loss": 0.0004, "step": 531 }, { "epoch": 1.31, "learning_rate": 0.00019433779464181778, "loss": 0.0593, "step": 532 }, { "epoch": 1.32, "learning_rate": 0.00019431628766444182, "loss": 0.0122, "step": 533 }, { "epoch": 1.32, "learning_rate": 0.00019429474111370352, "loss": 0.0393, "step": 534 }, { "epoch": 1.32, "learning_rate": 0.00019427315499864344, "loss": 0.062, "step": 535 }, { "epoch": 1.32, "learning_rate": 0.0001942515293283187, "loss": 0.0549, "step": 536 }, { "epoch": 1.33, "learning_rate": 0.000194229864111803, "loss": 0.0005, "step": 537 }, { "epoch": 1.33, "learning_rate": 0.00019420815935818672, "loss": 0.0004, "step": 538 }, { "epoch": 1.33, "learning_rate": 0.00019418641507657673, "loss": 0.0561, "step": 539 }, { "epoch": 1.33, "learning_rate": 0.00019416463127609656, "loss": 0.0574, "step": 540 }, { "epoch": 1.34, "learning_rate": 0.00019414280796588624, "loss": 0.0573, "step": 541 }, { "epoch": 1.34, "learning_rate": 0.00019412094515510248, "loss": 0.0012, "step": 542 }, { "epoch": 1.34, "learning_rate": 0.0001940990428529185, "loss": 0.0064, "step": 543 }, { "epoch": 1.34, "learning_rate": 0.00019407710106852404, "loss": 0.0661, "step": 544 }, { "epoch": 1.35, "learning_rate": 0.0001940551198111255, "loss": 0.0512, "step": 545 }, { "epoch": 1.35, "learning_rate": 0.00019403309908994586, "loss": 0.001, "step": 546 }, { "epoch": 1.35, "learning_rate": 0.00019401103891422455, "loss": 0.042, "step": 547 }, { "epoch": 1.35, "learning_rate": 0.00019398893929321761, "loss": 0.0537, "step": 548 }, { "epoch": 1.36, "learning_rate": 0.00019396680023619765, "loss": 0.2087, "step": 549 }, { "epoch": 1.36, "learning_rate": 0.00019394462175245381, "loss": 0.0287, "step": 550 }, { "epoch": 1.36, "learning_rate": 0.00019392240385129173, "loss": 0.0028, "step": 551 }, { "epoch": 1.36, "learning_rate": 0.00019390014654203369, "loss": 0.0006, "step": 552 }, { "epoch": 1.37, "learning_rate": 0.00019387784983401838, "loss": 0.0008, "step": 553 }, { "epoch": 1.37, "learning_rate": 0.0001938555137366011, "loss": 0.0403, "step": 554 }, { "epoch": 1.37, "learning_rate": 0.0001938331382591537, "loss": 0.0014, "step": 555 }, { "epoch": 1.37, "learning_rate": 0.00019381072341106452, "loss": 0.001, "step": 556 }, { "epoch": 1.38, "learning_rate": 0.00019378826920173837, "loss": 0.0037, "step": 557 }, { "epoch": 1.38, "learning_rate": 0.0001937657756405966, "loss": 0.0008, "step": 558 }, { "epoch": 1.38, "learning_rate": 0.00019374324273707715, "loss": 0.0008, "step": 559 }, { "epoch": 1.38, "learning_rate": 0.00019372067050063438, "loss": 0.0675, "step": 560 }, { "epoch": 1.39, "learning_rate": 0.00019369805894073919, "loss": 0.041, "step": 561 }, { "epoch": 1.39, "learning_rate": 0.00019367540806687893, "loss": 0.0299, "step": 562 }, { "epoch": 1.39, "learning_rate": 0.00019365271788855757, "loss": 0.0006, "step": 563 }, { "epoch": 1.39, "learning_rate": 0.0001936299884152954, "loss": 0.0125, "step": 564 }, { "epoch": 1.4, "learning_rate": 0.00019360721965662933, "loss": 0.032, "step": 565 }, { "epoch": 1.4, "learning_rate": 0.0001935844116221127, "loss": 0.0252, "step": 566 }, { "epoch": 1.4, "learning_rate": 0.00019356156432131534, "loss": 0.0639, "step": 567 }, { "epoch": 1.4, "learning_rate": 0.00019353867776382354, "loss": 0.0005, "step": 568 }, { "epoch": 1.4, "learning_rate": 0.00019351575195924013, "loss": 0.0014, "step": 569 }, { "epoch": 1.41, "learning_rate": 0.00019349278691718427, "loss": 0.0019, "step": 570 }, { "epoch": 1.41, "learning_rate": 0.00019346978264729172, "loss": 0.1464, "step": 571 }, { "epoch": 1.41, "learning_rate": 0.0001934467391592146, "loss": 0.0019, "step": 572 }, { "epoch": 1.41, "learning_rate": 0.00019342365646262156, "loss": 0.0376, "step": 573 }, { "epoch": 1.42, "learning_rate": 0.00019340053456719768, "loss": 0.0007, "step": 574 }, { "epoch": 1.42, "learning_rate": 0.00019337737348264447, "loss": 0.0007, "step": 575 }, { "epoch": 1.42, "learning_rate": 0.00019335417321867987, "loss": 0.001, "step": 576 }, { "epoch": 1.42, "learning_rate": 0.0001933309337850383, "loss": 0.0108, "step": 577 }, { "epoch": 1.43, "learning_rate": 0.0001933076551914706, "loss": 0.0225, "step": 578 }, { "epoch": 1.43, "learning_rate": 0.000193284337447744, "loss": 0.063, "step": 579 }, { "epoch": 1.43, "learning_rate": 0.00019326098056364222, "loss": 0.0476, "step": 580 }, { "epoch": 1.43, "learning_rate": 0.00019323758454896538, "loss": 0.0011, "step": 581 }, { "epoch": 1.44, "learning_rate": 0.00019321414941353003, "loss": 0.0003, "step": 582 }, { "epoch": 1.44, "learning_rate": 0.0001931906751671691, "loss": 0.0262, "step": 583 }, { "epoch": 1.44, "learning_rate": 0.00019316716181973188, "loss": 0.0005, "step": 584 }, { "epoch": 1.44, "learning_rate": 0.00019314360938108425, "loss": 0.084, "step": 585 }, { "epoch": 1.45, "learning_rate": 0.00019312001786110828, "loss": 0.0512, "step": 586 }, { "epoch": 1.45, "learning_rate": 0.0001930963872697026, "loss": 0.0235, "step": 587 }, { "epoch": 1.45, "learning_rate": 0.00019307271761678213, "loss": 0.0002, "step": 588 }, { "epoch": 1.45, "learning_rate": 0.00019304900891227824, "loss": 0.0004, "step": 589 }, { "epoch": 1.46, "learning_rate": 0.00019302526116613864, "loss": 0.0491, "step": 590 }, { "epoch": 1.46, "learning_rate": 0.00019300147438832744, "loss": 0.0595, "step": 591 }, { "epoch": 1.46, "learning_rate": 0.00019297764858882514, "loss": 0.0005, "step": 592 }, { "epoch": 1.46, "learning_rate": 0.00019295378377762862, "loss": 0.0693, "step": 593 }, { "epoch": 1.47, "learning_rate": 0.00019292987996475113, "loss": 0.0024, "step": 594 }, { "epoch": 1.47, "learning_rate": 0.00019290593716022217, "loss": 0.001, "step": 595 }, { "epoch": 1.47, "learning_rate": 0.0001928819553740878, "loss": 0.0134, "step": 596 }, { "epoch": 1.47, "learning_rate": 0.00019285793461641028, "loss": 0.0295, "step": 597 }, { "epoch": 1.48, "learning_rate": 0.00019283387489726827, "loss": 0.0006, "step": 598 }, { "epoch": 1.48, "learning_rate": 0.0001928097762267568, "loss": 0.0009, "step": 599 }, { "epoch": 1.48, "learning_rate": 0.00019278563861498723, "loss": 0.0465, "step": 600 }, { "epoch": 1.48, "eval_loss": 0.017965465784072876, "eval_runtime": 126.0107, "eval_samples_per_second": 1.016, "eval_steps_per_second": 0.341, "step": 600 }, { "epoch": 1.48, "learning_rate": 0.00019276146207208728, "loss": 0.039, "step": 601 }, { "epoch": 1.49, "learning_rate": 0.00019273724660820088, "loss": 0.0005, "step": 602 }, { "epoch": 1.49, "learning_rate": 0.00019271299223348848, "loss": 0.071, "step": 603 }, { "epoch": 1.49, "learning_rate": 0.00019268869895812672, "loss": 0.0007, "step": 604 }, { "epoch": 1.49, "learning_rate": 0.00019266436679230865, "loss": 0.1072, "step": 605 }, { "epoch": 1.5, "learning_rate": 0.00019263999574624355, "loss": 0.0503, "step": 606 }, { "epoch": 1.5, "learning_rate": 0.00019261558583015707, "loss": 0.0451, "step": 607 }, { "epoch": 1.5, "learning_rate": 0.0001925911370542912, "loss": 0.0005, "step": 608 }, { "epoch": 1.5, "learning_rate": 0.00019256664942890413, "loss": 0.0202, "step": 609 }, { "epoch": 1.51, "learning_rate": 0.00019254212296427044, "loss": 0.0401, "step": 610 }, { "epoch": 1.51, "learning_rate": 0.00019251755767068097, "loss": 0.1015, "step": 611 }, { "epoch": 1.51, "learning_rate": 0.00019249295355844285, "loss": 0.0009, "step": 612 }, { "epoch": 1.51, "learning_rate": 0.00019246831063787957, "loss": 0.0007, "step": 613 }, { "epoch": 1.52, "learning_rate": 0.00019244362891933077, "loss": 0.0007, "step": 614 }, { "epoch": 1.52, "learning_rate": 0.00019241890841315248, "loss": 0.0726, "step": 615 }, { "epoch": 1.52, "learning_rate": 0.00019239414912971696, "loss": 0.0022, "step": 616 }, { "epoch": 1.52, "learning_rate": 0.0001923693510794127, "loss": 0.1312, "step": 617 }, { "epoch": 1.53, "learning_rate": 0.0001923445142726446, "loss": 0.0742, "step": 618 }, { "epoch": 1.53, "learning_rate": 0.00019231963871983366, "loss": 0.043, "step": 619 }, { "epoch": 1.53, "learning_rate": 0.0001922947244314172, "loss": 0.0899, "step": 620 }, { "epoch": 1.53, "learning_rate": 0.00019226977141784875, "loss": 0.0004, "step": 621 }, { "epoch": 1.54, "learning_rate": 0.0001922447796895982, "loss": 0.0003, "step": 622 }, { "epoch": 1.54, "learning_rate": 0.0001922197492571516, "loss": 0.0003, "step": 623 }, { "epoch": 1.54, "learning_rate": 0.00019219468013101124, "loss": 0.0015, "step": 624 }, { "epoch": 1.54, "learning_rate": 0.0001921695723216957, "loss": 0.0003, "step": 625 }, { "epoch": 1.55, "learning_rate": 0.00019214442583973966, "loss": 0.001, "step": 626 }, { "epoch": 1.55, "learning_rate": 0.0001921192406956942, "loss": 0.0004, "step": 627 }, { "epoch": 1.55, "learning_rate": 0.00019209401690012653, "loss": 0.0651, "step": 628 }, { "epoch": 1.55, "learning_rate": 0.00019206875446362001, "loss": 0.0938, "step": 629 }, { "epoch": 1.56, "learning_rate": 0.00019204345339677442, "loss": 0.0486, "step": 630 }, { "epoch": 1.56, "learning_rate": 0.0001920181137102055, "loss": 0.0281, "step": 631 }, { "epoch": 1.56, "learning_rate": 0.00019199273541454538, "loss": 0.0003, "step": 632 }, { "epoch": 1.56, "learning_rate": 0.0001919673185204423, "loss": 0.0001, "step": 633 }, { "epoch": 1.57, "learning_rate": 0.00019194186303856067, "loss": 0.0002, "step": 634 }, { "epoch": 1.57, "learning_rate": 0.00019191636897958122, "loss": 0.0003, "step": 635 }, { "epoch": 1.57, "learning_rate": 0.00019189083635420075, "loss": 0.0004, "step": 636 }, { "epoch": 1.57, "learning_rate": 0.00019186526517313225, "loss": 0.0003, "step": 637 }, { "epoch": 1.58, "learning_rate": 0.00019183965544710495, "loss": 0.0611, "step": 638 }, { "epoch": 1.58, "learning_rate": 0.0001918140071868642, "loss": 0.0528, "step": 639 }, { "epoch": 1.58, "learning_rate": 0.00019178832040317155, "loss": 0.0948, "step": 640 }, { "epoch": 1.58, "learning_rate": 0.00019176259510680463, "loss": 0.0002, "step": 641 }, { "epoch": 1.59, "learning_rate": 0.0001917368313085574, "loss": 0.0003, "step": 642 }, { "epoch": 1.59, "learning_rate": 0.0001917110290192398, "loss": 0.0005, "step": 643 }, { "epoch": 1.59, "learning_rate": 0.00019168518824967795, "loss": 0.0415, "step": 644 }, { "epoch": 1.59, "learning_rate": 0.0001916593090107143, "loss": 0.0259, "step": 645 }, { "epoch": 1.6, "learning_rate": 0.00019163339131320718, "loss": 0.0261, "step": 646 }, { "epoch": 1.6, "learning_rate": 0.0001916074351680312, "loss": 0.0001, "step": 647 }, { "epoch": 1.6, "learning_rate": 0.00019158144058607708, "loss": 0.0001, "step": 648 }, { "epoch": 1.6, "learning_rate": 0.00019155540757825168, "loss": 0.0002, "step": 649 }, { "epoch": 1.6, "learning_rate": 0.00019152933615547798, "loss": 0.031, "step": 650 }, { "epoch": 1.61, "learning_rate": 0.00019150322632869497, "loss": 0.0002, "step": 651 }, { "epoch": 1.61, "learning_rate": 0.000191477078108858, "loss": 0.0405, "step": 652 }, { "epoch": 1.61, "learning_rate": 0.00019145089150693822, "loss": 0.0651, "step": 653 }, { "epoch": 1.61, "learning_rate": 0.00019142466653392318, "loss": 0.0001, "step": 654 }, { "epoch": 1.62, "learning_rate": 0.0001913984032008163, "loss": 0.0903, "step": 655 }, { "epoch": 1.62, "learning_rate": 0.0001913721015186372, "loss": 0.0001, "step": 656 }, { "epoch": 1.62, "learning_rate": 0.00019134576149842163, "loss": 0.0271, "step": 657 }, { "epoch": 1.62, "learning_rate": 0.0001913193831512213, "loss": 0.0002, "step": 658 }, { "epoch": 1.63, "learning_rate": 0.00019129296648810412, "loss": 0.0406, "step": 659 }, { "epoch": 1.63, "learning_rate": 0.00019126651152015403, "loss": 0.0484, "step": 660 }, { "epoch": 1.63, "learning_rate": 0.00019124001825847103, "loss": 0.0004, "step": 661 }, { "epoch": 1.63, "learning_rate": 0.0001912134867141712, "loss": 0.0509, "step": 662 }, { "epoch": 1.64, "learning_rate": 0.00019118691689838668, "loss": 0.0436, "step": 663 }, { "epoch": 1.64, "learning_rate": 0.0001911603088222657, "loss": 0.0358, "step": 664 }, { "epoch": 1.64, "learning_rate": 0.0001911336624969725, "loss": 0.0447, "step": 665 }, { "epoch": 1.64, "learning_rate": 0.0001911069779336873, "loss": 0.0439, "step": 666 }, { "epoch": 1.65, "learning_rate": 0.00019108025514360662, "loss": 0.0617, "step": 667 }, { "epoch": 1.65, "learning_rate": 0.00019105349413794272, "loss": 0.0001, "step": 668 }, { "epoch": 1.65, "learning_rate": 0.00019102669492792405, "loss": 0.0001, "step": 669 }, { "epoch": 1.65, "learning_rate": 0.00019099985752479506, "loss": 0.0001, "step": 670 }, { "epoch": 1.66, "learning_rate": 0.00019097298193981624, "loss": 0.0224, "step": 671 }, { "epoch": 1.66, "learning_rate": 0.00019094606818426403, "loss": 0.0407, "step": 672 }, { "epoch": 1.66, "learning_rate": 0.00019091911626943102, "loss": 0.0632, "step": 673 }, { "epoch": 1.66, "learning_rate": 0.00019089212620662568, "loss": 0.0505, "step": 674 }, { "epoch": 1.67, "learning_rate": 0.00019086509800717258, "loss": 0.0001, "step": 675 }, { "epoch": 1.67, "learning_rate": 0.00019083803168241223, "loss": 0.0465, "step": 676 }, { "epoch": 1.67, "learning_rate": 0.00019081092724370114, "loss": 0.0001, "step": 677 }, { "epoch": 1.67, "learning_rate": 0.00019078378470241183, "loss": 0.0312, "step": 678 }, { "epoch": 1.68, "learning_rate": 0.00019075660406993284, "loss": 0.0892, "step": 679 }, { "epoch": 1.68, "learning_rate": 0.00019072938535766865, "loss": 0.0007, "step": 680 }, { "epoch": 1.68, "learning_rate": 0.00019070212857703967, "loss": 0.0002, "step": 681 }, { "epoch": 1.68, "learning_rate": 0.00019067483373948243, "loss": 0.0005, "step": 682 }, { "epoch": 1.69, "learning_rate": 0.00019064750085644926, "loss": 0.0545, "step": 683 }, { "epoch": 1.69, "learning_rate": 0.00019062012993940859, "loss": 0.0501, "step": 684 }, { "epoch": 1.69, "learning_rate": 0.0001905927209998447, "loss": 0.0001, "step": 685 }, { "epoch": 1.69, "learning_rate": 0.00019056527404925789, "loss": 0.1453, "step": 686 }, { "epoch": 1.7, "learning_rate": 0.00019053778909916438, "loss": 0.0001, "step": 687 }, { "epoch": 1.7, "learning_rate": 0.00019051026616109638, "loss": 0.0962, "step": 688 }, { "epoch": 1.7, "learning_rate": 0.00019048270524660196, "loss": 0.0436, "step": 689 }, { "epoch": 1.7, "learning_rate": 0.0001904551063672452, "loss": 0.1079, "step": 690 }, { "epoch": 1.71, "learning_rate": 0.00019042746953460606, "loss": 0.0687, "step": 691 }, { "epoch": 1.71, "learning_rate": 0.00019039979476028043, "loss": 0.0853, "step": 692 }, { "epoch": 1.71, "learning_rate": 0.00019037208205588017, "loss": 0.0943, "step": 693 }, { "epoch": 1.71, "learning_rate": 0.000190344331433033, "loss": 0.0001, "step": 694 }, { "epoch": 1.72, "learning_rate": 0.00019031654290338254, "loss": 0.0375, "step": 695 }, { "epoch": 1.72, "learning_rate": 0.00019028871647858834, "loss": 0.0001, "step": 696 }, { "epoch": 1.72, "learning_rate": 0.00019026085217032593, "loss": 0.0001, "step": 697 }, { "epoch": 1.72, "learning_rate": 0.00019023294999028653, "loss": 0.1158, "step": 698 }, { "epoch": 1.73, "learning_rate": 0.00019020500995017747, "loss": 0.0398, "step": 699 }, { "epoch": 1.73, "learning_rate": 0.00019017703206172185, "loss": 0.0001, "step": 700 }, { "epoch": 1.73, "learning_rate": 0.00019014901633665867, "loss": 0.0001, "step": 701 }, { "epoch": 1.73, "learning_rate": 0.0001901209627867428, "loss": 0.0473, "step": 702 }, { "epoch": 1.74, "learning_rate": 0.000190092871423745, "loss": 0.0528, "step": 703 }, { "epoch": 1.74, "learning_rate": 0.0001900647422594519, "loss": 0.0581, "step": 704 }, { "epoch": 1.74, "learning_rate": 0.0001900365753056659, "loss": 0.0434, "step": 705 }, { "epoch": 1.74, "learning_rate": 0.0001900083705742054, "loss": 0.0001, "step": 706 }, { "epoch": 1.75, "learning_rate": 0.00018998012807690457, "loss": 0.0886, "step": 707 }, { "epoch": 1.75, "learning_rate": 0.00018995184782561345, "loss": 0.0391, "step": 708 }, { "epoch": 1.75, "learning_rate": 0.00018992352983219785, "loss": 0.0006, "step": 709 }, { "epoch": 1.75, "learning_rate": 0.00018989517410853955, "loss": 0.0002, "step": 710 }, { "epoch": 1.76, "learning_rate": 0.00018986678066653601, "loss": 0.0001, "step": 711 }, { "epoch": 1.76, "learning_rate": 0.0001898383495181007, "loss": 0.0006, "step": 712 }, { "epoch": 1.76, "learning_rate": 0.00018980988067516266, "loss": 0.0539, "step": 713 }, { "epoch": 1.76, "learning_rate": 0.00018978137414966698, "loss": 0.0211, "step": 714 }, { "epoch": 1.77, "learning_rate": 0.00018975282995357446, "loss": 0.0355, "step": 715 }, { "epoch": 1.77, "learning_rate": 0.0001897242480988617, "loss": 0.0532, "step": 716 }, { "epoch": 1.77, "learning_rate": 0.0001896956285975211, "loss": 0.0354, "step": 717 }, { "epoch": 1.77, "learning_rate": 0.00018966697146156092, "loss": 0.0004, "step": 718 }, { "epoch": 1.78, "learning_rate": 0.0001896382767030051, "loss": 0.0418, "step": 719 }, { "epoch": 1.78, "learning_rate": 0.00018960954433389345, "loss": 0.0502, "step": 720 }, { "epoch": 1.78, "learning_rate": 0.00018958077436628158, "loss": 0.0004, "step": 721 }, { "epoch": 1.78, "learning_rate": 0.0001895519668122408, "loss": 0.0001, "step": 722 }, { "epoch": 1.79, "learning_rate": 0.00018952312168385823, "loss": 0.0509, "step": 723 }, { "epoch": 1.79, "learning_rate": 0.0001894942389932367, "loss": 0.0003, "step": 724 }, { "epoch": 1.79, "learning_rate": 0.00018946531875249493, "loss": 0.0006, "step": 725 }, { "epoch": 1.79, "learning_rate": 0.00018943636097376726, "loss": 0.019, "step": 726 }, { "epoch": 1.8, "learning_rate": 0.00018940736566920387, "loss": 0.0002, "step": 727 }, { "epoch": 1.8, "learning_rate": 0.00018937833285097066, "loss": 0.0004, "step": 728 }, { "epoch": 1.8, "learning_rate": 0.00018934926253124921, "loss": 0.0258, "step": 729 }, { "epoch": 1.8, "learning_rate": 0.00018932015472223693, "loss": 0.0001, "step": 730 }, { "epoch": 1.8, "learning_rate": 0.0001892910094361469, "loss": 0.0008, "step": 731 }, { "epoch": 1.81, "learning_rate": 0.00018926182668520792, "loss": 0.0479, "step": 732 }, { "epoch": 1.81, "learning_rate": 0.00018923260648166457, "loss": 0.0008, "step": 733 }, { "epoch": 1.81, "learning_rate": 0.0001892033488377771, "loss": 0.1391, "step": 734 }, { "epoch": 1.81, "learning_rate": 0.00018917405376582145, "loss": 0.0029, "step": 735 }, { "epoch": 1.82, "learning_rate": 0.0001891447212780893, "loss": 0.0002, "step": 736 }, { "epoch": 1.82, "learning_rate": 0.00018911535138688802, "loss": 0.0002, "step": 737 }, { "epoch": 1.82, "learning_rate": 0.0001890859441045407, "loss": 0.0877, "step": 738 }, { "epoch": 1.82, "learning_rate": 0.00018905649944338598, "loss": 0.0014, "step": 739 }, { "epoch": 1.83, "learning_rate": 0.0001890270174157784, "loss": 0.0006, "step": 740 }, { "epoch": 1.83, "learning_rate": 0.00018899749803408806, "loss": 0.1397, "step": 741 }, { "epoch": 1.83, "learning_rate": 0.00018896794131070073, "loss": 0.1003, "step": 742 }, { "epoch": 1.83, "learning_rate": 0.00018893834725801782, "loss": 0.0558, "step": 743 }, { "epoch": 1.84, "learning_rate": 0.0001889087158884565, "loss": 0.0016, "step": 744 }, { "epoch": 1.84, "learning_rate": 0.00018887904721444953, "loss": 0.0328, "step": 745 }, { "epoch": 1.84, "learning_rate": 0.00018884934124844532, "loss": 0.0359, "step": 746 }, { "epoch": 1.84, "learning_rate": 0.00018881959800290797, "loss": 0.0856, "step": 747 }, { "epoch": 1.85, "learning_rate": 0.00018878981749031716, "loss": 0.0639, "step": 748 }, { "epoch": 1.85, "learning_rate": 0.00018875999972316825, "loss": 0.0152, "step": 749 }, { "epoch": 1.85, "learning_rate": 0.00018873014471397224, "loss": 0.0043, "step": 750 }, { "epoch": 1.85, "learning_rate": 0.0001887002524752557, "loss": 0.0024, "step": 751 }, { "epoch": 1.86, "learning_rate": 0.00018867032301956088, "loss": 0.0007, "step": 752 }, { "epoch": 1.86, "learning_rate": 0.00018864035635944562, "loss": 0.0005, "step": 753 }, { "epoch": 1.86, "learning_rate": 0.00018861035250748343, "loss": 0.0556, "step": 754 }, { "epoch": 1.86, "learning_rate": 0.00018858031147626325, "loss": 0.0383, "step": 755 }, { "epoch": 1.87, "learning_rate": 0.00018855023327838983, "loss": 0.0003, "step": 756 }, { "epoch": 1.87, "learning_rate": 0.0001885201179264834, "loss": 0.0006, "step": 757 }, { "epoch": 1.87, "learning_rate": 0.00018848996543317982, "loss": 0.0033, "step": 758 }, { "epoch": 1.87, "learning_rate": 0.00018845977581113046, "loss": 0.0003, "step": 759 }, { "epoch": 1.88, "learning_rate": 0.00018842954907300236, "loss": 0.0507, "step": 760 }, { "epoch": 1.88, "learning_rate": 0.00018839928523147812, "loss": 0.181, "step": 761 }, { "epoch": 1.88, "learning_rate": 0.00018836898429925585, "loss": 0.0007, "step": 762 }, { "epoch": 1.88, "learning_rate": 0.0001883386462890493, "loss": 0.0371, "step": 763 }, { "epoch": 1.89, "learning_rate": 0.0001883082712135877, "loss": 0.0005, "step": 764 }, { "epoch": 1.89, "learning_rate": 0.00018827785908561584, "loss": 0.0009, "step": 765 }, { "epoch": 1.89, "learning_rate": 0.00018824740991789415, "loss": 0.0009, "step": 766 }, { "epoch": 1.89, "learning_rate": 0.0001882169237231985, "loss": 0.0014, "step": 767 }, { "epoch": 1.9, "learning_rate": 0.00018818640051432035, "loss": 0.0451, "step": 768 }, { "epoch": 1.9, "learning_rate": 0.00018815584030406664, "loss": 0.0006, "step": 769 }, { "epoch": 1.9, "learning_rate": 0.0001881252431052599, "loss": 0.0046, "step": 770 }, { "epoch": 1.9, "learning_rate": 0.0001880946089307381, "loss": 0.0058, "step": 771 }, { "epoch": 1.91, "learning_rate": 0.00018806393779335483, "loss": 0.0002, "step": 772 }, { "epoch": 1.91, "learning_rate": 0.00018803322970597908, "loss": 0.0012, "step": 773 }, { "epoch": 1.91, "learning_rate": 0.00018800248468149543, "loss": 0.1054, "step": 774 }, { "epoch": 1.91, "learning_rate": 0.00018797170273280388, "loss": 0.0132, "step": 775 }, { "epoch": 1.92, "learning_rate": 0.00018794088387282, "loss": 0.0526, "step": 776 }, { "epoch": 1.92, "learning_rate": 0.00018791002811447481, "loss": 0.0004, "step": 777 }, { "epoch": 1.92, "learning_rate": 0.00018787913547071484, "loss": 0.0421, "step": 778 }, { "epoch": 1.92, "learning_rate": 0.00018784820595450197, "loss": 0.0585, "step": 779 }, { "epoch": 1.93, "learning_rate": 0.00018781723957881372, "loss": 0.0561, "step": 780 }, { "epoch": 1.93, "learning_rate": 0.00018778623635664303, "loss": 0.0618, "step": 781 }, { "epoch": 1.93, "learning_rate": 0.0001877551963009982, "loss": 0.0473, "step": 782 }, { "epoch": 1.93, "learning_rate": 0.00018772411942490313, "loss": 0.0004, "step": 783 }, { "epoch": 1.94, "learning_rate": 0.0001876930057413971, "loss": 0.001, "step": 784 }, { "epoch": 1.94, "learning_rate": 0.0001876618552635348, "loss": 0.0439, "step": 785 }, { "epoch": 1.94, "learning_rate": 0.00018763066800438636, "loss": 0.068, "step": 786 }, { "epoch": 1.94, "learning_rate": 0.00018759944397703747, "loss": 0.035, "step": 787 }, { "epoch": 1.95, "learning_rate": 0.00018756818319458907, "loss": 0.0005, "step": 788 }, { "epoch": 1.95, "learning_rate": 0.0001875368856701576, "loss": 0.0472, "step": 789 }, { "epoch": 1.95, "learning_rate": 0.000187505551416875, "loss": 0.0495, "step": 790 }, { "epoch": 1.95, "learning_rate": 0.00018747418044788846, "loss": 0.0657, "step": 791 }, { "epoch": 1.96, "learning_rate": 0.0001874427727763607, "loss": 0.0005, "step": 792 }, { "epoch": 1.96, "learning_rate": 0.0001874113284154698, "loss": 0.076, "step": 793 }, { "epoch": 1.96, "learning_rate": 0.0001873798473784092, "loss": 0.0581, "step": 794 }, { "epoch": 1.96, "learning_rate": 0.00018734832967838775, "loss": 0.0007, "step": 795 }, { "epoch": 1.97, "learning_rate": 0.00018731677532862976, "loss": 0.0363, "step": 796 }, { "epoch": 1.97, "learning_rate": 0.00018728518434237473, "loss": 0.0651, "step": 797 }, { "epoch": 1.97, "learning_rate": 0.00018725355673287778, "loss": 0.0649, "step": 798 }, { "epoch": 1.97, "learning_rate": 0.0001872218925134092, "loss": 0.0004, "step": 799 }, { "epoch": 1.98, "learning_rate": 0.00018719019169725472, "loss": 0.0002, "step": 800 }, { "epoch": 1.98, "eval_loss": 0.014746900647878647, "eval_runtime": 126.097, "eval_samples_per_second": 1.015, "eval_steps_per_second": 0.341, "step": 800 }, { "epoch": 1.98, "learning_rate": 0.0001871584542977154, "loss": 0.0467, "step": 801 }, { "epoch": 1.98, "learning_rate": 0.00018712668032810768, "loss": 0.0519, "step": 802 }, { "epoch": 1.98, "learning_rate": 0.0001870948698017633, "loss": 0.0105, "step": 803 }, { "epoch": 1.99, "learning_rate": 0.00018706302273202943, "loss": 0.0003, "step": 804 }, { "epoch": 1.99, "learning_rate": 0.00018703113913226847, "loss": 0.0297, "step": 805 }, { "epoch": 1.99, "learning_rate": 0.00018699921901585813, "loss": 0.1403, "step": 806 }, { "epoch": 1.99, "learning_rate": 0.0001869672623961916, "loss": 0.0525, "step": 807 }, { "epoch": 2.0, "learning_rate": 0.0001869352692866772, "loss": 0.0691, "step": 808 }, { "epoch": 2.0, "learning_rate": 0.00018690323970073873, "loss": 0.0535, "step": 809 }, { "epoch": 2.0, "learning_rate": 0.00018687117365181512, "loss": 0.0336, "step": 810 }, { "epoch": 2.0, "learning_rate": 0.00018683907115336074, "loss": 0.0262, "step": 811 }, { "epoch": 2.0, "learning_rate": 0.00018680693221884517, "loss": 0.0004, "step": 812 }, { "epoch": 2.01, "learning_rate": 0.00018677475686175338, "loss": 0.0003, "step": 813 }, { "epoch": 2.01, "learning_rate": 0.00018674254509558544, "loss": 0.0402, "step": 814 }, { "epoch": 2.01, "learning_rate": 0.0001867102969338569, "loss": 0.0004, "step": 815 }, { "epoch": 2.01, "learning_rate": 0.00018667801239009846, "loss": 0.0005, "step": 816 }, { "epoch": 2.02, "learning_rate": 0.00018664569147785613, "loss": 0.005, "step": 817 }, { "epoch": 2.02, "learning_rate": 0.00018661333421069113, "loss": 0.0008, "step": 818 }, { "epoch": 2.02, "learning_rate": 0.00018658094060217999, "loss": 0.0608, "step": 819 }, { "epoch": 2.02, "learning_rate": 0.00018654851066591448, "loss": 0.031, "step": 820 }, { "epoch": 2.03, "learning_rate": 0.00018651604441550154, "loss": 0.0024, "step": 821 }, { "epoch": 2.03, "learning_rate": 0.00018648354186456348, "loss": 0.0726, "step": 822 }, { "epoch": 2.03, "learning_rate": 0.00018645100302673774, "loss": 0.0463, "step": 823 }, { "epoch": 2.03, "learning_rate": 0.000186418427915677, "loss": 0.0032, "step": 824 }, { "epoch": 2.04, "learning_rate": 0.0001863858165450492, "loss": 0.0004, "step": 825 }, { "epoch": 2.04, "learning_rate": 0.00018635316892853741, "loss": 0.0196, "step": 826 }, { "epoch": 2.04, "learning_rate": 0.00018632048507984, "loss": 0.022, "step": 827 }, { "epoch": 2.04, "learning_rate": 0.00018628776501267052, "loss": 0.0009, "step": 828 }, { "epoch": 2.05, "learning_rate": 0.0001862550087407577, "loss": 0.1024, "step": 829 }, { "epoch": 2.05, "learning_rate": 0.0001862222162778454, "loss": 0.0796, "step": 830 }, { "epoch": 2.05, "learning_rate": 0.00018618938763769282, "loss": 0.0004, "step": 831 }, { "epoch": 2.05, "learning_rate": 0.0001861565228340742, "loss": 0.0004, "step": 832 }, { "epoch": 2.06, "learning_rate": 0.00018612362188077898, "loss": 0.1187, "step": 833 }, { "epoch": 2.06, "learning_rate": 0.00018609068479161182, "loss": 0.002, "step": 834 }, { "epoch": 2.06, "learning_rate": 0.00018605771158039253, "loss": 0.0931, "step": 835 }, { "epoch": 2.06, "learning_rate": 0.00018602470226095603, "loss": 0.1085, "step": 836 }, { "epoch": 2.07, "learning_rate": 0.0001859916568471524, "loss": 0.0022, "step": 837 }, { "epoch": 2.07, "learning_rate": 0.00018595857535284692, "loss": 0.0291, "step": 838 }, { "epoch": 2.07, "learning_rate": 0.0001859254577919199, "loss": 0.032, "step": 839 }, { "epoch": 2.07, "learning_rate": 0.00018589230417826697, "loss": 0.0046, "step": 840 }, { "epoch": 2.08, "learning_rate": 0.0001858591145257987, "loss": 0.0018, "step": 841 }, { "epoch": 2.08, "learning_rate": 0.00018582588884844084, "loss": 0.1096, "step": 842 }, { "epoch": 2.08, "learning_rate": 0.0001857926271601343, "loss": 0.0547, "step": 843 }, { "epoch": 2.08, "learning_rate": 0.00018575932947483502, "loss": 0.0151, "step": 844 }, { "epoch": 2.09, "learning_rate": 0.00018572599580651415, "loss": 0.071, "step": 845 }, { "epoch": 2.09, "learning_rate": 0.00018569262616915784, "loss": 0.0062, "step": 846 }, { "epoch": 2.09, "learning_rate": 0.00018565922057676737, "loss": 0.0039, "step": 847 }, { "epoch": 2.09, "learning_rate": 0.00018562577904335912, "loss": 0.0017, "step": 848 }, { "epoch": 2.1, "learning_rate": 0.00018559230158296454, "loss": 0.0013, "step": 849 }, { "epoch": 2.1, "learning_rate": 0.00018555878820963013, "loss": 0.0011, "step": 850 }, { "epoch": 2.1, "learning_rate": 0.00018552523893741748, "loss": 0.0013, "step": 851 }, { "epoch": 2.1, "learning_rate": 0.00018549165378040327, "loss": 0.005, "step": 852 }, { "epoch": 2.11, "learning_rate": 0.0001854580327526792, "loss": 0.0379, "step": 853 }, { "epoch": 2.11, "learning_rate": 0.00018542437586835202, "loss": 0.0847, "step": 854 }, { "epoch": 2.11, "learning_rate": 0.00018539068314154354, "loss": 0.0017, "step": 855 }, { "epoch": 2.11, "learning_rate": 0.00018535695458639056, "loss": 0.0721, "step": 856 }, { "epoch": 2.12, "learning_rate": 0.000185323190217045, "loss": 0.0006, "step": 857 }, { "epoch": 2.12, "learning_rate": 0.00018528939004767376, "loss": 0.0003, "step": 858 }, { "epoch": 2.12, "learning_rate": 0.00018525555409245877, "loss": 0.0005, "step": 859 }, { "epoch": 2.12, "learning_rate": 0.00018522168236559695, "loss": 0.0008, "step": 860 }, { "epoch": 2.13, "learning_rate": 0.00018518777488130023, "loss": 0.0466, "step": 861 }, { "epoch": 2.13, "learning_rate": 0.0001851538316537956, "loss": 0.0372, "step": 862 }, { "epoch": 2.13, "learning_rate": 0.00018511985269732497, "loss": 0.038, "step": 863 }, { "epoch": 2.13, "learning_rate": 0.0001850858380261453, "loss": 0.0013, "step": 864 }, { "epoch": 2.14, "learning_rate": 0.00018505178765452853, "loss": 0.0304, "step": 865 }, { "epoch": 2.14, "learning_rate": 0.00018501770159676156, "loss": 0.0465, "step": 866 }, { "epoch": 2.14, "learning_rate": 0.00018498357986714622, "loss": 0.0424, "step": 867 }, { "epoch": 2.14, "learning_rate": 0.0001849494224799994, "loss": 0.0705, "step": 868 }, { "epoch": 2.15, "learning_rate": 0.0001849152294496529, "loss": 0.0002, "step": 869 }, { "epoch": 2.15, "learning_rate": 0.00018488100079045344, "loss": 0.0009, "step": 870 }, { "epoch": 2.15, "learning_rate": 0.00018484673651676282, "loss": 0.0002, "step": 871 }, { "epoch": 2.15, "learning_rate": 0.0001848124366429576, "loss": 0.0455, "step": 872 }, { "epoch": 2.16, "learning_rate": 0.0001847781011834294, "loss": 0.0505, "step": 873 }, { "epoch": 2.16, "learning_rate": 0.00018474373015258473, "loss": 0.0386, "step": 874 }, { "epoch": 2.16, "learning_rate": 0.00018470932356484508, "loss": 0.0509, "step": 875 }, { "epoch": 2.16, "learning_rate": 0.0001846748814346468, "loss": 0.0003, "step": 876 }, { "epoch": 2.17, "learning_rate": 0.0001846404037764411, "loss": 0.0351, "step": 877 }, { "epoch": 2.17, "learning_rate": 0.00018460589060469425, "loss": 0.0003, "step": 878 }, { "epoch": 2.17, "learning_rate": 0.0001845713419338873, "loss": 0.0002, "step": 879 }, { "epoch": 2.17, "learning_rate": 0.00018453675777851627, "loss": 0.0037, "step": 880 }, { "epoch": 2.18, "learning_rate": 0.00018450213815309198, "loss": 0.0442, "step": 881 }, { "epoch": 2.18, "learning_rate": 0.00018446748307214019, "loss": 0.035, "step": 882 }, { "epoch": 2.18, "learning_rate": 0.00018443279255020152, "loss": 0.0875, "step": 883 }, { "epoch": 2.18, "learning_rate": 0.0001843980666018315, "loss": 0.0418, "step": 884 }, { "epoch": 2.19, "learning_rate": 0.00018436330524160047, "loss": 0.0009, "step": 885 }, { "epoch": 2.19, "learning_rate": 0.00018432850848409363, "loss": 0.0089, "step": 886 }, { "epoch": 2.19, "learning_rate": 0.00018429367634391114, "loss": 0.0017, "step": 887 }, { "epoch": 2.19, "learning_rate": 0.00018425880883566782, "loss": 0.0004, "step": 888 }, { "epoch": 2.2, "learning_rate": 0.00018422390597399349, "loss": 0.0464, "step": 889 }, { "epoch": 2.2, "learning_rate": 0.0001841889677735327, "loss": 0.0027, "step": 890 }, { "epoch": 2.2, "learning_rate": 0.00018415399424894492, "loss": 0.0004, "step": 891 }, { "epoch": 2.2, "learning_rate": 0.00018411898541490434, "loss": 0.0928, "step": 892 }, { "epoch": 2.2, "learning_rate": 0.00018408394128610001, "loss": 0.0042, "step": 893 }, { "epoch": 2.21, "learning_rate": 0.0001840488618772359, "loss": 0.002, "step": 894 }, { "epoch": 2.21, "learning_rate": 0.00018401374720303056, "loss": 0.0002, "step": 895 }, { "epoch": 2.21, "learning_rate": 0.00018397859727821748, "loss": 0.0004, "step": 896 }, { "epoch": 2.21, "learning_rate": 0.00018394341211754495, "loss": 0.0987, "step": 897 }, { "epoch": 2.22, "learning_rate": 0.00018390819173577598, "loss": 0.0004, "step": 898 }, { "epoch": 2.22, "learning_rate": 0.0001838729361476884, "loss": 0.0002, "step": 899 }, { "epoch": 2.22, "learning_rate": 0.00018383764536807485, "loss": 0.0002, "step": 900 }, { "epoch": 2.22, "learning_rate": 0.00018380231941174258, "loss": 0.0422, "step": 901 }, { "epoch": 2.23, "learning_rate": 0.00018376695829351377, "loss": 0.0004, "step": 902 }, { "epoch": 2.23, "learning_rate": 0.0001837315620282253, "loss": 0.0007, "step": 903 }, { "epoch": 2.23, "learning_rate": 0.00018369613063072874, "loss": 0.0231, "step": 904 }, { "epoch": 2.23, "learning_rate": 0.0001836606641158905, "loss": 0.053, "step": 905 }, { "epoch": 2.24, "learning_rate": 0.00018362516249859163, "loss": 0.0003, "step": 906 }, { "epoch": 2.24, "learning_rate": 0.00018358962579372796, "loss": 0.0403, "step": 907 }, { "epoch": 2.24, "learning_rate": 0.00018355405401621001, "loss": 0.0319, "step": 908 }, { "epoch": 2.24, "learning_rate": 0.0001835184471809631, "loss": 0.0402, "step": 909 }, { "epoch": 2.25, "learning_rate": 0.00018348280530292713, "loss": 0.0628, "step": 910 }, { "epoch": 2.25, "learning_rate": 0.0001834471283970568, "loss": 0.0937, "step": 911 }, { "epoch": 2.25, "learning_rate": 0.00018341141647832147, "loss": 0.0005, "step": 912 }, { "epoch": 2.25, "learning_rate": 0.00018337566956170523, "loss": 0.05, "step": 913 }, { "epoch": 2.26, "learning_rate": 0.00018333988766220676, "loss": 0.0202, "step": 914 }, { "epoch": 2.26, "learning_rate": 0.00018330407079483952, "loss": 0.0008, "step": 915 }, { "epoch": 2.26, "learning_rate": 0.0001832682189746316, "loss": 0.0003, "step": 916 }, { "epoch": 2.26, "learning_rate": 0.00018323233221662573, "loss": 0.0037, "step": 917 }, { "epoch": 2.27, "learning_rate": 0.00018319641053587938, "loss": 0.0417, "step": 918 }, { "epoch": 2.27, "learning_rate": 0.0001831604539474646, "loss": 0.0269, "step": 919 }, { "epoch": 2.27, "learning_rate": 0.0001831244624664681, "loss": 0.048, "step": 920 }, { "epoch": 2.27, "learning_rate": 0.0001830884361079912, "loss": 0.0801, "step": 921 }, { "epoch": 2.28, "learning_rate": 0.00018305237488714995, "loss": 0.0001, "step": 922 }, { "epoch": 2.28, "learning_rate": 0.00018301627881907494, "loss": 0.0425, "step": 923 }, { "epoch": 2.28, "learning_rate": 0.00018298014791891137, "loss": 0.0323, "step": 924 }, { "epoch": 2.28, "learning_rate": 0.00018294398220181917, "loss": 0.0631, "step": 925 }, { "epoch": 2.29, "learning_rate": 0.00018290778168297277, "loss": 0.0005, "step": 926 }, { "epoch": 2.29, "learning_rate": 0.00018287154637756125, "loss": 0.0007, "step": 927 }, { "epoch": 2.29, "learning_rate": 0.00018283527630078825, "loss": 0.0237, "step": 928 }, { "epoch": 2.29, "learning_rate": 0.00018279897146787204, "loss": 0.0605, "step": 929 }, { "epoch": 2.3, "learning_rate": 0.0001827626318940454, "loss": 0.0871, "step": 930 }, { "epoch": 2.3, "learning_rate": 0.00018272625759455582, "loss": 0.003, "step": 931 }, { "epoch": 2.3, "learning_rate": 0.00018268984858466522, "loss": 0.0001, "step": 932 }, { "epoch": 2.3, "learning_rate": 0.00018265340487965017, "loss": 0.0586, "step": 933 }, { "epoch": 2.31, "learning_rate": 0.00018261692649480175, "loss": 0.0459, "step": 934 }, { "epoch": 2.31, "learning_rate": 0.00018258041344542566, "loss": 0.0005, "step": 935 }, { "epoch": 2.31, "learning_rate": 0.00018254386574684204, "loss": 0.027, "step": 936 }, { "epoch": 2.31, "learning_rate": 0.00018250728341438568, "loss": 0.0001, "step": 937 }, { "epoch": 2.32, "learning_rate": 0.0001824706664634058, "loss": 0.0824, "step": 938 }, { "epoch": 2.32, "learning_rate": 0.0001824340149092662, "loss": 0.0112, "step": 939 }, { "epoch": 2.32, "learning_rate": 0.00018239732876734527, "loss": 0.0007, "step": 940 }, { "epoch": 2.32, "learning_rate": 0.0001823606080530357, "loss": 0.0004, "step": 941 }, { "epoch": 2.33, "learning_rate": 0.0001823238527817449, "loss": 0.1245, "step": 942 }, { "epoch": 2.33, "learning_rate": 0.0001822870629688947, "loss": 0.0796, "step": 943 }, { "epoch": 2.33, "learning_rate": 0.00018225023862992142, "loss": 0.0712, "step": 944 }, { "epoch": 2.33, "learning_rate": 0.00018221337978027583, "loss": 0.033, "step": 945 }, { "epoch": 2.34, "learning_rate": 0.00018217648643542323, "loss": 0.0025, "step": 946 }, { "epoch": 2.34, "learning_rate": 0.00018213955861084343, "loss": 0.0002, "step": 947 }, { "epoch": 2.34, "learning_rate": 0.0001821025963220306, "loss": 0.0015, "step": 948 }, { "epoch": 2.34, "learning_rate": 0.0001820655995844935, "loss": 0.0024, "step": 949 }, { "epoch": 2.35, "learning_rate": 0.00018202856841375518, "loss": 0.0435, "step": 950 }, { "epoch": 2.35, "learning_rate": 0.00018199150282535332, "loss": 0.0511, "step": 951 }, { "epoch": 2.35, "learning_rate": 0.00018195440283483988, "loss": 0.0627, "step": 952 }, { "epoch": 2.35, "learning_rate": 0.0001819172684577814, "loss": 0.0885, "step": 953 }, { "epoch": 2.36, "learning_rate": 0.0001818800997097587, "loss": 0.0628, "step": 954 }, { "epoch": 2.36, "learning_rate": 0.00018184289660636715, "loss": 0.0008, "step": 955 }, { "epoch": 2.36, "learning_rate": 0.00018180565916321647, "loss": 0.0005, "step": 956 }, { "epoch": 2.36, "learning_rate": 0.00018176838739593078, "loss": 0.0304, "step": 957 }, { "epoch": 2.37, "learning_rate": 0.0001817310813201486, "loss": 0.0297, "step": 958 }, { "epoch": 2.37, "learning_rate": 0.00018169374095152295, "loss": 0.001, "step": 959 }, { "epoch": 2.37, "learning_rate": 0.0001816563663057211, "loss": 0.0022, "step": 960 }, { "epoch": 2.37, "learning_rate": 0.00018161895739842476, "loss": 0.0411, "step": 961 }, { "epoch": 2.38, "learning_rate": 0.00018158151424533002, "loss": 0.0012, "step": 962 }, { "epoch": 2.38, "learning_rate": 0.0001815440368621473, "loss": 0.0343, "step": 963 }, { "epoch": 2.38, "learning_rate": 0.00018150652526460146, "loss": 0.0418, "step": 964 }, { "epoch": 2.38, "learning_rate": 0.00018146897946843163, "loss": 0.1491, "step": 965 }, { "epoch": 2.39, "learning_rate": 0.00018143139948939137, "loss": 0.0002, "step": 966 }, { "epoch": 2.39, "learning_rate": 0.00018139378534324848, "loss": 0.0006, "step": 967 }, { "epoch": 2.39, "learning_rate": 0.00018135613704578526, "loss": 0.0004, "step": 968 }, { "epoch": 2.39, "learning_rate": 0.00018131845461279812, "loss": 0.0704, "step": 969 }, { "epoch": 2.4, "learning_rate": 0.000181280738060098, "loss": 0.0002, "step": 970 }, { "epoch": 2.4, "learning_rate": 0.00018124298740351003, "loss": 0.0001, "step": 971 }, { "epoch": 2.4, "learning_rate": 0.00018120520265887363, "loss": 0.0025, "step": 972 }, { "epoch": 2.4, "learning_rate": 0.00018116738384204266, "loss": 0.0254, "step": 973 }, { "epoch": 2.4, "learning_rate": 0.00018112953096888516, "loss": 0.0003, "step": 974 }, { "epoch": 2.41, "learning_rate": 0.0001810916440552835, "loss": 0.1252, "step": 975 }, { "epoch": 2.41, "learning_rate": 0.00018105372311713432, "loss": 0.0005, "step": 976 }, { "epoch": 2.41, "learning_rate": 0.0001810157681703485, "loss": 0.0002, "step": 977 }, { "epoch": 2.41, "learning_rate": 0.0001809777792308513, "loss": 0.024, "step": 978 }, { "epoch": 2.42, "learning_rate": 0.00018093975631458217, "loss": 0.0001, "step": 979 }, { "epoch": 2.42, "learning_rate": 0.00018090169943749476, "loss": 0.0566, "step": 980 }, { "epoch": 2.42, "learning_rate": 0.00018086360861555706, "loss": 0.0003, "step": 981 }, { "epoch": 2.42, "learning_rate": 0.0001808254838647513, "loss": 0.0443, "step": 982 }, { "epoch": 2.43, "learning_rate": 0.00018078732520107385, "loss": 0.0001, "step": 983 }, { "epoch": 2.43, "learning_rate": 0.00018074913264053545, "loss": 0.0002, "step": 984 }, { "epoch": 2.43, "learning_rate": 0.00018071090619916093, "loss": 0.0553, "step": 985 }, { "epoch": 2.43, "learning_rate": 0.00018067264589298945, "loss": 0.0981, "step": 986 }, { "epoch": 2.44, "learning_rate": 0.0001806343517380743, "loss": 0.001, "step": 987 }, { "epoch": 2.44, "learning_rate": 0.00018059602375048293, "loss": 0.0004, "step": 988 }, { "epoch": 2.44, "learning_rate": 0.00018055766194629715, "loss": 0.0001, "step": 989 }, { "epoch": 2.44, "learning_rate": 0.00018051926634161282, "loss": 0.0004, "step": 990 }, { "epoch": 2.45, "learning_rate": 0.00018048083695254005, "loss": 0.0494, "step": 991 }, { "epoch": 2.45, "learning_rate": 0.00018044237379520305, "loss": 0.089, "step": 992 }, { "epoch": 2.45, "learning_rate": 0.00018040387688574025, "loss": 0.0001, "step": 993 }, { "epoch": 2.45, "learning_rate": 0.0001803653462403043, "loss": 0.056, "step": 994 }, { "epoch": 2.46, "learning_rate": 0.00018032678187506187, "loss": 0.0343, "step": 995 }, { "epoch": 2.46, "learning_rate": 0.0001802881838061939, "loss": 0.041, "step": 996 }, { "epoch": 2.46, "learning_rate": 0.00018024955204989538, "loss": 0.0631, "step": 997 }, { "epoch": 2.46, "learning_rate": 0.00018021088662237552, "loss": 0.0001, "step": 998 }, { "epoch": 2.47, "learning_rate": 0.0001801721875398576, "loss": 0.0002, "step": 999 }, { "epoch": 2.47, "learning_rate": 0.00018013345481857903, "loss": 0.0001, "step": 1000 }, { "epoch": 2.47, "eval_loss": 0.016305094584822655, "eval_runtime": 126.0835, "eval_samples_per_second": 1.015, "eval_steps_per_second": 0.341, "step": 1000 }, { "epoch": 2.47, "learning_rate": 0.0001800946884747913, "loss": 0.0365, "step": 1001 }, { "epoch": 2.47, "learning_rate": 0.00018005588852476015, "loss": 0.0001, "step": 1002 }, { "epoch": 2.48, "learning_rate": 0.00018001705498476523, "loss": 0.0412, "step": 1003 }, { "epoch": 2.48, "learning_rate": 0.00017997818787110042, "loss": 0.0001, "step": 1004 }, { "epoch": 2.48, "learning_rate": 0.0001799392872000736, "loss": 0.0001, "step": 1005 }, { "epoch": 2.48, "learning_rate": 0.0001799003529880068, "loss": 0.0, "step": 1006 }, { "epoch": 2.49, "learning_rate": 0.00017986138525123607, "loss": 0.0739, "step": 1007 }, { "epoch": 2.49, "learning_rate": 0.0001798223840061116, "loss": 0.0001, "step": 1008 }, { "epoch": 2.49, "learning_rate": 0.0001797833492689975, "loss": 0.0863, "step": 1009 }, { "epoch": 2.49, "learning_rate": 0.00017974428105627208, "loss": 0.0002, "step": 1010 }, { "epoch": 2.5, "learning_rate": 0.00017970517938432765, "loss": 0.0538, "step": 1011 }, { "epoch": 2.5, "learning_rate": 0.00017966604426957047, "loss": 0.0001, "step": 1012 }, { "epoch": 2.5, "learning_rate": 0.00017962687572842102, "loss": 0.034, "step": 1013 }, { "epoch": 2.5, "learning_rate": 0.00017958767377731358, "loss": 0.0001, "step": 1014 }, { "epoch": 2.51, "learning_rate": 0.00017954843843269664, "loss": 0.0001, "step": 1015 }, { "epoch": 2.51, "learning_rate": 0.00017950916971103259, "loss": 0.0001, "step": 1016 }, { "epoch": 2.51, "learning_rate": 0.00017946986762879785, "loss": 0.0002, "step": 1017 }, { "epoch": 2.51, "learning_rate": 0.00017943053220248283, "loss": 0.0489, "step": 1018 }, { "epoch": 2.52, "learning_rate": 0.000179391163448592, "loss": 0.0002, "step": 1019 }, { "epoch": 2.52, "learning_rate": 0.0001793517613836437, "loss": 0.0003, "step": 1020 }, { "epoch": 2.52, "learning_rate": 0.00017931232602417033, "loss": 0.0003, "step": 1021 }, { "epoch": 2.52, "learning_rate": 0.00017927285738671825, "loss": 0.0641, "step": 1022 }, { "epoch": 2.53, "learning_rate": 0.00017923335548784773, "loss": 0.0714, "step": 1023 }, { "epoch": 2.53, "learning_rate": 0.00017919382034413305, "loss": 0.0004, "step": 1024 }, { "epoch": 2.53, "learning_rate": 0.00017915425197216245, "loss": 0.0272, "step": 1025 }, { "epoch": 2.53, "learning_rate": 0.00017911465038853805, "loss": 0.0003, "step": 1026 }, { "epoch": 2.54, "learning_rate": 0.00017907501560987594, "loss": 0.0003, "step": 1027 }, { "epoch": 2.54, "learning_rate": 0.00017903534765280614, "loss": 0.0001, "step": 1028 }, { "epoch": 2.54, "learning_rate": 0.00017899564653397262, "loss": 0.0357, "step": 1029 }, { "epoch": 2.54, "learning_rate": 0.00017895591227003315, "loss": 0.0015, "step": 1030 }, { "epoch": 2.55, "learning_rate": 0.00017891614487765959, "loss": 0.0957, "step": 1031 }, { "epoch": 2.55, "learning_rate": 0.00017887634437353754, "loss": 0.0, "step": 1032 }, { "epoch": 2.55, "learning_rate": 0.00017883651077436655, "loss": 0.0377, "step": 1033 }, { "epoch": 2.55, "learning_rate": 0.00017879664409686008, "loss": 0.0415, "step": 1034 }, { "epoch": 2.56, "learning_rate": 0.00017875674435774547, "loss": 0.0002, "step": 1035 }, { "epoch": 2.56, "learning_rate": 0.00017871681157376383, "loss": 0.0489, "step": 1036 }, { "epoch": 2.56, "learning_rate": 0.0001786768457616703, "loss": 0.0478, "step": 1037 }, { "epoch": 2.56, "learning_rate": 0.00017863684693823374, "loss": 0.0675, "step": 1038 }, { "epoch": 2.57, "learning_rate": 0.00017859681512023693, "loss": 0.0002, "step": 1039 }, { "epoch": 2.57, "learning_rate": 0.00017855675032447648, "loss": 0.06, "step": 1040 }, { "epoch": 2.57, "learning_rate": 0.00017851665256776283, "loss": 0.1278, "step": 1041 }, { "epoch": 2.57, "learning_rate": 0.00017847652186692026, "loss": 0.0002, "step": 1042 }, { "epoch": 2.58, "learning_rate": 0.00017843635823878686, "loss": 0.0, "step": 1043 }, { "epoch": 2.58, "learning_rate": 0.00017839616170021452, "loss": 0.0441, "step": 1044 }, { "epoch": 2.58, "learning_rate": 0.00017835593226806903, "loss": 0.0, "step": 1045 }, { "epoch": 2.58, "learning_rate": 0.00017831566995922985, "loss": 0.0446, "step": 1046 }, { "epoch": 2.59, "learning_rate": 0.00017827537479059026, "loss": 0.0006, "step": 1047 }, { "epoch": 2.59, "learning_rate": 0.0001782350467790575, "loss": 0.0002, "step": 1048 }, { "epoch": 2.59, "learning_rate": 0.00017819468594155232, "loss": 0.0002, "step": 1049 }, { "epoch": 2.59, "learning_rate": 0.00017815429229500946, "loss": 0.0001, "step": 1050 }, { "epoch": 2.6, "learning_rate": 0.00017811386585637727, "loss": 0.0392, "step": 1051 }, { "epoch": 2.6, "learning_rate": 0.00017807340664261802, "loss": 0.0756, "step": 1052 }, { "epoch": 2.6, "learning_rate": 0.0001780329146707076, "loss": 0.0001, "step": 1053 }, { "epoch": 2.6, "learning_rate": 0.00017799238995763568, "loss": 0.0509, "step": 1054 }, { "epoch": 2.6, "learning_rate": 0.00017795183252040567, "loss": 0.0, "step": 1055 }, { "epoch": 2.61, "learning_rate": 0.00017791124237603477, "loss": 0.1026, "step": 1056 }, { "epoch": 2.61, "learning_rate": 0.00017787061954155378, "loss": 0.0004, "step": 1057 }, { "epoch": 2.61, "learning_rate": 0.00017782996403400736, "loss": 0.0395, "step": 1058 }, { "epoch": 2.61, "learning_rate": 0.00017778927587045373, "loss": 0.029, "step": 1059 }, { "epoch": 2.62, "learning_rate": 0.00017774855506796496, "loss": 0.0002, "step": 1060 }, { "epoch": 2.62, "learning_rate": 0.00017770780164362665, "loss": 0.0211, "step": 1061 }, { "epoch": 2.62, "learning_rate": 0.0001776670156145383, "loss": 0.0596, "step": 1062 }, { "epoch": 2.62, "learning_rate": 0.00017762619699781287, "loss": 0.0001, "step": 1063 }, { "epoch": 2.63, "learning_rate": 0.00017758534581057718, "loss": 0.0499, "step": 1064 }, { "epoch": 2.63, "learning_rate": 0.0001775444620699715, "loss": 0.043, "step": 1065 }, { "epoch": 2.63, "learning_rate": 0.00017750354579315004, "loss": 0.0527, "step": 1066 }, { "epoch": 2.63, "learning_rate": 0.00017746259699728042, "loss": 0.0855, "step": 1067 }, { "epoch": 2.64, "learning_rate": 0.00017742161569954398, "loss": 0.0002, "step": 1068 }, { "epoch": 2.64, "learning_rate": 0.0001773806019171358, "loss": 0.076, "step": 1069 }, { "epoch": 2.64, "learning_rate": 0.0001773395556672644, "loss": 0.03, "step": 1070 }, { "epoch": 2.64, "learning_rate": 0.0001772984769671521, "loss": 0.0593, "step": 1071 }, { "epoch": 2.65, "learning_rate": 0.0001772573658340347, "loss": 0.0493, "step": 1072 }, { "epoch": 2.65, "learning_rate": 0.0001772162222851617, "loss": 0.0007, "step": 1073 }, { "epoch": 2.65, "learning_rate": 0.0001771750463377962, "loss": 0.0033, "step": 1074 }, { "epoch": 2.65, "learning_rate": 0.00017713383800921478, "loss": 0.0019, "step": 1075 }, { "epoch": 2.66, "learning_rate": 0.00017709259731670774, "loss": 0.0031, "step": 1076 }, { "epoch": 2.66, "learning_rate": 0.00017705132427757895, "loss": 0.0028, "step": 1077 }, { "epoch": 2.66, "learning_rate": 0.00017701001890914572, "loss": 0.0003, "step": 1078 }, { "epoch": 2.66, "learning_rate": 0.00017696868122873909, "loss": 0.0786, "step": 1079 }, { "epoch": 2.67, "learning_rate": 0.00017692731125370354, "loss": 0.0496, "step": 1080 }, { "epoch": 2.67, "learning_rate": 0.00017688590900139715, "loss": 0.1294, "step": 1081 }, { "epoch": 2.67, "learning_rate": 0.00017684447448919154, "loss": 0.0002, "step": 1082 }, { "epoch": 2.67, "learning_rate": 0.0001768030077344719, "loss": 0.0002, "step": 1083 }, { "epoch": 2.68, "learning_rate": 0.00017676150875463686, "loss": 0.0001, "step": 1084 }, { "epoch": 2.68, "learning_rate": 0.00017671997756709863, "loss": 0.0803, "step": 1085 }, { "epoch": 2.68, "learning_rate": 0.0001766784141892829, "loss": 0.0232, "step": 1086 }, { "epoch": 2.68, "learning_rate": 0.00017663681863862895, "loss": 0.0001, "step": 1087 }, { "epoch": 2.69, "learning_rate": 0.0001765951909325895, "loss": 0.0522, "step": 1088 }, { "epoch": 2.69, "learning_rate": 0.00017655353108863068, "loss": 0.0008, "step": 1089 }, { "epoch": 2.69, "learning_rate": 0.00017651183912423228, "loss": 0.0002, "step": 1090 }, { "epoch": 2.69, "learning_rate": 0.00017647011505688743, "loss": 0.0001, "step": 1091 }, { "epoch": 2.7, "learning_rate": 0.0001764283589041028, "loss": 0.0002, "step": 1092 }, { "epoch": 2.7, "learning_rate": 0.00017638657068339843, "loss": 0.0428, "step": 1093 }, { "epoch": 2.7, "learning_rate": 0.00017634475041230797, "loss": 0.0351, "step": 1094 }, { "epoch": 2.7, "learning_rate": 0.00017630289810837834, "loss": 0.0647, "step": 1095 }, { "epoch": 2.71, "learning_rate": 0.00017626101378917004, "loss": 0.0181, "step": 1096 }, { "epoch": 2.71, "learning_rate": 0.00017621909747225697, "loss": 0.0277, "step": 1097 }, { "epoch": 2.71, "learning_rate": 0.0001761771491752264, "loss": 0.0002, "step": 1098 }, { "epoch": 2.71, "learning_rate": 0.00017613516891567906, "loss": 0.0001, "step": 1099 }, { "epoch": 2.72, "learning_rate": 0.0001760931567112291, "loss": 0.0006, "step": 1100 }, { "epoch": 2.72, "learning_rate": 0.00017605111257950408, "loss": 0.0002, "step": 1101 }, { "epoch": 2.72, "learning_rate": 0.0001760090365381449, "loss": 0.065, "step": 1102 }, { "epoch": 2.72, "learning_rate": 0.00017596692860480593, "loss": 0.0582, "step": 1103 }, { "epoch": 2.73, "learning_rate": 0.0001759247887971548, "loss": 0.0932, "step": 1104 }, { "epoch": 2.73, "learning_rate": 0.00017588261713287267, "loss": 0.0686, "step": 1105 }, { "epoch": 2.73, "learning_rate": 0.00017584041362965396, "loss": 0.0451, "step": 1106 }, { "epoch": 2.73, "learning_rate": 0.00017579817830520644, "loss": 0.0481, "step": 1107 }, { "epoch": 2.74, "learning_rate": 0.0001757559111772513, "loss": 0.0387, "step": 1108 }, { "epoch": 2.74, "learning_rate": 0.00017571361226352306, "loss": 0.0003, "step": 1109 }, { "epoch": 2.74, "learning_rate": 0.00017567128158176953, "loss": 0.0873, "step": 1110 }, { "epoch": 2.74, "learning_rate": 0.0001756289191497519, "loss": 0.0605, "step": 1111 }, { "epoch": 2.75, "learning_rate": 0.0001755865249852446, "loss": 0.0674, "step": 1112 }, { "epoch": 2.75, "learning_rate": 0.00017554409910603552, "loss": 0.0238, "step": 1113 }, { "epoch": 2.75, "learning_rate": 0.00017550164152992573, "loss": 0.0009, "step": 1114 }, { "epoch": 2.75, "learning_rate": 0.00017545915227472965, "loss": 0.074, "step": 1115 }, { "epoch": 2.76, "learning_rate": 0.00017541663135827492, "loss": 0.0561, "step": 1116 }, { "epoch": 2.76, "learning_rate": 0.00017537407879840265, "loss": 0.0347, "step": 1117 }, { "epoch": 2.76, "learning_rate": 0.000175331494612967, "loss": 0.0088, "step": 1118 }, { "epoch": 2.76, "learning_rate": 0.0001752888788198355, "loss": 0.0704, "step": 1119 }, { "epoch": 2.77, "learning_rate": 0.00017524623143688902, "loss": 0.0011, "step": 1120 }, { "epoch": 2.77, "learning_rate": 0.00017520355248202158, "loss": 0.1127, "step": 1121 }, { "epoch": 2.77, "learning_rate": 0.00017516084197314046, "loss": 0.0006, "step": 1122 }, { "epoch": 2.77, "learning_rate": 0.00017511809992816618, "loss": 0.0438, "step": 1123 }, { "epoch": 2.78, "learning_rate": 0.00017507532636503256, "loss": 0.0614, "step": 1124 }, { "epoch": 2.78, "learning_rate": 0.00017503252130168657, "loss": 0.0639, "step": 1125 }, { "epoch": 2.78, "learning_rate": 0.00017498968475608838, "loss": 0.0354, "step": 1126 }, { "epoch": 2.78, "learning_rate": 0.00017494681674621148, "loss": 0.027, "step": 1127 }, { "epoch": 2.79, "learning_rate": 0.00017490391729004244, "loss": 0.0001, "step": 1128 }, { "epoch": 2.79, "learning_rate": 0.00017486098640558107, "loss": 0.0002, "step": 1129 }, { "epoch": 2.79, "learning_rate": 0.00017481802411084042, "loss": 0.0538, "step": 1130 }, { "epoch": 2.79, "learning_rate": 0.0001747750304238466, "loss": 0.0674, "step": 1131 }, { "epoch": 2.8, "learning_rate": 0.00017473200536263905, "loss": 0.0974, "step": 1132 }, { "epoch": 2.8, "learning_rate": 0.0001746889489452702, "loss": 0.0006, "step": 1133 }, { "epoch": 2.8, "learning_rate": 0.0001746458611898058, "loss": 0.0003, "step": 1134 }, { "epoch": 2.8, "learning_rate": 0.0001746027421143246, "loss": 0.0523, "step": 1135 }, { "epoch": 2.8, "learning_rate": 0.00017455959173691863, "loss": 0.0002, "step": 1136 }, { "epoch": 2.81, "learning_rate": 0.00017451641007569296, "loss": 0.0055, "step": 1137 }, { "epoch": 2.81, "learning_rate": 0.00017447319714876579, "loss": 0.0359, "step": 1138 }, { "epoch": 2.81, "learning_rate": 0.00017442995297426846, "loss": 0.0001, "step": 1139 }, { "epoch": 2.81, "learning_rate": 0.00017438667757034546, "loss": 0.0001, "step": 1140 }, { "epoch": 2.82, "learning_rate": 0.00017434337095515432, "loss": 0.0604, "step": 1141 }, { "epoch": 2.82, "learning_rate": 0.00017430003314686569, "loss": 0.0363, "step": 1142 }, { "epoch": 2.82, "learning_rate": 0.00017425666416366332, "loss": 0.0013, "step": 1143 }, { "epoch": 2.82, "learning_rate": 0.00017421326402374405, "loss": 0.0001, "step": 1144 }, { "epoch": 2.83, "learning_rate": 0.00017416983274531775, "loss": 0.1052, "step": 1145 }, { "epoch": 2.83, "learning_rate": 0.00017412637034660734, "loss": 0.0003, "step": 1146 }, { "epoch": 2.83, "learning_rate": 0.0001740828768458489, "loss": 0.0323, "step": 1147 }, { "epoch": 2.83, "learning_rate": 0.0001740393522612915, "loss": 0.0317, "step": 1148 }, { "epoch": 2.84, "learning_rate": 0.00017399579661119715, "loss": 0.0001, "step": 1149 }, { "epoch": 2.84, "learning_rate": 0.0001739522099138411, "loss": 0.0003, "step": 1150 }, { "epoch": 2.84, "learning_rate": 0.00017390859218751142, "loss": 0.0001, "step": 1151 }, { "epoch": 2.84, "learning_rate": 0.00017386494345050942, "loss": 0.0008, "step": 1152 }, { "epoch": 2.85, "learning_rate": 0.0001738212637211492, "loss": 0.0334, "step": 1153 }, { "epoch": 2.85, "learning_rate": 0.000173777553017758, "loss": 0.0001, "step": 1154 }, { "epoch": 2.85, "learning_rate": 0.00017373381135867604, "loss": 0.0549, "step": 1155 }, { "epoch": 2.85, "learning_rate": 0.00017369003876225642, "loss": 0.0469, "step": 1156 }, { "epoch": 2.86, "learning_rate": 0.00017364623524686543, "loss": 0.0053, "step": 1157 }, { "epoch": 2.86, "learning_rate": 0.00017360240083088213, "loss": 0.0833, "step": 1158 }, { "epoch": 2.86, "learning_rate": 0.00017355853553269865, "loss": 0.0145, "step": 1159 }, { "epoch": 2.86, "learning_rate": 0.00017351463937072004, "loss": 0.0002, "step": 1160 }, { "epoch": 2.87, "learning_rate": 0.00017347071236336437, "loss": 0.042, "step": 1161 }, { "epoch": 2.87, "learning_rate": 0.00017342675452906248, "loss": 0.0001, "step": 1162 }, { "epoch": 2.87, "learning_rate": 0.00017338276588625839, "loss": 0.002, "step": 1163 }, { "epoch": 2.87, "learning_rate": 0.00017333874645340884, "loss": 0.0003, "step": 1164 }, { "epoch": 2.88, "learning_rate": 0.0001732946962489836, "loss": 0.0416, "step": 1165 }, { "epoch": 2.88, "learning_rate": 0.0001732506152914653, "loss": 0.0002, "step": 1166 }, { "epoch": 2.88, "learning_rate": 0.0001732065035993495, "loss": 0.0005, "step": 1167 }, { "epoch": 2.88, "learning_rate": 0.00017316236119114463, "loss": 0.0531, "step": 1168 }, { "epoch": 2.89, "learning_rate": 0.00017311818808537206, "loss": 0.0492, "step": 1169 }, { "epoch": 2.89, "learning_rate": 0.00017307398430056593, "loss": 0.0001, "step": 1170 }, { "epoch": 2.89, "learning_rate": 0.00017302974985527344, "loss": 0.0001, "step": 1171 }, { "epoch": 2.89, "learning_rate": 0.00017298548476805446, "loss": 0.0628, "step": 1172 }, { "epoch": 2.9, "learning_rate": 0.00017294118905748182, "loss": 0.0001, "step": 1173 }, { "epoch": 2.9, "learning_rate": 0.00017289686274214118, "loss": 0.0185, "step": 1174 }, { "epoch": 2.9, "learning_rate": 0.000172852505840631, "loss": 0.0002, "step": 1175 }, { "epoch": 2.9, "learning_rate": 0.00017280811837156268, "loss": 0.0005, "step": 1176 }, { "epoch": 2.91, "learning_rate": 0.00017276370035356034, "loss": 0.0421, "step": 1177 }, { "epoch": 2.91, "learning_rate": 0.00017271925180526094, "loss": 0.0001, "step": 1178 }, { "epoch": 2.91, "learning_rate": 0.00017267477274531432, "loss": 0.0001, "step": 1179 }, { "epoch": 2.91, "learning_rate": 0.00017263026319238301, "loss": 0.0001, "step": 1180 }, { "epoch": 2.92, "learning_rate": 0.0001725857231651424, "loss": 0.0001, "step": 1181 }, { "epoch": 2.92, "learning_rate": 0.0001725411526822807, "loss": 0.0469, "step": 1182 }, { "epoch": 2.92, "learning_rate": 0.00017249655176249882, "loss": 0.0001, "step": 1183 }, { "epoch": 2.92, "learning_rate": 0.0001724519204245105, "loss": 0.0001, "step": 1184 }, { "epoch": 2.93, "learning_rate": 0.00017240725868704218, "loss": 0.0003, "step": 1185 }, { "epoch": 2.93, "learning_rate": 0.0001723625665688331, "loss": 0.0418, "step": 1186 }, { "epoch": 2.93, "learning_rate": 0.00017231784408863532, "loss": 0.0004, "step": 1187 }, { "epoch": 2.93, "learning_rate": 0.00017227309126521348, "loss": 0.0002, "step": 1188 }, { "epoch": 2.94, "learning_rate": 0.00017222830811734502, "loss": 0.0245, "step": 1189 }, { "epoch": 2.94, "learning_rate": 0.00017218349466382023, "loss": 0.0334, "step": 1190 }, { "epoch": 2.94, "learning_rate": 0.00017213865092344187, "loss": 0.0004, "step": 1191 }, { "epoch": 2.94, "learning_rate": 0.00017209377691502565, "loss": 0.0001, "step": 1192 }, { "epoch": 2.95, "learning_rate": 0.00017204887265739977, "loss": 0.0459, "step": 1193 }, { "epoch": 2.95, "learning_rate": 0.0001720039381694053, "loss": 0.0371, "step": 1194 }, { "epoch": 2.95, "learning_rate": 0.0001719589734698959, "loss": 0.0001, "step": 1195 }, { "epoch": 2.95, "learning_rate": 0.00017191397857773788, "loss": 0.0014, "step": 1196 }, { "epoch": 2.96, "learning_rate": 0.00017186895351181032, "loss": 0.0002, "step": 1197 }, { "epoch": 2.96, "learning_rate": 0.00017182389829100485, "loss": 0.0001, "step": 1198 }, { "epoch": 2.96, "learning_rate": 0.00017177881293422583, "loss": 0.0413, "step": 1199 }, { "epoch": 2.96, "learning_rate": 0.00017173369746039025, "loss": 0.0001, "step": 1200 }, { "epoch": 2.96, "eval_loss": 0.014002716168761253, "eval_runtime": 126.0996, "eval_samples_per_second": 1.015, "eval_steps_per_second": 0.341, "step": 1200 }, { "epoch": 2.97, "learning_rate": 0.00017168855188842773, "loss": 0.0004, "step": 1201 }, { "epoch": 2.97, "learning_rate": 0.00017164337623728045, "loss": 0.0012, "step": 1202 }, { "epoch": 2.97, "learning_rate": 0.0001715981705259033, "loss": 0.0001, "step": 1203 }, { "epoch": 2.97, "learning_rate": 0.00017155293477326384, "loss": 0.0009, "step": 1204 }, { "epoch": 2.98, "learning_rate": 0.00017150766899834204, "loss": 0.0307, "step": 1205 }, { "epoch": 2.98, "learning_rate": 0.00017146237322013068, "loss": 0.024, "step": 1206 }, { "epoch": 2.98, "learning_rate": 0.00017141704745763492, "loss": 0.0002, "step": 1207 }, { "epoch": 2.98, "learning_rate": 0.00017137169172987268, "loss": 0.0417, "step": 1208 }, { "epoch": 2.99, "learning_rate": 0.00017132630605587435, "loss": 0.0655, "step": 1209 }, { "epoch": 2.99, "learning_rate": 0.00017128089045468294, "loss": 0.0002, "step": 1210 }, { "epoch": 2.99, "learning_rate": 0.00017123544494535397, "loss": 0.0001, "step": 1211 }, { "epoch": 2.99, "learning_rate": 0.00017118996954695553, "loss": 0.091, "step": 1212 }, { "epoch": 3.0, "learning_rate": 0.00017114446427856828, "loss": 0.0471, "step": 1213 }, { "epoch": 3.0, "learning_rate": 0.00017109892915928535, "loss": 0.0436, "step": 1214 }, { "epoch": 3.0, "learning_rate": 0.00017105336420821247, "loss": 0.0, "step": 1215 } ], "logging_steps": 1, "max_steps": 4860, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "total_flos": 1.8178730075703214e+18, "train_batch_size": 3, "trial_name": null, "trial_params": null }