{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.629735935706085, "eval_steps": 500, "global_step": 1378, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019999989520267283, "loss": 1.377, "step": 1 }, { "epoch": 0.01, "learning_rate": 0.00019999958081091102, "loss": 1.2613, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00019999905682537348, "loss": 1.1324, "step": 3 }, { "epoch": 0.02, "learning_rate": 0.00019999832324715846, "loss": 1.0451, "step": 4 }, { "epoch": 0.02, "learning_rate": 0.00019999738007780348, "loss": 1.0011, "step": 5 }, { "epoch": 0.03, "learning_rate": 0.0001999962273192854, "loss": 0.9464, "step": 6 }, { "epoch": 0.03, "learning_rate": 0.00019999486497402038, "loss": 0.9247, "step": 7 }, { "epoch": 0.04, "learning_rate": 0.00019999329304486377, "loss": 0.8769, "step": 8 }, { "epoch": 0.04, "learning_rate": 0.00019999151153511023, "loss": 0.8683, "step": 9 }, { "epoch": 0.05, "learning_rate": 0.00019998952044849376, "loss": 0.8325, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00019998731978918756, "loss": 0.7987, "step": 11 }, { "epoch": 0.06, "learning_rate": 0.00019998490956180405, "loss": 0.7877, "step": 12 }, { "epoch": 0.06, "learning_rate": 0.000199982289771395, "loss": 0.7496, "step": 13 }, { "epoch": 0.06, "learning_rate": 0.00019997946042345127, "loss": 0.7487, "step": 14 }, { "epoch": 0.07, "learning_rate": 0.00019997642152390314, "loss": 0.7427, "step": 15 }, { "epoch": 0.07, "learning_rate": 0.0001999731730791199, "loss": 0.718, "step": 16 }, { "epoch": 0.08, "learning_rate": 0.00019996971509591012, "loss": 0.7124, "step": 17 }, { "epoch": 0.08, "learning_rate": 0.00019996604758152158, "loss": 0.7083, "step": 18 }, { "epoch": 0.09, "learning_rate": 0.00019996217054364115, "loss": 0.6978, "step": 19 }, { "epoch": 0.09, "learning_rate": 0.00019995808399039496, "loss": 0.6929, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00019995378793034814, "loss": 0.6645, "step": 21 }, { "epoch": 0.1, "learning_rate": 0.000199949282372505, "loss": 0.6619, "step": 22 }, { "epoch": 0.11, "learning_rate": 0.00019994456732630903, "loss": 0.666, "step": 23 }, { "epoch": 0.11, "learning_rate": 0.00019993964280164264, "loss": 0.6554, "step": 24 }, { "epoch": 0.11, "learning_rate": 0.00019993450880882735, "loss": 0.6517, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.00019992916535862385, "loss": 0.6453, "step": 26 }, { "epoch": 0.12, "learning_rate": 0.00019992361246223158, "loss": 0.644, "step": 27 }, { "epoch": 0.13, "learning_rate": 0.00019991785013128923, "loss": 0.6352, "step": 28 }, { "epoch": 0.13, "learning_rate": 0.0001999118783778743, "loss": 0.6269, "step": 29 }, { "epoch": 0.14, "learning_rate": 0.00019990569721450326, "loss": 0.6274, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.00019989930665413147, "loss": 0.6236, "step": 31 }, { "epoch": 0.15, "learning_rate": 0.00019989270671015325, "loss": 0.6175, "step": 32 }, { "epoch": 0.15, "learning_rate": 0.00019988589739640172, "loss": 0.6203, "step": 33 }, { "epoch": 0.16, "learning_rate": 0.00019987887872714878, "loss": 0.6084, "step": 34 }, { "epoch": 0.16, "learning_rate": 0.00019987165071710527, "loss": 0.6063, "step": 35 }, { "epoch": 0.17, "learning_rate": 0.0001998642133814207, "loss": 0.6059, "step": 36 }, { "epoch": 0.17, "learning_rate": 0.00019985656673568328, "loss": 0.6011, "step": 37 }, { "epoch": 0.17, "learning_rate": 0.00019984871079591998, "loss": 0.6, "step": 38 }, { "epoch": 0.18, "learning_rate": 0.00019984064557859648, "loss": 0.6012, "step": 39 }, { "epoch": 0.18, "learning_rate": 0.00019983237110061697, "loss": 0.5957, "step": 40 }, { "epoch": 0.19, "learning_rate": 0.0001998238873793244, "loss": 0.6014, "step": 41 }, { "epoch": 0.19, "learning_rate": 0.0001998151944325001, "loss": 0.5976, "step": 42 }, { "epoch": 0.2, "learning_rate": 0.0001998062922783641, "loss": 0.5934, "step": 43 }, { "epoch": 0.2, "learning_rate": 0.0001997971809355748, "loss": 0.5947, "step": 44 }, { "epoch": 0.21, "learning_rate": 0.0001997878604232291, "loss": 0.5852, "step": 45 }, { "epoch": 0.21, "learning_rate": 0.00019977833076086229, "loss": 0.5919, "step": 46 }, { "epoch": 0.22, "learning_rate": 0.00019976859196844804, "loss": 0.5752, "step": 47 }, { "epoch": 0.22, "learning_rate": 0.00019975864406639833, "loss": 0.5744, "step": 48 }, { "epoch": 0.23, "learning_rate": 0.00019974848707556345, "loss": 0.5782, "step": 49 }, { "epoch": 0.23, "learning_rate": 0.00019973812101723188, "loss": 0.5759, "step": 50 }, { "epoch": 0.23, "learning_rate": 0.00019972754591313034, "loss": 0.5714, "step": 51 }, { "epoch": 0.24, "learning_rate": 0.00019971676178542368, "loss": 0.5622, "step": 52 }, { "epoch": 0.24, "learning_rate": 0.00019970576865671488, "loss": 0.5687, "step": 53 }, { "epoch": 0.25, "learning_rate": 0.0001996945665500449, "loss": 0.5712, "step": 54 }, { "epoch": 0.25, "learning_rate": 0.0001996831554888928, "loss": 0.5655, "step": 55 }, { "epoch": 0.26, "learning_rate": 0.00019967153549717553, "loss": 0.5612, "step": 56 }, { "epoch": 0.26, "learning_rate": 0.000199659706599248, "loss": 0.5619, "step": 57 }, { "epoch": 0.27, "learning_rate": 0.0001996476688199029, "loss": 0.5624, "step": 58 }, { "epoch": 0.27, "learning_rate": 0.0001996354221843708, "loss": 0.5701, "step": 59 }, { "epoch": 0.28, "learning_rate": 0.00019962296671832003, "loss": 0.5636, "step": 60 }, { "epoch": 0.28, "learning_rate": 0.0001996103024478565, "loss": 0.5652, "step": 61 }, { "epoch": 0.28, "learning_rate": 0.00019959742939952392, "loss": 0.5652, "step": 62 }, { "epoch": 0.29, "learning_rate": 0.00019958434760030346, "loss": 0.5581, "step": 63 }, { "epoch": 0.29, "learning_rate": 0.0001995710570776139, "loss": 0.5585, "step": 64 }, { "epoch": 0.3, "learning_rate": 0.00019955755785931145, "loss": 0.5539, "step": 65 }, { "epoch": 0.3, "learning_rate": 0.00019954384997368976, "loss": 0.5562, "step": 66 }, { "epoch": 0.31, "learning_rate": 0.00019952993344947984, "loss": 0.5521, "step": 67 }, { "epoch": 0.31, "learning_rate": 0.00019951580831584993, "loss": 0.5522, "step": 68 }, { "epoch": 0.32, "learning_rate": 0.0001995014746024056, "loss": 0.5546, "step": 69 }, { "epoch": 0.32, "learning_rate": 0.00019948693233918952, "loss": 0.5513, "step": 70 }, { "epoch": 0.33, "learning_rate": 0.00019947218155668152, "loss": 0.5574, "step": 71 }, { "epoch": 0.33, "learning_rate": 0.00019945722228579846, "loss": 0.5457, "step": 72 }, { "epoch": 0.34, "learning_rate": 0.00019944205455789415, "loss": 0.5487, "step": 73 }, { "epoch": 0.34, "learning_rate": 0.00019942667840475931, "loss": 0.5464, "step": 74 }, { "epoch": 0.34, "learning_rate": 0.0001994110938586216, "loss": 0.5435, "step": 75 }, { "epoch": 0.35, "learning_rate": 0.00019939530095214536, "loss": 0.5516, "step": 76 }, { "epoch": 0.35, "learning_rate": 0.00019937929971843165, "loss": 0.5482, "step": 77 }, { "epoch": 0.36, "learning_rate": 0.00019936309019101826, "loss": 0.542, "step": 78 }, { "epoch": 0.36, "learning_rate": 0.00019934667240387944, "loss": 0.5375, "step": 79 }, { "epoch": 0.37, "learning_rate": 0.00019933004639142605, "loss": 0.5394, "step": 80 }, { "epoch": 0.37, "learning_rate": 0.0001993132121885053, "loss": 0.5413, "step": 81 }, { "epoch": 0.38, "learning_rate": 0.00019929616983040073, "loss": 0.5382, "step": 82 }, { "epoch": 0.38, "learning_rate": 0.00019927891935283225, "loss": 0.5352, "step": 83 }, { "epoch": 0.39, "learning_rate": 0.00019926146079195594, "loss": 0.5415, "step": 84 }, { "epoch": 0.39, "learning_rate": 0.00019924379418436404, "loss": 0.5315, "step": 85 }, { "epoch": 0.39, "learning_rate": 0.00019922591956708478, "loss": 0.5352, "step": 86 }, { "epoch": 0.4, "learning_rate": 0.0001992078369775824, "loss": 0.5491, "step": 87 }, { "epoch": 0.4, "learning_rate": 0.00019918954645375706, "loss": 0.5348, "step": 88 }, { "epoch": 0.41, "learning_rate": 0.00019917104803394468, "loss": 0.534, "step": 89 }, { "epoch": 0.41, "learning_rate": 0.000199152341756917, "loss": 0.5417, "step": 90 }, { "epoch": 0.42, "learning_rate": 0.00019913342766188138, "loss": 0.5326, "step": 91 }, { "epoch": 0.42, "learning_rate": 0.00019911430578848074, "loss": 0.5308, "step": 92 }, { "epoch": 0.43, "learning_rate": 0.00019909497617679348, "loss": 0.5311, "step": 93 }, { "epoch": 0.43, "learning_rate": 0.00019907543886733348, "loss": 0.5348, "step": 94 }, { "epoch": 0.44, "learning_rate": 0.00019905569390104986, "loss": 0.5337, "step": 95 }, { "epoch": 0.44, "learning_rate": 0.00019903574131932702, "loss": 0.5378, "step": 96 }, { "epoch": 0.45, "learning_rate": 0.00019901558116398448, "loss": 0.5289, "step": 97 }, { "epoch": 0.45, "learning_rate": 0.0001989952134772769, "loss": 0.529, "step": 98 }, { "epoch": 0.45, "learning_rate": 0.00019897463830189388, "loss": 0.528, "step": 99 }, { "epoch": 0.46, "learning_rate": 0.00019895385568095982, "loss": 0.5201, "step": 100 }, { "epoch": 0.46, "learning_rate": 0.000198932865658034, "loss": 0.537, "step": 101 }, { "epoch": 0.47, "learning_rate": 0.00019891166827711037, "loss": 0.5262, "step": 102 }, { "epoch": 0.47, "learning_rate": 0.00019889026358261758, "loss": 0.5379, "step": 103 }, { "epoch": 0.48, "learning_rate": 0.00019886865161941866, "loss": 0.531, "step": 104 }, { "epoch": 0.48, "learning_rate": 0.00019884683243281116, "loss": 0.525, "step": 105 }, { "epoch": 0.49, "learning_rate": 0.0001988248060685269, "loss": 0.5221, "step": 106 }, { "epoch": 0.49, "learning_rate": 0.00019880257257273197, "loss": 0.5204, "step": 107 }, { "epoch": 0.5, "learning_rate": 0.00019878013199202662, "loss": 0.5289, "step": 108 }, { "epoch": 0.5, "learning_rate": 0.0001987574843734451, "loss": 0.518, "step": 109 }, { "epoch": 0.51, "learning_rate": 0.00019873462976445553, "loss": 0.5253, "step": 110 }, { "epoch": 0.51, "learning_rate": 0.00019871156821296007, "loss": 0.5255, "step": 111 }, { "epoch": 0.51, "learning_rate": 0.00019868829976729443, "loss": 0.5276, "step": 112 }, { "epoch": 0.52, "learning_rate": 0.00019866482447622802, "loss": 0.5192, "step": 113 }, { "epoch": 0.52, "learning_rate": 0.00019864114238896384, "loss": 0.5165, "step": 114 }, { "epoch": 0.53, "learning_rate": 0.00019861725355513823, "loss": 0.5185, "step": 115 }, { "epoch": 0.53, "learning_rate": 0.00019859315802482096, "loss": 0.5205, "step": 116 }, { "epoch": 0.54, "learning_rate": 0.0001985688558485149, "loss": 0.5187, "step": 117 }, { "epoch": 0.54, "learning_rate": 0.0001985443470771562, "loss": 0.5214, "step": 118 }, { "epoch": 0.55, "learning_rate": 0.00019851963176211387, "loss": 0.5195, "step": 119 }, { "epoch": 0.55, "learning_rate": 0.00019849470995518992, "loss": 0.5286, "step": 120 }, { "epoch": 0.56, "learning_rate": 0.0001984695817086191, "loss": 0.5184, "step": 121 }, { "epoch": 0.56, "learning_rate": 0.0001984442470750689, "loss": 0.5131, "step": 122 }, { "epoch": 0.56, "learning_rate": 0.00019841870610763938, "loss": 0.5225, "step": 123 }, { "epoch": 0.57, "learning_rate": 0.00019839295885986296, "loss": 0.51, "step": 124 }, { "epoch": 0.57, "learning_rate": 0.00019836700538570457, "loss": 0.5201, "step": 125 }, { "epoch": 0.58, "learning_rate": 0.00019834084573956128, "loss": 0.517, "step": 126 }, { "epoch": 0.58, "learning_rate": 0.0001983144799762623, "loss": 0.5176, "step": 127 }, { "epoch": 0.59, "learning_rate": 0.00019828790815106887, "loss": 0.5255, "step": 128 }, { "epoch": 0.59, "learning_rate": 0.0001982611303196741, "loss": 0.5221, "step": 129 }, { "epoch": 0.6, "learning_rate": 0.0001982341465382029, "loss": 0.5107, "step": 130 }, { "epoch": 0.6, "learning_rate": 0.00019820695686321185, "loss": 0.5196, "step": 131 }, { "epoch": 0.61, "learning_rate": 0.00019817956135168908, "loss": 0.513, "step": 132 }, { "epoch": 0.61, "learning_rate": 0.00019815196006105402, "loss": 0.5175, "step": 133 }, { "epoch": 0.62, "learning_rate": 0.0001981241530491576, "loss": 0.5188, "step": 134 }, { "epoch": 0.62, "learning_rate": 0.00019809614037428176, "loss": 0.5123, "step": 135 }, { "epoch": 0.62, "learning_rate": 0.00019806792209513968, "loss": 0.5089, "step": 136 }, { "epoch": 0.63, "learning_rate": 0.0001980394982708752, "loss": 0.5117, "step": 137 }, { "epoch": 0.63, "eval_loss": 0.5121904611587524, "eval_runtime": 28.8865, "eval_samples_per_second": 6.924, "eval_steps_per_second": 0.865, "step": 137 }, { "epoch": 1.0, "learning_rate": 0.00019801086896106327, "loss": 0.5101, "step": 138 }, { "epoch": 1.01, "learning_rate": 0.00019798203422570932, "loss": 0.5079, "step": 139 }, { "epoch": 1.01, "learning_rate": 0.00019795299412524945, "loss": 0.5082, "step": 140 }, { "epoch": 1.01, "learning_rate": 0.00019792374872055018, "loss": 0.5159, "step": 141 }, { "epoch": 1.02, "learning_rate": 0.00019789429807290823, "loss": 0.5135, "step": 142 }, { "epoch": 1.02, "learning_rate": 0.00019786464224405065, "loss": 0.5111, "step": 143 }, { "epoch": 1.03, "learning_rate": 0.00019783478129613448, "loss": 0.51, "step": 144 }, { "epoch": 1.03, "learning_rate": 0.00019780471529174664, "loss": 0.5014, "step": 145 }, { "epoch": 1.04, "learning_rate": 0.00019777444429390387, "loss": 0.5169, "step": 146 }, { "epoch": 1.04, "learning_rate": 0.00019774396836605255, "loss": 0.5144, "step": 147 }, { "epoch": 1.05, "learning_rate": 0.00019771328757206864, "loss": 0.5075, "step": 148 }, { "epoch": 1.05, "learning_rate": 0.00019768240197625742, "loss": 0.5074, "step": 149 }, { "epoch": 1.06, "learning_rate": 0.00019765131164335345, "loss": 0.5056, "step": 150 }, { "epoch": 1.06, "learning_rate": 0.0001976200166385204, "loss": 0.5025, "step": 151 }, { "epoch": 1.06, "learning_rate": 0.0001975885170273509, "loss": 0.5089, "step": 152 }, { "epoch": 1.07, "learning_rate": 0.00019755681287586652, "loss": 0.5077, "step": 153 }, { "epoch": 1.07, "learning_rate": 0.00019752490425051743, "loss": 0.5185, "step": 154 }, { "epoch": 1.08, "learning_rate": 0.00019749279121818235, "loss": 0.5096, "step": 155 }, { "epoch": 1.08, "learning_rate": 0.0001974604738461686, "loss": 0.5089, "step": 156 }, { "epoch": 1.09, "learning_rate": 0.00019742795220221155, "loss": 0.5075, "step": 157 }, { "epoch": 1.09, "learning_rate": 0.00019739522635447488, "loss": 0.509, "step": 158 }, { "epoch": 1.1, "learning_rate": 0.00019736229637155018, "loss": 0.5009, "step": 159 }, { "epoch": 1.1, "learning_rate": 0.000197329162322457, "loss": 0.502, "step": 160 }, { "epoch": 1.11, "learning_rate": 0.0001972958242766425, "loss": 0.5106, "step": 161 }, { "epoch": 1.11, "learning_rate": 0.00019726228230398146, "loss": 0.4997, "step": 162 }, { "epoch": 1.12, "learning_rate": 0.00019722853647477598, "loss": 0.4935, "step": 163 }, { "epoch": 1.12, "learning_rate": 0.0001971945868597556, "loss": 0.5075, "step": 164 }, { "epoch": 1.12, "learning_rate": 0.00019716043353007693, "loss": 0.5019, "step": 165 }, { "epoch": 1.13, "learning_rate": 0.00019712607655732338, "loss": 0.5011, "step": 166 }, { "epoch": 1.13, "learning_rate": 0.00019709151601350546, "loss": 0.5019, "step": 167 }, { "epoch": 1.14, "learning_rate": 0.00019705675197106016, "loss": 0.5005, "step": 168 }, { "epoch": 1.14, "learning_rate": 0.0001970217845028511, "loss": 0.5074, "step": 169 }, { "epoch": 1.15, "learning_rate": 0.00019698661368216817, "loss": 0.4961, "step": 170 }, { "epoch": 1.15, "learning_rate": 0.00019695123958272758, "loss": 0.499, "step": 171 }, { "epoch": 1.16, "learning_rate": 0.00019691566227867153, "loss": 0.4997, "step": 172 }, { "epoch": 1.16, "learning_rate": 0.00019687988184456814, "loss": 0.502, "step": 173 }, { "epoch": 1.17, "learning_rate": 0.00019684389835541129, "loss": 0.5003, "step": 174 }, { "epoch": 1.17, "learning_rate": 0.00019680771188662044, "loss": 0.4884, "step": 175 }, { "epoch": 1.18, "learning_rate": 0.0001967713225140405, "loss": 0.4958, "step": 176 }, { "epoch": 1.18, "learning_rate": 0.0001967347303139417, "loss": 0.4957, "step": 177 }, { "epoch": 1.18, "learning_rate": 0.00019669793536301926, "loss": 0.5016, "step": 178 }, { "epoch": 1.19, "learning_rate": 0.00019666093773839345, "loss": 0.5025, "step": 179 }, { "epoch": 1.19, "learning_rate": 0.00019662373751760934, "loss": 0.5035, "step": 180 }, { "epoch": 1.2, "learning_rate": 0.00019658633477863662, "loss": 0.5028, "step": 181 }, { "epoch": 1.2, "learning_rate": 0.00019654872959986937, "loss": 0.5011, "step": 182 }, { "epoch": 1.21, "learning_rate": 0.00019651092206012603, "loss": 0.4915, "step": 183 }, { "epoch": 1.21, "learning_rate": 0.00019647291223864928, "loss": 0.494, "step": 184 }, { "epoch": 1.22, "learning_rate": 0.0001964347002151056, "loss": 0.5037, "step": 185 }, { "epoch": 1.22, "learning_rate": 0.00019639628606958533, "loss": 0.4965, "step": 186 }, { "epoch": 1.23, "learning_rate": 0.00019635766988260254, "loss": 0.4897, "step": 187 }, { "epoch": 1.23, "learning_rate": 0.0001963188517350946, "loss": 0.4952, "step": 188 }, { "epoch": 1.23, "learning_rate": 0.00019627983170842234, "loss": 0.4903, "step": 189 }, { "epoch": 1.24, "learning_rate": 0.00019624060988436966, "loss": 0.4982, "step": 190 }, { "epoch": 1.24, "learning_rate": 0.00019620118634514335, "loss": 0.4999, "step": 191 }, { "epoch": 1.25, "learning_rate": 0.00019616156117337305, "loss": 0.4917, "step": 192 }, { "epoch": 1.25, "learning_rate": 0.00019612173445211106, "loss": 0.4878, "step": 193 }, { "epoch": 1.26, "learning_rate": 0.00019608170626483199, "loss": 0.4946, "step": 194 }, { "epoch": 1.26, "learning_rate": 0.00019604147669543282, "loss": 0.4968, "step": 195 }, { "epoch": 1.27, "learning_rate": 0.0001960010458282326, "loss": 0.4891, "step": 196 }, { "epoch": 1.27, "learning_rate": 0.00019596041374797218, "loss": 0.4952, "step": 197 }, { "epoch": 1.28, "learning_rate": 0.00019591958053981432, "loss": 0.4928, "step": 198 }, { "epoch": 1.28, "learning_rate": 0.00019587854628934319, "loss": 0.4998, "step": 199 }, { "epoch": 1.29, "learning_rate": 0.0001958373110825644, "loss": 0.496, "step": 200 }, { "epoch": 1.29, "learning_rate": 0.00019579587500590472, "loss": 0.4926, "step": 201 }, { "epoch": 1.29, "learning_rate": 0.00019575423814621198, "loss": 0.4939, "step": 202 }, { "epoch": 1.3, "learning_rate": 0.0001957124005907548, "loss": 0.495, "step": 203 }, { "epoch": 1.3, "learning_rate": 0.00019567036242722249, "loss": 0.4962, "step": 204 }, { "epoch": 1.31, "learning_rate": 0.00019562812374372473, "loss": 0.4931, "step": 205 }, { "epoch": 1.31, "learning_rate": 0.00019558568462879158, "loss": 0.4843, "step": 206 }, { "epoch": 1.32, "learning_rate": 0.00019554304517137316, "loss": 0.4963, "step": 207 }, { "epoch": 1.32, "learning_rate": 0.00019550020546083949, "loss": 0.4979, "step": 208 }, { "epoch": 1.33, "learning_rate": 0.0001954571655869803, "loss": 0.4925, "step": 209 }, { "epoch": 1.33, "learning_rate": 0.00019541392564000488, "loss": 0.4953, "step": 210 }, { "epoch": 1.34, "learning_rate": 0.00019537048571054185, "loss": 0.4921, "step": 211 }, { "epoch": 1.34, "learning_rate": 0.000195326845889639, "loss": 0.4917, "step": 212 }, { "epoch": 1.35, "learning_rate": 0.000195283006268763, "loss": 0.4956, "step": 213 }, { "epoch": 1.35, "learning_rate": 0.00019523896693979936, "loss": 0.4901, "step": 214 }, { "epoch": 1.35, "learning_rate": 0.0001951947279950522, "loss": 0.4865, "step": 215 }, { "epoch": 1.36, "learning_rate": 0.000195150289527244, "loss": 0.4871, "step": 216 }, { "epoch": 1.36, "learning_rate": 0.00019510565162951537, "loss": 0.4948, "step": 217 }, { "epoch": 1.37, "learning_rate": 0.00019506081439542495, "loss": 0.4861, "step": 218 }, { "epoch": 1.37, "learning_rate": 0.00019501577791894922, "loss": 0.4852, "step": 219 }, { "epoch": 1.38, "learning_rate": 0.00019497054229448223, "loss": 0.4754, "step": 220 }, { "epoch": 1.38, "learning_rate": 0.00019492510761683537, "loss": 0.4892, "step": 221 }, { "epoch": 1.39, "learning_rate": 0.00019487947398123736, "loss": 0.4894, "step": 222 }, { "epoch": 1.39, "learning_rate": 0.00019483364148333384, "loss": 0.4906, "step": 223 }, { "epoch": 1.4, "learning_rate": 0.00019478761021918728, "loss": 0.4932, "step": 224 }, { "epoch": 1.4, "learning_rate": 0.00019474138028527675, "loss": 0.4863, "step": 225 }, { "epoch": 1.4, "learning_rate": 0.00019469495177849768, "loss": 0.494, "step": 226 }, { "epoch": 1.41, "learning_rate": 0.00019464832479616182, "loss": 0.4893, "step": 227 }, { "epoch": 1.41, "learning_rate": 0.00019460149943599674, "loss": 0.4942, "step": 228 }, { "epoch": 1.42, "learning_rate": 0.00019455447579614594, "loss": 0.4893, "step": 229 }, { "epoch": 1.42, "learning_rate": 0.0001945072539751685, "loss": 0.4855, "step": 230 }, { "epoch": 1.43, "learning_rate": 0.00019445983407203872, "loss": 0.4828, "step": 231 }, { "epoch": 1.43, "learning_rate": 0.00019441221618614628, "loss": 0.4928, "step": 232 }, { "epoch": 1.44, "learning_rate": 0.00019436440041729569, "loss": 0.483, "step": 233 }, { "epoch": 1.44, "learning_rate": 0.00019431638686570623, "loss": 0.4854, "step": 234 }, { "epoch": 1.45, "learning_rate": 0.00019426817563201177, "loss": 0.4906, "step": 235 }, { "epoch": 1.45, "learning_rate": 0.00019421976681726046, "loss": 0.4856, "step": 236 }, { "epoch": 1.46, "learning_rate": 0.00019417116052291458, "loss": 0.4872, "step": 237 }, { "epoch": 1.46, "learning_rate": 0.00019412235685085035, "loss": 0.4786, "step": 238 }, { "epoch": 1.46, "learning_rate": 0.0001940733559033576, "loss": 0.49, "step": 239 }, { "epoch": 1.47, "learning_rate": 0.00019402415778313977, "loss": 0.4899, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.00019397476259331344, "loss": 0.4839, "step": 241 }, { "epoch": 1.48, "learning_rate": 0.0001939251704374083, "loss": 0.4817, "step": 242 }, { "epoch": 1.48, "learning_rate": 0.0001938753814193669, "loss": 0.4818, "step": 243 }, { "epoch": 1.49, "learning_rate": 0.00019382539564354433, "loss": 0.4865, "step": 244 }, { "epoch": 1.49, "learning_rate": 0.00019377521321470805, "loss": 0.4935, "step": 245 }, { "epoch": 1.5, "learning_rate": 0.00019372483423803783, "loss": 0.4879, "step": 246 }, { "epoch": 1.5, "learning_rate": 0.00019367425881912525, "loss": 0.4833, "step": 247 }, { "epoch": 1.51, "learning_rate": 0.00019362348706397373, "loss": 0.4782, "step": 248 }, { "epoch": 1.51, "learning_rate": 0.00019357251907899814, "loss": 0.4805, "step": 249 }, { "epoch": 1.51, "learning_rate": 0.00019352135497102463, "loss": 0.4881, "step": 250 }, { "epoch": 1.52, "learning_rate": 0.00019346999484729047, "loss": 0.4766, "step": 251 }, { "epoch": 1.52, "learning_rate": 0.00019341843881544372, "loss": 0.4799, "step": 252 }, { "epoch": 1.53, "learning_rate": 0.00019336668698354304, "loss": 0.4875, "step": 253 }, { "epoch": 1.53, "learning_rate": 0.00019331473946005754, "loss": 0.4834, "step": 254 }, { "epoch": 1.54, "learning_rate": 0.00019326259635386644, "loss": 0.4852, "step": 255 }, { "epoch": 1.54, "learning_rate": 0.00019321025777425892, "loss": 0.477, "step": 256 }, { "epoch": 1.55, "learning_rate": 0.0001931577238309338, "loss": 0.4849, "step": 257 }, { "epoch": 1.55, "learning_rate": 0.00019310499463399947, "loss": 0.4854, "step": 258 }, { "epoch": 1.56, "learning_rate": 0.00019305207029397348, "loss": 0.4869, "step": 259 }, { "epoch": 1.56, "learning_rate": 0.0001929989509217824, "loss": 0.4772, "step": 260 }, { "epoch": 1.57, "learning_rate": 0.00019294563662876165, "loss": 0.4867, "step": 261 }, { "epoch": 1.57, "learning_rate": 0.00019289212752665507, "loss": 0.4806, "step": 262 }, { "epoch": 1.57, "learning_rate": 0.00019283842372761493, "loss": 0.4839, "step": 263 }, { "epoch": 1.58, "learning_rate": 0.00019278452534420145, "loss": 0.4857, "step": 264 }, { "epoch": 1.58, "learning_rate": 0.00019273043248938288, "loss": 0.477, "step": 265 }, { "epoch": 1.59, "learning_rate": 0.00019267614527653488, "loss": 0.4868, "step": 266 }, { "epoch": 1.59, "learning_rate": 0.00019262166381944052, "loss": 0.485, "step": 267 }, { "epoch": 1.6, "learning_rate": 0.00019256698823229008, "loss": 0.485, "step": 268 }, { "epoch": 1.6, "learning_rate": 0.00019251211862968059, "loss": 0.4824, "step": 269 }, { "epoch": 1.61, "learning_rate": 0.0001924570551266159, "loss": 0.4858, "step": 270 }, { "epoch": 1.61, "learning_rate": 0.00019240179783850612, "loss": 0.4793, "step": 271 }, { "epoch": 1.62, "learning_rate": 0.00019234634688116757, "loss": 0.4762, "step": 272 }, { "epoch": 1.62, "learning_rate": 0.00019229070237082252, "loss": 0.4808, "step": 273 }, { "epoch": 1.63, "learning_rate": 0.00019223486442409882, "loss": 0.4704, "step": 274 }, { "epoch": 1.63, "learning_rate": 0.00019217883315802991, "loss": 0.4733, "step": 275 }, { "epoch": 1.63, "eval_loss": 0.4869104027748108, "eval_runtime": 28.3583, "eval_samples_per_second": 7.053, "eval_steps_per_second": 0.882, "step": 275 }, { "epoch": 2.0, "learning_rate": 0.00019212260869005428, "loss": 0.4777, "step": 276 }, { "epoch": 2.01, "learning_rate": 0.0001920661911380154, "loss": 0.4813, "step": 277 }, { "epoch": 2.01, "learning_rate": 0.00019200958062016144, "loss": 0.48, "step": 278 }, { "epoch": 2.01, "learning_rate": 0.0001919527772551451, "loss": 0.4831, "step": 279 }, { "epoch": 2.02, "learning_rate": 0.00019189578116202307, "loss": 0.4649, "step": 280 }, { "epoch": 2.02, "learning_rate": 0.0001918385924602562, "loss": 0.4728, "step": 281 }, { "epoch": 2.03, "learning_rate": 0.00019178121126970895, "loss": 0.4836, "step": 282 }, { "epoch": 2.03, "learning_rate": 0.0001917236377106492, "loss": 0.4833, "step": 283 }, { "epoch": 2.04, "learning_rate": 0.00019166587190374808, "loss": 0.4733, "step": 284 }, { "epoch": 2.04, "learning_rate": 0.00019160791397007957, "loss": 0.483, "step": 285 }, { "epoch": 2.05, "learning_rate": 0.0001915497640311205, "loss": 0.4731, "step": 286 }, { "epoch": 2.05, "learning_rate": 0.0001914914222087499, "loss": 0.4825, "step": 287 }, { "epoch": 2.06, "learning_rate": 0.00019143288862524926, "loss": 0.4756, "step": 288 }, { "epoch": 2.06, "learning_rate": 0.00019137416340330175, "loss": 0.4796, "step": 289 }, { "epoch": 2.07, "learning_rate": 0.00019131524666599233, "loss": 0.479, "step": 290 }, { "epoch": 2.07, "learning_rate": 0.00019125613853680727, "loss": 0.4773, "step": 291 }, { "epoch": 2.07, "learning_rate": 0.00019119683913963417, "loss": 0.4782, "step": 292 }, { "epoch": 2.08, "learning_rate": 0.00019113734859876126, "loss": 0.48, "step": 293 }, { "epoch": 2.08, "learning_rate": 0.00019107766703887764, "loss": 0.4789, "step": 294 }, { "epoch": 2.09, "learning_rate": 0.00019101779458507263, "loss": 0.4792, "step": 295 }, { "epoch": 2.09, "learning_rate": 0.00019095773136283567, "loss": 0.4819, "step": 296 }, { "epoch": 2.1, "learning_rate": 0.00019089747749805608, "loss": 0.4715, "step": 297 }, { "epoch": 2.1, "learning_rate": 0.00019083703311702279, "loss": 0.4677, "step": 298 }, { "epoch": 2.11, "learning_rate": 0.00019077639834642388, "loss": 0.4688, "step": 299 }, { "epoch": 2.11, "learning_rate": 0.00019071557331334669, "loss": 0.4726, "step": 300 }, { "epoch": 2.12, "learning_rate": 0.00019065455814527718, "loss": 0.4777, "step": 301 }, { "epoch": 2.12, "learning_rate": 0.00019059335297009992, "loss": 0.4722, "step": 302 }, { "epoch": 2.13, "learning_rate": 0.00019053195791609765, "loss": 0.4827, "step": 303 }, { "epoch": 2.13, "learning_rate": 0.00019047037311195118, "loss": 0.4705, "step": 304 }, { "epoch": 2.13, "learning_rate": 0.00019040859868673887, "loss": 0.4725, "step": 305 }, { "epoch": 2.14, "learning_rate": 0.00019034663476993668, "loss": 0.4715, "step": 306 }, { "epoch": 2.14, "learning_rate": 0.00019028448149141766, "loss": 0.4779, "step": 307 }, { "epoch": 2.15, "learning_rate": 0.00019022213898145176, "loss": 0.4637, "step": 308 }, { "epoch": 2.15, "learning_rate": 0.00019015960737070556, "loss": 0.4794, "step": 309 }, { "epoch": 2.16, "learning_rate": 0.0001900968867902419, "loss": 0.4749, "step": 310 }, { "epoch": 2.16, "learning_rate": 0.00019003397737151989, "loss": 0.4724, "step": 311 }, { "epoch": 2.17, "learning_rate": 0.0001899708792463942, "loss": 0.4665, "step": 312 }, { "epoch": 2.17, "learning_rate": 0.00018990759254711517, "loss": 0.4668, "step": 313 }, { "epoch": 2.18, "learning_rate": 0.0001898441174063283, "loss": 0.475, "step": 314 }, { "epoch": 2.18, "learning_rate": 0.00018978045395707418, "loss": 0.4661, "step": 315 }, { "epoch": 2.18, "learning_rate": 0.0001897166023327879, "loss": 0.4732, "step": 316 }, { "epoch": 2.19, "learning_rate": 0.00018965256266729913, "loss": 0.4749, "step": 317 }, { "epoch": 2.19, "learning_rate": 0.00018958833509483155, "loss": 0.4762, "step": 318 }, { "epoch": 2.2, "learning_rate": 0.00018952391975000268, "loss": 0.4732, "step": 319 }, { "epoch": 2.2, "learning_rate": 0.00018945931676782373, "loss": 0.4773, "step": 320 }, { "epoch": 2.21, "learning_rate": 0.00018939452628369898, "loss": 0.4759, "step": 321 }, { "epoch": 2.21, "learning_rate": 0.00018932954843342591, "loss": 0.4681, "step": 322 }, { "epoch": 2.22, "learning_rate": 0.00018926438335319462, "loss": 0.4693, "step": 323 }, { "epoch": 2.22, "learning_rate": 0.00018919903117958756, "loss": 0.4684, "step": 324 }, { "epoch": 2.23, "learning_rate": 0.0001891334920495795, "loss": 0.4665, "step": 325 }, { "epoch": 2.23, "learning_rate": 0.00018906776610053686, "loss": 0.4683, "step": 326 }, { "epoch": 2.24, "learning_rate": 0.00018900185347021776, "loss": 0.4708, "step": 327 }, { "epoch": 2.24, "learning_rate": 0.00018893575429677157, "loss": 0.4659, "step": 328 }, { "epoch": 2.24, "learning_rate": 0.00018886946871873856, "loss": 0.466, "step": 329 }, { "epoch": 2.25, "learning_rate": 0.0001888029968750498, "loss": 0.4724, "step": 330 }, { "epoch": 2.25, "learning_rate": 0.00018873633890502674, "loss": 0.4686, "step": 331 }, { "epoch": 2.26, "learning_rate": 0.00018866949494838084, "loss": 0.469, "step": 332 }, { "epoch": 2.26, "learning_rate": 0.0001886024651452136, "loss": 0.4659, "step": 333 }, { "epoch": 2.27, "learning_rate": 0.00018853524963601575, "loss": 0.4707, "step": 334 }, { "epoch": 2.27, "learning_rate": 0.0001884678485616675, "loss": 0.4687, "step": 335 }, { "epoch": 2.28, "learning_rate": 0.00018840026206343784, "loss": 0.471, "step": 336 }, { "epoch": 2.28, "learning_rate": 0.00018833249028298455, "loss": 0.4644, "step": 337 }, { "epoch": 2.29, "learning_rate": 0.00018826453336235358, "loss": 0.4708, "step": 338 }, { "epoch": 2.29, "learning_rate": 0.000188196391443979, "loss": 0.4664, "step": 339 }, { "epoch": 2.3, "learning_rate": 0.00018812806467068268, "loss": 0.4771, "step": 340 }, { "epoch": 2.3, "learning_rate": 0.0001880595531856738, "loss": 0.4685, "step": 341 }, { "epoch": 2.3, "learning_rate": 0.00018799085713254888, "loss": 0.4635, "step": 342 }, { "epoch": 2.31, "learning_rate": 0.0001879219766552911, "loss": 0.4751, "step": 343 }, { "epoch": 2.31, "learning_rate": 0.0001878529118982703, "loss": 0.469, "step": 344 }, { "epoch": 2.32, "learning_rate": 0.00018778366300624245, "loss": 0.4735, "step": 345 }, { "epoch": 2.32, "learning_rate": 0.0001877142301243496, "loss": 0.4617, "step": 346 }, { "epoch": 2.33, "learning_rate": 0.00018764461339811935, "loss": 0.4664, "step": 347 }, { "epoch": 2.33, "learning_rate": 0.0001875748129734646, "loss": 0.4717, "step": 348 }, { "epoch": 2.34, "learning_rate": 0.00018750482899668332, "loss": 0.4676, "step": 349 }, { "epoch": 2.34, "learning_rate": 0.00018743466161445823, "loss": 0.4665, "step": 350 }, { "epoch": 2.35, "learning_rate": 0.00018736431097385634, "loss": 0.4635, "step": 351 }, { "epoch": 2.35, "learning_rate": 0.0001872937772223289, "loss": 0.4676, "step": 352 }, { "epoch": 2.35, "learning_rate": 0.0001872230605077108, "loss": 0.4666, "step": 353 }, { "epoch": 2.36, "learning_rate": 0.00018715216097822058, "loss": 0.4673, "step": 354 }, { "epoch": 2.36, "learning_rate": 0.00018708107878245977, "loss": 0.4632, "step": 355 }, { "epoch": 2.37, "learning_rate": 0.00018700981406941298, "loss": 0.4659, "step": 356 }, { "epoch": 2.37, "learning_rate": 0.0001869383669884471, "loss": 0.4683, "step": 357 }, { "epoch": 2.38, "learning_rate": 0.00018686673768931154, "loss": 0.4715, "step": 358 }, { "epoch": 2.38, "learning_rate": 0.00018679492632213735, "loss": 0.4748, "step": 359 }, { "epoch": 2.39, "learning_rate": 0.00018672293303743738, "loss": 0.4596, "step": 360 }, { "epoch": 2.39, "learning_rate": 0.00018665075798610567, "loss": 0.4586, "step": 361 }, { "epoch": 2.4, "learning_rate": 0.0001865784013194173, "loss": 0.4609, "step": 362 }, { "epoch": 2.4, "learning_rate": 0.00018650586318902802, "loss": 0.467, "step": 363 }, { "epoch": 2.41, "learning_rate": 0.00018643314374697378, "loss": 0.4602, "step": 364 }, { "epoch": 2.41, "learning_rate": 0.00018636024314567067, "loss": 0.4709, "step": 365 }, { "epoch": 2.41, "learning_rate": 0.0001862871615379145, "loss": 0.471, "step": 366 }, { "epoch": 2.42, "learning_rate": 0.00018621389907688037, "loss": 0.4605, "step": 367 }, { "epoch": 2.42, "learning_rate": 0.0001861404559161225, "loss": 0.468, "step": 368 }, { "epoch": 2.43, "learning_rate": 0.00018606683220957383, "loss": 0.4632, "step": 369 }, { "epoch": 2.43, "learning_rate": 0.00018599302811154572, "loss": 0.4633, "step": 370 }, { "epoch": 2.44, "learning_rate": 0.00018591904377672757, "loss": 0.4692, "step": 371 }, { "epoch": 2.44, "learning_rate": 0.00018584487936018661, "loss": 0.4693, "step": 372 }, { "epoch": 2.45, "learning_rate": 0.00018577053501736752, "loss": 0.4566, "step": 373 }, { "epoch": 2.45, "learning_rate": 0.0001856960109040921, "loss": 0.4679, "step": 374 }, { "epoch": 2.46, "learning_rate": 0.00018562130717655878, "loss": 0.4611, "step": 375 }, { "epoch": 2.46, "learning_rate": 0.0001855464239913427, "loss": 0.4654, "step": 376 }, { "epoch": 2.46, "learning_rate": 0.000185471361505395, "loss": 0.4626, "step": 377 }, { "epoch": 2.47, "learning_rate": 0.00018539611987604258, "loss": 0.4667, "step": 378 }, { "epoch": 2.47, "learning_rate": 0.0001853206992609879, "loss": 0.4624, "step": 379 }, { "epoch": 2.48, "learning_rate": 0.00018524509981830852, "loss": 0.4602, "step": 380 }, { "epoch": 2.48, "learning_rate": 0.0001851693217064569, "loss": 0.4644, "step": 381 }, { "epoch": 2.49, "learning_rate": 0.00018509336508425986, "loss": 0.4664, "step": 382 }, { "epoch": 2.49, "learning_rate": 0.0001850172301109184, "loss": 0.4665, "step": 383 }, { "epoch": 2.5, "learning_rate": 0.00018494091694600738, "loss": 0.4629, "step": 384 }, { "epoch": 2.5, "learning_rate": 0.00018486442574947511, "loss": 0.4609, "step": 385 }, { "epoch": 2.51, "learning_rate": 0.00018478775668164305, "loss": 0.4593, "step": 386 }, { "epoch": 2.51, "learning_rate": 0.00018471090990320547, "loss": 0.4625, "step": 387 }, { "epoch": 2.52, "learning_rate": 0.0001846338855752291, "loss": 0.4573, "step": 388 }, { "epoch": 2.52, "learning_rate": 0.00018455668385915284, "loss": 0.4578, "step": 389 }, { "epoch": 2.52, "learning_rate": 0.00018447930491678733, "loss": 0.4649, "step": 390 }, { "epoch": 2.53, "learning_rate": 0.0001844017489103147, "loss": 0.4641, "step": 391 }, { "epoch": 2.53, "learning_rate": 0.00018432401600228823, "loss": 0.4545, "step": 392 }, { "epoch": 2.54, "learning_rate": 0.0001842461063556319, "loss": 0.4574, "step": 393 }, { "epoch": 2.54, "learning_rate": 0.00018416802013364016, "loss": 0.4604, "step": 394 }, { "epoch": 2.55, "learning_rate": 0.00018408975749997759, "loss": 0.4628, "step": 395 }, { "epoch": 2.55, "learning_rate": 0.00018401131861867846, "loss": 0.469, "step": 396 }, { "epoch": 2.56, "learning_rate": 0.0001839327036541465, "loss": 0.4679, "step": 397 }, { "epoch": 2.56, "learning_rate": 0.00018385391277115444, "loss": 0.464, "step": 398 }, { "epoch": 2.57, "learning_rate": 0.00018377494613484378, "loss": 0.4658, "step": 399 }, { "epoch": 2.57, "learning_rate": 0.00018369580391072433, "loss": 0.4631, "step": 400 }, { "epoch": 2.58, "learning_rate": 0.00018361648626467406, "loss": 0.4567, "step": 401 }, { "epoch": 2.58, "learning_rate": 0.0001835369933629384, "loss": 0.4617, "step": 402 }, { "epoch": 2.58, "learning_rate": 0.00018345732537213027, "loss": 0.4632, "step": 403 }, { "epoch": 2.59, "learning_rate": 0.00018337748245922955, "loss": 0.4609, "step": 404 }, { "epoch": 2.59, "learning_rate": 0.00018329746479158265, "loss": 0.4571, "step": 405 }, { "epoch": 2.6, "learning_rate": 0.0001832172725369024, "loss": 0.4662, "step": 406 }, { "epoch": 2.6, "learning_rate": 0.00018313690586326743, "loss": 0.4649, "step": 407 }, { "epoch": 2.61, "learning_rate": 0.00018305636493912202, "loss": 0.4582, "step": 408 }, { "epoch": 2.61, "learning_rate": 0.00018297564993327562, "loss": 0.4627, "step": 409 }, { "epoch": 2.62, "learning_rate": 0.00018289476101490256, "loss": 0.4547, "step": 410 }, { "epoch": 2.62, "learning_rate": 0.0001828136983535417, "loss": 0.4623, "step": 411 }, { "epoch": 2.63, "learning_rate": 0.00018273246211909604, "loss": 0.4549, "step": 412 }, { "epoch": 2.63, "learning_rate": 0.00018265105248183242, "loss": 0.4585, "step": 413 }, { "epoch": 2.63, "eval_loss": 0.4760552644729614, "eval_runtime": 28.2621, "eval_samples_per_second": 7.077, "eval_steps_per_second": 0.885, "step": 413 }, { "epoch": 3.0, "learning_rate": 0.000182569469612381, "loss": 0.4544, "step": 414 }, { "epoch": 3.01, "learning_rate": 0.00018248771368173524, "loss": 0.4522, "step": 415 }, { "epoch": 3.01, "learning_rate": 0.00018240578486125112, "loss": 0.4587, "step": 416 }, { "epoch": 3.02, "learning_rate": 0.00018232368332264708, "loss": 0.4565, "step": 417 }, { "epoch": 3.02, "learning_rate": 0.00018224140923800354, "loss": 0.4661, "step": 418 }, { "epoch": 3.02, "learning_rate": 0.0001821589627797626, "loss": 0.4553, "step": 419 }, { "epoch": 3.03, "learning_rate": 0.00018207634412072764, "loss": 0.4566, "step": 420 }, { "epoch": 3.03, "learning_rate": 0.00018199355343406296, "loss": 0.4582, "step": 421 }, { "epoch": 3.04, "learning_rate": 0.00018191059089329333, "loss": 0.4577, "step": 422 }, { "epoch": 3.04, "learning_rate": 0.00018182745667230394, "loss": 0.4609, "step": 423 }, { "epoch": 3.05, "learning_rate": 0.00018174415094533957, "loss": 0.4656, "step": 424 }, { "epoch": 3.05, "learning_rate": 0.0001816606738870046, "loss": 0.4561, "step": 425 }, { "epoch": 3.06, "learning_rate": 0.00018157702567226248, "loss": 0.4582, "step": 426 }, { "epoch": 3.06, "learning_rate": 0.00018149320647643541, "loss": 0.4552, "step": 427 }, { "epoch": 3.07, "learning_rate": 0.00018140921647520392, "loss": 0.4623, "step": 428 }, { "epoch": 3.07, "learning_rate": 0.00018132505584460658, "loss": 0.4602, "step": 429 }, { "epoch": 3.08, "learning_rate": 0.00018124072476103956, "loss": 0.4647, "step": 430 }, { "epoch": 3.08, "learning_rate": 0.00018115622340125631, "loss": 0.4606, "step": 431 }, { "epoch": 3.08, "learning_rate": 0.00018107155194236718, "loss": 0.4661, "step": 432 }, { "epoch": 3.09, "learning_rate": 0.00018098671056183897, "loss": 0.4525, "step": 433 }, { "epoch": 3.09, "learning_rate": 0.00018090169943749476, "loss": 0.4534, "step": 434 }, { "epoch": 3.1, "learning_rate": 0.00018081651874751327, "loss": 0.4552, "step": 435 }, { "epoch": 3.1, "learning_rate": 0.00018073116867042862, "loss": 0.4536, "step": 436 }, { "epoch": 3.11, "learning_rate": 0.00018064564938513012, "loss": 0.4575, "step": 437 }, { "epoch": 3.11, "learning_rate": 0.00018055996107086157, "loss": 0.4505, "step": 438 }, { "epoch": 3.12, "learning_rate": 0.0001804741039072211, "loss": 0.4557, "step": 439 }, { "epoch": 3.12, "learning_rate": 0.00018038807807416068, "loss": 0.4523, "step": 440 }, { "epoch": 3.13, "learning_rate": 0.00018030188375198593, "loss": 0.4535, "step": 441 }, { "epoch": 3.13, "learning_rate": 0.00018021552112135552, "loss": 0.4579, "step": 442 }, { "epoch": 3.13, "learning_rate": 0.00018012899036328093, "loss": 0.4509, "step": 443 }, { "epoch": 3.14, "learning_rate": 0.00018004229165912596, "loss": 0.4583, "step": 444 }, { "epoch": 3.14, "learning_rate": 0.00017995542519060647, "loss": 0.4574, "step": 445 }, { "epoch": 3.15, "learning_rate": 0.00017986839113978996, "loss": 0.451, "step": 446 }, { "epoch": 3.15, "learning_rate": 0.00017978118968909508, "loss": 0.4511, "step": 447 }, { "epoch": 3.16, "learning_rate": 0.0001796938210212915, "loss": 0.4586, "step": 448 }, { "epoch": 3.16, "learning_rate": 0.00017960628531949927, "loss": 0.4465, "step": 449 }, { "epoch": 3.17, "learning_rate": 0.00017951858276718844, "loss": 0.4502, "step": 450 }, { "epoch": 3.17, "learning_rate": 0.00017943071354817897, "loss": 0.4553, "step": 451 }, { "epoch": 3.18, "learning_rate": 0.00017934267784664002, "loss": 0.456, "step": 452 }, { "epoch": 3.18, "learning_rate": 0.00017925447584708973, "loss": 0.448, "step": 453 }, { "epoch": 3.19, "learning_rate": 0.00017916610773439473, "loss": 0.4509, "step": 454 }, { "epoch": 3.19, "learning_rate": 0.00017907757369376985, "loss": 0.4544, "step": 455 }, { "epoch": 3.19, "learning_rate": 0.0001789888739107778, "loss": 0.4512, "step": 456 }, { "epoch": 3.2, "learning_rate": 0.00017890000857132853, "loss": 0.4561, "step": 457 }, { "epoch": 3.2, "learning_rate": 0.00017881097786167898, "loss": 0.462, "step": 458 }, { "epoch": 3.21, "learning_rate": 0.00017872178196843286, "loss": 0.4512, "step": 459 }, { "epoch": 3.21, "learning_rate": 0.00017863242107853995, "loss": 0.455, "step": 460 }, { "epoch": 3.22, "learning_rate": 0.00017854289537929587, "loss": 0.4562, "step": 461 }, { "epoch": 3.22, "learning_rate": 0.00017845320505834175, "loss": 0.4512, "step": 462 }, { "epoch": 3.23, "learning_rate": 0.00017836335030366367, "loss": 0.4532, "step": 463 }, { "epoch": 3.23, "learning_rate": 0.00017827333130359242, "loss": 0.4526, "step": 464 }, { "epoch": 3.24, "learning_rate": 0.000178183148246803, "loss": 0.4487, "step": 465 }, { "epoch": 3.24, "learning_rate": 0.00017809280132231425, "loss": 0.449, "step": 466 }, { "epoch": 3.25, "learning_rate": 0.00017800229071948854, "loss": 0.4507, "step": 467 }, { "epoch": 3.25, "learning_rate": 0.00017791161662803124, "loss": 0.4473, "step": 468 }, { "epoch": 3.25, "learning_rate": 0.0001778207792379904, "loss": 0.4518, "step": 469 }, { "epoch": 3.26, "learning_rate": 0.0001777297787397563, "loss": 0.452, "step": 470 }, { "epoch": 3.26, "learning_rate": 0.0001776386153240612, "loss": 0.4549, "step": 471 }, { "epoch": 3.27, "learning_rate": 0.00017754728918197864, "loss": 0.4459, "step": 472 }, { "epoch": 3.27, "learning_rate": 0.00017745580050492344, "loss": 0.4518, "step": 473 }, { "epoch": 3.28, "learning_rate": 0.00017736414948465087, "loss": 0.4495, "step": 474 }, { "epoch": 3.28, "learning_rate": 0.00017727233631325664, "loss": 0.4539, "step": 475 }, { "epoch": 3.29, "learning_rate": 0.0001771803611831762, "loss": 0.4498, "step": 476 }, { "epoch": 3.29, "learning_rate": 0.00017708822428718458, "loss": 0.4586, "step": 477 }, { "epoch": 3.3, "learning_rate": 0.00017699592581839574, "loss": 0.4531, "step": 478 }, { "epoch": 3.3, "learning_rate": 0.00017690346597026233, "loss": 0.4536, "step": 479 }, { "epoch": 3.3, "learning_rate": 0.00017681084493657525, "loss": 0.4485, "step": 480 }, { "epoch": 3.31, "learning_rate": 0.00017671806291146325, "loss": 0.4497, "step": 481 }, { "epoch": 3.31, "learning_rate": 0.00017662512008939247, "loss": 0.448, "step": 482 }, { "epoch": 3.32, "learning_rate": 0.00017653201666516615, "loss": 0.4465, "step": 483 }, { "epoch": 3.32, "learning_rate": 0.00017643875283392406, "loss": 0.4516, "step": 484 }, { "epoch": 3.33, "learning_rate": 0.0001763453287911422, "loss": 0.4529, "step": 485 }, { "epoch": 3.33, "learning_rate": 0.00017625174473263235, "loss": 0.4552, "step": 486 }, { "epoch": 3.34, "learning_rate": 0.00017615800085454171, "loss": 0.4474, "step": 487 }, { "epoch": 3.34, "learning_rate": 0.00017606409735335246, "loss": 0.4542, "step": 488 }, { "epoch": 3.35, "learning_rate": 0.00017597003442588132, "loss": 0.4471, "step": 489 }, { "epoch": 3.35, "learning_rate": 0.0001758758122692791, "loss": 0.4474, "step": 490 }, { "epoch": 3.36, "learning_rate": 0.00017578143108103048, "loss": 0.4514, "step": 491 }, { "epoch": 3.36, "learning_rate": 0.0001756868910589533, "loss": 0.4539, "step": 492 }, { "epoch": 3.36, "learning_rate": 0.00017559219240119846, "loss": 0.4504, "step": 493 }, { "epoch": 3.37, "learning_rate": 0.00017549733530624928, "loss": 0.4444, "step": 494 }, { "epoch": 3.37, "learning_rate": 0.00017540231997292114, "loss": 0.4485, "step": 495 }, { "epoch": 3.38, "learning_rate": 0.00017530714660036112, "loss": 0.4517, "step": 496 }, { "epoch": 3.38, "learning_rate": 0.00017521181538804746, "loss": 0.4478, "step": 497 }, { "epoch": 3.39, "learning_rate": 0.00017511632653578936, "loss": 0.4464, "step": 498 }, { "epoch": 3.39, "learning_rate": 0.00017502068024372633, "loss": 0.4463, "step": 499 }, { "epoch": 3.4, "learning_rate": 0.00017492487671232784, "loss": 0.4464, "step": 500 }, { "epoch": 3.4, "learning_rate": 0.00017482891614239304, "loss": 0.4496, "step": 501 }, { "epoch": 3.41, "learning_rate": 0.0001747327987350501, "loss": 0.4479, "step": 502 }, { "epoch": 3.41, "learning_rate": 0.00017463652469175599, "loss": 0.444, "step": 503 }, { "epoch": 3.42, "learning_rate": 0.00017454009421429597, "loss": 0.4507, "step": 504 }, { "epoch": 3.42, "learning_rate": 0.00017444350750478316, "loss": 0.4502, "step": 505 }, { "epoch": 3.42, "learning_rate": 0.0001743467647656581, "loss": 0.4535, "step": 506 }, { "epoch": 3.43, "learning_rate": 0.00017424986619968844, "loss": 0.444, "step": 507 }, { "epoch": 3.43, "learning_rate": 0.00017415281200996842, "loss": 0.4498, "step": 508 }, { "epoch": 3.44, "learning_rate": 0.00017405560239991833, "loss": 0.454, "step": 509 }, { "epoch": 3.44, "learning_rate": 0.00017395823757328444, "loss": 0.4479, "step": 510 }, { "epoch": 3.45, "learning_rate": 0.00017386071773413817, "loss": 0.4431, "step": 511 }, { "epoch": 3.45, "learning_rate": 0.00017376304308687587, "loss": 0.4455, "step": 512 }, { "epoch": 3.46, "learning_rate": 0.0001736652138362184, "loss": 0.4512, "step": 513 }, { "epoch": 3.46, "learning_rate": 0.00017356723018721067, "loss": 0.4504, "step": 514 }, { "epoch": 3.47, "learning_rate": 0.00017346909234522109, "loss": 0.4422, "step": 515 }, { "epoch": 3.47, "learning_rate": 0.00017337080051594138, "loss": 0.4535, "step": 516 }, { "epoch": 3.47, "learning_rate": 0.00017327235490538598, "loss": 0.4457, "step": 517 }, { "epoch": 3.48, "learning_rate": 0.00017317375571989158, "loss": 0.4519, "step": 518 }, { "epoch": 3.48, "learning_rate": 0.00017307500316611686, "loss": 0.446, "step": 519 }, { "epoch": 3.49, "learning_rate": 0.00017297609745104184, "loss": 0.4501, "step": 520 }, { "epoch": 3.49, "learning_rate": 0.00017287703878196762, "loss": 0.4427, "step": 521 }, { "epoch": 3.5, "learning_rate": 0.0001727778273665159, "loss": 0.4429, "step": 522 }, { "epoch": 3.5, "learning_rate": 0.00017267846341262848, "loss": 0.4423, "step": 523 }, { "epoch": 3.51, "learning_rate": 0.0001725789471285669, "loss": 0.4458, "step": 524 }, { "epoch": 3.51, "learning_rate": 0.000172479278722912, "loss": 0.4452, "step": 525 }, { "epoch": 3.52, "learning_rate": 0.0001723794584045634, "loss": 0.4426, "step": 526 }, { "epoch": 3.52, "learning_rate": 0.00017227948638273916, "loss": 0.4484, "step": 527 }, { "epoch": 3.53, "learning_rate": 0.0001721793628669753, "loss": 0.4431, "step": 528 }, { "epoch": 3.53, "learning_rate": 0.00017207908806712535, "loss": 0.4424, "step": 529 }, { "epoch": 3.53, "learning_rate": 0.0001719786621933599, "loss": 0.4469, "step": 530 }, { "epoch": 3.54, "learning_rate": 0.00017187808545616627, "loss": 0.4492, "step": 531 }, { "epoch": 3.54, "learning_rate": 0.00017177735806634789, "loss": 0.4464, "step": 532 }, { "epoch": 3.55, "learning_rate": 0.00017167648023502395, "loss": 0.4395, "step": 533 }, { "epoch": 3.55, "learning_rate": 0.0001715754521736291, "loss": 0.4483, "step": 534 }, { "epoch": 3.56, "learning_rate": 0.00017147427409391265, "loss": 0.4494, "step": 535 }, { "epoch": 3.56, "learning_rate": 0.00017137294620793848, "loss": 0.4474, "step": 536 }, { "epoch": 3.57, "learning_rate": 0.00017127146872808447, "loss": 0.4502, "step": 537 }, { "epoch": 3.57, "learning_rate": 0.00017116984186704194, "loss": 0.4417, "step": 538 }, { "epoch": 3.58, "learning_rate": 0.00017106806583781534, "loss": 0.4472, "step": 539 }, { "epoch": 3.58, "learning_rate": 0.00017096614085372185, "loss": 0.4442, "step": 540 }, { "epoch": 3.58, "learning_rate": 0.00017086406712839072, "loss": 0.4437, "step": 541 }, { "epoch": 3.59, "learning_rate": 0.0001707618448757631, "loss": 0.4473, "step": 542 }, { "epoch": 3.59, "learning_rate": 0.00017065947431009126, "loss": 0.4435, "step": 543 }, { "epoch": 3.6, "learning_rate": 0.00017055695564593853, "loss": 0.4458, "step": 544 }, { "epoch": 3.6, "learning_rate": 0.0001704542890981785, "loss": 0.4442, "step": 545 }, { "epoch": 3.61, "learning_rate": 0.00017035147488199482, "loss": 0.4444, "step": 546 }, { "epoch": 3.61, "learning_rate": 0.00017024851321288052, "loss": 0.4455, "step": 547 }, { "epoch": 3.62, "learning_rate": 0.0001701454043066378, "loss": 0.4467, "step": 548 }, { "epoch": 3.62, "learning_rate": 0.00017004214837937738, "loss": 0.4439, "step": 549 }, { "epoch": 3.63, "learning_rate": 0.00016993874564751822, "loss": 0.4376, "step": 550 }, { "epoch": 3.63, "learning_rate": 0.00016983519632778686, "loss": 0.4408, "step": 551 }, { "epoch": 3.63, "eval_loss": 0.4704599976539612, "eval_runtime": 28.2725, "eval_samples_per_second": 7.074, "eval_steps_per_second": 0.884, "step": 551 }, { "epoch": 4.0, "learning_rate": 0.00016973150063721718, "loss": 0.4355, "step": 552 }, { "epoch": 4.01, "learning_rate": 0.0001696276587931498, "loss": 0.4467, "step": 553 }, { "epoch": 4.01, "learning_rate": 0.00016952367101323162, "loss": 0.438, "step": 554 }, { "epoch": 4.02, "learning_rate": 0.00016941953751541553, "loss": 0.4454, "step": 555 }, { "epoch": 4.02, "learning_rate": 0.00016931525851795977, "loss": 0.4458, "step": 556 }, { "epoch": 4.03, "learning_rate": 0.0001692108342394275, "loss": 0.4359, "step": 557 }, { "epoch": 4.03, "learning_rate": 0.00016910626489868649, "loss": 0.4477, "step": 558 }, { "epoch": 4.03, "learning_rate": 0.00016900155071490844, "loss": 0.4363, "step": 559 }, { "epoch": 4.04, "learning_rate": 0.00016889669190756868, "loss": 0.448, "step": 560 }, { "epoch": 4.04, "learning_rate": 0.00016879168869644566, "loss": 0.4406, "step": 561 }, { "epoch": 4.05, "learning_rate": 0.00016868654130162056, "loss": 0.446, "step": 562 }, { "epoch": 4.05, "learning_rate": 0.00016858124994347665, "loss": 0.4448, "step": 563 }, { "epoch": 4.06, "learning_rate": 0.00016847581484269897, "loss": 0.4432, "step": 564 }, { "epoch": 4.06, "learning_rate": 0.00016837023622027388, "loss": 0.4376, "step": 565 }, { "epoch": 4.07, "learning_rate": 0.00016826451429748852, "loss": 0.4498, "step": 566 }, { "epoch": 4.07, "learning_rate": 0.00016815864929593043, "loss": 0.4434, "step": 567 }, { "epoch": 4.08, "learning_rate": 0.00016805264143748694, "loss": 0.4466, "step": 568 }, { "epoch": 4.08, "learning_rate": 0.00016794649094434486, "loss": 0.4378, "step": 569 }, { "epoch": 4.08, "learning_rate": 0.00016784019803899, "loss": 0.4401, "step": 570 }, { "epoch": 4.09, "learning_rate": 0.00016773376294420652, "loss": 0.454, "step": 571 }, { "epoch": 4.09, "learning_rate": 0.00016762718588307678, "loss": 0.4412, "step": 572 }, { "epoch": 4.1, "learning_rate": 0.00016752046707898055, "loss": 0.443, "step": 573 }, { "epoch": 4.1, "learning_rate": 0.00016741360675559473, "loss": 0.429, "step": 574 }, { "epoch": 4.11, "learning_rate": 0.00016730660513689292, "loss": 0.4325, "step": 575 }, { "epoch": 4.11, "learning_rate": 0.0001671994624471447, "loss": 0.4462, "step": 576 }, { "epoch": 4.12, "learning_rate": 0.00016709217891091547, "loss": 0.4417, "step": 577 }, { "epoch": 4.12, "learning_rate": 0.00016698475475306578, "loss": 0.4397, "step": 578 }, { "epoch": 4.13, "learning_rate": 0.00016687719019875088, "loss": 0.4433, "step": 579 }, { "epoch": 4.13, "learning_rate": 0.0001667694854734204, "loss": 0.4382, "step": 580 }, { "epoch": 4.14, "learning_rate": 0.00016666164080281765, "loss": 0.4325, "step": 581 }, { "epoch": 4.14, "learning_rate": 0.00016655365641297929, "loss": 0.4425, "step": 582 }, { "epoch": 4.14, "learning_rate": 0.00016644553253023484, "loss": 0.439, "step": 583 }, { "epoch": 4.15, "learning_rate": 0.00016633726938120616, "loss": 0.4433, "step": 584 }, { "epoch": 4.15, "learning_rate": 0.00016622886719280705, "loss": 0.4399, "step": 585 }, { "epoch": 4.16, "learning_rate": 0.0001661203261922427, "loss": 0.4387, "step": 586 }, { "epoch": 4.16, "learning_rate": 0.00016601164660700918, "loss": 0.438, "step": 587 }, { "epoch": 4.17, "learning_rate": 0.00016590282866489319, "loss": 0.4402, "step": 588 }, { "epoch": 4.17, "learning_rate": 0.00016579387259397127, "loss": 0.4404, "step": 589 }, { "epoch": 4.18, "learning_rate": 0.0001656847786226095, "loss": 0.4344, "step": 590 }, { "epoch": 4.18, "learning_rate": 0.00016557554697946308, "loss": 0.4362, "step": 591 }, { "epoch": 4.19, "learning_rate": 0.0001654661778934756, "loss": 0.4334, "step": 592 }, { "epoch": 4.19, "learning_rate": 0.0001653566715938789, "loss": 0.4401, "step": 593 }, { "epoch": 4.2, "learning_rate": 0.00016524702831019228, "loss": 0.4393, "step": 594 }, { "epoch": 4.2, "learning_rate": 0.00016513724827222227, "loss": 0.4356, "step": 595 }, { "epoch": 4.2, "learning_rate": 0.00016502733171006183, "loss": 0.4411, "step": 596 }, { "epoch": 4.21, "learning_rate": 0.0001649172788540903, "loss": 0.4404, "step": 597 }, { "epoch": 4.21, "learning_rate": 0.0001648070899349726, "loss": 0.4381, "step": 598 }, { "epoch": 4.22, "learning_rate": 0.00016469676518365874, "loss": 0.4444, "step": 599 }, { "epoch": 4.22, "learning_rate": 0.00016458630483138356, "loss": 0.4369, "step": 600 }, { "epoch": 4.23, "learning_rate": 0.00016447570910966603, "loss": 0.4361, "step": 601 }, { "epoch": 4.23, "learning_rate": 0.00016436497825030884, "loss": 0.4278, "step": 602 }, { "epoch": 4.24, "learning_rate": 0.00016425411248539805, "loss": 0.4392, "step": 603 }, { "epoch": 4.24, "learning_rate": 0.00016414311204730227, "loss": 0.438, "step": 604 }, { "epoch": 4.25, "learning_rate": 0.0001640319771686725, "loss": 0.4361, "step": 605 }, { "epoch": 4.25, "learning_rate": 0.00016392070808244155, "loss": 0.4302, "step": 606 }, { "epoch": 4.25, "learning_rate": 0.00016380930502182345, "loss": 0.4342, "step": 607 }, { "epoch": 4.26, "learning_rate": 0.0001636977682203131, "loss": 0.4379, "step": 608 }, { "epoch": 4.26, "learning_rate": 0.0001635860979116856, "loss": 0.433, "step": 609 }, { "epoch": 4.27, "learning_rate": 0.00016347429432999602, "loss": 0.4451, "step": 610 }, { "epoch": 4.27, "learning_rate": 0.00016336235770957863, "loss": 0.4356, "step": 611 }, { "epoch": 4.28, "learning_rate": 0.00016325028828504662, "loss": 0.4348, "step": 612 }, { "epoch": 4.28, "learning_rate": 0.00016313808629129156, "loss": 0.4389, "step": 613 }, { "epoch": 4.29, "learning_rate": 0.0001630257519634828, "loss": 0.4287, "step": 614 }, { "epoch": 4.29, "learning_rate": 0.00016291328553706704, "loss": 0.4381, "step": 615 }, { "epoch": 4.3, "learning_rate": 0.00016280068724776797, "loss": 0.439, "step": 616 }, { "epoch": 4.3, "learning_rate": 0.00016268795733158552, "loss": 0.4384, "step": 617 }, { "epoch": 4.31, "learning_rate": 0.00016257509602479563, "loss": 0.4389, "step": 618 }, { "epoch": 4.31, "learning_rate": 0.00016246210356394953, "loss": 0.4391, "step": 619 }, { "epoch": 4.31, "learning_rate": 0.00016234898018587337, "loss": 0.4393, "step": 620 }, { "epoch": 4.32, "learning_rate": 0.00016223572612766773, "loss": 0.428, "step": 621 }, { "epoch": 4.32, "learning_rate": 0.00016212234162670704, "loss": 0.4375, "step": 622 }, { "epoch": 4.33, "learning_rate": 0.00016200882692063917, "loss": 0.4392, "step": 623 }, { "epoch": 4.33, "learning_rate": 0.00016189518224738486, "loss": 0.4342, "step": 624 }, { "epoch": 4.34, "learning_rate": 0.0001617814078451373, "loss": 0.4408, "step": 625 }, { "epoch": 4.34, "learning_rate": 0.0001616675039523615, "loss": 0.4375, "step": 626 }, { "epoch": 4.35, "learning_rate": 0.000161553470807794, "loss": 0.4405, "step": 627 }, { "epoch": 4.35, "learning_rate": 0.00016143930865044213, "loss": 0.4301, "step": 628 }, { "epoch": 4.36, "learning_rate": 0.00016132501771958367, "loss": 0.432, "step": 629 }, { "epoch": 4.36, "learning_rate": 0.0001612105982547663, "loss": 0.4318, "step": 630 }, { "epoch": 4.37, "learning_rate": 0.0001610960504958071, "loss": 0.4351, "step": 631 }, { "epoch": 4.37, "learning_rate": 0.0001609813746827921, "loss": 0.4364, "step": 632 }, { "epoch": 4.37, "learning_rate": 0.00016086657105607562, "loss": 0.4346, "step": 633 }, { "epoch": 4.38, "learning_rate": 0.00016075163985627993, "loss": 0.4317, "step": 634 }, { "epoch": 4.38, "learning_rate": 0.0001606365813242947, "loss": 0.4321, "step": 635 }, { "epoch": 4.39, "learning_rate": 0.0001605213957012764, "loss": 0.4314, "step": 636 }, { "epoch": 4.39, "learning_rate": 0.00016040608322864808, "loss": 0.4389, "step": 637 }, { "epoch": 4.4, "learning_rate": 0.00016029064414809838, "loss": 0.4367, "step": 638 }, { "epoch": 4.4, "learning_rate": 0.00016017507870158147, "loss": 0.4361, "step": 639 }, { "epoch": 4.41, "learning_rate": 0.00016005938713131642, "loss": 0.4345, "step": 640 }, { "epoch": 4.41, "learning_rate": 0.0001599435696797865, "loss": 0.4337, "step": 641 }, { "epoch": 4.42, "learning_rate": 0.00015982762658973894, "loss": 0.4413, "step": 642 }, { "epoch": 4.42, "learning_rate": 0.00015971155810418422, "loss": 0.4385, "step": 643 }, { "epoch": 4.42, "learning_rate": 0.0001595953644663957, "loss": 0.4299, "step": 644 }, { "epoch": 4.43, "learning_rate": 0.00015947904591990907, "loss": 0.4294, "step": 645 }, { "epoch": 4.43, "learning_rate": 0.00015936260270852173, "loss": 0.4328, "step": 646 }, { "epoch": 4.44, "learning_rate": 0.00015924603507629244, "loss": 0.4281, "step": 647 }, { "epoch": 4.44, "learning_rate": 0.0001591293432675407, "loss": 0.4418, "step": 648 }, { "epoch": 4.45, "learning_rate": 0.0001590125275268464, "loss": 0.4322, "step": 649 }, { "epoch": 4.45, "learning_rate": 0.00015889558809904902, "loss": 0.4346, "step": 650 }, { "epoch": 4.46, "learning_rate": 0.00015877852522924732, "loss": 0.4282, "step": 651 }, { "epoch": 4.46, "learning_rate": 0.00015866133916279886, "loss": 0.4389, "step": 652 }, { "epoch": 4.47, "learning_rate": 0.00015854403014531937, "loss": 0.4303, "step": 653 }, { "epoch": 4.47, "learning_rate": 0.0001584265984226823, "loss": 0.4343, "step": 654 }, { "epoch": 4.48, "learning_rate": 0.0001583090442410182, "loss": 0.4348, "step": 655 }, { "epoch": 4.48, "learning_rate": 0.0001581913678467144, "loss": 0.4332, "step": 656 }, { "epoch": 4.48, "learning_rate": 0.0001580735694864143, "loss": 0.4269, "step": 657 }, { "epoch": 4.49, "learning_rate": 0.000157955649407017, "loss": 0.4343, "step": 658 }, { "epoch": 4.49, "learning_rate": 0.00015783760785567665, "loss": 0.4393, "step": 659 }, { "epoch": 4.5, "learning_rate": 0.00015771944507980207, "loss": 0.4323, "step": 660 }, { "epoch": 4.5, "learning_rate": 0.0001576011613270561, "loss": 0.4332, "step": 661 }, { "epoch": 4.51, "learning_rate": 0.00015748275684535515, "loss": 0.4323, "step": 662 }, { "epoch": 4.51, "learning_rate": 0.0001573642318828687, "loss": 0.4292, "step": 663 }, { "epoch": 4.52, "learning_rate": 0.00015724558668801875, "loss": 0.4303, "step": 664 }, { "epoch": 4.52, "learning_rate": 0.00015712682150947923, "loss": 0.4311, "step": 665 }, { "epoch": 4.53, "learning_rate": 0.00015700793659617567, "loss": 0.4302, "step": 666 }, { "epoch": 4.53, "learning_rate": 0.00015688893219728445, "loss": 0.4311, "step": 667 }, { "epoch": 4.54, "learning_rate": 0.00015676980856223248, "loss": 0.4349, "step": 668 }, { "epoch": 4.54, "learning_rate": 0.00015665056594069647, "loss": 0.4277, "step": 669 }, { "epoch": 4.54, "learning_rate": 0.00015653120458260263, "loss": 0.4334, "step": 670 }, { "epoch": 4.55, "learning_rate": 0.00015641172473812592, "loss": 0.4297, "step": 671 }, { "epoch": 4.55, "learning_rate": 0.00015629212665768978, "loss": 0.4328, "step": 672 }, { "epoch": 4.56, "learning_rate": 0.00015617241059196534, "loss": 0.4362, "step": 673 }, { "epoch": 4.56, "learning_rate": 0.00015605257679187113, "loss": 0.4316, "step": 674 }, { "epoch": 4.57, "learning_rate": 0.00015593262550857233, "loss": 0.4365, "step": 675 }, { "epoch": 4.57, "learning_rate": 0.00015581255699348046, "loss": 0.4316, "step": 676 }, { "epoch": 4.58, "learning_rate": 0.00015569237149825265, "loss": 0.4273, "step": 677 }, { "epoch": 4.58, "learning_rate": 0.00015557206927479137, "loss": 0.4296, "step": 678 }, { "epoch": 4.59, "learning_rate": 0.00015545165057524358, "loss": 0.4362, "step": 679 }, { "epoch": 4.59, "learning_rate": 0.00015533111565200044, "loss": 0.4323, "step": 680 }, { "epoch": 4.59, "learning_rate": 0.00015521046475769674, "loss": 0.4344, "step": 681 }, { "epoch": 4.6, "learning_rate": 0.00015508969814521025, "loss": 0.4291, "step": 682 }, { "epoch": 4.6, "learning_rate": 0.0001549688160676614, "loss": 0.4317, "step": 683 }, { "epoch": 4.61, "learning_rate": 0.0001548478187784125, "loss": 0.4304, "step": 684 }, { "epoch": 4.61, "learning_rate": 0.00015472670653106745, "loss": 0.4301, "step": 685 }, { "epoch": 4.62, "learning_rate": 0.00015460547957947104, "loss": 0.4316, "step": 686 }, { "epoch": 4.62, "learning_rate": 0.00015448413817770846, "loss": 0.4277, "step": 687 }, { "epoch": 4.63, "learning_rate": 0.00015436268258010478, "loss": 0.4234, "step": 688 }, { "epoch": 4.63, "learning_rate": 0.00015424111304122447, "loss": 0.4266, "step": 689 }, { "epoch": 4.63, "eval_loss": 0.46920087933540344, "eval_runtime": 28.2397, "eval_samples_per_second": 7.082, "eval_steps_per_second": 0.885, "step": 689 }, { "epoch": 5.0, "learning_rate": 0.0001541194298158708, "loss": 0.4297, "step": 690 }, { "epoch": 5.01, "learning_rate": 0.00015399763315908528, "loss": 0.4263, "step": 691 }, { "epoch": 5.01, "learning_rate": 0.0001538757233261472, "loss": 0.4264, "step": 692 }, { "epoch": 5.02, "learning_rate": 0.00015375370057257306, "loss": 0.4329, "step": 693 }, { "epoch": 5.02, "learning_rate": 0.00015363156515411602, "loss": 0.4166, "step": 694 }, { "epoch": 5.03, "learning_rate": 0.0001535093173267654, "loss": 0.4295, "step": 695 }, { "epoch": 5.03, "learning_rate": 0.00015338695734674605, "loss": 0.4378, "step": 696 }, { "epoch": 5.04, "learning_rate": 0.00015326448547051805, "loss": 0.4306, "step": 697 }, { "epoch": 5.04, "learning_rate": 0.00015314190195477584, "loss": 0.4321, "step": 698 }, { "epoch": 5.04, "learning_rate": 0.00015301920705644792, "loss": 0.4264, "step": 699 }, { "epoch": 5.05, "learning_rate": 0.00015289640103269625, "loss": 0.4284, "step": 700 }, { "epoch": 5.05, "learning_rate": 0.00015277348414091568, "loss": 0.434, "step": 701 }, { "epoch": 5.06, "learning_rate": 0.0001526504566387335, "loss": 0.4311, "step": 702 }, { "epoch": 5.06, "learning_rate": 0.00015252731878400864, "loss": 0.4269, "step": 703 }, { "epoch": 5.07, "learning_rate": 0.00015240407083483163, "loss": 0.4259, "step": 704 }, { "epoch": 5.07, "learning_rate": 0.0001522807130495235, "loss": 0.4341, "step": 705 }, { "epoch": 5.08, "learning_rate": 0.00015215724568663555, "loss": 0.4285, "step": 706 }, { "epoch": 5.08, "learning_rate": 0.0001520336690049488, "loss": 0.4284, "step": 707 }, { "epoch": 5.09, "learning_rate": 0.0001519099832634734, "loss": 0.4388, "step": 708 }, { "epoch": 5.09, "learning_rate": 0.00015178618872144797, "loss": 0.4327, "step": 709 }, { "epoch": 5.09, "learning_rate": 0.00015166228563833934, "loss": 0.4287, "step": 710 }, { "epoch": 5.1, "learning_rate": 0.00015153827427384173, "loss": 0.4297, "step": 711 }, { "epoch": 5.1, "learning_rate": 0.00015141415488787626, "loss": 0.4266, "step": 712 }, { "epoch": 5.11, "learning_rate": 0.00015128992774059063, "loss": 0.4245, "step": 713 }, { "epoch": 5.11, "learning_rate": 0.00015116559309235825, "loss": 0.4289, "step": 714 }, { "epoch": 5.12, "learning_rate": 0.00015104115120377783, "loss": 0.4219, "step": 715 }, { "epoch": 5.12, "learning_rate": 0.00015091660233567305, "loss": 0.4281, "step": 716 }, { "epoch": 5.13, "learning_rate": 0.0001507919467490916, "loss": 0.4241, "step": 717 }, { "epoch": 5.13, "learning_rate": 0.00015066718470530495, "loss": 0.4243, "step": 718 }, { "epoch": 5.14, "learning_rate": 0.00015054231646580764, "loss": 0.4218, "step": 719 }, { "epoch": 5.14, "learning_rate": 0.00015041734229231688, "loss": 0.4294, "step": 720 }, { "epoch": 5.15, "learning_rate": 0.00015029226244677178, "loss": 0.4273, "step": 721 }, { "epoch": 5.15, "learning_rate": 0.0001501670771913331, "loss": 0.4258, "step": 722 }, { "epoch": 5.15, "learning_rate": 0.0001500417867883824, "loss": 0.4278, "step": 723 }, { "epoch": 5.16, "learning_rate": 0.0001499163915005216, "loss": 0.4312, "step": 724 }, { "epoch": 5.16, "learning_rate": 0.00014979089159057265, "loss": 0.4278, "step": 725 }, { "epoch": 5.17, "learning_rate": 0.00014966528732157658, "loss": 0.422, "step": 726 }, { "epoch": 5.17, "learning_rate": 0.0001495395789567932, "loss": 0.4234, "step": 727 }, { "epoch": 5.18, "learning_rate": 0.0001494137667597006, "loss": 0.4192, "step": 728 }, { "epoch": 5.18, "learning_rate": 0.00014928785099399433, "loss": 0.4199, "step": 729 }, { "epoch": 5.19, "learning_rate": 0.00014916183192358718, "loss": 0.4263, "step": 730 }, { "epoch": 5.19, "learning_rate": 0.00014903570981260832, "loss": 0.4266, "step": 731 }, { "epoch": 5.2, "learning_rate": 0.000148909484925403, "loss": 0.4282, "step": 732 }, { "epoch": 5.2, "learning_rate": 0.00014878315752653185, "loss": 0.429, "step": 733 }, { "epoch": 5.2, "learning_rate": 0.00014865672788077027, "loss": 0.4195, "step": 734 }, { "epoch": 5.21, "learning_rate": 0.00014853019625310813, "loss": 0.4295, "step": 735 }, { "epoch": 5.21, "learning_rate": 0.00014840356290874888, "loss": 0.4232, "step": 736 }, { "epoch": 5.22, "learning_rate": 0.00014827682811310928, "loss": 0.4287, "step": 737 }, { "epoch": 5.22, "learning_rate": 0.00014814999213181866, "loss": 0.4249, "step": 738 }, { "epoch": 5.23, "learning_rate": 0.00014802305523071852, "loss": 0.425, "step": 739 }, { "epoch": 5.23, "learning_rate": 0.00014789601767586173, "loss": 0.4169, "step": 740 }, { "epoch": 5.24, "learning_rate": 0.0001477688797335123, "loss": 0.4206, "step": 741 }, { "epoch": 5.24, "learning_rate": 0.00014764164167014451, "loss": 0.4189, "step": 742 }, { "epoch": 5.25, "learning_rate": 0.00014751430375244256, "loss": 0.4256, "step": 743 }, { "epoch": 5.25, "learning_rate": 0.00014738686624729986, "loss": 0.4245, "step": 744 }, { "epoch": 5.26, "learning_rate": 0.00014725932942181872, "loss": 0.4244, "step": 745 }, { "epoch": 5.26, "learning_rate": 0.0001471316935433094, "loss": 0.4208, "step": 746 }, { "epoch": 5.26, "learning_rate": 0.00014700395887928995, "loss": 0.4229, "step": 747 }, { "epoch": 5.27, "learning_rate": 0.00014687612569748535, "loss": 0.4257, "step": 748 }, { "epoch": 5.27, "learning_rate": 0.00014674819426582712, "loss": 0.4261, "step": 749 }, { "epoch": 5.28, "learning_rate": 0.00014662016485245274, "loss": 0.4214, "step": 750 }, { "epoch": 5.28, "learning_rate": 0.000146492037725705, "loss": 0.4241, "step": 751 }, { "epoch": 5.29, "learning_rate": 0.0001463638131541315, "loss": 0.4305, "step": 752 }, { "epoch": 5.29, "learning_rate": 0.00014623549140648412, "loss": 0.4208, "step": 753 }, { "epoch": 5.3, "learning_rate": 0.0001461070727517183, "loss": 0.4258, "step": 754 }, { "epoch": 5.3, "learning_rate": 0.00014597855745899274, "loss": 0.4193, "step": 755 }, { "epoch": 5.31, "learning_rate": 0.00014584994579766865, "loss": 0.4285, "step": 756 }, { "epoch": 5.31, "learning_rate": 0.0001457212380373091, "loss": 0.4204, "step": 757 }, { "epoch": 5.32, "learning_rate": 0.00014559243444767878, "loss": 0.423, "step": 758 }, { "epoch": 5.32, "learning_rate": 0.0001454635352987431, "loss": 0.4253, "step": 759 }, { "epoch": 5.32, "learning_rate": 0.00014533454086066772, "loss": 0.4238, "step": 760 }, { "epoch": 5.33, "learning_rate": 0.00014520545140381816, "loss": 0.419, "step": 761 }, { "epoch": 5.33, "learning_rate": 0.00014507626719875897, "loss": 0.4245, "step": 762 }, { "epoch": 5.34, "learning_rate": 0.0001449469885162534, "loss": 0.4204, "step": 763 }, { "epoch": 5.34, "learning_rate": 0.00014481761562726262, "loss": 0.4262, "step": 764 }, { "epoch": 5.35, "learning_rate": 0.0001446881488029453, "loss": 0.4267, "step": 765 }, { "epoch": 5.35, "learning_rate": 0.00014455858831465695, "loss": 0.4235, "step": 766 }, { "epoch": 5.36, "learning_rate": 0.00014442893443394945, "loss": 0.4197, "step": 767 }, { "epoch": 5.36, "learning_rate": 0.00014429918743257044, "loss": 0.4216, "step": 768 }, { "epoch": 5.37, "learning_rate": 0.0001441693475824626, "loss": 0.4222, "step": 769 }, { "epoch": 5.37, "learning_rate": 0.00014403941515576344, "loss": 0.4184, "step": 770 }, { "epoch": 5.37, "learning_rate": 0.00014390939042480428, "loss": 0.4194, "step": 771 }, { "epoch": 5.38, "learning_rate": 0.0001437792736621101, "loss": 0.4213, "step": 772 }, { "epoch": 5.38, "learning_rate": 0.0001436490651403986, "loss": 0.4188, "step": 773 }, { "epoch": 5.39, "learning_rate": 0.00014351876513257986, "loss": 0.4192, "step": 774 }, { "epoch": 5.39, "learning_rate": 0.00014338837391175582, "loss": 0.4215, "step": 775 }, { "epoch": 5.4, "learning_rate": 0.00014325789175121946, "loss": 0.4198, "step": 776 }, { "epoch": 5.4, "learning_rate": 0.00014312731892445442, "loss": 0.4232, "step": 777 }, { "epoch": 5.41, "learning_rate": 0.00014299665570513437, "loss": 0.4246, "step": 778 }, { "epoch": 5.41, "learning_rate": 0.00014286590236712237, "loss": 0.4185, "step": 779 }, { "epoch": 5.42, "learning_rate": 0.00014273505918447054, "loss": 0.4214, "step": 780 }, { "epoch": 5.42, "learning_rate": 0.0001426041264314191, "loss": 0.418, "step": 781 }, { "epoch": 5.43, "learning_rate": 0.0001424731043823962, "loss": 0.4235, "step": 782 }, { "epoch": 5.43, "learning_rate": 0.00014234199331201696, "loss": 0.424, "step": 783 }, { "epoch": 5.43, "learning_rate": 0.0001422107934950832, "loss": 0.4219, "step": 784 }, { "epoch": 5.44, "learning_rate": 0.00014207950520658274, "loss": 0.4266, "step": 785 }, { "epoch": 5.44, "learning_rate": 0.0001419481287216888, "loss": 0.4193, "step": 786 }, { "epoch": 5.45, "learning_rate": 0.00014181666431575945, "loss": 0.4168, "step": 787 }, { "epoch": 5.45, "learning_rate": 0.0001416851122643371, "loss": 0.4196, "step": 788 }, { "epoch": 5.46, "learning_rate": 0.0001415534728431478, "loss": 0.4196, "step": 789 }, { "epoch": 5.46, "learning_rate": 0.00014142174632810072, "loss": 0.4205, "step": 790 }, { "epoch": 5.47, "learning_rate": 0.00014128993299528762, "loss": 0.4248, "step": 791 }, { "epoch": 5.47, "learning_rate": 0.0001411580331209822, "loss": 0.4161, "step": 792 }, { "epoch": 5.48, "learning_rate": 0.00014102604698163951, "loss": 0.4191, "step": 793 }, { "epoch": 5.48, "learning_rate": 0.00014089397485389548, "loss": 0.4233, "step": 794 }, { "epoch": 5.49, "learning_rate": 0.00014076181701456623, "loss": 0.4188, "step": 795 }, { "epoch": 5.49, "learning_rate": 0.00014062957374064752, "loss": 0.4274, "step": 796 }, { "epoch": 5.49, "learning_rate": 0.00014049724530931416, "loss": 0.4202, "step": 797 }, { "epoch": 5.5, "learning_rate": 0.00014036483199791948, "loss": 0.4164, "step": 798 }, { "epoch": 5.5, "learning_rate": 0.00014023233408399472, "loss": 0.4226, "step": 799 }, { "epoch": 5.51, "learning_rate": 0.0001400997518452484, "loss": 0.4134, "step": 800 }, { "epoch": 5.51, "learning_rate": 0.00013996708555956582, "loss": 0.4217, "step": 801 }, { "epoch": 5.52, "learning_rate": 0.0001398343355050084, "loss": 0.4215, "step": 802 }, { "epoch": 5.52, "learning_rate": 0.00013970150195981323, "loss": 0.4138, "step": 803 }, { "epoch": 5.53, "learning_rate": 0.00013956858520239223, "loss": 0.4121, "step": 804 }, { "epoch": 5.53, "learning_rate": 0.00013943558551133186, "loss": 0.416, "step": 805 }, { "epoch": 5.54, "learning_rate": 0.00013930250316539238, "loss": 0.4203, "step": 806 }, { "epoch": 5.54, "learning_rate": 0.0001391693384435072, "loss": 0.4236, "step": 807 }, { "epoch": 5.54, "learning_rate": 0.0001390360916247826, "loss": 0.4179, "step": 808 }, { "epoch": 5.55, "learning_rate": 0.0001389027629884966, "loss": 0.4136, "step": 809 }, { "epoch": 5.55, "learning_rate": 0.00013876935281409907, "loss": 0.4257, "step": 810 }, { "epoch": 5.56, "learning_rate": 0.0001386358613812105, "loss": 0.4234, "step": 811 }, { "epoch": 5.56, "learning_rate": 0.0001385022889696218, "loss": 0.4177, "step": 812 }, { "epoch": 5.57, "learning_rate": 0.00013836863585929365, "loss": 0.4185, "step": 813 }, { "epoch": 5.57, "learning_rate": 0.0001382349023303558, "loss": 0.4192, "step": 814 }, { "epoch": 5.58, "learning_rate": 0.0001381010886631066, "loss": 0.4187, "step": 815 }, { "epoch": 5.58, "learning_rate": 0.00013796719513801232, "loss": 0.4197, "step": 816 }, { "epoch": 5.59, "learning_rate": 0.0001378332220357066, "loss": 0.4184, "step": 817 }, { "epoch": 5.59, "learning_rate": 0.00013769916963698997, "loss": 0.4203, "step": 818 }, { "epoch": 5.6, "learning_rate": 0.000137565038222829, "loss": 0.4198, "step": 819 }, { "epoch": 5.6, "learning_rate": 0.00013743082807435615, "loss": 0.4211, "step": 820 }, { "epoch": 5.6, "learning_rate": 0.00013729653947286847, "loss": 0.4132, "step": 821 }, { "epoch": 5.61, "learning_rate": 0.00013716217269982788, "loss": 0.4168, "step": 822 }, { "epoch": 5.61, "learning_rate": 0.00013702772803685984, "loss": 0.417, "step": 823 }, { "epoch": 5.62, "learning_rate": 0.00013689320576575322, "loss": 0.4247, "step": 824 }, { "epoch": 5.62, "learning_rate": 0.00013675860616845954, "loss": 0.4186, "step": 825 }, { "epoch": 5.63, "learning_rate": 0.00013662392952709228, "loss": 0.4116, "step": 826 }, { "epoch": 5.63, "learning_rate": 0.0001364891761239266, "loss": 0.4087, "step": 827 }, { "epoch": 5.63, "eval_loss": 0.4685542583465576, "eval_runtime": 28.2711, "eval_samples_per_second": 7.074, "eval_steps_per_second": 0.884, "step": 827 }, { "epoch": 6.0, "learning_rate": 0.00013635434624139828, "loss": 0.4133, "step": 828 }, { "epoch": 6.01, "learning_rate": 0.00013621944016210366, "loss": 0.4181, "step": 829 }, { "epoch": 6.01, "learning_rate": 0.00013608445816879866, "loss": 0.414, "step": 830 }, { "epoch": 6.02, "learning_rate": 0.00013594940054439825, "loss": 0.4165, "step": 831 }, { "epoch": 6.02, "learning_rate": 0.00013581426757197605, "loss": 0.4153, "step": 832 }, { "epoch": 6.03, "learning_rate": 0.00013567905953476355, "loss": 0.4218, "step": 833 }, { "epoch": 6.03, "learning_rate": 0.00013554377671614958, "loss": 0.4177, "step": 834 }, { "epoch": 6.04, "learning_rate": 0.00013540841939967962, "loss": 0.4198, "step": 835 }, { "epoch": 6.04, "learning_rate": 0.00013527298786905544, "loss": 0.4194, "step": 836 }, { "epoch": 6.04, "learning_rate": 0.0001351374824081343, "loss": 0.4157, "step": 837 }, { "epoch": 6.05, "learning_rate": 0.0001350019033009283, "loss": 0.4109, "step": 838 }, { "epoch": 6.05, "learning_rate": 0.00013486625083160414, "loss": 0.4195, "step": 839 }, { "epoch": 6.06, "learning_rate": 0.00013473052528448201, "loss": 0.416, "step": 840 }, { "epoch": 6.06, "learning_rate": 0.00013459472694403552, "loss": 0.4198, "step": 841 }, { "epoch": 6.07, "learning_rate": 0.00013445885609489068, "loss": 0.4155, "step": 842 }, { "epoch": 6.07, "learning_rate": 0.0001343229130218255, "loss": 0.4221, "step": 843 }, { "epoch": 6.08, "learning_rate": 0.00013418689800976942, "loss": 0.419, "step": 844 }, { "epoch": 6.08, "learning_rate": 0.00013405081134380264, "loss": 0.4186, "step": 845 }, { "epoch": 6.09, "learning_rate": 0.00013391465330915556, "loss": 0.4148, "step": 846 }, { "epoch": 6.09, "learning_rate": 0.00013377842419120807, "loss": 0.4227, "step": 847 }, { "epoch": 6.1, "learning_rate": 0.00013364212427548916, "loss": 0.4155, "step": 848 }, { "epoch": 6.1, "learning_rate": 0.0001335057538476762, "loss": 0.4139, "step": 849 }, { "epoch": 6.1, "learning_rate": 0.00013336931319359426, "loss": 0.4136, "step": 850 }, { "epoch": 6.11, "learning_rate": 0.00013323280259921574, "loss": 0.4168, "step": 851 }, { "epoch": 6.11, "learning_rate": 0.00013309622235065942, "loss": 0.4132, "step": 852 }, { "epoch": 6.12, "learning_rate": 0.0001329595727341903, "loss": 0.4124, "step": 853 }, { "epoch": 6.12, "learning_rate": 0.00013282285403621864, "loss": 0.414, "step": 854 }, { "epoch": 6.13, "learning_rate": 0.0001326860665432995, "loss": 0.4164, "step": 855 }, { "epoch": 6.13, "learning_rate": 0.00013254921054213224, "loss": 0.41, "step": 856 }, { "epoch": 6.14, "learning_rate": 0.00013241228631955963, "loss": 0.41, "step": 857 }, { "epoch": 6.14, "learning_rate": 0.00013227529416256754, "loss": 0.4124, "step": 858 }, { "epoch": 6.15, "learning_rate": 0.0001321382343582842, "loss": 0.4134, "step": 859 }, { "epoch": 6.15, "learning_rate": 0.00013200110719397968, "loss": 0.4162, "step": 860 }, { "epoch": 6.15, "learning_rate": 0.00013186391295706517, "loss": 0.4216, "step": 861 }, { "epoch": 6.16, "learning_rate": 0.00013172665193509242, "loss": 0.412, "step": 862 }, { "epoch": 6.16, "learning_rate": 0.00013158932441575326, "loss": 0.4089, "step": 863 }, { "epoch": 6.17, "learning_rate": 0.00013145193068687876, "loss": 0.4132, "step": 864 }, { "epoch": 6.17, "learning_rate": 0.00013131447103643885, "loss": 0.4075, "step": 865 }, { "epoch": 6.18, "learning_rate": 0.00013117694575254162, "loss": 0.4165, "step": 866 }, { "epoch": 6.18, "learning_rate": 0.00013103935512343273, "loss": 0.4139, "step": 867 }, { "epoch": 6.19, "learning_rate": 0.00013090169943749476, "loss": 0.4095, "step": 868 }, { "epoch": 6.19, "learning_rate": 0.00013076397898324665, "loss": 0.416, "step": 869 }, { "epoch": 6.2, "learning_rate": 0.00013062619404934317, "loss": 0.4207, "step": 870 }, { "epoch": 6.2, "learning_rate": 0.00013048834492457415, "loss": 0.4042, "step": 871 }, { "epoch": 6.21, "learning_rate": 0.00013035043189786393, "loss": 0.4194, "step": 872 }, { "epoch": 6.21, "learning_rate": 0.00013021245525827096, "loss": 0.4159, "step": 873 }, { "epoch": 6.21, "learning_rate": 0.00013007441529498673, "loss": 0.4138, "step": 874 }, { "epoch": 6.22, "learning_rate": 0.00012993631229733582, "loss": 0.4129, "step": 875 }, { "epoch": 6.22, "learning_rate": 0.0001297981465547746, "loss": 0.4063, "step": 876 }, { "epoch": 6.23, "learning_rate": 0.0001296599183568911, "loss": 0.4112, "step": 877 }, { "epoch": 6.23, "learning_rate": 0.00012952162799340425, "loss": 0.4066, "step": 878 }, { "epoch": 6.24, "learning_rate": 0.00012938327575416327, "loss": 0.4118, "step": 879 }, { "epoch": 6.24, "learning_rate": 0.00012924486192914705, "loss": 0.4091, "step": 880 }, { "epoch": 6.25, "learning_rate": 0.00012910638680846358, "loss": 0.4155, "step": 881 }, { "epoch": 6.25, "learning_rate": 0.00012896785068234926, "loss": 0.4133, "step": 882 }, { "epoch": 6.26, "learning_rate": 0.00012882925384116842, "loss": 0.4117, "step": 883 }, { "epoch": 6.26, "learning_rate": 0.0001286905965754127, "loss": 0.4142, "step": 884 }, { "epoch": 6.27, "learning_rate": 0.0001285518791757002, "loss": 0.409, "step": 885 }, { "epoch": 6.27, "learning_rate": 0.00012841310193277528, "loss": 0.409, "step": 886 }, { "epoch": 6.27, "learning_rate": 0.0001282742651375076, "loss": 0.4124, "step": 887 }, { "epoch": 6.28, "learning_rate": 0.00012813536908089164, "loss": 0.4135, "step": 888 }, { "epoch": 6.28, "learning_rate": 0.0001279964140540461, "loss": 0.4045, "step": 889 }, { "epoch": 6.29, "learning_rate": 0.00012785740034821329, "loss": 0.4134, "step": 890 }, { "epoch": 6.29, "learning_rate": 0.00012771832825475852, "loss": 0.4157, "step": 891 }, { "epoch": 6.3, "learning_rate": 0.0001275791980651695, "loss": 0.4143, "step": 892 }, { "epoch": 6.3, "learning_rate": 0.0001274400100710556, "loss": 0.4146, "step": 893 }, { "epoch": 6.31, "learning_rate": 0.00012730076456414746, "loss": 0.411, "step": 894 }, { "epoch": 6.31, "learning_rate": 0.0001271614618362962, "loss": 0.4131, "step": 895 }, { "epoch": 6.32, "learning_rate": 0.00012702210217947288, "loss": 0.4162, "step": 896 }, { "epoch": 6.32, "learning_rate": 0.0001268826858857679, "loss": 0.4099, "step": 897 }, { "epoch": 6.32, "learning_rate": 0.00012674321324739038, "loss": 0.4135, "step": 898 }, { "epoch": 6.33, "learning_rate": 0.00012660368455666752, "loss": 0.4106, "step": 899 }, { "epoch": 6.33, "learning_rate": 0.00012646410010604397, "loss": 0.4027, "step": 900 }, { "epoch": 6.34, "learning_rate": 0.00012632446018808128, "loss": 0.4118, "step": 901 }, { "epoch": 6.34, "learning_rate": 0.00012618476509545725, "loss": 0.4143, "step": 902 }, { "epoch": 6.35, "learning_rate": 0.00012604501512096533, "loss": 0.414, "step": 903 }, { "epoch": 6.35, "learning_rate": 0.000125905210557514, "loss": 0.4064, "step": 904 }, { "epoch": 6.36, "learning_rate": 0.00012576535169812615, "loss": 0.4188, "step": 905 }, { "epoch": 6.36, "learning_rate": 0.00012562543883593848, "loss": 0.4048, "step": 906 }, { "epoch": 6.37, "learning_rate": 0.00012548547226420089, "loss": 0.4094, "step": 907 }, { "epoch": 6.37, "learning_rate": 0.0001253454522762758, "loss": 0.4055, "step": 908 }, { "epoch": 6.38, "learning_rate": 0.00012520537916563756, "loss": 0.4097, "step": 909 }, { "epoch": 6.38, "learning_rate": 0.00012506525322587207, "loss": 0.4064, "step": 910 }, { "epoch": 6.38, "learning_rate": 0.0001249250747506757, "loss": 0.4076, "step": 911 }, { "epoch": 6.39, "learning_rate": 0.00012478484403385506, "loss": 0.4081, "step": 912 }, { "epoch": 6.39, "learning_rate": 0.00012464456136932625, "loss": 0.4122, "step": 913 }, { "epoch": 6.4, "learning_rate": 0.0001245042270511142, "loss": 0.4073, "step": 914 }, { "epoch": 6.4, "learning_rate": 0.0001243638413733522, "loss": 0.4098, "step": 915 }, { "epoch": 6.41, "learning_rate": 0.00012422340463028107, "loss": 0.413, "step": 916 }, { "epoch": 6.41, "learning_rate": 0.00012408291711624877, "loss": 0.4101, "step": 917 }, { "epoch": 6.42, "learning_rate": 0.00012394237912570957, "loss": 0.4128, "step": 918 }, { "epoch": 6.42, "learning_rate": 0.00012380179095322364, "loss": 0.4062, "step": 919 }, { "epoch": 6.43, "learning_rate": 0.0001236611528934562, "loss": 0.4119, "step": 920 }, { "epoch": 6.43, "learning_rate": 0.00012352046524117716, "loss": 0.4093, "step": 921 }, { "epoch": 6.44, "learning_rate": 0.0001233797282912603, "loss": 0.4072, "step": 922 }, { "epoch": 6.44, "learning_rate": 0.00012323894233868274, "loss": 0.4048, "step": 923 }, { "epoch": 6.44, "learning_rate": 0.00012309810767852433, "loss": 0.4117, "step": 924 }, { "epoch": 6.45, "learning_rate": 0.00012295722460596697, "loss": 0.4123, "step": 925 }, { "epoch": 6.45, "learning_rate": 0.0001228162934162941, "loss": 0.4127, "step": 926 }, { "epoch": 6.46, "learning_rate": 0.00012267531440488986, "loss": 0.4106, "step": 927 }, { "epoch": 6.46, "learning_rate": 0.00012253428786723877, "loss": 0.4078, "step": 928 }, { "epoch": 6.47, "learning_rate": 0.00012239321409892494, "loss": 0.4111, "step": 929 }, { "epoch": 6.47, "learning_rate": 0.00012225209339563145, "loss": 0.4123, "step": 930 }, { "epoch": 6.48, "learning_rate": 0.00012211092605313972, "loss": 0.4049, "step": 931 }, { "epoch": 6.48, "learning_rate": 0.00012196971236732894, "loss": 0.406, "step": 932 }, { "epoch": 6.49, "learning_rate": 0.00012182845263417549, "loss": 0.4085, "step": 933 }, { "epoch": 6.49, "learning_rate": 0.00012168714714975218, "loss": 0.407, "step": 934 }, { "epoch": 6.49, "learning_rate": 0.00012154579621022777, "loss": 0.413, "step": 935 }, { "epoch": 6.5, "learning_rate": 0.0001214044001118663, "loss": 0.3998, "step": 936 }, { "epoch": 6.5, "learning_rate": 0.00012126295915102639, "loss": 0.4042, "step": 937 }, { "epoch": 6.51, "learning_rate": 0.00012112147362416076, "loss": 0.4118, "step": 938 }, { "epoch": 6.51, "learning_rate": 0.00012097994382781547, "loss": 0.4091, "step": 939 }, { "epoch": 6.52, "learning_rate": 0.00012083837005862946, "loss": 0.4009, "step": 940 }, { "epoch": 6.52, "learning_rate": 0.00012069675261333375, "loss": 0.4124, "step": 941 }, { "epoch": 6.53, "learning_rate": 0.00012055509178875097, "loss": 0.4041, "step": 942 }, { "epoch": 6.53, "learning_rate": 0.0001204133878817946, "loss": 0.4029, "step": 943 }, { "epoch": 6.54, "learning_rate": 0.00012027164118946844, "loss": 0.4104, "step": 944 }, { "epoch": 6.54, "learning_rate": 0.00012012985200886602, "loss": 0.4065, "step": 945 }, { "epoch": 6.55, "learning_rate": 0.00011998802063716987, "loss": 0.4074, "step": 946 }, { "epoch": 6.55, "learning_rate": 0.00011984614737165094, "loss": 0.4077, "step": 947 }, { "epoch": 6.55, "learning_rate": 0.00011970423250966807, "loss": 0.4136, "step": 948 }, { "epoch": 6.56, "learning_rate": 0.00011956227634866714, "loss": 0.4124, "step": 949 }, { "epoch": 6.56, "learning_rate": 0.00011942027918618074, "loss": 0.4058, "step": 950 }, { "epoch": 6.57, "learning_rate": 0.00011927824131982734, "loss": 0.4066, "step": 951 }, { "epoch": 6.57, "learning_rate": 0.00011913616304731063, "loss": 0.4091, "step": 952 }, { "epoch": 6.58, "learning_rate": 0.0001189940446664192, "loss": 0.41, "step": 953 }, { "epoch": 6.58, "learning_rate": 0.00011885188647502546, "loss": 0.3986, "step": 954 }, { "epoch": 6.59, "learning_rate": 0.00011870968877108546, "loss": 0.4116, "step": 955 }, { "epoch": 6.59, "learning_rate": 0.00011856745185263791, "loss": 0.4157, "step": 956 }, { "epoch": 6.6, "learning_rate": 0.00011842517601780388, "loss": 0.407, "step": 957 }, { "epoch": 6.6, "learning_rate": 0.00011828286156478585, "loss": 0.4098, "step": 958 }, { "epoch": 6.61, "learning_rate": 0.00011814050879186731, "loss": 0.4045, "step": 959 }, { "epoch": 6.61, "learning_rate": 0.0001179981179974121, "loss": 0.4067, "step": 960 }, { "epoch": 6.61, "learning_rate": 0.00011785568947986367, "loss": 0.4101, "step": 961 }, { "epoch": 6.62, "learning_rate": 0.0001177132235377446, "loss": 0.4046, "step": 962 }, { "epoch": 6.62, "learning_rate": 0.00011757072046965589, "loss": 0.4024, "step": 963 }, { "epoch": 6.63, "learning_rate": 0.00011742818057427636, "loss": 0.3982, "step": 964 }, { "epoch": 6.63, "learning_rate": 0.00011728560415036201, "loss": 0.399, "step": 965 }, { "epoch": 6.63, "eval_loss": 0.47236838936805725, "eval_runtime": 28.2487, "eval_samples_per_second": 7.08, "eval_steps_per_second": 0.885, "step": 965 }, { "epoch": 7.0, "learning_rate": 0.00011714299149674537, "loss": 0.402, "step": 966 }, { "epoch": 7.01, "learning_rate": 0.00011700034291233499, "loss": 0.4055, "step": 967 }, { "epoch": 7.01, "learning_rate": 0.00011685765869611463, "loss": 0.4065, "step": 968 }, { "epoch": 7.02, "learning_rate": 0.00011671493914714279, "loss": 0.4112, "step": 969 }, { "epoch": 7.02, "learning_rate": 0.00011657218456455206, "loss": 0.4032, "step": 970 }, { "epoch": 7.03, "learning_rate": 0.00011642939524754832, "loss": 0.404, "step": 971 }, { "epoch": 7.03, "learning_rate": 0.00011628657149541045, "loss": 0.402, "step": 972 }, { "epoch": 7.04, "learning_rate": 0.00011614371360748935, "loss": 0.407, "step": 973 }, { "epoch": 7.04, "learning_rate": 0.00011600082188320752, "loss": 0.4095, "step": 974 }, { "epoch": 7.05, "learning_rate": 0.00011585789662205835, "loss": 0.4102, "step": 975 }, { "epoch": 7.05, "learning_rate": 0.00011571493812360561, "loss": 0.407, "step": 976 }, { "epoch": 7.05, "learning_rate": 0.00011557194668748262, "loss": 0.4038, "step": 977 }, { "epoch": 7.06, "learning_rate": 0.00011542892261339178, "loss": 0.4073, "step": 978 }, { "epoch": 7.06, "learning_rate": 0.00011528586620110396, "loss": 0.3989, "step": 979 }, { "epoch": 7.07, "learning_rate": 0.00011514277775045768, "loss": 0.4077, "step": 980 }, { "epoch": 7.07, "learning_rate": 0.00011499965756135873, "loss": 0.404, "step": 981 }, { "epoch": 7.08, "learning_rate": 0.0001148565059337794, "loss": 0.4096, "step": 982 }, { "epoch": 7.08, "learning_rate": 0.00011471332316775773, "loss": 0.4091, "step": 983 }, { "epoch": 7.09, "learning_rate": 0.0001145701095633973, "loss": 0.4082, "step": 984 }, { "epoch": 7.09, "learning_rate": 0.00011442686542086609, "loss": 0.4112, "step": 985 }, { "epoch": 7.1, "learning_rate": 0.00011428359104039617, "loss": 0.404, "step": 986 }, { "epoch": 7.1, "learning_rate": 0.00011414028672228293, "loss": 0.4059, "step": 987 }, { "epoch": 7.11, "learning_rate": 0.00011399695276688469, "loss": 0.3987, "step": 988 }, { "epoch": 7.11, "learning_rate": 0.00011385358947462166, "loss": 0.4035, "step": 989 }, { "epoch": 7.11, "learning_rate": 0.00011371019714597562, "loss": 0.4009, "step": 990 }, { "epoch": 7.12, "learning_rate": 0.00011356677608148933, "loss": 0.4082, "step": 991 }, { "epoch": 7.12, "learning_rate": 0.00011342332658176555, "loss": 0.4025, "step": 992 }, { "epoch": 7.13, "learning_rate": 0.00011327984894746686, "loss": 0.4076, "step": 993 }, { "epoch": 7.13, "learning_rate": 0.00011313634347931466, "loss": 0.4026, "step": 994 }, { "epoch": 7.14, "learning_rate": 0.00011299281047808877, "loss": 0.4028, "step": 995 }, { "epoch": 7.14, "learning_rate": 0.00011284925024462665, "loss": 0.4021, "step": 996 }, { "epoch": 7.15, "learning_rate": 0.00011270566307982291, "loss": 0.4026, "step": 997 }, { "epoch": 7.15, "learning_rate": 0.00011256204928462857, "loss": 0.3941, "step": 998 }, { "epoch": 7.16, "learning_rate": 0.00011241840916005043, "loss": 0.4095, "step": 999 }, { "epoch": 7.16, "learning_rate": 0.00011227474300715055, "loss": 0.4073, "step": 1000 }, { "epoch": 7.16, "learning_rate": 0.0001121310511270455, "loss": 0.3998, "step": 1001 }, { "epoch": 7.17, "learning_rate": 0.00011198733382090576, "loss": 0.3974, "step": 1002 }, { "epoch": 7.17, "learning_rate": 0.00011184359138995517, "loss": 0.4039, "step": 1003 }, { "epoch": 7.18, "learning_rate": 0.00011169982413547012, "loss": 0.401, "step": 1004 }, { "epoch": 7.18, "learning_rate": 0.00011155603235877912, "loss": 0.3985, "step": 1005 }, { "epoch": 7.19, "learning_rate": 0.00011141221636126202, "loss": 0.3997, "step": 1006 }, { "epoch": 7.19, "learning_rate": 0.00011126837644434953, "loss": 0.4027, "step": 1007 }, { "epoch": 7.2, "learning_rate": 0.00011112451290952237, "loss": 0.4002, "step": 1008 }, { "epoch": 7.2, "learning_rate": 0.00011098062605831084, "loss": 0.4048, "step": 1009 }, { "epoch": 7.21, "learning_rate": 0.00011083671619229408, "loss": 0.4022, "step": 1010 }, { "epoch": 7.21, "learning_rate": 0.00011069278361309945, "loss": 0.3989, "step": 1011 }, { "epoch": 7.22, "learning_rate": 0.00011054882862240199, "loss": 0.4082, "step": 1012 }, { "epoch": 7.22, "learning_rate": 0.00011040485152192363, "loss": 0.3989, "step": 1013 }, { "epoch": 7.22, "learning_rate": 0.00011026085261343271, "loss": 0.4064, "step": 1014 }, { "epoch": 7.23, "learning_rate": 0.00011011683219874323, "loss": 0.3947, "step": 1015 }, { "epoch": 7.23, "learning_rate": 0.00010997279057971425, "loss": 0.3972, "step": 1016 }, { "epoch": 7.24, "learning_rate": 0.00010982872805824936, "loss": 0.4013, "step": 1017 }, { "epoch": 7.24, "learning_rate": 0.00010968464493629584, "loss": 0.3996, "step": 1018 }, { "epoch": 7.25, "learning_rate": 0.00010954054151584425, "loss": 0.404, "step": 1019 }, { "epoch": 7.25, "learning_rate": 0.00010939641809892767, "loss": 0.4021, "step": 1020 }, { "epoch": 7.26, "learning_rate": 0.00010925227498762106, "loss": 0.402, "step": 1021 }, { "epoch": 7.26, "learning_rate": 0.00010910811248404065, "loss": 0.398, "step": 1022 }, { "epoch": 7.27, "learning_rate": 0.00010896393089034336, "loss": 0.4011, "step": 1023 }, { "epoch": 7.27, "learning_rate": 0.00010881973050872612, "loss": 0.4004, "step": 1024 }, { "epoch": 7.27, "learning_rate": 0.0001086755116414252, "loss": 0.3949, "step": 1025 }, { "epoch": 7.28, "learning_rate": 0.00010853127459071567, "loss": 0.4068, "step": 1026 }, { "epoch": 7.28, "learning_rate": 0.00010838701965891063, "loss": 0.4015, "step": 1027 }, { "epoch": 7.29, "learning_rate": 0.00010824274714836073, "loss": 0.3982, "step": 1028 }, { "epoch": 7.29, "learning_rate": 0.00010809845736145346, "loss": 0.401, "step": 1029 }, { "epoch": 7.3, "learning_rate": 0.00010795415060061243, "loss": 0.4076, "step": 1030 }, { "epoch": 7.3, "learning_rate": 0.00010780982716829698, "loss": 0.4029, "step": 1031 }, { "epoch": 7.31, "learning_rate": 0.00010766548736700124, "loss": 0.3954, "step": 1032 }, { "epoch": 7.31, "learning_rate": 0.00010752113149925378, "loss": 0.4047, "step": 1033 }, { "epoch": 7.32, "learning_rate": 0.00010737675986761677, "loss": 0.3969, "step": 1034 }, { "epoch": 7.32, "learning_rate": 0.00010723237277468538, "loss": 0.4052, "step": 1035 }, { "epoch": 7.33, "learning_rate": 0.0001070879705230873, "loss": 0.4001, "step": 1036 }, { "epoch": 7.33, "learning_rate": 0.00010694355341548188, "loss": 0.4037, "step": 1037 }, { "epoch": 7.33, "learning_rate": 0.0001067991217545597, "loss": 0.3952, "step": 1038 }, { "epoch": 7.34, "learning_rate": 0.00010665467584304178, "loss": 0.4021, "step": 1039 }, { "epoch": 7.34, "learning_rate": 0.00010651021598367906, "loss": 0.3973, "step": 1040 }, { "epoch": 7.35, "learning_rate": 0.00010636574247925161, "loss": 0.3979, "step": 1041 }, { "epoch": 7.35, "learning_rate": 0.00010622125563256821, "loss": 0.4043, "step": 1042 }, { "epoch": 7.36, "learning_rate": 0.0001060767557464656, "loss": 0.4024, "step": 1043 }, { "epoch": 7.36, "learning_rate": 0.00010593224312380776, "loss": 0.398, "step": 1044 }, { "epoch": 7.37, "learning_rate": 0.00010578771806748546, "loss": 0.4003, "step": 1045 }, { "epoch": 7.37, "learning_rate": 0.00010564318088041551, "loss": 0.3953, "step": 1046 }, { "epoch": 7.38, "learning_rate": 0.0001054986318655401, "loss": 0.3937, "step": 1047 }, { "epoch": 7.38, "learning_rate": 0.00010535407132582622, "loss": 0.4051, "step": 1048 }, { "epoch": 7.39, "learning_rate": 0.00010520949956426505, "loss": 0.398, "step": 1049 }, { "epoch": 7.39, "learning_rate": 0.00010506491688387127, "loss": 0.3946, "step": 1050 }, { "epoch": 7.39, "learning_rate": 0.00010492032358768247, "loss": 0.3993, "step": 1051 }, { "epoch": 7.4, "learning_rate": 0.00010477571997875848, "loss": 0.397, "step": 1052 }, { "epoch": 7.4, "learning_rate": 0.00010463110636018065, "loss": 0.3992, "step": 1053 }, { "epoch": 7.41, "learning_rate": 0.00010448648303505151, "loss": 0.4015, "step": 1054 }, { "epoch": 7.41, "learning_rate": 0.00010434185030649372, "loss": 0.3984, "step": 1055 }, { "epoch": 7.42, "learning_rate": 0.0001041972084776498, "loss": 0.4009, "step": 1056 }, { "epoch": 7.42, "learning_rate": 0.00010405255785168131, "loss": 0.4009, "step": 1057 }, { "epoch": 7.43, "learning_rate": 0.00010390789873176818, "loss": 0.401, "step": 1058 }, { "epoch": 7.43, "learning_rate": 0.0001037632314211082, "loss": 0.3966, "step": 1059 }, { "epoch": 7.44, "learning_rate": 0.00010361855622291637, "loss": 0.3975, "step": 1060 }, { "epoch": 7.44, "learning_rate": 0.00010347387344042408, "loss": 0.3994, "step": 1061 }, { "epoch": 7.44, "learning_rate": 0.00010332918337687879, "loss": 0.3993, "step": 1062 }, { "epoch": 7.45, "learning_rate": 0.00010318448633554308, "loss": 0.3985, "step": 1063 }, { "epoch": 7.45, "learning_rate": 0.0001030397826196943, "loss": 0.3957, "step": 1064 }, { "epoch": 7.46, "learning_rate": 0.00010289507253262358, "loss": 0.4014, "step": 1065 }, { "epoch": 7.46, "learning_rate": 0.00010275035637763563, "loss": 0.3962, "step": 1066 }, { "epoch": 7.47, "learning_rate": 0.00010260563445804776, "loss": 0.3978, "step": 1067 }, { "epoch": 7.47, "learning_rate": 0.0001024609070771893, "loss": 0.3981, "step": 1068 }, { "epoch": 7.48, "learning_rate": 0.00010231617453840119, "loss": 0.4024, "step": 1069 }, { "epoch": 7.48, "learning_rate": 0.00010217143714503508, "loss": 0.3965, "step": 1070 }, { "epoch": 7.49, "learning_rate": 0.00010202669520045278, "loss": 0.3952, "step": 1071 }, { "epoch": 7.49, "learning_rate": 0.00010188194900802566, "loss": 0.401, "step": 1072 }, { "epoch": 7.5, "learning_rate": 0.00010173719887113402, "loss": 0.3989, "step": 1073 }, { "epoch": 7.5, "learning_rate": 0.00010159244509316644, "loss": 0.3986, "step": 1074 }, { "epoch": 7.5, "learning_rate": 0.00010144768797751905, "loss": 0.3936, "step": 1075 }, { "epoch": 7.51, "learning_rate": 0.00010130292782759507, "loss": 0.3933, "step": 1076 }, { "epoch": 7.51, "learning_rate": 0.00010115816494680399, "loss": 0.3998, "step": 1077 }, { "epoch": 7.52, "learning_rate": 0.00010101339963856111, "loss": 0.3937, "step": 1078 }, { "epoch": 7.52, "learning_rate": 0.00010086863220628675, "loss": 0.3925, "step": 1079 }, { "epoch": 7.53, "learning_rate": 0.00010072386295340572, "loss": 0.399, "step": 1080 }, { "epoch": 7.53, "learning_rate": 0.00010057909218334665, "loss": 0.4014, "step": 1081 }, { "epoch": 7.54, "learning_rate": 0.00010043432019954131, "loss": 0.3977, "step": 1082 }, { "epoch": 7.54, "learning_rate": 0.00010028954730542406, "loss": 0.401, "step": 1083 }, { "epoch": 7.55, "learning_rate": 0.0001001447738044311, "loss": 0.3986, "step": 1084 }, { "epoch": 7.55, "learning_rate": 0.0001, "loss": 0.3989, "step": 1085 }, { "epoch": 7.56, "learning_rate": 9.985522619556893e-05, "loss": 0.3977, "step": 1086 }, { "epoch": 7.56, "learning_rate": 9.971045269457598e-05, "loss": 0.3971, "step": 1087 }, { "epoch": 7.56, "learning_rate": 9.956567980045872e-05, "loss": 0.3948, "step": 1088 }, { "epoch": 7.57, "learning_rate": 9.942090781665336e-05, "loss": 0.402, "step": 1089 }, { "epoch": 7.57, "learning_rate": 9.927613704659429e-05, "loss": 0.3976, "step": 1090 }, { "epoch": 7.58, "learning_rate": 9.913136779371326e-05, "loss": 0.395, "step": 1091 }, { "epoch": 7.58, "learning_rate": 9.898660036143893e-05, "loss": 0.3977, "step": 1092 }, { "epoch": 7.59, "learning_rate": 9.884183505319604e-05, "loss": 0.3967, "step": 1093 }, { "epoch": 7.59, "learning_rate": 9.869707217240497e-05, "loss": 0.3939, "step": 1094 }, { "epoch": 7.6, "learning_rate": 9.855231202248097e-05, "loss": 0.4004, "step": 1095 }, { "epoch": 7.6, "learning_rate": 9.840755490683357e-05, "loss": 0.4001, "step": 1096 }, { "epoch": 7.61, "learning_rate": 9.8262801128866e-05, "loss": 0.3912, "step": 1097 }, { "epoch": 7.61, "learning_rate": 9.811805099197438e-05, "loss": 0.397, "step": 1098 }, { "epoch": 7.61, "learning_rate": 9.797330479954725e-05, "loss": 0.3939, "step": 1099 }, { "epoch": 7.62, "learning_rate": 9.782856285496495e-05, "loss": 0.4022, "step": 1100 }, { "epoch": 7.62, "learning_rate": 9.76838254615988e-05, "loss": 0.3908, "step": 1101 }, { "epoch": 7.63, "learning_rate": 9.75390929228107e-05, "loss": 0.3905, "step": 1102 }, { "epoch": 7.63, "learning_rate": 9.739436554195227e-05, "loss": 0.3875, "step": 1103 }, { "epoch": 7.63, "eval_loss": 0.47556066513061523, "eval_runtime": 28.1986, "eval_samples_per_second": 7.093, "eval_steps_per_second": 0.887, "step": 1103 }, { "epoch": 8.0, "learning_rate": 9.72496436223644e-05, "loss": 0.3972, "step": 1104 }, { "epoch": 8.01, "learning_rate": 9.710492746737643e-05, "loss": 0.3897, "step": 1105 }, { "epoch": 8.01, "learning_rate": 9.696021738030575e-05, "loss": 0.3939, "step": 1106 }, { "epoch": 8.02, "learning_rate": 9.681551366445694e-05, "loss": 0.3945, "step": 1107 }, { "epoch": 8.02, "learning_rate": 9.667081662312124e-05, "loss": 0.3993, "step": 1108 }, { "epoch": 8.03, "learning_rate": 9.652612655957596e-05, "loss": 0.3915, "step": 1109 }, { "epoch": 8.03, "learning_rate": 9.638144377708367e-05, "loss": 0.3993, "step": 1110 }, { "epoch": 8.04, "learning_rate": 9.62367685788918e-05, "loss": 0.3994, "step": 1111 }, { "epoch": 8.04, "learning_rate": 9.609210126823185e-05, "loss": 0.3963, "step": 1112 }, { "epoch": 8.05, "learning_rate": 9.59474421483187e-05, "loss": 0.3897, "step": 1113 }, { "epoch": 8.05, "learning_rate": 9.580279152235019e-05, "loss": 0.3982, "step": 1114 }, { "epoch": 8.06, "learning_rate": 9.565814969350629e-05, "loss": 0.3981, "step": 1115 }, { "epoch": 8.06, "learning_rate": 9.551351696494854e-05, "loss": 0.394, "step": 1116 }, { "epoch": 8.06, "learning_rate": 9.536889363981936e-05, "loss": 0.4008, "step": 1117 }, { "epoch": 8.07, "learning_rate": 9.522428002124157e-05, "loss": 0.3981, "step": 1118 }, { "epoch": 8.07, "learning_rate": 9.507967641231756e-05, "loss": 0.3999, "step": 1119 }, { "epoch": 8.08, "learning_rate": 9.493508311612874e-05, "loss": 0.3974, "step": 1120 }, { "epoch": 8.08, "learning_rate": 9.479050043573497e-05, "loss": 0.4002, "step": 1121 }, { "epoch": 8.09, "learning_rate": 9.46459286741738e-05, "loss": 0.3947, "step": 1122 }, { "epoch": 8.09, "learning_rate": 9.450136813445994e-05, "loss": 0.3945, "step": 1123 }, { "epoch": 8.1, "learning_rate": 9.43568191195845e-05, "loss": 0.3955, "step": 1124 }, { "epoch": 8.1, "learning_rate": 9.421228193251452e-05, "loss": 0.3937, "step": 1125 }, { "epoch": 8.11, "learning_rate": 9.406775687619223e-05, "loss": 0.3938, "step": 1126 }, { "epoch": 8.11, "learning_rate": 9.39232442535344e-05, "loss": 0.3895, "step": 1127 }, { "epoch": 8.11, "learning_rate": 9.377874436743184e-05, "loss": 0.394, "step": 1128 }, { "epoch": 8.12, "learning_rate": 9.363425752074844e-05, "loss": 0.3909, "step": 1129 }, { "epoch": 8.12, "learning_rate": 9.348978401632101e-05, "loss": 0.397, "step": 1130 }, { "epoch": 8.13, "learning_rate": 9.334532415695824e-05, "loss": 0.3906, "step": 1131 }, { "epoch": 8.13, "learning_rate": 9.320087824544031e-05, "loss": 0.3944, "step": 1132 }, { "epoch": 8.14, "learning_rate": 9.305644658451813e-05, "loss": 0.3941, "step": 1133 }, { "epoch": 8.14, "learning_rate": 9.291202947691271e-05, "loss": 0.3929, "step": 1134 }, { "epoch": 8.15, "learning_rate": 9.276762722531463e-05, "loss": 0.3929, "step": 1135 }, { "epoch": 8.15, "learning_rate": 9.262324013238325e-05, "loss": 0.3959, "step": 1136 }, { "epoch": 8.16, "learning_rate": 9.247886850074622e-05, "loss": 0.3941, "step": 1137 }, { "epoch": 8.16, "learning_rate": 9.233451263299875e-05, "loss": 0.3916, "step": 1138 }, { "epoch": 8.17, "learning_rate": 9.219017283170302e-05, "loss": 0.3891, "step": 1139 }, { "epoch": 8.17, "learning_rate": 9.204584939938762e-05, "loss": 0.3934, "step": 1140 }, { "epoch": 8.17, "learning_rate": 9.19015426385466e-05, "loss": 0.3918, "step": 1141 }, { "epoch": 8.18, "learning_rate": 9.175725285163932e-05, "loss": 0.3883, "step": 1142 }, { "epoch": 8.18, "learning_rate": 9.161298034108941e-05, "loss": 0.3926, "step": 1143 }, { "epoch": 8.19, "learning_rate": 9.146872540928437e-05, "loss": 0.3964, "step": 1144 }, { "epoch": 8.19, "learning_rate": 9.132448835857483e-05, "loss": 0.3939, "step": 1145 }, { "epoch": 8.2, "learning_rate": 9.118026949127389e-05, "loss": 0.3945, "step": 1146 }, { "epoch": 8.2, "learning_rate": 9.103606910965666e-05, "loss": 0.3953, "step": 1147 }, { "epoch": 8.21, "learning_rate": 9.089188751595936e-05, "loss": 0.3906, "step": 1148 }, { "epoch": 8.21, "learning_rate": 9.074772501237897e-05, "loss": 0.3933, "step": 1149 }, { "epoch": 8.22, "learning_rate": 9.060358190107234e-05, "loss": 0.3933, "step": 1150 }, { "epoch": 8.22, "learning_rate": 9.045945848415573e-05, "loss": 0.3858, "step": 1151 }, { "epoch": 8.23, "learning_rate": 9.031535506370417e-05, "loss": 0.3883, "step": 1152 }, { "epoch": 8.23, "learning_rate": 9.017127194175068e-05, "loss": 0.3885, "step": 1153 }, { "epoch": 8.23, "learning_rate": 9.002720942028577e-05, "loss": 0.3952, "step": 1154 }, { "epoch": 8.24, "learning_rate": 8.98831678012568e-05, "loss": 0.3939, "step": 1155 }, { "epoch": 8.24, "learning_rate": 8.97391473865673e-05, "loss": 0.3893, "step": 1156 }, { "epoch": 8.25, "learning_rate": 8.959514847807639e-05, "loss": 0.3895, "step": 1157 }, { "epoch": 8.25, "learning_rate": 8.945117137759802e-05, "loss": 0.392, "step": 1158 }, { "epoch": 8.26, "learning_rate": 8.930721638690056e-05, "loss": 0.385, "step": 1159 }, { "epoch": 8.26, "learning_rate": 8.916328380770595e-05, "loss": 0.3933, "step": 1160 }, { "epoch": 8.27, "learning_rate": 8.901937394168917e-05, "loss": 0.3954, "step": 1161 }, { "epoch": 8.27, "learning_rate": 8.887548709047764e-05, "loss": 0.3983, "step": 1162 }, { "epoch": 8.28, "learning_rate": 8.873162355565046e-05, "loss": 0.3864, "step": 1163 }, { "epoch": 8.28, "learning_rate": 8.858778363873796e-05, "loss": 0.386, "step": 1164 }, { "epoch": 8.28, "learning_rate": 8.844396764122093e-05, "loss": 0.3981, "step": 1165 }, { "epoch": 8.29, "learning_rate": 8.830017586452993e-05, "loss": 0.3939, "step": 1166 }, { "epoch": 8.29, "learning_rate": 8.815640861004488e-05, "loss": 0.3939, "step": 1167 }, { "epoch": 8.3, "learning_rate": 8.801266617909427e-05, "loss": 0.3859, "step": 1168 }, { "epoch": 8.3, "learning_rate": 8.786894887295451e-05, "loss": 0.3954, "step": 1169 }, { "epoch": 8.31, "learning_rate": 8.772525699284946e-05, "loss": 0.3909, "step": 1170 }, { "epoch": 8.31, "learning_rate": 8.75815908399496e-05, "loss": 0.3878, "step": 1171 }, { "epoch": 8.32, "learning_rate": 8.743795071537146e-05, "loss": 0.3881, "step": 1172 }, { "epoch": 8.32, "learning_rate": 8.729433692017711e-05, "loss": 0.3908, "step": 1173 }, { "epoch": 8.33, "learning_rate": 8.715074975537338e-05, "loss": 0.3899, "step": 1174 }, { "epoch": 8.33, "learning_rate": 8.700718952191124e-05, "loss": 0.397, "step": 1175 }, { "epoch": 8.34, "learning_rate": 8.686365652068535e-05, "loss": 0.3852, "step": 1176 }, { "epoch": 8.34, "learning_rate": 8.672015105253319e-05, "loss": 0.3877, "step": 1177 }, { "epoch": 8.34, "learning_rate": 8.657667341823448e-05, "loss": 0.393, "step": 1178 }, { "epoch": 8.35, "learning_rate": 8.643322391851072e-05, "loss": 0.3941, "step": 1179 }, { "epoch": 8.35, "learning_rate": 8.628980285402439e-05, "loss": 0.3949, "step": 1180 }, { "epoch": 8.36, "learning_rate": 8.614641052537838e-05, "loss": 0.3884, "step": 1181 }, { "epoch": 8.36, "learning_rate": 8.600304723311534e-05, "loss": 0.3872, "step": 1182 }, { "epoch": 8.37, "learning_rate": 8.585971327771707e-05, "loss": 0.3789, "step": 1183 }, { "epoch": 8.37, "learning_rate": 8.571640895960387e-05, "loss": 0.3925, "step": 1184 }, { "epoch": 8.38, "learning_rate": 8.557313457913394e-05, "loss": 0.3883, "step": 1185 }, { "epoch": 8.38, "learning_rate": 8.54298904366027e-05, "loss": 0.3875, "step": 1186 }, { "epoch": 8.39, "learning_rate": 8.528667683224225e-05, "loss": 0.3867, "step": 1187 }, { "epoch": 8.39, "learning_rate": 8.514349406622064e-05, "loss": 0.3839, "step": 1188 }, { "epoch": 8.39, "learning_rate": 8.50003424386413e-05, "loss": 0.393, "step": 1189 }, { "epoch": 8.4, "learning_rate": 8.485722224954237e-05, "loss": 0.3953, "step": 1190 }, { "epoch": 8.4, "learning_rate": 8.471413379889609e-05, "loss": 0.3903, "step": 1191 }, { "epoch": 8.41, "learning_rate": 8.457107738660826e-05, "loss": 0.3897, "step": 1192 }, { "epoch": 8.41, "learning_rate": 8.44280533125174e-05, "loss": 0.3942, "step": 1193 }, { "epoch": 8.42, "learning_rate": 8.428506187639443e-05, "loss": 0.3957, "step": 1194 }, { "epoch": 8.42, "learning_rate": 8.414210337794166e-05, "loss": 0.3908, "step": 1195 }, { "epoch": 8.43, "learning_rate": 8.39991781167925e-05, "loss": 0.3829, "step": 1196 }, { "epoch": 8.43, "learning_rate": 8.385628639251066e-05, "loss": 0.3885, "step": 1197 }, { "epoch": 8.44, "learning_rate": 8.371342850458955e-05, "loss": 0.388, "step": 1198 }, { "epoch": 8.44, "learning_rate": 8.357060475245166e-05, "loss": 0.3863, "step": 1199 }, { "epoch": 8.45, "learning_rate": 8.342781543544798e-05, "loss": 0.3897, "step": 1200 }, { "epoch": 8.45, "learning_rate": 8.328506085285724e-05, "loss": 0.389, "step": 1201 }, { "epoch": 8.45, "learning_rate": 8.31423413038854e-05, "loss": 0.3897, "step": 1202 }, { "epoch": 8.46, "learning_rate": 8.299965708766505e-05, "loss": 0.3896, "step": 1203 }, { "epoch": 8.46, "learning_rate": 8.285700850325467e-05, "loss": 0.3903, "step": 1204 }, { "epoch": 8.47, "learning_rate": 8.271439584963802e-05, "loss": 0.392, "step": 1205 }, { "epoch": 8.47, "learning_rate": 8.257181942572365e-05, "loss": 0.3932, "step": 1206 }, { "epoch": 8.48, "learning_rate": 8.242927953034412e-05, "loss": 0.3879, "step": 1207 }, { "epoch": 8.48, "learning_rate": 8.22867764622554e-05, "loss": 0.3923, "step": 1208 }, { "epoch": 8.49, "learning_rate": 8.214431052013634e-05, "loss": 0.3884, "step": 1209 }, { "epoch": 8.49, "learning_rate": 8.200188200258791e-05, "loss": 0.3855, "step": 1210 }, { "epoch": 8.5, "learning_rate": 8.18594912081327e-05, "loss": 0.3859, "step": 1211 }, { "epoch": 8.5, "learning_rate": 8.171713843521418e-05, "loss": 0.3871, "step": 1212 }, { "epoch": 8.51, "learning_rate": 8.157482398219613e-05, "loss": 0.3807, "step": 1213 }, { "epoch": 8.51, "learning_rate": 8.143254814736211e-05, "loss": 0.3863, "step": 1214 }, { "epoch": 8.51, "learning_rate": 8.129031122891459e-05, "loss": 0.3901, "step": 1215 }, { "epoch": 8.52, "learning_rate": 8.114811352497458e-05, "loss": 0.3927, "step": 1216 }, { "epoch": 8.52, "learning_rate": 8.100595533358084e-05, "loss": 0.3809, "step": 1217 }, { "epoch": 8.53, "learning_rate": 8.086383695268938e-05, "loss": 0.3854, "step": 1218 }, { "epoch": 8.53, "learning_rate": 8.072175868017268e-05, "loss": 0.3926, "step": 1219 }, { "epoch": 8.54, "learning_rate": 8.057972081381927e-05, "loss": 0.3955, "step": 1220 }, { "epoch": 8.54, "learning_rate": 8.043772365133287e-05, "loss": 0.388, "step": 1221 }, { "epoch": 8.55, "learning_rate": 8.029576749033194e-05, "loss": 0.3881, "step": 1222 }, { "epoch": 8.55, "learning_rate": 8.015385262834906e-05, "loss": 0.3888, "step": 1223 }, { "epoch": 8.56, "learning_rate": 8.001197936283014e-05, "loss": 0.3851, "step": 1224 }, { "epoch": 8.56, "learning_rate": 7.987014799113397e-05, "loss": 0.3905, "step": 1225 }, { "epoch": 8.56, "learning_rate": 7.972835881053159e-05, "loss": 0.3913, "step": 1226 }, { "epoch": 8.57, "learning_rate": 7.958661211820545e-05, "loss": 0.3884, "step": 1227 }, { "epoch": 8.57, "learning_rate": 7.944490821124908e-05, "loss": 0.3938, "step": 1228 }, { "epoch": 8.58, "learning_rate": 7.930324738666627e-05, "loss": 0.3892, "step": 1229 }, { "epoch": 8.58, "learning_rate": 7.916162994137056e-05, "loss": 0.3819, "step": 1230 }, { "epoch": 8.59, "learning_rate": 7.902005617218454e-05, "loss": 0.3874, "step": 1231 }, { "epoch": 8.59, "learning_rate": 7.887852637583926e-05, "loss": 0.3873, "step": 1232 }, { "epoch": 8.6, "learning_rate": 7.873704084897365e-05, "loss": 0.3865, "step": 1233 }, { "epoch": 8.6, "learning_rate": 7.85955998881337e-05, "loss": 0.3914, "step": 1234 }, { "epoch": 8.61, "learning_rate": 7.845420378977223e-05, "loss": 0.3866, "step": 1235 }, { "epoch": 8.61, "learning_rate": 7.831285285024781e-05, "loss": 0.3829, "step": 1236 }, { "epoch": 8.62, "learning_rate": 7.817154736582452e-05, "loss": 0.383, "step": 1237 }, { "epoch": 8.62, "learning_rate": 7.803028763267108e-05, "loss": 0.3942, "step": 1238 }, { "epoch": 8.62, "learning_rate": 7.788907394686033e-05, "loss": 0.3779, "step": 1239 }, { "epoch": 8.63, "learning_rate": 7.774790660436858e-05, "loss": 0.3832, "step": 1240 }, { "epoch": 8.63, "eval_loss": 0.4780256748199463, "eval_runtime": 28.2756, "eval_samples_per_second": 7.073, "eval_steps_per_second": 0.884, "step": 1240 }, { "epoch": 9.0, "learning_rate": 7.760678590107507e-05, "loss": 0.3799, "step": 1241 }, { "epoch": 9.01, "learning_rate": 7.746571213276125e-05, "loss": 0.3877, "step": 1242 }, { "epoch": 9.01, "learning_rate": 7.732468559511016e-05, "loss": 0.3894, "step": 1243 }, { "epoch": 9.01, "learning_rate": 7.718370658370596e-05, "loss": 0.3867, "step": 1244 }, { "epoch": 9.02, "learning_rate": 7.704277539403304e-05, "loss": 0.3849, "step": 1245 }, { "epoch": 9.02, "learning_rate": 7.690189232147566e-05, "loss": 0.3834, "step": 1246 }, { "epoch": 9.03, "learning_rate": 7.676105766131726e-05, "loss": 0.3855, "step": 1247 }, { "epoch": 9.03, "learning_rate": 7.66202717087397e-05, "loss": 0.3831, "step": 1248 }, { "epoch": 9.04, "learning_rate": 7.647953475882285e-05, "loss": 0.3849, "step": 1249 }, { "epoch": 9.04, "learning_rate": 7.633884710654383e-05, "loss": 0.3867, "step": 1250 }, { "epoch": 9.05, "learning_rate": 7.619820904677641e-05, "loss": 0.3883, "step": 1251 }, { "epoch": 9.05, "learning_rate": 7.605762087429044e-05, "loss": 0.3872, "step": 1252 }, { "epoch": 9.06, "learning_rate": 7.591708288375125e-05, "loss": 0.3896, "step": 1253 }, { "epoch": 9.06, "learning_rate": 7.577659536971895e-05, "loss": 0.3916, "step": 1254 }, { "epoch": 9.06, "learning_rate": 7.56361586266478e-05, "loss": 0.3898, "step": 1255 }, { "epoch": 9.07, "learning_rate": 7.549577294888581e-05, "loss": 0.3884, "step": 1256 }, { "epoch": 9.07, "learning_rate": 7.535543863067377e-05, "loss": 0.391, "step": 1257 }, { "epoch": 9.08, "learning_rate": 7.521515596614496e-05, "loss": 0.3867, "step": 1258 }, { "epoch": 9.08, "learning_rate": 7.507492524932433e-05, "loss": 0.3866, "step": 1259 }, { "epoch": 9.09, "learning_rate": 7.493474677412794e-05, "loss": 0.3903, "step": 1260 }, { "epoch": 9.09, "learning_rate": 7.479462083436241e-05, "loss": 0.3917, "step": 1261 }, { "epoch": 9.1, "learning_rate": 7.465454772372425e-05, "loss": 0.3833, "step": 1262 }, { "epoch": 9.1, "learning_rate": 7.451452773579915e-05, "loss": 0.3872, "step": 1263 }, { "epoch": 9.11, "learning_rate": 7.437456116406152e-05, "loss": 0.385, "step": 1264 }, { "epoch": 9.11, "learning_rate": 7.423464830187386e-05, "loss": 0.3861, "step": 1265 }, { "epoch": 9.12, "learning_rate": 7.409478944248602e-05, "loss": 0.3788, "step": 1266 }, { "epoch": 9.12, "learning_rate": 7.39549848790347e-05, "loss": 0.3886, "step": 1267 }, { "epoch": 9.12, "learning_rate": 7.381523490454278e-05, "loss": 0.3845, "step": 1268 }, { "epoch": 9.13, "learning_rate": 7.367553981191875e-05, "loss": 0.3844, "step": 1269 }, { "epoch": 9.13, "learning_rate": 7.353589989395604e-05, "loss": 0.3828, "step": 1270 }, { "epoch": 9.14, "learning_rate": 7.339631544333249e-05, "loss": 0.3886, "step": 1271 }, { "epoch": 9.14, "learning_rate": 7.325678675260961e-05, "loss": 0.3829, "step": 1272 }, { "epoch": 9.15, "learning_rate": 7.31173141142321e-05, "loss": 0.3859, "step": 1273 }, { "epoch": 9.15, "learning_rate": 7.297789782052717e-05, "loss": 0.3829, "step": 1274 }, { "epoch": 9.16, "learning_rate": 7.283853816370386e-05, "loss": 0.387, "step": 1275 }, { "epoch": 9.16, "learning_rate": 7.269923543585258e-05, "loss": 0.38, "step": 1276 }, { "epoch": 9.17, "learning_rate": 7.255998992894443e-05, "loss": 0.3875, "step": 1277 }, { "epoch": 9.17, "learning_rate": 7.242080193483051e-05, "loss": 0.3846, "step": 1278 }, { "epoch": 9.18, "learning_rate": 7.228167174524148e-05, "loss": 0.3852, "step": 1279 }, { "epoch": 9.18, "learning_rate": 7.214259965178674e-05, "loss": 0.3833, "step": 1280 }, { "epoch": 9.18, "learning_rate": 7.200358594595392e-05, "loss": 0.3797, "step": 1281 }, { "epoch": 9.19, "learning_rate": 7.186463091910838e-05, "loss": 0.3818, "step": 1282 }, { "epoch": 9.19, "learning_rate": 7.172573486249241e-05, "loss": 0.3867, "step": 1283 }, { "epoch": 9.2, "learning_rate": 7.15868980672247e-05, "loss": 0.3807, "step": 1284 }, { "epoch": 9.2, "learning_rate": 7.14481208242998e-05, "loss": 0.384, "step": 1285 }, { "epoch": 9.21, "learning_rate": 7.130940342458732e-05, "loss": 0.3863, "step": 1286 }, { "epoch": 9.21, "learning_rate": 7.117074615883162e-05, "loss": 0.3842, "step": 1287 }, { "epoch": 9.22, "learning_rate": 7.10321493176508e-05, "loss": 0.385, "step": 1288 }, { "epoch": 9.22, "learning_rate": 7.089361319153649e-05, "loss": 0.3844, "step": 1289 }, { "epoch": 9.23, "learning_rate": 7.075513807085299e-05, "loss": 0.3841, "step": 1290 }, { "epoch": 9.23, "learning_rate": 7.061672424583677e-05, "loss": 0.3828, "step": 1291 }, { "epoch": 9.23, "learning_rate": 7.047837200659579e-05, "loss": 0.3814, "step": 1292 }, { "epoch": 9.24, "learning_rate": 7.034008164310892e-05, "loss": 0.3807, "step": 1293 }, { "epoch": 9.24, "learning_rate": 7.020185344522543e-05, "loss": 0.3787, "step": 1294 }, { "epoch": 9.25, "learning_rate": 7.006368770266421e-05, "loss": 0.3809, "step": 1295 }, { "epoch": 9.25, "learning_rate": 6.992558470501325e-05, "loss": 0.3784, "step": 1296 }, { "epoch": 9.26, "learning_rate": 6.978754474172909e-05, "loss": 0.3808, "step": 1297 }, { "epoch": 9.26, "learning_rate": 6.964956810213605e-05, "loss": 0.3788, "step": 1298 }, { "epoch": 9.27, "learning_rate": 6.95116550754259e-05, "loss": 0.3927, "step": 1299 }, { "epoch": 9.27, "learning_rate": 6.937380595065685e-05, "loss": 0.3848, "step": 1300 }, { "epoch": 9.28, "learning_rate": 6.923602101675337e-05, "loss": 0.3848, "step": 1301 }, { "epoch": 9.28, "learning_rate": 6.909830056250527e-05, "loss": 0.3805, "step": 1302 }, { "epoch": 9.29, "learning_rate": 6.89606448765673e-05, "loss": 0.3786, "step": 1303 }, { "epoch": 9.29, "learning_rate": 6.88230542474584e-05, "loss": 0.3823, "step": 1304 }, { "epoch": 9.29, "learning_rate": 6.868552896356117e-05, "loss": 0.3795, "step": 1305 }, { "epoch": 9.3, "learning_rate": 6.854806931312128e-05, "loss": 0.3875, "step": 1306 }, { "epoch": 9.3, "learning_rate": 6.841067558424677e-05, "loss": 0.3849, "step": 1307 }, { "epoch": 9.31, "learning_rate": 6.827334806490757e-05, "loss": 0.3856, "step": 1308 }, { "epoch": 9.31, "learning_rate": 6.813608704293484e-05, "loss": 0.3851, "step": 1309 }, { "epoch": 9.32, "learning_rate": 6.799889280602031e-05, "loss": 0.379, "step": 1310 }, { "epoch": 9.32, "learning_rate": 6.786176564171582e-05, "loss": 0.3843, "step": 1311 }, { "epoch": 9.33, "learning_rate": 6.77247058374325e-05, "loss": 0.3855, "step": 1312 }, { "epoch": 9.33, "learning_rate": 6.758771368044042e-05, "loss": 0.3795, "step": 1313 }, { "epoch": 9.34, "learning_rate": 6.74507894578678e-05, "loss": 0.3809, "step": 1314 }, { "epoch": 9.34, "learning_rate": 6.731393345670051e-05, "loss": 0.3858, "step": 1315 }, { "epoch": 9.35, "learning_rate": 6.717714596378137e-05, "loss": 0.3788, "step": 1316 }, { "epoch": 9.35, "learning_rate": 6.704042726580972e-05, "loss": 0.3801, "step": 1317 }, { "epoch": 9.35, "learning_rate": 6.69037776493406e-05, "loss": 0.3835, "step": 1318 }, { "epoch": 9.36, "learning_rate": 6.67671974007843e-05, "loss": 0.3817, "step": 1319 }, { "epoch": 9.36, "learning_rate": 6.663068680640574e-05, "loss": 0.3846, "step": 1320 }, { "epoch": 9.37, "learning_rate": 6.649424615232382e-05, "loss": 0.3826, "step": 1321 }, { "epoch": 9.37, "learning_rate": 6.635787572451083e-05, "loss": 0.379, "step": 1322 }, { "epoch": 9.38, "learning_rate": 6.622157580879195e-05, "loss": 0.3808, "step": 1323 }, { "epoch": 9.38, "learning_rate": 6.608534669084449e-05, "loss": 0.3782, "step": 1324 }, { "epoch": 9.39, "learning_rate": 6.59491886561974e-05, "loss": 0.386, "step": 1325 }, { "epoch": 9.39, "learning_rate": 6.58131019902306e-05, "loss": 0.3801, "step": 1326 }, { "epoch": 9.4, "learning_rate": 6.567708697817453e-05, "loss": 0.3838, "step": 1327 }, { "epoch": 9.4, "learning_rate": 6.554114390510935e-05, "loss": 0.379, "step": 1328 }, { "epoch": 9.4, "learning_rate": 6.540527305596449e-05, "loss": 0.3781, "step": 1329 }, { "epoch": 9.41, "learning_rate": 6.526947471551798e-05, "loss": 0.3812, "step": 1330 }, { "epoch": 9.41, "learning_rate": 6.513374916839587e-05, "loss": 0.3787, "step": 1331 }, { "epoch": 9.42, "learning_rate": 6.499809669907169e-05, "loss": 0.3865, "step": 1332 }, { "epoch": 9.42, "learning_rate": 6.486251759186572e-05, "loss": 0.3822, "step": 1333 }, { "epoch": 9.43, "learning_rate": 6.472701213094456e-05, "loss": 0.3791, "step": 1334 }, { "epoch": 9.43, "learning_rate": 6.45915806003204e-05, "loss": 0.3839, "step": 1335 }, { "epoch": 9.44, "learning_rate": 6.445622328385047e-05, "loss": 0.3788, "step": 1336 }, { "epoch": 9.44, "learning_rate": 6.432094046523646e-05, "loss": 0.3763, "step": 1337 }, { "epoch": 9.45, "learning_rate": 6.418573242802397e-05, "loss": 0.3868, "step": 1338 }, { "epoch": 9.45, "learning_rate": 6.405059945560179e-05, "loss": 0.3823, "step": 1339 }, { "epoch": 9.46, "learning_rate": 6.391554183120138e-05, "loss": 0.3752, "step": 1340 }, { "epoch": 9.46, "learning_rate": 6.378055983789637e-05, "loss": 0.3808, "step": 1341 }, { "epoch": 9.46, "learning_rate": 6.364565375860174e-05, "loss": 0.3863, "step": 1342 }, { "epoch": 9.47, "learning_rate": 6.351082387607345e-05, "loss": 0.3849, "step": 1343 }, { "epoch": 9.47, "learning_rate": 6.337607047290774e-05, "loss": 0.3801, "step": 1344 }, { "epoch": 9.48, "learning_rate": 6.324139383154049e-05, "loss": 0.3777, "step": 1345 }, { "epoch": 9.48, "learning_rate": 6.310679423424679e-05, "loss": 0.3814, "step": 1346 }, { "epoch": 9.49, "learning_rate": 6.297227196314018e-05, "loss": 0.3865, "step": 1347 }, { "epoch": 9.49, "learning_rate": 6.283782730017218e-05, "loss": 0.3758, "step": 1348 }, { "epoch": 9.5, "learning_rate": 6.270346052713154e-05, "loss": 0.3815, "step": 1349 }, { "epoch": 9.5, "learning_rate": 6.25691719256439e-05, "loss": 0.3797, "step": 1350 }, { "epoch": 9.51, "learning_rate": 6.243496177717099e-05, "loss": 0.3763, "step": 1351 }, { "epoch": 9.51, "learning_rate": 6.230083036301004e-05, "loss": 0.3775, "step": 1352 }, { "epoch": 9.51, "learning_rate": 6.216677796429342e-05, "loss": 0.3753, "step": 1353 }, { "epoch": 9.52, "learning_rate": 6.20328048619877e-05, "loss": 0.3835, "step": 1354 }, { "epoch": 9.52, "learning_rate": 6.189891133689342e-05, "loss": 0.3751, "step": 1355 }, { "epoch": 9.53, "learning_rate": 6.176509766964421e-05, "loss": 0.3806, "step": 1356 }, { "epoch": 9.53, "learning_rate": 6.163136414070635e-05, "loss": 0.3853, "step": 1357 }, { "epoch": 9.54, "learning_rate": 6.149771103037821e-05, "loss": 0.3844, "step": 1358 }, { "epoch": 9.54, "learning_rate": 6.136413861878953e-05, "loss": 0.3819, "step": 1359 }, { "epoch": 9.55, "learning_rate": 6.123064718590099e-05, "loss": 0.3771, "step": 1360 }, { "epoch": 9.55, "learning_rate": 6.10972370115034e-05, "loss": 0.3866, "step": 1361 }, { "epoch": 9.56, "learning_rate": 6.096390837521746e-05, "loss": 0.3799, "step": 1362 }, { "epoch": 9.56, "learning_rate": 6.0830661556492806e-05, "loss": 0.3767, "step": 1363 }, { "epoch": 9.57, "learning_rate": 6.069749683460765e-05, "loss": 0.3779, "step": 1364 }, { "epoch": 9.57, "learning_rate": 6.0564414488668165e-05, "loss": 0.3843, "step": 1365 }, { "epoch": 9.57, "learning_rate": 6.043141479760778e-05, "loss": 0.3739, "step": 1366 }, { "epoch": 9.58, "learning_rate": 6.0298498040186794e-05, "loss": 0.3831, "step": 1367 }, { "epoch": 9.58, "learning_rate": 6.0165664494991594e-05, "loss": 0.3801, "step": 1368 }, { "epoch": 9.59, "learning_rate": 6.0032914440434186e-05, "loss": 0.3844, "step": 1369 }, { "epoch": 9.59, "learning_rate": 5.9900248154751616e-05, "loss": 0.3854, "step": 1370 }, { "epoch": 9.6, "learning_rate": 5.9767665916005286e-05, "loss": 0.382, "step": 1371 }, { "epoch": 9.6, "learning_rate": 5.9635168002080564e-05, "loss": 0.3805, "step": 1372 }, { "epoch": 9.61, "learning_rate": 5.950275469068588e-05, "loss": 0.3775, "step": 1373 }, { "epoch": 9.61, "learning_rate": 5.937042625935252e-05, "loss": 0.379, "step": 1374 }, { "epoch": 9.62, "learning_rate": 5.923818298543379e-05, "loss": 0.3786, "step": 1375 }, { "epoch": 9.62, "learning_rate": 5.9106025146104525e-05, "loss": 0.3797, "step": 1376 }, { "epoch": 9.63, "learning_rate": 5.897395301836051e-05, "loss": 0.3748, "step": 1377 }, { "epoch": 9.63, "learning_rate": 5.8841966879017816e-05, "loss": 0.3682, "step": 1378 }, { "epoch": 9.63, "eval_loss": 0.4814474880695343, "eval_runtime": 28.2712, "eval_samples_per_second": 7.074, "eval_steps_per_second": 0.884, "step": 1378 }, { "epoch": 9.63, "step": 1378, "total_flos": 5.88729111501865e+16, "train_loss": 0.4462547982638737, "train_runtime": 110414.5531, "train_samples_per_second": 2.524, "train_steps_per_second": 0.02 } ], "logging_steps": 1, "max_steps": 2170, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.88729111501865e+16, "trial_name": null, "trial_params": null }