diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7374 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9993876301285977, + "eval_steps": 500, + "global_step": 1224, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.000499999176532081, + "loss": 2.1594, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999967061337493, + "loss": 2.1003, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999925888212787, + "loss": 1.8351, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999868246217933, + "loss": 1.9533, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999794135732661, + "loss": 1.9901, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999703557245193, + "loss": 1.8264, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999596511352234, + "loss": 1.9285, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999472998758978, + "loss": 1.811, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999333020279093, + "loss": 1.8615, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999176576834721, + "loss": 1.7243, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999003669456472, + "loss": 1.9144, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999003669456472, + "loss": 1.6444, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004998814299283415, + "loss": 1.6947, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004998608467563072, + "loss": 1.6256, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004998386175651409, + "loss": 1.6551, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004998147425012829, + "loss": 1.8031, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499789221722016, + "loss": 1.7751, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004997620553954645, + "loss": 1.6896, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004997332437005932, + "loss": 1.675, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499702786827206, + "loss": 1.5378, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004996706849759452, + "loss": 1.6407, + "step": 21 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004996369383582895, + "loss": 1.5797, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499601547196553, + "loss": 1.5717, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004995645117238834, + "loss": 1.5683, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004995258321842611, + "loss": 1.6953, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004994855088324968, + "loss": 1.5689, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004994435419342305, + "loss": 1.5684, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004993999317659293, + "loss": 1.592, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004993546786148858, + "loss": 1.5773, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004993077827792162, + "loss": 1.7244, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004992592445678582, + "loss": 1.5262, + "step": 31 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004992090643005691, + "loss": 1.5647, + "step": 32 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004991572423079236, + "loss": 1.5838, + "step": 33 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004991037789313117, + "loss": 1.5977, + "step": 34 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004990486745229364, + "loss": 1.4692, + "step": 35 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004989919294458114, + "loss": 1.5728, + "step": 36 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004989335440737586, + "loss": 1.4687, + "step": 37 + }, + { + "epoch": 0.03, + "learning_rate": 0.000498873518791406, + "loss": 1.5257, + "step": 38 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004988118539941848, + "loss": 1.5172, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004987485500883267, + "loss": 1.6303, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004986836074908615, + "loss": 1.5362, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004986170266296148, + "loss": 1.4999, + "step": 42 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004985488079432037, + "loss": 1.3966, + "step": 43 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004984789518810357, + "loss": 1.5622, + "step": 44 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004984074589033044, + "loss": 1.615, + "step": 45 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004983343294809875, + "loss": 1.3983, + "step": 46 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004982595640958427, + "loss": 1.5111, + "step": 47 + }, + { + "epoch": 0.04, + "learning_rate": 0.000498183163240405, + "loss": 1.602, + "step": 48 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004981051274179839, + "loss": 1.487, + "step": 49 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004980254571426594, + "loss": 1.4709, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004979441529392784, + "loss": 1.5622, + "step": 51 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004978612153434526, + "loss": 1.6075, + "step": 52 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004977766449015534, + "loss": 1.4113, + "step": 53 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004976904421707092, + "loss": 1.3687, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004976026077188013, + "loss": 1.4445, + "step": 55 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004975131421244607, + "loss": 1.511, + "step": 56 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004974220459770638, + "loss": 1.6073, + "step": 57 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004973293198767285, + "loss": 1.5208, + "step": 58 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004972349644343108, + "loss": 1.4489, + "step": 59 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004971389802713999, + "loss": 1.5419, + "step": 60 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004970413680203149, + "loss": 1.4473, + "step": 61 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004969421283241003, + "loss": 1.3978, + "step": 62 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004968412618365216, + "loss": 1.4488, + "step": 63 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004967387692220614, + "loss": 1.4734, + "step": 64 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004966346511559149, + "loss": 1.4268, + "step": 65 + }, + { + "epoch": 0.05, + "learning_rate": 0.000496528908323985, + "loss": 1.5527, + "step": 66 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004964215414228786, + "loss": 1.4059, + "step": 67 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004963125511599009, + "loss": 1.4936, + "step": 68 + }, + { + "epoch": 0.06, + "learning_rate": 0.000496201938253052, + "loss": 1.5675, + "step": 69 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004960897034310215, + "loss": 1.4432, + "step": 70 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004959758474331833, + "loss": 1.3855, + "step": 71 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004958603710095916, + "loss": 1.4734, + "step": 72 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004957432749209755, + "loss": 1.3273, + "step": 73 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004956245599387339, + "loss": 1.4347, + "step": 74 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004955042268449307, + "loss": 1.4878, + "step": 75 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004953822764322895, + "loss": 1.5084, + "step": 76 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004952587095041882, + "loss": 1.4071, + "step": 77 + }, + { + "epoch": 0.06, + "learning_rate": 0.000495133526874654, + "loss": 1.4971, + "step": 78 + }, + { + "epoch": 0.06, + "learning_rate": 0.000495006729368358, + "loss": 1.5107, + "step": 79 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004948783178206096, + "loss": 1.4152, + "step": 80 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004947482930773512, + "loss": 1.6487, + "step": 81 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004946166559951523, + "loss": 1.5631, + "step": 82 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004944834074412042, + "loss": 1.3929, + "step": 83 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004943485482933144, + "loss": 1.3685, + "step": 84 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004942120794399002, + "loss": 1.493, + "step": 85 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004940740017799833, + "loss": 1.4697, + "step": 86 + }, + { + "epoch": 0.07, + "learning_rate": 0.000493934316223184, + "loss": 1.3028, + "step": 87 + }, + { + "epoch": 0.07, + "learning_rate": 0.000493793023689715, + "loss": 1.4564, + "step": 88 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004936501251103751, + "loss": 1.466, + "step": 89 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004935056214265435, + "loss": 1.3609, + "step": 90 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004933595135901732, + "loss": 1.5026, + "step": 91 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004932118025637854, + "loss": 1.5779, + "step": 92 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004930624893204624, + "loss": 1.5107, + "step": 93 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004929115748438414, + "loss": 1.4332, + "step": 94 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004927590601281084, + "loss": 1.4105, + "step": 95 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004926049461779909, + "loss": 1.3919, + "step": 96 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004924492340087524, + "loss": 1.436, + "step": 97 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004922919246461845, + "loss": 1.3344, + "step": 98 + }, + { + "epoch": 0.08, + "learning_rate": 0.000492133019126601, + "loss": 1.5432, + "step": 99 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004919725184968307, + "loss": 1.5113, + "step": 100 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004918104238142104, + "loss": 1.4125, + "step": 101 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004916467361465784, + "loss": 1.3211, + "step": 102 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004914814565722671, + "loss": 1.4846, + "step": 103 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004913145861800957, + "loss": 1.6591, + "step": 104 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004911461260693639, + "loss": 1.3527, + "step": 105 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004909760773498432, + "loss": 1.6009, + "step": 106 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004908044411417712, + "loss": 1.5183, + "step": 107 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004906312185758431, + "loss": 1.3647, + "step": 108 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004904564107932048, + "loss": 1.3824, + "step": 109 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004902800189454451, + "loss": 1.4677, + "step": 110 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004901020441945881, + "loss": 1.4092, + "step": 111 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004899224877130858, + "loss": 1.3776, + "step": 112 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004897413506838104, + "loss": 1.3135, + "step": 113 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004895586343000458, + "loss": 1.5419, + "step": 114 + }, + { + "epoch": 0.09, + "learning_rate": 0.000489374339765481, + "loss": 1.5139, + "step": 115 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004891884682942012, + "loss": 1.4482, + "step": 116 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004890010211106795, + "loss": 1.4793, + "step": 117 + }, + { + "epoch": 0.1, + "learning_rate": 0.00048881199944977, + "loss": 1.4989, + "step": 118 + }, + { + "epoch": 0.1, + "learning_rate": 0.000488621404556699, + "loss": 1.3618, + "step": 119 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004884292376870567, + "loss": 1.3491, + "step": 120 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004882355001067892, + "loss": 1.5396, + "step": 121 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004880401930921897, + "loss": 1.5266, + "step": 122 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004878433179298909, + "loss": 1.3162, + "step": 123 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004876448759168558, + "loss": 1.4951, + "step": 124 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004874448683603695, + "loss": 1.3594, + "step": 125 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004872432965780303, + "loss": 1.2981, + "step": 126 + }, + { + "epoch": 0.1, + "learning_rate": 0.00048704016189774147, + "loss": 1.3909, + "step": 127 + }, + { + "epoch": 0.1, + "learning_rate": 0.00048683546565770215, + "loss": 1.2777, + "step": 128 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048662920920639866, + "loss": 1.2185, + "step": 129 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048642139390259545, + "loss": 1.4069, + "step": 130 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048621202111532653, + "loss": 1.3784, + "step": 131 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048600109222388604, + "loss": 1.3602, + "step": 132 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048578860861781935, + "loss": 1.4951, + "step": 133 + }, + { + "epoch": 0.11, + "learning_rate": 0.000485574571696914, + "loss": 1.3451, + "step": 134 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004853589828711902, + "loss": 1.3153, + "step": 135 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004851418435608919, + "loss": 1.3359, + "step": 136 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004849231551964771, + "loss": 1.4878, + "step": 137 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004847029192186086, + "loss": 1.4207, + "step": 138 + }, + { + "epoch": 0.11, + "learning_rate": 0.00048448113707814456, + "loss": 1.3223, + "step": 139 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004842578102361287, + "loss": 1.3065, + "step": 140 + }, + { + "epoch": 0.12, + "learning_rate": 0.000484032940163781, + "loss": 1.2831, + "step": 141 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004838065283424878, + "loss": 1.4204, + "step": 142 + }, + { + "epoch": 0.12, + "learning_rate": 0.000483578576263792, + "loss": 1.5151, + "step": 143 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004833490854293835, + "loss": 1.4167, + "step": 144 + }, + { + "epoch": 0.12, + "learning_rate": 0.00048311805735108893, + "loss": 1.4353, + "step": 145 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004828854935508621, + "loss": 1.4536, + "step": 146 + }, + { + "epoch": 0.12, + "learning_rate": 0.00048265139556077344, + "loss": 1.4906, + "step": 147 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004824157649230005, + "loss": 1.3239, + "step": 148 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004821786031898176, + "loss": 1.3811, + "step": 149 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004819399119235852, + "loss": 1.5675, + "step": 150 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004816996926967401, + "loss": 1.2896, + "step": 151 + }, + { + "epoch": 0.12, + "learning_rate": 0.000481457947091785, + "loss": 1.367, + "step": 152 + }, + { + "epoch": 0.12, + "learning_rate": 0.00048121467670127796, + "loss": 1.3822, + "step": 153 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004809698831278217, + "loss": 1.5132, + "step": 154 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004807235679840536, + "loss": 1.2856, + "step": 155 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048047573289263445, + "loss": 1.4076, + "step": 156 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048022637948623843, + "loss": 1.3933, + "step": 157 + }, + { + "epoch": 0.13, + "learning_rate": 0.00047997550940754165, + "loss": 1.2479, + "step": 158 + }, + { + "epoch": 0.13, + "learning_rate": 0.00047972312430921184, + "loss": 1.5397, + "step": 159 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004794692258538972, + "loss": 1.4965, + "step": 160 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004792138157142157, + "loss": 1.4688, + "step": 161 + }, + { + "epoch": 0.13, + "learning_rate": 0.00047895689557274373, + "loss": 1.3496, + "step": 162 + }, + { + "epoch": 0.13, + "learning_rate": 0.00047869846712200525, + "loss": 1.3115, + "step": 163 + }, + { + "epoch": 0.13, + "learning_rate": 0.00047843853206446055, + "loss": 1.5092, + "step": 164 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004781770921124951, + "loss": 1.494, + "step": 165 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047791414898840816, + "loss": 1.3489, + "step": 166 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047764970442440164, + "loss": 1.391, + "step": 167 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047738376016256837, + "loss": 1.3822, + "step": 168 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047711631795488093, + "loss": 1.3905, + "step": 169 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004768473795631799, + "loss": 1.4504, + "step": 170 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047657694675916254, + "loss": 1.2375, + "step": 171 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004763050213243705, + "loss": 1.6112, + "step": 172 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047603160505017893, + "loss": 1.5109, + "step": 173 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047575669973778413, + "loss": 1.3239, + "step": 174 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047548030719819155, + "loss": 1.3143, + "step": 175 + }, + { + "epoch": 0.14, + "learning_rate": 0.00047520242925220445, + "loss": 1.3333, + "step": 176 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004749230677304114, + "loss": 1.3209, + "step": 177 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004746422244731743, + "loss": 1.3362, + "step": 178 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004743599013306166, + "loss": 1.3013, + "step": 179 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004740761001626106, + "loss": 1.4733, + "step": 180 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047379082283876563, + "loss": 1.2937, + "step": 181 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004735040712384154, + "loss": 1.4254, + "step": 182 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047321584725060597, + "loss": 1.3224, + "step": 183 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004729261527740829, + "loss": 1.4175, + "step": 184 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004726349897172791, + "loss": 1.3974, + "step": 185 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047234235999830205, + "loss": 1.2746, + "step": 186 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004720482655449212, + "loss": 1.4266, + "step": 187 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004717527082945554, + "loss": 1.3699, + "step": 188 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047145569019425994, + "loss": 1.3896, + "step": 189 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047115721320071385, + "loss": 1.3213, + "step": 190 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047085727928020696, + "loss": 1.2941, + "step": 191 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047055589040862676, + "loss": 1.3324, + "step": 192 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047025304857144614, + "loss": 1.3306, + "step": 193 + }, + { + "epoch": 0.16, + "learning_rate": 0.00046994875576370914, + "loss": 1.4135, + "step": 194 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004696430139900188, + "loss": 1.4713, + "step": 195 + }, + { + "epoch": 0.16, + "learning_rate": 0.00046933582526452336, + "loss": 1.3872, + "step": 196 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004690271916109034, + "loss": 1.2793, + "step": 197 + }, + { + "epoch": 0.16, + "learning_rate": 0.00046871711506235814, + "loss": 1.5601, + "step": 198 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004684055976615924, + "loss": 1.2165, + "step": 199 + }, + { + "epoch": 0.16, + "learning_rate": 0.00046809264146080275, + "loss": 1.3709, + "step": 200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004677782485216644, + "loss": 1.2739, + "step": 201 + }, + { + "epoch": 0.16, + "learning_rate": 0.00046746242091531733, + "loss": 1.2434, + "step": 202 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046714516072235277, + "loss": 1.2875, + "step": 203 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004668264700327994, + "loss": 1.2939, + "step": 204 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046650635094610973, + "loss": 1.3576, + "step": 205 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046618480557114605, + "loss": 1.3555, + "step": 206 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004658618360261669, + "loss": 1.3495, + "step": 207 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004655374444388127, + "loss": 1.4241, + "step": 208 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004652116329460919, + "loss": 1.317, + "step": 209 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046488440369436715, + "loss": 1.3761, + "step": 210 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046455575883934066, + "loss": 1.3128, + "step": 211 + }, + { + "epoch": 0.17, + "learning_rate": 0.00046422570054604047, + "loss": 1.2786, + "step": 212 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004638942309888058, + "loss": 1.3711, + "step": 213 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004635613523512731, + "loss": 1.3548, + "step": 214 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004632270668263614, + "loss": 1.3002, + "step": 215 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004628913766162579, + "loss": 1.2433, + "step": 216 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004625542839324036, + "loss": 1.2455, + "step": 217 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004622157909954785, + "loss": 1.3323, + "step": 218 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046187590003538725, + "loss": 1.3168, + "step": 219 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046153461329124433, + "loss": 1.229, + "step": 220 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046119193301135915, + "loss": 1.3456, + "step": 221 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046084786145322143, + "loss": 1.2573, + "step": 222 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046050240088348635, + "loss": 1.4324, + "step": 223 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004601555535779594, + "loss": 1.2507, + "step": 224 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004598073218215817, + "loss": 1.344, + "step": 225 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004594577079084146, + "loss": 1.2898, + "step": 226 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004591067141416249, + "loss": 1.3311, + "step": 227 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045875434283346937, + "loss": 1.3894, + "step": 228 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045840059630527987, + "loss": 1.2656, + "step": 229 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004580454768874477, + "loss": 1.2229, + "step": 230 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045768898691940843, + "loss": 1.4048, + "step": 231 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045733112874962636, + "loss": 1.2412, + "step": 232 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004569719047355795, + "loss": 1.3517, + "step": 233 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045661131724374326, + "loss": 1.2536, + "step": 234 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045624936864957554, + "loss": 1.6123, + "step": 235 + }, + { + "epoch": 0.19, + "learning_rate": 0.00045588606133750096, + "loss": 1.3931, + "step": 236 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004555213977008946, + "loss": 1.3345, + "step": 237 + }, + { + "epoch": 0.19, + "learning_rate": 0.000455155380142067, + "loss": 1.2914, + "step": 238 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045478801107224796, + "loss": 1.2884, + "step": 239 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045478801107224796, + "loss": 1.2817, + "step": 240 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045441929291157056, + "loss": 1.3754, + "step": 241 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045404922808905544, + "loss": 1.3951, + "step": 242 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045367781904259464, + "loss": 1.1983, + "step": 243 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004533050682189357, + "loss": 1.2189, + "step": 244 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045293097807366534, + "loss": 1.3403, + "step": 245 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004525555510711934, + "loss": 1.2915, + "step": 246 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004521787896847366, + "loss": 1.1561, + "step": 247 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004518006963963024, + "loss": 1.2843, + "step": 248 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004514212736966722, + "loss": 1.2785, + "step": 249 + }, + { + "epoch": 0.2, + "learning_rate": 0.00045104052408538543, + "loss": 1.3026, + "step": 250 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004506584500707228, + "loss": 1.3167, + "step": 251 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004502750541696899, + "loss": 1.4252, + "step": 252 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044989033890800045, + "loss": 1.2327, + "step": 253 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044950430682005996, + "loss": 1.3087, + "step": 254 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044911696044894856, + "loss": 1.249, + "step": 255 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044872830234640493, + "loss": 1.2928, + "step": 256 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004483383350728088, + "loss": 1.2434, + "step": 257 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044794706119716455, + "loss": 1.2879, + "step": 258 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044755448329708396, + "loss": 1.3037, + "step": 259 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044716060395876955, + "loss": 1.2957, + "step": 260 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004467654257769974, + "loss": 1.2358, + "step": 261 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044636895135509967, + "loss": 1.2179, + "step": 262 + }, + { + "epoch": 0.21, + "learning_rate": 0.00044597118330494845, + "loss": 1.1878, + "step": 263 + }, + { + "epoch": 0.22, + "learning_rate": 0.00044557212424693726, + "loss": 1.3805, + "step": 264 + }, + { + "epoch": 0.22, + "learning_rate": 0.00044517177680996494, + "loss": 1.26, + "step": 265 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004447701436314176, + "loss": 1.2524, + "step": 266 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004443672273571516, + "loss": 1.3619, + "step": 267 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004439630306414758, + "loss": 1.3873, + "step": 268 + }, + { + "epoch": 0.22, + "learning_rate": 0.00044355755614713455, + "loss": 1.2831, + "step": 269 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004431508065452897, + "loss": 1.392, + "step": 270 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004427427845155033, + "loss": 1.2447, + "step": 271 + }, + { + "epoch": 0.22, + "learning_rate": 0.00044233349274571976, + "loss": 1.3091, + "step": 272 + }, + { + "epoch": 0.22, + "learning_rate": 0.00044192293393224814, + "loss": 1.3385, + "step": 273 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004415111107797445, + "loss": 1.3911, + "step": 274 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004410980260011942, + "loss": 1.281, + "step": 275 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004406836823178937, + "loss": 1.3719, + "step": 276 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044026808245943284, + "loss": 1.3391, + "step": 277 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004398512291636768, + "loss": 1.2876, + "step": 278 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004394331251767481, + "loss": 1.245, + "step": 279 + }, + { + "epoch": 0.23, + "learning_rate": 0.00043901377325300864, + "loss": 1.3237, + "step": 280 + }, + { + "epoch": 0.23, + "learning_rate": 0.000438593176155041, + "loss": 1.2547, + "step": 281 + }, + { + "epoch": 0.23, + "learning_rate": 0.00043817133665363114, + "loss": 1.4516, + "step": 282 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004377482575277494, + "loss": 1.3214, + "step": 283 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004373239415645324, + "loss": 1.3894, + "step": 284 + }, + { + "epoch": 0.23, + "learning_rate": 0.00043689839155926474, + "loss": 1.3069, + "step": 285 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004364716103153609, + "loss": 1.1784, + "step": 286 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004360436006443459, + "loss": 1.3186, + "step": 287 + }, + { + "epoch": 0.24, + "learning_rate": 0.00043561436536583776, + "loss": 1.2797, + "step": 288 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004351839073075283, + "loss": 1.3483, + "step": 289 + }, + { + "epoch": 0.24, + "learning_rate": 0.00043475222930516476, + "loss": 1.4869, + "step": 290 + }, + { + "epoch": 0.24, + "learning_rate": 0.000434319334202531, + "loss": 1.2546, + "step": 291 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004338852248514289, + "loss": 1.2437, + "step": 292 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004334499041116593, + "loss": 1.2201, + "step": 293 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004330133748510037, + "loss": 1.3446, + "step": 294 + }, + { + "epoch": 0.24, + "learning_rate": 0.00043257563994520467, + "loss": 1.2456, + "step": 295 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004321367022779476, + "loss": 1.3211, + "step": 296 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004316965647408411, + "loss": 1.247, + "step": 297 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004312552302333982, + "loss": 1.4084, + "step": 298 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004308127016630175, + "loss": 1.294, + "step": 299 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004303689819449636, + "loss": 1.359, + "step": 300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004299240740023482, + "loss": 1.3, + "step": 301 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004294779807661105, + "loss": 1.3214, + "step": 302 + }, + { + "epoch": 0.25, + "learning_rate": 0.00042903070517499837, + "loss": 1.2498, + "step": 303 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004285822501755485, + "loss": 1.2102, + "step": 304 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004281326187220675, + "loss": 1.38, + "step": 305 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004276818137766118, + "loss": 1.1944, + "step": 306 + }, + { + "epoch": 0.25, + "learning_rate": 0.00042722983830896884, + "loss": 1.2296, + "step": 307 + }, + { + "epoch": 0.25, + "learning_rate": 0.00042677669529663686, + "loss": 1.3378, + "step": 308 + }, + { + "epoch": 0.25, + "learning_rate": 0.00042632238772480585, + "loss": 1.299, + "step": 309 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004258669185863375, + "loss": 1.1825, + "step": 310 + }, + { + "epoch": 0.25, + "learning_rate": 0.00042541029088174543, + "loss": 1.2156, + "step": 311 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004249525076191759, + "loss": 1.2084, + "step": 312 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004244935718143876, + "loss": 1.4662, + "step": 313 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004240334864907317, + "loss": 1.2716, + "step": 314 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004235722546791323, + "loss": 1.3078, + "step": 315 + }, + { + "epoch": 0.26, + "learning_rate": 0.00042310987941806616, + "loss": 1.3159, + "step": 316 + }, + { + "epoch": 0.26, + "learning_rate": 0.00042264636375354285, + "loss": 1.3981, + "step": 317 + }, + { + "epoch": 0.26, + "learning_rate": 0.00042218171073908465, + "loss": 1.3738, + "step": 318 + }, + { + "epoch": 0.26, + "learning_rate": 0.00042171592343570634, + "loss": 1.2415, + "step": 319 + }, + { + "epoch": 0.26, + "learning_rate": 0.00042124900491189517, + "loss": 1.296, + "step": 320 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004207809582435904, + "loss": 1.2288, + "step": 321 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004203117865141635, + "loss": 1.2131, + "step": 322 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004198414928143974, + "loss": 1.2882, + "step": 323 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004193700802424663, + "loss": 1.2438, + "step": 324 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004188975519039151, + "loss": 1.3187, + "step": 325 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004184239109116393, + "loss": 1.4187, + "step": 326 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004179491603858643, + "loss": 1.2856, + "step": 327 + }, + { + "epoch": 0.27, + "learning_rate": 0.00041747330345412453, + "loss": 1.4805, + "step": 328 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004169963432512435, + "loss": 1.2335, + "step": 329 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004165182829193126, + "loss": 1.199, + "step": 330 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004160391256076704, + "loss": 1.4151, + "step": 331 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004155588744728826, + "loss": 1.2355, + "step": 332 + }, + { + "epoch": 0.27, + "learning_rate": 0.00041507753267872017, + "loss": 1.2175, + "step": 333 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004145951033961395, + "loss": 1.2043, + "step": 334 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004141115898032607, + "loss": 1.344, + "step": 335 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004136269950853473, + "loss": 1.2807, + "step": 336 + }, + { + "epoch": 0.28, + "learning_rate": 0.000413141322434785, + "loss": 1.2541, + "step": 337 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004126545750510605, + "loss": 1.1884, + "step": 338 + }, + { + "epoch": 0.28, + "learning_rate": 0.00041216675614074073, + "loss": 1.2217, + "step": 339 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004116778689174514, + "loss": 1.1973, + "step": 340 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004111879166018561, + "loss": 1.1581, + "step": 341 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004106969024216348, + "loss": 1.1267, + "step": 342 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004102048296114631, + "loss": 1.2995, + "step": 343 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004097117014129903, + "loss": 1.2325, + "step": 344 + }, + { + "epoch": 0.28, + "learning_rate": 0.00040921752107481846, + "loss": 1.2109, + "step": 345 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004087222918524807, + "loss": 1.2491, + "step": 346 + }, + { + "epoch": 0.28, + "learning_rate": 0.00040822601700842015, + "loss": 1.3012, + "step": 347 + }, + { + "epoch": 0.28, + "learning_rate": 0.00040772869981196803, + "loss": 1.4286, + "step": 348 + }, + { + "epoch": 0.28, + "learning_rate": 0.00040723034353932244, + "loss": 1.3211, + "step": 349 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040673095147352665, + "loss": 1.2208, + "step": 350 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040623052690444726, + "loss": 1.2816, + "step": 351 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004057290731287531, + "loss": 1.1285, + "step": 352 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040522659344989286, + "loss": 1.3845, + "step": 353 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040472309117807367, + "loss": 1.1976, + "step": 354 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040421856963023927, + "loss": 1.2608, + "step": 355 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004037130321300482, + "loss": 1.2315, + "step": 356 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040320648200785163, + "loss": 1.2976, + "step": 357 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040269892260067197, + "loss": 1.1766, + "step": 358 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004021903572521802, + "loss": 1.1754, + "step": 359 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040168078931267427, + "loss": 1.1879, + "step": 360 + }, + { + "epoch": 0.29, + "learning_rate": 0.00040117022213905705, + "loss": 1.2101, + "step": 361 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004006586590948141, + "loss": 1.2959, + "step": 362 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004001461035499914, + "loss": 1.2383, + "step": 363 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003996325588811733, + "loss": 1.2306, + "step": 364 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003991180284714603, + "loss": 1.2667, + "step": 365 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003986025157104467, + "loss": 1.1938, + "step": 366 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003980860239941983, + "loss": 1.2576, + "step": 367 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003975685567252299, + "loss": 1.2717, + "step": 368 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003970501173124831, + "loss": 1.1934, + "step": 369 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003965307091713037, + "loss": 1.2964, + "step": 370 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039601033572341914, + "loss": 1.4453, + "step": 371 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039548900039691634, + "loss": 1.2413, + "step": 372 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039496670662621843, + "loss": 1.2551, + "step": 373 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003944434578520628, + "loss": 1.2719, + "step": 374 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039391925752147805, + "loss": 1.2876, + "step": 375 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039339410908776154, + "loss": 1.1293, + "step": 376 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039286801601045625, + "loss": 1.0433, + "step": 377 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003923409817553284, + "loss": 1.2889, + "step": 378 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003918130097943444, + "loss": 1.2326, + "step": 379 + }, + { + "epoch": 0.31, + "learning_rate": 0.000391284103605648, + "loss": 1.0871, + "step": 380 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003907542666735374, + "loss": 1.1409, + "step": 381 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039022350248844244, + "loss": 1.2562, + "step": 382 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003896918145469013, + "loss": 1.265, + "step": 383 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003891592063515376, + "loss": 1.2468, + "step": 384 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003886256814110376, + "loss": 1.3957, + "step": 385 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038809124324012647, + "loss": 1.1821, + "step": 386 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003875558953595459, + "loss": 1.1325, + "step": 387 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003870196412960302, + "loss": 1.137, + "step": 388 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038648248458228365, + "loss": 1.2205, + "step": 389 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003859444287569567, + "loss": 1.3893, + "step": 390 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038540547736462305, + "loss": 1.1998, + "step": 391 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038486563395575624, + "loss": 1.2227, + "step": 392 + }, + { + "epoch": 0.32, + "learning_rate": 0.000384324902086706, + "loss": 1.231, + "step": 393 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003837832853196751, + "loss": 1.34, + "step": 394 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003832407872226959, + "loss": 1.4598, + "step": 395 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003826974113696065, + "loss": 1.2354, + "step": 396 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038215316134002775, + "loss": 1.3935, + "step": 397 + }, + { + "epoch": 0.32, + "learning_rate": 0.00038160804071933895, + "loss": 1.1612, + "step": 398 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038106205309865497, + "loss": 1.3368, + "step": 399 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038051520207480204, + "loss": 1.3126, + "step": 400 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037996749125029456, + "loss": 1.2208, + "step": 401 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037941892423331067, + "loss": 1.2446, + "step": 402 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037886950463766905, + "loss": 1.2942, + "step": 403 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037831923608280517, + "loss": 1.2843, + "step": 404 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037776812219374693, + "loss": 1.2525, + "step": 405 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037721616660109125, + "loss": 1.217, + "step": 406 + }, + { + "epoch": 0.33, + "learning_rate": 0.00037666337294097986, + "loss": 1.3488, + "step": 407 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003761097448550755, + "loss": 1.3377, + "step": 408 + }, + { + "epoch": 0.33, + "learning_rate": 0.000375555285990538, + "loss": 1.253, + "step": 409 + }, + { + "epoch": 0.33, + "learning_rate": 0.000375, + "loss": 1.2185, + "step": 410 + }, + { + "epoch": 0.34, + "learning_rate": 0.00037444389054154314, + "loss": 1.2407, + "step": 411 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003738869612786737, + "loss": 1.4353, + "step": 412 + }, + { + "epoch": 0.34, + "learning_rate": 0.00037332921588029887, + "loss": 1.2325, + "step": 413 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003727706580207021, + "loss": 1.2247, + "step": 414 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003722112913795192, + "loss": 1.2354, + "step": 415 + }, + { + "epoch": 0.34, + "learning_rate": 0.00037165111964171404, + "loss": 1.1955, + "step": 416 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003710901464975544, + "loss": 1.1976, + "step": 417 + }, + { + "epoch": 0.34, + "learning_rate": 0.00037052837564258725, + "loss": 1.2496, + "step": 418 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003699658107776148, + "loss": 1.2556, + "step": 419 + }, + { + "epoch": 0.34, + "learning_rate": 0.00036940245560867005, + "loss": 1.3031, + "step": 420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003688383138469923, + "loss": 1.2516, + "step": 421 + }, + { + "epoch": 0.34, + "learning_rate": 0.00036827338920900255, + "loss": 1.2514, + "step": 422 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003677076854162794, + "loss": 1.2642, + "step": 423 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036714120619553436, + "loss": 1.3294, + "step": 424 + }, + { + "epoch": 0.35, + "learning_rate": 0.000366573955278587, + "loss": 1.2495, + "step": 425 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036600593640234084, + "loss": 1.2672, + "step": 426 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003654371533087585, + "loss": 1.374, + "step": 427 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003648676097448369, + "loss": 1.1804, + "step": 428 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036429730946258297, + "loss": 1.1609, + "step": 429 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036372625621898866, + "loss": 1.147, + "step": 430 + }, + { + "epoch": 0.35, + "learning_rate": 0.000363154453776006, + "loss": 1.2187, + "step": 431 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003625819059005228, + "loss": 1.1923, + "step": 432 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036200861636433753, + "loss": 1.3538, + "step": 433 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003614345889441346, + "loss": 1.1795, + "step": 434 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003608598274214592, + "loss": 1.2951, + "step": 435 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003602843355826928, + "loss": 1.3016, + "step": 436 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035970811721902803, + "loss": 1.1018, + "step": 437 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003591311761264433, + "loss": 1.1931, + "step": 438 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035855351610567876, + "loss": 1.3397, + "step": 439 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003579751409622103, + "loss": 1.3428, + "step": 440 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035739605450622473, + "loss": 1.251, + "step": 441 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035681626055259527, + "loss": 1.2292, + "step": 442 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003562357629208555, + "loss": 1.2881, + "step": 443 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035565456543517487, + "loss": 1.2088, + "step": 444 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035507267192433334, + "loss": 1.1621, + "step": 445 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035449008622169586, + "loss": 1.302, + "step": 446 + }, + { + "epoch": 0.36, + "learning_rate": 0.00035390681216518766, + "loss": 1.1498, + "step": 447 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003533228535972685, + "loss": 1.1941, + "step": 448 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003527382143649075, + "loss": 1.1975, + "step": 449 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003521528983195579, + "loss": 1.1581, + "step": 450 + }, + { + "epoch": 0.37, + "learning_rate": 0.00035156690931713167, + "loss": 1.1009, + "step": 451 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003509802512179737, + "loss": 1.2261, + "step": 452 + }, + { + "epoch": 0.37, + "learning_rate": 0.00035039292788683734, + "loss": 1.2266, + "step": 453 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003498049431928577, + "loss": 1.1586, + "step": 454 + }, + { + "epoch": 0.37, + "learning_rate": 0.00034921630100952716, + "loss": 1.2721, + "step": 455 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003486270052146694, + "loss": 1.2289, + "step": 456 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003480370596904137, + "loss": 1.199, + "step": 457 + }, + { + "epoch": 0.37, + "learning_rate": 0.00034744646832316986, + "loss": 1.2142, + "step": 458 + }, + { + "epoch": 0.37, + "learning_rate": 0.00034685523500360227, + "loss": 1.1852, + "step": 459 + }, + { + "epoch": 0.38, + "learning_rate": 0.00034626336362660417, + "loss": 1.2193, + "step": 460 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003456708580912725, + "loss": 1.3826, + "step": 461 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003450777223008815, + "loss": 1.3648, + "step": 462 + }, + { + "epoch": 0.38, + "learning_rate": 0.00034448396016285745, + "loss": 1.2778, + "step": 463 + }, + { + "epoch": 0.38, + "learning_rate": 0.00034388957558875315, + "loss": 1.2513, + "step": 464 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003432945724942215, + "loss": 1.233, + "step": 465 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003426989547989902, + "loss": 1.191, + "step": 466 + }, + { + "epoch": 0.38, + "learning_rate": 0.00034210272642683584, + "loss": 1.194, + "step": 467 + }, + { + "epoch": 0.38, + "learning_rate": 0.00034150589130555777, + "loss": 1.2335, + "step": 468 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003409084533669526, + "loss": 1.2887, + "step": 469 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003403104165467883, + "loss": 1.2632, + "step": 470 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003397117847847777, + "loss": 1.1329, + "step": 471 + }, + { + "epoch": 0.39, + "learning_rate": 0.00033911256202455355, + "loss": 1.1585, + "step": 472 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003385127522136413, + "loss": 1.377, + "step": 473 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003385127522136413, + "loss": 1.2704, + "step": 474 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003379123593034342, + "loss": 1.2157, + "step": 475 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003373113872491666, + "loss": 1.2399, + "step": 476 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003367098400098881, + "loss": 1.3061, + "step": 477 + }, + { + "epoch": 0.39, + "learning_rate": 0.00033610772154843774, + "loss": 1.2058, + "step": 478 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003355050358314172, + "loss": 1.3083, + "step": 479 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003349017868291653, + "loss": 1.3221, + "step": 480 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003342979785157318, + "loss": 1.2649, + "step": 481 + }, + { + "epoch": 0.39, + "learning_rate": 0.00033369361486885085, + "loss": 1.3248, + "step": 482 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003330886998699149, + "loss": 1.1479, + "step": 483 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003324832375039489, + "loss": 1.319, + "step": 484 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033187723175958343, + "loss": 1.2363, + "step": 485 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003312706866290288, + "loss": 1.2657, + "step": 486 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033066360610804876, + "loss": 1.2452, + "step": 487 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033005599419593385, + "loss": 1.1893, + "step": 488 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003294478548954754, + "loss": 1.1913, + "step": 489 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003288391922129392, + "loss": 1.0939, + "step": 490 + }, + { + "epoch": 0.4, + "learning_rate": 0.00032823001015803863, + "loss": 1.1156, + "step": 491 + }, + { + "epoch": 0.4, + "learning_rate": 0.00032762031274390877, + "loss": 1.1812, + "step": 492 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003270101039870797, + "loss": 1.3445, + "step": 493 + }, + { + "epoch": 0.4, + "learning_rate": 0.00032639938790745014, + "loss": 1.2255, + "step": 494 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003257881685282609, + "loss": 1.2436, + "step": 495 + }, + { + "epoch": 0.4, + "learning_rate": 0.00032517644987606825, + "loss": 1.2174, + "step": 496 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032456423598071783, + "loss": 1.4091, + "step": 497 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032395153087531767, + "loss": 1.1183, + "step": 498 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032333833859621156, + "loss": 1.292, + "step": 499 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032272466318295303, + "loss": 1.1791, + "step": 500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032211050867827807, + "loss": 1.2849, + "step": 501 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003214958791280791, + "loss": 1.1812, + "step": 502 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003208807785813777, + "loss": 1.0762, + "step": 503 + }, + { + "epoch": 0.41, + "learning_rate": 0.00032026521109029853, + "loss": 1.1849, + "step": 504 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003196491807100422, + "loss": 1.114, + "step": 505 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003190326914988587, + "loss": 1.1195, + "step": 506 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003184157475180208, + "loss": 1.3137, + "step": 507 + }, + { + "epoch": 0.41, + "learning_rate": 0.000317798352831797, + "loss": 1.2317, + "step": 508 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003171805115074251, + "loss": 1.2292, + "step": 509 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031656222761508525, + "loss": 1.1485, + "step": 510 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031594350522787295, + "loss": 1.1674, + "step": 511 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003153243484217725, + "loss": 1.2141, + "step": 512 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031470476127563017, + "loss": 1.1566, + "step": 513 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031408474787112696, + "loss": 1.2299, + "step": 514 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031346431229275194, + "loss": 1.2658, + "step": 515 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003128434586277757, + "loss": 1.2442, + "step": 516 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003122221909662227, + "loss": 1.2373, + "step": 517 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003116005134008447, + "loss": 1.2988, + "step": 518 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003109784300270943, + "loss": 1.0625, + "step": 519 + }, + { + "epoch": 0.42, + "learning_rate": 0.00031035594494309687, + "loss": 1.2094, + "step": 520 + }, + { + "epoch": 0.43, + "learning_rate": 0.00030973306224962437, + "loss": 1.1542, + "step": 521 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003091097860500683, + "loss": 1.1996, + "step": 522 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003084861204504122, + "loss": 1.1386, + "step": 523 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003078620695592048, + "loss": 1.2001, + "step": 524 + }, + { + "epoch": 0.43, + "learning_rate": 0.00030723763748753355, + "loss": 1.2714, + "step": 525 + }, + { + "epoch": 0.43, + "learning_rate": 0.00030661282834899644, + "loss": 1.1684, + "step": 526 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003059876462596758, + "loss": 1.1974, + "step": 527 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003053620953381107, + "loss": 1.1926, + "step": 528 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003047361797052702, + "loss": 1.2026, + "step": 529 + }, + { + "epoch": 0.43, + "learning_rate": 0.00030410990348452574, + "loss": 1.3174, + "step": 530 + }, + { + "epoch": 0.43, + "learning_rate": 0.00030348327080162435, + "loss": 1.336, + "step": 531 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003028562857846614, + "loss": 1.1656, + "step": 532 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003022289525640531, + "loss": 1.1433, + "step": 533 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003016012752725099, + "loss": 1.1748, + "step": 534 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030097325804500865, + "loss": 1.2172, + "step": 535 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003003449050187656, + "loss": 1.2286, + "step": 536 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029971622033320914, + "loss": 1.289, + "step": 537 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029908720812995285, + "loss": 1.2428, + "step": 538 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002984578725527675, + "loss": 1.192, + "step": 539 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002978282177475545, + "loss": 1.2345, + "step": 540 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029719824786231795, + "loss": 1.2082, + "step": 541 + }, + { + "epoch": 0.44, + "learning_rate": 0.000296567967047138, + "loss": 1.1865, + "step": 542 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002959373794541426, + "loss": 1.242, + "step": 543 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002953064892374812, + "loss": 1.1575, + "step": 544 + }, + { + "epoch": 0.44, + "learning_rate": 0.00029467530055329655, + "loss": 1.2655, + "step": 545 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002940438175596978, + "loss": 1.3675, + "step": 546 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029341204441673266, + "loss": 1.0566, + "step": 547 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002927799852863606, + "loss": 1.3543, + "step": 548 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002921476443324248, + "loss": 1.173, + "step": 549 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029151502572062534, + "loss": 1.2981, + "step": 550 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029088213361849123, + "loss": 1.2544, + "step": 551 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029024897219535325, + "loss": 1.1469, + "step": 552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002896155456223163, + "loss": 1.3687, + "step": 553 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002889818580722319, + "loss": 1.1021, + "step": 554 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028834791371967137, + "loss": 1.1219, + "step": 555 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028771371674089714, + "loss": 1.149, + "step": 556 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002870792713138362, + "loss": 1.2034, + "step": 557 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002864445816180521, + "loss": 1.2111, + "step": 558 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002858096518347179, + "loss": 1.2846, + "step": 559 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028517448614658785, + "loss": 1.1094, + "step": 560 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002845390887379706, + "loss": 1.3161, + "step": 561 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002839034637947011, + "loss": 1.0742, + "step": 562 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028326761550411347, + "loss": 1.252, + "step": 563 + }, + { + "epoch": 0.46, + "learning_rate": 0.000282631548055013, + "loss": 1.1724, + "step": 564 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002819952656376487, + "loss": 1.1005, + "step": 565 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028135877244368594, + "loss": 1.2622, + "step": 566 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028072207266617854, + "loss": 1.3489, + "step": 567 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002800851704995412, + "loss": 1.2933, + "step": 568 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027944807013952197, + "loss": 1.2395, + "step": 569 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002788107757831744, + "loss": 1.1678, + "step": 570 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002781732916288303, + "loss": 1.2043, + "step": 571 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027753562187607156, + "loss": 1.0324, + "step": 572 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002768977707257029, + "loss": 1.2013, + "step": 573 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027625974237972394, + "loss": 1.1292, + "step": 574 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027562154104130175, + "loss": 1.1604, + "step": 575 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002749831709147429, + "loss": 1.2818, + "step": 576 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027434463620546596, + "loss": 1.1859, + "step": 577 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002737059411199738, + "loss": 1.21, + "step": 578 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002730670898658255, + "loss": 1.0954, + "step": 579 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002724280866516094, + "loss": 1.1393, + "step": 580 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002717889356869146, + "loss": 1.2647, + "step": 581 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027114964118230353, + "loss": 1.1508, + "step": 582 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027051020734928445, + "loss": 1.2546, + "step": 583 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026987063840028323, + "loss": 1.3218, + "step": 584 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026923093854861596, + "loss": 1.3143, + "step": 585 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002685911120084612, + "loss": 1.4008, + "step": 586 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026795116299483193, + "loss": 1.1986, + "step": 587 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026731109572354796, + "loss": 1.2502, + "step": 588 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026667091441120815, + "loss": 1.1647, + "step": 589 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002660306232751629, + "loss": 1.0875, + "step": 590 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026539022653348577, + "loss": 1.0774, + "step": 591 + }, + { + "epoch": 0.48, + "learning_rate": 0.000264749728404946, + "loss": 1.0957, + "step": 592 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002641091331089811, + "loss": 1.2185, + "step": 593 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026346844486566833, + "loss": 1.0557, + "step": 594 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002628276678956974, + "loss": 1.3422, + "step": 595 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026218680642034264, + "loss": 1.17, + "step": 596 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002615458646614349, + "loss": 1.1873, + "step": 597 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026090484684133404, + "loss": 1.2195, + "step": 598 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026026375718290085, + "loss": 1.1257, + "step": 599 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002596225999094696, + "loss": 1.0427, + "step": 600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002589813792448196, + "loss": 1.33, + "step": 601 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025834009941314836, + "loss": 1.1926, + "step": 602 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002576987646390426, + "loss": 1.2288, + "step": 603 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002570573791474513, + "loss": 1.1561, + "step": 604 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002564159471636575, + "loss": 1.164, + "step": 605 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002557744729132503, + "loss": 1.2139, + "step": 606 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025513296062209764, + "loss": 1.268, + "step": 607 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002544914145163177, + "loss": 1.2438, + "step": 608 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025384983882225173, + "loss": 1.077, + "step": 609 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025320823776643566, + "loss": 1.3652, + "step": 610 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025256661557557247, + "loss": 1.1944, + "step": 611 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002519249764765047, + "loss": 1.1849, + "step": 612 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025128332469618594, + "loss": 1.1933, + "step": 613 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002506416644616533, + "loss": 1.2885, + "step": 614 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025, + "loss": 1.1539, + "step": 615 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002493583355383467, + "loss": 1.0778, + "step": 616 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002487166753038141, + "loss": 1.1326, + "step": 617 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002480750235234954, + "loss": 1.2101, + "step": 618 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024743338442442754, + "loss": 1.2191, + "step": 619 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002467917622335644, + "loss": 1.323, + "step": 620 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024615016117774833, + "loss": 1.1518, + "step": 621 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024550858548368234, + "loss": 1.2836, + "step": 622 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002448670393779024, + "loss": 1.176, + "step": 623 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002442255270867498, + "loss": 1.2703, + "step": 624 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024358405283634265, + "loss": 1.2077, + "step": 625 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024294262085254877, + "loss": 1.0884, + "step": 626 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024230123536095747, + "loss": 1.0855, + "step": 627 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024165990058685173, + "loss": 1.1083, + "step": 628 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002410186207551804, + "loss": 1.102, + "step": 629 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024037740009053054, + "loss": 1.1881, + "step": 630 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023973624281709924, + "loss": 1.1866, + "step": 631 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023909515315866605, + "loss": 1.1569, + "step": 632 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023845413533856517, + "loss": 1.2431, + "step": 633 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023781319357965745, + "loss": 1.206, + "step": 634 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023717233210430257, + "loss": 1.1618, + "step": 635 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023653155513433174, + "loss": 1.1611, + "step": 636 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002358908668910189, + "loss": 1.1733, + "step": 637 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023525027159505398, + "loss": 1.1068, + "step": 638 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002346097734665143, + "loss": 1.1288, + "step": 639 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023396937672483709, + "loss": 1.2002, + "step": 640 + }, + { + "epoch": 0.52, + "learning_rate": 0.00023332908558879178, + "loss": 1.1308, + "step": 641 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002326889042764521, + "loss": 1.1557, + "step": 642 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002320488370051681, + "loss": 1.0789, + "step": 643 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002314088879915388, + "loss": 1.1547, + "step": 644 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023076906145138407, + "loss": 1.1554, + "step": 645 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002301293615997168, + "loss": 1.3557, + "step": 646 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002294897926507156, + "loss": 1.305, + "step": 647 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022885035881769654, + "loss": 1.1036, + "step": 648 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022821106431308543, + "loss": 1.1285, + "step": 649 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022757191334839063, + "loss": 1.0992, + "step": 650 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022693291013417452, + "loss": 1.3331, + "step": 651 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022629405888002627, + "loss": 1.249, + "step": 652 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022565536379453406, + "loss": 1.1711, + "step": 653 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022501682908525716, + "loss": 1.2036, + "step": 654 + }, + { + "epoch": 0.53, + "learning_rate": 0.00022437845895869826, + "loss": 1.221, + "step": 655 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002237402576202761, + "loss": 1.1568, + "step": 656 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002231022292742972, + "loss": 1.1505, + "step": 657 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022246437812392848, + "loss": 1.2478, + "step": 658 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022182670837116975, + "loss": 1.2124, + "step": 659 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022118922421682563, + "loss": 1.1446, + "step": 660 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022055192986047807, + "loss": 1.2629, + "step": 661 + }, + { + "epoch": 0.54, + "learning_rate": 0.00021991482950045884, + "loss": 1.2253, + "step": 662 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002192779273338215, + "loss": 1.1678, + "step": 663 + }, + { + "epoch": 0.54, + "learning_rate": 0.00021864122755631415, + "loss": 1.1142, + "step": 664 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002180047343623514, + "loss": 1.1841, + "step": 665 + }, + { + "epoch": 0.54, + "learning_rate": 0.00021736845194498717, + "loss": 1.0987, + "step": 666 + }, + { + "epoch": 0.54, + "learning_rate": 0.00021673238449588668, + "loss": 1.2257, + "step": 667 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021609653620529895, + "loss": 1.247, + "step": 668 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002154609112620295, + "loss": 1.1234, + "step": 669 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021482551385341224, + "loss": 1.1887, + "step": 670 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021419034816528218, + "loss": 1.1832, + "step": 671 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021355541838194794, + "loss": 1.0937, + "step": 672 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021292072868616383, + "loss": 1.1232, + "step": 673 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002122862832591029, + "loss": 1.1895, + "step": 674 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002116520862803286, + "loss": 1.136, + "step": 675 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021101814192776803, + "loss": 1.0739, + "step": 676 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021038445437768376, + "loss": 1.2399, + "step": 677 + }, + { + "epoch": 0.55, + "learning_rate": 0.00020975102780464676, + "loss": 1.2148, + "step": 678 + }, + { + "epoch": 0.55, + "learning_rate": 0.00020911786638150873, + "loss": 1.1559, + "step": 679 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020848497427937464, + "loss": 1.0613, + "step": 680 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020785235566757519, + "loss": 1.0899, + "step": 681 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002072200147136395, + "loss": 1.2101, + "step": 682 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020658795558326743, + "loss": 1.1734, + "step": 683 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002059561824403023, + "loss": 1.2021, + "step": 684 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020532469944670347, + "loss": 1.2547, + "step": 685 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020469351076251885, + "loss": 1.1917, + "step": 686 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002040626205458574, + "loss": 1.1897, + "step": 687 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002034320329528621, + "loss": 1.0289, + "step": 688 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020280175213768206, + "loss": 1.0023, + "step": 689 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020217178225244553, + "loss": 1.2928, + "step": 690 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020154212744723248, + "loss": 1.2823, + "step": 691 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002009127918700472, + "loss": 1.0725, + "step": 692 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002002837796667909, + "loss": 1.2401, + "step": 693 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019965509498123446, + "loss": 1.1985, + "step": 694 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019902674195499144, + "loss": 1.2224, + "step": 695 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019839872472749012, + "loss": 1.2232, + "step": 696 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019777104743594687, + "loss": 1.1954, + "step": 697 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019714371421533866, + "loss": 1.1161, + "step": 698 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019651672919837571, + "loss": 1.08, + "step": 699 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001958900965154743, + "loss": 1.3326, + "step": 700 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001952638202947299, + "loss": 1.2142, + "step": 701 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019463790466188937, + "loss": 1.1674, + "step": 702 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019401235374032428, + "loss": 1.0752, + "step": 703 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019338717165100363, + "loss": 1.2541, + "step": 704 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019276236251246655, + "loss": 1.1298, + "step": 705 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001921379304407952, + "loss": 1.2117, + "step": 706 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019151387954958793, + "loss": 1.2544, + "step": 707 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019089021394993178, + "loss": 1.2411, + "step": 708 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001902669377503756, + "loss": 1.1707, + "step": 709 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018964405505690323, + "loss": 1.1408, + "step": 710 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001890215699729057, + "loss": 1.187, + "step": 711 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018839948659915524, + "loss": 1.0687, + "step": 712 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018777780903377734, + "loss": 1.1897, + "step": 713 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018715654137222432, + "loss": 1.0239, + "step": 714 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018653568770724804, + "loss": 1.2316, + "step": 715 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018591525212887302, + "loss": 1.2184, + "step": 716 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001852952387243698, + "loss": 1.1243, + "step": 717 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018467565157822744, + "loss": 1.0837, + "step": 718 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018405649477212698, + "loss": 1.1351, + "step": 719 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018343777238491476, + "loss": 1.1836, + "step": 720 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001828194884925749, + "loss": 1.2087, + "step": 721 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001822016471682031, + "loss": 1.1975, + "step": 722 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001815842524819793, + "loss": 1.0393, + "step": 723 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018096730850114136, + "loss": 1.2075, + "step": 724 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001803508192899579, + "loss": 1.1717, + "step": 725 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017973478890970145, + "loss": 1.2399, + "step": 726 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001791192214186223, + "loss": 1.1503, + "step": 727 + }, + { + "epoch": 0.59, + "learning_rate": 0.00017850412087192096, + "loss": 1.035, + "step": 728 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017788949132172194, + "loss": 1.2183, + "step": 729 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017727533681704706, + "loss": 1.129, + "step": 730 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017666166140378853, + "loss": 1.1666, + "step": 731 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017604846912468243, + "loss": 1.1265, + "step": 732 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017543576401928218, + "loss": 1.1714, + "step": 733 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017482355012393176, + "loss": 1.2929, + "step": 734 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017421183147173913, + "loss": 1.2391, + "step": 735 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001736006120925499, + "loss": 1.1351, + "step": 736 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017298989601292037, + "loss": 1.2163, + "step": 737 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017237968725609127, + "loss": 1.1982, + "step": 738 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017176998984196146, + "loss": 1.1178, + "step": 739 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017116080778706088, + "loss": 1.0615, + "step": 740 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001705521451045246, + "loss": 1.3243, + "step": 741 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016994400580406624, + "loss": 1.1612, + "step": 742 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016933639389195136, + "loss": 1.1716, + "step": 743 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001687293133709712, + "loss": 1.0288, + "step": 744 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001681227682404166, + "loss": 1.0895, + "step": 745 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001675167624960512, + "loss": 1.087, + "step": 746 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016691130013008512, + "loss": 1.2198, + "step": 747 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016630638513114927, + "loss": 1.2312, + "step": 748 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016570202148426816, + "loss": 1.0933, + "step": 749 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016509821317083466, + "loss": 1.2823, + "step": 750 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016449496416858284, + "loss": 1.1326, + "step": 751 + }, + { + "epoch": 0.61, + "learning_rate": 0.00016389227845156225, + "loss": 1.2714, + "step": 752 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001632901599901118, + "loss": 1.0857, + "step": 753 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016268861275083342, + "loss": 1.0933, + "step": 754 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016208764069656578, + "loss": 1.0641, + "step": 755 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001614872477863587, + "loss": 1.1772, + "step": 756 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016088743797544654, + "loss": 1.2117, + "step": 757 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016028821521522224, + "loss": 1.2231, + "step": 758 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015968958345321176, + "loss": 1.2319, + "step": 759 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015909154663304743, + "loss": 1.2054, + "step": 760 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001584941086944423, + "loss": 1.1036, + "step": 761 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015789727357316425, + "loss": 1.263, + "step": 762 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015730104520100984, + "loss": 1.1646, + "step": 763 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015670542750577848, + "loss": 1.1066, + "step": 764 + }, + { + "epoch": 0.62, + "learning_rate": 0.00015611042441124686, + "loss": 1.2196, + "step": 765 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015551603983714258, + "loss": 1.191, + "step": 766 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001549222776991186, + "loss": 1.2003, + "step": 767 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015432914190872756, + "loss": 1.1709, + "step": 768 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015373663637339587, + "loss": 1.2384, + "step": 769 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001531447649963978, + "loss": 1.2677, + "step": 770 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015255353167683018, + "loss": 1.1179, + "step": 771 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015196294030958639, + "loss": 1.1405, + "step": 772 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015137299478533065, + "loss": 1.1502, + "step": 773 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015078369899047285, + "loss": 1.2262, + "step": 774 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015019505680714232, + "loss": 1.111, + "step": 775 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001496070721131627, + "loss": 1.1294, + "step": 776 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014901974878202628, + "loss": 1.1955, + "step": 777 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014843309068286847, + "loss": 1.0532, + "step": 778 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014784710168044214, + "loss": 1.1781, + "step": 779 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014726178563509258, + "loss": 1.1577, + "step": 780 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001466771464027316, + "loss": 1.1312, + "step": 781 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014609318783481237, + "loss": 1.1241, + "step": 782 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014550991377830424, + "loss": 1.0212, + "step": 783 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014492732807566672, + "loss": 1.0744, + "step": 784 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001443454345648252, + "loss": 1.2294, + "step": 785 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001437642370791446, + "loss": 1.1392, + "step": 786 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014318373944740485, + "loss": 1.3109, + "step": 787 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014260394549377525, + "loss": 1.1492, + "step": 788 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014202485903778978, + "loss": 1.1972, + "step": 789 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014144648389432125, + "loss": 1.2617, + "step": 790 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014086882387355658, + "loss": 1.0715, + "step": 791 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014029188278097198, + "loss": 1.2241, + "step": 792 + }, + { + "epoch": 0.65, + "learning_rate": 0.00013971566441730714, + "loss": 1.2166, + "step": 793 + }, + { + "epoch": 0.65, + "learning_rate": 0.00013914017257854079, + "loss": 1.2289, + "step": 794 + }, + { + "epoch": 0.65, + "learning_rate": 0.00013856541105586545, + "loss": 1.1733, + "step": 795 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001379913836356625, + "loss": 1.0924, + "step": 796 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001374180940994773, + "loss": 1.357, + "step": 797 + }, + { + "epoch": 0.65, + "learning_rate": 0.00013684554622399404, + "loss": 1.1423, + "step": 798 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001362737437810114, + "loss": 1.1007, + "step": 799 + }, + { + "epoch": 0.65, + "learning_rate": 0.000135702690537417, + "loss": 1.207, + "step": 800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001351323902551631, + "loss": 1.0954, + "step": 801 + }, + { + "epoch": 0.65, + "learning_rate": 0.00013456284669124158, + "loss": 1.1157, + "step": 802 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001339940635976592, + "loss": 1.0707, + "step": 803 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013342604472141295, + "loss": 1.1977, + "step": 804 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013285879380446565, + "loss": 1.1694, + "step": 805 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001322923145837206, + "loss": 1.1187, + "step": 806 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001317266107909975, + "loss": 1.1737, + "step": 807 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013116168615300784, + "loss": 1.111, + "step": 808 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013059754439133004, + "loss": 1.1211, + "step": 809 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001300341892223852, + "loss": 1.1414, + "step": 810 + }, + { + "epoch": 0.66, + "learning_rate": 0.00012947162435741278, + "loss": 1.1291, + "step": 811 + }, + { + "epoch": 0.66, + "learning_rate": 0.00012890985350244563, + "loss": 1.1705, + "step": 812 + }, + { + "epoch": 0.66, + "learning_rate": 0.00012834888035828597, + "loss": 1.1175, + "step": 813 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001277887086204809, + "loss": 1.198, + "step": 814 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012722934197929804, + "loss": 1.0646, + "step": 815 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012667078411970117, + "loss": 1.2132, + "step": 816 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012611303872132632, + "loss": 1.022, + "step": 817 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012555610945845692, + "loss": 1.0786, + "step": 818 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012500000000000006, + "loss": 1.0347, + "step": 819 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012444471400946207, + "loss": 1.1083, + "step": 820 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001238902551449246, + "loss": 1.127, + "step": 821 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012333662705902017, + "loss": 1.254, + "step": 822 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001227838333989088, + "loss": 1.0407, + "step": 823 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001222318778062531, + "loss": 1.0086, + "step": 824 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012168076391719491, + "loss": 1.1618, + "step": 825 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012113049536233092, + "loss": 1.1283, + "step": 826 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001205810757666894, + "loss": 1.119, + "step": 827 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012003250874970548, + "loss": 1.193, + "step": 828 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011948479792519793, + "loss": 1.1842, + "step": 829 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011893794690134499, + "loss": 1.1829, + "step": 830 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011839195928066102, + "loss": 1.0626, + "step": 831 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011784683865997229, + "loss": 1.1399, + "step": 832 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011730258863039347, + "loss": 1.2088, + "step": 833 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011675921277730414, + "loss": 1.3295, + "step": 834 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011621671468032493, + "loss": 1.0416, + "step": 835 + }, + { + "epoch": 0.68, + "learning_rate": 0.000115675097913294, + "loss": 1.287, + "step": 836 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001151343660442438, + "loss": 1.2913, + "step": 837 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011459452263537695, + "loss": 1.1021, + "step": 838 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011405557124304336, + "loss": 1.1548, + "step": 839 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011351751541771643, + "loss": 1.1196, + "step": 840 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011298035870396985, + "loss": 1.2203, + "step": 841 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011244410464045412, + "loss": 1.1008, + "step": 842 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011190875675987356, + "loss": 1.1293, + "step": 843 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011137431858896247, + "loss": 1.2009, + "step": 844 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011084079364846242, + "loss": 1.0981, + "step": 845 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011030818545309879, + "loss": 1.1827, + "step": 846 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001097764975115576, + "loss": 1.3151, + "step": 847 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010924573332646257, + "loss": 1.2196, + "step": 848 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010871589639435203, + "loss": 1.2505, + "step": 849 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010818699020565564, + "loss": 1.272, + "step": 850 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010765901824467166, + "loss": 1.1925, + "step": 851 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010713198398954382, + "loss": 1.0792, + "step": 852 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010660589091223854, + "loss": 1.3603, + "step": 853 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010608074247852193, + "loss": 1.0601, + "step": 854 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010555654214793722, + "loss": 1.272, + "step": 855 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010503329337378164, + "loss": 1.1748, + "step": 856 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010451099960308375, + "loss": 1.2163, + "step": 857 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010398966427658091, + "loss": 1.2352, + "step": 858 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001034692908286964, + "loss": 1.0979, + "step": 859 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010294988268751693, + "loss": 1.1728, + "step": 860 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010243144327477013, + "loss": 1.1381, + "step": 861 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010191397600580177, + "loss": 1.1454, + "step": 862 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010139748428955334, + "loss": 1.0453, + "step": 863 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001008819715285397, + "loss": 1.0805, + "step": 864 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010036744111882672, + "loss": 1.2874, + "step": 865 + }, + { + "epoch": 0.71, + "learning_rate": 9.985389645000865e-05, + "loss": 1.2998, + "step": 866 + }, + { + "epoch": 0.71, + "learning_rate": 9.934134090518593e-05, + "loss": 1.1244, + "step": 867 + }, + { + "epoch": 0.71, + "learning_rate": 9.882977786094288e-05, + "loss": 1.18, + "step": 868 + }, + { + "epoch": 0.71, + "learning_rate": 9.831921068732572e-05, + "loss": 0.981, + "step": 869 + }, + { + "epoch": 0.71, + "learning_rate": 9.780964274781984e-05, + "loss": 1.2201, + "step": 870 + }, + { + "epoch": 0.71, + "learning_rate": 9.730107739932806e-05, + "loss": 1.2101, + "step": 871 + }, + { + "epoch": 0.71, + "learning_rate": 9.679351799214837e-05, + "loss": 1.0001, + "step": 872 + }, + { + "epoch": 0.71, + "learning_rate": 9.628696786995189e-05, + "loss": 1.1488, + "step": 873 + }, + { + "epoch": 0.71, + "learning_rate": 9.578143036976073e-05, + "loss": 1.0487, + "step": 874 + }, + { + "epoch": 0.71, + "learning_rate": 9.527690882192635e-05, + "loss": 1.1362, + "step": 875 + }, + { + "epoch": 0.72, + "learning_rate": 9.477340655010716e-05, + "loss": 1.2093, + "step": 876 + }, + { + "epoch": 0.72, + "learning_rate": 9.427092687124691e-05, + "loss": 1.2663, + "step": 877 + }, + { + "epoch": 0.72, + "learning_rate": 9.376947309555273e-05, + "loss": 1.1629, + "step": 878 + }, + { + "epoch": 0.72, + "learning_rate": 9.326904852647344e-05, + "loss": 1.0622, + "step": 879 + }, + { + "epoch": 0.72, + "learning_rate": 9.276965646067753e-05, + "loss": 1.2569, + "step": 880 + }, + { + "epoch": 0.72, + "learning_rate": 9.227130018803195e-05, + "loss": 1.1841, + "step": 881 + }, + { + "epoch": 0.72, + "learning_rate": 9.177398299157988e-05, + "loss": 1.2218, + "step": 882 + }, + { + "epoch": 0.72, + "learning_rate": 9.127770814751932e-05, + "loss": 1.2433, + "step": 883 + }, + { + "epoch": 0.72, + "learning_rate": 9.07824789251816e-05, + "loss": 1.1107, + "step": 884 + }, + { + "epoch": 0.72, + "learning_rate": 9.028829858700973e-05, + "loss": 1.2066, + "step": 885 + }, + { + "epoch": 0.72, + "learning_rate": 8.979517038853685e-05, + "loss": 1.1586, + "step": 886 + }, + { + "epoch": 0.72, + "learning_rate": 8.930309757836516e-05, + "loss": 1.0483, + "step": 887 + }, + { + "epoch": 0.73, + "learning_rate": 8.881208339814394e-05, + "loss": 1.2277, + "step": 888 + }, + { + "epoch": 0.73, + "learning_rate": 8.832213108254864e-05, + "loss": 1.1566, + "step": 889 + }, + { + "epoch": 0.73, + "learning_rate": 8.783324385925931e-05, + "loss": 1.2038, + "step": 890 + }, + { + "epoch": 0.73, + "learning_rate": 8.734542494893955e-05, + "loss": 1.0776, + "step": 891 + }, + { + "epoch": 0.73, + "learning_rate": 8.685867756521501e-05, + "loss": 1.3272, + "step": 892 + }, + { + "epoch": 0.73, + "learning_rate": 8.637300491465272e-05, + "loss": 1.1463, + "step": 893 + }, + { + "epoch": 0.73, + "learning_rate": 8.588841019673937e-05, + "loss": 1.2504, + "step": 894 + }, + { + "epoch": 0.73, + "learning_rate": 8.540489660386064e-05, + "loss": 1.1214, + "step": 895 + }, + { + "epoch": 0.73, + "learning_rate": 8.49224673212799e-05, + "loss": 1.1741, + "step": 896 + }, + { + "epoch": 0.73, + "learning_rate": 8.444112552711752e-05, + "loss": 1.0977, + "step": 897 + }, + { + "epoch": 0.73, + "learning_rate": 8.396087439232966e-05, + "loss": 0.9768, + "step": 898 + }, + { + "epoch": 0.73, + "learning_rate": 8.348171708068747e-05, + "loss": 1.1513, + "step": 899 + }, + { + "epoch": 0.73, + "learning_rate": 8.300365674875651e-05, + "loss": 1.1469, + "step": 900 + }, + { + "epoch": 0.74, + "learning_rate": 8.252669654587549e-05, + "loss": 1.2347, + "step": 901 + }, + { + "epoch": 0.74, + "learning_rate": 8.205083961413573e-05, + "loss": 1.1628, + "step": 902 + }, + { + "epoch": 0.74, + "learning_rate": 8.15760890883607e-05, + "loss": 1.156, + "step": 903 + }, + { + "epoch": 0.74, + "learning_rate": 8.110244809608495e-05, + "loss": 1.1286, + "step": 904 + }, + { + "epoch": 0.74, + "learning_rate": 8.062991975753378e-05, + "loss": 1.2565, + "step": 905 + }, + { + "epoch": 0.74, + "learning_rate": 8.015850718560256e-05, + "loss": 1.2304, + "step": 906 + }, + { + "epoch": 0.74, + "learning_rate": 7.968821348583643e-05, + "loss": 1.214, + "step": 907 + }, + { + "epoch": 0.74, + "learning_rate": 7.921904175640957e-05, + "loss": 1.1538, + "step": 908 + }, + { + "epoch": 0.74, + "learning_rate": 7.875099508810486e-05, + "loss": 1.2075, + "step": 909 + }, + { + "epoch": 0.74, + "learning_rate": 7.828407656429365e-05, + "loss": 1.1165, + "step": 910 + }, + { + "epoch": 0.74, + "learning_rate": 7.781828926091536e-05, + "loss": 1.1181, + "step": 911 + }, + { + "epoch": 0.74, + "learning_rate": 7.735363624645711e-05, + "loss": 1.192, + "step": 912 + }, + { + "epoch": 0.75, + "learning_rate": 7.689012058193385e-05, + "loss": 1.2208, + "step": 913 + }, + { + "epoch": 0.75, + "learning_rate": 7.642774532086775e-05, + "loss": 1.2446, + "step": 914 + }, + { + "epoch": 0.75, + "learning_rate": 7.596651350926836e-05, + "loss": 1.0554, + "step": 915 + }, + { + "epoch": 0.75, + "learning_rate": 7.550642818561249e-05, + "loss": 1.0828, + "step": 916 + }, + { + "epoch": 0.75, + "learning_rate": 7.504749238082414e-05, + "loss": 1.1136, + "step": 917 + }, + { + "epoch": 0.75, + "learning_rate": 7.458970911825458e-05, + "loss": 1.1758, + "step": 918 + }, + { + "epoch": 0.75, + "learning_rate": 7.413308141366254e-05, + "loss": 1.2148, + "step": 919 + }, + { + "epoch": 0.75, + "learning_rate": 7.367761227519415e-05, + "loss": 1.1484, + "step": 920 + }, + { + "epoch": 0.75, + "learning_rate": 7.322330470336314e-05, + "loss": 1.1624, + "step": 921 + }, + { + "epoch": 0.75, + "learning_rate": 7.27701616910312e-05, + "loss": 1.098, + "step": 922 + }, + { + "epoch": 0.75, + "learning_rate": 7.231818622338823e-05, + "loss": 1.0362, + "step": 923 + }, + { + "epoch": 0.75, + "learning_rate": 7.186738127793257e-05, + "loss": 1.2082, + "step": 924 + }, + { + "epoch": 0.76, + "learning_rate": 7.141774982445148e-05, + "loss": 1.0154, + "step": 925 + }, + { + "epoch": 0.76, + "learning_rate": 7.096929482500167e-05, + "loss": 1.1318, + "step": 926 + }, + { + "epoch": 0.76, + "learning_rate": 7.052201923388955e-05, + "loss": 1.0863, + "step": 927 + }, + { + "epoch": 0.76, + "learning_rate": 7.007592599765188e-05, + "loss": 1.1101, + "step": 928 + }, + { + "epoch": 0.76, + "learning_rate": 6.963101805503647e-05, + "loss": 1.0511, + "step": 929 + }, + { + "epoch": 0.76, + "learning_rate": 6.918729833698259e-05, + "loss": 1.0216, + "step": 930 + }, + { + "epoch": 0.76, + "learning_rate": 6.874476976660185e-05, + "loss": 1.1037, + "step": 931 + }, + { + "epoch": 0.76, + "learning_rate": 6.8303435259159e-05, + "loss": 1.2339, + "step": 932 + }, + { + "epoch": 0.76, + "learning_rate": 6.786329772205247e-05, + "loss": 1.0232, + "step": 933 + }, + { + "epoch": 0.76, + "learning_rate": 6.742436005479536e-05, + "loss": 1.1132, + "step": 934 + }, + { + "epoch": 0.76, + "learning_rate": 6.69866251489964e-05, + "loss": 1.2427, + "step": 935 + }, + { + "epoch": 0.76, + "learning_rate": 6.655009588834077e-05, + "loss": 1.071, + "step": 936 + }, + { + "epoch": 0.77, + "learning_rate": 6.611477514857114e-05, + "loss": 1.0914, + "step": 937 + }, + { + "epoch": 0.77, + "learning_rate": 6.5680665797469e-05, + "loss": 1.1928, + "step": 938 + }, + { + "epoch": 0.77, + "learning_rate": 6.524777069483526e-05, + "loss": 1.1989, + "step": 939 + }, + { + "epoch": 0.77, + "learning_rate": 6.481609269247171e-05, + "loss": 1.2801, + "step": 940 + }, + { + "epoch": 0.77, + "learning_rate": 6.438563463416223e-05, + "loss": 1.0442, + "step": 941 + }, + { + "epoch": 0.77, + "learning_rate": 6.395639935565412e-05, + "loss": 1.1779, + "step": 942 + }, + { + "epoch": 0.77, + "learning_rate": 6.35283896846392e-05, + "loss": 1.1318, + "step": 943 + }, + { + "epoch": 0.77, + "learning_rate": 6.31016084407352e-05, + "loss": 1.2038, + "step": 944 + }, + { + "epoch": 0.77, + "learning_rate": 6.267605843546767e-05, + "loss": 1.1461, + "step": 945 + }, + { + "epoch": 0.77, + "learning_rate": 6.225174247225066e-05, + "loss": 1.327, + "step": 946 + }, + { + "epoch": 0.77, + "learning_rate": 6.182866334636889e-05, + "loss": 1.1828, + "step": 947 + }, + { + "epoch": 0.77, + "learning_rate": 6.140682384495902e-05, + "loss": 0.9814, + "step": 948 + }, + { + "epoch": 0.77, + "learning_rate": 6.098622674699147e-05, + "loss": 1.0257, + "step": 949 + }, + { + "epoch": 0.78, + "learning_rate": 6.0566874823251935e-05, + "loss": 1.2595, + "step": 950 + }, + { + "epoch": 0.78, + "learning_rate": 6.01487708363232e-05, + "loss": 1.127, + "step": 951 + }, + { + "epoch": 0.78, + "learning_rate": 5.9731917540567174e-05, + "loss": 1.1989, + "step": 952 + }, + { + "epoch": 0.78, + "learning_rate": 5.9316317682106294e-05, + "loss": 1.1274, + "step": 953 + }, + { + "epoch": 0.78, + "learning_rate": 5.8901973998805814e-05, + "loss": 1.0719, + "step": 954 + }, + { + "epoch": 0.78, + "learning_rate": 5.848888922025553e-05, + "loss": 1.2299, + "step": 955 + }, + { + "epoch": 0.78, + "learning_rate": 5.807706606775196e-05, + "loss": 1.2503, + "step": 956 + }, + { + "epoch": 0.78, + "learning_rate": 5.766650725428027e-05, + "loss": 1.1301, + "step": 957 + }, + { + "epoch": 0.78, + "learning_rate": 5.725721548449669e-05, + "loss": 1.1631, + "step": 958 + }, + { + "epoch": 0.78, + "learning_rate": 5.684919345471029e-05, + "loss": 1.2023, + "step": 959 + }, + { + "epoch": 0.78, + "learning_rate": 5.6442443852865485e-05, + "loss": 1.1223, + "step": 960 + }, + { + "epoch": 0.78, + "learning_rate": 5.603696935852426e-05, + "loss": 1.0044, + "step": 961 + }, + { + "epoch": 0.79, + "learning_rate": 5.56327726428485e-05, + "loss": 1.2944, + "step": 962 + }, + { + "epoch": 0.79, + "learning_rate": 5.522985636858238e-05, + "loss": 1.0393, + "step": 963 + }, + { + "epoch": 0.79, + "learning_rate": 5.4828223190035055e-05, + "loss": 1.0775, + "step": 964 + }, + { + "epoch": 0.79, + "learning_rate": 5.442787575306274e-05, + "loss": 1.18, + "step": 965 + }, + { + "epoch": 0.79, + "learning_rate": 5.402881669505164e-05, + "loss": 1.1477, + "step": 966 + }, + { + "epoch": 0.79, + "learning_rate": 5.3631048644900345e-05, + "loss": 1.1323, + "step": 967 + }, + { + "epoch": 0.79, + "learning_rate": 5.323457422300271e-05, + "loss": 1.2251, + "step": 968 + }, + { + "epoch": 0.79, + "learning_rate": 5.283939604123042e-05, + "loss": 1.1488, + "step": 969 + }, + { + "epoch": 0.79, + "learning_rate": 5.244551670291606e-05, + "loss": 1.1611, + "step": 970 + }, + { + "epoch": 0.79, + "learning_rate": 5.2052938802835515e-05, + "loss": 1.2961, + "step": 971 + }, + { + "epoch": 0.79, + "learning_rate": 5.1661664927191235e-05, + "loss": 1.2177, + "step": 972 + }, + { + "epoch": 0.79, + "learning_rate": 5.1271697653595156e-05, + "loss": 1.2855, + "step": 973 + }, + { + "epoch": 0.8, + "learning_rate": 5.088303955105153e-05, + "loss": 1.1278, + "step": 974 + }, + { + "epoch": 0.8, + "learning_rate": 5.049569317994013e-05, + "loss": 1.1728, + "step": 975 + }, + { + "epoch": 0.8, + "learning_rate": 5.01096610919996e-05, + "loss": 1.1809, + "step": 976 + }, + { + "epoch": 0.8, + "learning_rate": 4.972494583031015e-05, + "loss": 1.166, + "step": 977 + }, + { + "epoch": 0.8, + "learning_rate": 4.9341549929277227e-05, + "loss": 1.1333, + "step": 978 + }, + { + "epoch": 0.8, + "learning_rate": 4.895947591461455e-05, + "loss": 1.0972, + "step": 979 + }, + { + "epoch": 0.8, + "learning_rate": 4.85787263033278e-05, + "loss": 1.062, + "step": 980 + }, + { + "epoch": 0.8, + "learning_rate": 4.819930360369762e-05, + "loss": 1.1008, + "step": 981 + }, + { + "epoch": 0.8, + "learning_rate": 4.78212103152634e-05, + "loss": 1.1797, + "step": 982 + }, + { + "epoch": 0.8, + "learning_rate": 4.744444892880661e-05, + "loss": 1.1212, + "step": 983 + }, + { + "epoch": 0.8, + "learning_rate": 4.7069021926334696e-05, + "loss": 1.065, + "step": 984 + }, + { + "epoch": 0.8, + "learning_rate": 4.669493178106432e-05, + "loss": 1.1494, + "step": 985 + }, + { + "epoch": 0.81, + "learning_rate": 4.6322180957405366e-05, + "loss": 1.255, + "step": 986 + }, + { + "epoch": 0.81, + "learning_rate": 4.59507719109446e-05, + "loss": 1.0787, + "step": 987 + }, + { + "epoch": 0.81, + "learning_rate": 4.558070708842948e-05, + "loss": 1.0694, + "step": 988 + }, + { + "epoch": 0.81, + "learning_rate": 4.521198892775202e-05, + "loss": 1.0938, + "step": 989 + }, + { + "epoch": 0.81, + "learning_rate": 4.484461985793298e-05, + "loss": 1.1596, + "step": 990 + }, + { + "epoch": 0.81, + "learning_rate": 4.447860229910544e-05, + "loss": 1.1586, + "step": 991 + }, + { + "epoch": 0.81, + "learning_rate": 4.4113938662499103e-05, + "loss": 1.1063, + "step": 992 + }, + { + "epoch": 0.81, + "learning_rate": 4.3750631350424455e-05, + "loss": 1.2304, + "step": 993 + }, + { + "epoch": 0.81, + "learning_rate": 4.338868275625679e-05, + "loss": 1.0505, + "step": 994 + }, + { + "epoch": 0.81, + "learning_rate": 4.3028095264420534e-05, + "loss": 1.3133, + "step": 995 + }, + { + "epoch": 0.81, + "learning_rate": 4.266887125037364e-05, + "loss": 1.1243, + "step": 996 + }, + { + "epoch": 0.81, + "learning_rate": 4.231101308059165e-05, + "loss": 1.2433, + "step": 997 + }, + { + "epoch": 0.81, + "learning_rate": 4.195452311255235e-05, + "loss": 1.0728, + "step": 998 + }, + { + "epoch": 0.82, + "learning_rate": 4.159940369472015e-05, + "loss": 0.9998, + "step": 999 + }, + { + "epoch": 0.82, + "learning_rate": 4.124565716653067e-05, + "loss": 1.0315, + "step": 1000 + }, + { + "epoch": 0.82, + "learning_rate": 4.089328585837512e-05, + "loss": 1.2038, + "step": 1001 + }, + { + "epoch": 0.82, + "learning_rate": 4.054229209158544e-05, + "loss": 1.1679, + "step": 1002 + }, + { + "epoch": 0.82, + "learning_rate": 4.019267817841835e-05, + "loss": 1.1028, + "step": 1003 + }, + { + "epoch": 0.82, + "learning_rate": 3.984444642204063e-05, + "loss": 1.1775, + "step": 1004 + }, + { + "epoch": 0.82, + "learning_rate": 3.949759911651371e-05, + "loss": 1.0999, + "step": 1005 + }, + { + "epoch": 0.82, + "learning_rate": 3.9152138546778624e-05, + "loss": 1.2327, + "step": 1006 + }, + { + "epoch": 0.82, + "learning_rate": 3.880806698864087e-05, + "loss": 1.0221, + "step": 1007 + }, + { + "epoch": 0.82, + "learning_rate": 3.846538670875569e-05, + "loss": 1.2356, + "step": 1008 + }, + { + "epoch": 0.82, + "learning_rate": 3.812409996461275e-05, + "loss": 1.0937, + "step": 1009 + }, + { + "epoch": 0.82, + "learning_rate": 3.7784209004521565e-05, + "loss": 1.2011, + "step": 1010 + }, + { + "epoch": 0.83, + "learning_rate": 3.7445716067596506e-05, + "loss": 1.172, + "step": 1011 + }, + { + "epoch": 0.83, + "learning_rate": 3.710862338374219e-05, + "loss": 1.2182, + "step": 1012 + }, + { + "epoch": 0.83, + "learning_rate": 3.677293317363864e-05, + "loss": 1.0164, + "step": 1013 + }, + { + "epoch": 0.83, + "learning_rate": 3.6438647648726924e-05, + "loss": 1.1008, + "step": 1014 + }, + { + "epoch": 0.83, + "learning_rate": 3.610576901119422e-05, + "loss": 1.1986, + "step": 1015 + }, + { + "epoch": 0.83, + "learning_rate": 3.57742994539596e-05, + "loss": 1.1129, + "step": 1016 + }, + { + "epoch": 0.83, + "learning_rate": 3.544424116065931e-05, + "loss": 1.2602, + "step": 1017 + }, + { + "epoch": 0.83, + "learning_rate": 3.5115596305632854e-05, + "loss": 1.0993, + "step": 1018 + }, + { + "epoch": 0.83, + "learning_rate": 3.478836705390809e-05, + "loss": 1.1901, + "step": 1019 + }, + { + "epoch": 0.83, + "learning_rate": 3.4462555561187355e-05, + "loss": 1.1811, + "step": 1020 + }, + { + "epoch": 0.83, + "learning_rate": 3.41381639738331e-05, + "loss": 1.1807, + "step": 1021 + }, + { + "epoch": 0.83, + "learning_rate": 3.3815194428853945e-05, + "loss": 1.1534, + "step": 1022 + }, + { + "epoch": 0.84, + "learning_rate": 3.3493649053890325e-05, + "loss": 1.2909, + "step": 1023 + }, + { + "epoch": 0.84, + "learning_rate": 3.317352996720063e-05, + "loss": 1.2967, + "step": 1024 + }, + { + "epoch": 0.84, + "learning_rate": 3.2854839277647265e-05, + "loss": 1.2431, + "step": 1025 + }, + { + "epoch": 0.84, + "learning_rate": 3.253757908468269e-05, + "loss": 1.033, + "step": 1026 + }, + { + "epoch": 0.84, + "learning_rate": 3.222175147833556e-05, + "loss": 1.2833, + "step": 1027 + }, + { + "epoch": 0.84, + "learning_rate": 3.190735853919724e-05, + "loss": 1.0004, + "step": 1028 + }, + { + "epoch": 0.84, + "learning_rate": 3.159440233840763e-05, + "loss": 1.1293, + "step": 1029 + }, + { + "epoch": 0.84, + "learning_rate": 3.1282884937641865e-05, + "loss": 1.0512, + "step": 1030 + }, + { + "epoch": 0.84, + "learning_rate": 3.097280838909664e-05, + "loss": 1.0307, + "step": 1031 + }, + { + "epoch": 0.84, + "learning_rate": 3.066417473547667e-05, + "loss": 1.0601, + "step": 1032 + }, + { + "epoch": 0.84, + "learning_rate": 3.0356986009981212e-05, + "loss": 1.1182, + "step": 1033 + }, + { + "epoch": 0.84, + "learning_rate": 3.0051244236290854e-05, + "loss": 1.1825, + "step": 1034 + }, + { + "epoch": 0.85, + "learning_rate": 2.974695142855388e-05, + "loss": 1.0933, + "step": 1035 + }, + { + "epoch": 0.85, + "learning_rate": 2.9444109591373196e-05, + "loss": 1.1215, + "step": 1036 + }, + { + "epoch": 0.85, + "learning_rate": 2.9142720719793124e-05, + "loss": 1.2765, + "step": 1037 + }, + { + "epoch": 0.85, + "learning_rate": 2.88427867992862e-05, + "loss": 1.1549, + "step": 1038 + }, + { + "epoch": 0.85, + "learning_rate": 2.854430980574002e-05, + "loss": 1.2543, + "step": 1039 + }, + { + "epoch": 0.85, + "learning_rate": 2.824729170544457e-05, + "loss": 1.1894, + "step": 1040 + }, + { + "epoch": 0.85, + "learning_rate": 2.7951734455078786e-05, + "loss": 1.1371, + "step": 1041 + }, + { + "epoch": 0.85, + "learning_rate": 2.7657640001697986e-05, + "loss": 1.2182, + "step": 1042 + }, + { + "epoch": 0.85, + "learning_rate": 2.7365010282720953e-05, + "loss": 1.2534, + "step": 1043 + }, + { + "epoch": 0.85, + "learning_rate": 2.707384722591716e-05, + "loss": 1.0925, + "step": 1044 + }, + { + "epoch": 0.85, + "learning_rate": 2.678415274939408e-05, + "loss": 1.1895, + "step": 1045 + }, + { + "epoch": 0.85, + "learning_rate": 2.649592876158463e-05, + "loss": 1.1423, + "step": 1046 + }, + { + "epoch": 0.85, + "learning_rate": 2.6209177161234443e-05, + "loss": 1.1321, + "step": 1047 + }, + { + "epoch": 0.86, + "learning_rate": 2.5923899837389465e-05, + "loss": 1.2068, + "step": 1048 + }, + { + "epoch": 0.86, + "learning_rate": 2.5640098669383494e-05, + "loss": 1.146, + "step": 1049 + }, + { + "epoch": 0.86, + "learning_rate": 2.5357775526825777e-05, + "loss": 1.0929, + "step": 1050 + }, + { + "epoch": 0.86, + "learning_rate": 2.5076932269588708e-05, + "loss": 1.1261, + "step": 1051 + }, + { + "epoch": 0.86, + "learning_rate": 2.4797570747795596e-05, + "loss": 1.2947, + "step": 1052 + }, + { + "epoch": 0.86, + "learning_rate": 2.451969280180849e-05, + "loss": 1.1558, + "step": 1053 + }, + { + "epoch": 0.86, + "learning_rate": 2.424330026221594e-05, + "loss": 1.1588, + "step": 1054 + }, + { + "epoch": 0.86, + "learning_rate": 2.3968394949821032e-05, + "loss": 1.1148, + "step": 1055 + }, + { + "epoch": 0.86, + "learning_rate": 2.3694978675629473e-05, + "loss": 1.099, + "step": 1056 + }, + { + "epoch": 0.86, + "learning_rate": 2.3423053240837516e-05, + "loss": 1.107, + "step": 1057 + }, + { + "epoch": 0.86, + "learning_rate": 2.3152620436820092e-05, + "loss": 1.039, + "step": 1058 + }, + { + "epoch": 0.86, + "learning_rate": 2.2883682045119063e-05, + "loss": 1.1043, + "step": 1059 + }, + { + "epoch": 0.87, + "learning_rate": 2.2616239837431634e-05, + "loss": 1.1579, + "step": 1060 + }, + { + "epoch": 0.87, + "learning_rate": 2.2350295575598368e-05, + "loss": 1.1572, + "step": 1061 + }, + { + "epoch": 0.87, + "learning_rate": 2.208585101159183e-05, + "loss": 1.1346, + "step": 1062 + }, + { + "epoch": 0.87, + "learning_rate": 2.1822907887504934e-05, + "loss": 1.1404, + "step": 1063 + }, + { + "epoch": 0.87, + "learning_rate": 2.156146793553948e-05, + "loss": 1.1443, + "step": 1064 + }, + { + "epoch": 0.87, + "learning_rate": 2.1301532877994746e-05, + "loss": 1.0973, + "step": 1065 + }, + { + "epoch": 0.87, + "learning_rate": 2.1043104427256266e-05, + "loss": 1.0978, + "step": 1066 + }, + { + "epoch": 0.87, + "learning_rate": 2.07861842857843e-05, + "loss": 1.0767, + "step": 1067 + }, + { + "epoch": 0.87, + "learning_rate": 2.0530774146102798e-05, + "loss": 1.0728, + "step": 1068 + }, + { + "epoch": 0.87, + "learning_rate": 2.0276875690788203e-05, + "loss": 1.0472, + "step": 1069 + }, + { + "epoch": 0.87, + "learning_rate": 2.0024490592458395e-05, + "loss": 1.1073, + "step": 1070 + }, + { + "epoch": 0.87, + "learning_rate": 1.977362051376158e-05, + "loss": 1.1251, + "step": 1071 + }, + { + "epoch": 0.88, + "learning_rate": 1.9524267107365544e-05, + "loss": 1.1315, + "step": 1072 + }, + { + "epoch": 0.88, + "learning_rate": 1.927643201594645e-05, + "loss": 1.1705, + "step": 1073 + }, + { + "epoch": 0.88, + "learning_rate": 1.9030116872178316e-05, + "loss": 1.1808, + "step": 1074 + }, + { + "epoch": 0.88, + "learning_rate": 1.8785323298722097e-05, + "loss": 1.1152, + "step": 1075 + }, + { + "epoch": 0.88, + "learning_rate": 1.854205290821498e-05, + "loss": 1.1495, + "step": 1076 + }, + { + "epoch": 0.88, + "learning_rate": 1.8300307303259907e-05, + "loss": 1.1684, + "step": 1077 + }, + { + "epoch": 0.88, + "learning_rate": 1.806008807641482e-05, + "loss": 1.2384, + "step": 1078 + }, + { + "epoch": 0.88, + "learning_rate": 1.7821396810182438e-05, + "loss": 1.178, + "step": 1079 + }, + { + "epoch": 0.88, + "learning_rate": 1.7584235076999465e-05, + "loss": 1.2636, + "step": 1080 + }, + { + "epoch": 0.88, + "learning_rate": 1.7348604439226617e-05, + "loss": 1.3461, + "step": 1081 + }, + { + "epoch": 0.88, + "learning_rate": 1.711450644913798e-05, + "loss": 1.0695, + "step": 1082 + }, + { + "epoch": 0.88, + "learning_rate": 1.6881942648911074e-05, + "loss": 1.1345, + "step": 1083 + }, + { + "epoch": 0.89, + "learning_rate": 1.6650914570616486e-05, + "loss": 1.0923, + "step": 1084 + }, + { + "epoch": 0.89, + "learning_rate": 1.6421423736208e-05, + "loss": 1.0734, + "step": 1085 + }, + { + "epoch": 0.89, + "learning_rate": 1.6193471657512253e-05, + "loss": 1.009, + "step": 1086 + }, + { + "epoch": 0.89, + "learning_rate": 1.5967059836219043e-05, + "loss": 1.0944, + "step": 1087 + }, + { + "epoch": 0.89, + "learning_rate": 1.5742189763871374e-05, + "loss": 1.2493, + "step": 1088 + }, + { + "epoch": 0.89, + "learning_rate": 1.5518862921855532e-05, + "loss": 1.2198, + "step": 1089 + }, + { + "epoch": 0.89, + "learning_rate": 1.5297080781391416e-05, + "loss": 1.1519, + "step": 1090 + }, + { + "epoch": 0.89, + "learning_rate": 1.5076844803522921e-05, + "loss": 1.0305, + "step": 1091 + }, + { + "epoch": 0.89, + "learning_rate": 1.4858156439108094e-05, + "loss": 1.1734, + "step": 1092 + }, + { + "epoch": 0.89, + "learning_rate": 1.4641017128809802e-05, + "loss": 1.0773, + "step": 1093 + }, + { + "epoch": 0.89, + "learning_rate": 1.442542830308599e-05, + "loss": 1.1516, + "step": 1094 + }, + { + "epoch": 0.89, + "learning_rate": 1.4211391382180639e-05, + "loss": 1.1512, + "step": 1095 + }, + { + "epoch": 0.89, + "learning_rate": 1.3998907776113984e-05, + "loss": 1.1634, + "step": 1096 + }, + { + "epoch": 0.9, + "learning_rate": 1.3787978884673452e-05, + "loss": 1.1719, + "step": 1097 + }, + { + "epoch": 0.9, + "learning_rate": 1.357860609740455e-05, + "loss": 1.0838, + "step": 1098 + }, + { + "epoch": 0.9, + "learning_rate": 1.3370790793601372e-05, + "loss": 1.058, + "step": 1099 + }, + { + "epoch": 0.9, + "learning_rate": 1.3164534342297863e-05, + "loss": 1.1387, + "step": 1100 + }, + { + "epoch": 0.9, + "learning_rate": 1.2959838102258537e-05, + "loss": 1.1291, + "step": 1101 + }, + { + "epoch": 0.9, + "learning_rate": 1.2756703421969745e-05, + "loss": 1.1521, + "step": 1102 + }, + { + "epoch": 0.9, + "learning_rate": 1.2555131639630568e-05, + "loss": 1.162, + "step": 1103 + }, + { + "epoch": 0.9, + "learning_rate": 1.2355124083144182e-05, + "loss": 1.2249, + "step": 1104 + }, + { + "epoch": 0.9, + "learning_rate": 1.2156682070109087e-05, + "loss": 1.1332, + "step": 1105 + }, + { + "epoch": 0.9, + "learning_rate": 1.1959806907810289e-05, + "loss": 1.2263, + "step": 1106 + }, + { + "epoch": 0.9, + "learning_rate": 1.1764499893210877e-05, + "loss": 1.1723, + "step": 1107 + }, + { + "epoch": 0.9, + "learning_rate": 1.1570762312943296e-05, + "loss": 1.1984, + "step": 1108 + }, + { + "epoch": 0.91, + "learning_rate": 1.1378595443301e-05, + "loss": 1.1274, + "step": 1109 + }, + { + "epoch": 0.91, + "learning_rate": 1.1188000550230004e-05, + "loss": 1.0725, + "step": 1110 + }, + { + "epoch": 0.91, + "learning_rate": 1.0998978889320582e-05, + "loss": 1.2412, + "step": 1111 + }, + { + "epoch": 0.91, + "learning_rate": 1.0811531705798905e-05, + "loss": 1.2438, + "step": 1112 + }, + { + "epoch": 0.91, + "learning_rate": 1.0625660234518913e-05, + "loss": 1.156, + "step": 1113 + }, + { + "epoch": 0.91, + "learning_rate": 1.0441365699954159e-05, + "loss": 1.0937, + "step": 1114 + }, + { + "epoch": 0.91, + "learning_rate": 1.0258649316189723e-05, + "loss": 1.1832, + "step": 1115 + }, + { + "epoch": 0.91, + "learning_rate": 1.0077512286914226e-05, + "loss": 1.1857, + "step": 1116 + }, + { + "epoch": 0.91, + "learning_rate": 9.897955805412e-06, + "loss": 1.0269, + "step": 1117 + }, + { + "epoch": 0.91, + "learning_rate": 9.719981054555017e-06, + "loss": 1.1238, + "step": 1118 + }, + { + "epoch": 0.91, + "learning_rate": 9.54358920679524e-06, + "loss": 1.0953, + "step": 1119 + }, + { + "epoch": 0.91, + "learning_rate": 9.368781424156924e-06, + "loss": 1.0879, + "step": 1120 + }, + { + "epoch": 0.92, + "learning_rate": 9.19555885822887e-06, + "loss": 1.1497, + "step": 1121 + }, + { + "epoch": 0.92, + "learning_rate": 9.023922650156863e-06, + "loss": 1.074, + "step": 1122 + }, + { + "epoch": 0.92, + "learning_rate": 8.853873930636218e-06, + "loss": 1.1549, + "step": 1123 + }, + { + "epoch": 0.92, + "learning_rate": 8.68541381990426e-06, + "loss": 1.0667, + "step": 1124 + }, + { + "epoch": 0.92, + "learning_rate": 8.51854342773295e-06, + "loss": 1.0716, + "step": 1125 + }, + { + "epoch": 0.92, + "learning_rate": 8.353263853421584e-06, + "loss": 1.1075, + "step": 1126 + }, + { + "epoch": 0.92, + "learning_rate": 8.189576185789638e-06, + "loss": 0.9856, + "step": 1127 + }, + { + "epoch": 0.92, + "learning_rate": 8.02748150316937e-06, + "loss": 1.0225, + "step": 1128 + }, + { + "epoch": 0.92, + "learning_rate": 7.866980873399016e-06, + "loss": 1.2488, + "step": 1129 + }, + { + "epoch": 0.92, + "learning_rate": 7.708075353815513e-06, + "loss": 1.1536, + "step": 1130 + }, + { + "epoch": 0.92, + "learning_rate": 7.550765991247654e-06, + "loss": 1.2557, + "step": 1131 + }, + { + "epoch": 0.92, + "learning_rate": 7.3950538220090846e-06, + "loss": 1.2443, + "step": 1132 + }, + { + "epoch": 0.93, + "learning_rate": 7.2409398718916994e-06, + "loss": 1.1899, + "step": 1133 + }, + { + "epoch": 0.93, + "learning_rate": 7.088425156158623e-06, + "loss": 1.1983, + "step": 1134 + }, + { + "epoch": 0.93, + "learning_rate": 6.937510679537628e-06, + "loss": 1.0985, + "step": 1135 + }, + { + "epoch": 0.93, + "learning_rate": 6.78819743621456e-06, + "loss": 1.094, + "step": 1136 + }, + { + "epoch": 0.93, + "learning_rate": 6.640486409826785e-06, + "loss": 1.138, + "step": 1137 + }, + { + "epoch": 0.93, + "learning_rate": 6.494378573456611e-06, + "loss": 1.1446, + "step": 1138 + }, + { + "epoch": 0.93, + "learning_rate": 6.349874889624963e-06, + "loss": 1.1106, + "step": 1139 + }, + { + "epoch": 0.93, + "learning_rate": 6.206976310284995e-06, + "loss": 1.0965, + "step": 1140 + }, + { + "epoch": 0.93, + "learning_rate": 6.065683776815933e-06, + "loss": 1.1614, + "step": 1141 + }, + { + "epoch": 0.93, + "learning_rate": 5.92599822001666e-06, + "loss": 1.0881, + "step": 1142 + }, + { + "epoch": 0.93, + "learning_rate": 5.78792056009983e-06, + "loss": 1.0994, + "step": 1143 + }, + { + "epoch": 0.93, + "learning_rate": 5.651451706685601e-06, + "loss": 1.2027, + "step": 1144 + }, + { + "epoch": 0.93, + "learning_rate": 5.516592558795747e-06, + "loss": 1.034, + "step": 1145 + }, + { + "epoch": 0.94, + "learning_rate": 5.383344004847773e-06, + "loss": 1.0785, + "step": 1146 + }, + { + "epoch": 0.94, + "learning_rate": 5.251706922648869e-06, + "loss": 1.15, + "step": 1147 + }, + { + "epoch": 0.94, + "learning_rate": 5.121682179390408e-06, + "loss": 1.1274, + "step": 1148 + }, + { + "epoch": 0.94, + "learning_rate": 4.993270631642038e-06, + "loss": 1.1822, + "step": 1149 + }, + { + "epoch": 0.94, + "learning_rate": 4.866473125346049e-06, + "loss": 1.1644, + "step": 1150 + }, + { + "epoch": 0.94, + "learning_rate": 4.741290495811873e-06, + "loss": 1.2427, + "step": 1151 + }, + { + "epoch": 0.94, + "learning_rate": 4.6177235677105636e-06, + "loss": 1.0606, + "step": 1152 + }, + { + "epoch": 0.94, + "learning_rate": 4.4957731550692995e-06, + "loss": 1.0533, + "step": 1153 + }, + { + "epoch": 0.94, + "learning_rate": 4.3754400612661115e-06, + "loss": 1.0582, + "step": 1154 + }, + { + "epoch": 0.94, + "learning_rate": 4.256725079024554e-06, + "loss": 0.9833, + "step": 1155 + }, + { + "epoch": 0.94, + "learning_rate": 4.139628990408456e-06, + "loss": 1.2803, + "step": 1156 + }, + { + "epoch": 0.94, + "learning_rate": 4.024152566816791e-06, + "loss": 1.0903, + "step": 1157 + }, + { + "epoch": 0.95, + "learning_rate": 3.910296568978622e-06, + "loss": 1.1445, + "step": 1158 + }, + { + "epoch": 0.95, + "learning_rate": 3.798061746947995e-06, + "loss": 1.1353, + "step": 1159 + }, + { + "epoch": 0.95, + "learning_rate": 3.687448840099139e-06, + "loss": 1.198, + "step": 1160 + }, + { + "epoch": 0.95, + "learning_rate": 3.578458577121524e-06, + "loss": 1.1087, + "step": 1161 + }, + { + "epoch": 0.95, + "learning_rate": 3.471091676014976e-06, + "loss": 1.2356, + "step": 1162 + }, + { + "epoch": 0.95, + "learning_rate": 3.3653488440851255e-06, + "loss": 1.2982, + "step": 1163 + }, + { + "epoch": 0.95, + "learning_rate": 3.2612307779386065e-06, + "loss": 1.0539, + "step": 1164 + }, + { + "epoch": 0.95, + "learning_rate": 3.158738163478475e-06, + "loss": 1.1077, + "step": 1165 + }, + { + "epoch": 0.95, + "learning_rate": 3.057871675899826e-06, + "loss": 1.1119, + "step": 1166 + }, + { + "epoch": 0.95, + "learning_rate": 2.9586319796851556e-06, + "loss": 1.1195, + "step": 1167 + }, + { + "epoch": 0.95, + "learning_rate": 2.861019728600117e-06, + "loss": 1.0234, + "step": 1168 + }, + { + "epoch": 0.95, + "learning_rate": 2.7650355656892167e-06, + "loss": 1.2179, + "step": 1169 + }, + { + "epoch": 0.96, + "learning_rate": 2.6706801232714018e-06, + "loss": 1.0648, + "step": 1170 + }, + { + "epoch": 0.96, + "learning_rate": 2.5779540229361743e-06, + "loss": 1.1121, + "step": 1171 + }, + { + "epoch": 0.96, + "learning_rate": 2.486857875539261e-06, + "loss": 1.0745, + "step": 1172 + }, + { + "epoch": 0.96, + "learning_rate": 2.397392281198729e-06, + "loss": 1.0614, + "step": 1173 + }, + { + "epoch": 0.96, + "learning_rate": 2.309557829290876e-06, + "loss": 1.1138, + "step": 1174 + }, + { + "epoch": 0.96, + "learning_rate": 2.2233550984466224e-06, + "loss": 1.0559, + "step": 1175 + }, + { + "epoch": 0.96, + "learning_rate": 2.1387846565474044e-06, + "loss": 0.9898, + "step": 1176 + }, + { + "epoch": 0.96, + "learning_rate": 2.055847060721566e-06, + "loss": 1.1394, + "step": 1177 + }, + { + "epoch": 0.96, + "learning_rate": 1.9745428573406943e-06, + "loss": 1.2373, + "step": 1178 + }, + { + "epoch": 0.96, + "learning_rate": 1.8948725820160662e-06, + "loss": 1.1048, + "step": 1179 + }, + { + "epoch": 0.96, + "learning_rate": 1.8168367595949586e-06, + "loss": 1.0287, + "step": 1180 + }, + { + "epoch": 0.96, + "learning_rate": 1.7404359041573725e-06, + "loss": 1.161, + "step": 1181 + }, + { + "epoch": 0.97, + "learning_rate": 1.6656705190125076e-06, + "loss": 1.0959, + "step": 1182 + }, + { + "epoch": 0.97, + "learning_rate": 1.5925410966955712e-06, + "loss": 1.2044, + "step": 1183 + }, + { + "epoch": 0.97, + "learning_rate": 1.521048118964391e-06, + "loss": 1.2111, + "step": 1184 + }, + { + "epoch": 0.97, + "learning_rate": 1.451192056796391e-06, + "loss": 1.365, + "step": 1185 + }, + { + "epoch": 0.97, + "learning_rate": 1.3829733703853154e-06, + "loss": 1.1618, + "step": 1186 + }, + { + "epoch": 0.97, + "learning_rate": 1.3163925091384533e-06, + "loss": 1.2241, + "step": 1187 + }, + { + "epoch": 0.97, + "learning_rate": 1.2514499116733636e-06, + "loss": 1.0862, + "step": 1188 + }, + { + "epoch": 0.97, + "learning_rate": 1.1881460058152383e-06, + "loss": 1.2027, + "step": 1189 + }, + { + "epoch": 0.97, + "learning_rate": 1.1264812085939602e-06, + "loss": 1.1396, + "step": 1190 + }, + { + "epoch": 0.97, + "learning_rate": 1.066455926241383e-06, + "loss": 1.3238, + "step": 1191 + }, + { + "epoch": 0.97, + "learning_rate": 1.0080705541886392e-06, + "loss": 1.1636, + "step": 1192 + }, + { + "epoch": 0.97, + "learning_rate": 9.513254770636137e-07, + "loss": 1.2061, + "step": 1193 + }, + { + "epoch": 0.97, + "learning_rate": 8.962210686883076e-07, + "loss": 1.138, + "step": 1194 + }, + { + "epoch": 0.98, + "learning_rate": 8.427576920763958e-07, + "loss": 1.18, + "step": 1195 + }, + { + "epoch": 0.98, + "learning_rate": 7.909356994308947e-07, + "loss": 1.0543, + "step": 1196 + }, + { + "epoch": 0.98, + "learning_rate": 7.407554321417765e-07, + "loss": 1.3065, + "step": 1197 + }, + { + "epoch": 0.98, + "learning_rate": 6.922172207838029e-07, + "loss": 1.3796, + "step": 1198 + }, + { + "epoch": 0.98, + "learning_rate": 6.453213851142226e-07, + "loss": 1.2853, + "step": 1199 + }, + { + "epoch": 0.98, + "learning_rate": 6.00068234070772e-07, + "loss": 1.0838, + "step": 1200 + }, + { + "epoch": 0.98, + "learning_rate": 5.564580657695939e-07, + "loss": 0.9754, + "step": 1201 + }, + { + "epoch": 0.98, + "learning_rate": 5.144911675032671e-07, + "loss": 1.2056, + "step": 1202 + }, + { + "epoch": 0.98, + "learning_rate": 4.741678157389739e-07, + "loss": 1.1819, + "step": 1203 + }, + { + "epoch": 0.98, + "learning_rate": 4.3548827611661344e-07, + "loss": 1.1037, + "step": 1204 + }, + { + "epoch": 0.98, + "learning_rate": 3.984528034470525e-07, + "loss": 1.0535, + "step": 1205 + }, + { + "epoch": 0.98, + "learning_rate": 3.630616417104604e-07, + "loss": 1.1327, + "step": 1206 + }, + { + "epoch": 0.99, + "learning_rate": 3.293150240547549e-07, + "loss": 1.0755, + "step": 1207 + }, + { + "epoch": 0.99, + "learning_rate": 2.972131727939642e-07, + "loss": 1.0664, + "step": 1208 + }, + { + "epoch": 0.99, + "learning_rate": 2.6675629940689505e-07, + "loss": 1.2237, + "step": 1209 + }, + { + "epoch": 0.99, + "learning_rate": 2.3794460453555045e-07, + "loss": 1.2799, + "step": 1210 + }, + { + "epoch": 0.99, + "learning_rate": 2.1077827798404726e-07, + "loss": 1.0723, + "step": 1211 + }, + { + "epoch": 0.99, + "learning_rate": 1.852574987171174e-07, + "loss": 1.3057, + "step": 1212 + }, + { + "epoch": 0.99, + "learning_rate": 1.6138243485910864e-07, + "loss": 1.1932, + "step": 1213 + }, + { + "epoch": 0.99, + "learning_rate": 1.3915324369284665e-07, + "loss": 1.245, + "step": 1214 + }, + { + "epoch": 0.99, + "learning_rate": 1.1857007165852473e-07, + "loss": 1.0783, + "step": 1215 + }, + { + "epoch": 0.99, + "learning_rate": 9.963305435278791e-08, + "loss": 1.0482, + "step": 1216 + }, + { + "epoch": 0.99, + "learning_rate": 8.23423165278725e-08, + "loss": 1.1656, + "step": 1217 + }, + { + "epoch": 0.99, + "learning_rate": 6.669797209069017e-08, + "loss": 1.0242, + "step": 1218 + }, + { + "epoch": 1.0, + "learning_rate": 5.270012410216185e-08, + "loss": 1.2263, + "step": 1219 + }, + { + "epoch": 1.0, + "learning_rate": 4.034886477655153e-08, + "loss": 1.1416, + "step": 1220 + }, + { + "epoch": 1.0, + "learning_rate": 2.964427548077242e-08, + "loss": 1.1767, + "step": 1221 + }, + { + "epoch": 1.0, + "learning_rate": 2.0586426733887332e-08, + "loss": 1.0344, + "step": 1222 + }, + { + "epoch": 1.0, + "learning_rate": 1.3175378206720102e-08, + "loss": 1.1361, + "step": 1223 + }, + { + "epoch": 1.0, + "learning_rate": 7.4111787213282375e-09, + "loss": 1.0677, + "step": 1224 + }, + { + "epoch": 1.0, + "step": 1224, + "total_flos": 1.804319615782748e+17, + "train_loss": 1.241359075640931, + "train_runtime": 34128.9816, + "train_samples_per_second": 0.287, + "train_steps_per_second": 0.036 + } + ], + "logging_steps": 1, + "max_steps": 1224, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 1.804319615782748e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}