{ "best_metric": 10.490081787109375, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.176056338028169, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008802816901408451, "grad_norm": 1.0416910648345947, "learning_rate": 1.1000000000000001e-05, "loss": 10.8354, "step": 1 }, { "epoch": 0.0008802816901408451, "eval_loss": 10.828083038330078, "eval_runtime": 40.666, "eval_samples_per_second": 47.042, "eval_steps_per_second": 11.779, "step": 1 }, { "epoch": 0.0017605633802816902, "grad_norm": 1.0834181308746338, "learning_rate": 2.2000000000000003e-05, "loss": 10.8404, "step": 2 }, { "epoch": 0.002640845070422535, "grad_norm": 0.968176007270813, "learning_rate": 3.3e-05, "loss": 10.8331, "step": 3 }, { "epoch": 0.0035211267605633804, "grad_norm": 0.827340304851532, "learning_rate": 4.4000000000000006e-05, "loss": 10.8353, "step": 4 }, { "epoch": 0.0044014084507042256, "grad_norm": 0.8887184858322144, "learning_rate": 5.5e-05, "loss": 10.8322, "step": 5 }, { "epoch": 0.00528169014084507, "grad_norm": 0.8796689510345459, "learning_rate": 6.6e-05, "loss": 10.8238, "step": 6 }, { "epoch": 0.006161971830985915, "grad_norm": 0.8759607672691345, "learning_rate": 7.7e-05, "loss": 10.8217, "step": 7 }, { "epoch": 0.007042253521126761, "grad_norm": 0.9419655203819275, "learning_rate": 8.800000000000001e-05, "loss": 10.8036, "step": 8 }, { "epoch": 0.007922535211267605, "grad_norm": 0.8953964710235596, "learning_rate": 9.900000000000001e-05, "loss": 10.8025, "step": 9 }, { "epoch": 0.008802816901408451, "grad_norm": 0.8648999333381653, "learning_rate": 0.00011, "loss": 10.7915, "step": 10 }, { "epoch": 0.009683098591549295, "grad_norm": 0.9457488059997559, "learning_rate": 0.0001099924817745858, "loss": 10.7801, "step": 11 }, { "epoch": 0.01056338028169014, "grad_norm": 0.7545454502105713, "learning_rate": 0.00010996992915375093, "loss": 10.7735, "step": 12 }, { "epoch": 0.011443661971830986, "grad_norm": 0.7653600573539734, "learning_rate": 0.00010993234830315676, "loss": 10.7706, "step": 13 }, { "epoch": 0.01232394366197183, "grad_norm": 0.8165815472602844, "learning_rate": 0.0001098797494970326, "loss": 10.7605, "step": 14 }, { "epoch": 0.013204225352112676, "grad_norm": 0.7317748069763184, "learning_rate": 0.00010981214711536684, "loss": 10.7501, "step": 15 }, { "epoch": 0.014084507042253521, "grad_norm": 0.8150272369384766, "learning_rate": 0.00010972955963997563, "loss": 10.7434, "step": 16 }, { "epoch": 0.014964788732394365, "grad_norm": 0.7701242566108704, "learning_rate": 0.00010963200964945011, "loss": 10.7278, "step": 17 }, { "epoch": 0.01584507042253521, "grad_norm": 0.7349888682365417, "learning_rate": 0.00010951952381298364, "loss": 10.7288, "step": 18 }, { "epoch": 0.016725352112676055, "grad_norm": 0.6739762425422668, "learning_rate": 0.00010939213288308077, "loss": 10.7154, "step": 19 }, { "epoch": 0.017605633802816902, "grad_norm": 0.6542978286743164, "learning_rate": 0.00010924987168714973, "loss": 10.7205, "step": 20 }, { "epoch": 0.018485915492957746, "grad_norm": 0.6472326517105103, "learning_rate": 0.00010909277911798103, "loss": 10.7262, "step": 21 }, { "epoch": 0.01936619718309859, "grad_norm": 0.6118732690811157, "learning_rate": 0.00010892089812311451, "loss": 10.6898, "step": 22 }, { "epoch": 0.020246478873239437, "grad_norm": 0.6045047640800476, "learning_rate": 0.00010873427569309797, "loss": 10.7162, "step": 23 }, { "epoch": 0.02112676056338028, "grad_norm": 0.7520612478256226, "learning_rate": 0.00010853296284864032, "loss": 10.6945, "step": 24 }, { "epoch": 0.022007042253521125, "grad_norm": 0.6803677678108215, "learning_rate": 0.00010831701462666318, "loss": 10.7144, "step": 25 }, { "epoch": 0.022887323943661973, "grad_norm": 0.6361639499664307, "learning_rate": 0.00010808649006525419, "loss": 10.702, "step": 26 }, { "epoch": 0.023767605633802816, "grad_norm": 0.5413468480110168, "learning_rate": 0.00010784145218752665, "loss": 10.7203, "step": 27 }, { "epoch": 0.02464788732394366, "grad_norm": 0.5481899976730347, "learning_rate": 0.00010758196798438968, "loss": 10.6863, "step": 28 }, { "epoch": 0.025528169014084508, "grad_norm": 0.5831198692321777, "learning_rate": 0.00010730810839623346, "loss": 10.6874, "step": 29 }, { "epoch": 0.02640845070422535, "grad_norm": 0.5215359926223755, "learning_rate": 0.0001070199482935349, "loss": 10.6734, "step": 30 }, { "epoch": 0.027288732394366196, "grad_norm": 0.48211073875427246, "learning_rate": 0.00010671756645638888, "loss": 10.6803, "step": 31 }, { "epoch": 0.028169014084507043, "grad_norm": 0.5410356521606445, "learning_rate": 0.00010640104555297034, "loss": 10.6592, "step": 32 }, { "epoch": 0.029049295774647887, "grad_norm": 0.4434191882610321, "learning_rate": 0.00010607047211693389, "loss": 10.6771, "step": 33 }, { "epoch": 0.02992957746478873, "grad_norm": 0.5003845691680908, "learning_rate": 0.00010572593652375616, "loss": 10.6756, "step": 34 }, { "epoch": 0.030809859154929578, "grad_norm": 0.44623205065727234, "learning_rate": 0.00010536753296602816, "loss": 10.6654, "step": 35 }, { "epoch": 0.03169014084507042, "grad_norm": 0.48966875672340393, "learning_rate": 0.00010499535942770394, "loss": 10.6103, "step": 36 }, { "epoch": 0.032570422535211266, "grad_norm": 0.42709842324256897, "learning_rate": 0.00010460951765731275, "loss": 10.6529, "step": 37 }, { "epoch": 0.03345070422535211, "grad_norm": 0.3827378451824188, "learning_rate": 0.000104210113140142, "loss": 10.6431, "step": 38 }, { "epoch": 0.03433098591549296, "grad_norm": 0.39901039004325867, "learning_rate": 0.00010379725506939865, "loss": 10.622, "step": 39 }, { "epoch": 0.035211267605633804, "grad_norm": 0.43945929408073425, "learning_rate": 0.0001033710563163569, "loss": 10.6221, "step": 40 }, { "epoch": 0.03609154929577465, "grad_norm": 0.3692222237586975, "learning_rate": 0.00010293163339950024, "loss": 10.6182, "step": 41 }, { "epoch": 0.03697183098591549, "grad_norm": 0.37782034277915955, "learning_rate": 0.00010247910645266658, "loss": 10.6313, "step": 42 }, { "epoch": 0.037852112676056336, "grad_norm": 0.4363635182380676, "learning_rate": 0.00010201359919220464, "loss": 10.606, "step": 43 }, { "epoch": 0.03873239436619718, "grad_norm": 0.3427577614784241, "learning_rate": 0.00010153523888315144, "loss": 10.6184, "step": 44 }, { "epoch": 0.03961267605633803, "grad_norm": 0.4304777681827545, "learning_rate": 0.00010104415630443907, "loss": 10.6178, "step": 45 }, { "epoch": 0.040492957746478875, "grad_norm": 0.41288602352142334, "learning_rate": 0.0001005404857131411, "loss": 10.6375, "step": 46 }, { "epoch": 0.04137323943661972, "grad_norm": 0.4251928925514221, "learning_rate": 0.00010002436480776809, "loss": 10.6102, "step": 47 }, { "epoch": 0.04225352112676056, "grad_norm": 0.4017108976840973, "learning_rate": 9.949593469062211e-05, "loss": 10.6077, "step": 48 }, { "epoch": 0.043133802816901406, "grad_norm": 0.4274183511734009, "learning_rate": 9.895533982922087e-05, "loss": 10.6156, "step": 49 }, { "epoch": 0.04401408450704225, "grad_norm": 0.5586544871330261, "learning_rate": 9.840272801680165e-05, "loss": 10.5981, "step": 50 }, { "epoch": 0.04401408450704225, "eval_loss": 10.611796379089355, "eval_runtime": 33.253, "eval_samples_per_second": 57.529, "eval_steps_per_second": 14.405, "step": 50 }, { "epoch": 0.0448943661971831, "grad_norm": 0.8303582668304443, "learning_rate": 9.783825033191619e-05, "loss": 10.6168, "step": 51 }, { "epoch": 0.045774647887323945, "grad_norm": 0.7027772665023804, "learning_rate": 9.726206109712725e-05, "loss": 10.623, "step": 52 }, { "epoch": 0.04665492957746479, "grad_norm": 0.7661027908325195, "learning_rate": 9.667431783681842e-05, "loss": 10.627, "step": 53 }, { "epoch": 0.04753521126760563, "grad_norm": 0.7784258723258972, "learning_rate": 9.607518123412847e-05, "loss": 10.6067, "step": 54 }, { "epoch": 0.04841549295774648, "grad_norm": 0.70957350730896, "learning_rate": 9.546481508702224e-05, "loss": 10.6065, "step": 55 }, { "epoch": 0.04929577464788732, "grad_norm": 0.7046729326248169, "learning_rate": 9.48433862635099e-05, "loss": 10.5984, "step": 56 }, { "epoch": 0.05017605633802817, "grad_norm": 0.6503332853317261, "learning_rate": 9.421106465602684e-05, "loss": 10.5944, "step": 57 }, { "epoch": 0.051056338028169015, "grad_norm": 0.6239966154098511, "learning_rate": 9.356802313498687e-05, "loss": 10.6094, "step": 58 }, { "epoch": 0.05193661971830986, "grad_norm": 0.6146323680877686, "learning_rate": 9.291443750152112e-05, "loss": 10.579, "step": 59 }, { "epoch": 0.0528169014084507, "grad_norm": 0.5953949689865112, "learning_rate": 9.225048643941577e-05, "loss": 10.5865, "step": 60 }, { "epoch": 0.05369718309859155, "grad_norm": 0.5488158464431763, "learning_rate": 9.157635146626164e-05, "loss": 10.5657, "step": 61 }, { "epoch": 0.05457746478873239, "grad_norm": 0.468292236328125, "learning_rate": 9.089221688382928e-05, "loss": 10.5856, "step": 62 }, { "epoch": 0.05545774647887324, "grad_norm": 0.484653115272522, "learning_rate": 9.019826972768242e-05, "loss": 10.5658, "step": 63 }, { "epoch": 0.056338028169014086, "grad_norm": 0.3967801332473755, "learning_rate": 8.949469971604454e-05, "loss": 10.5907, "step": 64 }, { "epoch": 0.05721830985915493, "grad_norm": 0.41012856364250183, "learning_rate": 8.878169919793173e-05, "loss": 10.5643, "step": 65 }, { "epoch": 0.058098591549295774, "grad_norm": 0.3663182556629181, "learning_rate": 8.805946310056638e-05, "loss": 10.5643, "step": 66 }, { "epoch": 0.05897887323943662, "grad_norm": 0.40321776270866394, "learning_rate": 8.732818887608602e-05, "loss": 10.5693, "step": 67 }, { "epoch": 0.05985915492957746, "grad_norm": 0.31598755717277527, "learning_rate": 8.65880764475619e-05, "loss": 10.5841, "step": 68 }, { "epoch": 0.06073943661971831, "grad_norm": 0.2976154386997223, "learning_rate": 8.583932815434201e-05, "loss": 10.5584, "step": 69 }, { "epoch": 0.061619718309859156, "grad_norm": 0.2813892662525177, "learning_rate": 8.50821486967335e-05, "loss": 10.558, "step": 70 }, { "epoch": 0.0625, "grad_norm": 0.2954387366771698, "learning_rate": 8.431674508003966e-05, "loss": 10.5793, "step": 71 }, { "epoch": 0.06338028169014084, "grad_norm": 0.32197305560112, "learning_rate": 8.354332655796683e-05, "loss": 10.5817, "step": 72 }, { "epoch": 0.06426056338028169, "grad_norm": 0.43186065554618835, "learning_rate": 8.276210457541642e-05, "loss": 10.5966, "step": 73 }, { "epoch": 0.06514084507042253, "grad_norm": 0.34619054198265076, "learning_rate": 8.197329271067796e-05, "loss": 10.5901, "step": 74 }, { "epoch": 0.06602112676056338, "grad_norm": 0.3864317238330841, "learning_rate": 8.117710661703905e-05, "loss": 10.6212, "step": 75 }, { "epoch": 0.06690140845070422, "grad_norm": 0.4301004111766815, "learning_rate": 8.037376396382784e-05, "loss": 10.606, "step": 76 }, { "epoch": 0.06778169014084508, "grad_norm": 0.3685256838798523, "learning_rate": 7.956348437690437e-05, "loss": 10.58, "step": 77 }, { "epoch": 0.06866197183098592, "grad_norm": 0.4085758328437805, "learning_rate": 7.87464893786171e-05, "loss": 10.5759, "step": 78 }, { "epoch": 0.06954225352112677, "grad_norm": 0.38861989974975586, "learning_rate": 7.792300232724097e-05, "loss": 10.5768, "step": 79 }, { "epoch": 0.07042253521126761, "grad_norm": 0.4298862814903259, "learning_rate": 7.709324835591332e-05, "loss": 10.5884, "step": 80 }, { "epoch": 0.07130281690140845, "grad_norm": 0.3438059091567993, "learning_rate": 7.625745431108487e-05, "loss": 10.5703, "step": 81 }, { "epoch": 0.0721830985915493, "grad_norm": 0.5023322105407715, "learning_rate": 7.541584869050213e-05, "loss": 10.5616, "step": 82 }, { "epoch": 0.07306338028169014, "grad_norm": 0.34021085500717163, "learning_rate": 7.456866158073842e-05, "loss": 10.5553, "step": 83 }, { "epoch": 0.07394366197183098, "grad_norm": 0.4679986536502838, "learning_rate": 7.371612459429037e-05, "loss": 10.5767, "step": 84 }, { "epoch": 0.07482394366197183, "grad_norm": 0.4663142263889313, "learning_rate": 7.28584708062576e-05, "loss": 10.5478, "step": 85 }, { "epoch": 0.07570422535211267, "grad_norm": 0.3878236413002014, "learning_rate": 7.19959346906221e-05, "loss": 10.5509, "step": 86 }, { "epoch": 0.07658450704225352, "grad_norm": 0.3434685170650482, "learning_rate": 7.112875205614558e-05, "loss": 10.5629, "step": 87 }, { "epoch": 0.07746478873239436, "grad_norm": 0.40229496359825134, "learning_rate": 7.025715998190145e-05, "loss": 10.5896, "step": 88 }, { "epoch": 0.07834507042253522, "grad_norm": 0.4033350944519043, "learning_rate": 6.938139675246009e-05, "loss": 10.5509, "step": 89 }, { "epoch": 0.07922535211267606, "grad_norm": 0.36661842465400696, "learning_rate": 6.850170179274395e-05, "loss": 10.558, "step": 90 }, { "epoch": 0.0801056338028169, "grad_norm": 0.3692269027233124, "learning_rate": 6.761831560257134e-05, "loss": 10.5177, "step": 91 }, { "epoch": 0.08098591549295775, "grad_norm": 0.37002119421958923, "learning_rate": 6.673147969090608e-05, "loss": 10.539, "step": 92 }, { "epoch": 0.0818661971830986, "grad_norm": 0.3126441240310669, "learning_rate": 6.584143650983141e-05, "loss": 10.5218, "step": 93 }, { "epoch": 0.08274647887323944, "grad_norm": 0.3806772828102112, "learning_rate": 6.494842938826605e-05, "loss": 10.5072, "step": 94 }, { "epoch": 0.08362676056338028, "grad_norm": 0.34609004855155945, "learning_rate": 6.405270246544037e-05, "loss": 10.5073, "step": 95 }, { "epoch": 0.08450704225352113, "grad_norm": 0.3094387352466583, "learning_rate": 6.31545006241513e-05, "loss": 10.5837, "step": 96 }, { "epoch": 0.08538732394366197, "grad_norm": 0.3375723659992218, "learning_rate": 6.22540694238138e-05, "loss": 10.5428, "step": 97 }, { "epoch": 0.08626760563380281, "grad_norm": 0.3610488772392273, "learning_rate": 6.135165503332725e-05, "loss": 10.5195, "step": 98 }, { "epoch": 0.08714788732394366, "grad_norm": 0.3436351418495178, "learning_rate": 6.0447504163775465e-05, "loss": 10.5591, "step": 99 }, { "epoch": 0.0880281690140845, "grad_norm": 0.46384698152542114, "learning_rate": 5.954186400097829e-05, "loss": 10.5287, "step": 100 }, { "epoch": 0.0880281690140845, "eval_loss": 10.527392387390137, "eval_runtime": 28.8434, "eval_samples_per_second": 66.324, "eval_steps_per_second": 16.607, "step": 100 }, { "epoch": 0.08890845070422536, "grad_norm": 0.5941808819770813, "learning_rate": 5.8634982137913465e-05, "loss": 10.5435, "step": 101 }, { "epoch": 0.0897887323943662, "grad_norm": 0.5891147255897522, "learning_rate": 5.772710650702723e-05, "loss": 10.5219, "step": 102 }, { "epoch": 0.09066901408450705, "grad_norm": 0.6356647610664368, "learning_rate": 5.681848531245195e-05, "loss": 10.5299, "step": 103 }, { "epoch": 0.09154929577464789, "grad_norm": 0.5486959218978882, "learning_rate": 5.590936696214972e-05, "loss": 10.5153, "step": 104 }, { "epoch": 0.09242957746478873, "grad_norm": 0.5351731181144714, "learning_rate": 5.5e-05, "loss": 10.5248, "step": 105 }, { "epoch": 0.09330985915492958, "grad_norm": 0.5519973635673523, "learning_rate": 5.409063303785029e-05, "loss": 10.5358, "step": 106 }, { "epoch": 0.09419014084507042, "grad_norm": 0.5336778163909912, "learning_rate": 5.318151468754805e-05, "loss": 10.5088, "step": 107 }, { "epoch": 0.09507042253521127, "grad_norm": 0.4727165997028351, "learning_rate": 5.227289349297277e-05, "loss": 10.5292, "step": 108 }, { "epoch": 0.09595070422535211, "grad_norm": 0.5302690267562866, "learning_rate": 5.136501786208654e-05, "loss": 10.5202, "step": 109 }, { "epoch": 0.09683098591549295, "grad_norm": 0.5168509483337402, "learning_rate": 5.045813599902173e-05, "loss": 10.5157, "step": 110 }, { "epoch": 0.0977112676056338, "grad_norm": 0.4725133776664734, "learning_rate": 4.955249583622455e-05, "loss": 10.4946, "step": 111 }, { "epoch": 0.09859154929577464, "grad_norm": 0.45433416962623596, "learning_rate": 4.8648344966672767e-05, "loss": 10.5199, "step": 112 }, { "epoch": 0.0994718309859155, "grad_norm": 0.42494186758995056, "learning_rate": 4.774593057618621e-05, "loss": 10.4857, "step": 113 }, { "epoch": 0.10035211267605634, "grad_norm": 0.35899239778518677, "learning_rate": 4.6845499375848686e-05, "loss": 10.5143, "step": 114 }, { "epoch": 0.10123239436619719, "grad_norm": 0.3459785282611847, "learning_rate": 4.5947297534559625e-05, "loss": 10.5069, "step": 115 }, { "epoch": 0.10211267605633803, "grad_norm": 0.3599611222743988, "learning_rate": 4.5051570611733976e-05, "loss": 10.5378, "step": 116 }, { "epoch": 0.10299295774647887, "grad_norm": 0.3270639479160309, "learning_rate": 4.415856349016859e-05, "loss": 10.5354, "step": 117 }, { "epoch": 0.10387323943661972, "grad_norm": 0.3274960219860077, "learning_rate": 4.326852030909393e-05, "loss": 10.4849, "step": 118 }, { "epoch": 0.10475352112676056, "grad_norm": 0.3062029182910919, "learning_rate": 4.238168439742867e-05, "loss": 10.5315, "step": 119 }, { "epoch": 0.1056338028169014, "grad_norm": 0.3626440763473511, "learning_rate": 4.149829820725605e-05, "loss": 10.5202, "step": 120 }, { "epoch": 0.10651408450704225, "grad_norm": 0.3809753656387329, "learning_rate": 4.0618603247539916e-05, "loss": 10.5317, "step": 121 }, { "epoch": 0.1073943661971831, "grad_norm": 0.3086736798286438, "learning_rate": 3.9742840018098564e-05, "loss": 10.5318, "step": 122 }, { "epoch": 0.10827464788732394, "grad_norm": 0.3391195237636566, "learning_rate": 3.887124794385445e-05, "loss": 10.5107, "step": 123 }, { "epoch": 0.10915492957746478, "grad_norm": 0.34638404846191406, "learning_rate": 3.80040653093779e-05, "loss": 10.5355, "step": 124 }, { "epoch": 0.11003521126760564, "grad_norm": 0.45098984241485596, "learning_rate": 3.714152919374241e-05, "loss": 10.55, "step": 125 }, { "epoch": 0.11091549295774648, "grad_norm": 0.39215147495269775, "learning_rate": 3.628387540570963e-05, "loss": 10.5317, "step": 126 }, { "epoch": 0.11179577464788733, "grad_norm": 0.3315400779247284, "learning_rate": 3.543133841926159e-05, "loss": 10.5396, "step": 127 }, { "epoch": 0.11267605633802817, "grad_norm": 0.3427654504776001, "learning_rate": 3.458415130949785e-05, "loss": 10.5269, "step": 128 }, { "epoch": 0.11355633802816902, "grad_norm": 0.33311086893081665, "learning_rate": 3.374254568891514e-05, "loss": 10.5672, "step": 129 }, { "epoch": 0.11443661971830986, "grad_norm": 0.31714025139808655, "learning_rate": 3.290675164408669e-05, "loss": 10.5272, "step": 130 }, { "epoch": 0.1153169014084507, "grad_norm": 0.37221792340278625, "learning_rate": 3.207699767275904e-05, "loss": 10.5114, "step": 131 }, { "epoch": 0.11619718309859155, "grad_norm": 0.298801451921463, "learning_rate": 3.12535106213829e-05, "loss": 10.5184, "step": 132 }, { "epoch": 0.11707746478873239, "grad_norm": 0.3310137093067169, "learning_rate": 3.0436515623095647e-05, "loss": 10.5289, "step": 133 }, { "epoch": 0.11795774647887323, "grad_norm": 0.3380754590034485, "learning_rate": 2.962623603617218e-05, "loss": 10.5012, "step": 134 }, { "epoch": 0.11883802816901408, "grad_norm": 0.32100123167037964, "learning_rate": 2.8822893382960955e-05, "loss": 10.5141, "step": 135 }, { "epoch": 0.11971830985915492, "grad_norm": 0.3254896402359009, "learning_rate": 2.802670728932207e-05, "loss": 10.5539, "step": 136 }, { "epoch": 0.12059859154929578, "grad_norm": 0.3702283799648285, "learning_rate": 2.723789542458361e-05, "loss": 10.512, "step": 137 }, { "epoch": 0.12147887323943662, "grad_norm": 0.32699835300445557, "learning_rate": 2.6456673442033183e-05, "loss": 10.5026, "step": 138 }, { "epoch": 0.12235915492957747, "grad_norm": 0.33314529061317444, "learning_rate": 2.5683254919960356e-05, "loss": 10.5311, "step": 139 }, { "epoch": 0.12323943661971831, "grad_norm": 0.3764292597770691, "learning_rate": 2.4917851303266533e-05, "loss": 10.4995, "step": 140 }, { "epoch": 0.12411971830985916, "grad_norm": 0.3277135491371155, "learning_rate": 2.4160671845658007e-05, "loss": 10.5025, "step": 141 }, { "epoch": 0.125, "grad_norm": 0.3547554314136505, "learning_rate": 2.3411923552438105e-05, "loss": 10.5012, "step": 142 }, { "epoch": 0.12588028169014084, "grad_norm": 0.3986217677593231, "learning_rate": 2.2671811123913983e-05, "loss": 10.451, "step": 143 }, { "epoch": 0.1267605633802817, "grad_norm": 0.37957799434661865, "learning_rate": 2.194053689943362e-05, "loss": 10.4798, "step": 144 }, { "epoch": 0.12764084507042253, "grad_norm": 0.3498072028160095, "learning_rate": 2.121830080206827e-05, "loss": 10.5078, "step": 145 }, { "epoch": 0.12852112676056338, "grad_norm": 0.36097705364227295, "learning_rate": 2.0505300283955464e-05, "loss": 10.4758, "step": 146 }, { "epoch": 0.12940140845070422, "grad_norm": 0.30697742104530334, "learning_rate": 1.9801730272317585e-05, "loss": 10.5026, "step": 147 }, { "epoch": 0.13028169014084506, "grad_norm": 0.42456531524658203, "learning_rate": 1.910778311617072e-05, "loss": 10.4967, "step": 148 }, { "epoch": 0.1311619718309859, "grad_norm": 0.47736701369285583, "learning_rate": 1.8423648533738342e-05, "loss": 10.5141, "step": 149 }, { "epoch": 0.13204225352112675, "grad_norm": 0.5607922673225403, "learning_rate": 1.7749513560584252e-05, "loss": 10.5202, "step": 150 }, { "epoch": 0.13204225352112675, "eval_loss": 10.495588302612305, "eval_runtime": 26.6017, "eval_samples_per_second": 71.913, "eval_steps_per_second": 18.006, "step": 150 }, { "epoch": 0.1329225352112676, "grad_norm": 0.655456006526947, "learning_rate": 1.7085562498478883e-05, "loss": 10.4826, "step": 151 }, { "epoch": 0.13380281690140844, "grad_norm": 0.4750436246395111, "learning_rate": 1.6431976865013128e-05, "loss": 10.5067, "step": 152 }, { "epoch": 0.13468309859154928, "grad_norm": 0.5088787078857422, "learning_rate": 1.5788935343973164e-05, "loss": 10.4799, "step": 153 }, { "epoch": 0.13556338028169015, "grad_norm": 0.4471653401851654, "learning_rate": 1.5156613736490108e-05, "loss": 10.5016, "step": 154 }, { "epoch": 0.136443661971831, "grad_norm": 0.4595582187175751, "learning_rate": 1.4535184912977763e-05, "loss": 10.4885, "step": 155 }, { "epoch": 0.13732394366197184, "grad_norm": 0.5350455641746521, "learning_rate": 1.3924818765871553e-05, "loss": 10.4808, "step": 156 }, { "epoch": 0.1382042253521127, "grad_norm": 0.5852507948875427, "learning_rate": 1.3325682163181601e-05, "loss": 10.4695, "step": 157 }, { "epoch": 0.13908450704225353, "grad_norm": 0.5205310583114624, "learning_rate": 1.2737938902872767e-05, "loss": 10.4588, "step": 158 }, { "epoch": 0.13996478873239437, "grad_norm": 0.5743169784545898, "learning_rate": 1.2161749668083823e-05, "loss": 10.4531, "step": 159 }, { "epoch": 0.14084507042253522, "grad_norm": 0.4958171844482422, "learning_rate": 1.159727198319836e-05, "loss": 10.4593, "step": 160 }, { "epoch": 0.14172535211267606, "grad_norm": 0.43265965580940247, "learning_rate": 1.1044660170779142e-05, "loss": 10.4656, "step": 161 }, { "epoch": 0.1426056338028169, "grad_norm": 0.5412198901176453, "learning_rate": 1.0504065309377897e-05, "loss": 10.4804, "step": 162 }, { "epoch": 0.14348591549295775, "grad_norm": 0.4394540786743164, "learning_rate": 9.97563519223192e-06, "loss": 10.4911, "step": 163 }, { "epoch": 0.1443661971830986, "grad_norm": 0.4882407784461975, "learning_rate": 9.459514286858898e-06, "loss": 10.5067, "step": 164 }, { "epoch": 0.14524647887323944, "grad_norm": 0.3944062888622284, "learning_rate": 8.95584369556093e-06, "loss": 10.4899, "step": 165 }, { "epoch": 0.14612676056338028, "grad_norm": 0.3780195415019989, "learning_rate": 8.464761116848546e-06, "loss": 10.4926, "step": 166 }, { "epoch": 0.14700704225352113, "grad_norm": 0.31584465503692627, "learning_rate": 7.986400807795349e-06, "loss": 10.4902, "step": 167 }, { "epoch": 0.14788732394366197, "grad_norm": 0.2965972125530243, "learning_rate": 7.520893547333436e-06, "loss": 10.4713, "step": 168 }, { "epoch": 0.1487676056338028, "grad_norm": 0.3396422266960144, "learning_rate": 7.068366600499744e-06, "loss": 10.5113, "step": 169 }, { "epoch": 0.14964788732394366, "grad_norm": 0.27547672390937805, "learning_rate": 6.6289436836431076e-06, "loss": 10.4863, "step": 170 }, { "epoch": 0.1505281690140845, "grad_norm": 0.21695564687252045, "learning_rate": 6.20274493060135e-06, "loss": 10.5045, "step": 171 }, { "epoch": 0.15140845070422534, "grad_norm": 0.2346537858247757, "learning_rate": 5.789886859858009e-06, "loss": 10.5259, "step": 172 }, { "epoch": 0.1522887323943662, "grad_norm": 0.23305346071720123, "learning_rate": 5.3904823426872605e-06, "loss": 10.5179, "step": 173 }, { "epoch": 0.15316901408450703, "grad_norm": 0.3447682559490204, "learning_rate": 5.004640572296062e-06, "loss": 10.5116, "step": 174 }, { "epoch": 0.15404929577464788, "grad_norm": 0.2727600038051605, "learning_rate": 4.632467033971838e-06, "loss": 10.5402, "step": 175 }, { "epoch": 0.15492957746478872, "grad_norm": 0.3000151813030243, "learning_rate": 4.274063476243839e-06, "loss": 10.5252, "step": 176 }, { "epoch": 0.15580985915492956, "grad_norm": 0.24301907420158386, "learning_rate": 3.929527883066117e-06, "loss": 10.5167, "step": 177 }, { "epoch": 0.15669014084507044, "grad_norm": 0.29026785492897034, "learning_rate": 3.5989544470296595e-06, "loss": 10.5109, "step": 178 }, { "epoch": 0.15757042253521128, "grad_norm": 0.30544570088386536, "learning_rate": 3.282433543611136e-06, "loss": 10.4965, "step": 179 }, { "epoch": 0.15845070422535212, "grad_norm": 0.3145160973072052, "learning_rate": 2.980051706465095e-06, "loss": 10.4923, "step": 180 }, { "epoch": 0.15933098591549297, "grad_norm": 0.29084667563438416, "learning_rate": 2.691891603766556e-06, "loss": 10.5384, "step": 181 }, { "epoch": 0.1602112676056338, "grad_norm": 0.2924867570400238, "learning_rate": 2.4180320156103298e-06, "loss": 10.5349, "step": 182 }, { "epoch": 0.16109154929577466, "grad_norm": 0.31382471323013306, "learning_rate": 2.158547812473352e-06, "loss": 10.5328, "step": 183 }, { "epoch": 0.1619718309859155, "grad_norm": 0.2887389361858368, "learning_rate": 1.9135099347458293e-06, "loss": 10.5209, "step": 184 }, { "epoch": 0.16285211267605634, "grad_norm": 0.35892584919929504, "learning_rate": 1.6829853733368294e-06, "loss": 10.5239, "step": 185 }, { "epoch": 0.1637323943661972, "grad_norm": 0.24830038845539093, "learning_rate": 1.4670371513596842e-06, "loss": 10.5013, "step": 186 }, { "epoch": 0.16461267605633803, "grad_norm": 0.33427461981773376, "learning_rate": 1.2657243069020402e-06, "loss": 10.4891, "step": 187 }, { "epoch": 0.16549295774647887, "grad_norm": 0.3475538492202759, "learning_rate": 1.0791018768854855e-06, "loss": 10.4896, "step": 188 }, { "epoch": 0.16637323943661972, "grad_norm": 0.3785645365715027, "learning_rate": 9.072208820189698e-07, "loss": 10.5247, "step": 189 }, { "epoch": 0.16725352112676056, "grad_norm": 0.3462846279144287, "learning_rate": 7.501283128502722e-07, "loss": 10.5091, "step": 190 }, { "epoch": 0.1681338028169014, "grad_norm": 0.3662095367908478, "learning_rate": 6.07867116919233e-07, "loss": 10.465, "step": 191 }, { "epoch": 0.16901408450704225, "grad_norm": 0.46672433614730835, "learning_rate": 4.804761870163643e-07, "loss": 10.4353, "step": 192 }, { "epoch": 0.1698943661971831, "grad_norm": 0.3511195778846741, "learning_rate": 3.6799035054990215e-07, "loss": 10.4936, "step": 193 }, { "epoch": 0.17077464788732394, "grad_norm": 0.3752671778202057, "learning_rate": 2.704403600243721e-07, "loss": 10.4624, "step": 194 }, { "epoch": 0.17165492957746478, "grad_norm": 0.3640790283679962, "learning_rate": 1.878528846331584e-07, "loss": 10.495, "step": 195 }, { "epoch": 0.17253521126760563, "grad_norm": 0.37574926018714905, "learning_rate": 1.202505029674006e-07, "loss": 10.4919, "step": 196 }, { "epoch": 0.17341549295774647, "grad_norm": 0.32661178708076477, "learning_rate": 6.765169684323947e-08, "loss": 10.4605, "step": 197 }, { "epoch": 0.1742957746478873, "grad_norm": 0.415720671415329, "learning_rate": 3.007084624906731e-08, "loss": 10.4822, "step": 198 }, { "epoch": 0.17517605633802816, "grad_norm": 0.4906235337257385, "learning_rate": 7.518225414204771e-09, "loss": 10.5168, "step": 199 }, { "epoch": 0.176056338028169, "grad_norm": 0.49893417954444885, "learning_rate": 0.0, "loss": 10.4814, "step": 200 }, { "epoch": 0.176056338028169, "eval_loss": 10.490081787109375, "eval_runtime": 30.7966, "eval_samples_per_second": 62.117, "eval_steps_per_second": 15.554, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 136620225331200.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }