{
  "best_metric": 10.490081787109375,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.176056338028169,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008802816901408451,
      "grad_norm": 1.0416910648345947,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 10.8354,
      "step": 1
    },
    {
      "epoch": 0.0008802816901408451,
      "eval_loss": 10.828083038330078,
      "eval_runtime": 40.666,
      "eval_samples_per_second": 47.042,
      "eval_steps_per_second": 11.779,
      "step": 1
    },
    {
      "epoch": 0.0017605633802816902,
      "grad_norm": 1.0834181308746338,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 10.8404,
      "step": 2
    },
    {
      "epoch": 0.002640845070422535,
      "grad_norm": 0.968176007270813,
      "learning_rate": 3.3e-05,
      "loss": 10.8331,
      "step": 3
    },
    {
      "epoch": 0.0035211267605633804,
      "grad_norm": 0.827340304851532,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 10.8353,
      "step": 4
    },
    {
      "epoch": 0.0044014084507042256,
      "grad_norm": 0.8887184858322144,
      "learning_rate": 5.5e-05,
      "loss": 10.8322,
      "step": 5
    },
    {
      "epoch": 0.00528169014084507,
      "grad_norm": 0.8796689510345459,
      "learning_rate": 6.6e-05,
      "loss": 10.8238,
      "step": 6
    },
    {
      "epoch": 0.006161971830985915,
      "grad_norm": 0.8759607672691345,
      "learning_rate": 7.7e-05,
      "loss": 10.8217,
      "step": 7
    },
    {
      "epoch": 0.007042253521126761,
      "grad_norm": 0.9419655203819275,
      "learning_rate": 8.800000000000001e-05,
      "loss": 10.8036,
      "step": 8
    },
    {
      "epoch": 0.007922535211267605,
      "grad_norm": 0.8953964710235596,
      "learning_rate": 9.900000000000001e-05,
      "loss": 10.8025,
      "step": 9
    },
    {
      "epoch": 0.008802816901408451,
      "grad_norm": 0.8648999333381653,
      "learning_rate": 0.00011,
      "loss": 10.7915,
      "step": 10
    },
    {
      "epoch": 0.009683098591549295,
      "grad_norm": 0.9457488059997559,
      "learning_rate": 0.0001099924817745858,
      "loss": 10.7801,
      "step": 11
    },
    {
      "epoch": 0.01056338028169014,
      "grad_norm": 0.7545454502105713,
      "learning_rate": 0.00010996992915375093,
      "loss": 10.7735,
      "step": 12
    },
    {
      "epoch": 0.011443661971830986,
      "grad_norm": 0.7653600573539734,
      "learning_rate": 0.00010993234830315676,
      "loss": 10.7706,
      "step": 13
    },
    {
      "epoch": 0.01232394366197183,
      "grad_norm": 0.8165815472602844,
      "learning_rate": 0.0001098797494970326,
      "loss": 10.7605,
      "step": 14
    },
    {
      "epoch": 0.013204225352112676,
      "grad_norm": 0.7317748069763184,
      "learning_rate": 0.00010981214711536684,
      "loss": 10.7501,
      "step": 15
    },
    {
      "epoch": 0.014084507042253521,
      "grad_norm": 0.8150272369384766,
      "learning_rate": 0.00010972955963997563,
      "loss": 10.7434,
      "step": 16
    },
    {
      "epoch": 0.014964788732394365,
      "grad_norm": 0.7701242566108704,
      "learning_rate": 0.00010963200964945011,
      "loss": 10.7278,
      "step": 17
    },
    {
      "epoch": 0.01584507042253521,
      "grad_norm": 0.7349888682365417,
      "learning_rate": 0.00010951952381298364,
      "loss": 10.7288,
      "step": 18
    },
    {
      "epoch": 0.016725352112676055,
      "grad_norm": 0.6739762425422668,
      "learning_rate": 0.00010939213288308077,
      "loss": 10.7154,
      "step": 19
    },
    {
      "epoch": 0.017605633802816902,
      "grad_norm": 0.6542978286743164,
      "learning_rate": 0.00010924987168714973,
      "loss": 10.7205,
      "step": 20
    },
    {
      "epoch": 0.018485915492957746,
      "grad_norm": 0.6472326517105103,
      "learning_rate": 0.00010909277911798103,
      "loss": 10.7262,
      "step": 21
    },
    {
      "epoch": 0.01936619718309859,
      "grad_norm": 0.6118732690811157,
      "learning_rate": 0.00010892089812311451,
      "loss": 10.6898,
      "step": 22
    },
    {
      "epoch": 0.020246478873239437,
      "grad_norm": 0.6045047640800476,
      "learning_rate": 0.00010873427569309797,
      "loss": 10.7162,
      "step": 23
    },
    {
      "epoch": 0.02112676056338028,
      "grad_norm": 0.7520612478256226,
      "learning_rate": 0.00010853296284864032,
      "loss": 10.6945,
      "step": 24
    },
    {
      "epoch": 0.022007042253521125,
      "grad_norm": 0.6803677678108215,
      "learning_rate": 0.00010831701462666318,
      "loss": 10.7144,
      "step": 25
    },
    {
      "epoch": 0.022887323943661973,
      "grad_norm": 0.6361639499664307,
      "learning_rate": 0.00010808649006525419,
      "loss": 10.702,
      "step": 26
    },
    {
      "epoch": 0.023767605633802816,
      "grad_norm": 0.5413468480110168,
      "learning_rate": 0.00010784145218752665,
      "loss": 10.7203,
      "step": 27
    },
    {
      "epoch": 0.02464788732394366,
      "grad_norm": 0.5481899976730347,
      "learning_rate": 0.00010758196798438968,
      "loss": 10.6863,
      "step": 28
    },
    {
      "epoch": 0.025528169014084508,
      "grad_norm": 0.5831198692321777,
      "learning_rate": 0.00010730810839623346,
      "loss": 10.6874,
      "step": 29
    },
    {
      "epoch": 0.02640845070422535,
      "grad_norm": 0.5215359926223755,
      "learning_rate": 0.0001070199482935349,
      "loss": 10.6734,
      "step": 30
    },
    {
      "epoch": 0.027288732394366196,
      "grad_norm": 0.48211073875427246,
      "learning_rate": 0.00010671756645638888,
      "loss": 10.6803,
      "step": 31
    },
    {
      "epoch": 0.028169014084507043,
      "grad_norm": 0.5410356521606445,
      "learning_rate": 0.00010640104555297034,
      "loss": 10.6592,
      "step": 32
    },
    {
      "epoch": 0.029049295774647887,
      "grad_norm": 0.4434191882610321,
      "learning_rate": 0.00010607047211693389,
      "loss": 10.6771,
      "step": 33
    },
    {
      "epoch": 0.02992957746478873,
      "grad_norm": 0.5003845691680908,
      "learning_rate": 0.00010572593652375616,
      "loss": 10.6756,
      "step": 34
    },
    {
      "epoch": 0.030809859154929578,
      "grad_norm": 0.44623205065727234,
      "learning_rate": 0.00010536753296602816,
      "loss": 10.6654,
      "step": 35
    },
    {
      "epoch": 0.03169014084507042,
      "grad_norm": 0.48966875672340393,
      "learning_rate": 0.00010499535942770394,
      "loss": 10.6103,
      "step": 36
    },
    {
      "epoch": 0.032570422535211266,
      "grad_norm": 0.42709842324256897,
      "learning_rate": 0.00010460951765731275,
      "loss": 10.6529,
      "step": 37
    },
    {
      "epoch": 0.03345070422535211,
      "grad_norm": 0.3827378451824188,
      "learning_rate": 0.000104210113140142,
      "loss": 10.6431,
      "step": 38
    },
    {
      "epoch": 0.03433098591549296,
      "grad_norm": 0.39901039004325867,
      "learning_rate": 0.00010379725506939865,
      "loss": 10.622,
      "step": 39
    },
    {
      "epoch": 0.035211267605633804,
      "grad_norm": 0.43945929408073425,
      "learning_rate": 0.0001033710563163569,
      "loss": 10.6221,
      "step": 40
    },
    {
      "epoch": 0.03609154929577465,
      "grad_norm": 0.3692222237586975,
      "learning_rate": 0.00010293163339950024,
      "loss": 10.6182,
      "step": 41
    },
    {
      "epoch": 0.03697183098591549,
      "grad_norm": 0.37782034277915955,
      "learning_rate": 0.00010247910645266658,
      "loss": 10.6313,
      "step": 42
    },
    {
      "epoch": 0.037852112676056336,
      "grad_norm": 0.4363635182380676,
      "learning_rate": 0.00010201359919220464,
      "loss": 10.606,
      "step": 43
    },
    {
      "epoch": 0.03873239436619718,
      "grad_norm": 0.3427577614784241,
      "learning_rate": 0.00010153523888315144,
      "loss": 10.6184,
      "step": 44
    },
    {
      "epoch": 0.03961267605633803,
      "grad_norm": 0.4304777681827545,
      "learning_rate": 0.00010104415630443907,
      "loss": 10.6178,
      "step": 45
    },
    {
      "epoch": 0.040492957746478875,
      "grad_norm": 0.41288602352142334,
      "learning_rate": 0.0001005404857131411,
      "loss": 10.6375,
      "step": 46
    },
    {
      "epoch": 0.04137323943661972,
      "grad_norm": 0.4251928925514221,
      "learning_rate": 0.00010002436480776809,
      "loss": 10.6102,
      "step": 47
    },
    {
      "epoch": 0.04225352112676056,
      "grad_norm": 0.4017108976840973,
      "learning_rate": 9.949593469062211e-05,
      "loss": 10.6077,
      "step": 48
    },
    {
      "epoch": 0.043133802816901406,
      "grad_norm": 0.4274183511734009,
      "learning_rate": 9.895533982922087e-05,
      "loss": 10.6156,
      "step": 49
    },
    {
      "epoch": 0.04401408450704225,
      "grad_norm": 0.5586544871330261,
      "learning_rate": 9.840272801680165e-05,
      "loss": 10.5981,
      "step": 50
    },
    {
      "epoch": 0.04401408450704225,
      "eval_loss": 10.611796379089355,
      "eval_runtime": 33.253,
      "eval_samples_per_second": 57.529,
      "eval_steps_per_second": 14.405,
      "step": 50
    },
    {
      "epoch": 0.0448943661971831,
      "grad_norm": 0.8303582668304443,
      "learning_rate": 9.783825033191619e-05,
      "loss": 10.6168,
      "step": 51
    },
    {
      "epoch": 0.045774647887323945,
      "grad_norm": 0.7027772665023804,
      "learning_rate": 9.726206109712725e-05,
      "loss": 10.623,
      "step": 52
    },
    {
      "epoch": 0.04665492957746479,
      "grad_norm": 0.7661027908325195,
      "learning_rate": 9.667431783681842e-05,
      "loss": 10.627,
      "step": 53
    },
    {
      "epoch": 0.04753521126760563,
      "grad_norm": 0.7784258723258972,
      "learning_rate": 9.607518123412847e-05,
      "loss": 10.6067,
      "step": 54
    },
    {
      "epoch": 0.04841549295774648,
      "grad_norm": 0.70957350730896,
      "learning_rate": 9.546481508702224e-05,
      "loss": 10.6065,
      "step": 55
    },
    {
      "epoch": 0.04929577464788732,
      "grad_norm": 0.7046729326248169,
      "learning_rate": 9.48433862635099e-05,
      "loss": 10.5984,
      "step": 56
    },
    {
      "epoch": 0.05017605633802817,
      "grad_norm": 0.6503332853317261,
      "learning_rate": 9.421106465602684e-05,
      "loss": 10.5944,
      "step": 57
    },
    {
      "epoch": 0.051056338028169015,
      "grad_norm": 0.6239966154098511,
      "learning_rate": 9.356802313498687e-05,
      "loss": 10.6094,
      "step": 58
    },
    {
      "epoch": 0.05193661971830986,
      "grad_norm": 0.6146323680877686,
      "learning_rate": 9.291443750152112e-05,
      "loss": 10.579,
      "step": 59
    },
    {
      "epoch": 0.0528169014084507,
      "grad_norm": 0.5953949689865112,
      "learning_rate": 9.225048643941577e-05,
      "loss": 10.5865,
      "step": 60
    },
    {
      "epoch": 0.05369718309859155,
      "grad_norm": 0.5488158464431763,
      "learning_rate": 9.157635146626164e-05,
      "loss": 10.5657,
      "step": 61
    },
    {
      "epoch": 0.05457746478873239,
      "grad_norm": 0.468292236328125,
      "learning_rate": 9.089221688382928e-05,
      "loss": 10.5856,
      "step": 62
    },
    {
      "epoch": 0.05545774647887324,
      "grad_norm": 0.484653115272522,
      "learning_rate": 9.019826972768242e-05,
      "loss": 10.5658,
      "step": 63
    },
    {
      "epoch": 0.056338028169014086,
      "grad_norm": 0.3967801332473755,
      "learning_rate": 8.949469971604454e-05,
      "loss": 10.5907,
      "step": 64
    },
    {
      "epoch": 0.05721830985915493,
      "grad_norm": 0.41012856364250183,
      "learning_rate": 8.878169919793173e-05,
      "loss": 10.5643,
      "step": 65
    },
    {
      "epoch": 0.058098591549295774,
      "grad_norm": 0.3663182556629181,
      "learning_rate": 8.805946310056638e-05,
      "loss": 10.5643,
      "step": 66
    },
    {
      "epoch": 0.05897887323943662,
      "grad_norm": 0.40321776270866394,
      "learning_rate": 8.732818887608602e-05,
      "loss": 10.5693,
      "step": 67
    },
    {
      "epoch": 0.05985915492957746,
      "grad_norm": 0.31598755717277527,
      "learning_rate": 8.65880764475619e-05,
      "loss": 10.5841,
      "step": 68
    },
    {
      "epoch": 0.06073943661971831,
      "grad_norm": 0.2976154386997223,
      "learning_rate": 8.583932815434201e-05,
      "loss": 10.5584,
      "step": 69
    },
    {
      "epoch": 0.061619718309859156,
      "grad_norm": 0.2813892662525177,
      "learning_rate": 8.50821486967335e-05,
      "loss": 10.558,
      "step": 70
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.2954387366771698,
      "learning_rate": 8.431674508003966e-05,
      "loss": 10.5793,
      "step": 71
    },
    {
      "epoch": 0.06338028169014084,
      "grad_norm": 0.32197305560112,
      "learning_rate": 8.354332655796683e-05,
      "loss": 10.5817,
      "step": 72
    },
    {
      "epoch": 0.06426056338028169,
      "grad_norm": 0.43186065554618835,
      "learning_rate": 8.276210457541642e-05,
      "loss": 10.5966,
      "step": 73
    },
    {
      "epoch": 0.06514084507042253,
      "grad_norm": 0.34619054198265076,
      "learning_rate": 8.197329271067796e-05,
      "loss": 10.5901,
      "step": 74
    },
    {
      "epoch": 0.06602112676056338,
      "grad_norm": 0.3864317238330841,
      "learning_rate": 8.117710661703905e-05,
      "loss": 10.6212,
      "step": 75
    },
    {
      "epoch": 0.06690140845070422,
      "grad_norm": 0.4301004111766815,
      "learning_rate": 8.037376396382784e-05,
      "loss": 10.606,
      "step": 76
    },
    {
      "epoch": 0.06778169014084508,
      "grad_norm": 0.3685256838798523,
      "learning_rate": 7.956348437690437e-05,
      "loss": 10.58,
      "step": 77
    },
    {
      "epoch": 0.06866197183098592,
      "grad_norm": 0.4085758328437805,
      "learning_rate": 7.87464893786171e-05,
      "loss": 10.5759,
      "step": 78
    },
    {
      "epoch": 0.06954225352112677,
      "grad_norm": 0.38861989974975586,
      "learning_rate": 7.792300232724097e-05,
      "loss": 10.5768,
      "step": 79
    },
    {
      "epoch": 0.07042253521126761,
      "grad_norm": 0.4298862814903259,
      "learning_rate": 7.709324835591332e-05,
      "loss": 10.5884,
      "step": 80
    },
    {
      "epoch": 0.07130281690140845,
      "grad_norm": 0.3438059091567993,
      "learning_rate": 7.625745431108487e-05,
      "loss": 10.5703,
      "step": 81
    },
    {
      "epoch": 0.0721830985915493,
      "grad_norm": 0.5023322105407715,
      "learning_rate": 7.541584869050213e-05,
      "loss": 10.5616,
      "step": 82
    },
    {
      "epoch": 0.07306338028169014,
      "grad_norm": 0.34021085500717163,
      "learning_rate": 7.456866158073842e-05,
      "loss": 10.5553,
      "step": 83
    },
    {
      "epoch": 0.07394366197183098,
      "grad_norm": 0.4679986536502838,
      "learning_rate": 7.371612459429037e-05,
      "loss": 10.5767,
      "step": 84
    },
    {
      "epoch": 0.07482394366197183,
      "grad_norm": 0.4663142263889313,
      "learning_rate": 7.28584708062576e-05,
      "loss": 10.5478,
      "step": 85
    },
    {
      "epoch": 0.07570422535211267,
      "grad_norm": 0.3878236413002014,
      "learning_rate": 7.19959346906221e-05,
      "loss": 10.5509,
      "step": 86
    },
    {
      "epoch": 0.07658450704225352,
      "grad_norm": 0.3434685170650482,
      "learning_rate": 7.112875205614558e-05,
      "loss": 10.5629,
      "step": 87
    },
    {
      "epoch": 0.07746478873239436,
      "grad_norm": 0.40229496359825134,
      "learning_rate": 7.025715998190145e-05,
      "loss": 10.5896,
      "step": 88
    },
    {
      "epoch": 0.07834507042253522,
      "grad_norm": 0.4033350944519043,
      "learning_rate": 6.938139675246009e-05,
      "loss": 10.5509,
      "step": 89
    },
    {
      "epoch": 0.07922535211267606,
      "grad_norm": 0.36661842465400696,
      "learning_rate": 6.850170179274395e-05,
      "loss": 10.558,
      "step": 90
    },
    {
      "epoch": 0.0801056338028169,
      "grad_norm": 0.3692269027233124,
      "learning_rate": 6.761831560257134e-05,
      "loss": 10.5177,
      "step": 91
    },
    {
      "epoch": 0.08098591549295775,
      "grad_norm": 0.37002119421958923,
      "learning_rate": 6.673147969090608e-05,
      "loss": 10.539,
      "step": 92
    },
    {
      "epoch": 0.0818661971830986,
      "grad_norm": 0.3126441240310669,
      "learning_rate": 6.584143650983141e-05,
      "loss": 10.5218,
      "step": 93
    },
    {
      "epoch": 0.08274647887323944,
      "grad_norm": 0.3806772828102112,
      "learning_rate": 6.494842938826605e-05,
      "loss": 10.5072,
      "step": 94
    },
    {
      "epoch": 0.08362676056338028,
      "grad_norm": 0.34609004855155945,
      "learning_rate": 6.405270246544037e-05,
      "loss": 10.5073,
      "step": 95
    },
    {
      "epoch": 0.08450704225352113,
      "grad_norm": 0.3094387352466583,
      "learning_rate": 6.31545006241513e-05,
      "loss": 10.5837,
      "step": 96
    },
    {
      "epoch": 0.08538732394366197,
      "grad_norm": 0.3375723659992218,
      "learning_rate": 6.22540694238138e-05,
      "loss": 10.5428,
      "step": 97
    },
    {
      "epoch": 0.08626760563380281,
      "grad_norm": 0.3610488772392273,
      "learning_rate": 6.135165503332725e-05,
      "loss": 10.5195,
      "step": 98
    },
    {
      "epoch": 0.08714788732394366,
      "grad_norm": 0.3436351418495178,
      "learning_rate": 6.0447504163775465e-05,
      "loss": 10.5591,
      "step": 99
    },
    {
      "epoch": 0.0880281690140845,
      "grad_norm": 0.46384698152542114,
      "learning_rate": 5.954186400097829e-05,
      "loss": 10.5287,
      "step": 100
    },
    {
      "epoch": 0.0880281690140845,
      "eval_loss": 10.527392387390137,
      "eval_runtime": 28.8434,
      "eval_samples_per_second": 66.324,
      "eval_steps_per_second": 16.607,
      "step": 100
    },
    {
      "epoch": 0.08890845070422536,
      "grad_norm": 0.5941808819770813,
      "learning_rate": 5.8634982137913465e-05,
      "loss": 10.5435,
      "step": 101
    },
    {
      "epoch": 0.0897887323943662,
      "grad_norm": 0.5891147255897522,
      "learning_rate": 5.772710650702723e-05,
      "loss": 10.5219,
      "step": 102
    },
    {
      "epoch": 0.09066901408450705,
      "grad_norm": 0.6356647610664368,
      "learning_rate": 5.681848531245195e-05,
      "loss": 10.5299,
      "step": 103
    },
    {
      "epoch": 0.09154929577464789,
      "grad_norm": 0.5486959218978882,
      "learning_rate": 5.590936696214972e-05,
      "loss": 10.5153,
      "step": 104
    },
    {
      "epoch": 0.09242957746478873,
      "grad_norm": 0.5351731181144714,
      "learning_rate": 5.5e-05,
      "loss": 10.5248,
      "step": 105
    },
    {
      "epoch": 0.09330985915492958,
      "grad_norm": 0.5519973635673523,
      "learning_rate": 5.409063303785029e-05,
      "loss": 10.5358,
      "step": 106
    },
    {
      "epoch": 0.09419014084507042,
      "grad_norm": 0.5336778163909912,
      "learning_rate": 5.318151468754805e-05,
      "loss": 10.5088,
      "step": 107
    },
    {
      "epoch": 0.09507042253521127,
      "grad_norm": 0.4727165997028351,
      "learning_rate": 5.227289349297277e-05,
      "loss": 10.5292,
      "step": 108
    },
    {
      "epoch": 0.09595070422535211,
      "grad_norm": 0.5302690267562866,
      "learning_rate": 5.136501786208654e-05,
      "loss": 10.5202,
      "step": 109
    },
    {
      "epoch": 0.09683098591549295,
      "grad_norm": 0.5168509483337402,
      "learning_rate": 5.045813599902173e-05,
      "loss": 10.5157,
      "step": 110
    },
    {
      "epoch": 0.0977112676056338,
      "grad_norm": 0.4725133776664734,
      "learning_rate": 4.955249583622455e-05,
      "loss": 10.4946,
      "step": 111
    },
    {
      "epoch": 0.09859154929577464,
      "grad_norm": 0.45433416962623596,
      "learning_rate": 4.8648344966672767e-05,
      "loss": 10.5199,
      "step": 112
    },
    {
      "epoch": 0.0994718309859155,
      "grad_norm": 0.42494186758995056,
      "learning_rate": 4.774593057618621e-05,
      "loss": 10.4857,
      "step": 113
    },
    {
      "epoch": 0.10035211267605634,
      "grad_norm": 0.35899239778518677,
      "learning_rate": 4.6845499375848686e-05,
      "loss": 10.5143,
      "step": 114
    },
    {
      "epoch": 0.10123239436619719,
      "grad_norm": 0.3459785282611847,
      "learning_rate": 4.5947297534559625e-05,
      "loss": 10.5069,
      "step": 115
    },
    {
      "epoch": 0.10211267605633803,
      "grad_norm": 0.3599611222743988,
      "learning_rate": 4.5051570611733976e-05,
      "loss": 10.5378,
      "step": 116
    },
    {
      "epoch": 0.10299295774647887,
      "grad_norm": 0.3270639479160309,
      "learning_rate": 4.415856349016859e-05,
      "loss": 10.5354,
      "step": 117
    },
    {
      "epoch": 0.10387323943661972,
      "grad_norm": 0.3274960219860077,
      "learning_rate": 4.326852030909393e-05,
      "loss": 10.4849,
      "step": 118
    },
    {
      "epoch": 0.10475352112676056,
      "grad_norm": 0.3062029182910919,
      "learning_rate": 4.238168439742867e-05,
      "loss": 10.5315,
      "step": 119
    },
    {
      "epoch": 0.1056338028169014,
      "grad_norm": 0.3626440763473511,
      "learning_rate": 4.149829820725605e-05,
      "loss": 10.5202,
      "step": 120
    },
    {
      "epoch": 0.10651408450704225,
      "grad_norm": 0.3809753656387329,
      "learning_rate": 4.0618603247539916e-05,
      "loss": 10.5317,
      "step": 121
    },
    {
      "epoch": 0.1073943661971831,
      "grad_norm": 0.3086736798286438,
      "learning_rate": 3.9742840018098564e-05,
      "loss": 10.5318,
      "step": 122
    },
    {
      "epoch": 0.10827464788732394,
      "grad_norm": 0.3391195237636566,
      "learning_rate": 3.887124794385445e-05,
      "loss": 10.5107,
      "step": 123
    },
    {
      "epoch": 0.10915492957746478,
      "grad_norm": 0.34638404846191406,
      "learning_rate": 3.80040653093779e-05,
      "loss": 10.5355,
      "step": 124
    },
    {
      "epoch": 0.11003521126760564,
      "grad_norm": 0.45098984241485596,
      "learning_rate": 3.714152919374241e-05,
      "loss": 10.55,
      "step": 125
    },
    {
      "epoch": 0.11091549295774648,
      "grad_norm": 0.39215147495269775,
      "learning_rate": 3.628387540570963e-05,
      "loss": 10.5317,
      "step": 126
    },
    {
      "epoch": 0.11179577464788733,
      "grad_norm": 0.3315400779247284,
      "learning_rate": 3.543133841926159e-05,
      "loss": 10.5396,
      "step": 127
    },
    {
      "epoch": 0.11267605633802817,
      "grad_norm": 0.3427654504776001,
      "learning_rate": 3.458415130949785e-05,
      "loss": 10.5269,
      "step": 128
    },
    {
      "epoch": 0.11355633802816902,
      "grad_norm": 0.33311086893081665,
      "learning_rate": 3.374254568891514e-05,
      "loss": 10.5672,
      "step": 129
    },
    {
      "epoch": 0.11443661971830986,
      "grad_norm": 0.31714025139808655,
      "learning_rate": 3.290675164408669e-05,
      "loss": 10.5272,
      "step": 130
    },
    {
      "epoch": 0.1153169014084507,
      "grad_norm": 0.37221792340278625,
      "learning_rate": 3.207699767275904e-05,
      "loss": 10.5114,
      "step": 131
    },
    {
      "epoch": 0.11619718309859155,
      "grad_norm": 0.298801451921463,
      "learning_rate": 3.12535106213829e-05,
      "loss": 10.5184,
      "step": 132
    },
    {
      "epoch": 0.11707746478873239,
      "grad_norm": 0.3310137093067169,
      "learning_rate": 3.0436515623095647e-05,
      "loss": 10.5289,
      "step": 133
    },
    {
      "epoch": 0.11795774647887323,
      "grad_norm": 0.3380754590034485,
      "learning_rate": 2.962623603617218e-05,
      "loss": 10.5012,
      "step": 134
    },
    {
      "epoch": 0.11883802816901408,
      "grad_norm": 0.32100123167037964,
      "learning_rate": 2.8822893382960955e-05,
      "loss": 10.5141,
      "step": 135
    },
    {
      "epoch": 0.11971830985915492,
      "grad_norm": 0.3254896402359009,
      "learning_rate": 2.802670728932207e-05,
      "loss": 10.5539,
      "step": 136
    },
    {
      "epoch": 0.12059859154929578,
      "grad_norm": 0.3702283799648285,
      "learning_rate": 2.723789542458361e-05,
      "loss": 10.512,
      "step": 137
    },
    {
      "epoch": 0.12147887323943662,
      "grad_norm": 0.32699835300445557,
      "learning_rate": 2.6456673442033183e-05,
      "loss": 10.5026,
      "step": 138
    },
    {
      "epoch": 0.12235915492957747,
      "grad_norm": 0.33314529061317444,
      "learning_rate": 2.5683254919960356e-05,
      "loss": 10.5311,
      "step": 139
    },
    {
      "epoch": 0.12323943661971831,
      "grad_norm": 0.3764292597770691,
      "learning_rate": 2.4917851303266533e-05,
      "loss": 10.4995,
      "step": 140
    },
    {
      "epoch": 0.12411971830985916,
      "grad_norm": 0.3277135491371155,
      "learning_rate": 2.4160671845658007e-05,
      "loss": 10.5025,
      "step": 141
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.3547554314136505,
      "learning_rate": 2.3411923552438105e-05,
      "loss": 10.5012,
      "step": 142
    },
    {
      "epoch": 0.12588028169014084,
      "grad_norm": 0.3986217677593231,
      "learning_rate": 2.2671811123913983e-05,
      "loss": 10.451,
      "step": 143
    },
    {
      "epoch": 0.1267605633802817,
      "grad_norm": 0.37957799434661865,
      "learning_rate": 2.194053689943362e-05,
      "loss": 10.4798,
      "step": 144
    },
    {
      "epoch": 0.12764084507042253,
      "grad_norm": 0.3498072028160095,
      "learning_rate": 2.121830080206827e-05,
      "loss": 10.5078,
      "step": 145
    },
    {
      "epoch": 0.12852112676056338,
      "grad_norm": 0.36097705364227295,
      "learning_rate": 2.0505300283955464e-05,
      "loss": 10.4758,
      "step": 146
    },
    {
      "epoch": 0.12940140845070422,
      "grad_norm": 0.30697742104530334,
      "learning_rate": 1.9801730272317585e-05,
      "loss": 10.5026,
      "step": 147
    },
    {
      "epoch": 0.13028169014084506,
      "grad_norm": 0.42456531524658203,
      "learning_rate": 1.910778311617072e-05,
      "loss": 10.4967,
      "step": 148
    },
    {
      "epoch": 0.1311619718309859,
      "grad_norm": 0.47736701369285583,
      "learning_rate": 1.8423648533738342e-05,
      "loss": 10.5141,
      "step": 149
    },
    {
      "epoch": 0.13204225352112675,
      "grad_norm": 0.5607922673225403,
      "learning_rate": 1.7749513560584252e-05,
      "loss": 10.5202,
      "step": 150
    },
    {
      "epoch": 0.13204225352112675,
      "eval_loss": 10.495588302612305,
      "eval_runtime": 26.6017,
      "eval_samples_per_second": 71.913,
      "eval_steps_per_second": 18.006,
      "step": 150
    },
    {
      "epoch": 0.1329225352112676,
      "grad_norm": 0.655456006526947,
      "learning_rate": 1.7085562498478883e-05,
      "loss": 10.4826,
      "step": 151
    },
    {
      "epoch": 0.13380281690140844,
      "grad_norm": 0.4750436246395111,
      "learning_rate": 1.6431976865013128e-05,
      "loss": 10.5067,
      "step": 152
    },
    {
      "epoch": 0.13468309859154928,
      "grad_norm": 0.5088787078857422,
      "learning_rate": 1.5788935343973164e-05,
      "loss": 10.4799,
      "step": 153
    },
    {
      "epoch": 0.13556338028169015,
      "grad_norm": 0.4471653401851654,
      "learning_rate": 1.5156613736490108e-05,
      "loss": 10.5016,
      "step": 154
    },
    {
      "epoch": 0.136443661971831,
      "grad_norm": 0.4595582187175751,
      "learning_rate": 1.4535184912977763e-05,
      "loss": 10.4885,
      "step": 155
    },
    {
      "epoch": 0.13732394366197184,
      "grad_norm": 0.5350455641746521,
      "learning_rate": 1.3924818765871553e-05,
      "loss": 10.4808,
      "step": 156
    },
    {
      "epoch": 0.1382042253521127,
      "grad_norm": 0.5852507948875427,
      "learning_rate": 1.3325682163181601e-05,
      "loss": 10.4695,
      "step": 157
    },
    {
      "epoch": 0.13908450704225353,
      "grad_norm": 0.5205310583114624,
      "learning_rate": 1.2737938902872767e-05,
      "loss": 10.4588,
      "step": 158
    },
    {
      "epoch": 0.13996478873239437,
      "grad_norm": 0.5743169784545898,
      "learning_rate": 1.2161749668083823e-05,
      "loss": 10.4531,
      "step": 159
    },
    {
      "epoch": 0.14084507042253522,
      "grad_norm": 0.4958171844482422,
      "learning_rate": 1.159727198319836e-05,
      "loss": 10.4593,
      "step": 160
    },
    {
      "epoch": 0.14172535211267606,
      "grad_norm": 0.43265965580940247,
      "learning_rate": 1.1044660170779142e-05,
      "loss": 10.4656,
      "step": 161
    },
    {
      "epoch": 0.1426056338028169,
      "grad_norm": 0.5412198901176453,
      "learning_rate": 1.0504065309377897e-05,
      "loss": 10.4804,
      "step": 162
    },
    {
      "epoch": 0.14348591549295775,
      "grad_norm": 0.4394540786743164,
      "learning_rate": 9.97563519223192e-06,
      "loss": 10.4911,
      "step": 163
    },
    {
      "epoch": 0.1443661971830986,
      "grad_norm": 0.4882407784461975,
      "learning_rate": 9.459514286858898e-06,
      "loss": 10.5067,
      "step": 164
    },
    {
      "epoch": 0.14524647887323944,
      "grad_norm": 0.3944062888622284,
      "learning_rate": 8.95584369556093e-06,
      "loss": 10.4899,
      "step": 165
    },
    {
      "epoch": 0.14612676056338028,
      "grad_norm": 0.3780195415019989,
      "learning_rate": 8.464761116848546e-06,
      "loss": 10.4926,
      "step": 166
    },
    {
      "epoch": 0.14700704225352113,
      "grad_norm": 0.31584465503692627,
      "learning_rate": 7.986400807795349e-06,
      "loss": 10.4902,
      "step": 167
    },
    {
      "epoch": 0.14788732394366197,
      "grad_norm": 0.2965972125530243,
      "learning_rate": 7.520893547333436e-06,
      "loss": 10.4713,
      "step": 168
    },
    {
      "epoch": 0.1487676056338028,
      "grad_norm": 0.3396422266960144,
      "learning_rate": 7.068366600499744e-06,
      "loss": 10.5113,
      "step": 169
    },
    {
      "epoch": 0.14964788732394366,
      "grad_norm": 0.27547672390937805,
      "learning_rate": 6.6289436836431076e-06,
      "loss": 10.4863,
      "step": 170
    },
    {
      "epoch": 0.1505281690140845,
      "grad_norm": 0.21695564687252045,
      "learning_rate": 6.20274493060135e-06,
      "loss": 10.5045,
      "step": 171
    },
    {
      "epoch": 0.15140845070422534,
      "grad_norm": 0.2346537858247757,
      "learning_rate": 5.789886859858009e-06,
      "loss": 10.5259,
      "step": 172
    },
    {
      "epoch": 0.1522887323943662,
      "grad_norm": 0.23305346071720123,
      "learning_rate": 5.3904823426872605e-06,
      "loss": 10.5179,
      "step": 173
    },
    {
      "epoch": 0.15316901408450703,
      "grad_norm": 0.3447682559490204,
      "learning_rate": 5.004640572296062e-06,
      "loss": 10.5116,
      "step": 174
    },
    {
      "epoch": 0.15404929577464788,
      "grad_norm": 0.2727600038051605,
      "learning_rate": 4.632467033971838e-06,
      "loss": 10.5402,
      "step": 175
    },
    {
      "epoch": 0.15492957746478872,
      "grad_norm": 0.3000151813030243,
      "learning_rate": 4.274063476243839e-06,
      "loss": 10.5252,
      "step": 176
    },
    {
      "epoch": 0.15580985915492956,
      "grad_norm": 0.24301907420158386,
      "learning_rate": 3.929527883066117e-06,
      "loss": 10.5167,
      "step": 177
    },
    {
      "epoch": 0.15669014084507044,
      "grad_norm": 0.29026785492897034,
      "learning_rate": 3.5989544470296595e-06,
      "loss": 10.5109,
      "step": 178
    },
    {
      "epoch": 0.15757042253521128,
      "grad_norm": 0.30544570088386536,
      "learning_rate": 3.282433543611136e-06,
      "loss": 10.4965,
      "step": 179
    },
    {
      "epoch": 0.15845070422535212,
      "grad_norm": 0.3145160973072052,
      "learning_rate": 2.980051706465095e-06,
      "loss": 10.4923,
      "step": 180
    },
    {
      "epoch": 0.15933098591549297,
      "grad_norm": 0.29084667563438416,
      "learning_rate": 2.691891603766556e-06,
      "loss": 10.5384,
      "step": 181
    },
    {
      "epoch": 0.1602112676056338,
      "grad_norm": 0.2924867570400238,
      "learning_rate": 2.4180320156103298e-06,
      "loss": 10.5349,
      "step": 182
    },
    {
      "epoch": 0.16109154929577466,
      "grad_norm": 0.31382471323013306,
      "learning_rate": 2.158547812473352e-06,
      "loss": 10.5328,
      "step": 183
    },
    {
      "epoch": 0.1619718309859155,
      "grad_norm": 0.2887389361858368,
      "learning_rate": 1.9135099347458293e-06,
      "loss": 10.5209,
      "step": 184
    },
    {
      "epoch": 0.16285211267605634,
      "grad_norm": 0.35892584919929504,
      "learning_rate": 1.6829853733368294e-06,
      "loss": 10.5239,
      "step": 185
    },
    {
      "epoch": 0.1637323943661972,
      "grad_norm": 0.24830038845539093,
      "learning_rate": 1.4670371513596842e-06,
      "loss": 10.5013,
      "step": 186
    },
    {
      "epoch": 0.16461267605633803,
      "grad_norm": 0.33427461981773376,
      "learning_rate": 1.2657243069020402e-06,
      "loss": 10.4891,
      "step": 187
    },
    {
      "epoch": 0.16549295774647887,
      "grad_norm": 0.3475538492202759,
      "learning_rate": 1.0791018768854855e-06,
      "loss": 10.4896,
      "step": 188
    },
    {
      "epoch": 0.16637323943661972,
      "grad_norm": 0.3785645365715027,
      "learning_rate": 9.072208820189698e-07,
      "loss": 10.5247,
      "step": 189
    },
    {
      "epoch": 0.16725352112676056,
      "grad_norm": 0.3462846279144287,
      "learning_rate": 7.501283128502722e-07,
      "loss": 10.5091,
      "step": 190
    },
    {
      "epoch": 0.1681338028169014,
      "grad_norm": 0.3662095367908478,
      "learning_rate": 6.07867116919233e-07,
      "loss": 10.465,
      "step": 191
    },
    {
      "epoch": 0.16901408450704225,
      "grad_norm": 0.46672433614730835,
      "learning_rate": 4.804761870163643e-07,
      "loss": 10.4353,
      "step": 192
    },
    {
      "epoch": 0.1698943661971831,
      "grad_norm": 0.3511195778846741,
      "learning_rate": 3.6799035054990215e-07,
      "loss": 10.4936,
      "step": 193
    },
    {
      "epoch": 0.17077464788732394,
      "grad_norm": 0.3752671778202057,
      "learning_rate": 2.704403600243721e-07,
      "loss": 10.4624,
      "step": 194
    },
    {
      "epoch": 0.17165492957746478,
      "grad_norm": 0.3640790283679962,
      "learning_rate": 1.878528846331584e-07,
      "loss": 10.495,
      "step": 195
    },
    {
      "epoch": 0.17253521126760563,
      "grad_norm": 0.37574926018714905,
      "learning_rate": 1.202505029674006e-07,
      "loss": 10.4919,
      "step": 196
    },
    {
      "epoch": 0.17341549295774647,
      "grad_norm": 0.32661178708076477,
      "learning_rate": 6.765169684323947e-08,
      "loss": 10.4605,
      "step": 197
    },
    {
      "epoch": 0.1742957746478873,
      "grad_norm": 0.415720671415329,
      "learning_rate": 3.007084624906731e-08,
      "loss": 10.4822,
      "step": 198
    },
    {
      "epoch": 0.17517605633802816,
      "grad_norm": 0.4906235337257385,
      "learning_rate": 7.518225414204771e-09,
      "loss": 10.5168,
      "step": 199
    },
    {
      "epoch": 0.176056338028169,
      "grad_norm": 0.49893417954444885,
      "learning_rate": 0.0,
      "loss": 10.4814,
      "step": 200
    },
    {
      "epoch": 0.176056338028169,
      "eval_loss": 10.490081787109375,
      "eval_runtime": 30.7966,
      "eval_samples_per_second": 62.117,
      "eval_steps_per_second": 15.554,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 136620225331200.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}