{ "best_metric": 1.1628694534301758, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.3104384943733023, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015521924718665113, "grad_norm": 0.18503686785697937, "learning_rate": 5e-06, "loss": 0.6515, "step": 1 }, { "epoch": 0.0015521924718665113, "eval_loss": 1.4058570861816406, "eval_runtime": 83.3584, "eval_samples_per_second": 13.016, "eval_steps_per_second": 6.514, "step": 1 }, { "epoch": 0.0031043849437330227, "grad_norm": 0.21703191101551056, "learning_rate": 1e-05, "loss": 0.8251, "step": 2 }, { "epoch": 0.004656577415599534, "grad_norm": 0.2690574526786804, "learning_rate": 1.5e-05, "loss": 0.9786, "step": 3 }, { "epoch": 0.006208769887466045, "grad_norm": 0.26256296038627625, "learning_rate": 2e-05, "loss": 0.9597, "step": 4 }, { "epoch": 0.007760962359332557, "grad_norm": 0.28159835934638977, "learning_rate": 2.5e-05, "loss": 0.9861, "step": 5 }, { "epoch": 0.009313154831199068, "grad_norm": 0.2422163337469101, "learning_rate": 3e-05, "loss": 0.9501, "step": 6 }, { "epoch": 0.01086534730306558, "grad_norm": 0.2566586434841156, "learning_rate": 3.5e-05, "loss": 1.1036, "step": 7 }, { "epoch": 0.01241753977493209, "grad_norm": 0.25555717945098877, "learning_rate": 4e-05, "loss": 1.0891, "step": 8 }, { "epoch": 0.013969732246798603, "grad_norm": 0.25616592168807983, "learning_rate": 4.5e-05, "loss": 1.0693, "step": 9 }, { "epoch": 0.015521924718665115, "grad_norm": 0.2807532846927643, "learning_rate": 5e-05, "loss": 1.1407, "step": 10 }, { "epoch": 0.017074117190531625, "grad_norm": 0.29218876361846924, "learning_rate": 5.500000000000001e-05, "loss": 1.1332, "step": 11 }, { "epoch": 0.018626309662398137, "grad_norm": 0.3137388825416565, "learning_rate": 6e-05, "loss": 1.1886, "step": 12 }, { "epoch": 0.02017850213426465, "grad_norm": 0.3073325455188751, "learning_rate": 6.500000000000001e-05, "loss": 1.2433, "step": 13 }, { "epoch": 0.02173069460613116, "grad_norm": 0.3186430037021637, "learning_rate": 7e-05, "loss": 1.1397, "step": 14 }, { "epoch": 0.023282887077997673, "grad_norm": 0.32625216245651245, "learning_rate": 7.500000000000001e-05, "loss": 1.1369, "step": 15 }, { "epoch": 0.02483507954986418, "grad_norm": 0.37930625677108765, "learning_rate": 8e-05, "loss": 1.2547, "step": 16 }, { "epoch": 0.026387272021730693, "grad_norm": 0.3755185902118683, "learning_rate": 8.5e-05, "loss": 1.2252, "step": 17 }, { "epoch": 0.027939464493597205, "grad_norm": 0.38259395956993103, "learning_rate": 9e-05, "loss": 1.2257, "step": 18 }, { "epoch": 0.029491656965463717, "grad_norm": 0.3669893741607666, "learning_rate": 9.5e-05, "loss": 1.1849, "step": 19 }, { "epoch": 0.03104384943733023, "grad_norm": 0.34851202368736267, "learning_rate": 0.0001, "loss": 1.1709, "step": 20 }, { "epoch": 0.03259604190919674, "grad_norm": 0.36455070972442627, "learning_rate": 9.999238475781957e-05, "loss": 1.2213, "step": 21 }, { "epoch": 0.03414823438106325, "grad_norm": 0.38422465324401855, "learning_rate": 9.99695413509548e-05, "loss": 1.2971, "step": 22 }, { "epoch": 0.03570042685292976, "grad_norm": 0.36575639247894287, "learning_rate": 9.99314767377287e-05, "loss": 1.2899, "step": 23 }, { "epoch": 0.037252619324796274, "grad_norm": 0.3730488419532776, "learning_rate": 9.987820251299122e-05, "loss": 1.3122, "step": 24 }, { "epoch": 0.038804811796662786, "grad_norm": 0.3623234033584595, "learning_rate": 9.980973490458728e-05, "loss": 1.2621, "step": 25 }, { "epoch": 0.0403570042685293, "grad_norm": 0.40793418884277344, "learning_rate": 9.972609476841367e-05, "loss": 1.2989, "step": 26 }, { "epoch": 0.04190919674039581, "grad_norm": 0.39662837982177734, "learning_rate": 9.962730758206611e-05, "loss": 1.3058, "step": 27 }, { "epoch": 0.04346138921226232, "grad_norm": 0.42494845390319824, "learning_rate": 9.951340343707852e-05, "loss": 1.3119, "step": 28 }, { "epoch": 0.045013581684128834, "grad_norm": 0.429860383272171, "learning_rate": 9.938441702975689e-05, "loss": 1.2452, "step": 29 }, { "epoch": 0.046565774155995346, "grad_norm": 0.4707893133163452, "learning_rate": 9.924038765061042e-05, "loss": 1.1891, "step": 30 }, { "epoch": 0.04811796662786186, "grad_norm": 0.5009111762046814, "learning_rate": 9.908135917238321e-05, "loss": 1.3307, "step": 31 }, { "epoch": 0.04967015909972836, "grad_norm": 0.49140918254852295, "learning_rate": 9.890738003669029e-05, "loss": 1.2803, "step": 32 }, { "epoch": 0.051222351571594875, "grad_norm": 0.49774688482284546, "learning_rate": 9.871850323926177e-05, "loss": 1.3875, "step": 33 }, { "epoch": 0.05277454404346139, "grad_norm": 0.5047544836997986, "learning_rate": 9.851478631379982e-05, "loss": 1.2994, "step": 34 }, { "epoch": 0.0543267365153279, "grad_norm": 0.5006894469261169, "learning_rate": 9.829629131445342e-05, "loss": 1.332, "step": 35 }, { "epoch": 0.05587892898719441, "grad_norm": 0.515229344367981, "learning_rate": 9.806308479691595e-05, "loss": 1.2985, "step": 36 }, { "epoch": 0.05743112145906092, "grad_norm": 0.5397118926048279, "learning_rate": 9.781523779815179e-05, "loss": 1.3506, "step": 37 }, { "epoch": 0.058983313930927435, "grad_norm": 0.47588130831718445, "learning_rate": 9.755282581475769e-05, "loss": 1.3326, "step": 38 }, { "epoch": 0.06053550640279395, "grad_norm": 0.5251979827880859, "learning_rate": 9.727592877996585e-05, "loss": 1.3764, "step": 39 }, { "epoch": 0.06208769887466046, "grad_norm": 0.5344756841659546, "learning_rate": 9.698463103929542e-05, "loss": 1.2655, "step": 40 }, { "epoch": 0.06363989134652696, "grad_norm": 0.5431151390075684, "learning_rate": 9.667902132486009e-05, "loss": 1.3312, "step": 41 }, { "epoch": 0.06519208381839348, "grad_norm": 0.5688284039497375, "learning_rate": 9.635919272833938e-05, "loss": 1.2599, "step": 42 }, { "epoch": 0.06674427629025999, "grad_norm": 0.6097058653831482, "learning_rate": 9.602524267262203e-05, "loss": 1.3808, "step": 43 }, { "epoch": 0.0682964687621265, "grad_norm": 0.5607826709747314, "learning_rate": 9.567727288213005e-05, "loss": 1.3462, "step": 44 }, { "epoch": 0.06984866123399301, "grad_norm": 0.5400423407554626, "learning_rate": 9.53153893518325e-05, "loss": 1.2345, "step": 45 }, { "epoch": 0.07140085370585952, "grad_norm": 0.5579301118850708, "learning_rate": 9.493970231495835e-05, "loss": 1.3386, "step": 46 }, { "epoch": 0.07295304617772604, "grad_norm": 0.600250780582428, "learning_rate": 9.45503262094184e-05, "loss": 1.473, "step": 47 }, { "epoch": 0.07450523864959255, "grad_norm": 0.609580934047699, "learning_rate": 9.414737964294636e-05, "loss": 1.4143, "step": 48 }, { "epoch": 0.07605743112145906, "grad_norm": 0.7040746808052063, "learning_rate": 9.373098535696979e-05, "loss": 1.465, "step": 49 }, { "epoch": 0.07760962359332557, "grad_norm": 1.0788086652755737, "learning_rate": 9.330127018922194e-05, "loss": 1.5379, "step": 50 }, { "epoch": 0.07760962359332557, "eval_loss": 1.2365593910217285, "eval_runtime": 84.2918, "eval_samples_per_second": 12.872, "eval_steps_per_second": 6.442, "step": 50 }, { "epoch": 0.07916181606519208, "grad_norm": 0.208244189620018, "learning_rate": 9.285836503510562e-05, "loss": 0.7064, "step": 51 }, { "epoch": 0.0807140085370586, "grad_norm": 0.25803104043006897, "learning_rate": 9.24024048078213e-05, "loss": 0.835, "step": 52 }, { "epoch": 0.08226620100892511, "grad_norm": 0.2872063219547272, "learning_rate": 9.193352839727121e-05, "loss": 0.9635, "step": 53 }, { "epoch": 0.08381839348079162, "grad_norm": 0.27456480264663696, "learning_rate": 9.145187862775209e-05, "loss": 1.0085, "step": 54 }, { "epoch": 0.08537058595265813, "grad_norm": 0.2541908323764801, "learning_rate": 9.09576022144496e-05, "loss": 0.98, "step": 55 }, { "epoch": 0.08692277842452464, "grad_norm": 0.24798186123371124, "learning_rate": 9.045084971874738e-05, "loss": 1.043, "step": 56 }, { "epoch": 0.08847497089639116, "grad_norm": 0.24266217648983002, "learning_rate": 8.993177550236464e-05, "loss": 0.975, "step": 57 }, { "epoch": 0.09002716336825767, "grad_norm": 0.23755359649658203, "learning_rate": 8.940053768033609e-05, "loss": 1.103, "step": 58 }, { "epoch": 0.09157935584012418, "grad_norm": 0.2594914734363556, "learning_rate": 8.885729807284856e-05, "loss": 1.1108, "step": 59 }, { "epoch": 0.09313154831199069, "grad_norm": 0.260532945394516, "learning_rate": 8.83022221559489e-05, "loss": 0.9986, "step": 60 }, { "epoch": 0.0946837407838572, "grad_norm": 0.26638638973236084, "learning_rate": 8.773547901113862e-05, "loss": 1.1747, "step": 61 }, { "epoch": 0.09623593325572372, "grad_norm": 0.25695279240608215, "learning_rate": 8.715724127386972e-05, "loss": 1.1011, "step": 62 }, { "epoch": 0.09778812572759023, "grad_norm": 0.28216707706451416, "learning_rate": 8.656768508095853e-05, "loss": 1.1402, "step": 63 }, { "epoch": 0.09934031819945673, "grad_norm": 0.27320596575737, "learning_rate": 8.596699001693255e-05, "loss": 1.1755, "step": 64 }, { "epoch": 0.10089251067132324, "grad_norm": 0.30737578868865967, "learning_rate": 8.535533905932738e-05, "loss": 1.106, "step": 65 }, { "epoch": 0.10244470314318975, "grad_norm": 0.28403839468955994, "learning_rate": 8.473291852294987e-05, "loss": 1.1238, "step": 66 }, { "epoch": 0.10399689561505626, "grad_norm": 0.2960684597492218, "learning_rate": 8.409991800312493e-05, "loss": 1.1425, "step": 67 }, { "epoch": 0.10554908808692277, "grad_norm": 0.29324308037757874, "learning_rate": 8.345653031794292e-05, "loss": 1.1544, "step": 68 }, { "epoch": 0.10710128055878929, "grad_norm": 0.30567237734794617, "learning_rate": 8.280295144952536e-05, "loss": 1.2075, "step": 69 }, { "epoch": 0.1086534730306558, "grad_norm": 0.32144761085510254, "learning_rate": 8.213938048432697e-05, "loss": 1.2646, "step": 70 }, { "epoch": 0.11020566550252231, "grad_norm": 0.3119107186794281, "learning_rate": 8.146601955249188e-05, "loss": 1.154, "step": 71 }, { "epoch": 0.11175785797438882, "grad_norm": 0.34419557452201843, "learning_rate": 8.07830737662829e-05, "loss": 1.1839, "step": 72 }, { "epoch": 0.11331005044625533, "grad_norm": 0.32192665338516235, "learning_rate": 8.009075115760243e-05, "loss": 1.2252, "step": 73 }, { "epoch": 0.11486224291812185, "grad_norm": 0.3307979106903076, "learning_rate": 7.938926261462366e-05, "loss": 1.1931, "step": 74 }, { "epoch": 0.11641443538998836, "grad_norm": 0.3643626272678375, "learning_rate": 7.86788218175523e-05, "loss": 1.2355, "step": 75 }, { "epoch": 0.11796662786185487, "grad_norm": 0.3622789680957794, "learning_rate": 7.795964517353735e-05, "loss": 1.2064, "step": 76 }, { "epoch": 0.11951882033372138, "grad_norm": 0.3453439474105835, "learning_rate": 7.723195175075136e-05, "loss": 1.0944, "step": 77 }, { "epoch": 0.1210710128055879, "grad_norm": 0.373737633228302, "learning_rate": 7.649596321166024e-05, "loss": 1.2976, "step": 78 }, { "epoch": 0.1226232052774544, "grad_norm": 0.40315911173820496, "learning_rate": 7.575190374550272e-05, "loss": 1.2818, "step": 79 }, { "epoch": 0.12417539774932092, "grad_norm": 0.44741833209991455, "learning_rate": 7.500000000000001e-05, "loss": 1.3029, "step": 80 }, { "epoch": 0.12572759022118743, "grad_norm": 0.41700729727745056, "learning_rate": 7.424048101231686e-05, "loss": 1.3157, "step": 81 }, { "epoch": 0.12727978269305393, "grad_norm": 0.42594003677368164, "learning_rate": 7.347357813929454e-05, "loss": 1.2919, "step": 82 }, { "epoch": 0.12883197516492045, "grad_norm": 0.4305015206336975, "learning_rate": 7.269952498697734e-05, "loss": 1.2893, "step": 83 }, { "epoch": 0.13038416763678695, "grad_norm": 0.4550113081932068, "learning_rate": 7.191855733945387e-05, "loss": 1.4103, "step": 84 }, { "epoch": 0.13193636010865348, "grad_norm": 0.4780341386795044, "learning_rate": 7.113091308703498e-05, "loss": 1.3371, "step": 85 }, { "epoch": 0.13348855258051998, "grad_norm": 0.4826207160949707, "learning_rate": 7.033683215379002e-05, "loss": 1.3083, "step": 86 }, { "epoch": 0.1350407450523865, "grad_norm": 0.5050732493400574, "learning_rate": 6.953655642446368e-05, "loss": 1.3091, "step": 87 }, { "epoch": 0.136592937524253, "grad_norm": 0.5040425658226013, "learning_rate": 6.873032967079561e-05, "loss": 1.2558, "step": 88 }, { "epoch": 0.13814512999611953, "grad_norm": 0.5459226965904236, "learning_rate": 6.7918397477265e-05, "loss": 1.1998, "step": 89 }, { "epoch": 0.13969732246798602, "grad_norm": 0.5227588415145874, "learning_rate": 6.710100716628344e-05, "loss": 1.3123, "step": 90 }, { "epoch": 0.14124951493985255, "grad_norm": 0.5383272767066956, "learning_rate": 6.627840772285784e-05, "loss": 1.264, "step": 91 }, { "epoch": 0.14280170741171905, "grad_norm": 0.5507062077522278, "learning_rate": 6.545084971874738e-05, "loss": 1.3205, "step": 92 }, { "epoch": 0.14435389988358557, "grad_norm": 0.5608528256416321, "learning_rate": 6.461858523613684e-05, "loss": 1.3012, "step": 93 }, { "epoch": 0.14590609235545207, "grad_norm": 0.5358137488365173, "learning_rate": 6.378186779084995e-05, "loss": 1.2865, "step": 94 }, { "epoch": 0.1474582848273186, "grad_norm": 0.5364001393318176, "learning_rate": 6.294095225512603e-05, "loss": 1.3138, "step": 95 }, { "epoch": 0.1490104772991851, "grad_norm": 0.5460485816001892, "learning_rate": 6.209609477998338e-05, "loss": 1.4544, "step": 96 }, { "epoch": 0.15056266977105162, "grad_norm": 0.5636151432991028, "learning_rate": 6.124755271719325e-05, "loss": 1.3556, "step": 97 }, { "epoch": 0.15211486224291812, "grad_norm": 0.5942332148551941, "learning_rate": 6.0395584540887963e-05, "loss": 1.331, "step": 98 }, { "epoch": 0.15366705471478465, "grad_norm": 0.6374626159667969, "learning_rate": 5.9540449768827246e-05, "loss": 1.4722, "step": 99 }, { "epoch": 0.15521924718665114, "grad_norm": 0.8560624122619629, "learning_rate": 5.868240888334653e-05, "loss": 1.2749, "step": 100 }, { "epoch": 0.15521924718665114, "eval_loss": 1.1981837749481201, "eval_runtime": 84.0952, "eval_samples_per_second": 12.902, "eval_steps_per_second": 6.457, "step": 100 }, { "epoch": 0.15677143965851767, "grad_norm": 0.17679108679294586, "learning_rate": 5.782172325201155e-05, "loss": 0.799, "step": 101 }, { "epoch": 0.15832363213038417, "grad_norm": 0.21500533819198608, "learning_rate": 5.695865504800327e-05, "loss": 0.8486, "step": 102 }, { "epoch": 0.15987582460225067, "grad_norm": 0.22146056592464447, "learning_rate": 5.6093467170257374e-05, "loss": 0.8614, "step": 103 }, { "epoch": 0.1614280170741172, "grad_norm": 0.26255136728286743, "learning_rate": 5.522642316338268e-05, "loss": 1.0168, "step": 104 }, { "epoch": 0.1629802095459837, "grad_norm": 0.23788823187351227, "learning_rate": 5.435778713738292e-05, "loss": 1.0063, "step": 105 }, { "epoch": 0.16453240201785022, "grad_norm": 0.24406470358371735, "learning_rate": 5.348782368720626e-05, "loss": 0.9463, "step": 106 }, { "epoch": 0.1660845944897167, "grad_norm": 0.2359701246023178, "learning_rate": 5.26167978121472e-05, "loss": 1.0042, "step": 107 }, { "epoch": 0.16763678696158324, "grad_norm": 0.45982709527015686, "learning_rate": 5.174497483512506e-05, "loss": 1.0701, "step": 108 }, { "epoch": 0.16918897943344974, "grad_norm": 0.2334708571434021, "learning_rate": 5.0872620321864185e-05, "loss": 1.0063, "step": 109 }, { "epoch": 0.17074117190531626, "grad_norm": 0.24308626353740692, "learning_rate": 5e-05, "loss": 1.0113, "step": 110 }, { "epoch": 0.17229336437718276, "grad_norm": 0.2540592551231384, "learning_rate": 4.912737967813583e-05, "loss": 1.0861, "step": 111 }, { "epoch": 0.1738455568490493, "grad_norm": 0.2556550204753876, "learning_rate": 4.825502516487497e-05, "loss": 1.1639, "step": 112 }, { "epoch": 0.17539774932091579, "grad_norm": 0.2509619891643524, "learning_rate": 4.738320218785281e-05, "loss": 1.1022, "step": 113 }, { "epoch": 0.1769499417927823, "grad_norm": 0.27927160263061523, "learning_rate": 4.6512176312793736e-05, "loss": 1.0619, "step": 114 }, { "epoch": 0.1785021342646488, "grad_norm": 0.27921226620674133, "learning_rate": 4.564221286261709e-05, "loss": 1.135, "step": 115 }, { "epoch": 0.18005432673651534, "grad_norm": 0.27985909581184387, "learning_rate": 4.477357683661734e-05, "loss": 1.2055, "step": 116 }, { "epoch": 0.18160651920838183, "grad_norm": 0.28013497591018677, "learning_rate": 4.390653282974264e-05, "loss": 1.1392, "step": 117 }, { "epoch": 0.18315871168024836, "grad_norm": 0.2879810631275177, "learning_rate": 4.3041344951996746e-05, "loss": 1.1487, "step": 118 }, { "epoch": 0.18471090415211486, "grad_norm": 0.29554373025894165, "learning_rate": 4.2178276747988446e-05, "loss": 1.1189, "step": 119 }, { "epoch": 0.18626309662398138, "grad_norm": 0.29500454664230347, "learning_rate": 4.131759111665349e-05, "loss": 1.1911, "step": 120 }, { "epoch": 0.18781528909584788, "grad_norm": 0.2961357831954956, "learning_rate": 4.045955023117276e-05, "loss": 1.1043, "step": 121 }, { "epoch": 0.1893674815677144, "grad_norm": 0.3352125287055969, "learning_rate": 3.960441545911204e-05, "loss": 1.2578, "step": 122 }, { "epoch": 0.1909196740395809, "grad_norm": 0.33993181586265564, "learning_rate": 3.875244728280676e-05, "loss": 1.2228, "step": 123 }, { "epoch": 0.19247186651144743, "grad_norm": 0.3543814718723297, "learning_rate": 3.790390522001662e-05, "loss": 1.2523, "step": 124 }, { "epoch": 0.19402405898331393, "grad_norm": 0.34547507762908936, "learning_rate": 3.705904774487396e-05, "loss": 1.233, "step": 125 }, { "epoch": 0.19557625145518046, "grad_norm": 0.33998626470565796, "learning_rate": 3.6218132209150045e-05, "loss": 1.2057, "step": 126 }, { "epoch": 0.19712844392704695, "grad_norm": 0.37567129731178284, "learning_rate": 3.5381414763863166e-05, "loss": 1.3237, "step": 127 }, { "epoch": 0.19868063639891345, "grad_norm": 0.36463621258735657, "learning_rate": 3.4549150281252636e-05, "loss": 1.2959, "step": 128 }, { "epoch": 0.20023282887077998, "grad_norm": 0.38328975439071655, "learning_rate": 3.372159227714218e-05, "loss": 1.2529, "step": 129 }, { "epoch": 0.20178502134264648, "grad_norm": 0.3917052447795868, "learning_rate": 3.289899283371657e-05, "loss": 1.2928, "step": 130 }, { "epoch": 0.203337213814513, "grad_norm": 0.3970443308353424, "learning_rate": 3.2081602522734986e-05, "loss": 1.1732, "step": 131 }, { "epoch": 0.2048894062863795, "grad_norm": 0.43389272689819336, "learning_rate": 3.12696703292044e-05, "loss": 1.3013, "step": 132 }, { "epoch": 0.20644159875824603, "grad_norm": 0.4360358715057373, "learning_rate": 3.046344357553632e-05, "loss": 1.2674, "step": 133 }, { "epoch": 0.20799379123011252, "grad_norm": 0.45030710101127625, "learning_rate": 2.9663167846209998e-05, "loss": 1.2964, "step": 134 }, { "epoch": 0.20954598370197905, "grad_norm": 0.45357832312583923, "learning_rate": 2.886908691296504e-05, "loss": 1.2428, "step": 135 }, { "epoch": 0.21109817617384555, "grad_norm": 0.4770860970020294, "learning_rate": 2.8081442660546125e-05, "loss": 1.252, "step": 136 }, { "epoch": 0.21265036864571207, "grad_norm": 0.46414923667907715, "learning_rate": 2.7300475013022663e-05, "loss": 1.2981, "step": 137 }, { "epoch": 0.21420256111757857, "grad_norm": 0.4736550748348236, "learning_rate": 2.6526421860705473e-05, "loss": 1.269, "step": 138 }, { "epoch": 0.2157547535894451, "grad_norm": 0.47675976157188416, "learning_rate": 2.575951898768315e-05, "loss": 1.2472, "step": 139 }, { "epoch": 0.2173069460613116, "grad_norm": 0.48701146245002747, "learning_rate": 2.500000000000001e-05, "loss": 1.3839, "step": 140 }, { "epoch": 0.21885913853317812, "grad_norm": 0.5033087730407715, "learning_rate": 2.4248096254497288e-05, "loss": 1.1504, "step": 141 }, { "epoch": 0.22041133100504462, "grad_norm": 0.5181295871734619, "learning_rate": 2.350403678833976e-05, "loss": 1.3645, "step": 142 }, { "epoch": 0.22196352347691115, "grad_norm": 0.5079509019851685, "learning_rate": 2.2768048249248648e-05, "loss": 1.2558, "step": 143 }, { "epoch": 0.22351571594877764, "grad_norm": 0.5224129557609558, "learning_rate": 2.2040354826462668e-05, "loss": 1.3698, "step": 144 }, { "epoch": 0.22506790842064417, "grad_norm": 0.5362732410430908, "learning_rate": 2.132117818244771e-05, "loss": 1.2566, "step": 145 }, { "epoch": 0.22662010089251067, "grad_norm": 0.5596816539764404, "learning_rate": 2.061073738537635e-05, "loss": 1.396, "step": 146 }, { "epoch": 0.2281722933643772, "grad_norm": 0.5064431428909302, "learning_rate": 1.9909248842397584e-05, "loss": 1.2597, "step": 147 }, { "epoch": 0.2297244858362437, "grad_norm": 0.5758273601531982, "learning_rate": 1.9216926233717085e-05, "loss": 1.2895, "step": 148 }, { "epoch": 0.23127667830811022, "grad_norm": 0.6258172988891602, "learning_rate": 1.8533980447508137e-05, "loss": 1.3318, "step": 149 }, { "epoch": 0.23282887077997672, "grad_norm": 0.8499401211738586, "learning_rate": 1.7860619515673033e-05, "loss": 1.519, "step": 150 }, { "epoch": 0.23282887077997672, "eval_loss": 1.1672673225402832, "eval_runtime": 84.0738, "eval_samples_per_second": 12.905, "eval_steps_per_second": 6.459, "step": 150 }, { "epoch": 0.23438106325184324, "grad_norm": 0.17299634218215942, "learning_rate": 1.7197048550474643e-05, "loss": 0.7786, "step": 151 }, { "epoch": 0.23593325572370974, "grad_norm": 0.19577302038669586, "learning_rate": 1.6543469682057106e-05, "loss": 0.8748, "step": 152 }, { "epoch": 0.23748544819557627, "grad_norm": 0.20664645731449127, "learning_rate": 1.5900081996875083e-05, "loss": 0.8597, "step": 153 }, { "epoch": 0.23903764066744276, "grad_norm": 0.20882728695869446, "learning_rate": 1.526708147705013e-05, "loss": 0.95, "step": 154 }, { "epoch": 0.24058983313930926, "grad_norm": 0.22323079407215118, "learning_rate": 1.4644660940672627e-05, "loss": 0.936, "step": 155 }, { "epoch": 0.2421420256111758, "grad_norm": 0.23030349612236023, "learning_rate": 1.4033009983067452e-05, "loss": 1.0173, "step": 156 }, { "epoch": 0.24369421808304229, "grad_norm": 0.23430734872817993, "learning_rate": 1.3432314919041478e-05, "loss": 1.0162, "step": 157 }, { "epoch": 0.2452464105549088, "grad_norm": 0.23629391193389893, "learning_rate": 1.2842758726130283e-05, "loss": 1.0534, "step": 158 }, { "epoch": 0.2467986030267753, "grad_norm": 0.2447493076324463, "learning_rate": 1.22645209888614e-05, "loss": 1.0471, "step": 159 }, { "epoch": 0.24835079549864184, "grad_norm": 0.25912949442863464, "learning_rate": 1.1697777844051105e-05, "loss": 1.0991, "step": 160 }, { "epoch": 0.24990298797050833, "grad_norm": 0.2570056617259979, "learning_rate": 1.1142701927151456e-05, "loss": 1.0746, "step": 161 }, { "epoch": 0.25145518044237486, "grad_norm": 0.25544220209121704, "learning_rate": 1.0599462319663905e-05, "loss": 1.124, "step": 162 }, { "epoch": 0.25300737291424136, "grad_norm": 0.27445948123931885, "learning_rate": 1.006822449763537e-05, "loss": 1.1435, "step": 163 }, { "epoch": 0.25455956538610786, "grad_norm": 0.2732482850551605, "learning_rate": 9.549150281252633e-06, "loss": 1.2122, "step": 164 }, { "epoch": 0.2561117578579744, "grad_norm": 0.2644514739513397, "learning_rate": 9.042397785550405e-06, "loss": 1.114, "step": 165 }, { "epoch": 0.2576639503298409, "grad_norm": 0.27963027358055115, "learning_rate": 8.548121372247918e-06, "loss": 1.2008, "step": 166 }, { "epoch": 0.2592161428017074, "grad_norm": 0.28873342275619507, "learning_rate": 8.066471602728803e-06, "loss": 1.157, "step": 167 }, { "epoch": 0.2607683352735739, "grad_norm": 0.2958706021308899, "learning_rate": 7.597595192178702e-06, "loss": 1.174, "step": 168 }, { "epoch": 0.26232052774544046, "grad_norm": 0.3139137029647827, "learning_rate": 7.1416349648943894e-06, "loss": 1.139, "step": 169 }, { "epoch": 0.26387272021730696, "grad_norm": 0.3384767174720764, "learning_rate": 6.698729810778065e-06, "loss": 1.2659, "step": 170 }, { "epoch": 0.26542491268917345, "grad_norm": 0.32462865114212036, "learning_rate": 6.269014643030213e-06, "loss": 1.1862, "step": 171 }, { "epoch": 0.26697710516103995, "grad_norm": 0.3157460689544678, "learning_rate": 5.852620357053651e-06, "loss": 1.2289, "step": 172 }, { "epoch": 0.2685292976329065, "grad_norm": 0.35785654187202454, "learning_rate": 5.449673790581611e-06, "loss": 1.2688, "step": 173 }, { "epoch": 0.270081490104773, "grad_norm": 0.3600394129753113, "learning_rate": 5.060297685041659e-06, "loss": 1.249, "step": 174 }, { "epoch": 0.2716336825766395, "grad_norm": 0.35414648056030273, "learning_rate": 4.684610648167503e-06, "loss": 1.1348, "step": 175 }, { "epoch": 0.273185875048506, "grad_norm": 0.39005619287490845, "learning_rate": 4.322727117869951e-06, "loss": 1.2229, "step": 176 }, { "epoch": 0.27473806752037255, "grad_norm": 0.3633173108100891, "learning_rate": 3.974757327377981e-06, "loss": 1.0981, "step": 177 }, { "epoch": 0.27629025999223905, "grad_norm": 0.3896339237689972, "learning_rate": 3.6408072716606346e-06, "loss": 1.239, "step": 178 }, { "epoch": 0.27784245246410555, "grad_norm": 0.3801780343055725, "learning_rate": 3.3209786751399187e-06, "loss": 1.1479, "step": 179 }, { "epoch": 0.27939464493597205, "grad_norm": 0.410643994808197, "learning_rate": 3.0153689607045845e-06, "loss": 1.2703, "step": 180 }, { "epoch": 0.28094683740783855, "grad_norm": 0.41522127389907837, "learning_rate": 2.724071220034158e-06, "loss": 1.2798, "step": 181 }, { "epoch": 0.2824990298797051, "grad_norm": 0.4511876404285431, "learning_rate": 2.4471741852423237e-06, "loss": 1.2399, "step": 182 }, { "epoch": 0.2840512223515716, "grad_norm": 0.44222402572631836, "learning_rate": 2.1847622018482283e-06, "loss": 1.3106, "step": 183 }, { "epoch": 0.2856034148234381, "grad_norm": 0.4436941146850586, "learning_rate": 1.9369152030840556e-06, "loss": 1.2857, "step": 184 }, { "epoch": 0.2871556072953046, "grad_norm": 0.4583612382411957, "learning_rate": 1.70370868554659e-06, "loss": 1.3339, "step": 185 }, { "epoch": 0.28870779976717115, "grad_norm": 0.4590003490447998, "learning_rate": 1.4852136862001764e-06, "loss": 1.2323, "step": 186 }, { "epoch": 0.29025999223903765, "grad_norm": 0.4813213348388672, "learning_rate": 1.2814967607382432e-06, "loss": 1.2357, "step": 187 }, { "epoch": 0.29181218471090414, "grad_norm": 0.4743642210960388, "learning_rate": 1.0926199633097157e-06, "loss": 1.3341, "step": 188 }, { "epoch": 0.29336437718277064, "grad_norm": 0.5076556205749512, "learning_rate": 9.186408276168013e-07, "loss": 1.2657, "step": 189 }, { "epoch": 0.2949165696546372, "grad_norm": 0.5171471238136292, "learning_rate": 7.596123493895991e-07, "loss": 1.3414, "step": 190 }, { "epoch": 0.2964687621265037, "grad_norm": 0.5316108465194702, "learning_rate": 6.15582970243117e-07, "loss": 1.2698, "step": 191 }, { "epoch": 0.2980209545983702, "grad_norm": 0.5149205327033997, "learning_rate": 4.865965629214819e-07, "loss": 1.1859, "step": 192 }, { "epoch": 0.2995731470702367, "grad_norm": 0.5591493248939514, "learning_rate": 3.7269241793390085e-07, "loss": 1.3979, "step": 193 }, { "epoch": 0.30112533954210324, "grad_norm": 0.5425105690956116, "learning_rate": 2.7390523158633554e-07, "loss": 1.2699, "step": 194 }, { "epoch": 0.30267753201396974, "grad_norm": 0.5106943845748901, "learning_rate": 1.9026509541272275e-07, "loss": 1.3963, "step": 195 }, { "epoch": 0.30422972448583624, "grad_norm": 0.520468533039093, "learning_rate": 1.2179748700879012e-07, "loss": 1.3648, "step": 196 }, { "epoch": 0.30578191695770274, "grad_norm": 0.5118442177772522, "learning_rate": 6.852326227130834e-08, "loss": 1.3784, "step": 197 }, { "epoch": 0.3073341094295693, "grad_norm": 0.5514056086540222, "learning_rate": 3.04586490452119e-08, "loss": 1.4421, "step": 198 }, { "epoch": 0.3088863019014358, "grad_norm": 0.6693518757820129, "learning_rate": 7.615242180436522e-09, "loss": 1.376, "step": 199 }, { "epoch": 0.3104384943733023, "grad_norm": 1.0523006916046143, "learning_rate": 0.0, "loss": 1.4615, "step": 200 }, { "epoch": 0.3104384943733023, "eval_loss": 1.1628694534301758, "eval_runtime": 84.1145, "eval_samples_per_second": 12.899, "eval_steps_per_second": 6.455, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8757698661056512e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }