{
  "best_metric": 10.293888092041016,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.03175532521918617,
  "eval_steps": 25,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015877662609593086,
      "grad_norm": 0.07810629904270172,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 10.3764,
      "step": 1
    },
    {
      "epoch": 0.00015877662609593086,
      "eval_loss": 10.363986015319824,
      "eval_runtime": 0.0965,
      "eval_samples_per_second": 518.162,
      "eval_steps_per_second": 72.543,
      "step": 1
    },
    {
      "epoch": 0.00031755325219186173,
      "grad_norm": 0.061406999826431274,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 10.3633,
      "step": 2
    },
    {
      "epoch": 0.00047632987828779254,
      "grad_norm": 0.05004843696951866,
      "learning_rate": 8.999999999999999e-05,
      "loss": 10.3681,
      "step": 3
    },
    {
      "epoch": 0.0006351065043837235,
      "grad_norm": 0.05747171863913536,
      "learning_rate": 0.00011999999999999999,
      "loss": 10.3682,
      "step": 4
    },
    {
      "epoch": 0.0007938831304796542,
      "grad_norm": 0.05080826207995415,
      "learning_rate": 0.00015,
      "loss": 10.3647,
      "step": 5
    },
    {
      "epoch": 0.0009526597565755851,
      "grad_norm": 0.046676479279994965,
      "learning_rate": 0.00017999999999999998,
      "loss": 10.3577,
      "step": 6
    },
    {
      "epoch": 0.001111436382671516,
      "grad_norm": 0.046294715255498886,
      "learning_rate": 0.00020999999999999998,
      "loss": 10.366,
      "step": 7
    },
    {
      "epoch": 0.001270213008767447,
      "grad_norm": 0.04639292135834694,
      "learning_rate": 0.00023999999999999998,
      "loss": 10.3691,
      "step": 8
    },
    {
      "epoch": 0.0014289896348633777,
      "grad_norm": 0.04217684268951416,
      "learning_rate": 0.00027,
      "loss": 10.3534,
      "step": 9
    },
    {
      "epoch": 0.0015877662609593084,
      "grad_norm": 0.04545888677239418,
      "learning_rate": 0.0003,
      "loss": 10.3577,
      "step": 10
    },
    {
      "epoch": 0.0017465428870552394,
      "grad_norm": 0.045676153153181076,
      "learning_rate": 0.0002999794957488703,
      "loss": 10.3567,
      "step": 11
    },
    {
      "epoch": 0.0019053195131511701,
      "grad_norm": 0.044181909412145615,
      "learning_rate": 0.0002999179886011389,
      "loss": 10.3612,
      "step": 12
    },
    {
      "epoch": 0.002064096139247101,
      "grad_norm": 0.04575149342417717,
      "learning_rate": 0.0002998154953722457,
      "loss": 10.3539,
      "step": 13
    },
    {
      "epoch": 0.002222872765343032,
      "grad_norm": 0.04331066459417343,
      "learning_rate": 0.00029967204408281613,
      "loss": 10.357,
      "step": 14
    },
    {
      "epoch": 0.002381649391438963,
      "grad_norm": 0.04593302682042122,
      "learning_rate": 0.00029948767395100045,
      "loss": 10.3536,
      "step": 15
    },
    {
      "epoch": 0.002540426017534894,
      "grad_norm": 0.046111367642879486,
      "learning_rate": 0.0002992624353817517,
      "loss": 10.3586,
      "step": 16
    },
    {
      "epoch": 0.0026992026436308244,
      "grad_norm": 0.05106338486075401,
      "learning_rate": 0.0002989963899530457,
      "loss": 10.3551,
      "step": 17
    },
    {
      "epoch": 0.0028579792697267553,
      "grad_norm": 0.052090976387262344,
      "learning_rate": 0.00029868961039904624,
      "loss": 10.3548,
      "step": 18
    },
    {
      "epoch": 0.0030167558958226863,
      "grad_norm": 0.04916371405124664,
      "learning_rate": 0.00029834218059022024,
      "loss": 10.3536,
      "step": 19
    },
    {
      "epoch": 0.003175532521918617,
      "grad_norm": 0.05516723915934563,
      "learning_rate": 0.00029795419551040833,
      "loss": 10.3551,
      "step": 20
    },
    {
      "epoch": 0.003334309148014548,
      "grad_norm": 0.053350552916526794,
      "learning_rate": 0.00029752576123085736,
      "loss": 10.3571,
      "step": 21
    },
    {
      "epoch": 0.003493085774110479,
      "grad_norm": 0.05715618655085564,
      "learning_rate": 0.0002970569948812214,
      "loss": 10.3542,
      "step": 22
    },
    {
      "epoch": 0.0036518624002064098,
      "grad_norm": 0.06765611469745636,
      "learning_rate": 0.0002965480246175399,
      "loss": 10.349,
      "step": 23
    },
    {
      "epoch": 0.0038106390263023403,
      "grad_norm": 0.06198740378022194,
      "learning_rate": 0.0002959989895872009,
      "loss": 10.3561,
      "step": 24
    },
    {
      "epoch": 0.003969415652398271,
      "grad_norm": 0.05947557091712952,
      "learning_rate": 0.0002954100398908995,
      "loss": 10.3525,
      "step": 25
    },
    {
      "epoch": 0.003969415652398271,
      "eval_loss": 10.353515625,
      "eval_runtime": 0.0934,
      "eval_samples_per_second": 535.238,
      "eval_steps_per_second": 74.933,
      "step": 25
    },
    {
      "epoch": 0.004128192278494202,
      "grad_norm": 0.071525439620018,
      "learning_rate": 0.0002947813365416023,
      "loss": 10.3526,
      "step": 26
    },
    {
      "epoch": 0.004286968904590133,
      "grad_norm": 0.07154348492622375,
      "learning_rate": 0.0002941130514205272,
      "loss": 10.3504,
      "step": 27
    },
    {
      "epoch": 0.004445745530686064,
      "grad_norm": 0.06603620201349258,
      "learning_rate": 0.0002934053672301536,
      "loss": 10.3462,
      "step": 28
    },
    {
      "epoch": 0.004604522156781995,
      "grad_norm": 0.07479649782180786,
      "learning_rate": 0.00029265847744427303,
      "loss": 10.3493,
      "step": 29
    },
    {
      "epoch": 0.004763298782877926,
      "grad_norm": 0.0767395943403244,
      "learning_rate": 0.00029187258625509513,
      "loss": 10.349,
      "step": 30
    },
    {
      "epoch": 0.004922075408973856,
      "grad_norm": 0.07685164362192154,
      "learning_rate": 0.00029104790851742417,
      "loss": 10.3495,
      "step": 31
    },
    {
      "epoch": 0.005080852035069788,
      "grad_norm": 0.08169625699520111,
      "learning_rate": 0.0002901846696899191,
      "loss": 10.3573,
      "step": 32
    },
    {
      "epoch": 0.005239628661165718,
      "grad_norm": 0.07723478972911835,
      "learning_rate": 0.00028928310577345606,
      "loss": 10.3489,
      "step": 33
    },
    {
      "epoch": 0.005398405287261649,
      "grad_norm": 0.0714058205485344,
      "learning_rate": 0.0002883434632466077,
      "loss": 10.344,
      "step": 34
    },
    {
      "epoch": 0.00555718191335758,
      "grad_norm": 0.07186929881572723,
      "learning_rate": 0.00028736599899825856,
      "loss": 10.3461,
      "step": 35
    },
    {
      "epoch": 0.005715958539453511,
      "grad_norm": 0.07883802056312561,
      "learning_rate": 0.00028635098025737434,
      "loss": 10.3465,
      "step": 36
    },
    {
      "epoch": 0.005874735165549441,
      "grad_norm": 0.07939917594194412,
      "learning_rate": 0.00028529868451994384,
      "loss": 10.3391,
      "step": 37
    },
    {
      "epoch": 0.006033511791645373,
      "grad_norm": 0.07693421095609665,
      "learning_rate": 0.0002842093994731145,
      "loss": 10.3466,
      "step": 38
    },
    {
      "epoch": 0.006192288417741303,
      "grad_norm": 0.08108057081699371,
      "learning_rate": 0.00028308342291654174,
      "loss": 10.3399,
      "step": 39
    },
    {
      "epoch": 0.006351065043837234,
      "grad_norm": 0.07625582814216614,
      "learning_rate": 0.00028192106268097334,
      "loss": 10.3424,
      "step": 40
    },
    {
      "epoch": 0.006509841669933165,
      "grad_norm": 0.08772553503513336,
      "learning_rate": 0.00028072263654409154,
      "loss": 10.3357,
      "step": 41
    },
    {
      "epoch": 0.006668618296029096,
      "grad_norm": 0.09969928115606308,
      "learning_rate": 0.0002794884721436361,
      "loss": 10.3313,
      "step": 42
    },
    {
      "epoch": 0.006827394922125027,
      "grad_norm": 0.10627187043428421,
      "learning_rate": 0.00027821890688783083,
      "loss": 10.334,
      "step": 43
    },
    {
      "epoch": 0.006986171548220958,
      "grad_norm": 0.11822281032800674,
      "learning_rate": 0.0002769142878631403,
      "loss": 10.3403,
      "step": 44
    },
    {
      "epoch": 0.007144948174316888,
      "grad_norm": 0.13020247220993042,
      "learning_rate": 0.00027557497173937923,
      "loss": 10.3354,
      "step": 45
    },
    {
      "epoch": 0.0073037248004128195,
      "grad_norm": 0.153047114610672,
      "learning_rate": 0.000274201324672203,
      "loss": 10.3254,
      "step": 46
    },
    {
      "epoch": 0.00746250142650875,
      "grad_norm": 0.18278735876083374,
      "learning_rate": 0.00027279372220300385,
      "loss": 10.3353,
      "step": 47
    },
    {
      "epoch": 0.007621278052604681,
      "grad_norm": 0.2163456678390503,
      "learning_rate": 0.0002713525491562421,
      "loss": 10.3213,
      "step": 48
    },
    {
      "epoch": 0.007780054678700612,
      "grad_norm": 0.16671767830848694,
      "learning_rate": 0.00026987819953423867,
      "loss": 10.3174,
      "step": 49
    },
    {
      "epoch": 0.007938831304796543,
      "grad_norm": 0.22391104698181152,
      "learning_rate": 0.00026837107640945905,
      "loss": 10.3147,
      "step": 50
    },
    {
      "epoch": 0.007938831304796543,
      "eval_loss": 10.32917594909668,
      "eval_runtime": 0.0932,
      "eval_samples_per_second": 536.364,
      "eval_steps_per_second": 75.091,
      "step": 50
    },
    {
      "epoch": 0.008097607930892474,
      "grad_norm": 0.1727171391248703,
      "learning_rate": 0.0002668315918143169,
      "loss": 10.3255,
      "step": 51
    },
    {
      "epoch": 0.008256384556988404,
      "grad_norm": 0.1261977106332779,
      "learning_rate": 0.00026526016662852886,
      "loss": 10.3373,
      "step": 52
    },
    {
      "epoch": 0.008415161183084335,
      "grad_norm": 0.19324587285518646,
      "learning_rate": 0.00026365723046405023,
      "loss": 10.3299,
      "step": 53
    },
    {
      "epoch": 0.008573937809180266,
      "grad_norm": 0.16581648588180542,
      "learning_rate": 0.0002620232215476231,
      "loss": 10.3303,
      "step": 54
    },
    {
      "epoch": 0.008732714435276196,
      "grad_norm": 0.16929411888122559,
      "learning_rate": 0.0002603585866009697,
      "loss": 10.3314,
      "step": 55
    },
    {
      "epoch": 0.008891491061372127,
      "grad_norm": 0.14727461338043213,
      "learning_rate": 0.00025866378071866334,
      "loss": 10.3248,
      "step": 56
    },
    {
      "epoch": 0.009050267687468059,
      "grad_norm": 0.1087571531534195,
      "learning_rate": 0.00025693926724370956,
      "loss": 10.325,
      "step": 57
    },
    {
      "epoch": 0.00920904431356399,
      "grad_norm": 0.09721245616674423,
      "learning_rate": 0.00025518551764087326,
      "loss": 10.3173,
      "step": 58
    },
    {
      "epoch": 0.00936782093965992,
      "grad_norm": 0.08206453919410706,
      "learning_rate": 0.00025340301136778483,
      "loss": 10.3209,
      "step": 59
    },
    {
      "epoch": 0.009526597565755851,
      "grad_norm": 0.09024045616388321,
      "learning_rate": 0.00025159223574386114,
      "loss": 10.3219,
      "step": 60
    },
    {
      "epoch": 0.009685374191851783,
      "grad_norm": 0.07984571903944016,
      "learning_rate": 0.0002497536858170772,
      "loss": 10.3224,
      "step": 61
    },
    {
      "epoch": 0.009844150817947712,
      "grad_norm": 0.11346311867237091,
      "learning_rate": 0.00024788786422862526,
      "loss": 10.3194,
      "step": 62
    },
    {
      "epoch": 0.010002927444043644,
      "grad_norm": 0.10196421295404434,
      "learning_rate": 0.00024599528107549745,
      "loss": 10.3233,
      "step": 63
    },
    {
      "epoch": 0.010161704070139575,
      "grad_norm": 0.11686540395021439,
      "learning_rate": 0.00024407645377103054,
      "loss": 10.3199,
      "step": 64
    },
    {
      "epoch": 0.010320480696235505,
      "grad_norm": 0.10796058177947998,
      "learning_rate": 0.00024213190690345018,
      "loss": 10.3213,
      "step": 65
    },
    {
      "epoch": 0.010479257322331436,
      "grad_norm": 0.10543781518936157,
      "learning_rate": 0.00024016217209245374,
      "loss": 10.3191,
      "step": 66
    },
    {
      "epoch": 0.010638033948427368,
      "grad_norm": 0.1500771939754486,
      "learning_rate": 0.00023816778784387094,
      "loss": 10.3217,
      "step": 67
    },
    {
      "epoch": 0.010796810574523297,
      "grad_norm": 0.13386686146259308,
      "learning_rate": 0.0002361492994024415,
      "loss": 10.3224,
      "step": 68
    },
    {
      "epoch": 0.010955587200619229,
      "grad_norm": 0.17342054843902588,
      "learning_rate": 0.0002341072586027509,
      "loss": 10.3188,
      "step": 69
    },
    {
      "epoch": 0.01111436382671516,
      "grad_norm": 0.1327526718378067,
      "learning_rate": 0.00023204222371836405,
      "loss": 10.3154,
      "step": 70
    },
    {
      "epoch": 0.01127314045281109,
      "grad_norm": 0.15230214595794678,
      "learning_rate": 0.00022995475930919905,
      "loss": 10.3179,
      "step": 71
    },
    {
      "epoch": 0.011431917078907021,
      "grad_norm": 0.1093672513961792,
      "learning_rate": 0.00022784543606718227,
      "loss": 10.3217,
      "step": 72
    },
    {
      "epoch": 0.011590693705002953,
      "grad_norm": 0.10216083377599716,
      "learning_rate": 0.00022571483066022657,
      "loss": 10.321,
      "step": 73
    },
    {
      "epoch": 0.011749470331098882,
      "grad_norm": 0.11733946949243546,
      "learning_rate": 0.0002235635255745762,
      "loss": 10.3107,
      "step": 74
    },
    {
      "epoch": 0.011908246957194814,
      "grad_norm": 0.1087634265422821,
      "learning_rate": 0.00022139210895556104,
      "loss": 10.3107,
      "step": 75
    },
    {
      "epoch": 0.011908246957194814,
      "eval_loss": 10.310944557189941,
      "eval_runtime": 0.0928,
      "eval_samples_per_second": 538.899,
      "eval_steps_per_second": 75.446,
      "step": 75
    },
    {
      "epoch": 0.012067023583290745,
      "grad_norm": 0.09749813377857208,
      "learning_rate": 0.00021920117444680317,
      "loss": 10.3223,
      "step": 76
    },
    {
      "epoch": 0.012225800209386675,
      "grad_norm": 0.08614695817232132,
      "learning_rate": 0.00021699132102792097,
      "loss": 10.3191,
      "step": 77
    },
    {
      "epoch": 0.012384576835482606,
      "grad_norm": 0.10386686772108078,
      "learning_rate": 0.0002147631528507739,
      "loss": 10.3194,
      "step": 78
    },
    {
      "epoch": 0.012543353461578538,
      "grad_norm": 0.09013906866312027,
      "learning_rate": 0.00021251727907429355,
      "loss": 10.3176,
      "step": 79
    },
    {
      "epoch": 0.012702130087674467,
      "grad_norm": 0.12356393784284592,
      "learning_rate": 0.0002102543136979454,
      "loss": 10.3124,
      "step": 80
    },
    {
      "epoch": 0.012860906713770399,
      "grad_norm": 0.11175656318664551,
      "learning_rate": 0.0002079748753938678,
      "loss": 10.313,
      "step": 81
    },
    {
      "epoch": 0.01301968333986633,
      "grad_norm": 0.10210341215133667,
      "learning_rate": 0.0002056795873377331,
      "loss": 10.3088,
      "step": 82
    },
    {
      "epoch": 0.013178459965962262,
      "grad_norm": 0.11279959231615067,
      "learning_rate": 0.00020336907703837748,
      "loss": 10.3101,
      "step": 83
    },
    {
      "epoch": 0.013337236592058191,
      "grad_norm": 0.09707577526569366,
      "learning_rate": 0.00020104397616624645,
      "loss": 10.3137,
      "step": 84
    },
    {
      "epoch": 0.013496013218154123,
      "grad_norm": 0.0997617170214653,
      "learning_rate": 0.00019870492038070252,
      "loss": 10.3163,
      "step": 85
    },
    {
      "epoch": 0.013654789844250054,
      "grad_norm": 0.12623490393161774,
      "learning_rate": 0.0001963525491562421,
      "loss": 10.3136,
      "step": 86
    },
    {
      "epoch": 0.013813566470345984,
      "grad_norm": 0.10603071749210358,
      "learning_rate": 0.0001939875056076697,
      "loss": 10.308,
      "step": 87
    },
    {
      "epoch": 0.013972343096441915,
      "grad_norm": 0.10383056104183197,
      "learning_rate": 0.00019161043631427666,
      "loss": 10.3122,
      "step": 88
    },
    {
      "epoch": 0.014131119722537847,
      "grad_norm": 0.11057064682245255,
      "learning_rate": 0.00018922199114307294,
      "loss": 10.317,
      "step": 89
    },
    {
      "epoch": 0.014289896348633776,
      "grad_norm": 0.1066858321428299,
      "learning_rate": 0.00018682282307111987,
      "loss": 10.3097,
      "step": 90
    },
    {
      "epoch": 0.014448672974729708,
      "grad_norm": 0.08680516481399536,
      "learning_rate": 0.00018441358800701273,
      "loss": 10.3105,
      "step": 91
    },
    {
      "epoch": 0.014607449600825639,
      "grad_norm": 0.10082846879959106,
      "learning_rate": 0.00018199494461156203,
      "loss": 10.3076,
      "step": 92
    },
    {
      "epoch": 0.014766226226921569,
      "grad_norm": 0.11305494606494904,
      "learning_rate": 0.000179567554117722,
      "loss": 10.3123,
      "step": 93
    },
    {
      "epoch": 0.0149250028530175,
      "grad_norm": 0.09946515411138535,
      "learning_rate": 0.00017713208014981648,
      "loss": 10.2971,
      "step": 94
    },
    {
      "epoch": 0.015083779479113432,
      "grad_norm": 0.11427634954452515,
      "learning_rate": 0.00017468918854211007,
      "loss": 10.3018,
      "step": 95
    },
    {
      "epoch": 0.015242556105209361,
      "grad_norm": 0.11798146367073059,
      "learning_rate": 0.00017223954715677627,
      "loss": 10.3051,
      "step": 96
    },
    {
      "epoch": 0.015401332731305293,
      "grad_norm": 0.13001081347465515,
      "learning_rate": 0.00016978382570131034,
      "loss": 10.2983,
      "step": 97
    },
    {
      "epoch": 0.015560109357401224,
      "grad_norm": 0.11518680304288864,
      "learning_rate": 0.00016732269554543794,
      "loss": 10.3018,
      "step": 98
    },
    {
      "epoch": 0.015718885983497154,
      "grad_norm": 0.15253396332263947,
      "learning_rate": 0.00016485682953756942,
      "loss": 10.2871,
      "step": 99
    },
    {
      "epoch": 0.015877662609593085,
      "grad_norm": 0.20822547376155853,
      "learning_rate": 0.00016238690182084986,
      "loss": 10.288,
      "step": 100
    },
    {
      "epoch": 0.015877662609593085,
      "eval_loss": 10.301692008972168,
      "eval_runtime": 0.0933,
      "eval_samples_per_second": 535.978,
      "eval_steps_per_second": 75.037,
      "step": 100
    },
    {
      "epoch": 0.016036439235689016,
      "grad_norm": 0.24361427128314972,
      "learning_rate": 0.0001599135876488549,
      "loss": 10.3028,
      "step": 101
    },
    {
      "epoch": 0.016195215861784948,
      "grad_norm": 0.23904871940612793,
      "learning_rate": 0.00015743756320098332,
      "loss": 10.2934,
      "step": 102
    },
    {
      "epoch": 0.01635399248788088,
      "grad_norm": 0.19915081560611725,
      "learning_rate": 0.0001549595053975962,
      "loss": 10.3083,
      "step": 103
    },
    {
      "epoch": 0.016512769113976807,
      "grad_norm": 0.15487897396087646,
      "learning_rate": 0.00015248009171495378,
      "loss": 10.3023,
      "step": 104
    },
    {
      "epoch": 0.01667154574007274,
      "grad_norm": 0.17113174498081207,
      "learning_rate": 0.00015,
      "loss": 10.2996,
      "step": 105
    },
    {
      "epoch": 0.01683032236616867,
      "grad_norm": 0.12119137495756149,
      "learning_rate": 0.00014751990828504622,
      "loss": 10.3016,
      "step": 106
    },
    {
      "epoch": 0.0169890989922646,
      "grad_norm": 0.15259002149105072,
      "learning_rate": 0.00014504049460240375,
      "loss": 10.2929,
      "step": 107
    },
    {
      "epoch": 0.017147875618360533,
      "grad_norm": 0.11336802691221237,
      "learning_rate": 0.00014256243679901663,
      "loss": 10.2947,
      "step": 108
    },
    {
      "epoch": 0.017306652244456464,
      "grad_norm": 0.10615869611501694,
      "learning_rate": 0.00014008641235114508,
      "loss": 10.2989,
      "step": 109
    },
    {
      "epoch": 0.017465428870552392,
      "grad_norm": 0.10214941948652267,
      "learning_rate": 0.00013761309817915014,
      "loss": 10.2982,
      "step": 110
    },
    {
      "epoch": 0.017624205496648324,
      "grad_norm": 0.07917061448097229,
      "learning_rate": 0.00013514317046243058,
      "loss": 10.2973,
      "step": 111
    },
    {
      "epoch": 0.017782982122744255,
      "grad_norm": 0.11092983931303024,
      "learning_rate": 0.00013267730445456208,
      "loss": 10.2984,
      "step": 112
    },
    {
      "epoch": 0.017941758748840186,
      "grad_norm": 0.08627593517303467,
      "learning_rate": 0.00013021617429868963,
      "loss": 10.2967,
      "step": 113
    },
    {
      "epoch": 0.018100535374936118,
      "grad_norm": 0.10853682458400726,
      "learning_rate": 0.00012776045284322368,
      "loss": 10.3083,
      "step": 114
    },
    {
      "epoch": 0.01825931200103205,
      "grad_norm": 0.08355826884508133,
      "learning_rate": 0.00012531081145788987,
      "loss": 10.3039,
      "step": 115
    },
    {
      "epoch": 0.01841808862712798,
      "grad_norm": 0.12834160029888153,
      "learning_rate": 0.00012286791985018355,
      "loss": 10.3058,
      "step": 116
    },
    {
      "epoch": 0.01857686525322391,
      "grad_norm": 0.10183387994766235,
      "learning_rate": 0.00012043244588227796,
      "loss": 10.3207,
      "step": 117
    },
    {
      "epoch": 0.01873564187931984,
      "grad_norm": 0.0810333862900734,
      "learning_rate": 0.00011800505538843798,
      "loss": 10.3022,
      "step": 118
    },
    {
      "epoch": 0.01889441850541577,
      "grad_norm": 0.11709333211183548,
      "learning_rate": 0.00011558641199298727,
      "loss": 10.3086,
      "step": 119
    },
    {
      "epoch": 0.019053195131511703,
      "grad_norm": 0.06999313831329346,
      "learning_rate": 0.00011317717692888012,
      "loss": 10.3066,
      "step": 120
    },
    {
      "epoch": 0.019211971757607634,
      "grad_norm": 0.0975615531206131,
      "learning_rate": 0.00011077800885692702,
      "loss": 10.3069,
      "step": 121
    },
    {
      "epoch": 0.019370748383703566,
      "grad_norm": 0.08743242919445038,
      "learning_rate": 0.00010838956368572334,
      "loss": 10.3202,
      "step": 122
    },
    {
      "epoch": 0.019529525009799494,
      "grad_norm": 0.12698757648468018,
      "learning_rate": 0.0001060124943923303,
      "loss": 10.2906,
      "step": 123
    },
    {
      "epoch": 0.019688301635895425,
      "grad_norm": 0.06964551657438278,
      "learning_rate": 0.0001036474508437579,
      "loss": 10.3047,
      "step": 124
    },
    {
      "epoch": 0.019847078261991356,
      "grad_norm": 0.0764366015791893,
      "learning_rate": 0.00010129507961929748,
      "loss": 10.2964,
      "step": 125
    },
    {
      "epoch": 0.019847078261991356,
      "eval_loss": 10.29774284362793,
      "eval_runtime": 0.0936,
      "eval_samples_per_second": 534.393,
      "eval_steps_per_second": 74.815,
      "step": 125
    },
    {
      "epoch": 0.020005854888087288,
      "grad_norm": 0.08608151227235794,
      "learning_rate": 9.895602383375353e-05,
      "loss": 10.2955,
      "step": 126
    },
    {
      "epoch": 0.02016463151418322,
      "grad_norm": 0.08954727649688721,
      "learning_rate": 9.663092296162251e-05,
      "loss": 10.3007,
      "step": 127
    },
    {
      "epoch": 0.02032340814027915,
      "grad_norm": 0.10686002671718597,
      "learning_rate": 9.432041266226686e-05,
      "loss": 10.3021,
      "step": 128
    },
    {
      "epoch": 0.02048218476637508,
      "grad_norm": 0.08813920617103577,
      "learning_rate": 9.202512460613219e-05,
      "loss": 10.3064,
      "step": 129
    },
    {
      "epoch": 0.02064096139247101,
      "grad_norm": 0.07965640723705292,
      "learning_rate": 8.97456863020546e-05,
      "loss": 10.299,
      "step": 130
    },
    {
      "epoch": 0.02079973801856694,
      "grad_norm": 0.09392104297876358,
      "learning_rate": 8.748272092570646e-05,
      "loss": 10.2917,
      "step": 131
    },
    {
      "epoch": 0.020958514644662873,
      "grad_norm": 0.08309674263000488,
      "learning_rate": 8.523684714922608e-05,
      "loss": 10.2916,
      "step": 132
    },
    {
      "epoch": 0.021117291270758804,
      "grad_norm": 0.09519728273153305,
      "learning_rate": 8.300867897207903e-05,
      "loss": 10.3004,
      "step": 133
    },
    {
      "epoch": 0.021276067896854736,
      "grad_norm": 0.06975460052490234,
      "learning_rate": 8.079882555319684e-05,
      "loss": 10.3025,
      "step": 134
    },
    {
      "epoch": 0.021434844522950663,
      "grad_norm": 0.07696589082479477,
      "learning_rate": 7.860789104443896e-05,
      "loss": 10.2976,
      "step": 135
    },
    {
      "epoch": 0.021593621149046595,
      "grad_norm": 0.10613939166069031,
      "learning_rate": 7.643647442542382e-05,
      "loss": 10.3022,
      "step": 136
    },
    {
      "epoch": 0.021752397775142526,
      "grad_norm": 0.11416519433259964,
      "learning_rate": 7.428516933977347e-05,
      "loss": 10.3094,
      "step": 137
    },
    {
      "epoch": 0.021911174401238458,
      "grad_norm": 0.0883881077170372,
      "learning_rate": 7.215456393281776e-05,
      "loss": 10.2996,
      "step": 138
    },
    {
      "epoch": 0.02206995102733439,
      "grad_norm": 0.10318446159362793,
      "learning_rate": 7.004524069080096e-05,
      "loss": 10.3031,
      "step": 139
    },
    {
      "epoch": 0.02222872765343032,
      "grad_norm": 0.10122831910848618,
      "learning_rate": 6.795777628163599e-05,
      "loss": 10.2992,
      "step": 140
    },
    {
      "epoch": 0.022387504279526252,
      "grad_norm": 0.0863846093416214,
      "learning_rate": 6.58927413972491e-05,
      "loss": 10.307,
      "step": 141
    },
    {
      "epoch": 0.02254628090562218,
      "grad_norm": 0.10809484869241714,
      "learning_rate": 6.385070059755846e-05,
      "loss": 10.3024,
      "step": 142
    },
    {
      "epoch": 0.02270505753171811,
      "grad_norm": 0.0929742157459259,
      "learning_rate": 6.183221215612904e-05,
      "loss": 10.2905,
      "step": 143
    },
    {
      "epoch": 0.022863834157814043,
      "grad_norm": 0.11549130827188492,
      "learning_rate": 5.983782790754623e-05,
      "loss": 10.2985,
      "step": 144
    },
    {
      "epoch": 0.023022610783909974,
      "grad_norm": 0.13555151224136353,
      "learning_rate": 5.786809309654982e-05,
      "loss": 10.2905,
      "step": 145
    },
    {
      "epoch": 0.023181387410005905,
      "grad_norm": 0.08594315499067307,
      "learning_rate": 5.592354622896944e-05,
      "loss": 10.2922,
      "step": 146
    },
    {
      "epoch": 0.023340164036101837,
      "grad_norm": 0.1327342391014099,
      "learning_rate": 5.40047189245025e-05,
      "loss": 10.2812,
      "step": 147
    },
    {
      "epoch": 0.023498940662197765,
      "grad_norm": 0.09222450852394104,
      "learning_rate": 5.211213577137469e-05,
      "loss": 10.2914,
      "step": 148
    },
    {
      "epoch": 0.023657717288293696,
      "grad_norm": 0.12407458573579788,
      "learning_rate": 5.024631418292274e-05,
      "loss": 10.2818,
      "step": 149
    },
    {
      "epoch": 0.023816493914389628,
      "grad_norm": 0.10363201797008514,
      "learning_rate": 4.840776425613886e-05,
      "loss": 10.2824,
      "step": 150
    },
    {
      "epoch": 0.023816493914389628,
      "eval_loss": 10.295632362365723,
      "eval_runtime": 0.093,
      "eval_samples_per_second": 537.649,
      "eval_steps_per_second": 75.271,
      "step": 150
    },
    {
      "epoch": 0.02397527054048556,
      "grad_norm": 0.23319634795188904,
      "learning_rate": 4.659698863221513e-05,
      "loss": 10.2971,
      "step": 151
    },
    {
      "epoch": 0.02413404716658149,
      "grad_norm": 0.17366944253444672,
      "learning_rate": 4.481448235912671e-05,
      "loss": 10.2898,
      "step": 152
    },
    {
      "epoch": 0.024292823792677422,
      "grad_norm": 0.1751234531402588,
      "learning_rate": 4.306073275629044e-05,
      "loss": 10.2979,
      "step": 153
    },
    {
      "epoch": 0.02445160041877335,
      "grad_norm": 0.20233570039272308,
      "learning_rate": 4.133621928133665e-05,
      "loss": 10.2901,
      "step": 154
    },
    {
      "epoch": 0.02461037704486928,
      "grad_norm": 0.2154175490140915,
      "learning_rate": 3.964141339903026e-05,
      "loss": 10.2894,
      "step": 155
    },
    {
      "epoch": 0.024769153670965213,
      "grad_norm": 0.15442685782909393,
      "learning_rate": 3.797677845237696e-05,
      "loss": 10.297,
      "step": 156
    },
    {
      "epoch": 0.024927930297061144,
      "grad_norm": 0.12210218608379364,
      "learning_rate": 3.634276953594982e-05,
      "loss": 10.2925,
      "step": 157
    },
    {
      "epoch": 0.025086706923157075,
      "grad_norm": 0.10777822881937027,
      "learning_rate": 3.473983337147118e-05,
      "loss": 10.2849,
      "step": 158
    },
    {
      "epoch": 0.025245483549253007,
      "grad_norm": 0.07246610522270203,
      "learning_rate": 3.316840818568315e-05,
      "loss": 10.2933,
      "step": 159
    },
    {
      "epoch": 0.025404260175348935,
      "grad_norm": 0.08766282349824905,
      "learning_rate": 3.162892359054098e-05,
      "loss": 10.2998,
      "step": 160
    },
    {
      "epoch": 0.025563036801444866,
      "grad_norm": 0.06367377191781998,
      "learning_rate": 3.0121800465761293e-05,
      "loss": 10.3024,
      "step": 161
    },
    {
      "epoch": 0.025721813427540798,
      "grad_norm": 0.10457596927881241,
      "learning_rate": 2.8647450843757897e-05,
      "loss": 10.289,
      "step": 162
    },
    {
      "epoch": 0.02588059005363673,
      "grad_norm": 0.0702897310256958,
      "learning_rate": 2.7206277796996144e-05,
      "loss": 10.3078,
      "step": 163
    },
    {
      "epoch": 0.02603936667973266,
      "grad_norm": 0.09472493827342987,
      "learning_rate": 2.5798675327796993e-05,
      "loss": 10.2987,
      "step": 164
    },
    {
      "epoch": 0.026198143305828592,
      "grad_norm": 0.08356080204248428,
      "learning_rate": 2.4425028260620715e-05,
      "loss": 10.3073,
      "step": 165
    },
    {
      "epoch": 0.026356919931924523,
      "grad_norm": 0.06730841100215912,
      "learning_rate": 2.3085712136859668e-05,
      "loss": 10.3081,
      "step": 166
    },
    {
      "epoch": 0.02651569655802045,
      "grad_norm": 0.07436297088861465,
      "learning_rate": 2.178109311216913e-05,
      "loss": 10.3148,
      "step": 167
    },
    {
      "epoch": 0.026674473184116382,
      "grad_norm": 0.09109427034854889,
      "learning_rate": 2.0511527856363912e-05,
      "loss": 10.2942,
      "step": 168
    },
    {
      "epoch": 0.026833249810212314,
      "grad_norm": 0.07138719409704208,
      "learning_rate": 1.927736345590839e-05,
      "loss": 10.2939,
      "step": 169
    },
    {
      "epoch": 0.026992026436308245,
      "grad_norm": 0.0833205133676529,
      "learning_rate": 1.8078937319026654e-05,
      "loss": 10.3001,
      "step": 170
    },
    {
      "epoch": 0.027150803062404177,
      "grad_norm": 0.08817581087350845,
      "learning_rate": 1.6916577083458228e-05,
      "loss": 10.3022,
      "step": 171
    },
    {
      "epoch": 0.027309579688500108,
      "grad_norm": 0.08368958532810211,
      "learning_rate": 1.579060052688548e-05,
      "loss": 10.3065,
      "step": 172
    },
    {
      "epoch": 0.027468356314596036,
      "grad_norm": 0.11247318983078003,
      "learning_rate": 1.4701315480056164e-05,
      "loss": 10.3019,
      "step": 173
    },
    {
      "epoch": 0.027627132940691967,
      "grad_norm": 0.10502248257398605,
      "learning_rate": 1.3649019742625623e-05,
      "loss": 10.3019,
      "step": 174
    },
    {
      "epoch": 0.0277859095667879,
      "grad_norm": 0.0866779237985611,
      "learning_rate": 1.2634001001741373e-05,
      "loss": 10.3097,
      "step": 175
    },
    {
      "epoch": 0.0277859095667879,
      "eval_loss": 10.29531478881836,
      "eval_runtime": 0.0934,
      "eval_samples_per_second": 535.372,
      "eval_steps_per_second": 74.952,
      "step": 175
    },
    {
      "epoch": 0.02794468619288383,
      "grad_norm": 0.0854175016283989,
      "learning_rate": 1.1656536753392287e-05,
      "loss": 10.3084,
      "step": 176
    },
    {
      "epoch": 0.02810346281897976,
      "grad_norm": 0.07053595036268234,
      "learning_rate": 1.0716894226543953e-05,
      "loss": 10.3021,
      "step": 177
    },
    {
      "epoch": 0.028262239445075693,
      "grad_norm": 0.07512841373682022,
      "learning_rate": 9.815330310080887e-06,
      "loss": 10.3039,
      "step": 178
    },
    {
      "epoch": 0.02842101607117162,
      "grad_norm": 0.06513608247041702,
      "learning_rate": 8.952091482575824e-06,
      "loss": 10.2931,
      "step": 179
    },
    {
      "epoch": 0.028579792697267552,
      "grad_norm": 0.0893177017569542,
      "learning_rate": 8.127413744904804e-06,
      "loss": 10.2949,
      "step": 180
    },
    {
      "epoch": 0.028738569323363484,
      "grad_norm": 0.0912618339061737,
      "learning_rate": 7.34152255572697e-06,
      "loss": 10.2961,
      "step": 181
    },
    {
      "epoch": 0.028897345949459415,
      "grad_norm": 0.07610266655683517,
      "learning_rate": 6.594632769846353e-06,
      "loss": 10.3046,
      "step": 182
    },
    {
      "epoch": 0.029056122575555347,
      "grad_norm": 0.07157953828573227,
      "learning_rate": 5.886948579472778e-06,
      "loss": 10.3041,
      "step": 183
    },
    {
      "epoch": 0.029214899201651278,
      "grad_norm": 0.07713142782449722,
      "learning_rate": 5.218663458397715e-06,
      "loss": 10.2997,
      "step": 184
    },
    {
      "epoch": 0.02937367582774721,
      "grad_norm": 0.07938244193792343,
      "learning_rate": 4.589960109100444e-06,
      "loss": 10.3044,
      "step": 185
    },
    {
      "epoch": 0.029532452453843137,
      "grad_norm": 0.11273758113384247,
      "learning_rate": 4.001010412799138e-06,
      "loss": 10.306,
      "step": 186
    },
    {
      "epoch": 0.02969122907993907,
      "grad_norm": 0.08427654206752777,
      "learning_rate": 3.451975382460109e-06,
      "loss": 10.3039,
      "step": 187
    },
    {
      "epoch": 0.029850005706035,
      "grad_norm": 0.09040991216897964,
      "learning_rate": 2.9430051187785962e-06,
      "loss": 10.2942,
      "step": 188
    },
    {
      "epoch": 0.03000878233213093,
      "grad_norm": 0.0981961265206337,
      "learning_rate": 2.4742387691426445e-06,
      "loss": 10.296,
      "step": 189
    },
    {
      "epoch": 0.030167558958226863,
      "grad_norm": 0.06700126826763153,
      "learning_rate": 2.0458044895916513e-06,
      "loss": 10.3013,
      "step": 190
    },
    {
      "epoch": 0.030326335584322794,
      "grad_norm": 0.07272301614284515,
      "learning_rate": 1.6578194097797258e-06,
      "loss": 10.2967,
      "step": 191
    },
    {
      "epoch": 0.030485112210418722,
      "grad_norm": 0.10850653797388077,
      "learning_rate": 1.3103896009537207e-06,
      "loss": 10.2957,
      "step": 192
    },
    {
      "epoch": 0.030643888836514654,
      "grad_norm": 0.08605331182479858,
      "learning_rate": 1.0036100469542786e-06,
      "loss": 10.2992,
      "step": 193
    },
    {
      "epoch": 0.030802665462610585,
      "grad_norm": 0.08742884546518326,
      "learning_rate": 7.375646182482875e-07,
      "loss": 10.2881,
      "step": 194
    },
    {
      "epoch": 0.030961442088706517,
      "grad_norm": 0.12425903230905533,
      "learning_rate": 5.123260489995229e-07,
      "loss": 10.3004,
      "step": 195
    },
    {
      "epoch": 0.031120218714802448,
      "grad_norm": 0.14287036657333374,
      "learning_rate": 3.2795591718381975e-07,
      "loss": 10.2931,
      "step": 196
    },
    {
      "epoch": 0.031278995340898376,
      "grad_norm": 0.10366120934486389,
      "learning_rate": 1.8450462775428942e-07,
      "loss": 10.2972,
      "step": 197
    },
    {
      "epoch": 0.03143777196699431,
      "grad_norm": 0.14705786108970642,
      "learning_rate": 8.201139886109264e-08,
      "loss": 10.2905,
      "step": 198
    },
    {
      "epoch": 0.03159654859309024,
      "grad_norm": 0.10673689842224121,
      "learning_rate": 2.0504251129649374e-08,
      "loss": 10.288,
      "step": 199
    },
    {
      "epoch": 0.03175532521918617,
      "grad_norm": 0.14931601285934448,
      "learning_rate": 0.0,
      "loss": 10.2792,
      "step": 200
    },
    {
      "epoch": 0.03175532521918617,
      "eval_loss": 10.293888092041016,
      "eval_runtime": 0.0926,
      "eval_samples_per_second": 539.938,
      "eval_steps_per_second": 75.591,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 22228539801600.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}