{
  "best_metric": 2.4329843521118164,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.028667670035117897,
  "eval_steps": 25,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00014333835017558947,
      "grad_norm": 4.095588207244873,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 6.7525,
      "step": 1
    },
    {
      "epoch": 0.00014333835017558947,
      "eval_loss": 6.518734931945801,
      "eval_runtime": 4.4502,
      "eval_samples_per_second": 11.235,
      "eval_steps_per_second": 1.573,
      "step": 1
    },
    {
      "epoch": 0.00028667670035117894,
      "grad_norm": 3.9740452766418457,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 6.4827,
      "step": 2
    },
    {
      "epoch": 0.00043001505052676843,
      "grad_norm": 4.090057373046875,
      "learning_rate": 8.999999999999999e-05,
      "loss": 6.803,
      "step": 3
    },
    {
      "epoch": 0.0005733534007023579,
      "grad_norm": 4.096340656280518,
      "learning_rate": 0.00011999999999999999,
      "loss": 6.3331,
      "step": 4
    },
    {
      "epoch": 0.0007166917508779474,
      "grad_norm": 3.8779053688049316,
      "learning_rate": 0.00015,
      "loss": 5.8767,
      "step": 5
    },
    {
      "epoch": 0.0008600301010535369,
      "grad_norm": 3.677866220474243,
      "learning_rate": 0.00017999999999999998,
      "loss": 5.0901,
      "step": 6
    },
    {
      "epoch": 0.0010033684512291263,
      "grad_norm": 3.940865993499756,
      "learning_rate": 0.00020999999999999998,
      "loss": 4.4307,
      "step": 7
    },
    {
      "epoch": 0.0011467068014047158,
      "grad_norm": 3.4478397369384766,
      "learning_rate": 0.00023999999999999998,
      "loss": 3.9003,
      "step": 8
    },
    {
      "epoch": 0.0012900451515803052,
      "grad_norm": 1.962295651435852,
      "learning_rate": 0.00027,
      "loss": 3.133,
      "step": 9
    },
    {
      "epoch": 0.0014333835017558947,
      "grad_norm": 2.3240482807159424,
      "learning_rate": 0.0003,
      "loss": 2.9516,
      "step": 10
    },
    {
      "epoch": 0.0015767218519314842,
      "grad_norm": 1.8678102493286133,
      "learning_rate": 0.0002999794957488703,
      "loss": 2.8182,
      "step": 11
    },
    {
      "epoch": 0.0017200602021070737,
      "grad_norm": 1.6835949420928955,
      "learning_rate": 0.0002999179886011389,
      "loss": 2.6626,
      "step": 12
    },
    {
      "epoch": 0.0018633985522826632,
      "grad_norm": 1.4440191984176636,
      "learning_rate": 0.0002998154953722457,
      "loss": 2.6,
      "step": 13
    },
    {
      "epoch": 0.0020067369024582525,
      "grad_norm": 3.3333661556243896,
      "learning_rate": 0.00029967204408281613,
      "loss": 2.6397,
      "step": 14
    },
    {
      "epoch": 0.0021500752526338422,
      "grad_norm": 2.430225372314453,
      "learning_rate": 0.00029948767395100045,
      "loss": 2.7674,
      "step": 15
    },
    {
      "epoch": 0.0022934136028094315,
      "grad_norm": 1.6387251615524292,
      "learning_rate": 0.0002992624353817517,
      "loss": 2.5239,
      "step": 16
    },
    {
      "epoch": 0.002436751952985021,
      "grad_norm": 1.1381381750106812,
      "learning_rate": 0.0002989963899530457,
      "loss": 2.5054,
      "step": 17
    },
    {
      "epoch": 0.0025800903031606105,
      "grad_norm": 1.326271414756775,
      "learning_rate": 0.00029868961039904624,
      "loss": 2.5822,
      "step": 18
    },
    {
      "epoch": 0.0027234286533362,
      "grad_norm": 1.0840177536010742,
      "learning_rate": 0.00029834218059022024,
      "loss": 2.4767,
      "step": 19
    },
    {
      "epoch": 0.0028667670035117895,
      "grad_norm": 1.4158834218978882,
      "learning_rate": 0.00029795419551040833,
      "loss": 2.484,
      "step": 20
    },
    {
      "epoch": 0.003010105353687379,
      "grad_norm": 1.3069770336151123,
      "learning_rate": 0.00029752576123085736,
      "loss": 2.4704,
      "step": 21
    },
    {
      "epoch": 0.0031534437038629685,
      "grad_norm": 0.8556431531906128,
      "learning_rate": 0.0002970569948812214,
      "loss": 2.5037,
      "step": 22
    },
    {
      "epoch": 0.003296782054038558,
      "grad_norm": 0.8674086332321167,
      "learning_rate": 0.0002965480246175399,
      "loss": 2.5533,
      "step": 23
    },
    {
      "epoch": 0.0034401204042141475,
      "grad_norm": 0.8356068134307861,
      "learning_rate": 0.0002959989895872009,
      "loss": 2.5738,
      "step": 24
    },
    {
      "epoch": 0.003583458754389737,
      "grad_norm": 1.8884528875350952,
      "learning_rate": 0.0002954100398908995,
      "loss": 2.4843,
      "step": 25
    },
    {
      "epoch": 0.003583458754389737,
      "eval_loss": 2.4727981090545654,
      "eval_runtime": 4.572,
      "eval_samples_per_second": 10.936,
      "eval_steps_per_second": 1.531,
      "step": 25
    },
    {
      "epoch": 0.0037267971045653265,
      "grad_norm": 0.7543926239013672,
      "learning_rate": 0.0002947813365416023,
      "loss": 2.4307,
      "step": 26
    },
    {
      "epoch": 0.0038701354547409157,
      "grad_norm": 0.5183073282241821,
      "learning_rate": 0.0002941130514205272,
      "loss": 2.4546,
      "step": 27
    },
    {
      "epoch": 0.004013473804916505,
      "grad_norm": 2.17268443107605,
      "learning_rate": 0.0002934053672301536,
      "loss": 2.4966,
      "step": 28
    },
    {
      "epoch": 0.004156812155092095,
      "grad_norm": 0.5066993236541748,
      "learning_rate": 0.00029265847744427303,
      "loss": 2.4952,
      "step": 29
    },
    {
      "epoch": 0.0043001505052676844,
      "grad_norm": 0.4776642918586731,
      "learning_rate": 0.00029187258625509513,
      "loss": 2.5126,
      "step": 30
    },
    {
      "epoch": 0.004443488855443274,
      "grad_norm": 0.6363259553909302,
      "learning_rate": 0.00029104790851742417,
      "loss": 2.5075,
      "step": 31
    },
    {
      "epoch": 0.004586827205618863,
      "grad_norm": 0.4743508994579315,
      "learning_rate": 0.0002901846696899191,
      "loss": 2.4578,
      "step": 32
    },
    {
      "epoch": 0.004730165555794453,
      "grad_norm": 0.3669068217277527,
      "learning_rate": 0.00028928310577345606,
      "loss": 2.4547,
      "step": 33
    },
    {
      "epoch": 0.004873503905970042,
      "grad_norm": 0.48330530524253845,
      "learning_rate": 0.0002883434632466077,
      "loss": 2.6293,
      "step": 34
    },
    {
      "epoch": 0.005016842256145632,
      "grad_norm": 0.44809746742248535,
      "learning_rate": 0.00028736599899825856,
      "loss": 2.4301,
      "step": 35
    },
    {
      "epoch": 0.005160180606321221,
      "grad_norm": 0.5167154669761658,
      "learning_rate": 0.00028635098025737434,
      "loss": 2.5549,
      "step": 36
    },
    {
      "epoch": 0.005303518956496811,
      "grad_norm": 0.5323503613471985,
      "learning_rate": 0.00028529868451994384,
      "loss": 2.4602,
      "step": 37
    },
    {
      "epoch": 0.0054468573066724,
      "grad_norm": 0.49367713928222656,
      "learning_rate": 0.0002842093994731145,
      "loss": 2.3723,
      "step": 38
    },
    {
      "epoch": 0.005590195656847989,
      "grad_norm": 0.4007982909679413,
      "learning_rate": 0.00028308342291654174,
      "loss": 2.404,
      "step": 39
    },
    {
      "epoch": 0.005733534007023579,
      "grad_norm": 0.5526387095451355,
      "learning_rate": 0.00028192106268097334,
      "loss": 2.5208,
      "step": 40
    },
    {
      "epoch": 0.005876872357199169,
      "grad_norm": 0.6063050627708435,
      "learning_rate": 0.00028072263654409154,
      "loss": 2.5084,
      "step": 41
    },
    {
      "epoch": 0.006020210707374758,
      "grad_norm": 0.5957720875740051,
      "learning_rate": 0.0002794884721436361,
      "loss": 2.4823,
      "step": 42
    },
    {
      "epoch": 0.006163549057550347,
      "grad_norm": 0.4266456663608551,
      "learning_rate": 0.00027821890688783083,
      "loss": 2.4275,
      "step": 43
    },
    {
      "epoch": 0.006306887407725937,
      "grad_norm": 0.6097196340560913,
      "learning_rate": 0.0002769142878631403,
      "loss": 2.4463,
      "step": 44
    },
    {
      "epoch": 0.006450225757901527,
      "grad_norm": 0.4464048445224762,
      "learning_rate": 0.00027557497173937923,
      "loss": 2.441,
      "step": 45
    },
    {
      "epoch": 0.006593564108077116,
      "grad_norm": 0.7037805914878845,
      "learning_rate": 0.000274201324672203,
      "loss": 2.5366,
      "step": 46
    },
    {
      "epoch": 0.006736902458252705,
      "grad_norm": 0.5217775702476501,
      "learning_rate": 0.00027279372220300385,
      "loss": 2.4081,
      "step": 47
    },
    {
      "epoch": 0.006880240808428295,
      "grad_norm": 0.4792337417602539,
      "learning_rate": 0.0002713525491562421,
      "loss": 2.5082,
      "step": 48
    },
    {
      "epoch": 0.007023579158603885,
      "grad_norm": 0.3848729729652405,
      "learning_rate": 0.00026987819953423867,
      "loss": 2.5108,
      "step": 49
    },
    {
      "epoch": 0.007166917508779474,
      "grad_norm": 0.4266701638698578,
      "learning_rate": 0.00026837107640945905,
      "loss": 2.4282,
      "step": 50
    },
    {
      "epoch": 0.007166917508779474,
      "eval_loss": 2.515470027923584,
      "eval_runtime": 4.5689,
      "eval_samples_per_second": 10.944,
      "eval_steps_per_second": 1.532,
      "step": 50
    },
    {
      "epoch": 0.007310255858955063,
      "grad_norm": 0.4437111020088196,
      "learning_rate": 0.0002668315918143169,
      "loss": 2.3658,
      "step": 51
    },
    {
      "epoch": 0.007453594209130653,
      "grad_norm": 0.3480053246021271,
      "learning_rate": 0.00026526016662852886,
      "loss": 2.4018,
      "step": 52
    },
    {
      "epoch": 0.007596932559306243,
      "grad_norm": 0.45664915442466736,
      "learning_rate": 0.00026365723046405023,
      "loss": 2.4212,
      "step": 53
    },
    {
      "epoch": 0.0077402709094818315,
      "grad_norm": 0.30473992228507996,
      "learning_rate": 0.0002620232215476231,
      "loss": 2.3651,
      "step": 54
    },
    {
      "epoch": 0.007883609259657422,
      "grad_norm": 0.3738688826560974,
      "learning_rate": 0.0002603585866009697,
      "loss": 2.3824,
      "step": 55
    },
    {
      "epoch": 0.00802694760983301,
      "grad_norm": 0.4079970717430115,
      "learning_rate": 0.00025866378071866334,
      "loss": 2.5106,
      "step": 56
    },
    {
      "epoch": 0.0081702859600086,
      "grad_norm": 0.6227981448173523,
      "learning_rate": 0.00025693926724370956,
      "loss": 2.5133,
      "step": 57
    },
    {
      "epoch": 0.00831362431018419,
      "grad_norm": 0.5902373194694519,
      "learning_rate": 0.00025518551764087326,
      "loss": 2.4108,
      "step": 58
    },
    {
      "epoch": 0.00845696266035978,
      "grad_norm": 0.3553559482097626,
      "learning_rate": 0.00025340301136778483,
      "loss": 2.4769,
      "step": 59
    },
    {
      "epoch": 0.008600301010535369,
      "grad_norm": 0.30603644251823425,
      "learning_rate": 0.00025159223574386114,
      "loss": 2.3449,
      "step": 60
    },
    {
      "epoch": 0.008743639360710959,
      "grad_norm": 0.38652244210243225,
      "learning_rate": 0.0002497536858170772,
      "loss": 2.4605,
      "step": 61
    },
    {
      "epoch": 0.008886977710886548,
      "grad_norm": 0.8515211939811707,
      "learning_rate": 0.00024788786422862526,
      "loss": 2.5039,
      "step": 62
    },
    {
      "epoch": 0.009030316061062138,
      "grad_norm": 0.48116958141326904,
      "learning_rate": 0.00024599528107549745,
      "loss": 2.4173,
      "step": 63
    },
    {
      "epoch": 0.009173654411237726,
      "grad_norm": 0.394767701625824,
      "learning_rate": 0.00024407645377103054,
      "loss": 2.4306,
      "step": 64
    },
    {
      "epoch": 0.009316992761413316,
      "grad_norm": 0.3945147693157196,
      "learning_rate": 0.00024213190690345018,
      "loss": 2.4364,
      "step": 65
    },
    {
      "epoch": 0.009460331111588905,
      "grad_norm": 3.018016815185547,
      "learning_rate": 0.00024016217209245374,
      "loss": 2.458,
      "step": 66
    },
    {
      "epoch": 0.009603669461764495,
      "grad_norm": 0.4277852773666382,
      "learning_rate": 0.00023816778784387094,
      "loss": 2.5136,
      "step": 67
    },
    {
      "epoch": 0.009747007811940085,
      "grad_norm": 0.4447251856327057,
      "learning_rate": 0.0002361492994024415,
      "loss": 2.4805,
      "step": 68
    },
    {
      "epoch": 0.009890346162115675,
      "grad_norm": 0.48989495635032654,
      "learning_rate": 0.0002341072586027509,
      "loss": 2.4012,
      "step": 69
    },
    {
      "epoch": 0.010033684512291264,
      "grad_norm": 1.8352046012878418,
      "learning_rate": 0.00023204222371836405,
      "loss": 2.5,
      "step": 70
    },
    {
      "epoch": 0.010177022862466852,
      "grad_norm": 0.4361478388309479,
      "learning_rate": 0.00022995475930919905,
      "loss": 2.3609,
      "step": 71
    },
    {
      "epoch": 0.010320361212642442,
      "grad_norm": 0.3762364387512207,
      "learning_rate": 0.00022784543606718227,
      "loss": 2.4299,
      "step": 72
    },
    {
      "epoch": 0.010463699562818032,
      "grad_norm": 0.5353628993034363,
      "learning_rate": 0.00022571483066022657,
      "loss": 2.4636,
      "step": 73
    },
    {
      "epoch": 0.010607037912993621,
      "grad_norm": 0.4717017710208893,
      "learning_rate": 0.0002235635255745762,
      "loss": 2.3873,
      "step": 74
    },
    {
      "epoch": 0.010750376263169211,
      "grad_norm": 0.4951079189777374,
      "learning_rate": 0.00022139210895556104,
      "loss": 2.3833,
      "step": 75
    },
    {
      "epoch": 0.010750376263169211,
      "eval_loss": 2.5027923583984375,
      "eval_runtime": 4.575,
      "eval_samples_per_second": 10.929,
      "eval_steps_per_second": 1.53,
      "step": 75
    },
    {
      "epoch": 0.0108937146133448,
      "grad_norm": 1.1163285970687866,
      "learning_rate": 0.00021920117444680317,
      "loss": 2.4308,
      "step": 76
    },
    {
      "epoch": 0.01103705296352039,
      "grad_norm": 0.5007464289665222,
      "learning_rate": 0.00021699132102792097,
      "loss": 2.4557,
      "step": 77
    },
    {
      "epoch": 0.011180391313695979,
      "grad_norm": 0.48471787571907043,
      "learning_rate": 0.0002147631528507739,
      "loss": 2.4868,
      "step": 78
    },
    {
      "epoch": 0.011323729663871568,
      "grad_norm": 0.4553197920322418,
      "learning_rate": 0.00021251727907429355,
      "loss": 2.4538,
      "step": 79
    },
    {
      "epoch": 0.011467068014047158,
      "grad_norm": 0.49402543902397156,
      "learning_rate": 0.0002102543136979454,
      "loss": 2.4061,
      "step": 80
    },
    {
      "epoch": 0.011610406364222748,
      "grad_norm": 0.43397530913352966,
      "learning_rate": 0.0002079748753938678,
      "loss": 2.5284,
      "step": 81
    },
    {
      "epoch": 0.011753744714398337,
      "grad_norm": 0.39252761006355286,
      "learning_rate": 0.0002056795873377331,
      "loss": 2.4795,
      "step": 82
    },
    {
      "epoch": 0.011897083064573927,
      "grad_norm": 0.4337272644042969,
      "learning_rate": 0.00020336907703837748,
      "loss": 2.4098,
      "step": 83
    },
    {
      "epoch": 0.012040421414749517,
      "grad_norm": 0.5519135594367981,
      "learning_rate": 0.00020104397616624645,
      "loss": 2.4282,
      "step": 84
    },
    {
      "epoch": 0.012183759764925106,
      "grad_norm": 0.46673280000686646,
      "learning_rate": 0.00019870492038070252,
      "loss": 2.483,
      "step": 85
    },
    {
      "epoch": 0.012327098115100694,
      "grad_norm": 0.42640626430511475,
      "learning_rate": 0.0001963525491562421,
      "loss": 2.5023,
      "step": 86
    },
    {
      "epoch": 0.012470436465276284,
      "grad_norm": 0.4718412458896637,
      "learning_rate": 0.0001939875056076697,
      "loss": 2.5087,
      "step": 87
    },
    {
      "epoch": 0.012613774815451874,
      "grad_norm": 0.5302379727363586,
      "learning_rate": 0.00019161043631427666,
      "loss": 2.4402,
      "step": 88
    },
    {
      "epoch": 0.012757113165627464,
      "grad_norm": 0.3727835416793823,
      "learning_rate": 0.00018922199114307294,
      "loss": 2.4963,
      "step": 89
    },
    {
      "epoch": 0.012900451515803053,
      "grad_norm": 0.9599354863166809,
      "learning_rate": 0.00018682282307111987,
      "loss": 2.437,
      "step": 90
    },
    {
      "epoch": 0.013043789865978643,
      "grad_norm": 0.40621206164360046,
      "learning_rate": 0.00018441358800701273,
      "loss": 2.3686,
      "step": 91
    },
    {
      "epoch": 0.013187128216154233,
      "grad_norm": 0.40959206223487854,
      "learning_rate": 0.00018199494461156203,
      "loss": 2.4784,
      "step": 92
    },
    {
      "epoch": 0.01333046656632982,
      "grad_norm": 0.4783549904823303,
      "learning_rate": 0.000179567554117722,
      "loss": 2.5026,
      "step": 93
    },
    {
      "epoch": 0.01347380491650541,
      "grad_norm": 0.4089532792568207,
      "learning_rate": 0.00017713208014981648,
      "loss": 2.4641,
      "step": 94
    },
    {
      "epoch": 0.013617143266681,
      "grad_norm": 0.40697500109672546,
      "learning_rate": 0.00017468918854211007,
      "loss": 2.4023,
      "step": 95
    },
    {
      "epoch": 0.01376048161685659,
      "grad_norm": 0.44492053985595703,
      "learning_rate": 0.00017223954715677627,
      "loss": 2.4689,
      "step": 96
    },
    {
      "epoch": 0.01390381996703218,
      "grad_norm": 0.4386192560195923,
      "learning_rate": 0.00016978382570131034,
      "loss": 2.3685,
      "step": 97
    },
    {
      "epoch": 0.01404715831720777,
      "grad_norm": 0.9035962820053101,
      "learning_rate": 0.00016732269554543794,
      "loss": 2.434,
      "step": 98
    },
    {
      "epoch": 0.014190496667383359,
      "grad_norm": 0.5646589994430542,
      "learning_rate": 0.00016485682953756942,
      "loss": 2.5078,
      "step": 99
    },
    {
      "epoch": 0.014333835017558949,
      "grad_norm": 0.6752585172653198,
      "learning_rate": 0.00016238690182084986,
      "loss": 2.3888,
      "step": 100
    },
    {
      "epoch": 0.014333835017558949,
      "eval_loss": 2.4761135578155518,
      "eval_runtime": 4.5655,
      "eval_samples_per_second": 10.952,
      "eval_steps_per_second": 1.533,
      "step": 100
    },
    {
      "epoch": 0.014477173367734537,
      "grad_norm": 0.6777223348617554,
      "learning_rate": 0.0001599135876488549,
      "loss": 2.4187,
      "step": 101
    },
    {
      "epoch": 0.014620511717910126,
      "grad_norm": 0.6800750494003296,
      "learning_rate": 0.00015743756320098332,
      "loss": 2.4502,
      "step": 102
    },
    {
      "epoch": 0.014763850068085716,
      "grad_norm": 0.52974534034729,
      "learning_rate": 0.0001549595053975962,
      "loss": 2.4709,
      "step": 103
    },
    {
      "epoch": 0.014907188418261306,
      "grad_norm": 0.502795934677124,
      "learning_rate": 0.00015248009171495378,
      "loss": 2.3035,
      "step": 104
    },
    {
      "epoch": 0.015050526768436896,
      "grad_norm": 0.6192975044250488,
      "learning_rate": 0.00015,
      "loss": 2.6338,
      "step": 105
    },
    {
      "epoch": 0.015193865118612485,
      "grad_norm": 0.4301483929157257,
      "learning_rate": 0.00014751990828504622,
      "loss": 2.3471,
      "step": 106
    },
    {
      "epoch": 0.015337203468788075,
      "grad_norm": 0.4026905596256256,
      "learning_rate": 0.00014504049460240375,
      "loss": 2.4869,
      "step": 107
    },
    {
      "epoch": 0.015480541818963663,
      "grad_norm": 0.33096885681152344,
      "learning_rate": 0.00014256243679901663,
      "loss": 2.3908,
      "step": 108
    },
    {
      "epoch": 0.015623880169139253,
      "grad_norm": 0.3435579836368561,
      "learning_rate": 0.00014008641235114508,
      "loss": 2.4443,
      "step": 109
    },
    {
      "epoch": 0.015767218519314844,
      "grad_norm": 0.5849462151527405,
      "learning_rate": 0.00013761309817915014,
      "loss": 2.357,
      "step": 110
    },
    {
      "epoch": 0.015910556869490434,
      "grad_norm": 0.415574848651886,
      "learning_rate": 0.00013514317046243058,
      "loss": 2.4204,
      "step": 111
    },
    {
      "epoch": 0.01605389521966602,
      "grad_norm": 0.5506088137626648,
      "learning_rate": 0.00013267730445456208,
      "loss": 2.4529,
      "step": 112
    },
    {
      "epoch": 0.01619723356984161,
      "grad_norm": 0.4681348204612732,
      "learning_rate": 0.00013021617429868963,
      "loss": 2.4031,
      "step": 113
    },
    {
      "epoch": 0.0163405719200172,
      "grad_norm": 0.33748188614845276,
      "learning_rate": 0.00012776045284322368,
      "loss": 2.3714,
      "step": 114
    },
    {
      "epoch": 0.01648391027019279,
      "grad_norm": 0.36723262071609497,
      "learning_rate": 0.00012531081145788987,
      "loss": 2.4555,
      "step": 115
    },
    {
      "epoch": 0.01662724862036838,
      "grad_norm": 0.454904168844223,
      "learning_rate": 0.00012286791985018355,
      "loss": 2.4527,
      "step": 116
    },
    {
      "epoch": 0.01677058697054397,
      "grad_norm": 0.4296797215938568,
      "learning_rate": 0.00012043244588227796,
      "loss": 2.4918,
      "step": 117
    },
    {
      "epoch": 0.01691392532071956,
      "grad_norm": 0.433634877204895,
      "learning_rate": 0.00011800505538843798,
      "loss": 2.5004,
      "step": 118
    },
    {
      "epoch": 0.017057263670895148,
      "grad_norm": 0.3770894706249237,
      "learning_rate": 0.00011558641199298727,
      "loss": 2.4621,
      "step": 119
    },
    {
      "epoch": 0.017200602021070738,
      "grad_norm": 0.44861501455307007,
      "learning_rate": 0.00011317717692888012,
      "loss": 2.3952,
      "step": 120
    },
    {
      "epoch": 0.017343940371246327,
      "grad_norm": 0.4860612154006958,
      "learning_rate": 0.00011077800885692702,
      "loss": 2.3897,
      "step": 121
    },
    {
      "epoch": 0.017487278721421917,
      "grad_norm": 0.3798341751098633,
      "learning_rate": 0.00010838956368572334,
      "loss": 2.4805,
      "step": 122
    },
    {
      "epoch": 0.017630617071597507,
      "grad_norm": 0.3528059720993042,
      "learning_rate": 0.0001060124943923303,
      "loss": 2.3983,
      "step": 123
    },
    {
      "epoch": 0.017773955421773097,
      "grad_norm": 0.37472400069236755,
      "learning_rate": 0.0001036474508437579,
      "loss": 2.3448,
      "step": 124
    },
    {
      "epoch": 0.017917293771948686,
      "grad_norm": 0.450785756111145,
      "learning_rate": 0.00010129507961929748,
      "loss": 2.4047,
      "step": 125
    },
    {
      "epoch": 0.017917293771948686,
      "eval_loss": 2.4208545684814453,
      "eval_runtime": 4.5669,
      "eval_samples_per_second": 10.948,
      "eval_steps_per_second": 1.533,
      "step": 125
    },
    {
      "epoch": 0.018060632122124276,
      "grad_norm": 0.42055612802505493,
      "learning_rate": 9.895602383375353e-05,
      "loss": 2.3805,
      "step": 126
    },
    {
      "epoch": 0.018203970472299862,
      "grad_norm": 0.4635365605354309,
      "learning_rate": 9.663092296162251e-05,
      "loss": 2.3714,
      "step": 127
    },
    {
      "epoch": 0.018347308822475452,
      "grad_norm": 0.36117833852767944,
      "learning_rate": 9.432041266226686e-05,
      "loss": 2.4163,
      "step": 128
    },
    {
      "epoch": 0.01849064717265104,
      "grad_norm": 0.3636980950832367,
      "learning_rate": 9.202512460613219e-05,
      "loss": 2.362,
      "step": 129
    },
    {
      "epoch": 0.01863398552282663,
      "grad_norm": 0.4152284860610962,
      "learning_rate": 8.97456863020546e-05,
      "loss": 2.4751,
      "step": 130
    },
    {
      "epoch": 0.01877732387300222,
      "grad_norm": 0.5936709642410278,
      "learning_rate": 8.748272092570646e-05,
      "loss": 2.4146,
      "step": 131
    },
    {
      "epoch": 0.01892066222317781,
      "grad_norm": 0.43432939052581787,
      "learning_rate": 8.523684714922608e-05,
      "loss": 2.3515,
      "step": 132
    },
    {
      "epoch": 0.0190640005733534,
      "grad_norm": 0.5816676020622253,
      "learning_rate": 8.300867897207903e-05,
      "loss": 2.4006,
      "step": 133
    },
    {
      "epoch": 0.01920733892352899,
      "grad_norm": 0.4577726423740387,
      "learning_rate": 8.079882555319684e-05,
      "loss": 2.4041,
      "step": 134
    },
    {
      "epoch": 0.01935067727370458,
      "grad_norm": 0.5329848527908325,
      "learning_rate": 7.860789104443896e-05,
      "loss": 2.4021,
      "step": 135
    },
    {
      "epoch": 0.01949401562388017,
      "grad_norm": 0.4278225302696228,
      "learning_rate": 7.643647442542382e-05,
      "loss": 2.3308,
      "step": 136
    },
    {
      "epoch": 0.01963735397405576,
      "grad_norm": 0.42905911803245544,
      "learning_rate": 7.428516933977347e-05,
      "loss": 2.4348,
      "step": 137
    },
    {
      "epoch": 0.01978069232423135,
      "grad_norm": 0.4884713292121887,
      "learning_rate": 7.215456393281776e-05,
      "loss": 2.3535,
      "step": 138
    },
    {
      "epoch": 0.01992403067440694,
      "grad_norm": 0.5074369311332703,
      "learning_rate": 7.004524069080096e-05,
      "loss": 2.4361,
      "step": 139
    },
    {
      "epoch": 0.02006736902458253,
      "grad_norm": 0.5184245109558105,
      "learning_rate": 6.795777628163599e-05,
      "loss": 2.48,
      "step": 140
    },
    {
      "epoch": 0.02021070737475812,
      "grad_norm": 0.4724279046058655,
      "learning_rate": 6.58927413972491e-05,
      "loss": 2.3833,
      "step": 141
    },
    {
      "epoch": 0.020354045724933705,
      "grad_norm": 0.42493098974227905,
      "learning_rate": 6.385070059755846e-05,
      "loss": 2.4109,
      "step": 142
    },
    {
      "epoch": 0.020497384075109294,
      "grad_norm": 0.46368831396102905,
      "learning_rate": 6.183221215612904e-05,
      "loss": 2.403,
      "step": 143
    },
    {
      "epoch": 0.020640722425284884,
      "grad_norm": 0.5204209685325623,
      "learning_rate": 5.983782790754623e-05,
      "loss": 2.4407,
      "step": 144
    },
    {
      "epoch": 0.020784060775460474,
      "grad_norm": 0.5628048777580261,
      "learning_rate": 5.786809309654982e-05,
      "loss": 2.4423,
      "step": 145
    },
    {
      "epoch": 0.020927399125636063,
      "grad_norm": 0.43388831615448,
      "learning_rate": 5.592354622896944e-05,
      "loss": 2.4248,
      "step": 146
    },
    {
      "epoch": 0.021070737475811653,
      "grad_norm": 0.43663108348846436,
      "learning_rate": 5.40047189245025e-05,
      "loss": 2.4316,
      "step": 147
    },
    {
      "epoch": 0.021214075825987243,
      "grad_norm": 0.4974912703037262,
      "learning_rate": 5.211213577137469e-05,
      "loss": 2.4239,
      "step": 148
    },
    {
      "epoch": 0.021357414176162832,
      "grad_norm": 0.48720306158065796,
      "learning_rate": 5.024631418292274e-05,
      "loss": 2.4115,
      "step": 149
    },
    {
      "epoch": 0.021500752526338422,
      "grad_norm": 0.4307795763015747,
      "learning_rate": 4.840776425613886e-05,
      "loss": 2.4406,
      "step": 150
    },
    {
      "epoch": 0.021500752526338422,
      "eval_loss": 2.442666530609131,
      "eval_runtime": 4.5664,
      "eval_samples_per_second": 10.95,
      "eval_steps_per_second": 1.533,
      "step": 150
    },
    {
      "epoch": 0.021644090876514012,
      "grad_norm": 0.42068323493003845,
      "learning_rate": 4.659698863221513e-05,
      "loss": 2.343,
      "step": 151
    },
    {
      "epoch": 0.0217874292266896,
      "grad_norm": 0.436985045671463,
      "learning_rate": 4.481448235912671e-05,
      "loss": 2.3775,
      "step": 152
    },
    {
      "epoch": 0.02193076757686519,
      "grad_norm": 0.43744659423828125,
      "learning_rate": 4.306073275629044e-05,
      "loss": 2.366,
      "step": 153
    },
    {
      "epoch": 0.02207410592704078,
      "grad_norm": 0.42853182554244995,
      "learning_rate": 4.133621928133665e-05,
      "loss": 2.3583,
      "step": 154
    },
    {
      "epoch": 0.02221744427721637,
      "grad_norm": 0.43902191519737244,
      "learning_rate": 3.964141339903026e-05,
      "loss": 2.3905,
      "step": 155
    },
    {
      "epoch": 0.022360782627391957,
      "grad_norm": 0.46048280596733093,
      "learning_rate": 3.797677845237696e-05,
      "loss": 2.4043,
      "step": 156
    },
    {
      "epoch": 0.022504120977567547,
      "grad_norm": 0.3899269998073578,
      "learning_rate": 3.634276953594982e-05,
      "loss": 2.3547,
      "step": 157
    },
    {
      "epoch": 0.022647459327743136,
      "grad_norm": 0.37371909618377686,
      "learning_rate": 3.473983337147118e-05,
      "loss": 2.3287,
      "step": 158
    },
    {
      "epoch": 0.022790797677918726,
      "grad_norm": 0.404742956161499,
      "learning_rate": 3.316840818568315e-05,
      "loss": 2.3772,
      "step": 159
    },
    {
      "epoch": 0.022934136028094316,
      "grad_norm": 0.38302767276763916,
      "learning_rate": 3.162892359054098e-05,
      "loss": 2.3533,
      "step": 160
    },
    {
      "epoch": 0.023077474378269906,
      "grad_norm": 0.3811044991016388,
      "learning_rate": 3.0121800465761293e-05,
      "loss": 2.3194,
      "step": 161
    },
    {
      "epoch": 0.023220812728445495,
      "grad_norm": 0.5596585869789124,
      "learning_rate": 2.8647450843757897e-05,
      "loss": 2.4266,
      "step": 162
    },
    {
      "epoch": 0.023364151078621085,
      "grad_norm": 0.5400771498680115,
      "learning_rate": 2.7206277796996144e-05,
      "loss": 2.3642,
      "step": 163
    },
    {
      "epoch": 0.023507489428796675,
      "grad_norm": 0.4592722952365875,
      "learning_rate": 2.5798675327796993e-05,
      "loss": 2.327,
      "step": 164
    },
    {
      "epoch": 0.023650827778972264,
      "grad_norm": 0.47848427295684814,
      "learning_rate": 2.4425028260620715e-05,
      "loss": 2.4503,
      "step": 165
    },
    {
      "epoch": 0.023794166129147854,
      "grad_norm": 0.45060548186302185,
      "learning_rate": 2.3085712136859668e-05,
      "loss": 2.3749,
      "step": 166
    },
    {
      "epoch": 0.023937504479323444,
      "grad_norm": 0.4228126108646393,
      "learning_rate": 2.178109311216913e-05,
      "loss": 2.4633,
      "step": 167
    },
    {
      "epoch": 0.024080842829499034,
      "grad_norm": 0.5396104454994202,
      "learning_rate": 2.0511527856363912e-05,
      "loss": 2.4974,
      "step": 168
    },
    {
      "epoch": 0.024224181179674623,
      "grad_norm": 0.47498688101768494,
      "learning_rate": 1.927736345590839e-05,
      "loss": 2.5275,
      "step": 169
    },
    {
      "epoch": 0.024367519529850213,
      "grad_norm": 0.5508636832237244,
      "learning_rate": 1.8078937319026654e-05,
      "loss": 2.4751,
      "step": 170
    },
    {
      "epoch": 0.0245108578800258,
      "grad_norm": 0.4817218780517578,
      "learning_rate": 1.6916577083458228e-05,
      "loss": 2.5245,
      "step": 171
    },
    {
      "epoch": 0.02465419623020139,
      "grad_norm": 0.4276881217956543,
      "learning_rate": 1.579060052688548e-05,
      "loss": 2.403,
      "step": 172
    },
    {
      "epoch": 0.02479753458037698,
      "grad_norm": 0.4423224627971649,
      "learning_rate": 1.4701315480056164e-05,
      "loss": 2.4871,
      "step": 173
    },
    {
      "epoch": 0.02494087293055257,
      "grad_norm": 0.4730720818042755,
      "learning_rate": 1.3649019742625623e-05,
      "loss": 2.4419,
      "step": 174
    },
    {
      "epoch": 0.025084211280728158,
      "grad_norm": 0.4102948009967804,
      "learning_rate": 1.2634001001741373e-05,
      "loss": 2.3216,
      "step": 175
    },
    {
      "epoch": 0.025084211280728158,
      "eval_loss": 2.4315061569213867,
      "eval_runtime": 4.5667,
      "eval_samples_per_second": 10.949,
      "eval_steps_per_second": 1.533,
      "step": 175
    },
    {
      "epoch": 0.025227549630903748,
      "grad_norm": 0.43688416481018066,
      "learning_rate": 1.1656536753392287e-05,
      "loss": 2.3618,
      "step": 176
    },
    {
      "epoch": 0.025370887981079338,
      "grad_norm": 0.4183753728866577,
      "learning_rate": 1.0716894226543953e-05,
      "loss": 2.4004,
      "step": 177
    },
    {
      "epoch": 0.025514226331254927,
      "grad_norm": 0.3798510432243347,
      "learning_rate": 9.815330310080887e-06,
      "loss": 2.3351,
      "step": 178
    },
    {
      "epoch": 0.025657564681430517,
      "grad_norm": 0.4367026090621948,
      "learning_rate": 8.952091482575824e-06,
      "loss": 2.339,
      "step": 179
    },
    {
      "epoch": 0.025800903031606107,
      "grad_norm": 0.43516334891319275,
      "learning_rate": 8.127413744904804e-06,
      "loss": 2.3865,
      "step": 180
    },
    {
      "epoch": 0.025944241381781696,
      "grad_norm": 0.39649835228919983,
      "learning_rate": 7.34152255572697e-06,
      "loss": 2.4152,
      "step": 181
    },
    {
      "epoch": 0.026087579731957286,
      "grad_norm": 0.4496510326862335,
      "learning_rate": 6.594632769846353e-06,
      "loss": 2.4076,
      "step": 182
    },
    {
      "epoch": 0.026230918082132876,
      "grad_norm": 0.36271652579307556,
      "learning_rate": 5.886948579472778e-06,
      "loss": 2.349,
      "step": 183
    },
    {
      "epoch": 0.026374256432308466,
      "grad_norm": 0.43088510632514954,
      "learning_rate": 5.218663458397715e-06,
      "loss": 2.412,
      "step": 184
    },
    {
      "epoch": 0.026517594782484055,
      "grad_norm": 0.4110885262489319,
      "learning_rate": 4.589960109100444e-06,
      "loss": 2.3669,
      "step": 185
    },
    {
      "epoch": 0.02666093313265964,
      "grad_norm": 0.44089290499687195,
      "learning_rate": 4.001010412799138e-06,
      "loss": 2.3644,
      "step": 186
    },
    {
      "epoch": 0.02680427148283523,
      "grad_norm": 0.46191123127937317,
      "learning_rate": 3.451975382460109e-06,
      "loss": 2.4576,
      "step": 187
    },
    {
      "epoch": 0.02694760983301082,
      "grad_norm": 0.43181681632995605,
      "learning_rate": 2.9430051187785962e-06,
      "loss": 2.3365,
      "step": 188
    },
    {
      "epoch": 0.02709094818318641,
      "grad_norm": 0.5321618914604187,
      "learning_rate": 2.4742387691426445e-06,
      "loss": 2.3837,
      "step": 189
    },
    {
      "epoch": 0.027234286533362,
      "grad_norm": 0.4526923894882202,
      "learning_rate": 2.0458044895916513e-06,
      "loss": 2.3506,
      "step": 190
    },
    {
      "epoch": 0.02737762488353759,
      "grad_norm": 0.42679014801979065,
      "learning_rate": 1.6578194097797258e-06,
      "loss": 2.4344,
      "step": 191
    },
    {
      "epoch": 0.02752096323371318,
      "grad_norm": 0.5031974911689758,
      "learning_rate": 1.3103896009537207e-06,
      "loss": 2.3758,
      "step": 192
    },
    {
      "epoch": 0.02766430158388877,
      "grad_norm": 0.4715871214866638,
      "learning_rate": 1.0036100469542786e-06,
      "loss": 2.4062,
      "step": 193
    },
    {
      "epoch": 0.02780763993406436,
      "grad_norm": 0.5107702612876892,
      "learning_rate": 7.375646182482875e-07,
      "loss": 2.4455,
      "step": 194
    },
    {
      "epoch": 0.02795097828423995,
      "grad_norm": 0.44611087441444397,
      "learning_rate": 5.123260489995229e-07,
      "loss": 2.456,
      "step": 195
    },
    {
      "epoch": 0.02809431663441554,
      "grad_norm": 0.4629242718219757,
      "learning_rate": 3.2795591718381975e-07,
      "loss": 2.3595,
      "step": 196
    },
    {
      "epoch": 0.02823765498459113,
      "grad_norm": 0.4822857975959778,
      "learning_rate": 1.8450462775428942e-07,
      "loss": 2.4289,
      "step": 197
    },
    {
      "epoch": 0.028380993334766718,
      "grad_norm": 0.4529360234737396,
      "learning_rate": 8.201139886109264e-08,
      "loss": 2.3392,
      "step": 198
    },
    {
      "epoch": 0.028524331684942308,
      "grad_norm": 0.43792641162872314,
      "learning_rate": 2.0504251129649374e-08,
      "loss": 2.4626,
      "step": 199
    },
    {
      "epoch": 0.028667670035117897,
      "grad_norm": 0.4136735796928406,
      "learning_rate": 0.0,
      "loss": 2.35,
      "step": 200
    },
    {
      "epoch": 0.028667670035117897,
      "eval_loss": 2.4329843521118164,
      "eval_runtime": 4.566,
      "eval_samples_per_second": 10.95,
      "eval_steps_per_second": 1.533,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.39939410345984e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}