|
{ |
|
"best_metric": 0.47654790915623974, |
|
"best_model_checkpoint": "/leonardo_work/EUHPC_A02_045/scandinavian-lm/robin/fw-classifier-checkpoints-no-70b/checkpoint-33000", |
|
"epoch": 19.976498237367803, |
|
"eval_steps": 1000, |
|
"global_step": 34000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"eval_accuracy": 0.6101936630634727, |
|
"eval_f1_macro": 0.12631889713244165, |
|
"eval_loss": 4.240240097045898, |
|
"eval_precision": 0.1016989438439121, |
|
"eval_recall": 0.16666666666666666, |
|
"eval_runtime": 571.773, |
|
"eval_samples_per_second": 84.619, |
|
"eval_steps_per_second": 0.661, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0.05875440658049354, |
|
"grad_norm": 4.152867794036865, |
|
"learning_rate": 0.00029911868390129255, |
|
"loss": 0.5817, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11750881316098707, |
|
"grad_norm": 8.71384334564209, |
|
"learning_rate": 0.0002982373678025852, |
|
"loss": 0.3984, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1762632197414806, |
|
"grad_norm": 13.754929542541504, |
|
"learning_rate": 0.00029735605170387776, |
|
"loss": 0.3821, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23501762632197415, |
|
"grad_norm": 0.5506241917610168, |
|
"learning_rate": 0.0002964747356051704, |
|
"loss": 0.3869, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2937720329024677, |
|
"grad_norm": 7.549045562744141, |
|
"learning_rate": 0.00029559341950646296, |
|
"loss": 0.346, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3525264394829612, |
|
"grad_norm": 8.952414512634277, |
|
"learning_rate": 0.00029471210340775554, |
|
"loss": 0.3535, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4112808460634548, |
|
"grad_norm": 23.95926856994629, |
|
"learning_rate": 0.00029383078730904817, |
|
"loss": 0.3638, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4700352526439483, |
|
"grad_norm": 5.874898910522461, |
|
"learning_rate": 0.00029294947121034074, |
|
"loss": 0.3458, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5287896592244419, |
|
"grad_norm": 1.2818429470062256, |
|
"learning_rate": 0.0002920681551116333, |
|
"loss": 0.3183, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5875440658049353, |
|
"grad_norm": 11.916783332824707, |
|
"learning_rate": 0.00029118683901292595, |
|
"loss": 0.3415, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5875440658049353, |
|
"eval_accuracy": 0.7214517495814645, |
|
"eval_f1_macro": 0.36894750243003943, |
|
"eval_loss": 0.28744974732398987, |
|
"eval_precision": 0.4977336949727908, |
|
"eval_recall": 0.35626936254459146, |
|
"eval_runtime": 546.9073, |
|
"eval_samples_per_second": 88.467, |
|
"eval_steps_per_second": 0.691, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6462984723854289, |
|
"grad_norm": 9.043974876403809, |
|
"learning_rate": 0.0002903055229142185, |
|
"loss": 0.3187, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7050528789659224, |
|
"grad_norm": 4.782003879547119, |
|
"learning_rate": 0.00028942420681551115, |
|
"loss": 0.3466, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.763807285546416, |
|
"grad_norm": 18.947124481201172, |
|
"learning_rate": 0.00028854289071680373, |
|
"loss": 0.3365, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8225616921269095, |
|
"grad_norm": 3.7607452869415283, |
|
"learning_rate": 0.00028766157461809636, |
|
"loss": 0.3307, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.881316098707403, |
|
"grad_norm": 7.354115009307861, |
|
"learning_rate": 0.00028678025851938894, |
|
"loss": 0.327, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9400705052878966, |
|
"grad_norm": 12.593878746032715, |
|
"learning_rate": 0.00028589894242068157, |
|
"loss": 0.3143, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9988249118683902, |
|
"grad_norm": 10.552128791809082, |
|
"learning_rate": 0.00028501762632197414, |
|
"loss": 0.3082, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0575793184488838, |
|
"grad_norm": 16.90986442565918, |
|
"learning_rate": 0.0002841363102232667, |
|
"loss": 0.3153, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1163337250293772, |
|
"grad_norm": 5.408417224884033, |
|
"learning_rate": 0.00028325499412455935, |
|
"loss": 0.3051, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1750881316098707, |
|
"grad_norm": 10.757403373718262, |
|
"learning_rate": 0.0002823736780258519, |
|
"loss": 0.2999, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1750881316098707, |
|
"eval_accuracy": 0.6296013062439286, |
|
"eval_f1_macro": 0.39274972019560317, |
|
"eval_loss": 0.34983837604522705, |
|
"eval_precision": 0.4577861635371156, |
|
"eval_recall": 0.40350488729829226, |
|
"eval_runtime": 573.3711, |
|
"eval_samples_per_second": 84.383, |
|
"eval_steps_per_second": 0.659, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2338425381903644, |
|
"grad_norm": 1.7697181701660156, |
|
"learning_rate": 0.0002814923619271445, |
|
"loss": 0.3158, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.2925969447708578, |
|
"grad_norm": 9.139455795288086, |
|
"learning_rate": 0.0002806110458284371, |
|
"loss": 0.2964, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 1.4484944343566895, |
|
"learning_rate": 0.0002797297297297297, |
|
"loss": 0.296, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.410105757931845, |
|
"grad_norm": 1.3425700664520264, |
|
"learning_rate": 0.0002788484136310223, |
|
"loss": 0.2906, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4688601645123385, |
|
"grad_norm": 9.240585327148438, |
|
"learning_rate": 0.0002779670975323149, |
|
"loss": 0.2901, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.527614571092832, |
|
"grad_norm": 8.707176208496094, |
|
"learning_rate": 0.0002770857814336075, |
|
"loss": 0.2903, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5863689776733256, |
|
"grad_norm": 3.509387969970703, |
|
"learning_rate": 0.0002762044653349001, |
|
"loss": 0.2811, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.6451233842538189, |
|
"grad_norm": 3.0751891136169434, |
|
"learning_rate": 0.0002753231492361927, |
|
"loss": 0.2759, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7038777908343126, |
|
"grad_norm": 4.9634013175964355, |
|
"learning_rate": 0.0002744418331374853, |
|
"loss": 0.2842, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.7626321974148063, |
|
"grad_norm": 10.635833740234375, |
|
"learning_rate": 0.0002735605170387779, |
|
"loss": 0.2909, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7626321974148063, |
|
"eval_accuracy": 0.7319306367939152, |
|
"eval_f1_macro": 0.3785908786929529, |
|
"eval_loss": 0.26183947920799255, |
|
"eval_precision": 0.5055778316715768, |
|
"eval_recall": 0.3727759846099492, |
|
"eval_runtime": 546.7019, |
|
"eval_samples_per_second": 88.5, |
|
"eval_steps_per_second": 0.691, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8213866039952995, |
|
"grad_norm": 10.297226905822754, |
|
"learning_rate": 0.0002726792009400705, |
|
"loss": 0.2826, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.8801410105757932, |
|
"grad_norm": 2.603403329849243, |
|
"learning_rate": 0.0002717978848413631, |
|
"loss": 0.2787, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.9388954171562869, |
|
"grad_norm": 12.185776710510254, |
|
"learning_rate": 0.0002709165687426557, |
|
"loss": 0.276, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.9976498237367801, |
|
"grad_norm": 4.290465354919434, |
|
"learning_rate": 0.00027003525264394825, |
|
"loss": 0.2846, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.056404230317274, |
|
"grad_norm": 3.0092501640319824, |
|
"learning_rate": 0.00026915393654524083, |
|
"loss": 0.2805, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.1151586368977675, |
|
"grad_norm": 4.7245893478393555, |
|
"learning_rate": 0.00026827262044653346, |
|
"loss": 0.27, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 9.71957778930664, |
|
"learning_rate": 0.00026739130434782604, |
|
"loss": 0.2722, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.2326674500587544, |
|
"grad_norm": 2.2740237712860107, |
|
"learning_rate": 0.00026650998824911867, |
|
"loss": 0.2663, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.291421856639248, |
|
"grad_norm": 1.9909628629684448, |
|
"learning_rate": 0.00026562867215041124, |
|
"loss": 0.266, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.3501762632197414, |
|
"grad_norm": 5.9694366455078125, |
|
"learning_rate": 0.00026474735605170387, |
|
"loss": 0.2724, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3501762632197414, |
|
"eval_accuracy": 0.7637393299299341, |
|
"eval_f1_macro": 0.4012224284754011, |
|
"eval_loss": 0.24033646285533905, |
|
"eval_precision": 0.5199015574271643, |
|
"eval_recall": 0.37999768266709827, |
|
"eval_runtime": 549.807, |
|
"eval_samples_per_second": 88.0, |
|
"eval_steps_per_second": 0.688, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.408930669800235, |
|
"grad_norm": 0.8608851432800293, |
|
"learning_rate": 0.00026386603995299645, |
|
"loss": 0.2665, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.4676850763807288, |
|
"grad_norm": 3.54764723777771, |
|
"learning_rate": 0.0002629847238542891, |
|
"loss": 0.262, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.526439482961222, |
|
"grad_norm": 4.2886481285095215, |
|
"learning_rate": 0.00026210340775558165, |
|
"loss": 0.2651, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.5851938895417157, |
|
"grad_norm": 9.533616065979004, |
|
"learning_rate": 0.00026122209165687423, |
|
"loss": 0.2705, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.6439482961222094, |
|
"grad_norm": 1.6619293689727783, |
|
"learning_rate": 0.00026034077555816686, |
|
"loss": 0.2728, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 5.174167633056641, |
|
"learning_rate": 0.00025945945945945944, |
|
"loss": 0.265, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.7614571092831963, |
|
"grad_norm": 3.4489777088165283, |
|
"learning_rate": 0.000258578143360752, |
|
"loss": 0.2608, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.82021151586369, |
|
"grad_norm": 5.9784111976623535, |
|
"learning_rate": 0.00025769682726204464, |
|
"loss": 0.268, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.8789659224441833, |
|
"grad_norm": 1.8021718263626099, |
|
"learning_rate": 0.0002568155111633372, |
|
"loss": 0.2604, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.937720329024677, |
|
"grad_norm": 1.6711304187774658, |
|
"learning_rate": 0.0002559341950646298, |
|
"loss": 0.2673, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.937720329024677, |
|
"eval_accuracy": 0.7662815451708245, |
|
"eval_f1_macro": 0.42256323386502537, |
|
"eval_loss": 0.23298443853855133, |
|
"eval_precision": 0.4985552431790052, |
|
"eval_recall": 0.4017059313674283, |
|
"eval_runtime": 572.543, |
|
"eval_samples_per_second": 84.505, |
|
"eval_steps_per_second": 0.66, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.9964747356051706, |
|
"grad_norm": 0.876846432685852, |
|
"learning_rate": 0.0002550528789659224, |
|
"loss": 0.2559, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.055229142185664, |
|
"grad_norm": 2.291898250579834, |
|
"learning_rate": 0.000254171562867215, |
|
"loss": 0.2614, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.1139835487661576, |
|
"grad_norm": 2.2618095874786377, |
|
"learning_rate": 0.00025329024676850763, |
|
"loss": 0.2578, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.172737955346651, |
|
"grad_norm": 2.6534600257873535, |
|
"learning_rate": 0.0002524089306698002, |
|
"loss": 0.2568, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.2314923619271445, |
|
"grad_norm": 3.1308279037475586, |
|
"learning_rate": 0.00025152761457109283, |
|
"loss": 0.2531, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.290246768507638, |
|
"grad_norm": 2.781928300857544, |
|
"learning_rate": 0.0002506462984723854, |
|
"loss": 0.2526, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.3490011750881314, |
|
"grad_norm": 3.065544366836548, |
|
"learning_rate": 0.00024976498237367804, |
|
"loss": 0.2654, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.407755581668625, |
|
"grad_norm": 1.8798550367355347, |
|
"learning_rate": 0.0002488836662749706, |
|
"loss": 0.2504, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.466509988249119, |
|
"grad_norm": 6.618080139160156, |
|
"learning_rate": 0.0002480023501762632, |
|
"loss": 0.2593, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.525264394829612, |
|
"grad_norm": 3.1250927448272705, |
|
"learning_rate": 0.00024712103407755577, |
|
"loss": 0.2592, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.525264394829612, |
|
"eval_accuracy": 0.7667155819192691, |
|
"eval_f1_macro": 0.38853739645942603, |
|
"eval_loss": 0.24237428605556488, |
|
"eval_precision": 0.5286680026993323, |
|
"eval_recall": 0.36739233287894374, |
|
"eval_runtime": 551.8425, |
|
"eval_samples_per_second": 87.675, |
|
"eval_steps_per_second": 0.685, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.5840188014101058, |
|
"grad_norm": 1.4983100891113281, |
|
"learning_rate": 0.0002462397179788484, |
|
"loss": 0.2492, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.6427732079905994, |
|
"grad_norm": 7.058569431304932, |
|
"learning_rate": 0.000245358401880141, |
|
"loss": 0.2594, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.7015276145710927, |
|
"grad_norm": 1.7073079347610474, |
|
"learning_rate": 0.0002444770857814336, |
|
"loss": 0.259, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.7602820211515864, |
|
"grad_norm": 3.5612850189208984, |
|
"learning_rate": 0.00024359576968272618, |
|
"loss": 0.2499, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.8190364277320796, |
|
"grad_norm": 3.4439921379089355, |
|
"learning_rate": 0.00024271445358401875, |
|
"loss": 0.2478, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.8777908343125733, |
|
"grad_norm": 1.5288629531860352, |
|
"learning_rate": 0.00024183313748531138, |
|
"loss": 0.251, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.936545240893067, |
|
"grad_norm": 4.820594787597656, |
|
"learning_rate": 0.00024095182138660396, |
|
"loss": 0.2556, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.9952996474735603, |
|
"grad_norm": 2.041408061981201, |
|
"learning_rate": 0.00024007050528789656, |
|
"loss": 0.2559, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.054054054054054, |
|
"grad_norm": 7.40335750579834, |
|
"learning_rate": 0.00023918918918918917, |
|
"loss": 0.252, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.112808460634548, |
|
"grad_norm": 2.86159086227417, |
|
"learning_rate": 0.00023830787309048177, |
|
"loss": 0.2504, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.112808460634548, |
|
"eval_accuracy": 0.7713866440691979, |
|
"eval_f1_macro": 0.4170037789256203, |
|
"eval_loss": 0.23320935666561127, |
|
"eval_precision": 0.520598590788087, |
|
"eval_recall": 0.39262170670987384, |
|
"eval_runtime": 570.908, |
|
"eval_samples_per_second": 84.747, |
|
"eval_steps_per_second": 0.662, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.171562867215041, |
|
"grad_norm": 4.3792805671691895, |
|
"learning_rate": 0.00023742655699177434, |
|
"loss": 0.2511, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.230317273795535, |
|
"grad_norm": 1.2670930624008179, |
|
"learning_rate": 0.00023654524089306697, |
|
"loss": 0.2467, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.289071680376028, |
|
"grad_norm": 4.641327381134033, |
|
"learning_rate": 0.00023566392479435955, |
|
"loss": 0.252, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.3478260869565215, |
|
"grad_norm": 2.356194257736206, |
|
"learning_rate": 0.00023478260869565215, |
|
"loss": 0.2471, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.406580493537016, |
|
"grad_norm": 3.5696866512298584, |
|
"learning_rate": 0.00023390129259694476, |
|
"loss": 0.2444, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.465334900117509, |
|
"grad_norm": 8.092639923095703, |
|
"learning_rate": 0.00023301997649823736, |
|
"loss": 0.2458, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.524089306698002, |
|
"grad_norm": 3.4449079036712646, |
|
"learning_rate": 0.00023213866039952993, |
|
"loss": 0.2512, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.582843713278496, |
|
"grad_norm": 5.513228416442871, |
|
"learning_rate": 0.00023125734430082256, |
|
"loss": 0.2518, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.6415981198589895, |
|
"grad_norm": 2.5132598876953125, |
|
"learning_rate": 0.00023037602820211514, |
|
"loss": 0.2519, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.700352526439483, |
|
"grad_norm": 2.164031982421875, |
|
"learning_rate": 0.00022949471210340774, |
|
"loss": 0.2455, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.700352526439483, |
|
"eval_accuracy": 0.7628505880164521, |
|
"eval_f1_macro": 0.4485850425849249, |
|
"eval_loss": 0.2332322597503662, |
|
"eval_precision": 0.484949663618545, |
|
"eval_recall": 0.4351442261907779, |
|
"eval_runtime": 569.2371, |
|
"eval_samples_per_second": 84.996, |
|
"eval_steps_per_second": 0.664, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.759106933019977, |
|
"grad_norm": 8.147943496704102, |
|
"learning_rate": 0.00022861339600470035, |
|
"loss": 0.2538, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.81786133960047, |
|
"grad_norm": 3.1226038932800293, |
|
"learning_rate": 0.00022773207990599292, |
|
"loss": 0.2481, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.876615746180963, |
|
"grad_norm": 1.5910353660583496, |
|
"learning_rate": 0.00022685076380728553, |
|
"loss": 0.2432, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.9353701527614575, |
|
"grad_norm": 2.3687844276428223, |
|
"learning_rate": 0.0002259694477085781, |
|
"loss": 0.2464, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.994124559341951, |
|
"grad_norm": 1.5202206373214722, |
|
"learning_rate": 0.00022508813160987073, |
|
"loss": 0.2491, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.052878965922444, |
|
"grad_norm": 8.09229850769043, |
|
"learning_rate": 0.0002242068155111633, |
|
"loss": 0.245, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.111633372502938, |
|
"grad_norm": 4.977721691131592, |
|
"learning_rate": 0.0002233254994124559, |
|
"loss": 0.2414, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.170387779083431, |
|
"grad_norm": 2.690870523452759, |
|
"learning_rate": 0.0002224441833137485, |
|
"loss": 0.2437, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.229142185663925, |
|
"grad_norm": 4.5524373054504395, |
|
"learning_rate": 0.00022156286721504112, |
|
"loss": 0.2476, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.287896592244419, |
|
"grad_norm": 3.5273966789245605, |
|
"learning_rate": 0.0002206815511163337, |
|
"loss": 0.2424, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.287896592244419, |
|
"eval_accuracy": 0.7704152284893454, |
|
"eval_f1_macro": 0.40212235960322845, |
|
"eval_loss": 0.22698020935058594, |
|
"eval_precision": 0.5605989028870036, |
|
"eval_recall": 0.3866441363925465, |
|
"eval_runtime": 588.8368, |
|
"eval_samples_per_second": 82.167, |
|
"eval_steps_per_second": 0.642, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.346650998824912, |
|
"grad_norm": 4.150397777557373, |
|
"learning_rate": 0.00021980023501762632, |
|
"loss": 0.2476, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.405405405405405, |
|
"grad_norm": 1.5151199102401733, |
|
"learning_rate": 0.0002189189189189189, |
|
"loss": 0.2458, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.464159811985899, |
|
"grad_norm": 2.2463040351867676, |
|
"learning_rate": 0.0002180376028202115, |
|
"loss": 0.2388, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.522914218566393, |
|
"grad_norm": 2.767045259475708, |
|
"learning_rate": 0.0002171562867215041, |
|
"loss": 0.2433, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.581668625146886, |
|
"grad_norm": 5.879153728485107, |
|
"learning_rate": 0.0002162749706227967, |
|
"loss": 0.2452, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.64042303172738, |
|
"grad_norm": 4.529464244842529, |
|
"learning_rate": 0.00021539365452408928, |
|
"loss": 0.2437, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.699177438307873, |
|
"grad_norm": 2.579648017883301, |
|
"learning_rate": 0.00021451233842538186, |
|
"loss": 0.2429, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.7579318448883665, |
|
"grad_norm": 1.4765149354934692, |
|
"learning_rate": 0.0002136310223266745, |
|
"loss": 0.2347, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.816686251468861, |
|
"grad_norm": 6.136841297149658, |
|
"learning_rate": 0.00021274970622796706, |
|
"loss": 0.2407, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.875440658049354, |
|
"grad_norm": 5.470715045928955, |
|
"learning_rate": 0.0002118683901292597, |
|
"loss": 0.2476, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.875440658049354, |
|
"eval_accuracy": 0.7542525267139284, |
|
"eval_f1_macro": 0.4479732740452163, |
|
"eval_loss": 0.2347133606672287, |
|
"eval_precision": 0.495252873527299, |
|
"eval_recall": 0.4323174862666415, |
|
"eval_runtime": 555.2826, |
|
"eval_samples_per_second": 87.132, |
|
"eval_steps_per_second": 0.681, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.934195064629847, |
|
"grad_norm": 2.95473575592041, |
|
"learning_rate": 0.00021098707403055227, |
|
"loss": 0.2443, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 5.992949471210341, |
|
"grad_norm": 1.8928413391113281, |
|
"learning_rate": 0.00021010575793184487, |
|
"loss": 0.237, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.0517038777908345, |
|
"grad_norm": 5.004413604736328, |
|
"learning_rate": 0.00020922444183313745, |
|
"loss": 0.2419, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 6.110458284371328, |
|
"grad_norm": 2.2294819355010986, |
|
"learning_rate": 0.00020834312573443008, |
|
"loss": 0.2393, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.169212690951821, |
|
"grad_norm": 3.4622702598571777, |
|
"learning_rate": 0.00020746180963572265, |
|
"loss": 0.2467, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.227967097532315, |
|
"grad_norm": 11.164566993713379, |
|
"learning_rate": 0.00020658049353701526, |
|
"loss": 0.2405, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.286721504112808, |
|
"grad_norm": 8.36145305633545, |
|
"learning_rate": 0.00020569917743830786, |
|
"loss": 0.2366, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 6.345475910693302, |
|
"grad_norm": 2.6593246459960938, |
|
"learning_rate": 0.00020481786133960046, |
|
"loss": 0.244, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.404230317273796, |
|
"grad_norm": 1.6066139936447144, |
|
"learning_rate": 0.00020393654524089304, |
|
"loss": 0.2412, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.462984723854289, |
|
"grad_norm": 2.962965250015259, |
|
"learning_rate": 0.00020305522914218567, |
|
"loss": 0.2385, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.462984723854289, |
|
"eval_accuracy": 0.7729161068970506, |
|
"eval_f1_macro": 0.4305720986934003, |
|
"eval_loss": 0.22273238003253937, |
|
"eval_precision": 0.5147561915779448, |
|
"eval_recall": 0.40734695218979783, |
|
"eval_runtime": 531.4648, |
|
"eval_samples_per_second": 91.037, |
|
"eval_steps_per_second": 0.711, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.521739130434782, |
|
"grad_norm": 3.933185338973999, |
|
"learning_rate": 0.00020217391304347824, |
|
"loss": 0.2389, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 6.580493537015276, |
|
"grad_norm": 6.2505316734313965, |
|
"learning_rate": 0.00020129259694477085, |
|
"loss": 0.2345, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.63924794359577, |
|
"grad_norm": 2.8899261951446533, |
|
"learning_rate": 0.00020041128084606345, |
|
"loss": 0.2404, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 6.698002350176263, |
|
"grad_norm": 4.886023998260498, |
|
"learning_rate": 0.00019952996474735602, |
|
"loss": 0.2405, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 6.756756756756757, |
|
"grad_norm": 3.6124258041381836, |
|
"learning_rate": 0.00019864864864864863, |
|
"loss": 0.2388, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.81551116333725, |
|
"grad_norm": 2.6905336380004883, |
|
"learning_rate": 0.0001977673325499412, |
|
"loss": 0.2419, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 6.8742655699177435, |
|
"grad_norm": 1.7078518867492676, |
|
"learning_rate": 0.00019688601645123383, |
|
"loss": 0.2351, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 6.933019976498238, |
|
"grad_norm": 4.933712482452393, |
|
"learning_rate": 0.0001960047003525264, |
|
"loss": 0.2357, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 6.991774383078731, |
|
"grad_norm": 4.086423873901367, |
|
"learning_rate": 0.00019512338425381904, |
|
"loss": 0.241, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 7.050528789659224, |
|
"grad_norm": 3.3847010135650635, |
|
"learning_rate": 0.00019424206815511161, |
|
"loss": 0.2343, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.050528789659224, |
|
"eval_accuracy": 0.7735568278114213, |
|
"eval_f1_macro": 0.4611187564793586, |
|
"eval_loss": 0.2231457531452179, |
|
"eval_precision": 0.48558443620097314, |
|
"eval_recall": 0.4472545142062536, |
|
"eval_runtime": 576.3861, |
|
"eval_samples_per_second": 83.942, |
|
"eval_steps_per_second": 0.656, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.109283196239718, |
|
"grad_norm": 2.0805375576019287, |
|
"learning_rate": 0.00019336075205640422, |
|
"loss": 0.238, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 7.1680376028202115, |
|
"grad_norm": 3.5111265182495117, |
|
"learning_rate": 0.0001924794359576968, |
|
"loss": 0.2365, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 7.226792009400705, |
|
"grad_norm": 2.2142751216888428, |
|
"learning_rate": 0.00019159811985898942, |
|
"loss": 0.2387, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 7.285546415981199, |
|
"grad_norm": 5.468302249908447, |
|
"learning_rate": 0.000190716803760282, |
|
"loss": 0.2366, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 7.344300822561692, |
|
"grad_norm": 4.9833598136901855, |
|
"learning_rate": 0.0001898354876615746, |
|
"loss": 0.2345, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.403055229142185, |
|
"grad_norm": 2.4710216522216797, |
|
"learning_rate": 0.0001889541715628672, |
|
"loss": 0.2335, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.4618096357226795, |
|
"grad_norm": 1.4311057329177856, |
|
"learning_rate": 0.0001880728554641598, |
|
"loss": 0.242, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 7.520564042303173, |
|
"grad_norm": 3.9087047576904297, |
|
"learning_rate": 0.00018719153936545238, |
|
"loss": 0.2369, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 7.579318448883666, |
|
"grad_norm": 2.0385680198669434, |
|
"learning_rate": 0.000186310223266745, |
|
"loss": 0.2295, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 7.63807285546416, |
|
"grad_norm": 3.9989728927612305, |
|
"learning_rate": 0.0001854289071680376, |
|
"loss": 0.2353, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.63807285546416, |
|
"eval_accuracy": 0.7637393299299341, |
|
"eval_f1_macro": 0.42822074709027813, |
|
"eval_loss": 0.2291877716779709, |
|
"eval_precision": 0.5240563218793618, |
|
"eval_recall": 0.4106384213710441, |
|
"eval_runtime": 580.7226, |
|
"eval_samples_per_second": 83.315, |
|
"eval_steps_per_second": 0.651, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.696827262044653, |
|
"grad_norm": 1.630708932876587, |
|
"learning_rate": 0.00018454759106933017, |
|
"loss": 0.2373, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 7.755581668625147, |
|
"grad_norm": 3.2567617893218994, |
|
"learning_rate": 0.0001836662749706228, |
|
"loss": 0.2326, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 7.814336075205641, |
|
"grad_norm": 2.3300867080688477, |
|
"learning_rate": 0.00018278495887191537, |
|
"loss": 0.2369, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 7.873090481786134, |
|
"grad_norm": 2.068678379058838, |
|
"learning_rate": 0.00018190364277320797, |
|
"loss": 0.2337, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 7.931844888366627, |
|
"grad_norm": 2.0448477268218994, |
|
"learning_rate": 0.00018102232667450055, |
|
"loss": 0.2335, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.990599294947121, |
|
"grad_norm": 2.7080137729644775, |
|
"learning_rate": 0.00018014101057579318, |
|
"loss": 0.238, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 8.049353701527615, |
|
"grad_norm": 1.9964938163757324, |
|
"learning_rate": 0.00017925969447708576, |
|
"loss": 0.2359, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 8.108108108108109, |
|
"grad_norm": 2.795433759689331, |
|
"learning_rate": 0.00017837837837837839, |
|
"loss": 0.2374, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 8.166862514688601, |
|
"grad_norm": 2.685382843017578, |
|
"learning_rate": 0.00017749706227967096, |
|
"loss": 0.2364, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 8.225616921269095, |
|
"grad_norm": 2.214505195617676, |
|
"learning_rate": 0.00017661574618096356, |
|
"loss": 0.2328, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.225616921269095, |
|
"eval_accuracy": 0.7689477709112705, |
|
"eval_f1_macro": 0.4571007537767639, |
|
"eval_loss": 0.22145947813987732, |
|
"eval_precision": 0.4969408215399489, |
|
"eval_recall": 0.44354312726305817, |
|
"eval_runtime": 570.5829, |
|
"eval_samples_per_second": 84.796, |
|
"eval_steps_per_second": 0.662, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.28437132784959, |
|
"grad_norm": 3.0027620792388916, |
|
"learning_rate": 0.00017573443008225614, |
|
"loss": 0.2328, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 8.343125734430082, |
|
"grad_norm": 1.9569076299667358, |
|
"learning_rate": 0.00017485311398354877, |
|
"loss": 0.2315, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 8.401880141010576, |
|
"grad_norm": 2.1613545417785645, |
|
"learning_rate": 0.00017397179788484135, |
|
"loss": 0.2314, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 8.46063454759107, |
|
"grad_norm": 4.501012802124023, |
|
"learning_rate": 0.00017309048178613395, |
|
"loss": 0.2307, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 8.519388954171562, |
|
"grad_norm": 4.018213272094727, |
|
"learning_rate": 0.00017220916568742655, |
|
"loss": 0.2337, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 8.578143360752057, |
|
"grad_norm": 3.4571876525878906, |
|
"learning_rate": 0.00017132784958871913, |
|
"loss": 0.2309, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 8.63689776733255, |
|
"grad_norm": 1.6010338068008423, |
|
"learning_rate": 0.00017044653349001173, |
|
"loss": 0.2304, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 8.695652173913043, |
|
"grad_norm": 5.177122592926025, |
|
"learning_rate": 0.00016956521739130433, |
|
"loss": 0.2315, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 8.754406580493537, |
|
"grad_norm": 1.3421051502227783, |
|
"learning_rate": 0.00016868390129259694, |
|
"loss": 0.2358, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 8.813160987074031, |
|
"grad_norm": 5.40761137008667, |
|
"learning_rate": 0.0001678025851938895, |
|
"loss": 0.2297, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 8.813160987074031, |
|
"eval_accuracy": 0.7784759109604613, |
|
"eval_f1_macro": 0.42357466614962985, |
|
"eval_loss": 0.22359371185302734, |
|
"eval_precision": 0.5285253073603687, |
|
"eval_recall": 0.3968963203390616, |
|
"eval_runtime": 558.7605, |
|
"eval_samples_per_second": 86.59, |
|
"eval_steps_per_second": 0.676, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 8.871915393654524, |
|
"grad_norm": 2.718010902404785, |
|
"learning_rate": 0.00016692126909518214, |
|
"loss": 0.2282, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 8.930669800235018, |
|
"grad_norm": 2.1908445358276367, |
|
"learning_rate": 0.00016603995299647472, |
|
"loss": 0.2342, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 8.989424206815512, |
|
"grad_norm": 1.3827928304672241, |
|
"learning_rate": 0.00016515863689776732, |
|
"loss": 0.2336, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 9.048178613396004, |
|
"grad_norm": 2.2856316566467285, |
|
"learning_rate": 0.0001642773207990599, |
|
"loss": 0.2316, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 9.106933019976498, |
|
"grad_norm": 9.475784301757812, |
|
"learning_rate": 0.00016339600470035253, |
|
"loss": 0.2338, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 9.165687426556993, |
|
"grad_norm": 5.561756610870361, |
|
"learning_rate": 0.0001625146886016451, |
|
"loss": 0.2345, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 9.224441833137485, |
|
"grad_norm": 3.0887973308563232, |
|
"learning_rate": 0.00016163337250293773, |
|
"loss": 0.2377, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 9.283196239717979, |
|
"grad_norm": 2.1840600967407227, |
|
"learning_rate": 0.0001607520564042303, |
|
"loss": 0.2279, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 9.341950646298473, |
|
"grad_norm": 1.5278443098068237, |
|
"learning_rate": 0.0001598707403055229, |
|
"loss": 0.2281, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 9.400705052878966, |
|
"grad_norm": 1.8652377128601074, |
|
"learning_rate": 0.00015898942420681549, |
|
"loss": 0.2261, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.400705052878966, |
|
"eval_accuracy": 0.778496579377054, |
|
"eval_f1_macro": 0.4118529460883977, |
|
"eval_loss": 0.2205253690481186, |
|
"eval_precision": 0.5265129533046812, |
|
"eval_recall": 0.38989546728234464, |
|
"eval_runtime": 564.1949, |
|
"eval_samples_per_second": 85.756, |
|
"eval_steps_per_second": 0.67, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.45945945945946, |
|
"grad_norm": 3.021077871322632, |
|
"learning_rate": 0.0001581081081081081, |
|
"loss": 0.2291, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 9.518213866039954, |
|
"grad_norm": 1.2995972633361816, |
|
"learning_rate": 0.0001572267920094007, |
|
"loss": 0.2279, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 9.576968272620446, |
|
"grad_norm": 4.3413801193237305, |
|
"learning_rate": 0.00015634547591069327, |
|
"loss": 0.2254, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 9.63572267920094, |
|
"grad_norm": 1.8537020683288574, |
|
"learning_rate": 0.0001554641598119859, |
|
"loss": 0.2284, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 9.694477085781434, |
|
"grad_norm": 2.3501524925231934, |
|
"learning_rate": 0.00015458284371327847, |
|
"loss": 0.2315, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 9.753231492361927, |
|
"grad_norm": 4.062187671661377, |
|
"learning_rate": 0.00015370152761457108, |
|
"loss": 0.2271, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 9.811985898942421, |
|
"grad_norm": 2.9765398502349854, |
|
"learning_rate": 0.00015282021151586368, |
|
"loss": 0.2341, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 9.870740305522915, |
|
"grad_norm": 3.3737270832061768, |
|
"learning_rate": 0.00015193889541715628, |
|
"loss": 0.2302, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 9.929494712103407, |
|
"grad_norm": 3.7637851238250732, |
|
"learning_rate": 0.00015105757931844886, |
|
"loss": 0.2306, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"grad_norm": 4.947080135345459, |
|
"learning_rate": 0.0001501762632197415, |
|
"loss": 0.2226, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"eval_accuracy": 0.7804600789533513, |
|
"eval_f1_macro": 0.45841854146967403, |
|
"eval_loss": 0.21761466562747955, |
|
"eval_precision": 0.4921930948461919, |
|
"eval_recall": 0.43836252511254314, |
|
"eval_runtime": 558.7853, |
|
"eval_samples_per_second": 86.586, |
|
"eval_steps_per_second": 0.676, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.047003525264396, |
|
"grad_norm": 4.100146293640137, |
|
"learning_rate": 0.00014929494712103406, |
|
"loss": 0.2301, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 10.105757931844888, |
|
"grad_norm": 2.0130274295806885, |
|
"learning_rate": 0.00014841363102232667, |
|
"loss": 0.2288, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 10.164512338425382, |
|
"grad_norm": 2.4523582458496094, |
|
"learning_rate": 0.00014753231492361924, |
|
"loss": 0.2257, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 10.223266745005876, |
|
"grad_norm": 2.4732425212860107, |
|
"learning_rate": 0.00014665099882491185, |
|
"loss": 0.2253, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 10.282021151586369, |
|
"grad_norm": 2.8159022331237793, |
|
"learning_rate": 0.00014576968272620445, |
|
"loss": 0.2297, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 10.340775558166863, |
|
"grad_norm": 2.784027338027954, |
|
"learning_rate": 0.00014488836662749705, |
|
"loss": 0.2282, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 10.399529964747355, |
|
"grad_norm": 2.8521196842193604, |
|
"learning_rate": 0.00014400705052878965, |
|
"loss": 0.2294, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 10.45828437132785, |
|
"grad_norm": 3.453033685684204, |
|
"learning_rate": 0.00014312573443008226, |
|
"loss": 0.2252, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 10.517038777908343, |
|
"grad_norm": 1.889672875404358, |
|
"learning_rate": 0.00014224441833137483, |
|
"loss": 0.2266, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 10.575793184488838, |
|
"grad_norm": 2.153575897216797, |
|
"learning_rate": 0.00014136310223266744, |
|
"loss": 0.2248, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.575793184488838, |
|
"eval_accuracy": 0.778909947708906, |
|
"eval_f1_macro": 0.44892131069383673, |
|
"eval_loss": 0.2148253470659256, |
|
"eval_precision": 0.4964295361361209, |
|
"eval_recall": 0.42785352086722733, |
|
"eval_runtime": 556.2941, |
|
"eval_samples_per_second": 86.974, |
|
"eval_steps_per_second": 0.679, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.63454759106933, |
|
"grad_norm": 2.2354812622070312, |
|
"learning_rate": 0.00014048178613396004, |
|
"loss": 0.2303, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 10.693301997649824, |
|
"grad_norm": 6.267688751220703, |
|
"learning_rate": 0.00013960047003525264, |
|
"loss": 0.2219, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 10.752056404230316, |
|
"grad_norm": 4.277271270751953, |
|
"learning_rate": 0.00013871915393654524, |
|
"loss": 0.2256, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 10.81081081081081, |
|
"grad_norm": 4.068231582641602, |
|
"learning_rate": 0.00013783783783783782, |
|
"loss": 0.2278, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 10.869565217391305, |
|
"grad_norm": 1.9653985500335693, |
|
"learning_rate": 0.00013695652173913042, |
|
"loss": 0.2326, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 10.928319623971799, |
|
"grad_norm": 1.7713191509246826, |
|
"learning_rate": 0.00013607520564042303, |
|
"loss": 0.2312, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 10.987074030552291, |
|
"grad_norm": 3.856257438659668, |
|
"learning_rate": 0.0001351938895417156, |
|
"loss": 0.2313, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 11.045828437132785, |
|
"grad_norm": 3.794623851776123, |
|
"learning_rate": 0.0001343125734430082, |
|
"loss": 0.2237, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 11.104582843713278, |
|
"grad_norm": 2.6339073181152344, |
|
"learning_rate": 0.0001334312573443008, |
|
"loss": 0.2237, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 11.163337250293772, |
|
"grad_norm": 2.57064151763916, |
|
"learning_rate": 0.0001325499412455934, |
|
"loss": 0.2238, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.163337250293772, |
|
"eval_accuracy": 0.783353657276316, |
|
"eval_f1_macro": 0.456551925690663, |
|
"eval_loss": 0.21650880575180054, |
|
"eval_precision": 0.5127229337198491, |
|
"eval_recall": 0.42967693938948215, |
|
"eval_runtime": 585.3328, |
|
"eval_samples_per_second": 82.659, |
|
"eval_steps_per_second": 0.646, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.222091656874266, |
|
"grad_norm": 2.160663366317749, |
|
"learning_rate": 0.000131668625146886, |
|
"loss": 0.2275, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 11.280846063454758, |
|
"grad_norm": 5.847405910491943, |
|
"learning_rate": 0.0001307873090481786, |
|
"loss": 0.2271, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 11.339600470035252, |
|
"grad_norm": 2.103134870529175, |
|
"learning_rate": 0.0001299059929494712, |
|
"loss": 0.2295, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 11.398354876615747, |
|
"grad_norm": 2.2549660205841064, |
|
"learning_rate": 0.0001290246768507638, |
|
"loss": 0.2229, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 11.457109283196239, |
|
"grad_norm": 3.1517040729522705, |
|
"learning_rate": 0.0001281433607520564, |
|
"loss": 0.2221, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 11.515863689776733, |
|
"grad_norm": 2.703953266143799, |
|
"learning_rate": 0.000127262044653349, |
|
"loss": 0.2216, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 11.574618096357227, |
|
"grad_norm": 1.5301584005355835, |
|
"learning_rate": 0.0001263807285546416, |
|
"loss": 0.2301, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 11.63337250293772, |
|
"grad_norm": 3.967664957046509, |
|
"learning_rate": 0.00012549941245593418, |
|
"loss": 0.2257, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 11.692126909518214, |
|
"grad_norm": 3.278876543045044, |
|
"learning_rate": 0.00012461809635722678, |
|
"loss": 0.2277, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 11.750881316098708, |
|
"grad_norm": 3.274348497390747, |
|
"learning_rate": 0.00012373678025851938, |
|
"loss": 0.2262, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 11.750881316098708, |
|
"eval_accuracy": 0.7706632494884567, |
|
"eval_f1_macro": 0.46808422191837634, |
|
"eval_loss": 0.21933460235595703, |
|
"eval_precision": 0.4863537664371959, |
|
"eval_recall": 0.4608368369449391, |
|
"eval_runtime": 542.8512, |
|
"eval_samples_per_second": 89.128, |
|
"eval_steps_per_second": 0.696, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 11.8096357226792, |
|
"grad_norm": 2.5668694972991943, |
|
"learning_rate": 0.00012285546415981196, |
|
"loss": 0.2212, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 11.868390129259694, |
|
"grad_norm": 2.187702178955078, |
|
"learning_rate": 0.00012197414806110456, |
|
"loss": 0.2254, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 11.927144535840188, |
|
"grad_norm": 2.247164487838745, |
|
"learning_rate": 0.00012109283196239717, |
|
"loss": 0.2268, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 11.98589894242068, |
|
"grad_norm": 4.919483184814453, |
|
"learning_rate": 0.00012021151586368976, |
|
"loss": 0.2251, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 12.044653349001175, |
|
"grad_norm": 2.580787181854248, |
|
"learning_rate": 0.00011933019976498236, |
|
"loss": 0.2255, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 12.103407755581669, |
|
"grad_norm": 3.7776031494140625, |
|
"learning_rate": 0.00011844888366627496, |
|
"loss": 0.2217, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 12.162162162162161, |
|
"grad_norm": 2.159958839416504, |
|
"learning_rate": 0.00011756756756756755, |
|
"loss": 0.2213, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 12.220916568742656, |
|
"grad_norm": 1.7245205640792847, |
|
"learning_rate": 0.00011668625146886015, |
|
"loss": 0.2217, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 12.27967097532315, |
|
"grad_norm": 1.598755955696106, |
|
"learning_rate": 0.00011580493537015276, |
|
"loss": 0.2227, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 12.338425381903642, |
|
"grad_norm": 1.9064700603485107, |
|
"learning_rate": 0.00011492361927144535, |
|
"loss": 0.2239, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.338425381903642, |
|
"eval_accuracy": 0.7744042328917181, |
|
"eval_f1_macro": 0.46072293060919217, |
|
"eval_loss": 0.21473053097724915, |
|
"eval_precision": 0.5166471813664621, |
|
"eval_recall": 0.44065307647654417, |
|
"eval_runtime": 586.4383, |
|
"eval_samples_per_second": 82.503, |
|
"eval_steps_per_second": 0.645, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.397179788484136, |
|
"grad_norm": 2.0441579818725586, |
|
"learning_rate": 0.00011404230317273795, |
|
"loss": 0.2208, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 12.45593419506463, |
|
"grad_norm": 4.460620880126953, |
|
"learning_rate": 0.00011316098707403055, |
|
"loss": 0.2233, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 12.514688601645123, |
|
"grad_norm": 2.7372050285339355, |
|
"learning_rate": 0.00011227967097532314, |
|
"loss": 0.2204, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 12.573443008225617, |
|
"grad_norm": 3.1166772842407227, |
|
"learning_rate": 0.00011139835487661574, |
|
"loss": 0.2283, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 12.632197414806111, |
|
"grad_norm": 3.481877565383911, |
|
"learning_rate": 0.00011051703877790835, |
|
"loss": 0.2206, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 12.690951821386603, |
|
"grad_norm": 2.6548030376434326, |
|
"learning_rate": 0.00010963572267920094, |
|
"loss": 0.2241, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 12.749706227967097, |
|
"grad_norm": 2.3535709381103516, |
|
"learning_rate": 0.00010875440658049353, |
|
"loss": 0.2213, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 12.808460634547592, |
|
"grad_norm": 2.523663282394409, |
|
"learning_rate": 0.00010787309048178611, |
|
"loss": 0.2222, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 12.867215041128084, |
|
"grad_norm": 1.9537861347198486, |
|
"learning_rate": 0.00010699177438307872, |
|
"loss": 0.2221, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 12.925969447708578, |
|
"grad_norm": 1.9098992347717285, |
|
"learning_rate": 0.00010611045828437131, |
|
"loss": 0.2196, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 12.925969447708578, |
|
"eval_accuracy": 0.782361573279871, |
|
"eval_f1_macro": 0.45321392135920985, |
|
"eval_loss": 0.21087060868740082, |
|
"eval_precision": 0.5160027197721393, |
|
"eval_recall": 0.4277450812600511, |
|
"eval_runtime": 561.4808, |
|
"eval_samples_per_second": 86.17, |
|
"eval_steps_per_second": 0.673, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 12.984723854289072, |
|
"grad_norm": 2.665001153945923, |
|
"learning_rate": 0.00010522914218566391, |
|
"loss": 0.2224, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 13.043478260869565, |
|
"grad_norm": 3.2731380462646484, |
|
"learning_rate": 0.00010434782608695651, |
|
"loss": 0.2161, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 13.102232667450059, |
|
"grad_norm": 1.7394378185272217, |
|
"learning_rate": 0.0001034665099882491, |
|
"loss": 0.2213, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 13.160987074030553, |
|
"grad_norm": 4.027496337890625, |
|
"learning_rate": 0.0001025851938895417, |
|
"loss": 0.2232, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 13.219741480611045, |
|
"grad_norm": 4.968031883239746, |
|
"learning_rate": 0.00010170387779083431, |
|
"loss": 0.2228, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 13.27849588719154, |
|
"grad_norm": 2.2942428588867188, |
|
"learning_rate": 0.0001008225616921269, |
|
"loss": 0.2243, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 13.337250293772033, |
|
"grad_norm": 1.5325312614440918, |
|
"learning_rate": 9.99412455934195e-05, |
|
"loss": 0.2191, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 13.396004700352526, |
|
"grad_norm": 4.171008586883545, |
|
"learning_rate": 9.90599294947121e-05, |
|
"loss": 0.225, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 13.45475910693302, |
|
"grad_norm": 2.144474506378174, |
|
"learning_rate": 9.817861339600469e-05, |
|
"loss": 0.2191, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 13.513513513513514, |
|
"grad_norm": 1.8419458866119385, |
|
"learning_rate": 9.72972972972973e-05, |
|
"loss": 0.2244, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 13.513513513513514, |
|
"eval_accuracy": 0.7836016782754274, |
|
"eval_f1_macro": 0.46679865317019514, |
|
"eval_loss": 0.21025818586349487, |
|
"eval_precision": 0.5055179262576418, |
|
"eval_recall": 0.4468885382196594, |
|
"eval_runtime": 567.4084, |
|
"eval_samples_per_second": 85.27, |
|
"eval_steps_per_second": 0.666, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 13.572267920094006, |
|
"grad_norm": 2.069737672805786, |
|
"learning_rate": 9.64159811985899e-05, |
|
"loss": 0.2171, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 13.6310223266745, |
|
"grad_norm": 2.8181750774383545, |
|
"learning_rate": 9.553466509988249e-05, |
|
"loss": 0.2199, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 13.689776733254995, |
|
"grad_norm": 1.9151453971862793, |
|
"learning_rate": 9.465334900117508e-05, |
|
"loss": 0.2215, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 13.748531139835487, |
|
"grad_norm": 2.501735210418701, |
|
"learning_rate": 9.377203290246767e-05, |
|
"loss": 0.2224, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 13.807285546415981, |
|
"grad_norm": 4.269018173217773, |
|
"learning_rate": 9.289071680376027e-05, |
|
"loss": 0.2226, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 13.866039952996475, |
|
"grad_norm": 3.0230236053466797, |
|
"learning_rate": 9.200940070505287e-05, |
|
"loss": 0.2254, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 13.924794359576968, |
|
"grad_norm": 1.6129169464111328, |
|
"learning_rate": 9.112808460634546e-05, |
|
"loss": 0.223, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 13.983548766157462, |
|
"grad_norm": 3.050380229949951, |
|
"learning_rate": 9.024676850763806e-05, |
|
"loss": 0.2157, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 14.042303172737956, |
|
"grad_norm": 1.8896129131317139, |
|
"learning_rate": 8.936545240893067e-05, |
|
"loss": 0.2203, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 14.101057579318448, |
|
"grad_norm": 2.357605218887329, |
|
"learning_rate": 8.848413631022326e-05, |
|
"loss": 0.2181, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.101057579318448, |
|
"eval_accuracy": 0.7799227001219436, |
|
"eval_f1_macro": 0.4646864264769805, |
|
"eval_loss": 0.2089788019657135, |
|
"eval_precision": 0.5189278565795705, |
|
"eval_recall": 0.4427888401365953, |
|
"eval_runtime": 561.1944, |
|
"eval_samples_per_second": 86.214, |
|
"eval_steps_per_second": 0.674, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.159811985898942, |
|
"grad_norm": 2.8449645042419434, |
|
"learning_rate": 8.760282021151586e-05, |
|
"loss": 0.2158, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 14.218566392479437, |
|
"grad_norm": 3.220463752746582, |
|
"learning_rate": 8.672150411280845e-05, |
|
"loss": 0.2234, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 14.277320799059929, |
|
"grad_norm": 2.0377910137176514, |
|
"learning_rate": 8.584018801410105e-05, |
|
"loss": 0.222, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 14.336075205640423, |
|
"grad_norm": 2.213088274002075, |
|
"learning_rate": 8.495887191539365e-05, |
|
"loss": 0.217, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 14.394829612220917, |
|
"grad_norm": 3.5318024158477783, |
|
"learning_rate": 8.407755581668624e-05, |
|
"loss": 0.218, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 14.45358401880141, |
|
"grad_norm": 2.010096549987793, |
|
"learning_rate": 8.319623971797885e-05, |
|
"loss": 0.2189, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 14.512338425381904, |
|
"grad_norm": 1.9498238563537598, |
|
"learning_rate": 8.231492361927145e-05, |
|
"loss": 0.2238, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 14.571092831962398, |
|
"grad_norm": 2.8972408771514893, |
|
"learning_rate": 8.143360752056404e-05, |
|
"loss": 0.2133, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 14.62984723854289, |
|
"grad_norm": 1.39292311668396, |
|
"learning_rate": 8.055229142185663e-05, |
|
"loss": 0.2229, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 14.688601645123384, |
|
"grad_norm": 3.813009738922119, |
|
"learning_rate": 7.967097532314922e-05, |
|
"loss": 0.2165, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 14.688601645123384, |
|
"eval_accuracy": 0.7839530413575017, |
|
"eval_f1_macro": 0.4677806630551704, |
|
"eval_loss": 0.21110670268535614, |
|
"eval_precision": 0.5050811199516158, |
|
"eval_recall": 0.44609252100680624, |
|
"eval_runtime": 551.7967, |
|
"eval_samples_per_second": 87.683, |
|
"eval_steps_per_second": 0.685, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 14.747356051703878, |
|
"grad_norm": 2.7742602825164795, |
|
"learning_rate": 7.878965922444182e-05, |
|
"loss": 0.2139, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 14.80611045828437, |
|
"grad_norm": 2.4670934677124023, |
|
"learning_rate": 7.790834312573442e-05, |
|
"loss": 0.2183, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 14.864864864864865, |
|
"grad_norm": 3.2507874965667725, |
|
"learning_rate": 7.702702702702701e-05, |
|
"loss": 0.2229, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 14.923619271445359, |
|
"grad_norm": 1.7584885358810425, |
|
"learning_rate": 7.614571092831962e-05, |
|
"loss": 0.2171, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 14.982373678025851, |
|
"grad_norm": 2.5273969173431396, |
|
"learning_rate": 7.526439482961222e-05, |
|
"loss": 0.2184, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 15.041128084606346, |
|
"grad_norm": 2.622952699661255, |
|
"learning_rate": 7.438307873090481e-05, |
|
"loss": 0.2162, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 15.09988249118684, |
|
"grad_norm": 2.1974570751190186, |
|
"learning_rate": 7.350176263219741e-05, |
|
"loss": 0.2104, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 15.158636897767332, |
|
"grad_norm": 2.2584497928619385, |
|
"learning_rate": 7.262044653349001e-05, |
|
"loss": 0.2152, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 15.217391304347826, |
|
"grad_norm": 3.1817431449890137, |
|
"learning_rate": 7.17391304347826e-05, |
|
"loss": 0.2144, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 15.27614571092832, |
|
"grad_norm": 3.306057929992676, |
|
"learning_rate": 7.08578143360752e-05, |
|
"loss": 0.2197, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 15.27614571092832, |
|
"eval_accuracy": 0.7750036169729037, |
|
"eval_f1_macro": 0.4676251296697811, |
|
"eval_loss": 0.21305988729000092, |
|
"eval_precision": 0.49501914130610697, |
|
"eval_recall": 0.45513146662613874, |
|
"eval_runtime": 565.178, |
|
"eval_samples_per_second": 85.607, |
|
"eval_steps_per_second": 0.669, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 15.334900117508813, |
|
"grad_norm": 2.0208778381347656, |
|
"learning_rate": 6.99764982373678e-05, |
|
"loss": 0.2214, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 15.393654524089307, |
|
"grad_norm": 3.2097744941711426, |
|
"learning_rate": 6.909518213866038e-05, |
|
"loss": 0.2195, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 15.452408930669801, |
|
"grad_norm": 2.718372344970703, |
|
"learning_rate": 6.821386603995299e-05, |
|
"loss": 0.2233, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 15.511163337250293, |
|
"grad_norm": 3.371232032775879, |
|
"learning_rate": 6.733254994124559e-05, |
|
"loss": 0.2148, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 15.569917743830787, |
|
"grad_norm": 1.748062014579773, |
|
"learning_rate": 6.645123384253818e-05, |
|
"loss": 0.2151, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 15.628672150411282, |
|
"grad_norm": 2.6323885917663574, |
|
"learning_rate": 6.556991774383078e-05, |
|
"loss": 0.2193, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 15.687426556991774, |
|
"grad_norm": 3.380427598953247, |
|
"learning_rate": 6.468860164512338e-05, |
|
"loss": 0.2151, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 15.746180963572268, |
|
"grad_norm": 2.617914915084839, |
|
"learning_rate": 6.380728554641597e-05, |
|
"loss": 0.2175, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 15.804935370152762, |
|
"grad_norm": 2.5959670543670654, |
|
"learning_rate": 6.292596944770856e-05, |
|
"loss": 0.2158, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 15.863689776733255, |
|
"grad_norm": 1.8247867822647095, |
|
"learning_rate": 6.204465334900117e-05, |
|
"loss": 0.2173, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 15.863689776733255, |
|
"eval_accuracy": 0.7821548891139449, |
|
"eval_f1_macro": 0.46965749568257437, |
|
"eval_loss": 0.2087218463420868, |
|
"eval_precision": 0.5036667780561973, |
|
"eval_recall": 0.45166012346117074, |
|
"eval_runtime": 551.32, |
|
"eval_samples_per_second": 87.758, |
|
"eval_steps_per_second": 0.686, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 15.922444183313749, |
|
"grad_norm": 2.068295478820801, |
|
"learning_rate": 6.116333725029377e-05, |
|
"loss": 0.2161, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 15.981198589894243, |
|
"grad_norm": 2.173018455505371, |
|
"learning_rate": 6.0282021151586365e-05, |
|
"loss": 0.2192, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 16.039952996474735, |
|
"grad_norm": 3.564419746398926, |
|
"learning_rate": 5.940070505287896e-05, |
|
"loss": 0.213, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 16.09870740305523, |
|
"grad_norm": 2.131643772125244, |
|
"learning_rate": 5.851938895417156e-05, |
|
"loss": 0.219, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 16.157461809635723, |
|
"grad_norm": 1.6084250211715698, |
|
"learning_rate": 5.7638072855464154e-05, |
|
"loss": 0.2168, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 16.216216216216218, |
|
"grad_norm": 1.8609333038330078, |
|
"learning_rate": 5.6756756756756757e-05, |
|
"loss": 0.2203, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 16.274970622796708, |
|
"grad_norm": 1.929494857788086, |
|
"learning_rate": 5.5875440658049346e-05, |
|
"loss": 0.2126, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 16.333725029377202, |
|
"grad_norm": 1.7891273498535156, |
|
"learning_rate": 5.499412455934194e-05, |
|
"loss": 0.2125, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 16.392479435957696, |
|
"grad_norm": 1.935006022453308, |
|
"learning_rate": 5.411280846063454e-05, |
|
"loss": 0.2136, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 16.45123384253819, |
|
"grad_norm": 2.7039895057678223, |
|
"learning_rate": 5.323149236192714e-05, |
|
"loss": 0.2204, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 16.45123384253819, |
|
"eval_accuracy": 0.781844862865056, |
|
"eval_f1_macro": 0.47010557287584404, |
|
"eval_loss": 0.20962630212306976, |
|
"eval_precision": 0.5006758936230861, |
|
"eval_recall": 0.45393923309607365, |
|
"eval_runtime": 539.5236, |
|
"eval_samples_per_second": 89.677, |
|
"eval_steps_per_second": 0.701, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 16.509988249118685, |
|
"grad_norm": 2.465174913406372, |
|
"learning_rate": 5.235017626321974e-05, |
|
"loss": 0.215, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 16.56874265569918, |
|
"grad_norm": 3.626897096633911, |
|
"learning_rate": 5.146886016451233e-05, |
|
"loss": 0.2159, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 16.62749706227967, |
|
"grad_norm": 1.9083856344223022, |
|
"learning_rate": 5.0587544065804936e-05, |
|
"loss": 0.2146, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 16.686251468860164, |
|
"grad_norm": 1.8644742965698242, |
|
"learning_rate": 4.970622796709753e-05, |
|
"loss": 0.2155, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 16.745005875440658, |
|
"grad_norm": 2.8223023414611816, |
|
"learning_rate": 4.882491186839013e-05, |
|
"loss": 0.2127, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 16.803760282021152, |
|
"grad_norm": 2.9986822605133057, |
|
"learning_rate": 4.794359576968272e-05, |
|
"loss": 0.2168, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 16.862514688601646, |
|
"grad_norm": 2.5670571327209473, |
|
"learning_rate": 4.7062279670975314e-05, |
|
"loss": 0.2105, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 16.92126909518214, |
|
"grad_norm": 4.0372467041015625, |
|
"learning_rate": 4.6180963572267917e-05, |
|
"loss": 0.2116, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 16.98002350176263, |
|
"grad_norm": 2.199449300765991, |
|
"learning_rate": 4.529964747356051e-05, |
|
"loss": 0.2188, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 17.038777908343125, |
|
"grad_norm": 2.2641525268554688, |
|
"learning_rate": 4.441833137485311e-05, |
|
"loss": 0.2157, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 17.038777908343125, |
|
"eval_accuracy": 0.7866192670979476, |
|
"eval_f1_macro": 0.46819371251173775, |
|
"eval_loss": 0.2086167186498642, |
|
"eval_precision": 0.5101989602452385, |
|
"eval_recall": 0.44473033751495855, |
|
"eval_runtime": 537.4744, |
|
"eval_samples_per_second": 90.019, |
|
"eval_steps_per_second": 0.703, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 17.09753231492362, |
|
"grad_norm": 1.7599774599075317, |
|
"learning_rate": 4.353701527614571e-05, |
|
"loss": 0.2147, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 17.156286721504113, |
|
"grad_norm": 3.7391819953918457, |
|
"learning_rate": 4.265569917743831e-05, |
|
"loss": 0.2131, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 17.215041128084607, |
|
"grad_norm": 1.628392219543457, |
|
"learning_rate": 4.1774383078730904e-05, |
|
"loss": 0.211, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 17.2737955346651, |
|
"grad_norm": 2.0813705921173096, |
|
"learning_rate": 4.089306698002349e-05, |
|
"loss": 0.213, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 17.332549941245592, |
|
"grad_norm": 1.5833709239959717, |
|
"learning_rate": 4.0011750881316096e-05, |
|
"loss": 0.2122, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 17.391304347826086, |
|
"grad_norm": 2.216641664505005, |
|
"learning_rate": 3.913043478260869e-05, |
|
"loss": 0.2123, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 17.45005875440658, |
|
"grad_norm": 1.9753063917160034, |
|
"learning_rate": 3.824911868390129e-05, |
|
"loss": 0.2121, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 17.508813160987074, |
|
"grad_norm": 2.2607269287109375, |
|
"learning_rate": 3.7367802585193884e-05, |
|
"loss": 0.2179, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 17.56756756756757, |
|
"grad_norm": 4.074460506439209, |
|
"learning_rate": 3.648648648648649e-05, |
|
"loss": 0.2104, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 17.626321974148063, |
|
"grad_norm": 1.758702039718628, |
|
"learning_rate": 3.560517038777908e-05, |
|
"loss": 0.2135, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 17.626321974148063, |
|
"eval_accuracy": 0.781369489283426, |
|
"eval_f1_macro": 0.4699363142405637, |
|
"eval_loss": 0.20687498152256012, |
|
"eval_precision": 0.5076781202687786, |
|
"eval_recall": 0.4518498245169957, |
|
"eval_runtime": 584.9222, |
|
"eval_samples_per_second": 82.717, |
|
"eval_steps_per_second": 0.646, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 17.685076380728553, |
|
"grad_norm": 2.3470921516418457, |
|
"learning_rate": 3.472385428907168e-05, |
|
"loss": 0.2152, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 17.743830787309047, |
|
"grad_norm": 2.4279568195343018, |
|
"learning_rate": 3.3842538190364276e-05, |
|
"loss": 0.2144, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 17.80258519388954, |
|
"grad_norm": 1.7226651906967163, |
|
"learning_rate": 3.296122209165687e-05, |
|
"loss": 0.214, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 17.861339600470036, |
|
"grad_norm": 3.090634346008301, |
|
"learning_rate": 3.207990599294947e-05, |
|
"loss": 0.2175, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 17.92009400705053, |
|
"grad_norm": 1.787026286125183, |
|
"learning_rate": 3.1198589894242064e-05, |
|
"loss": 0.2112, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 17.978848413631024, |
|
"grad_norm": 1.844420313835144, |
|
"learning_rate": 3.0317273795534663e-05, |
|
"loss": 0.2132, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 18.037602820211514, |
|
"grad_norm": 1.7395440340042114, |
|
"learning_rate": 2.9435957696827263e-05, |
|
"loss": 0.2091, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 18.09635722679201, |
|
"grad_norm": 1.9990227222442627, |
|
"learning_rate": 2.8554641598119856e-05, |
|
"loss": 0.2123, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 18.155111633372503, |
|
"grad_norm": 2.4807918071746826, |
|
"learning_rate": 2.7673325499412452e-05, |
|
"loss": 0.2082, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 18.213866039952997, |
|
"grad_norm": 2.496959924697876, |
|
"learning_rate": 2.679200940070505e-05, |
|
"loss": 0.2088, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 18.213866039952997, |
|
"eval_accuracy": 0.783353657276316, |
|
"eval_f1_macro": 0.4700197150315801, |
|
"eval_loss": 0.20601825416088104, |
|
"eval_precision": 0.5112111682738877, |
|
"eval_recall": 0.45050131045693015, |
|
"eval_runtime": 542.6455, |
|
"eval_samples_per_second": 89.161, |
|
"eval_steps_per_second": 0.697, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 18.27262044653349, |
|
"grad_norm": 2.345229387283325, |
|
"learning_rate": 2.591069330199765e-05, |
|
"loss": 0.2152, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 18.331374853113985, |
|
"grad_norm": 2.492675542831421, |
|
"learning_rate": 2.5029377203290243e-05, |
|
"loss": 0.2115, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 18.390129259694476, |
|
"grad_norm": 2.251948833465576, |
|
"learning_rate": 2.4148061104582843e-05, |
|
"loss": 0.2111, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 18.44888366627497, |
|
"grad_norm": 2.327437400817871, |
|
"learning_rate": 2.326674500587544e-05, |
|
"loss": 0.2085, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 18.507638072855464, |
|
"grad_norm": 2.292947292327881, |
|
"learning_rate": 2.238542890716804e-05, |
|
"loss": 0.2141, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 18.566392479435958, |
|
"grad_norm": 2.319504499435425, |
|
"learning_rate": 2.150411280846063e-05, |
|
"loss": 0.2133, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 18.625146886016452, |
|
"grad_norm": 2.6240415573120117, |
|
"learning_rate": 2.062279670975323e-05, |
|
"loss": 0.2161, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 18.683901292596946, |
|
"grad_norm": 1.637320637702942, |
|
"learning_rate": 1.9741480611045827e-05, |
|
"loss": 0.2084, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 18.742655699177437, |
|
"grad_norm": 1.8866758346557617, |
|
"learning_rate": 1.8860164512338426e-05, |
|
"loss": 0.2099, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 18.80141010575793, |
|
"grad_norm": 1.5909967422485352, |
|
"learning_rate": 1.7978848413631022e-05, |
|
"loss": 0.215, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 18.80141010575793, |
|
"eval_accuracy": 0.7825475890292045, |
|
"eval_f1_macro": 0.4729882480682901, |
|
"eval_loss": 0.2060166597366333, |
|
"eval_precision": 0.5092421494744399, |
|
"eval_recall": 0.45578106777502264, |
|
"eval_runtime": 530.5298, |
|
"eval_samples_per_second": 91.198, |
|
"eval_steps_per_second": 0.712, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 18.860164512338425, |
|
"grad_norm": 2.3732552528381348, |
|
"learning_rate": 1.709753231492362e-05, |
|
"loss": 0.2126, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 18.91891891891892, |
|
"grad_norm": 2.5773391723632812, |
|
"learning_rate": 1.6216216216216215e-05, |
|
"loss": 0.2095, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 18.977673325499413, |
|
"grad_norm": 2.3607606887817383, |
|
"learning_rate": 1.533490011750881e-05, |
|
"loss": 0.2099, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 19.036427732079908, |
|
"grad_norm": 3.814934730529785, |
|
"learning_rate": 1.445358401880141e-05, |
|
"loss": 0.21, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 19.095182138660398, |
|
"grad_norm": 1.9512494802474976, |
|
"learning_rate": 1.3572267920094006e-05, |
|
"loss": 0.2098, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 19.153936545240892, |
|
"grad_norm": 2.072913408279419, |
|
"learning_rate": 1.2690951821386604e-05, |
|
"loss": 0.2156, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 19.212690951821386, |
|
"grad_norm": 3.2823166847229004, |
|
"learning_rate": 1.18096357226792e-05, |
|
"loss": 0.2058, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 19.27144535840188, |
|
"grad_norm": 2.4737155437469482, |
|
"learning_rate": 1.0928319623971798e-05, |
|
"loss": 0.2084, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 19.330199764982375, |
|
"grad_norm": 2.195495843887329, |
|
"learning_rate": 1.0047003525264394e-05, |
|
"loss": 0.2099, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 19.38895417156287, |
|
"grad_norm": 2.7346057891845703, |
|
"learning_rate": 9.16568742655699e-06, |
|
"loss": 0.2138, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 19.38895417156287, |
|
"eval_accuracy": 0.7822788996135006, |
|
"eval_f1_macro": 0.47654790915623974, |
|
"eval_loss": 0.2069149762392044, |
|
"eval_precision": 0.5055123102220627, |
|
"eval_recall": 0.4617181765781322, |
|
"eval_runtime": 563.9047, |
|
"eval_samples_per_second": 85.8, |
|
"eval_steps_per_second": 0.67, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 19.44770857814336, |
|
"grad_norm": 1.7967709302902222, |
|
"learning_rate": 8.284371327849588e-06, |
|
"loss": 0.2113, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 19.506462984723854, |
|
"grad_norm": 2.043199300765991, |
|
"learning_rate": 7.403055229142185e-06, |
|
"loss": 0.2134, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 19.565217391304348, |
|
"grad_norm": 2.2731423377990723, |
|
"learning_rate": 6.521739130434782e-06, |
|
"loss": 0.2053, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 19.623971797884842, |
|
"grad_norm": 1.7844184637069702, |
|
"learning_rate": 5.640423031727379e-06, |
|
"loss": 0.2102, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 19.682726204465336, |
|
"grad_norm": 2.1626009941101074, |
|
"learning_rate": 4.759106933019976e-06, |
|
"loss": 0.2165, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 19.74148061104583, |
|
"grad_norm": 2.5223422050476074, |
|
"learning_rate": 3.877790834312573e-06, |
|
"loss": 0.2061, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 19.80023501762632, |
|
"grad_norm": 2.5433812141418457, |
|
"learning_rate": 2.99647473560517e-06, |
|
"loss": 0.2083, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 19.858989424206815, |
|
"grad_norm": 2.087890148162842, |
|
"learning_rate": 2.1151586368977672e-06, |
|
"loss": 0.2117, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 19.91774383078731, |
|
"grad_norm": 2.9558768272399902, |
|
"learning_rate": 1.2338425381903642e-06, |
|
"loss": 0.2139, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 19.976498237367803, |
|
"grad_norm": 2.22611927986145, |
|
"learning_rate": 3.525264394829612e-07, |
|
"loss": 0.2125, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 19.976498237367803, |
|
"eval_accuracy": 0.7837876940247608, |
|
"eval_f1_macro": 0.4752409882222652, |
|
"eval_loss": 0.20548085868358612, |
|
"eval_precision": 0.506773667085311, |
|
"eval_recall": 0.45863653128297405, |
|
"eval_runtime": 556.0383, |
|
"eval_samples_per_second": 87.014, |
|
"eval_steps_per_second": 0.68, |
|
"step": 34000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 34040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2885422443855616e+18, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|