|
{ |
|
"best_metric": 0.728, |
|
"best_model_checkpoint": "VT_15/checkpoint-7279", |
|
"epoch": 29.0, |
|
"eval_steps": 500, |
|
"global_step": 7279, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.398406374501992, |
|
"grad_norm": 11.727704048156738, |
|
"learning_rate": 9.867197875166003e-05, |
|
"loss": 1.0157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.796812749003984, |
|
"grad_norm": 11.50269603729248, |
|
"learning_rate": 9.734395750332006e-05, |
|
"loss": 0.8938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.548, |
|
"eval_f1": 0.4882155949880584, |
|
"eval_loss": 0.9423586130142212, |
|
"eval_precision": 0.5733499456903712, |
|
"eval_recall": 0.5448468191272983, |
|
"eval_runtime": 16.5911, |
|
"eval_samples_per_second": 30.137, |
|
"eval_steps_per_second": 3.797, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.1952191235059761, |
|
"grad_norm": 10.47740650177002, |
|
"learning_rate": 9.601593625498009e-05, |
|
"loss": 0.8485, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"grad_norm": 12.222431182861328, |
|
"learning_rate": 9.468791500664011e-05, |
|
"loss": 0.7978, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9920318725099602, |
|
"grad_norm": 13.02956771850586, |
|
"learning_rate": 9.335989375830013e-05, |
|
"loss": 0.8093, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.61, |
|
"eval_f1": 0.584707628587346, |
|
"eval_loss": 0.8303987979888916, |
|
"eval_precision": 0.6022599150943656, |
|
"eval_recall": 0.6097290164964017, |
|
"eval_runtime": 15.8756, |
|
"eval_samples_per_second": 31.495, |
|
"eval_steps_per_second": 3.968, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.3904382470119523, |
|
"grad_norm": 11.920299530029297, |
|
"learning_rate": 9.203187250996016e-05, |
|
"loss": 0.7275, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.7888446215139444, |
|
"grad_norm": 12.057920455932617, |
|
"learning_rate": 9.070385126162018e-05, |
|
"loss": 0.7444, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.578, |
|
"eval_f1": 0.5180026990553307, |
|
"eval_loss": 0.8966869115829468, |
|
"eval_precision": 0.6246163183733936, |
|
"eval_recall": 0.5751176873923239, |
|
"eval_runtime": 16.5617, |
|
"eval_samples_per_second": 30.19, |
|
"eval_steps_per_second": 3.804, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 3.187250996015936, |
|
"grad_norm": 12.730193138122559, |
|
"learning_rate": 8.937583001328021e-05, |
|
"loss": 0.7091, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.585657370517928, |
|
"grad_norm": 11.721458435058594, |
|
"learning_rate": 8.804780876494024e-05, |
|
"loss": 0.6502, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.9840637450199203, |
|
"grad_norm": 11.902241706848145, |
|
"learning_rate": 8.671978751660027e-05, |
|
"loss": 0.6391, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.624, |
|
"eval_f1": 0.6176260916415671, |
|
"eval_loss": 0.8131240010261536, |
|
"eval_precision": 0.6213088498802785, |
|
"eval_recall": 0.6225291409540601, |
|
"eval_runtime": 16.9186, |
|
"eval_samples_per_second": 29.553, |
|
"eval_steps_per_second": 3.724, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 4.382470119521912, |
|
"grad_norm": 11.794739723205566, |
|
"learning_rate": 8.539176626826029e-05, |
|
"loss": 0.5683, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.780876494023905, |
|
"grad_norm": 11.683808326721191, |
|
"learning_rate": 8.406374501992032e-05, |
|
"loss": 0.5691, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.622, |
|
"eval_f1": 0.5716700610476999, |
|
"eval_loss": 0.8928351998329163, |
|
"eval_precision": 0.6410496659546192, |
|
"eval_recall": 0.6199959090306707, |
|
"eval_runtime": 16.6354, |
|
"eval_samples_per_second": 30.056, |
|
"eval_steps_per_second": 3.787, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 5.179282868525896, |
|
"grad_norm": 13.705164909362793, |
|
"learning_rate": 8.273572377158035e-05, |
|
"loss": 0.5855, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.577689243027889, |
|
"grad_norm": 11.817400932312012, |
|
"learning_rate": 8.140770252324038e-05, |
|
"loss": 0.513, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.9760956175298805, |
|
"grad_norm": 11.755231857299805, |
|
"learning_rate": 8.00796812749004e-05, |
|
"loss": 0.5009, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.656, |
|
"eval_f1": 0.6379970132206839, |
|
"eval_loss": 0.8215978741645813, |
|
"eval_precision": 0.6485419139105967, |
|
"eval_recall": 0.6545787165880164, |
|
"eval_runtime": 17.7464, |
|
"eval_samples_per_second": 28.175, |
|
"eval_steps_per_second": 3.55, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 6.374501992031872, |
|
"grad_norm": 10.913691520690918, |
|
"learning_rate": 7.875166002656043e-05, |
|
"loss": 0.4488, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.772908366533865, |
|
"grad_norm": 10.615025520324707, |
|
"learning_rate": 7.742363877822046e-05, |
|
"loss": 0.4855, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.654, |
|
"eval_f1": 0.6439337408377656, |
|
"eval_loss": 0.8311923742294312, |
|
"eval_precision": 0.6495670995670996, |
|
"eval_recall": 0.6520354012895196, |
|
"eval_runtime": 17.519, |
|
"eval_samples_per_second": 28.54, |
|
"eval_steps_per_second": 3.596, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 7.171314741035856, |
|
"grad_norm": 12.026023864746094, |
|
"learning_rate": 7.609561752988048e-05, |
|
"loss": 0.4177, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.569721115537849, |
|
"grad_norm": 10.010376930236816, |
|
"learning_rate": 7.476759628154051e-05, |
|
"loss": 0.409, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.968127490039841, |
|
"grad_norm": 12.596341133117676, |
|
"learning_rate": 7.343957503320054e-05, |
|
"loss": 0.39, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.682, |
|
"eval_f1": 0.6538606492353024, |
|
"eval_loss": 0.9214051365852356, |
|
"eval_precision": 0.7101278814728985, |
|
"eval_recall": 0.6804800262743945, |
|
"eval_runtime": 17.2651, |
|
"eval_samples_per_second": 28.96, |
|
"eval_steps_per_second": 3.649, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 8.366533864541832, |
|
"grad_norm": 14.169309616088867, |
|
"learning_rate": 7.211155378486057e-05, |
|
"loss": 0.3646, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.764940239043824, |
|
"grad_norm": 12.020890235900879, |
|
"learning_rate": 7.07835325365206e-05, |
|
"loss": 0.3708, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.598, |
|
"eval_f1": 0.5725541685903895, |
|
"eval_loss": 1.0422428846359253, |
|
"eval_precision": 0.6090800979488745, |
|
"eval_recall": 0.5948744763847355, |
|
"eval_runtime": 17.2984, |
|
"eval_samples_per_second": 28.904, |
|
"eval_steps_per_second": 3.642, |
|
"step": 2259 |
|
}, |
|
{ |
|
"epoch": 9.163346613545817, |
|
"grad_norm": 10.583425521850586, |
|
"learning_rate": 6.945551128818062e-05, |
|
"loss": 0.3533, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.56175298804781, |
|
"grad_norm": 13.7178316116333, |
|
"learning_rate": 6.812749003984064e-05, |
|
"loss": 0.2984, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.9601593625498, |
|
"grad_norm": 10.42063045501709, |
|
"learning_rate": 6.679946879150066e-05, |
|
"loss": 0.3328, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.718, |
|
"eval_f1": 0.715429962270956, |
|
"eval_loss": 0.7483692765235901, |
|
"eval_precision": 0.7195966559320596, |
|
"eval_recall": 0.7178075285359515, |
|
"eval_runtime": 17.492, |
|
"eval_samples_per_second": 28.584, |
|
"eval_steps_per_second": 3.602, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 10.358565737051793, |
|
"grad_norm": 10.752534866333008, |
|
"learning_rate": 6.547144754316069e-05, |
|
"loss": 0.2561, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.756972111553784, |
|
"grad_norm": 10.988365173339844, |
|
"learning_rate": 6.414342629482072e-05, |
|
"loss": 0.3092, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7160176967190494, |
|
"eval_loss": 0.8250208497047424, |
|
"eval_precision": 0.7184132303947973, |
|
"eval_recall": 0.7186398391269527, |
|
"eval_runtime": 17.2463, |
|
"eval_samples_per_second": 28.992, |
|
"eval_steps_per_second": 3.653, |
|
"step": 2761 |
|
}, |
|
{ |
|
"epoch": 11.155378486055778, |
|
"grad_norm": 10.967025756835938, |
|
"learning_rate": 6.281540504648075e-05, |
|
"loss": 0.28, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.55378486055777, |
|
"grad_norm": 11.933313369750977, |
|
"learning_rate": 6.148738379814077e-05, |
|
"loss": 0.2747, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.952191235059761, |
|
"grad_norm": 12.90857219696045, |
|
"learning_rate": 6.01593625498008e-05, |
|
"loss": 0.281, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.694, |
|
"eval_f1": 0.6781251589992235, |
|
"eval_loss": 0.9806899428367615, |
|
"eval_precision": 0.6976797604396068, |
|
"eval_recall": 0.6921398650556313, |
|
"eval_runtime": 17.2981, |
|
"eval_samples_per_second": 28.905, |
|
"eval_steps_per_second": 3.642, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 12.350597609561753, |
|
"grad_norm": 11.363082885742188, |
|
"learning_rate": 5.883134130146083e-05, |
|
"loss": 0.2596, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.749003984063744, |
|
"grad_norm": 12.736093521118164, |
|
"learning_rate": 5.7503320053120855e-05, |
|
"loss": 0.2162, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.708, |
|
"eval_f1": 0.7021505447248022, |
|
"eval_loss": 0.9850034117698669, |
|
"eval_precision": 0.7101112865680695, |
|
"eval_recall": 0.7063781669000248, |
|
"eval_runtime": 17.6848, |
|
"eval_samples_per_second": 28.273, |
|
"eval_steps_per_second": 3.562, |
|
"step": 3263 |
|
}, |
|
{ |
|
"epoch": 13.147410358565738, |
|
"grad_norm": 11.957535743713379, |
|
"learning_rate": 5.6175298804780876e-05, |
|
"loss": 0.2271, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.54581673306773, |
|
"grad_norm": 13.91019058227539, |
|
"learning_rate": 5.48472775564409e-05, |
|
"loss": 0.2284, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 13.944223107569721, |
|
"grad_norm": 10.606439590454102, |
|
"learning_rate": 5.351925630810093e-05, |
|
"loss": 0.2352, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.698, |
|
"eval_f1": 0.6875106838790609, |
|
"eval_loss": 0.9122900366783142, |
|
"eval_precision": 0.6937544840437923, |
|
"eval_recall": 0.6964621758194903, |
|
"eval_runtime": 17.3918, |
|
"eval_samples_per_second": 28.749, |
|
"eval_steps_per_second": 3.622, |
|
"step": 3514 |
|
}, |
|
{ |
|
"epoch": 14.342629482071713, |
|
"grad_norm": 10.729408264160156, |
|
"learning_rate": 5.219123505976096e-05, |
|
"loss": 0.2339, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.741035856573705, |
|
"grad_norm": 10.717667579650879, |
|
"learning_rate": 5.0863213811420985e-05, |
|
"loss": 0.1947, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.694, |
|
"eval_f1": 0.6847697638967624, |
|
"eval_loss": 1.0269464254379272, |
|
"eval_precision": 0.6984319398216817, |
|
"eval_recall": 0.6918425495381815, |
|
"eval_runtime": 17.3612, |
|
"eval_samples_per_second": 28.8, |
|
"eval_steps_per_second": 3.629, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 15.139442231075698, |
|
"grad_norm": 15.637863159179688, |
|
"learning_rate": 4.953519256308101e-05, |
|
"loss": 0.2033, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15.53784860557769, |
|
"grad_norm": 11.52315902709961, |
|
"learning_rate": 4.820717131474104e-05, |
|
"loss": 0.2073, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 15.936254980079681, |
|
"grad_norm": 10.585031509399414, |
|
"learning_rate": 4.687915006640107e-05, |
|
"loss": 0.1902, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.702, |
|
"eval_f1": 0.6936988146223305, |
|
"eval_loss": 1.0092582702636719, |
|
"eval_precision": 0.704450992084371, |
|
"eval_recall": 0.6998746780522377, |
|
"eval_runtime": 17.2437, |
|
"eval_samples_per_second": 28.996, |
|
"eval_steps_per_second": 3.654, |
|
"step": 4016 |
|
}, |
|
{ |
|
"epoch": 16.334661354581673, |
|
"grad_norm": 13.553791046142578, |
|
"learning_rate": 4.555112881806109e-05, |
|
"loss": 0.1961, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 16.733067729083665, |
|
"grad_norm": 10.33850383758545, |
|
"learning_rate": 4.4223107569721116e-05, |
|
"loss": 0.1912, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.712, |
|
"eval_f1": 0.7037256290589013, |
|
"eval_loss": 0.9451501369476318, |
|
"eval_precision": 0.7113280708081392, |
|
"eval_recall": 0.710350440499444, |
|
"eval_runtime": 17.5777, |
|
"eval_samples_per_second": 28.445, |
|
"eval_steps_per_second": 3.584, |
|
"step": 4267 |
|
}, |
|
{ |
|
"epoch": 17.131474103585656, |
|
"grad_norm": 10.550813674926758, |
|
"learning_rate": 4.289508632138114e-05, |
|
"loss": 0.1724, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 17.529880478087648, |
|
"grad_norm": 11.269770622253418, |
|
"learning_rate": 4.156706507304117e-05, |
|
"loss": 0.166, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 17.92828685258964, |
|
"grad_norm": 11.22702693939209, |
|
"learning_rate": 4.02390438247012e-05, |
|
"loss": 0.1626, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.6965991557822268, |
|
"eval_loss": 1.0229520797729492, |
|
"eval_precision": 0.7119117791531585, |
|
"eval_recall": 0.7080606500607883, |
|
"eval_runtime": 17.3563, |
|
"eval_samples_per_second": 28.808, |
|
"eval_steps_per_second": 3.63, |
|
"step": 4518 |
|
}, |
|
{ |
|
"epoch": 18.326693227091635, |
|
"grad_norm": 11.750710487365723, |
|
"learning_rate": 3.8911022576361225e-05, |
|
"loss": 0.146, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.725099601593627, |
|
"grad_norm": 10.37628173828125, |
|
"learning_rate": 3.758300132802125e-05, |
|
"loss": 0.1524, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.716, |
|
"eval_f1": 0.7120852228214192, |
|
"eval_loss": 0.9977978467941284, |
|
"eval_precision": 0.7210033022533023, |
|
"eval_recall": 0.7144085668354911, |
|
"eval_runtime": 17.5249, |
|
"eval_samples_per_second": 28.531, |
|
"eval_steps_per_second": 3.595, |
|
"step": 4769 |
|
}, |
|
{ |
|
"epoch": 19.12350597609562, |
|
"grad_norm": 12.99516487121582, |
|
"learning_rate": 3.625498007968128e-05, |
|
"loss": 0.1321, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.52191235059761, |
|
"grad_norm": 11.697456359863281, |
|
"learning_rate": 3.492695883134131e-05, |
|
"loss": 0.1508, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 19.9203187250996, |
|
"grad_norm": 11.452008247375488, |
|
"learning_rate": 3.359893758300133e-05, |
|
"loss": 0.1258, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7074786456937486, |
|
"eval_loss": 1.050653338432312, |
|
"eval_precision": 0.7215994120996662, |
|
"eval_recall": 0.7083547965174904, |
|
"eval_runtime": 29.4041, |
|
"eval_samples_per_second": 17.004, |
|
"eval_steps_per_second": 2.143, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 20.318725099601593, |
|
"grad_norm": 12.027978897094727, |
|
"learning_rate": 3.2270916334661356e-05, |
|
"loss": 0.1387, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.717131474103585, |
|
"grad_norm": 13.427599906921387, |
|
"learning_rate": 3.094289508632138e-05, |
|
"loss": 0.1116, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.724, |
|
"eval_f1": 0.7153409174915838, |
|
"eval_loss": 1.0689764022827148, |
|
"eval_precision": 0.7231692880094706, |
|
"eval_recall": 0.722209642011374, |
|
"eval_runtime": 17.4789, |
|
"eval_samples_per_second": 28.606, |
|
"eval_steps_per_second": 3.604, |
|
"step": 5271 |
|
}, |
|
{ |
|
"epoch": 21.115537848605577, |
|
"grad_norm": 12.410263061523438, |
|
"learning_rate": 2.961487383798141e-05, |
|
"loss": 0.1378, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 21.51394422310757, |
|
"grad_norm": 12.236252784729004, |
|
"learning_rate": 2.8286852589641438e-05, |
|
"loss": 0.1181, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 21.91235059760956, |
|
"grad_norm": 11.362260818481445, |
|
"learning_rate": 2.6958831341301462e-05, |
|
"loss": 0.1158, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.702, |
|
"eval_f1": 0.6967439243675191, |
|
"eval_loss": 1.1378962993621826, |
|
"eval_precision": 0.7034445997704206, |
|
"eval_recall": 0.7004485661440597, |
|
"eval_runtime": 17.752, |
|
"eval_samples_per_second": 28.166, |
|
"eval_steps_per_second": 3.549, |
|
"step": 5522 |
|
}, |
|
{ |
|
"epoch": 22.310756972111555, |
|
"grad_norm": 12.567873001098633, |
|
"learning_rate": 2.563081009296149e-05, |
|
"loss": 0.1089, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.709163346613547, |
|
"grad_norm": 10.872307777404785, |
|
"learning_rate": 2.4302788844621517e-05, |
|
"loss": 0.1069, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.722, |
|
"eval_f1": 0.7172758119553166, |
|
"eval_loss": 1.157360553741455, |
|
"eval_precision": 0.727205590108816, |
|
"eval_recall": 0.7205813209797584, |
|
"eval_runtime": 17.8753, |
|
"eval_samples_per_second": 27.972, |
|
"eval_steps_per_second": 3.524, |
|
"step": 5773 |
|
}, |
|
{ |
|
"epoch": 23.10756972111554, |
|
"grad_norm": 10.918773651123047, |
|
"learning_rate": 2.297476759628154e-05, |
|
"loss": 0.1112, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 23.50597609561753, |
|
"grad_norm": 11.303016662597656, |
|
"learning_rate": 2.1646746347941568e-05, |
|
"loss": 0.0954, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 23.904382470119522, |
|
"grad_norm": 10.82700252532959, |
|
"learning_rate": 2.0318725099601595e-05, |
|
"loss": 0.1089, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.712, |
|
"eval_f1": 0.7075426800060708, |
|
"eval_loss": 1.1160012483596802, |
|
"eval_precision": 0.7194793034050283, |
|
"eval_recall": 0.7103697429603638, |
|
"eval_runtime": 17.7441, |
|
"eval_samples_per_second": 28.178, |
|
"eval_steps_per_second": 3.55, |
|
"step": 6024 |
|
}, |
|
{ |
|
"epoch": 24.302788844621514, |
|
"grad_norm": 10.5631742477417, |
|
"learning_rate": 1.899070385126162e-05, |
|
"loss": 0.0864, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 24.701195219123505, |
|
"grad_norm": 10.918201446533203, |
|
"learning_rate": 1.7662682602921647e-05, |
|
"loss": 0.0999, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.716, |
|
"eval_f1": 0.7090935362771184, |
|
"eval_loss": 1.0727450847625732, |
|
"eval_precision": 0.7106284520077623, |
|
"eval_recall": 0.7145099767794276, |
|
"eval_runtime": 17.8845, |
|
"eval_samples_per_second": 27.957, |
|
"eval_steps_per_second": 3.523, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 25.099601593625497, |
|
"grad_norm": 11.402228355407715, |
|
"learning_rate": 1.6334661354581674e-05, |
|
"loss": 0.1042, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.49800796812749, |
|
"grad_norm": 11.740915298461914, |
|
"learning_rate": 1.5006640106241702e-05, |
|
"loss": 0.089, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 25.89641434262948, |
|
"grad_norm": 11.218791961669922, |
|
"learning_rate": 1.3678618857901726e-05, |
|
"loss": 0.0738, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.706, |
|
"eval_f1": 0.699550751079995, |
|
"eval_loss": 1.2584666013717651, |
|
"eval_precision": 0.7133200179296525, |
|
"eval_recall": 0.704105950343699, |
|
"eval_runtime": 18.0765, |
|
"eval_samples_per_second": 27.66, |
|
"eval_steps_per_second": 3.485, |
|
"step": 6526 |
|
}, |
|
{ |
|
"epoch": 26.294820717131476, |
|
"grad_norm": 9.9302339553833, |
|
"learning_rate": 1.2350597609561753e-05, |
|
"loss": 0.0914, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 26.693227091633467, |
|
"grad_norm": 11.051177024841309, |
|
"learning_rate": 1.102257636122178e-05, |
|
"loss": 0.0836, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.718, |
|
"eval_f1": 0.7104355302219595, |
|
"eval_loss": 1.1709084510803223, |
|
"eval_precision": 0.7172922964310544, |
|
"eval_recall": 0.7163028008735083, |
|
"eval_runtime": 17.5336, |
|
"eval_samples_per_second": 28.517, |
|
"eval_steps_per_second": 3.593, |
|
"step": 6777 |
|
}, |
|
{ |
|
"epoch": 27.09163346613546, |
|
"grad_norm": 10.727697372436523, |
|
"learning_rate": 9.694555112881806e-06, |
|
"loss": 0.0986, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 27.49003984063745, |
|
"grad_norm": 11.817888259887695, |
|
"learning_rate": 8.366533864541832e-06, |
|
"loss": 0.07, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 27.888446215139442, |
|
"grad_norm": 10.357769966125488, |
|
"learning_rate": 7.03851261620186e-06, |
|
"loss": 0.0775, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.722, |
|
"eval_f1": 0.7145836341124611, |
|
"eval_loss": 1.2422434091567993, |
|
"eval_precision": 0.7256025662918439, |
|
"eval_recall": 0.720296105512437, |
|
"eval_runtime": 17.5709, |
|
"eval_samples_per_second": 28.456, |
|
"eval_steps_per_second": 3.585, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 28.286852589641434, |
|
"grad_norm": 10.4796142578125, |
|
"learning_rate": 5.710491367861886e-06, |
|
"loss": 0.0713, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 28.685258964143426, |
|
"grad_norm": 9.803996086120605, |
|
"learning_rate": 4.382470119521913e-06, |
|
"loss": 0.0752, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.728, |
|
"eval_f1": 0.7205734767025089, |
|
"eval_loss": 1.2145317792892456, |
|
"eval_precision": 0.730059540405073, |
|
"eval_recall": 0.7263936664880468, |
|
"eval_runtime": 17.625, |
|
"eval_samples_per_second": 28.369, |
|
"eval_steps_per_second": 3.574, |
|
"step": 7279 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 7530, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.995893225012062e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|